Merge branch 'ppc-audit-fix/p3-isolated-high' — Phase 3 isolated HIGH bugs
Phase 3 of the PPC instruction audit fix application. Six commits land six independent (or coupled) PPCBUG fixes: -cedee3c: PPCBUG-510 stvewx128 16-byte corruption → 4-byte word write -52ece4b: PPCBUG-424+425 vmaddfp128/vmaddcfp128 operand swap (VA*VD+VB) -3d8e2ce: PPCBUG-053+054 32-bit CTR semantics in bcx/bclrx + mtspr CTR -d4f6ea7: PPCBUG-640+650 fmt_bc spurious bdnzge/bdzge condition suffix -2be25bd: PPCBUG-641+649 sync vs lwsync L-field disambiguation -7609dcd: PPCBUG-700 VMX128 register accessors → canary bitfield layout PPCBUG-700 was a discovery during phase end-to-end review: an independent reviewer cross-checked our va128/vb128/vd128/vx128r_rc_bit accessors against canary's `FormatVX128*` bitfield struct (xenia-canary `ppc_decode_data.h:484-663`) and found the bit positions were wrong on all four. The audit's line-2958 "confirmed-clean" assessment was based on a miscount of LSB-first packed C++ bitfields. Real Xbox 360 game code follows canary's convention, so any production VMX128 instruction with register VR >= 32 was silently mis-decoded — though no unit test exercised that path until 52ece4b's operand-swap fix exposed the inconsistency. Subsumes PPCBUG-422's prescribed Rc-bit position. Verification at merge: `cargo test --workspace --release` clean across all crates; targeted vmx128/decoder/disasm-golden tests green. Acid test (`-n 4B --parallel`) deferred to end-of-all-phases per user direction.
This commit is contained in:
@@ -3414,3 +3414,38 @@ has the wrong extraction. The disassembler was written independently and got the
|
||||
degenerate cases.
|
||||
|
||||
IDs PPCBUG-655 through PPCBUG-679 are unallocated — no further bugs found in Phase C3.
|
||||
|
||||
---
|
||||
|
||||
## Phase C4 — Post-merge audit corrections (2026-05-02)
|
||||
|
||||
### PPCBUG-700 — VMX128 register accessors disagreed with canary's bitfield layout (HIGH)
|
||||
|
||||
- **Severity**: HIGH (silent mis-decoding of any VMX128 instruction with a register >= 32)
|
||||
- **Status**: applied
|
||||
- **Locations**: `decoder.rs:138-160` (`va128`/`vb128`/`vd128`), `decoder.rs:80` (`vx128r_rc_bit`)
|
||||
- **Discovery**: independent reviewer of the P3 phase merge, comparing our rust accessors
|
||||
against canary's `FormatVX128`/`VX128_2`/`VX128_4`/`VX128_5`/`VX128_R` bitfield struct
|
||||
in `xenia-canary/src/xenia/cpu/ppc/ppc_decode_data.h:484-663`.
|
||||
- **Symptom**: this entry contradicts the audit's own line 2958 ("confirmed-clean")
|
||||
assessment. The previous audit miscounted bit-field offsets — under x86_64 LSB-first
|
||||
C++ bitfield packing, the canary fields land at:
|
||||
- `VA128 = VA128l(5) | VA128h(1)<<5 | VA128H(1)<<6` = PPC[11-15] | PPC[26]<<5 | PPC[21]<<6 (3 fields, 7 bits)
|
||||
- `VB128 = VB128l(5) | VB128h(2)<<5` = PPC[16-20] | PPC[30-31]<<5 (2 fields, 7 bits)
|
||||
- `VD128 = VD128l(5) | VD128h(2)<<5` = PPC[6-10] | PPC[28-29]<<5 (2 fields, 7 bits)
|
||||
- `Rc` (VX128_R only) = PPC[25] (host bit 6) — not PPC[27] as PPCBUG-422/562 prescribed.
|
||||
Rust code instead used va128: PPC[6-10] | PPC[29]<<5 (wrong low field, and a single high bit at the wrong position); vb128:
|
||||
PPC[16-20] | PPC[28]<<5 | PPC[30]<<6 (wrong positions); vd128: PPC[6-10] | PPC[21]<<5 |
|
||||
PPC[22]<<6 (wrong positions); vx128r_rc_bit at PPC[27].
|
||||
- **Why it lurked**: the buggy convention was internally consistent with hand-crafted
|
||||
test fixtures (which set bit 29 / 21 / 22 to encode "high" registers, matching the
|
||||
buggy accessor). Real Xbox 360 game code follows canary's convention, so any production
|
||||
encoding with VR >= 32 was silently mis-decoded — but no unit test exercised that path.
|
||||
- **Fix**: rewrite the four accessors to canary's bit positions; rewrite the
|
||||
`vmx128_test_word` helper and unit tests; re-encode the goldens for vmaddfp128/
|
||||
vmaddcfp128/vnmsubfp128/vperm128/vsrw128/vpermwi128/vrlimi128. Drop the speculative
|
||||
`key4_dt` dot-form dispatch in `decode_op6` (canary has no separate dot-form opcodes
|
||||
for VX128_R compute ops; Rc is a runtime modifier). Update `encode_vpkd3d128` test
|
||||
helper for canary's VD128h placement.
|
||||
- **Cross-reference**: invalidates the audit's confirmed-clean note at line 2958.
|
||||
Subsumes the partial fix-shape proposed in PPCBUG-422 (Rc-bit position).
|
||||
|
||||
@@ -77,7 +77,9 @@ impl DecodedInstr {
|
||||
/// Rc for VC-form vector compare instructions — PPC bit 21 = host bit 10.
|
||||
#[inline] pub fn vc_rc_bit(&self) -> bool { (self.raw >> 10) & 1 != 0 }
|
||||
/// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4.
|
||||
#[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 }
|
||||
/// VX128_R Rc bit — PPC bit 25 (host bit 6) per canary's FormatVX128_R
|
||||
/// bitfield layout. PPCBUG-700.
|
||||
#[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 6) & 1 != 0 }
|
||||
|
||||
/// IMM field for VX128_4-form instructions (vrlimi128) — 5-bit blend mask at PPC bits 11-15.
|
||||
#[inline] pub fn vx128_4_imm(&self) -> u32 { extract_bits(self.raw, 11, 15) }
|
||||
@@ -133,25 +135,30 @@ impl DecodedInstr {
|
||||
/// crbB (bits 16-20)
|
||||
#[inline] pub fn crbb(&self) -> u32 { extract_bits(self.raw, 16, 20) }
|
||||
|
||||
// VMX128 field extractors
|
||||
// VMX128 field extractors — bit positions match canary's
|
||||
// FormatVX128/VX128_2/VX128_4/VX128_5/VX128_R bitfield layout
|
||||
// (xenia-canary `ppc_decode_data.h:484-663`, LSB-first packed). PPCBUG-700.
|
||||
|
||||
/// VA128 (bits 6-10, plus bit from 29)
|
||||
/// VA128 = VA128l(5) | VA128h(1) << 5 | VA128H(1) << 6.
|
||||
/// Canonical 7-bit register selector: PPC 11-15 (low), PPC 26 (mid), PPC 21 (high).
|
||||
#[inline] pub fn va128(&self) -> usize {
|
||||
(extract_bits(self.raw, 6, 10) | (extract_bits(self.raw, 29, 29) << 5)) as usize
|
||||
(extract_bits(self.raw, 11, 15)
|
||||
| (extract_bits(self.raw, 26, 26) << 5)
|
||||
| (extract_bits(self.raw, 21, 21) << 6)) as usize
|
||||
}
|
||||
|
||||
/// VB128 (bits 16-20, plus bits from 28, 30)
|
||||
/// VB128 = VB128l(5) | VB128h(2) << 5. Canary's VB128h is a 2-bit
|
||||
/// contiguous field at PPC 30-31 (host bits 0-1).
|
||||
#[inline] pub fn vb128(&self) -> usize {
|
||||
(extract_bits(self.raw, 16, 20)
|
||||
| (extract_bits(self.raw, 28, 28) << 5)
|
||||
| (extract_bits(self.raw, 30, 30) << 6)) as usize
|
||||
| (extract_bits(self.raw, 30, 31) << 5)) as usize
|
||||
}
|
||||
|
||||
/// VD128 (bits 6-10, plus bits from 21, 22)
|
||||
/// VD128 = VD128l(5) | VD128h(2) << 5. Canary's VD128h is a 2-bit
|
||||
/// contiguous field at PPC 28-29 (host bits 2-3).
|
||||
#[inline] pub fn vd128(&self) -> usize {
|
||||
(extract_bits(self.raw, 6, 10)
|
||||
| (extract_bits(self.raw, 21, 21) << 5)
|
||||
| (extract_bits(self.raw, 22, 22) << 6)) as usize
|
||||
| (extract_bits(self.raw, 28, 29) << 5)) as usize
|
||||
}
|
||||
|
||||
/// VS128 - same encoding as VD128
|
||||
@@ -664,11 +671,11 @@ fn decode_op6(code: u32) -> PpcOpcode {
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// VMX128 compare (non-dot and dot forms).
|
||||
// Non-dot: bit 27 = 0. Dot: bit 27 = 1, but bit 25 must also be 0 to
|
||||
// distinguish from the shift/merge group (which has bit 25 = 1 when bit 27 = 1).
|
||||
// key4_nd uses bits 22-24 + bit 27 (same as original, covers non-dot).
|
||||
// key4_dt uses bits 22-24 + bit 25 + bit 27 (narrower, covers dot-only).
|
||||
// VMX128 compare (VX128_R form). Single dispatch path: bit 27 = 0 always
|
||||
// for these opcodes per canary's table (`ppc_opcode_table_gen.cc:295-305`).
|
||||
// The Rc bit is at PPC 25 (host bit 6) per the FormatVX128_R bitfield —
|
||||
// it's a runtime modifier read by the interpreter, NOT part of the
|
||||
// secondary-opcode discrimination. PPCBUG-700.
|
||||
let key4_nd = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27);
|
||||
match key4_nd {
|
||||
0b000000 => return PpcOpcode::vcmpeqfp128,
|
||||
@@ -678,16 +685,6 @@ fn decode_op6(code: u32) -> PpcOpcode {
|
||||
0b100000 => return PpcOpcode::vcmpequw128,
|
||||
_ => {}
|
||||
}
|
||||
// Dot forms: bit 27 = 1, bit 25 = 0 (key = bits22-24 + bit25 + bit27, low 3 bits)
|
||||
let key4_dt = (extract_bits(code, 22, 24) << 2) | (extract_bits(code, 25, 25) << 1) | extract_bits(code, 27, 27);
|
||||
match key4_dt {
|
||||
0b00001 => return PpcOpcode::vcmpeqfp128, // bits22-24=000, bit25=0, bit27=1
|
||||
0b00101 => return PpcOpcode::vcmpgefp128, // bits22-24=001, bit25=0, bit27=1
|
||||
0b01001 => return PpcOpcode::vcmpgtfp128, // bits22-24=010, bit25=0, bit27=1
|
||||
0b01101 => return PpcOpcode::vcmpbfp128, // bits22-24=011, bit25=0, bit27=1
|
||||
0b10001 => return PpcOpcode::vcmpequw128, // bits22-24=100, bit25=0, bit27=1
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// VMX128 shift/merge
|
||||
let key5 = (extract_bits(code, 22, 25) << 2) | extract_bits(code, 27, 27);
|
||||
@@ -1055,21 +1052,26 @@ mod tests {
|
||||
// used those extractors). Each test poke-bits exactly the slots the
|
||||
// accessor reads and asserts the assembled register number.
|
||||
|
||||
fn vmx128_test_word(va21: u32, vd6_10: u32, vd21: u32, vd22: u32,
|
||||
vb16_20: u32, vb28: u32, vb30: u32) -> u32 {
|
||||
// PPC bit i -> LSB position 31-i.
|
||||
(vd6_10 << (31 - 10))
|
||||
| (va21 << (31 - 21)) // va128 high bit at PPC 29 in some forms — kept 0 here
|
||||
| (vd21 << (31 - 21))
|
||||
| (vd22 << (31 - 22))
|
||||
| (vb16_20 << (31 - 20))
|
||||
| (vb28 << (31 - 28))
|
||||
| (vb30 << (31 - 30))
|
||||
/// Build a VMX128 test word for the canary-compliant register layout.
|
||||
/// `vd128 = vd_lo | (vd_hi << 5)` where vd_lo is 5 bits (PPC 6-10) and
|
||||
/// vd_hi is 2 bits (PPC 28-29). Same shape for vb128 (vb_lo at PPC 16-20,
|
||||
/// vb_hi 2 bits at PPC 30-31). va128 = va_lo | (va_h26<<5) | (va_h21<<6)
|
||||
/// per canary's 7-bit VA selector.
|
||||
fn vmx128_test_word(vd_lo: u32, vd_hi: u32, va_lo: u32, va_h26: u32, va_h21: u32,
|
||||
vb_lo: u32, vb_hi: u32) -> u32 {
|
||||
// PPC bit i -> host bit (31-i).
|
||||
(vd_lo << (31 - 10)) // VD128l: PPC 6-10 = host 21-25
|
||||
| (vd_hi << (31 - 29)) // VD128h: PPC 28-29 = host 2-3 (LSB at host 2)
|
||||
| (va_lo << (31 - 15)) // VA128l: PPC 11-15 = host 16-20
|
||||
| (va_h26 << (31 - 26)) // VA128h: PPC 26 = host 5
|
||||
| (va_h21 << (31 - 21)) // VA128H: PPC 21 = host 10
|
||||
| (vb_lo << (31 - 20)) // VB128l: PPC 16-20 = host 11-15
|
||||
| (vb_hi << (31 - 31)) // VB128h: PPC 30-31 = host 0-1 (LSB at host 0)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_low_5_bits_only() {
|
||||
// vd_lo = 0..31, vd_b21 = 0, vd_b22 = 0 → vd128 = vd_lo
|
||||
// vd_lo = 0..31, vd_hi = 0 → vd128 = vd_lo
|
||||
for r in 0..32u32 {
|
||||
let raw = (r as u32) << (31 - 10);
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
@@ -1078,45 +1080,51 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_bit21_adds_32() {
|
||||
// vd_lo = 0, vd_b21 = 1, vd_b22 = 0 → vd128 = 32
|
||||
let raw = (0u32 << (31 - 10)) | (1u32 << (31 - 21));
|
||||
fn vmx128_vd128_high_low_bit_adds_32() {
|
||||
// vd_lo = 0, VD128h = 0b01 (LSB only at host bit 2 = PPC 29) → vd128 = 32
|
||||
let raw = (1u32 << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_bit22_adds_64() {
|
||||
// vd_lo = 0, vd_b21 = 0, vd_b22 = 1 → vd128 = 64
|
||||
let raw = (0u32 << (31 - 10)) | (1u32 << (31 - 22));
|
||||
fn vmx128_vd128_high_high_bit_adds_64() {
|
||||
// vd_lo = 0, VD128h = 0b10 (MSB only at host bit 3 = PPC 28) → vd128 = 64
|
||||
let raw = (1u32 << (31 - 28));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vd128_full_127() {
|
||||
// vd_lo = 31, vd_b21 = 1, vd_b22 = 1 → vd128 = 127
|
||||
// vd_lo = 31, VD128h = 0b11 → vd128 = 127
|
||||
let raw = (31u32 << (31 - 10))
|
||||
| (1u32 << (31 - 21))
|
||||
| (1u32 << (31 - 22));
|
||||
| (1u32 << (31 - 28))
|
||||
| (1u32 << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 127);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_va128_uses_bit29() {
|
||||
// va128 = bits 6-10 + bit 29. va_lo = 7, bit 29 = 1 → va128 = 7 | 32 = 39.
|
||||
let raw = (7u32 << (31 - 10)) | (1u32 << (31 - 29));
|
||||
fn vmx128_va128_canary_layout() {
|
||||
// va_lo = 7 at PPC 11-15, VA128h = 1 at PPC 26 → va128 = 7 | 32 = 39
|
||||
let raw = (7u32 << (31 - 15)) | (1u32 << (31 - 26));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.va128(), 39);
|
||||
// VA128H = 1 at PPC 21 → va128 += 64 = 103
|
||||
let raw = raw | (1u32 << (31 - 21));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.va128(), 7 | 32 | 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmx128_vb128_uses_bits28_and_30() {
|
||||
// vb128 = bits 16-20 + bit 28 + bit 30. Low 5 = 5, bit 28 = 1 → +32, bit 30 = 1 → +64.
|
||||
let raw = (5u32 << (31 - 20))
|
||||
| (1u32 << (31 - 28))
|
||||
| (1u32 << (31 - 30));
|
||||
fn vmx128_vb128_uses_bits30_31() {
|
||||
// vb_lo = 5 at PPC 16-20. VB128h = 0b01 (LSB at PPC 31 = host 0) → +32.
|
||||
// VB128h = 0b11 → +96.
|
||||
let raw = (5u32 << (31 - 20)) | (1u32 << (31 - 31));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vb128(), 5 | 32);
|
||||
let raw = raw | (1u32 << (31 - 30));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vb128(), 5 | 32 | 64);
|
||||
}
|
||||
@@ -1126,11 +1134,9 @@ mod tests {
|
||||
// vs128 must always equal vd128.
|
||||
for r in [0u32, 31, 32, 64, 96, 127] {
|
||||
let lo = r & 0x1F;
|
||||
let b21 = (r >> 5) & 1;
|
||||
let b22 = (r >> 6) & 1;
|
||||
let hi = (r >> 5) & 0x3;
|
||||
let raw = (lo << (31 - 10))
|
||||
| (b21 << (31 - 21))
|
||||
| (b22 << (31 - 22));
|
||||
| (hi << (31 - 29));
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), r as usize, "vd128 mismatch for r={r}");
|
||||
assert_eq!(d.vs128(), r as usize, "vs128 mismatch for r={r}");
|
||||
@@ -1142,7 +1148,8 @@ mod tests {
|
||||
#[allow(dead_code)]
|
||||
fn _vmx128_test_word_helper_compiles() {
|
||||
// Keep the helper validated against the real accessor.
|
||||
let raw = vmx128_test_word(0, 5, 1, 1, 0, 0, 0);
|
||||
// vd_lo=5, vd_hi=0b11 → vd128 = 5 | 96 = 101
|
||||
let raw = vmx128_test_word(5, 3, 0, 0, 0, 0, 0);
|
||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||
assert_eq!(d.vd128(), 5 | 32 | 64);
|
||||
}
|
||||
|
||||
@@ -361,7 +361,15 @@ pub fn format(instr: &DecodedInstr) -> DisasmText {
|
||||
PpcOpcode::dcbz => fmt_cache(instr, "dcbz"),
|
||||
PpcOpcode::dcbz128 => fmt_cache(instr, "dcbz128"),
|
||||
PpcOpcode::icbi => fmt_cache(instr, "icbi"),
|
||||
PpcOpcode::sync => base("sync", String::new(), 0),
|
||||
PpcOpcode::sync => {
|
||||
// L-field at PPC bit 10 (host bit 21) selects lwsync (L=1), the
|
||||
// acquire barrier in every Xbox 360 spinlock. PPCBUG-641.
|
||||
if (instr.raw >> 21) & 1 == 1 {
|
||||
with_ext("sync", String::new(), 0, "lwsync", String::new(), 0)
|
||||
} else {
|
||||
base("sync", String::new(), 0)
|
||||
}
|
||||
}
|
||||
PpcOpcode::eieio => base("eieio", String::new(), 0),
|
||||
PpcOpcode::isync => base("isync", String::new(), 0),
|
||||
|
||||
@@ -828,7 +836,10 @@ fn fmt_bc(instr: &DecodedInstr) -> DisasmText {
|
||||
|
||||
if decr {
|
||||
let z = if bo & 0x02 != 0 { "z" } else { "nz" };
|
||||
let cond_str = cond_name_opt.unwrap_or("");
|
||||
// BO bit 4 (uncond) means CR is ignored — pure CTR-decrement branch.
|
||||
// Without this guard, bdnz/bdz would emit a spurious `ge` suffix derived
|
||||
// from the don't-care BI=0 / cond_true=false pair (PPCBUG-640).
|
||||
let cond_str = if uncond { "" } else { cond_name_opt.unwrap_or("") };
|
||||
let ext_mnem = format!("bd{z}{cond_str}{a}{l}");
|
||||
let ext_ops = format!("{cr}0x{target:08X}");
|
||||
with_ext(&base_mnem, base_ops, 8, &ext_mnem, ext_ops, 8)
|
||||
|
||||
@@ -846,7 +846,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
}
|
||||
|
||||
let ctr_ok = (bo & 0b00100) != 0
|
||||
|| ((ctx.ctr != 0) ^ ((bo & 0b00010) != 0));
|
||||
|| (((ctx.ctr as u32) != 0) ^ ((bo & 0b00010) != 0));
|
||||
let cond_ok = (bo & 0b10000) != 0
|
||||
|| (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0));
|
||||
|
||||
@@ -876,7 +876,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
}
|
||||
|
||||
let ctr_ok = (bo & 0b00100) != 0
|
||||
|| ((ctx.ctr != 0) ^ ((bo & 0b00010) != 0));
|
||||
|| (((ctx.ctr as u32) != 0) ^ ((bo & 0b00010) != 0));
|
||||
let cond_ok = (bo & 0b10000) != 0
|
||||
|| (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0));
|
||||
|
||||
@@ -1520,7 +1520,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
match spr {
|
||||
crate::context::spr::XER => ctx.set_xer(val as u32),
|
||||
crate::context::spr::LR => ctx.lr = val,
|
||||
crate::context::spr::CTR => ctx.ctr = val,
|
||||
crate::context::spr::CTR => ctx.ctr = val as u32 as u64,
|
||||
crate::context::spr::DEC => ctx.dec = val as u32,
|
||||
crate::context::spr::TBL_WRITE => {
|
||||
ctx.timebase = (ctx.timebase & 0xFFFF_FFFF_0000_0000) | (val & 0xFFFF_FFFF);
|
||||
@@ -1919,11 +1919,10 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::vmaddfp128 => {
|
||||
// VMX128 form: vD <- (vA * vB) + vD (vD reused as accumulator;
|
||||
// Canary `InstrEmit_vmaddfp128` routes guest VA/VB/VD through
|
||||
// `InstrEmit_vmaddfp_` with arg order swapped so the resulting
|
||||
// HIR computation is `VA * VB + VD`). Same unconditional denorm
|
||||
// flush of all three inputs as scalar `vmaddfp`.
|
||||
// ISA: (VD) <- (VA × VD) + VB. VD is both the second multiplicand and destination.
|
||||
// Canary InstrEmit_vmaddfp128 (ppc_emit_altivec.cc:806-809): MulAdd(VA, VD, VB).
|
||||
// Previous code computed ai.mul_add(bi, di) = VA×VB+VD — VB and VD roles swapped
|
||||
// (PPCBUG-424). Fix: ai.mul_add(di, bi) = VA×VD+VB.
|
||||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||||
let d = ctx.vr[instr.vd128()].as_f32x4();
|
||||
@@ -1932,7 +1931,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
let ai = vmx::flush_denorm(a[i]);
|
||||
let bi = vmx::flush_denorm(b[i]);
|
||||
let di = vmx::flush_denorm(d[i]);
|
||||
r[i] = ai.mul_add(bi, di);
|
||||
r[i] = ai.mul_add(di, bi);
|
||||
}
|
||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||
ctx.pc += 4;
|
||||
@@ -2982,16 +2981,21 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::stvewx128 => {
|
||||
let ea = ea_indexed(ctx, instr) & !0xF;
|
||||
// TODO PPCBUG-510: stvewx128 currently writes 16 bytes at ea & !0xF; the EA scope is
|
||||
// wrong (should be word-aligned, 4 bytes only). When P3 fixes EA, this invalidate's
|
||||
// range narrows automatically.
|
||||
// PPCBUG-512: stvewx128 was missing invalidate_for_write.
|
||||
// Mirror of stvewx: word-align EA, extract one 32-bit lane, write 4 bytes only.
|
||||
// Previous code used & !0xF (16-byte) and wrote all 16 bytes, corrupting 12
|
||||
// adjacent bytes on every execution (PPCBUG-510).
|
||||
let ea_unaligned = ea_indexed(ctx, instr);
|
||||
let ea = ea_unaligned & !0x3u32;
|
||||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||||
}
|
||||
let slot = ((ea_unaligned & 0xF) >> 2) as usize;
|
||||
let bytes = ctx.vr[instr.vs128()].as_bytes();
|
||||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||||
let w = ((bytes[slot * 4] as u32) << 24)
|
||||
| ((bytes[slot * 4 + 1] as u32) << 16)
|
||||
| ((bytes[slot * 4 + 2] as u32) << 8)
|
||||
| (bytes[slot * 4 + 3] as u32);
|
||||
mem.write_u32(ea, w);
|
||||
ctx.pc += 4;
|
||||
}
|
||||
|
||||
@@ -4292,11 +4296,11 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
// ═════════════════════════════════════════════════════════════════
|
||||
// §4j — VMX128 FMA / permute
|
||||
// ═════════════════════════════════════════════════════════════════
|
||||
// vmaddcfp128: vD = vD * vB + vA (using vD's current value as accumulator)
|
||||
// vmaddcfp128: ISA (VD) <- (VA × VD) + VB — same operation as vmaddfp128
|
||||
PpcOpcode::vmaddcfp128 => {
|
||||
// Xbox-360-specific: vD = (vD * vB) + vA. Note the VD-reuse: VD is both
|
||||
// a source operand (as multiplicand) and the destination. Canary &
|
||||
// POWER8 hardware confirm denormal inputs are flushed regardless of NJ.
|
||||
// ISA: (VD) <- (VA × VD) + VB. Canary InstrEmit_vmaddcfp128 (cc:819): MulAdd(VA, VD, VB).
|
||||
// Previous code computed di.mul_add(bi, ai) = VD×VB+VA — both operands wrong
|
||||
// (PPCBUG-425). Fix: ai.mul_add(di, bi) = VA×VD+VB.
|
||||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||||
let d = ctx.vr[instr.vd128()].as_f32x4();
|
||||
@@ -4305,7 +4309,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
let ai = vmx::flush_denorm(a[i]);
|
||||
let bi = vmx::flush_denorm(b[i]);
|
||||
let di = vmx::flush_denorm(d[i]);
|
||||
r[i] = di.mul_add(bi, ai);
|
||||
r[i] = ai.mul_add(di, bi);
|
||||
}
|
||||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||||
ctx.pc += 4;
|
||||
@@ -5319,32 +5323,64 @@ mod tests {
|
||||
}
|
||||
|
||||
/// VMX128 variant `vmaddfp128 vD, vA, vB` (primary op 5, key2 = 0b001101)
|
||||
/// reuses vD as the accumulator: `vD <- (vA * vB) + vD`. Canary
|
||||
/// reuses vD as the second multiplicand (and destination): `vD <- (vA × vD) + vB`. Canary
|
||||
/// `ppc_emit_altivec.cc:786-810` flushes *all three* inputs
|
||||
/// unconditionally before the fused multiply-add — the 128-bit form
|
||||
/// must match the scalar `vmaddfp` behaviour. Prior to this fix the
|
||||
/// interpreter skipped the flush, leaving subnormal noise in math-
|
||||
/// heavy game code.
|
||||
/// unconditionally before the fused multiply-add.
|
||||
#[test]
|
||||
fn vmaddfp128_flushes_denormal_inputs() {
|
||||
let mut ctx = PpcContext::new();
|
||||
let mut mem = TestMem::new();
|
||||
let denorm = f32::from_bits(1);
|
||||
// vA=v2 carries denorms, which is also vD's accumulator input.
|
||||
// VA=v1, VD=v2, VB=v3 — all carry denormals.
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
|
||||
ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
|
||||
// vB=v3 = 1.0 — denormal input survives only if not flushed.
|
||||
ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([1.0f32; 4]);
|
||||
// vmaddfp128 vD=v2, vA=v2, vB=v3: low 5 bits 00010 shared
|
||||
// between vA and vD, vB=3 at PPC bits 16-20, key2=0b001101.
|
||||
let raw: u32 = 0x1440_18D0;
|
||||
ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
|
||||
// vmaddfp128 vD=v2, vA=v1, vB=v3: op6=5, vd_lo=2, va_lo=1, vb_lo=3, key2=0b001101.
|
||||
// VA×VD+VB: all three flushed → 0*0+0 = 0.
|
||||
let raw: u32 = (5u32 << 26) | (2 << 21) | (1 << 16) | (3 << 11) | (3 << 6) | (1 << 4);
|
||||
write_instr(&mut mem, 0, raw);
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
// Without flush: denorm*1.0 + denorm = 2*denorm ≠ 0.
|
||||
// With flush: 0*0 + 0 = 0.
|
||||
assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]);
|
||||
}
|
||||
|
||||
// ---- PPCBUG-424+425: vmaddfp128/vmaddcfp128 operand swap ----
|
||||
// ISA for both: (VD) <- (VA × VD) + VB. Previous code computed VA×VB+VD and VD×VB+VA.
|
||||
// Test uses distinct VA, VB, VD registers so the swap is visible.
|
||||
// Encoding: op6=5, key2=0b001101 (vmaddfp128) / 0b010001 (vmaddcfp128).
|
||||
// VA=v1=[2.0], VB=v2=[10.0], VD=v3=[3.0] → expected 2.0×3.0+10.0 = 16.0.
|
||||
// Buggy vmaddfp128: 2.0×10.0+3.0 = 23.0. Buggy vmaddcfp128: 3.0×10.0+2.0 = 32.0.
|
||||
|
||||
#[test]
|
||||
fn vmaddfp128_operand_order_va_times_vd_plus_vb() {
|
||||
let mut ctx = PpcContext::new();
|
||||
let mut mem = TestMem::new();
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([2.0f32; 4]); // VA=v1
|
||||
ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([10.0f32; 4]); // VB=v2
|
||||
ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([3.0f32; 4]); // VD=v3 (also destination)
|
||||
// vmaddfp128 vD=v3, vA=v1, vB=v2 — op5, key2=0b001101 (bits22-25=3, bit27=1)
|
||||
let raw: u32 = (5u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (1 << 4);
|
||||
write_instr(&mut mem, 0, raw);
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
assert_eq!(ctx.vr[3].as_f32x4(), [16.0f32; 4], "VA×VD+VB = 2*3+10 = 16");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vmaddcfp128_operand_order_va_times_vd_plus_vb() {
|
||||
let mut ctx = PpcContext::new();
|
||||
let mut mem = TestMem::new();
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([2.0f32; 4]); // VA=v1
|
||||
ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([10.0f32; 4]); // VB=v2
|
||||
ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([3.0f32; 4]); // VD=v3
|
||||
// vmaddcfp128 vD=v3, vA=v1, vB=v2 — op5, key2=0b010001 (bits22-25=4, bit27=1)
|
||||
let raw: u32 = (5u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (4 << 6) | (1 << 4);
|
||||
write_instr(&mut mem, 0, raw);
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
assert_eq!(ctx.vr[3].as_f32x4(), [16.0f32; 4], "VA×VD+VB = 2*3+10 = 16");
|
||||
}
|
||||
|
||||
/// VMX128 `vnmsubfp128 vD, vA, vB` (key2 = 0b010101). Canary
|
||||
/// `ppc_emit_altivec.cc:1133-1160` flushes all three inputs in the
|
||||
/// helper. Semantics: `vD <- -((vA * vB) - vD) = vD - vA*vB`.
|
||||
@@ -5798,6 +5834,59 @@ mod tests {
|
||||
assert_eq!(ctx.timebase >> 32, 0xAAAA_BBBB);
|
||||
}
|
||||
|
||||
// PPCBUG-053: bcx CTR zero-test must use 32-bit comparison. When prior
|
||||
// 64-bit pollution (e.g. via negx → mtctr) leaves CTR upper 32 bits
|
||||
// non-zero, the 64-bit `ctx.ctr != 0` would loop forever even when the
|
||||
// 32-bit counter has decremented to zero.
|
||||
#[test]
|
||||
fn bcx_bdnz_uses_32bit_ctr_compare() {
|
||||
let mut ctx = PpcContext::new();
|
||||
let mut mem = TestMem::new();
|
||||
ctx.ctr = 0x0000_0001_0000_0001;
|
||||
// bdnz +8: BO=16 (decrement, branch if CTR!=0, ignore CR), BI=0, BD/4=2
|
||||
let raw = (16u32 << 26) | (16 << 21) | (0 << 16) | (2 << 2);
|
||||
write_instr(&mut mem, 0, raw);
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
// After decrement: low 32 = 0, high 32 = 1. 32-bit test says zero → no branch.
|
||||
assert_eq!(ctx.ctr, 0x0000_0001_0000_0000);
|
||||
assert_eq!(ctx.pc, 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bclrx_uses_32bit_ctr_compare() {
|
||||
let mut ctx = PpcContext::new();
|
||||
let mut mem = TestMem::new();
|
||||
ctx.ctr = 0x0000_0001_0000_0001;
|
||||
ctx.lr = 0x100;
|
||||
// bdnzlr: opcode 19, BO=16 (decrement, branch if CTR!=0), BI=0, XO=16
|
||||
let raw = (19u32 << 26) | (16 << 21) | (0 << 16) | (16 << 1);
|
||||
write_instr(&mut mem, 0, raw);
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
// 32-bit CTR=0 after decrement → don't branch to LR.
|
||||
assert_eq!(ctx.ctr, 0x0000_0001_0000_0000);
|
||||
assert_eq!(ctx.pc, 4);
|
||||
}
|
||||
|
||||
// PPCBUG-054: mtspr CTR must truncate the source GPR to 32 bits, matching
|
||||
// canary's `f.Truncate(ctr, INT32_TYPE)`. Prevents upstream 64-bit GPR
|
||||
// pollution from poisoning the 32-bit CTR counter independently of the
|
||||
// bcx zero-test fix.
|
||||
#[test]
|
||||
fn mtspr_ctr_truncates_to_32_bits() {
|
||||
let mut ctx = PpcContext::new();
|
||||
let mut mem = TestMem::new();
|
||||
ctx.gpr[3] = 0xFFFF_FFFF_8000_0001;
|
||||
// mtspr CTR (9), r3
|
||||
let spr_swapped = ((9u32 & 0x1F) << 5) | ((9u32 >> 5) & 0x1F);
|
||||
let raw = (31u32 << 26) | (3 << 21) | (spr_swapped << 11) | (467 << 1);
|
||||
write_instr(&mut mem, 0, raw);
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mut mem);
|
||||
assert_eq!(ctx.ctr, 0x8000_0001);
|
||||
}
|
||||
|
||||
// ---------- Block-cache parity tests ----------
|
||||
//
|
||||
// These confirm that running a program through the basic-block
|
||||
@@ -6395,13 +6484,25 @@ mod tests {
|
||||
// For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant). Since PPCBUG-700 they
|
||||
// no longer feed vd128: VD's two high bits sit at PPC 28-29, so the full register number (0-127) is encoded.
|
||||
|
||||
fn encode_vpkd3d128(vd_lo: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
|
||||
// op6=6 (all VMX128 compute ops); VD[4:0] at host 25-21; IMM at host 20-16;
|
||||
// VB[4:0] at host 15-11; host bits 10,9 = 1,1 (PPC bits 21,22, key2=0b110);
|
||||
// z (2-bit) at host 7-6 (PPC bits 24-25); host bit 4 = 1 (PPC bit 27, key2 low=0b01).
|
||||
// decode_op6 key2 = (bits21-23<<4)|(bits26-27) = (0b110<<4)|0b01 = 0b1100001.
|
||||
(6u32 << 26) | (vd_lo << 21) | (imm << 16) | (vb_lo << 11)
|
||||
| (1 << 10) | (1 << 9) | (z << 6) | (1 << 4)
|
||||
fn encode_vpkd3d128(vd: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
|
||||
// op6=6, FormatVX128_4 layout (canary):
|
||||
// VD low at PPC 6-10 (host 21-25); VD high (2 bits) at PPC 28-29 (host 2-3).
|
||||
// IMM at PPC 11-15; VB low at PPC 16-20.
|
||||
// z (2-bit) at PPC 24-25 (host 6-7).
|
||||
// key2 = 0b1100001 over bits 21-23 + 26-27:
|
||||
// bits 21-23 = 0b110 → bit 21=1, bit 22=1, bit 23=0
|
||||
// bits 26-27 = 0b01 → bit 26=0, bit 27=1
|
||||
let vd_lo = vd & 0x1F;
|
||||
let vd_hi = (vd >> 5) & 0x3;
|
||||
(6u32 << 26)
|
||||
| (vd_lo << 21)
|
||||
| (vd_hi << 2)
|
||||
| (imm << 16)
|
||||
| (vb_lo << 11)
|
||||
| (1 << 10) // bit 21 (key2)
|
||||
| (1 << 9) // bit 22 (key2)
|
||||
| (z << 6) // z at PPC 24-25
|
||||
| (1 << 4) // bit 27 (key2)
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -6415,7 +6516,7 @@ mod tests {
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
|
||||
// prev vd=96: sentinel values that should NOT appear in result
|
||||
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD);
|
||||
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 0, 0));
|
||||
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 0, 0));
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mem);
|
||||
let r = ctx.vr[96].as_u32x4();
|
||||
@@ -6435,7 +6536,7 @@ mod tests {
|
||||
let mem = TestMem::new();
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000
|
||||
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
|
||||
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 0));
|
||||
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 0));
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mem);
|
||||
let r = ctx.vr[96].as_u32x4();
|
||||
@@ -6454,7 +6555,7 @@ mod tests {
|
||||
let mem = TestMem::new();
|
||||
ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000
|
||||
ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
|
||||
write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 3));
|
||||
write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 3));
|
||||
ctx.pc = 0;
|
||||
step(&mut ctx, &mem);
|
||||
let r = ctx.vr[96].as_u32x4();
|
||||
@@ -6463,4 +6564,47 @@ mod tests {
|
||||
assert_eq!(r[2], 0x3333_3333, "pack=1 shift=3: lane 2 from prev");
|
||||
assert_eq!(r[3], 0x4444_4444, "pack=1 shift=3: lane 3 from prev");
|
||||
}
|
||||
|
||||
// ---- PPCBUG-510: stvewx128 should write one word (4 bytes), not 16 ----
|
||||
|
||||
/// Build the raw 32-bit word for a `stvewx128` instruction (test helper).
///
/// Field placement, in host bit numbering (bit 0 = LSB; PPC bit = 31 - host bit):
/// - primary opcode 4 at host bits 31-26;
/// - `vs_lo` (VS128[4:0]) at host bits 25-21;
/// - `ra` at host bits 20-16 and `rb` at host bits 15-11;
/// - the `decode_op4` key1 pattern `0b00110000011`, stored as `0b0011000`
///   in host bits 10-4 (PPC bits 21-27) and `0b11` in host bits 1-0
///   (PPC bits 30-31).
///
/// Every register argument is masked to its 5-bit field, so an out-of-range
/// value cannot spill into a neighbouring field or alter the primary opcode.
///
/// NOTE(review): an earlier comment placed VS128[5] at host bit 3 and
/// VS128[6] at host bit 1. Host bit 1 is always set by the key pattern
/// below, while host bits 3-2 are always zero in this encoding. The high
/// register bits are presumably at PPC 28-29 (host 3-2) as in canary's
/// VMX128 layout — confirm against the decoder.
fn encode_stvewx128(vs_lo: u32, ra: u32, rb: u32) -> u32 {
    const PRIMARY_OPCODE: u32 = 4 << 26;
    const FIVE_BIT_FIELD: u32 = 0x1F;
    // bits 21-27 of the key1 pattern
    const KEY1_PPC_21_27: u32 = 0b0011000 << 4;
    // bits 30-31 of the key1 pattern
    const KEY1_PPC_30_31: u32 = 0b11;

    PRIMARY_OPCODE
        | ((vs_lo & FIVE_BIT_FIELD) << 21) // VS128[4:0]
        | ((ra & FIVE_BIT_FIELD) << 16) // RA
        | ((rb & FIVE_BIT_FIELD) << 11) // RB
        | KEY1_PPC_21_27
        | KEY1_PPC_30_31
}
|
||||
|
||||
#[test]
fn stvewx128_writes_one_word_at_word_aligned_ea() {
    // PPCBUG-510: old code wrote all 16 bytes at ea & !0xF, corrupting 12 adjacent bytes.
    // Fix: word-align EA, extract lane from (ea & 0xF) >> 2, write 4 bytes only.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();

    // Source register: encode_stvewx128 only populates the low five register
    // bits, so vs_lo = 3 is expected to select v3. This test relies on the
    // high VS128 bits of the encoding decoding to zero.
    let raw = encode_stvewx128(3, 1, 2);
    ctx.vr[3] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    ctx.gpr[1] = 0x1000; // base
    ctx.gpr[2] = 0x008; // offset → EA = 0x1008 → word-aligned EA = 0x1008, slot = (0x8 & 0xF)>>2 = 2

    write_instr(&mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mem);

    assert_eq!(ctx.pc, 4, "PC must advance");
    // Slot 2 → lane 2 = 0x3333_3333
    assert_eq!(mem.read_u32(0x1008), 0x3333_3333, "only lane 2 word at ea");

    // Every other word of the enclosing 16-byte line must be untouched
    // (the original test assumes mem is zero-init). The old implementation
    // overwrote the whole line starting at ea & !0xF, so 0x1004 — the word
    // directly below the target — is checked as well.
    assert_eq!(mem.read_u32(0x1000), 0x0000_0000, "word at 0x1000 must be untouched");
    assert_eq!(mem.read_u32(0x1004), 0x0000_0000, "word at 0x1004 must be untouched");
    assert_eq!(mem.read_u32(0x100C), 0x0000_0000, "word at 0x100C must be untouched");
}
|
||||
}
|
||||
|
||||
@@ -50,6 +50,23 @@ fn fixture_path(name: &str) -> PathBuf {
|
||||
.join(name)
|
||||
}
|
||||
|
||||
/// Assemble a VMX128 VX128-form (or VX128_R/_2) instruction word using
/// canary's 7-bit register layout.
///
/// Each register is split across the word (PPC bit = 31 - host bit):
/// - `vd`: low five bits at PPC 6-10, high two bits at PPC 28-29;
/// - `va`: low five bits at PPC 11-15, bit 5 at PPC 26, bit 6 at PPC 21;
/// - `vb`: low five bits at PPC 16-20, high two bits at PPC 30-31.
///
/// `op6` is truncated to six bits and placed in the top of the word.
/// `secondary_bits` is OR-ed in unchanged; the caller uses it for the
/// secondary opcode, VC, Rc and any key bits.
fn encode_vx128(op6: u32, vd: u32, va: u32, vb: u32, secondary_bits: u32) -> u32 {
    let primary = (op6 & 0x3F) << 26;

    // VD: five low bits, then the two-bit high part.
    let vd_low = (vd & 0x1F) << 21;
    let vd_high = ((vd >> 5) & 0x3) << 2;

    // VA: five low bits, then two separately placed single bits.
    let va_low = (va & 0x1F) << 16;
    let va_mid = ((va >> 5) & 0x1) << 5;
    let va_top = ((va >> 6) & 0x1) << 10;

    // VB: five low bits, with the two-bit high part in the lowest host bits.
    let vb_low = (vb & 0x1F) << 11;
    let vb_high = (vb >> 5) & 0x3;

    primary | vd_low | vd_high | va_low | va_mid | va_top | vb_low | vb_high | secondary_bits
}
|
||||
|
||||
fn build_rows(cases: &[(u32, u32, &str)]) -> Vec<GoldenRow> {
|
||||
cases
|
||||
.iter()
|
||||
@@ -428,77 +445,57 @@ fn vmx128_registers() {
|
||||
((4u32 << 26) | (5 << 11) | 1604, 0x82000000, "mtvscr v5"),
|
||||
];
|
||||
|
||||
// VMX128 op=5 — uses vd128/va128/vb128 (7-bit registers, high bits at
|
||||
// 21+22). These are the silent-bug-area encodings; we exercise low
|
||||
// register indices here because the secondary-opcode key for op=5
|
||||
// includes bits 21-22, constraining vd128 high bits to 0 in this form.
|
||||
// High-index examples for vd128 live in the op=6 series below.
|
||||
// VMX128 op=5: vperm128 v3, v4, v5, vc=0. Canary FormatVX128: VD low
|
||||
// at PPC 6-10, VA low at PPC 11-15, VB low at PPC 16-20, VC at PPC 23-25.
|
||||
// key1 = (bit22<<5)|bit27 = 0 selects vperm128.
|
||||
let vmx128_op5 = [
|
||||
// vaddfp128 v3, v4, v5 : op=5, key2=0b000001
|
||||
((5u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (0 << 6) | (1 << 0), 0x82000000, "vaddfp128 (encoded sloppily)"),
|
||||
(encode_vx128(5, 3, 4, 5, 0), 0x82000000, "vperm128 v3, v4, v5, 0 (canary)"),
|
||||
];
|
||||
|
||||
// VMX128 op=6 — vrlimi128 has secondary key in bits 23-25 + 26-27, so
|
||||
// bits 21-22 ARE the high bits of vd128 (canonical silent-bug-area).
|
||||
// These instructions exercise vd128 = 32, 64, 96 — covering the bit-21
|
||||
// and bit-22 split that ppc.rs's old extractor (now deleted) miscoded.
|
||||
let vrlimi128 = |vd: u32, vb: u32, imm: u32, z: u32| -> u32 {
|
||||
// op=6, vd128 = bits 6-10 + bit 21 + bit 22, vb128 = bits 16-20 + bits 30+31,
|
||||
// IMM = bits 11-15, Z = bits 24-25, key2 = (bits 23-25 << 4) | bits 26-27 = 0b1110001
|
||||
let vd_lo = vd & 0x1F;
|
||||
let vd_b21 = (vd >> 5) & 1;
|
||||
let vd_b22 = (vd >> 6) & 1;
|
||||
let vb_lo = vb & 0x1F;
|
||||
let vb_b30 = (vb >> 5) & 1;
|
||||
let vb_b31 = (vb >> 6) & 1;
|
||||
// bits 23-25 = 111, bits 26-27 = 00, bit 27 = 1 → key2 lower 4 bits = 0001
|
||||
// Encoded: bits 23-25 = 111, bits 26-27 = 00 are actually overlapping with z field (bits 24-25)
|
||||
// The plan view: (bits 23 << 6) | (bits 24-25 << 4) | (bits 26-27 << 2) but the table uses different.
|
||||
// Easiest: hand-encode known bit pattern matching decoder.rs's match:
|
||||
// key2 = (extract_bits(code, 23, 25) << 4) | extract_bits(code, 26, 27) = 0b1110001
|
||||
// bits 23-25 = 111, bits 26-27 = 01
|
||||
// Bit positions 23-27 = 11101 (5 bits, MSB at 23).
|
||||
// PPC bit 23 (LSB index 8): set
|
||||
// PPC bit 24 (LSB index 7): set -- this is z bit 0
|
||||
// PPC bit 25 (LSB index 6): set -- this is z bit 1
|
||||
// PPC bit 26 (LSB index 5): unset
|
||||
// PPC bit 27 (LSB index 4): set
|
||||
// We let z = bits 24-25 stored with vd128 bits at 21-22.
|
||||
// To preserve key2 = 0b1110001, we need bits 24-25 = 11, bit 26 = 0, bit 27 = 1.
|
||||
// BUT bits 24-25 ARE the z field; if we set them = 11 the z value is 3.
|
||||
// So Z is constrained for vrlimi128. Choose Z = 3 (matches Sylpheed examples).
|
||||
let z3 = z & 0x3;
|
||||
(6u32 << 26)
|
||||
| (vd_lo << 21)
|
||||
| (imm << 16)
|
||||
| (vb_lo << 11)
|
||||
| (vd_b21 << 10) // bit 21 (LSB pos 10)
|
||||
| (vd_b22 << 9) // bit 22 (LSB pos 9)
|
||||
| (1 << 8) // bit 23
|
||||
| (z3 << 6) // bits 24-25
|
||||
| (0 << 5) // bit 26
|
||||
| (1 << 4) // bit 27
|
||||
| (vb_b30 << 1) // bit 30
|
||||
| vb_b31 // bit 31
|
||||
// VMX128 op=6 — exercise full 0-127 vd128 range under canary's layout.
|
||||
// VD128h is at PPC 28-29 (host 2-3): no overlap with secondary opcode key,
|
||||
// so vd can be freely 0-127 for any op6 instruction.
|
||||
let vsrw128 = |vd: u32, vb: u32| -> u32 {
|
||||
// vsrw128 secondary: 0x000001D0 (decode_op6 key5 = 0b011101).
|
||||
encode_vx128(6, vd, 0, vb, 0x000001D0)
|
||||
};
|
||||
let vpermwi128 = |vd: u32, vb: u32, perm: u32| -> u32 {
|
||||
// vpermwi128: PERMl at PPC 11-15, PERMh at PPC 23-25, key1 sets bit 22 + bit 27.
|
||||
let perml = perm & 0x1F;
|
||||
let permh = (perm >> 5) & 0x7;
|
||||
let mut raw = (6u32 << 26)
|
||||
| ((vd & 0x1F) << 21)
|
||||
| (((vd >> 5) & 0x3) << 2) // VD128h
|
||||
| (perml << 16)
|
||||
| ((vb & 0x1F) << 11)
|
||||
| (((vb >> 5) & 0x3) << 0) // VB128h
|
||||
| (permh << 6) // PERMh at PPC 23-25
|
||||
| (1 << 9) // bit 22 (key1 high)
|
||||
| (1 << 4); // bit 27 (key1 low)
|
||||
raw &= !(1 << 10); // PPC 21 = 0 for vpermwi128
|
||||
raw
|
||||
};
|
||||
let vrlimi128 = |vd: u32, vb: u32, imm: u32, z: u32| -> u32 {
|
||||
// vrlimi128: IMM at PPC 11-15, z at PPC 24-25, key2 = 0b1110001 over
|
||||
// bits 21-23 + 26-27 → bits 21,22,23 = 1, bit 26 = 0, bit 27 = 1.
|
||||
(6u32 << 26)
|
||||
| ((vd & 0x1F) << 21)
|
||||
| (((vd >> 5) & 0x3) << 2) // VD128h
|
||||
| ((imm & 0x1F) << 16)
|
||||
| ((vb & 0x1F) << 11)
|
||||
| (((vb >> 5) & 0x3) << 0) // VB128h
|
||||
| ((z & 0x3) << 6) // z at PPC 24-25 = host 6-7
|
||||
| (1 << 8) // bit 23 (key2)
|
||||
| (1 << 9) // bit 22 (key2)
|
||||
| (1 << 10) // bit 21 (key2)
|
||||
| (1 << 4) // bit 27 (key2)
|
||||
};
|
||||
// Note: VMX128 op6 secondary keys constrain bits 21-23. For
|
||||
// vrlimi128 (key2 = 0b1110001 over bits 21-23 + 26-27) the only
|
||||
// valid vd128 range is 96..=127 — lower values change the secondary
|
||||
// key into some other instruction. The cases below record what the
|
||||
// disassembler emits for the borderline encodings, so a regression
|
||||
// in either the lookup table or the formatter would surface here.
|
||||
let vmx128_high = [
|
||||
// bits 21-22 = 00 → key2 ≠ vrlimi128 → decodes to vsrw128 (key5
|
||||
// branch). Locks current behavior; shows the silent-bug-area
|
||||
// encoding constraint.
|
||||
(vrlimi128(0, 12, 4, 3), 0x82000000, "encoding vd_hi=00: actually vsrw128"),
|
||||
// bits 21-22 = 10 → still not vrlimi128.
|
||||
(vrlimi128(32, 12, 4, 3), 0x82000000, "encoding vd_hi=10: actually vsrw128 v32"),
|
||||
// bits 21-22 = 01 → key1 matches vpermwi128.
|
||||
(vrlimi128(64, 12, 4, 3), 0x82000000, "encoding vd_hi=01: actually vpermwi128"),
|
||||
// bits 21-22 = 11 → key2 matches vrlimi128 with vd128=96.
|
||||
(vrlimi128(96, 12, 4, 3), 0x82000000, "vrlimi128 v96, v12, 4, 3 (real)"),
|
||||
(vrlimi128(127, 127, 4, 3), 0x82000000, "vrlimi128 v127, v127, 4, 3 (real)"),
|
||||
(vsrw128(0, 12), 0x82000000, "vsrw128 v0, v0, v12 (canary, vd_hi=00)"),
|
||||
(vsrw128(32, 12), 0x82000000, "vsrw128 v32, v0, v12 (canary, VD128h=01)"),
|
||||
(vpermwi128(64, 12, 0xE4), 0x82000000, "vpermwi128 v64, v12, 0xE4 (canary, VD128h=10)"),
|
||||
(vrlimi128(96, 12, 4, 3), 0x82000000, "vrlimi128 v96, v12, 4, 3 (canary, VD128h=11)"),
|
||||
(vrlimi128(127, 95, 4, 3), 0x82000000, "vrlimi128 v127, v95, 4, 3 (canary)"),
|
||||
];
|
||||
|
||||
// Fix 4: VMX128 multiply-add 4-operand layouts. Per canary, the addend
|
||||
@@ -514,12 +511,11 @@ fn vmx128_registers() {
|
||||
// vmaddcfp128 VD, VA, VD, VB → "v3, v35, v3, v5"
|
||||
// vnmsubfp128 VD, VA, VD, VB → "v3, v35, v3, v5"
|
||||
let vmx128_4op = [
|
||||
// vmaddfp128: bits 24=1, 25=1, 27=1, bit 29=1 (VA high), VB=5
|
||||
(0x146028D4u32, 0x82000000, "vmaddfp128 v3, v35, v5, v3"),
|
||||
// vmaddcfp128: bits 23=1, 27=1, bit 29=1, VB=5
|
||||
(0x14602914u32, 0x82000000, "vmaddcfp128 v3, v35, v3, v5"),
|
||||
// vnmsubfp128: bits 23=1, 25=1, 27=1, bit 29=1, VB=5
|
||||
(0x14602954u32, 0x82000000, "vnmsubfp128 v3, v35, v3, v5"),
|
||||
// Canary FormatVX128 layout: vd=3 (PPC 6-10), va=35 (low five bits = 3 at PPC 11-15, VA128h = 1 at PPC 26),
|
||||
// vb=5 (PPC 16-20), key2 at PPC 22-25 + bit 27.
|
||||
(0x146328F0u32, 0x82000000, "vmaddfp128 v3, v35, v5, v3"),
|
||||
(0x14632930u32, 0x82000000, "vmaddcfp128 v3, v35, v3, v5"),
|
||||
(0x14632970u32, 0x82000000, "vnmsubfp128 v3, v35, v3, v5"),
|
||||
];
|
||||
|
||||
let mut all = Vec::new();
|
||||
|
||||
@@ -366,7 +366,7 @@
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "16, lt, 0x82000040",
|
||||
"ext_mnemonic": "bdnzge",
|
||||
"ext_mnemonic": "bdnz",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
@@ -376,7 +376,7 @@
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "bc",
|
||||
"operands": "18, lt, 0x82000040",
|
||||
"ext_mnemonic": "bdzge",
|
||||
"ext_mnemonic": "bdz",
|
||||
"ext_operands": "0x82000040",
|
||||
"branch_target": "0x82000040"
|
||||
},
|
||||
@@ -525,7 +525,9 @@
|
||||
"raw": "0x7C2004AC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "sync",
|
||||
"operands": ""
|
||||
"operands": "",
|
||||
"ext_mnemonic": "lwsync",
|
||||
"ext_operands": ""
|
||||
},
|
||||
{
|
||||
"label": "trap",
|
||||
|
||||
@@ -71,64 +71,64 @@
|
||||
"operands": "v5"
|
||||
},
|
||||
{
|
||||
"label": "vaddfp128 (encoded sloppily)",
|
||||
"raw": "0x14642801",
|
||||
"label": "vperm128 v3, v4, v5, 0 (canary)",
|
||||
"raw": "0x14642800",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vperm128",
|
||||
"operands": "v3, v3, v5, 0"
|
||||
"operands": "v3, v4, v5, 0"
|
||||
},
|
||||
{
|
||||
"label": "encoding vd_hi=00: actually vsrw128",
|
||||
"raw": "0x180461D0",
|
||||
"label": "vsrw128 v0, v0, v12 (canary, vd_hi=00)",
|
||||
"raw": "0x180061D0",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vsrw128",
|
||||
"operands": "v0, v0, v12"
|
||||
},
|
||||
{
|
||||
"label": "encoding vd_hi=10: actually vsrw128 v32",
|
||||
"raw": "0x180465D0",
|
||||
"label": "vsrw128 v32, v0, v12 (canary, VD128h=01)",
|
||||
"raw": "0x180061D4",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vsrw128",
|
||||
"operands": "v32, v0, v12"
|
||||
},
|
||||
{
|
||||
"label": "encoding vd_hi=01: actually vpermwi128",
|
||||
"raw": "0x180463D0",
|
||||
"label": "vpermwi128 v64, v12, 0xE4 (canary, VD128h=10)",
|
||||
"raw": "0x180463D8",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vpermwi128",
|
||||
"operands": "v64, v12, 0xE4"
|
||||
},
|
||||
{
|
||||
"label": "vrlimi128 v96, v12, 4, 3 (real)",
|
||||
"raw": "0x180467D0",
|
||||
"label": "vrlimi128 v96, v12, 4, 3 (canary, VD128h=11)",
|
||||
"raw": "0x180467DC",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vrlimi128",
|
||||
"operands": "v96, v12, 4, 3"
|
||||
},
|
||||
{
|
||||
"label": "vrlimi128 v127, v127, 4, 3 (real)",
|
||||
"raw": "0x1BE4FFD3",
|
||||
"label": "vrlimi128 v127, v95, 4, 3 (canary)",
|
||||
"raw": "0x1BE4FFDE",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vrlimi128",
|
||||
"operands": "v127, v95, 4, 3"
|
||||
},
|
||||
{
|
||||
"label": "vmaddfp128 v3, v35, v5, v3",
|
||||
"raw": "0x146028D4",
|
||||
"raw": "0x146328F0",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vmaddfp128",
|
||||
"operands": "v3, v35, v5, v3"
|
||||
},
|
||||
{
|
||||
"label": "vmaddcfp128 v3, v35, v3, v5",
|
||||
"raw": "0x14602914",
|
||||
"raw": "0x14632930",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vmaddcfp128",
|
||||
"operands": "v3, v35, v3, v5"
|
||||
},
|
||||
{
|
||||
"label": "vnmsubfp128 v3, v35, v3, v5",
|
||||
"raw": "0x14602954",
|
||||
"raw": "0x14632970",
|
||||
"addr": "0x82000000",
|
||||
"mnemonic": "vnmsubfp128",
|
||||
"operands": "v3, v35, v3, v5"
|
||||
|
||||
Reference in New Issue
Block a user