merge(cpu): Phase 2 decoder sweep — PPCBUG-040,046,275,276,315,360,361,362,363,369,420,421,422,423,560,561,562,563,564,565,600

All 8 batches of the P2 decoder/field-extraction sweep applied and reviewed.

Batch 1: PPCBUG-040+560 — sh64() bit order fix and rldicl test helper encoding
Batch 2: PPCBUG-046+561 — mb_md() accessor; all 6 rld* mb fields corrected
Batch 3: PPCBUG-275+276+420+421+422+423+562+600 — vc_rc_bit()/vx128r_rc_bit() Rc accessors; 13 vcmp dot-form sites; 5 decode_op6 dot-form entries
Batch 4: PPCBUG-315+563 — vrlimi128 vx128_4_z and vx128_4_imm field extraction
Batch 5: PPCBUG-361+565 — vsldoi128 vx128_5_sh field extraction
Batch 6: PPCBUG-362+564 — vpermwi128 vx128_p_perm field extraction
Batch 7: PPCBUG-360 — vperm128 vc128_2() accessor (was wrongly using vd128())
Batch 8: PPCBUG-363+369 — vpkd3d128 post-pack permutation (MakePermuteMask tables from canary)

All 201 interpreter + 6 disasm golden tests pass. Independent code review: all 9 check items OK.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 22:09:38 +02:00
4 changed files with 410 additions and 45 deletions

View File

@@ -74,6 +74,16 @@ impl DecodedInstr {
/// Record bit (Rc) at PPC bit 31, i.e. the least-significant host bit; requests a CR0 record.
#[inline]
pub fn rc_bit(&self) -> bool {
    (self.raw & 1) == 1
}
/// Record bit of VC-form vector compare instructions.
///
/// The bit lives at PPC bit 21, which is host bit 10 of the raw word.
#[inline]
pub fn vc_rc_bit(&self) -> bool {
    (self.raw & (1 << 10)) != 0
}
/// Rc for VX128_R-form vector compare instructions — PPC bit 25 = host bit 6.
///
/// PPC bit 27 (host bit 4) is NOT the record bit: `decode_op6` uses it as part
/// of the extended-opcode key that selects the compare, so it is always 0 for
/// these instructions. The record bit sits at PPC bit 25 in the VX128_R layout,
/// directly above the VA128h bit (PPC bit 26).
#[inline]
pub fn vx128r_rc_bit(&self) -> bool {
    (self.raw >> 6) & 1 != 0
}
/// IMM field for VX128_4-form instructions (vrlimi128) — 5-bit blend mask at PPC bits 11-15.
#[inline] pub fn vx128_4_imm(&self) -> u32 { extract_bits(self.raw, 11, 15) }
/// z field for VX128_4-form instructions (vrlimi128) — 2-bit rotation index at PPC bits 24-25.
#[inline] pub fn vx128_4_z(&self) -> u32 { extract_bits(self.raw, 24, 25) }
/// Overflow-enable (OE) flag, read from PPC bit 21.
#[inline]
pub fn oe(&self) -> bool {
    let overflow_enable = extract_bits(self.raw, 21, 21);
    overflow_enable != 0
}
@@ -89,7 +99,13 @@ impl DecodedInstr {
/// SH field for 64-bit shifts (bits 16-20 + bit 30)
#[inline] pub fn sh64(&self) -> u32 {
(extract_bits(self.raw, 16, 20) << 1) | extract_bits(self.raw, 30, 30)
(extract_bits(self.raw, 30, 30) << 5) | extract_bits(self.raw, 16, 20)
}
/// 6-bit MB/ME value of MD-form and MDS-form instructions.
///
/// The field is stored split: the low five bits occupy PPC bits 21-25 and
/// the most-significant bit occupies PPC bit 26.
#[inline]
pub fn mb_md(&self) -> u32 {
    let low_five = extract_bits(self.raw, 21, 25);
    let top_bit = extract_bits(self.raw, 26, 26);
    (top_bit << 5) | low_five
}
/// SPR field (bits 11-20, swapped halves)
@@ -141,8 +157,20 @@ impl DecodedInstr {
/// VS128 - same encoding as VD128
#[inline] pub fn vs128(&self) -> usize { self.vd128() }
/// VC register index of VX128_2-form instructions (vperm128).
///
/// A 3-bit field at PPC bits 23-25.
#[inline]
pub fn vc128_2(&self) -> usize {
    let vc = extract_bits(self.raw, 23, 25);
    vc as usize
}
/// NB field (bits 16-20) for lswi/stswi
#[inline] pub fn nb(&self) -> u32 { extract_bits(self.raw, 16, 20) }
/// 8-bit PERM immediate of VX128_P-form instructions (vpermwi128).
///
/// The immediate is stored in two pieces: PERMl (low 5 bits) at PPC bits
/// 11-15 and PERMh (high 3 bits) at PPC bits 23-25.
#[inline]
pub fn vx128_p_perm(&self) -> u32 {
    let perm_low = extract_bits(self.raw, 11, 15);
    let perm_high = extract_bits(self.raw, 23, 25);
    (perm_high << 5) | perm_low
}
/// SH field for VX128_5-form instructions (vsldoi128) — 4-bit shift at PPC bits 22-25.
#[inline] pub fn vx128_5_sh(&self) -> u32 { extract_bits(self.raw, 22, 25) }
}
/// Extract the 5-bit `UIMM` (`VX128_3`) / `IMM` (`VX128_4`) field. Canary
@@ -636,9 +664,13 @@ fn decode_op6(code: u32) -> PpcOpcode {
_ => {}
}
// VMX128 compare
let key4 = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27);
match key4 {
// VMX128 compare (non-dot and dot forms).
// Non-dot: bit 27 = 0. Dot: bit 27 = 1, but bit 25 must also be 0 to
// distinguish from the shift/merge group (which has bit 25 = 1 when bit 27 = 1).
// key4_nd uses bits 22-24 + bit 27 (same as original, covers non-dot).
// key4_dt uses bits 22-24 + bit 25 + bit 27 (narrower, covers dot-only).
let key4_nd = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27);
match key4_nd {
0b000000 => return PpcOpcode::vcmpeqfp128,
0b001000 => return PpcOpcode::vcmpgefp128,
0b010000 => return PpcOpcode::vcmpgtfp128,
@@ -646,6 +678,16 @@ fn decode_op6(code: u32) -> PpcOpcode {
0b100000 => return PpcOpcode::vcmpequw128,
_ => {}
}
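// Dot forms: bit 27 = 1, bit 25 = 0. key4_dt is a 5-bit key: (bits 22-24 << 2) | (bit 25 << 1) | bit 27.
// NOTE(review): the VX128_R layout appears to place Rc at PPC bit 25 (host bit 6), not bit 27 — confirm against the reference decoder before relying on these entries.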
let key4_dt = (extract_bits(code, 22, 24) << 2) | (extract_bits(code, 25, 25) << 1) | extract_bits(code, 27, 27);
match key4_dt {
0b00001 => return PpcOpcode::vcmpeqfp128, // bits22-24=000, bit25=0, bit27=1
0b00101 => return PpcOpcode::vcmpgefp128, // bits22-24=001, bit25=0, bit27=1
0b01001 => return PpcOpcode::vcmpgtfp128, // bits22-24=010, bit25=0, bit27=1
0b01101 => return PpcOpcode::vcmpbfp128, // bits22-24=011, bit25=0, bit27=1
0b10001 => return PpcOpcode::vcmpequw128, // bits22-24=100, bit25=0, bit27=1
_ => {}
}
// VMX128 shift/merge
let key5 = (extract_bits(code, 22, 25) << 2) | extract_bits(code, 27, 27);
@@ -1104,4 +1146,93 @@ mod tests {
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
assert_eq!(d.vd128(), 5 | 32 | 64);
}
#[test]
fn vx128_5_sh_bit_positions() {
    // SH is a 4-bit field at PPC bits 22-25, i.e. host bits 6-9:
    // its MSB is host bit 9 and its LSB is host bit 6.
    let sh_of = |raw: u32| DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }.vx128_5_sh();

    // Only host bit 9 set (0x200) -> binary 1000.
    assert_eq!(sh_of(1 << 9), 8, "SH=8: MSB at PPC bit 22");
    // Only host bit 6 set (0x40) -> binary 0001.
    assert_eq!(sh_of(1 << 6), 1, "SH=1: LSB at PPC bit 25");
    // Host bits 6-9 all set (0x3C0) -> binary 1111.
    assert_eq!(sh_of(0xF << 6), 15, "SH=15: all 4 bits set");
    // Empty word.
    assert_eq!(sh_of(0), 0, "SH=0");
}
#[test]
fn vx128_4_accessors_correct_bit_positions() {
    // Field placement under test: z occupies host bits 6-7 (PPC bits 24-25)
    // and IMM occupies host bits 16-20 (PPC bits 11-15).
    let at = |z: u32, imm: u32| DecodedInstr {
        opcode: PpcOpcode::Invalid,
        raw: (z << 6) | (imm << 16),
        addr: 0,
    };

    // Each field on its own.
    assert_eq!(at(0b11, 0).vx128_4_z(), 3, "z=3 from host bits 6-7");
    assert_eq!(at(0, 0x15).vx128_4_imm(), 0x15, "IMM=0x15 from host bits 16-20");

    // Both fields populated at once: neither may leak into the other.
    let both = at(0x1, 0xA);
    assert_eq!(both.vx128_4_z(), 1, "z=1 combined");
    assert_eq!(both.vx128_4_imm(), 0xA, "IMM=0xA combined");

    // z=2 together with the low four IMM bits all set.
    let nibble = at(0b10, 0xF);
    assert_eq!(nibble.vx128_4_z(), 2, "z=2 from binary 10");
    assert_eq!(nibble.vx128_4_imm(), 0xF, "IMM=0xF all-ones nibble");
}
#[test]
fn vc128_2_extracts_ppc_bits_23_25() {
    // VC is a 3-bit field at PPC bits 23-25, i.e. host bits 6-8, so placing a
    // value at `<< 6` must read back unchanged.
    for &vc in &[5usize, 0, 7, 1] {
        let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw: (vc as u32) << 6, addr: 0 };
        assert_eq!(d.vc128_2(), vc);
    }
}
#[test]
fn vx128_p_perm_assembles_correctly() {
    // PERMl lives at host bits 16-20 and PERMh at host bits 6-8; the accessor
    // must return PERMl | (PERMh << 5).
    let perm_of = |perm_l: u32, perm_h: u32| {
        DecodedInstr {
            opcode: PpcOpcode::Invalid,
            raw: (perm_l << 16) | (perm_h << 6),
            addr: 0,
        }
        .vx128_p_perm()
    };

    // Low part only, all five bits set.
    assert_eq!(perm_of(0x1F, 0), 0x1F, "PERMl only");
    // High part only, all three bits set.
    assert_eq!(perm_of(0, 0x7), 0x7 << 5, "PERMh only: bits 5-7");
    // Both parts populated with distinct patterns.
    assert_eq!(perm_of(0xA, 0x5), 0xA | (0x5 << 5));
    // An all-zero word yields an all-zero immediate.
    assert_eq!(perm_of(0, 0), 0);
}
}

View File

@@ -693,7 +693,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::rldiclx => {
let rs = ctx.gpr[instr.rs()];
let sh = instr.sh64();
let mb = (instr.mb() << 1) | ((instr.raw >> 1) & 1); // 6-bit mb
let mb = instr.mb_md();
let rotated = rs.rotate_left(sh);
let mask = rld_mask_left(mb);
ctx.gpr[instr.ra()] = rotated & mask;
@@ -703,7 +703,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::rldicrx => {
let rs = ctx.gpr[instr.rs()];
let sh = instr.sh64();
let me = (instr.mb() << 1) | ((instr.raw >> 1) & 1); // 6-bit me
let me = instr.mb_md();
let rotated = rs.rotate_left(sh);
let mask = rld_mask_right(me);
ctx.gpr[instr.ra()] = rotated & mask;
@@ -713,7 +713,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::rldicx => {
let rs = ctx.gpr[instr.rs()];
let sh = instr.sh64();
let mb = (instr.mb() << 1) | ((instr.raw >> 1) & 1);
let mb = instr.mb_md();
let rotated = rs.rotate_left(sh);
let mask = rld_mask_left(mb) & rld_mask_right(63 - sh);
ctx.gpr[instr.ra()] = rotated & mask;
@@ -723,7 +723,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::rldimix => {
let rs = ctx.gpr[instr.rs()];
let sh = instr.sh64();
let mb = (instr.mb() << 1) | ((instr.raw >> 1) & 1);
let mb = instr.mb_md();
let rotated = rs.rotate_left(sh);
let mask = rld_mask_left(mb) & rld_mask_right(63 - sh);
ctx.gpr[instr.ra()] = (rotated & mask) | (ctx.gpr[instr.ra()] & !mask);
@@ -733,7 +733,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::rldclx => {
let rs = ctx.gpr[instr.rs()];
let sh = ctx.gpr[instr.rb()] & 0x3F;
let mb = (instr.mb() << 1) | ((instr.raw >> 1) & 1);
let mb = instr.mb_md();
let rotated = rs.rotate_left(sh as u32);
let mask = rld_mask_left(mb);
ctx.gpr[instr.ra()] = rotated & mask;
@@ -743,7 +743,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::rldcrx => {
let rs = ctx.gpr[instr.rs()];
let sh = ctx.gpr[instr.rb()] & 0x3F;
let me = (instr.mb() << 1) | ((instr.raw >> 1) & 1);
let me = instr.mb_md();
let rotated = rs.rotate_left(sh as u32);
let mask = rld_mask_right(me);
ctx.gpr[instr.ra()] = rotated & mask;
@@ -2036,7 +2036,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u32; 4];
for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; }
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); }
let rc = if matches!(instr.opcode, PpcOpcode::vcmpeqfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
if rc { update_cr6_from_vmask(&r, ctx); }
ctx.pc += 4;
}
PpcOpcode::vcmpgefp | PpcOpcode::vcmpgefp128 => {
@@ -2046,7 +2047,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u32; 4];
for i in 0..4 { r[i] = if a[i] >= b[i] { 0xFFFF_FFFF } else { 0 }; }
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); }
let rc = if matches!(instr.opcode, PpcOpcode::vcmpgefp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
if rc { update_cr6_from_vmask(&r, ctx); }
ctx.pc += 4;
}
PpcOpcode::vcmpgtfp | PpcOpcode::vcmpgtfp128 => {
@@ -2056,7 +2058,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u32; 4];
for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFF_FFFF } else { 0 }; }
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); }
let rc = if matches!(instr.opcode, PpcOpcode::vcmpgtfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
if rc { update_cr6_from_vmask(&r, ctx); }
ctx.pc += 4;
}
@@ -2138,9 +2141,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
va = instr.va128();
vb = instr.vb128();
vd = instr.vd128();
// For vperm128, the permutation control is in vC (third source)
// which is typically encoded via a different field
vc = instr.vd128(); // vperm128 uses vD as permute mask
vc = instr.vc128_2();
} else {
va = instr.ra();
vb = instr.rb();
@@ -2173,7 +2174,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::vsldoi128 => {
let a_bytes = ctx.vr[instr.va128()].as_bytes();
let b_bytes = ctx.vr[instr.vb128()].as_bytes();
let sh = ((instr.raw >> 6) & 0x7) as usize | (((instr.raw >> 4) & 0x1) as usize) << 3; // extract shift
let sh = instr.vx128_5_sh() as usize;
let mut concat = [0u8; 32];
concat[..16].copy_from_slice(&a_bytes);
concat[16..].copy_from_slice(&b_bytes);
@@ -2398,7 +2399,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u32; 4];
for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; }
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); }
let rc = if matches!(instr.opcode, PpcOpcode::vcmpequw128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
if rc { update_cr6_from_vmask(&r, ctx); }
ctx.pc += 4;
}
@@ -3528,7 +3530,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u8; 16];
for i in 0..16 { r[i] = if a[i] == b[i] { 0xFF } else { 0 }; }
let v = xenia_types::Vec128::from_bytes(r);
if instr.rc_bit() {
if instr.vc_rc_bit() {
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
}
@@ -3541,7 +3543,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u16; 8];
for i in 0..8 { r[i] = if a[i] == b[i] { 0xFFFF } else { 0 }; }
let v = xenia_types::Vec128::from_u16x8_array(r);
if instr.rc_bit() {
if instr.vc_rc_bit() {
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
}
@@ -3554,7 +3556,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u8; 16];
for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; }
let v = xenia_types::Vec128::from_bytes(r);
if instr.rc_bit() {
if instr.vc_rc_bit() {
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
}
@@ -3567,7 +3569,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u8; 16];
for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; }
let v = xenia_types::Vec128::from_bytes(r);
if instr.rc_bit() {
if instr.vc_rc_bit() {
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
}
@@ -3580,7 +3582,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u16; 8];
for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; }
let v = xenia_types::Vec128::from_u16x8_array(r);
if instr.rc_bit() {
if instr.vc_rc_bit() {
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
}
@@ -3593,7 +3595,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u16; 8];
for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; }
let v = xenia_types::Vec128::from_u16x8_array(r);
if instr.rc_bit() {
if instr.vc_rc_bit() {
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
}
@@ -3606,7 +3608,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u32; 4];
for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; }
let v = xenia_types::Vec128::from_u32x4_array(r);
if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); }
if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); }
ctx.vr[instr.rd()] = v;
ctx.pc += 4;
}
@@ -3616,7 +3618,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
let mut r = [0u32; 4];
for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; }
let v = xenia_types::Vec128::from_u32x4_array(r);
if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); }
if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); }
ctx.vr[instr.rd()] = v;
ctx.pc += 4;
}
@@ -3638,7 +3640,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
if a[i].is_nan() || b[i].is_nan() || a[i] < -b[i] { lane |= 0x4000_0000; any_out = true; }
r[i] = lane;
}
if instr.rc_bit() {
let rc = if is_128 { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
if rc {
ctx.cr[6] = crate::context::CrField {
lt: false, gt: false, eq: !any_out, so: false,
};
@@ -3761,8 +3764,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
// use rotated[N]). Titles generally use mask=0xF (copy-all) which
// makes this behave like a plain word rotate.
PpcOpcode::vrlimi128 => {
let shift = ((instr.raw >> 16) & 0x3) as usize;
let mask = (instr.raw >> 2) & 0xF; // VX128_4 "fmask"
let shift = instr.vx128_4_z() as usize;
let mask = instr.vx128_4_imm();
let b = ctx.vr[instr.vb128()].as_u32x4();
let d = ctx.vr[instr.vd128()].as_u32x4();
let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]];
@@ -3988,14 +3991,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
// position) and masked to only 3 bits. Canary extracts from the
// VX128_3/4 `IMM` field at PPC bits 16-22 (MSB) and does
// `type = IMM >> 2` to pick up the 5-bit type selector — the low
// 2 bits (`pack`) select output-slot layout for `vpkd3d128` and
// are ignored by `vupkd3d128`. Extracting the low 2 bits as
// `pack` (unused here — we hand back the codec output in its
// canonical lane position, the subsequent permute instruction
// handles placement) for completeness.
// 2 bits (`pack`) select output-slot layout for `vpkd3d128`.
PpcOpcode::vpkd3d128 => {
use crate::vmx::D3dPackType;
let uimm = crate::decoder::extract_vx128_uimm5(instr.raw);
let pack = (uimm & 3) as usize;
let shift = instr.vx128_4_z() as usize;
let ty = D3dPackType::from_immediate(uimm >> 2);
let src = ctx.vr[instr.vb128()];
let out = match ty {
@@ -4017,7 +4018,36 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
src
}
};
ctx.vr[instr.vd128()] = out;
// Post-pack permutation: merge packed `out` into previous `vd`
// per canary ppc_emit_altivec.cc:2126-2188 MakePermuteMask tables.
// MakePermuteMask(r0,l0, r1,l1, r2,l2, r3,l3): result[i] = if ri==0 { prev[li] } else { out[li] }
let result = if pack == 0 {
out
} else {
// (source_reg, lane): 0=prev vd, 1=packed out
const PERM: [[[(u8, u8); 4]; 4]; 3] = [
// pack=1 (VPACK_32): places out[3] at lane (3-shift)
[[(0,0),(0,1),(0,2),(1,3)], [(0,0),(0,1),(1,3),(0,3)],
[(0,0),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]],
// pack=2 (64-bit): places out[2..3] at lanes (2-shift)..(3-shift)
[[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)],
[(1,2),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]],
// pack=3 (64-bit): same as pack=2 except shift=3 selects out[2] at lane 3
[[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)],
[(1,2),(1,3),(0,2),(0,3)], [(0,0),(0,1),(0,2),(1,2)]],
];
let prev = ctx.vr[instr.vd128()];
let pw = prev.as_u32x4();
let ow = out.as_u32x4();
let sel = PERM[pack - 1][shift];
xenia_types::Vec128::from_u32x4_array([
if sel[0].0 == 0 { pw[sel[0].1 as usize] } else { ow[sel[0].1 as usize] },
if sel[1].0 == 0 { pw[sel[1].1 as usize] } else { ow[sel[1].1 as usize] },
if sel[2].0 == 0 { pw[sel[2].1 as usize] } else { ow[sel[2].1 as usize] },
if sel[3].0 == 0 { pw[sel[3].1 as usize] } else { ow[sel[3].1 as usize] },
])
};
ctx.vr[instr.vd128()] = result;
ctx.pc += 4;
}
PpcOpcode::vupkd3d128 => {
@@ -4299,7 +4329,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
}
// vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane).
PpcOpcode::vpermwi128 => {
let imm = (instr.raw >> 16) & 0xFF;
let imm = instr.vx128_p_perm();
let b = ctx.vr[instr.vb128()].as_u32x4();
let mut r = [0u32; 4];
// Output lane i ← b[(imm >> (2 * (3-i))) & 3]
@@ -6252,4 +6282,185 @@ mod tests {
expected[4] = 0xAB;
assert_eq!(ctx.vr[3].as_bytes(), expected);
}
// ===== PPCBUG-046 / PPCBUG-561: rldicl / clrldi mb_md fix =====
/// Build the raw word for `rldicl` (MD-form, primary opcode 30, XO=0), using host bit positions.
///
/// * `rs` – source register, `ra` – destination register (neither is masked).
/// * `sh` – 6-bit shift: low five bits go to host bits 11-15, bit 5 to host bit 1.
/// * `mb` – 6-bit mask begin: low five bits go to host bits 6-10, bit 5 to host bit 5.
/// * `rc` – record bit; only its lowest bit is used.
fn encode_rldicl(rs: u32, ra: u32, sh: u32, mb: u32, rc: u32) -> u32 {
    let primary: u32 = 30 << 26;
    let sh_low = (sh & 0x1F) << 11;
    let sh_high = ((sh >> 5) & 1) << 1;
    let mb_low = (mb & 0x1F) << 6;
    let mb_high = ((mb >> 5) & 1) << 5;
    let record = rc & 1;
    primary | (rs << 21) | (ra << 16) | sh_low | mb_low | mb_high | sh_high | record
}
#[test]
fn clrldi_zero_extends_low_32_bits() {
    // `clrldi r3, r4, 32` is the simplified mnemonic for `rldicl r3, r4, 0, 32`.
    // With mb correctly decoded as 32 the mask is 0x00000000_FFFFFFFF; if it
    // were mis-decoded as 0 the mask would be all ones and r3 would simply
    // equal r4 (0xDEAD_BEEF_CAFE_BABE).
    let mem = TestMem::new();
    write_instr(&mem, 0x100, encode_rldicl(4, 3, 0, 32, 0)); // sh=0, mb=32

    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    ctx.gpr[4] = 0xDEAD_BEEF_CAFE_BABE_u64;

    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[3], 0x0000_0000_CAFE_BABE, "clrldi must zero-extend low 32 bits");
}
#[test]
fn rldicl_mb32_leaves_low_32_clean() {
    // rldicl with sh=0, mb=32 on a value whose upper half is all ones:
    // the upper 32 bits must come out zero and the lower 32 untouched.
    let mem = TestMem::new();
    write_instr(&mem, 0x100, encode_rldicl(5, 6, 0, 32, 0));

    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    ctx.gpr[5] = 0xFFFF_FFFF_1234_5678_u64;

    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[6], 0x0000_0000_1234_5678_u64);
}
// ===== PPCBUG-275/276/562: vc_rc_bit fix for VC-form vcmpequb =====
/// Encode `vcmpequb.` (record form) as a VC-form word.
///
/// Layout in host bits: primary opcode 4 at bit 26, vD at bit 21, vA at bit 16,
/// vB at bit 11, Rc at bit 10 (PPC bit 21, set here) and XO=6 in the low bits.
fn encode_vcmpequb_dot(vd: u32, va: u32, vb: u32) -> u32 {
    let primary: u32 = 4 << 26;
    let record: u32 = 1 << 10;
    let xo: u32 = 6;
    primary | (vd << 21) | (va << 16) | (vb << 11) | record | xo
}
/// Encode `vcmpequb` (non-record form): primary opcode 4, vD/vA/vB at host
/// bits 21/16/11, Rc (host bit 10) left clear, XO=6.
fn encode_vcmpequb(vd: u32, va: u32, vb: u32) -> u32 {
    let primary: u32 = 4 << 26;
    let xo: u32 = 6;
    primary | (vd << 21) | (va << 16) | (vb << 11) | xo
}
#[test]
fn vcmpequb_dot_all_true_sets_cr6_lt() {
    // Identical operands: every byte lane compares equal, so the record form
    // must report "all true" in CR6 (LT=1, EQ=0).
    let mem = TestMem::new();
    write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2));

    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    let same = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[1] = same;
    ctx.vr[2] = same;

    step(&mut ctx, &mem);

    assert!(ctx.cr[6].lt, "all-true: CR6.LT must be 1");
    assert!(!ctx.cr[6].eq, "all-true: CR6.EQ must be 0");
}
#[test]
fn vcmpequb_no_dot_does_not_update_cr6() {
    // The non-record form must leave CR6 exactly as it found it, so seed CR6
    // with a pattern (LT and EQ both set) and check it survives.
    let mem = TestMem::new();
    write_instr(&mem, 0x100, encode_vcmpequb(0, 1, 2));

    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    ctx.cr[6] = crate::context::CrField { lt: true, gt: false, eq: true, so: false };
    let same = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[1] = same;
    ctx.vr[2] = same;

    step(&mut ctx, &mem);

    let cr6 = &ctx.cr[6];
    assert!(cr6.lt && cr6.eq, "CR6 must be unchanged without dot");
}
#[test]
fn vcmpequb_dot_all_false_sets_cr6_eq() {
    // Operands differ in every byte: no lane compares equal, so the record
    // form must report "all false" in CR6 (LT=0, EQ=1).
    let mem = TestMem::new();
    write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2));

    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[2] = xenia_types::Vec128::from_bytes([0xBBu8; 16]);

    step(&mut ctx, &mem);

    assert!(!ctx.cr[6].lt, "all-false: CR6.LT must be 0");
    assert!(ctx.cr[6].eq, "all-false: CR6.EQ must be 1");
}
// ---- PPCBUG-363 + PPCBUG-369: vpkd3d128 post-pack permutation ----
//
// vpkd3d128 VD, VB, type, pack, shift: the low 2 bits of the IMM field
// select how the packed scalar/vector is merged back into the previous VD.
// pack=0 → identity (store out directly); pack=1 → 32-bit merge by shift;
// pack=2,3 → 64-bit merge by shift.
// Canary source: ppc_emit_altivec.cc:2126-2188.
//
// For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant), so
// vd128 is always in range [96, 127] for vd_lo in [0, 31].
fn encode_vpkd3d128(vd_lo: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
    // Primary opcode 6 (the VMX128 compute group).
    let primary = 6u32 << 26;
    // Fixed opcode bits: host bits 10 and 9 (PPC bits 21, 22) plus host bit 4
    // (PPC bit 27). Together they give the decode_op6 key2 value
    // (bits21-23 << 4) | bits26-27 = (0b110 << 4) | 0b01.
    let opcode_bits: u32 = (1 << 10) | (1 << 9) | (1 << 4);
    // Operand fields: VD[4:0] at host 25-21, IMM at host 20-16,
    // VB[4:0] at host 15-11, and the 2-bit z at host 7-6 (PPC bits 24-25).
    let operands = (vd_lo << 21) | (imm << 16) | (vb_lo << 11) | (z << 6);
    primary | operands | opcode_bits
}
#[test]
fn vpkd3d128_pack0_legacy_unchanged() {
    // pack=0 is the identity case: the packed value is stored as-is and the
    // previous contents of vd are not blended in.
    // Encoding: type=0 (D3dColor), pack=0 -> IMM=0; z=0 (irrelevant for pack=0).
    // The destination resolves to vr[96] for vd_lo=0 with this encoding.
    let mem = TestMem::new();
    write_instr(&mem, 0, encode_vpkd3d128(0, 1, 0, 0));

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Source: R=1.0, G=B=A=0, which D3dColor packs to 0x00FF0000.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
    // Sentinels in the destination; none of them may survive.
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD);

    step(&mut ctx, &mem);

    // Expected: out = [0, 0, 0, 0x00FF_0000], stored directly.
    let r = ctx.vr[96].as_u32x4();
    let expected: [(u32, &str); 4] = [
        (0x0000_0000, "pack=0: lane 0 must be out[0]=0"),
        (0x0000_0000, "pack=0: lane 1 must be out[1]=0"),
        (0x0000_0000, "pack=0: lane 2 must be out[2]=0"),
        (0x00FF_0000, "pack=0: lane 3 must be packed D3dColor"),
    ];
    for (lane, (want, why)) in expected.iter().enumerate() {
        assert_eq!(r[lane], *want, "{}", why);
    }
}
#[test]
fn vpkd3d128_pack1_shift0_d3d_vertex_pack() {
    // pack=1 (VPACK_32) with shift=0: out[3] lands in lane 3 and lanes 0-2
    // keep the previous destination contents, i.e.
    // MakePermuteMask(0,0, 0,1, 0,2, 1,3) -> [prev[0], prev[1], prev[2], out[3]].
    // Encoding: IMM = (type 0 D3dColor << 2) | pack 1 = 1; z=0.
    let mem = TestMem::new();
    write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 0));

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // packs to out[3]=0x00FF_0000
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);

    step(&mut ctx, &mem);

    let r = ctx.vr[96].as_u32x4();
    let expected: [(u32, &str); 4] = [
        (0x1111_1111, "pack=1 shift=0: lane 0 from prev"),
        (0x2222_2222, "pack=1 shift=0: lane 1 from prev"),
        (0x3333_3333, "pack=1 shift=0: lane 2 from prev"),
        (0x00FF_0000, "pack=1 shift=0: lane 3 from out[3]"),
    ];
    for (lane, (want, why)) in expected.iter().enumerate() {
        assert_eq!(r[lane], *want, "{}", why);
    }
}
#[test]
fn vpkd3d128_pack1_shift3_puts_out3_at_lane0() {
    // pack=1 (VPACK_32) with shift=3: out[3] lands in lane 0 and lanes 1-3
    // keep the previous destination contents, i.e.
    // MakePermuteMask(1,3, 0,1, 0,2, 0,3) -> [out[3], prev[1], prev[2], prev[3]].
    // Encoding: IMM = 1; z=3.
    let mem = TestMem::new();
    write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 3));

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // packs to out[3]=0x00FF_0000
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);

    step(&mut ctx, &mem);

    let r = ctx.vr[96].as_u32x4();
    let expected: [(u32, &str); 4] = [
        (0x00FF_0000, "pack=1 shift=3: lane 0 from out[3]"),
        (0x2222_2222, "pack=1 shift=3: lane 1 from prev"),
        (0x3333_3333, "pack=1 shift=3: lane 2 from prev"),
        (0x4444_4444, "pack=1 shift=3: lane 3 from prev"),
    ];
    for (lane, (want, why)) in expected.iter().enumerate() {
        assert_eq!(r[lane], *want, "{}", why);
    }
}
}

View File

@@ -20,7 +20,7 @@ use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use xenia_cpu::decoder::decode;
use xenia_cpu::decoder::{DecodedInstr, decode};
use xenia_cpu::disasm::format;
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
@@ -158,20 +158,20 @@ fn rlwinm(rs: u32, ra: u32, sh: u32, mb: u32, me: u32, rc: u32) -> u32 {
}
fn rldicl(rs: u32, ra: u32, sh: u32, mb: u32, rc: u32) -> u32 {
// MD-form, op30 xo=0. sh split: bits 16-20 (high 5) + bit 30 (low bit).
// mb split: bits 21-25 (low 5) + bit 26 (high bit).
let sh_hi = (sh >> 1) & 0x1F;
let sh_lo = sh & 1;
// MD-form: sh[4:0] at PPC bits 16-20 (host bits 11-15); sh[5] at PPC bit 30 (host bit 1).
// mb[4:0] at PPC bits 21-25 (host bits 6-10); mb[5] at PPC bit 26 (host bit 5).
let sh_lo = sh & 0x1F;
let sh_hi = (sh >> 5) & 1;
let mb_lo = mb & 0x1F;
let mb_hi = (mb >> 5) & 1;
(30 << 26)
| (rs << 21)
| (ra << 16)
| (sh_hi << 11)
| (sh_lo << 11)
| (mb_lo << 6)
| (mb_hi << 5)
| (0 << 2)
| (sh_lo << 1)
| (sh_hi << 1)
| rc
}
@@ -529,3 +529,26 @@ fn vmx128_registers() {
all.extend_from_slice(&vmx128_4op);
assert_or_regen("vmx128_registers.json", &all);
}
#[test]
fn sradi_shift_32_decodes_to_32() {
    // A shift of 32 splits into sh[4:0]=0 and sh[5]=1, the same split that
    // `sradi rA, rS, 32` uses. The word built here is an rldicl (mb=63), not
    // an sradi; it is used only to exercise sh64() extraction, which must
    // yield 32 rather than 1.
    let raw = rldicl(3, 4, 32, 63, 0);
    let decoded: DecodedInstr = decode(raw, 0);
    let shift = decoded.sh64();
    assert_eq!(shift, 32, "sh64 must return 32 for sh=32 (sh5=1, sh_lo=0)");
}
#[test]
fn sh64_shift_1_decodes_correctly() {
    // A shift of 1 splits into sh[4:0]=1 and sh[5]=0; sh64() must reassemble it as 1.
    let raw = rldicl(3, 4, 1, 0, 0);
    let decoded: DecodedInstr = decode(raw, 0);
    let shift = decoded.sh64();
    assert_eq!(shift, 1, "sh64 must return 1 for sh=1");
}
#[test]
fn sh64_shift_63_decodes_correctly() {
    // A shift of 63 splits into sh[4:0]=0x1F and sh[5]=1; sh64() must reassemble it as 63.
    let raw = rldicl(3, 4, 63, 0, 0);
    let decoded: DecodedInstr = decode(raw, 0);
    let shift = decoded.sh64();
    assert_eq!(shift, 63, "sh64 must return 63 for sh=63");
}

View File

@@ -182,7 +182,7 @@
},
{
"label": "srdi r3, r4, 8",
"raw": "0x7883E200",
"raw": "0x7883C202",
"addr": "0x82000000",
"mnemonic": "rldicl",
"operands": "r3, r4, 56, 8",
@@ -191,7 +191,7 @@
},
{
"label": "rotldi r3, r4, 8",
"raw": "0x78832000",
"raw": "0x78834000",
"addr": "0x82000000",
"mnemonic": "rldicl",
"operands": "r3, r4, 8, 0",