fix(cpu): PPCBUG-315 PPCBUG-563 fix vrlimi128 z and IMM field extraction

PPCBUG-563: Add vx128_4_imm() (PPC bits 11-15) and vx128_4_z() (PPC bits
24-25) accessors to decoder.rs for VX128_4-form instructions.

PPCBUG-315: vrlimi128 was reading z from host bits 16-17 (a subset of IMM)
and the mask from host bits 2-5 (a reserved/XO region). Replace both with the
correct accessors: z (0-3) selects the word lane at which the rotation starts;
IMM is the 5-bit field that carries the per-lane blend mask.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 21:26:26 +02:00
parent d51b9346df
commit 197d76c44e
2 changed files with 112 additions and 7 deletions

View File

@@ -79,6 +79,11 @@ impl DecodedInstr {
/// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4. /// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4.
#[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 } #[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 }
/// Returns the 5-bit IMM field of a VX128_4-form instruction (vrlimi128),
/// which carries the blend mask. The field sits at PPC bits 11-15
/// (host bits 16-20).
#[inline]
pub fn vx128_4_imm(&self) -> u32 {
    let word = self.raw;
    extract_bits(word, 11, 15)
}
/// Returns the 2-bit z field of a VX128_4-form instruction (vrlimi128),
/// used as the rotation index. The field sits at PPC bits 24-25
/// (host bits 6-7).
#[inline]
pub fn vx128_4_z(&self) -> u32 {
    let word = self.raw;
    extract_bits(word, 24, 25)
}
/// OE bit (bit 21) - overflow enable /// OE bit (bit 21) - overflow enable
#[inline] pub fn oe(&self) -> bool { extract_bits(self.raw, 21, 21) != 0 } #[inline] pub fn oe(&self) -> bool { extract_bits(self.raw, 21, 21) != 0 }
@@ -152,8 +157,20 @@ impl DecodedInstr {
/// VS128 - same encoding as VD128 /// VS128 - same encoding as VD128
#[inline] pub fn vs128(&self) -> usize { self.vd128() } #[inline] pub fn vs128(&self) -> usize { self.vd128() }
/// Returns the VC register number of a VX128_2-form instruction (vperm128).
/// It is a 3-bit field at PPC bits 23-25 (host bits 6-8), widened to `usize`
/// so it can index the vector register file directly.
#[inline]
pub fn vc128_2(&self) -> usize {
    let field = extract_bits(self.raw, 23, 25);
    field as usize
}
/// NB field (bits 16-20) for lswi/stswi /// NB field (bits 16-20) for lswi/stswi
#[inline] pub fn nb(&self) -> u32 { extract_bits(self.raw, 16, 20) } #[inline] pub fn nb(&self) -> u32 { extract_bits(self.raw, 16, 20) }
/// Returns the 8-bit PERM immediate of a VX128_P-form instruction
/// (vpermwi128), reassembled from its two halves in the encoding:
///
/// * PERMl — 5 bits at PPC bits 11-15 (host bits 16-20) → result bits 0-4
/// * PERMh — 3 bits at PPC bits 23-25 (host bits 6-8)   → result bits 5-7
#[inline]
pub fn vx128_p_perm(&self) -> u32 {
    let perm_lo = extract_bits(self.raw, 11, 15);
    let perm_hi = extract_bits(self.raw, 23, 25);
    (perm_hi << 5) | perm_lo
}
/// Returns the 4-bit SH (shift) field of a VX128_5-form instruction
/// (vsldoi128). The field sits at PPC bits 22-25 (host bits 6-9).
#[inline]
pub fn vx128_5_sh(&self) -> u32 {
    let word = self.raw;
    extract_bits(word, 22, 25)
}
} }
/// Extract the 5-bit `UIMM` (`VX128_3`) / `IMM` (`VX128_4`) field. Canary /// Extract the 5-bit `UIMM` (`VX128_3`) / `IMM` (`VX128_4`) field. Canary
@@ -1129,4 +1146,94 @@ mod tests {
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
assert_eq!(d.vd128(), 5 | 32 | 64); assert_eq!(d.vd128(), 5 | 32 | 64);
} }
#[test]
fn vx128_5_sh_bit_positions() {
    // SH lives at PPC bits 22-25, i.e. host bits 6-9: the MSB of SH is host
    // bit 9 and the LSB is host bit 6. Each row is (raw word, expected SH,
    // assertion label).
    let cases: [(u32, u32, &str); 4] = [
        // SH=8 (binary 1000): only host bit 9 set -> raw = 0x200.
        (1 << 9, 8, "SH=8: MSB at PPC bit 22"),
        // SH=1 (binary 0001): only host bit 6 set -> raw = 0x40.
        (1 << 6, 1, "SH=1: LSB at PPC bit 25"),
        // SH=15 (binary 1111): host bits 6-9 all set -> raw = 0x3C0.
        (0xF << 6, 15, "SH=15: all 4 bits set"),
        // SH=0: empty instruction word.
        (0, 0, "SH=0"),
    ];
    for &(raw, expected, label) in cases.iter() {
        let decoded = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
        assert_eq!(decoded.vx128_5_sh(), expected, "{}", label);
    }
}
#[test]
fn vx128_4_accessors_correct_bit_positions() {
    // Field layout under test:
    //   z   — 2 bits at PPC bits 24-25 = host bits 6-7
    //   IMM — 5 bits at PPC bits 11-15 = host bits 16-20
    const Z_MASK: u32 = 0b11 << 6;
    const IMM_MASK: u32 = 0x1F << 16;

    // z=3 (binary 11) at PPC bits 24-25 = host bits 6-7
    let raw = 0b11u32 << 6;
    let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
    assert_eq!(d.vx128_4_z(), 3, "z=3 from host bits 6-7");
    // IMM=0x15 (binary 10101) at PPC bits 11-15 = host bits 16-20
    let raw2 = 0x15u32 << 16;
    let d2 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw2, addr: 0 };
    assert_eq!(d2.vx128_4_imm(), 0x15, "IMM=0x15 from host bits 16-20");
    // Combined: z=1, IMM=0xA — fields must not bleed into each other
    let raw3 = (0x1u32 << 6) | (0xAu32 << 16);
    let d3 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw3, addr: 0 };
    assert_eq!(d3.vx128_4_z(), 1, "z=1 combined");
    assert_eq!(d3.vx128_4_imm(), 0xA, "IMM=0xA combined");
    // z=2, IMM=0xF — max 4-bit blend mask, exercises the full lower nibble
    let raw4 = (0b10u32 << 6) | (0xFu32 << 16);
    let d4 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw4, addr: 0 };
    assert_eq!(d4.vx128_4_z(), 2, "z=2 from binary 10");
    assert_eq!(d4.vx128_4_imm(), 0xF, "IMM=0xF all-ones nibble");

    // The cases above only ever set bits that belong to z or IMM, so an
    // over-wide mask or an off-by-one field boundary would still pass them.
    // The two patterns below pin both edges of each field.

    // Every bit set: each accessor must saturate at exactly its field width.
    let all_ones = DecodedInstr { opcode: PpcOpcode::Invalid, raw: u32::MAX, addr: 0 };
    assert_eq!(all_ones.vx128_4_z(), 0b11, "z is exactly 2 bits wide");
    assert_eq!(all_ones.vx128_4_imm(), 0x1F, "IMM is exactly 5 bits wide");

    // Every bit set EXCEPT the two fields: both accessors must read zero,
    // proving that neighbouring bits never leak into either field.
    let outside = DecodedInstr {
        opcode: PpcOpcode::Invalid,
        raw: !(Z_MASK | IMM_MASK),
        addr: 0,
    };
    assert_eq!(outside.vx128_4_z(), 0, "bits outside host 6-7 must not reach z");
    assert_eq!(outside.vx128_4_imm(), 0, "bits outside host 16-20 must not reach IMM");
}
#[test]
fn vc128_2_extracts_ppc_bits_23_25() {
    // VC occupies PPC bits 23-25 = host bits 6-8, so
    // extract_bits(raw, 23, 25) = (raw >> (31-25)) & 0x7 = (raw >> 6) & 0x7.
    // Place each register number at host bit 6 and expect it back unchanged.
    // Values: a mixed pattern (binary 101), zero, the 3-bit maximum, and the LSB.
    let register_numbers: [u32; 4] = [5, 0, 7, 1];
    for &vc in register_numbers.iter() {
        let decoded = DecodedInstr { opcode: PpcOpcode::Invalid, raw: vc << 6, addr: 0 };
        assert_eq!(decoded.vc128_2(), vc as usize);
    }
}
#[test]
fn vx128_p_perm_assembles_correctly() {
    // Field layout under test:
    //   PERMl — 5 bits at host bits 16-20 -> result bits 0-4
    //   PERMh — 3 bits at host bits 6-8   -> result bits 5-7
    const PERML_MASK: u32 = 0x1F << 16;
    const PERMH_MASK: u32 = 0x7 << 6;

    // PERMl=0x1F (all 5 bits set) at host bits 16-20: raw = 0x1F << 16
    let raw = 0x1Fu32 << 16;
    assert_eq!(DecodedInstr::from_raw(raw).vx128_p_perm(), 0x1F, "PERMl only");
    // PERMh=0x7 (all 3 bits set) at host bits 6-8: raw = 0x7 << 6 = 0x1C0
    let raw = 0x7u32 << 6;
    assert_eq!(
        DecodedInstr::from_raw(raw).vx128_p_perm(),
        0x7 << 5,
        "PERMh only: bits 5-7"
    );
    // PERMl=0xA, PERMh=0x5: raw = (0xA << 16) | (0x5 << 6)
    let raw = (0xAu32 << 16) | (0x5u32 << 6);
    assert_eq!(DecodedInstr::from_raw(raw).vx128_p_perm(), 0xA | (0x5 << 5));
    // An empty instruction word decodes to an empty PERM.
    let raw = 0u32;
    assert_eq!(DecodedInstr::from_raw(raw).vx128_p_perm(), 0);

    // The cases above only ever set bits that belong to PERMl or PERMh, so
    // they cannot detect an over-wide mask. The two patterns below do.

    // Every bit set: the result must be exactly the 8-bit all-ones value.
    assert_eq!(
        DecodedInstr::from_raw(u32::MAX).vx128_p_perm(),
        0xFF,
        "PERM is exactly 8 bits wide"
    );
    // Every bit set EXCEPT PERMl/PERMh: neighbouring bits must not bleed in.
    assert_eq!(
        DecodedInstr::from_raw(!(PERML_MASK | PERMH_MASK)).vx128_p_perm(),
        0,
        "bits outside PERMl/PERMh must not reach PERM"
    );
}
} }

View File

@@ -2141,9 +2141,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
va = instr.va128(); va = instr.va128();
vb = instr.vb128(); vb = instr.vb128();
vd = instr.vd128(); vd = instr.vd128();
// For vperm128, the permutation control is in vC (third source) vc = instr.vc128_2();
// which is typically encoded via a different field
vc = instr.vd128(); // vperm128 uses vD as permute mask
} else { } else {
va = instr.ra(); va = instr.ra();
vb = instr.rb(); vb = instr.rb();
@@ -2176,7 +2174,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
PpcOpcode::vsldoi128 => { PpcOpcode::vsldoi128 => {
let a_bytes = ctx.vr[instr.va128()].as_bytes(); let a_bytes = ctx.vr[instr.va128()].as_bytes();
let b_bytes = ctx.vr[instr.vb128()].as_bytes(); let b_bytes = ctx.vr[instr.vb128()].as_bytes();
let sh = ((instr.raw >> 6) & 0x7) as usize | (((instr.raw >> 4) & 0x1) as usize) << 3; // extract shift let sh = instr.vx128_5_sh() as usize;
let mut concat = [0u8; 32]; let mut concat = [0u8; 32];
concat[..16].copy_from_slice(&a_bytes); concat[..16].copy_from_slice(&a_bytes);
concat[16..].copy_from_slice(&b_bytes); concat[16..].copy_from_slice(&b_bytes);
@@ -3766,8 +3764,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
// use rotated[N]). Titles generally use mask=0xF (copy-all) which // use rotated[N]). Titles generally use mask=0xF (copy-all) which
// makes this behave like a plain word rotate. // makes this behave like a plain word rotate.
PpcOpcode::vrlimi128 => { PpcOpcode::vrlimi128 => {
let shift = ((instr.raw >> 16) & 0x3) as usize; let shift = instr.vx128_4_z() as usize;
let mask = (instr.raw >> 2) & 0xF; // VX128_4 "fmask" let mask = instr.vx128_4_imm();
let b = ctx.vr[instr.vb128()].as_u32x4(); let b = ctx.vr[instr.vb128()].as_u32x4();
let d = ctx.vr[instr.vd128()].as_u32x4(); let d = ctx.vr[instr.vd128()].as_u32x4();
let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]]; let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]];
@@ -4304,7 +4302,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
} }
// vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane). // vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane).
PpcOpcode::vpermwi128 => { PpcOpcode::vpermwi128 => {
let imm = (instr.raw >> 16) & 0xFF; let imm = instr.vx128_p_perm();
let b = ctx.vr[instr.vb128()].as_u32x4(); let b = ctx.vr[instr.vb128()].as_u32x4();
let mut r = [0u32; 4]; let mut r = [0u32; 4];
// Output lane i ← b[(imm >> (2 * (3-i))) & 3] // Output lane i ← b[(imm >> (2 * (3-i))) & 3]