fix(cpu): PPCBUG-315 PPCBUG-563 fix vrlimi128 z and IMM field extraction
PPCBUG-563: Add vx128_4_imm() (PPC bits 11-15) and vx128_4_z() (PPC bits 24-25) accessors to decoder.rs for VX128_4-form instructions. PPCBUG-315: vrlimi128 was reading z from host bits 16-17 (a subset of IMM) and the mask from host bits 2-5 (a reserved/XO region). Both reads now go through the new accessors: z (0-3) selects the word lane the rotation starts from, and IMM is the 5-bit immediate field that carries the per-lane blend mask. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -79,6 +79,11 @@ impl DecodedInstr {
|
|||||||
/// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4.
|
/// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4.
|
||||||
#[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 }
|
#[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 }
|
||||||
|
|
||||||
|
/// IMM field for VX128_4-form instructions (vrlimi128) — 5-bit immediate at PPC bits 11-15; carries the per-lane blend mask.
|
||||||
|
#[inline]
pub fn vx128_4_imm(&self) -> u32 {
    // Field bounds are given in PPC (MSB = bit 0) numbering, inclusive on both ends.
    let word = self.raw;
    extract_bits(word, 11, 15)
}
|
||||||
|
/// z field for VX128_4-form instructions (vrlimi128) — 2-bit rotation index at PPC bits 24-25.
|
||||||
|
#[inline]
pub fn vx128_4_z(&self) -> u32 {
    // Two-bit field; bounds are PPC (MSB = bit 0) bit numbers, inclusive.
    let word = self.raw;
    extract_bits(word, 24, 25)
}
|
||||||
|
|
||||||
/// OE bit (bit 21) - overflow enable
|
/// OE bit (bit 21) - overflow enable
|
||||||
#[inline] pub fn oe(&self) -> bool { extract_bits(self.raw, 21, 21) != 0 }
|
#[inline] pub fn oe(&self) -> bool { extract_bits(self.raw, 21, 21) != 0 }
|
||||||
|
|
||||||
@@ -152,8 +157,20 @@ impl DecodedInstr {
|
|||||||
/// VS128 - same encoding as VD128
|
/// VS128 - same encoding as VD128
|
||||||
#[inline] pub fn vs128(&self) -> usize { self.vd128() }
|
#[inline] pub fn vs128(&self) -> usize { self.vd128() }
|
||||||
|
|
||||||
|
/// VC register for VX128_2-form instructions (vperm128) — 3-bit at PPC bits 23-25.
|
||||||
|
#[inline]
pub fn vc128_2(&self) -> usize {
    // Three-bit register number taken from PPC bits 23..=25, widened for use as an index.
    let field: u32 = extract_bits(self.raw, 23, 25);
    field as usize
}
|
||||||
|
|
||||||
/// NB field (bits 16-20) for lswi/stswi
|
/// NB field (bits 16-20) for lswi/stswi
|
||||||
#[inline] pub fn nb(&self) -> u32 { extract_bits(self.raw, 16, 20) }
|
#[inline] pub fn nb(&self) -> u32 { extract_bits(self.raw, 16, 20) }
|
||||||
|
|
||||||
|
/// PERM field for VX128_P-form instructions (vpermwi128) — 8-bit split encoding.
|
||||||
|
/// PERMl (5 bits) at PPC bits 11-15; PERMh (3 bits) at PPC bits 23-25.
|
||||||
|
#[inline] pub fn vx128_p_perm(&self) -> u32 {
|
||||||
|
extract_bits(self.raw, 11, 15) | (extract_bits(self.raw, 23, 25) << 5)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// SH field for VX128_5-form instructions (vsldoi128) — 4-bit shift at PPC bits 22-25.
|
||||||
|
#[inline]
pub fn vx128_5_sh(&self) -> u32 {
    // Four-bit shift amount; bounds are PPC (MSB = bit 0) bit numbers, inclusive.
    let word = self.raw;
    extract_bits(word, 22, 25)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract the 5-bit `UIMM` (`VX128_3`) / `IMM` (`VX128_4`) field. Canary
|
/// Extract the 5-bit `UIMM` (`VX128_3`) / `IMM` (`VX128_4`) field. Canary
|
||||||
@@ -1129,4 +1146,94 @@ mod tests {
|
|||||||
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
|
||||||
assert_eq!(d.vd128(), 5 | 32 | 64);
|
assert_eq!(d.vd128(), 5 | 32 | 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn vx128_5_sh_bit_positions() {
    // SH spans PPC bits 22-25, i.e. host bit 9 (MSB) down to host bit 6 (LSB).
    // Each row: (raw instruction word, expected SH, assertion label).
    let cases: [(u32, u32, &str); 4] = [
        // Only host bit 9 set -> binary 1000.
        (0x200, 8, "SH=8: MSB at PPC bit 22"),
        // Only host bit 6 set -> binary 0001.
        (0x40, 1, "SH=1: LSB at PPC bit 25"),
        // Host bits 6-9 all set (0xF << 6) -> binary 1111.
        (0x3C0, 15, "SH=15: all 4 bits set"),
        // Empty word.
        (0, 0, "SH=0"),
    ];

    for &(raw, expected, label) in &cases {
        let instr = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
        assert_eq!(instr.vx128_5_sh(), expected, "{}", label);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn vx128_4_accessors_correct_bit_positions() {
    // Wraps a raw word in a DecodedInstr; the accessors under test read only `raw`.
    let decoded = |raw: u32| DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };

    // z sits at PPC bits 24-25 = host bits 6-7; binary 11 sets both.
    let z_only = decoded(0b11u32 << 6);
    assert_eq!(z_only.vx128_4_z(), 3, "z=3 from host bits 6-7");

    // IMM sits at PPC bits 11-15 = host bits 16-20; 0x15 is binary 10101.
    let imm_only = decoded(0x15u32 << 16);
    assert_eq!(imm_only.vx128_4_imm(), 0x15, "IMM=0x15 from host bits 16-20");

    // Both fields populated at once (z=1, IMM=0xA): neither may leak into the other.
    let both = decoded((0x1u32 << 6) | (0xAu32 << 16));
    assert_eq!(both.vx128_4_z(), 1, "z=1 combined");
    assert_eq!(both.vx128_4_imm(), 0xA, "IMM=0xA combined");

    // z=2 with IMM=0xF: the low nibble of IMM fully set.
    let nibble = decoded((0b10u32 << 6) | (0xFu32 << 16));
    assert_eq!(nibble.vx128_4_z(), 2, "z=2 from binary 10");
    assert_eq!(nibble.vx128_4_imm(), 0xF, "IMM=0xF all-ones nibble");
}
|
||||||
|
|
||||||
|
#[test]
fn vc128_2_extracts_ppc_bits_23_25() {
    // VC occupies PPC bits 23-25 = host bits 6-8, so a register number n is
    // encoded as n << 6. Each row: (raw instruction word, expected VC).
    let cases: [(u32, usize); 4] = [
        (5u32 << 6, 5), // binary 101
        (0, 0),         // empty word
        (7u32 << 6, 7), // all three bits set
        (1u32 << 6, 1), // LSB only
    ];

    for &(raw, expected) in &cases {
        let instr = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 };
        assert_eq!(instr.vc128_2(), expected);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn vx128_p_perm_assembles_correctly() {
    // PERM is stored split across the word:
    //   PERMl (low 5 bits)  at PPC bits 11-15 = host bits 16-20
    //   PERMh (high 3 bits) at PPC bits 23-25 = host bits 6-8
    const PERML_MASK: u32 = 0x1F << 16;
    const PERMH_MASK: u32 = 0x7 << 6;

    // PERMl=0x1F (all 5 bits set), PERMh clear: only result bits 0-4 are set.
    assert_eq!(DecodedInstr::from_raw(PERML_MASK).vx128_p_perm(), 0x1F, "PERMl only");

    // PERMh=0x7 (all 3 bits set), PERMl clear: only result bits 5-7 are set.
    assert_eq!(
        DecodedInstr::from_raw(PERMH_MASK).vx128_p_perm(),
        0x7 << 5,
        "PERMh only: bits 5-7"
    );

    // PERMl=0xA, PERMh=0x5: both halves land in their own result bits.
    let raw = (0xAu32 << 16) | (0x5u32 << 6);
    assert_eq!(DecodedInstr::from_raw(raw).vx128_p_perm(), 0xA | (0x5 << 5));

    // An all-zero word decodes to PERM=0.
    assert_eq!(DecodedInstr::from_raw(0).vx128_p_perm(), 0);

    // Every bit OUTSIDE the two sub-fields is set: none of them may leak into
    // PERM. (The previous "no bleed" case only checked raw=0, which cannot
    // detect a leak.) Built as a struct literal so the check depends only on
    // `raw`.
    let outside = DecodedInstr {
        opcode: PpcOpcode::Invalid,
        raw: !(PERML_MASK | PERMH_MASK),
        addr: 0,
    };
    assert_eq!(outside.vx128_p_perm(), 0, "bits outside PERMl/PERMh must be ignored");
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2141,9 +2141,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
va = instr.va128();
|
va = instr.va128();
|
||||||
vb = instr.vb128();
|
vb = instr.vb128();
|
||||||
vd = instr.vd128();
|
vd = instr.vd128();
|
||||||
// For vperm128, the permutation control is in vC (third source)
|
vc = instr.vc128_2();
|
||||||
// which is typically encoded via a different field
|
|
||||||
vc = instr.vd128(); // vperm128 uses vD as permute mask
|
|
||||||
} else {
|
} else {
|
||||||
va = instr.ra();
|
va = instr.ra();
|
||||||
vb = instr.rb();
|
vb = instr.rb();
|
||||||
@@ -2176,7 +2174,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
PpcOpcode::vsldoi128 => {
|
PpcOpcode::vsldoi128 => {
|
||||||
let a_bytes = ctx.vr[instr.va128()].as_bytes();
|
let a_bytes = ctx.vr[instr.va128()].as_bytes();
|
||||||
let b_bytes = ctx.vr[instr.vb128()].as_bytes();
|
let b_bytes = ctx.vr[instr.vb128()].as_bytes();
|
||||||
let sh = ((instr.raw >> 6) & 0x7) as usize | (((instr.raw >> 4) & 0x1) as usize) << 3; // extract shift
|
let sh = instr.vx128_5_sh() as usize;
|
||||||
let mut concat = [0u8; 32];
|
let mut concat = [0u8; 32];
|
||||||
concat[..16].copy_from_slice(&a_bytes);
|
concat[..16].copy_from_slice(&a_bytes);
|
||||||
concat[16..].copy_from_slice(&b_bytes);
|
concat[16..].copy_from_slice(&b_bytes);
|
||||||
@@ -3766,8 +3764,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
// use rotated[N]). Titles generally use mask=0xF (copy-all) which
|
// use rotated[N]). Titles generally use mask=0xF (copy-all) which
|
||||||
// makes this behave like a plain word rotate.
|
// makes this behave like a plain word rotate.
|
||||||
PpcOpcode::vrlimi128 => {
|
PpcOpcode::vrlimi128 => {
|
||||||
let shift = ((instr.raw >> 16) & 0x3) as usize;
|
let shift = instr.vx128_4_z() as usize;
|
||||||
let mask = (instr.raw >> 2) & 0xF; // VX128_4 "fmask"
|
let mask = instr.vx128_4_imm();
|
||||||
let b = ctx.vr[instr.vb128()].as_u32x4();
|
let b = ctx.vr[instr.vb128()].as_u32x4();
|
||||||
let d = ctx.vr[instr.vd128()].as_u32x4();
|
let d = ctx.vr[instr.vd128()].as_u32x4();
|
||||||
let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]];
|
let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]];
|
||||||
@@ -4304,7 +4302,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
|||||||
}
|
}
|
||||||
// vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane).
|
// vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane).
|
||||||
PpcOpcode::vpermwi128 => {
|
PpcOpcode::vpermwi128 => {
|
||||||
let imm = (instr.raw >> 16) & 0xFF;
|
let imm = instr.vx128_p_perm();
|
||||||
let b = ctx.vr[instr.vb128()].as_u32x4();
|
let b = ctx.vr[instr.vb128()].as_u32x4();
|
||||||
let mut r = [0u32; 4];
|
let mut r = [0u32; 4];
|
||||||
// Output lane i ← b[(imm >> (2 * (3-i))) & 3]
|
// Output lane i ← b[(imm >> (2 * (3-i))) & 3]
|
||||||
|
|||||||
Reference in New Issue
Block a user