diff --git a/crates/xenia-cpu/src/decoder.rs b/crates/xenia-cpu/src/decoder.rs index 8299447..718aa9b 100644 --- a/crates/xenia-cpu/src/decoder.rs +++ b/crates/xenia-cpu/src/decoder.rs @@ -79,6 +79,11 @@ impl DecodedInstr { /// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4. #[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 } + /// IMM field for VX128_4-form instructions (vrlimi128) — 5-bit blend mask at PPC bits 11-15. + #[inline] pub fn vx128_4_imm(&self) -> u32 { extract_bits(self.raw, 11, 15) } + /// z field for VX128_4-form instructions (vrlimi128) — 2-bit rotation index at PPC bits 24-25. + #[inline] pub fn vx128_4_z(&self) -> u32 { extract_bits(self.raw, 24, 25) } + /// OE bit (bit 21) - overflow enable #[inline] pub fn oe(&self) -> bool { extract_bits(self.raw, 21, 21) != 0 } @@ -152,8 +157,20 @@ impl DecodedInstr { /// VS128 - same encoding as VD128 #[inline] pub fn vs128(&self) -> usize { self.vd128() } + /// VC register for VX128_2-form instructions (vperm128) — 3-bit at PPC bits 23-25. + #[inline] pub fn vc128_2(&self) -> usize { extract_bits(self.raw, 23, 25) as usize } + /// NB field (bits 16-20) for lswi/stswi #[inline] pub fn nb(&self) -> u32 { extract_bits(self.raw, 16, 20) } + + /// PERM field for VX128_P-form instructions (vpermwi128) — 8-bit split encoding. + /// PERMl (5 bits) at PPC bits 11-15; PERMh (3 bits) at PPC bits 23-25. + #[inline] pub fn vx128_p_perm(&self) -> u32 { + extract_bits(self.raw, 11, 15) | (extract_bits(self.raw, 23, 25) << 5) + } + + /// SH field for VX128_5-form instructions (vsldoi128) — 4-bit shift at PPC bits 22-25. + #[inline] pub fn vx128_5_sh(&self) -> u32 { extract_bits(self.raw, 22, 25) } } /// Extract the 5-bit `UIMM` (`VX128_3`) / `IMM` (`VX128_4`) field. 
Canary @@ -1129,4 +1146,94 @@ mod tests { let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.vd128(), 5 | 32 | 64); } + + #[test] + fn vx128_5_sh_bit_positions() { + // SH=8 (binary 1000): bit 3 = 1, bits 0-2 = 0. + // Host bit 9 = 1 (PPC bit 22), host bits 6-8 = 0. + // So raw bit 9 set = raw |= 1 << 9 = 0x200 + let raw = 0x200u32; // host bit 9 set only + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_5_sh(), 8, "SH=8: MSB at PPC bit 22"); + + // SH=1 (binary 0001): host bit 6 set = raw |= 1 << 6 = 0x40 + let raw = 0x40u32; + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_5_sh(), 1, "SH=1: LSB at PPC bit 25"); + + // SH=15 (binary 1111): host bits 6-9 all set = raw |= 0xF << 6 = 0x3C0 + let raw = 0x3C0u32; + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_5_sh(), 15, "SH=15: all 4 bits set"); + + // SH=0: raw=0 + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 0, addr: 0 }; + assert_eq!(d.vx128_5_sh(), 0, "SH=0"); + } + + #[test] + fn vx128_4_accessors_correct_bit_positions() { + // z=3 (binary 11) at PPC bits 24-25 = host bits 6-7 + let raw = 0b11u32 << 6; + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_4_z(), 3, "z=3 from host bits 6-7"); + + // IMM=0x15 (binary 10101) at PPC bits 11-15 = host bits 16-20 + let raw2 = 0x15u32 << 16; + let d2 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw2, addr: 0 }; + assert_eq!(d2.vx128_4_imm(), 0x15, "IMM=0x15 from host bits 16-20"); + + // Combined: z=1, IMM=0xA — fields must not bleed into each other + let raw3 = (0x1u32 << 6) | (0xAu32 << 16); + let d3 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw3, addr: 0 }; + assert_eq!(d3.vx128_4_z(), 1, "z=1 combined"); + assert_eq!(d3.vx128_4_imm(), 0xA, "IMM=0xA combined"); + + // z=2, IMM=0xF — max 4-bit blend mask, exercises the full lower nibble + let raw4 = (0b10u32 
<< 6) | (0xFu32 << 16); + let d4 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw4, addr: 0 }; + assert_eq!(d4.vx128_4_z(), 2, "z=2 from binary 10"); + assert_eq!(d4.vx128_4_imm(), 0xF, "IMM=0xF all-ones nibble"); + } + + #[test] + fn vc128_2_extracts_ppc_bits_23_25() { + // VC=5 (binary 101) at PPC bits 23-25 = host bits 6-8 + // extract_bits(raw, 23, 25) = (raw >> (31-25)) & 0x7 = (raw >> 6) & 0x7 + let raw = 5u32 << 6; // host bits 6-8 = 5 + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vc128_2(), 5); + + let d0 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 0, addr: 0 }; + assert_eq!(d0.vc128_2(), 0); + + let d7 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 7u32 << 6, addr: 0 }; + assert_eq!(d7.vc128_2(), 7); + + let d1 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 1u32 << 6, addr: 0 }; + assert_eq!(d1.vc128_2(), 1); + } + + #[test] + fn vx128_p_perm_assembles_correctly() { + // PERMl=0x1F (all 5 bits set) at host bits 16-20: raw = 0x1F << 16 + let raw = 0x1Fu32 << 16; + assert_eq!(DecodedInstr::from_raw(raw).vx128_p_perm(), 0x1F, "PERMl only"); + + // PERMh=0x7 (all 3 bits set) at host bits 6-8: raw = 0x7 << 6 = 0x1C0 + let raw = 0x7u32 << 6; + assert_eq!( + DecodedInstr::from_raw(raw).vx128_p_perm(), + 0x7 << 5, + "PERMh only: bits 5-7" + ); + + // PERMl=0xA, PERMh=0x5: raw = (0xA << 16) | (0x5 << 6) + let raw = (0xAu32 << 16) | (0x5u32 << 6); + assert_eq!(DecodedInstr::from_raw(raw).vx128_p_perm(), 0xA | (0x5 << 5)); + + // All-zero instruction word: both PERM halves decode to 0 + let raw = 0u32; + assert_eq!(DecodedInstr::from_raw(raw).vx128_p_perm(), 0); + } } diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 65c6a9d..fee3a07 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -2141,9 +2141,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - va = instr.va128(); vb = instr.vb128(); vd = 
instr.vd128(); - // For vperm128, the permutation control is in vC (third source) - // which is typically encoded via a different field - vc = instr.vd128(); // vperm128 uses vD as permute mask + vc = instr.vc128_2(); } else { va = instr.ra(); vb = instr.rb(); @@ -2176,7 +2174,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::vsldoi128 => { let a_bytes = ctx.vr[instr.va128()].as_bytes(); let b_bytes = ctx.vr[instr.vb128()].as_bytes(); - let sh = ((instr.raw >> 6) & 0x7) as usize | (((instr.raw >> 4) & 0x1) as usize) << 3; // extract shift + let sh = instr.vx128_5_sh() as usize; let mut concat = [0u8; 32]; concat[..16].copy_from_slice(&a_bytes); concat[16..].copy_from_slice(&b_bytes); @@ -3766,8 +3764,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // use rotated[N]). Titles generally use mask=0xF (copy-all) which // makes this behave like a plain word rotate. PpcOpcode::vrlimi128 => { - let shift = ((instr.raw >> 16) & 0x3) as usize; - let mask = (instr.raw >> 2) & 0xF; // VX128_4 "fmask" + let shift = instr.vx128_4_z() as usize; + let mask = instr.vx128_4_imm(); let b = ctx.vr[instr.vb128()].as_u32x4(); let d = ctx.vr[instr.vd128()].as_u32x4(); let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]]; @@ -4304,7 +4302,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } // vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane). PpcOpcode::vpermwi128 => { - let imm = (instr.raw >> 16) & 0xFF; + let imm = instr.vx128_p_perm(); let b = ctx.vr[instr.vb128()].as_u32x4(); let mut r = [0u32; 4]; // Output lane i ← b[(imm >> (2 * (3-i))) & 3]