diff --git a/crates/xenia-cpu/src/decoder.rs b/crates/xenia-cpu/src/decoder.rs index 136d3e3..ef28270 100644 --- a/crates/xenia-cpu/src/decoder.rs +++ b/crates/xenia-cpu/src/decoder.rs @@ -74,6 +74,16 @@ impl DecodedInstr { /// Rc bit (bit 31) - record CR0 #[inline] pub fn rc_bit(&self) -> bool { self.raw & 1 != 0 } + /// Rc for VC-form vector compare instructions — PPC bit 21 = host bit 10. + #[inline] pub fn vc_rc_bit(&self) -> bool { (self.raw >> 10) & 1 != 0 } + /// Rc for VX128_R-form vector compare instructions — PPC bit 27 = host bit 4. + #[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 4) & 1 != 0 } + + /// IMM field for VX128_4-form instructions (vrlimi128) — 5-bit blend mask at PPC bits 11-15. + #[inline] pub fn vx128_4_imm(&self) -> u32 { extract_bits(self.raw, 11, 15) } + /// z field for VX128_4-form instructions (vrlimi128) — 2-bit rotation index at PPC bits 24-25. + #[inline] pub fn vx128_4_z(&self) -> u32 { extract_bits(self.raw, 24, 25) } + /// OE bit (bit 21) - overflow enable #[inline] pub fn oe(&self) -> bool { extract_bits(self.raw, 21, 21) != 0 } @@ -89,7 +99,13 @@ impl DecodedInstr { /// SH field for 64-bit shifts (bits 16-20 + bit 30) #[inline] pub fn sh64(&self) -> u32 { - (extract_bits(self.raw, 16, 20) << 1) | extract_bits(self.raw, 30, 30) + (extract_bits(self.raw, 30, 30) << 5) | extract_bits(self.raw, 16, 20) + } + + /// MB/ME field for MD-form and MDS-form instructions (6-bit field, split encoding). + /// MB[4:0] at PPC bits 21-25; MB[5] at PPC bit 26. + #[inline] pub fn mb_md(&self) -> u32 { + extract_bits(self.raw, 21, 25) | (extract_bits(self.raw, 26, 26) << 5) } /// SPR field (bits 11-20, swapped halves) @@ -141,8 +157,20 @@ impl DecodedInstr { /// VS128 - same encoding as VD128 #[inline] pub fn vs128(&self) -> usize { self.vd128() } + /// VC register for VX128_2-form instructions (vperm128) — 3-bit at PPC bits 23-25. + #[inline] pub fn vc128_2(&self) -> usize { extract_bits(self.raw, 23, 25) as usize } + /// NB field (bits 16-20) for lswi/stswi #[inline] pub fn nb(&self) -> u32 { extract_bits(self.raw, 16, 20) } + + /// PERM field for VX128_P-form instructions (vpermwi128) — 8-bit split encoding. + /// PERMl (5 bits) at PPC bits 11-15; PERMh (3 bits) at PPC bits 23-25. + #[inline] pub fn vx128_p_perm(&self) -> u32 { + extract_bits(self.raw, 11, 15) | (extract_bits(self.raw, 23, 25) << 5) + } + + /// SH field for VX128_5-form instructions (vsldoi128) — 4-bit shift at PPC bits 22-25. + #[inline] pub fn vx128_5_sh(&self) -> u32 { extract_bits(self.raw, 22, 25) } } /// Extract the 5-bit `UIMM` (`VX128_3`) / `IMM` (`VX128_4`) field. Canary @@ -636,9 +664,13 @@ fn decode_op6(code: u32) -> PpcOpcode { _ => {} } - // VMX128 compare - let key4 = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27); - match key4 { + // VMX128 compare (non-dot and dot forms). + // Non-dot: bit 27 = 0. Dot: bit 27 = 1, but bit 25 must also be 0 to + // distinguish from the shift/merge group (which has bit 25 = 1 when bit 27 = 1). + // key4_nd uses bits 22-24 + bit 27 (same as original, covers non-dot). + // key4_dt uses bits 22-24 + bit 25 + bit 27 (narrower, covers dot-only). + let key4_nd = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27); + match key4_nd { 0b000000 => return PpcOpcode::vcmpeqfp128, 0b001000 => return PpcOpcode::vcmpgefp128, 0b010000 => return PpcOpcode::vcmpgtfp128, @@ -646,6 +678,16 @@ fn decode_op6(code: u32) -> PpcOpcode { 0b100000 => return PpcOpcode::vcmpequw128, _ => {} } + // Dot forms: bit 27 = 1, bit 25 = 0 (key = bits22-24 + bit25 + bit27, low 3 bits) + let key4_dt = (extract_bits(code, 22, 24) << 2) | (extract_bits(code, 25, 25) << 1) | extract_bits(code, 27, 27); + match key4_dt { + 0b00001 => return PpcOpcode::vcmpeqfp128, // bits22-24=000, bit25=0, bit27=1 + 0b00101 => return PpcOpcode::vcmpgefp128, // bits22-24=001, bit25=0, bit27=1 + 0b01001 => return PpcOpcode::vcmpgtfp128, // bits22-24=010, bit25=0, bit27=1 + 0b01101 => return PpcOpcode::vcmpbfp128, // bits22-24=011, bit25=0, bit27=1 + 0b10001 => return PpcOpcode::vcmpequw128, // bits22-24=100, bit25=0, bit27=1 + _ => {} + } // VMX128 shift/merge let key5 = (extract_bits(code, 22, 25) << 2) | extract_bits(code, 27, 27); @@ -1104,4 +1146,93 @@ mod tests { let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; assert_eq!(d.vd128(), 5 | 32 | 64); } + + #[test] + fn vx128_5_sh_bit_positions() { + // SH=8 (binary 1000): bit 3 = 1, bits 0-2 = 0. + // Host bit 9 = 1 (PPC bit 22), host bits 6-8 = 0. + // So raw bit 9 set = raw |= 1 << 9 = 0x200 + let raw = 0x200u32; // host bit 9 set only + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_5_sh(), 8, "SH=8: MSB at PPC bit 22"); + + // SH=1 (binary 0001): host bit 6 set = raw |= 1 << 6 = 0x40 + let raw = 0x40u32; + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_5_sh(), 1, "SH=1: LSB at PPC bit 25"); + + // SH=15 (binary 1111): host bits 6-9 all set = raw |= 0xF << 6 = 0x3C0 + let raw = 0x3C0u32; + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_5_sh(), 15, "SH=15: all 4 bits set"); + + // SH=0: raw=0 + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 0, addr: 0 }; + assert_eq!(d.vx128_5_sh(), 0, "SH=0"); + } + + #[test] + fn vx128_4_accessors_correct_bit_positions() { + // z=3 (binary 11) at PPC bits 24-25 = host bits 6-7 + let raw = 0b11u32 << 6; + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_4_z(), 3, "z=3 from host bits 6-7"); + + // IMM=0x15 (binary 10101) at PPC bits 11-15 = host bits 16-20 + let raw2 = 0x15u32 << 16; + let d2 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw2, addr: 0 }; + assert_eq!(d2.vx128_4_imm(), 0x15, "IMM=0x15 from host bits 16-20"); + + // Combined: z=1, IMM=0xA — fields must not bleed into each other + let raw3 = (0x1u32 << 6) | (0xAu32 << 16); + let d3 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw3, addr: 0 }; + assert_eq!(d3.vx128_4_z(), 1, "z=1 combined"); + assert_eq!(d3.vx128_4_imm(), 0xA, "IMM=0xA combined"); + + // z=2, IMM=0xF — max 4-bit blend mask, exercises the full lower nibble + let raw4 = (0b10u32 << 6) | (0xFu32 << 16); + let d4 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: raw4, addr: 0 }; + assert_eq!(d4.vx128_4_z(), 2, "z=2 from binary 10"); + assert_eq!(d4.vx128_4_imm(), 0xF, "IMM=0xF all-ones nibble"); + } + + #[test] + fn vc128_2_extracts_ppc_bits_23_25() { + // VC=5 (binary 101) at PPC bits 23-25 = host bits 6-8 + // extract_bits(raw, 23, 25) = (raw >> (31-25)) & 0x7 = (raw >> 6) & 0x7 + let raw = 5u32 << 6; // host bits 6-8 = 5 + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vc128_2(), 5); + + let d0 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 0, addr: 0 }; + assert_eq!(d0.vc128_2(), 0); + + let d7 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 7u32 << 6, addr: 0 }; + assert_eq!(d7.vc128_2(), 7); + + let d1 = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 1u32 << 6, addr: 0 }; + assert_eq!(d1.vc128_2(), 1); + } + + #[test] + fn vx128_p_perm_assembles_correctly() { + // PERMl=0x1F (all 5 bits set) at host bits 16-20: raw = 0x1F << 16 + let raw = 0x1Fu32 << 16; + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_p_perm(), 0x1F, "PERMl only"); + + // PERMh=0x7 (all 3 bits set) at host bits 6-8: raw = 0x7 << 6 = 0x1C0 + let raw = 0x7u32 << 6; + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_p_perm(), 0x7 << 5, "PERMh only: bits 5-7"); + + // PERMl=0xA, PERMh=0x5: raw = (0xA << 16) | (0x5 << 6) + let raw = (0xAu32 << 16) | (0x5u32 << 6); + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw, addr: 0 }; + assert_eq!(d.vx128_p_perm(), 0xA | (0x5 << 5)); + + // PERMl and PERMh bits must not bleed into each other + let d = DecodedInstr { opcode: PpcOpcode::Invalid, raw: 0, addr: 0 }; + assert_eq!(d.vx128_p_perm(), 0); + } } diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index c22cd0b..3bb3d56 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -693,7 +693,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::rldiclx => { let rs = ctx.gpr[instr.rs()]; let sh = instr.sh64(); - let mb = (instr.mb() << 1) | ((instr.raw >> 1) & 1); // 6-bit mb + let mb = instr.mb_md(); let rotated = rs.rotate_left(sh); let mask = rld_mask_left(mb); ctx.gpr[instr.ra()] = rotated & mask; @@ -703,7 +703,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::rldicrx => { let rs = ctx.gpr[instr.rs()]; let sh = instr.sh64(); - let me = (instr.mb() << 1) | ((instr.raw >> 1) & 1); // 6-bit me + let me = instr.mb_md(); let rotated = rs.rotate_left(sh); let mask = rld_mask_right(me); ctx.gpr[instr.ra()] = rotated & mask; @@ -713,7 +713,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::rldicx => { let rs = ctx.gpr[instr.rs()]; let sh = instr.sh64(); - let mb = (instr.mb() << 1) | ((instr.raw >> 1) & 1); + let mb = instr.mb_md(); let rotated = rs.rotate_left(sh); let mask = rld_mask_left(mb) & rld_mask_right(63 - sh); ctx.gpr[instr.ra()] = rotated & mask; @@ -723,7 +723,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::rldimix => { let rs = ctx.gpr[instr.rs()]; let sh = instr.sh64(); - let mb = (instr.mb() << 1) | ((instr.raw >> 1) & 1); + let mb = instr.mb_md(); let rotated = rs.rotate_left(sh); let mask = rld_mask_left(mb) & rld_mask_right(63 - sh); ctx.gpr[instr.ra()] = (rotated & mask) | (ctx.gpr[instr.ra()] & !mask); @@ -733,7 +733,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::rldclx => { let rs = ctx.gpr[instr.rs()]; let sh = ctx.gpr[instr.rb()] & 0x3F; - let mb = (instr.mb() << 1) | ((instr.raw >> 1) & 1); + let mb = instr.mb_md(); let rotated = rs.rotate_left(sh as u32); let mask = rld_mask_left(mb); ctx.gpr[instr.ra()] = rotated & mask; @@ -743,7 +743,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::rldcrx => { let rs = ctx.gpr[instr.rs()]; let sh = ctx.gpr[instr.rb()] & 0x3F; - let me = (instr.mb() << 1) | ((instr.raw >> 1) & 1); + let me = instr.mb_md(); let rotated = rs.rotate_left(sh as u32); let mask = rld_mask_right(me); ctx.gpr[instr.ra()] = rotated & mask; @@ -2036,7 +2036,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + let rc = if matches!(instr.opcode, PpcOpcode::vcmpeqfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } PpcOpcode::vcmpgefp | PpcOpcode::vcmpgefp128 => { @@ -2046,7 +2047,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] >= b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + let rc = if matches!(instr.opcode, PpcOpcode::vcmpgefp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } PpcOpcode::vcmpgtfp | PpcOpcode::vcmpgtfp128 => { @@ -2056,7 +2058,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + let rc = if matches!(instr.opcode, PpcOpcode::vcmpgtfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } @@ -2138,9 +2141,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - va = instr.va128(); vb = instr.vb128(); vd = instr.vd128(); - // For vperm128, the permutation control is in vC (third source) - // which is typically encoded via a different field - vc = instr.vd128(); // vperm128 uses vD as permute mask + vc = instr.vc128_2(); } else { va = instr.ra(); vb = instr.rb(); @@ -2173,7 +2174,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - PpcOpcode::vsldoi128 => { let a_bytes = ctx.vr[instr.va128()].as_bytes(); let b_bytes = ctx.vr[instr.vb128()].as_bytes(); - let sh = ((instr.raw >> 6) & 0x7) as usize | (((instr.raw >> 4) & 0x1) as usize) << 3; // extract shift + let sh = instr.vx128_5_sh() as usize; let mut concat = [0u8; 32]; concat[..16].copy_from_slice(&a_bytes); concat[16..].copy_from_slice(&b_bytes); @@ -2398,7 +2399,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + let rc = if matches!(instr.opcode, PpcOpcode::vcmpequw128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } @@ -3528,7 +3530,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] == b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3541,7 +3543,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] == b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3554,7 +3556,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3567,7 +3569,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3580,7 +3582,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3593,7 +3595,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3606,7 +3608,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); } ctx.vr[instr.rd()] = v; ctx.pc += 4; } @@ -3616,7 +3618,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); } ctx.vr[instr.rd()] = v; ctx.pc += 4; } @@ -3638,7 +3640,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - if a[i].is_nan() || b[i].is_nan() || a[i] < -b[i] { lane |= 0x4000_0000; any_out = true; } r[i] = lane; } - if instr.rc_bit() { + let rc = if is_128 { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { ctx.cr[6] = crate::context::CrField { lt: false, gt: false, eq: !any_out, so: false, }; @@ -3761,8 +3764,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // use rotated[N]). Titles generally use mask=0xF (copy-all) which // makes this behave like a plain word rotate. PpcOpcode::vrlimi128 => { - let shift = ((instr.raw >> 16) & 0x3) as usize; - let mask = (instr.raw >> 2) & 0xF; // VX128_4 "fmask" + let shift = instr.vx128_4_z() as usize; + let mask = instr.vx128_4_imm(); let b = ctx.vr[instr.vb128()].as_u32x4(); let d = ctx.vr[instr.vd128()].as_u32x4(); let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]]; @@ -3988,14 +3991,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // position) and masked to only 3 bits. Canary extracts from the // VX128_3/4 `IMM` field at PPC bits 16-22 (MSB) and does // `type = IMM >> 2` to pick up the 5-bit type selector — the low - // 2 bits (`pack`) select output-slot layout for `vpkd3d128` and - // are ignored by `vupkd3d128`. Extracting the low 2 bits as - // `pack` (unused here — we hand back the codec output in its - // canonical lane position, the subsequent permute instruction - // handles placement) for completeness. + // 2 bits (`pack`) select output-slot layout for `vpkd3d128`. PpcOpcode::vpkd3d128 => { use crate::vmx::D3dPackType; let uimm = crate::decoder::extract_vx128_uimm5(instr.raw); + let pack = (uimm & 3) as usize; + let shift = instr.vx128_4_z() as usize; let ty = D3dPackType::from_immediate(uimm >> 2); let src = ctx.vr[instr.vb128()]; let out = match ty { @@ -4017,7 +4018,36 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - src } }; - ctx.vr[instr.vd128()] = out; + // Post-pack permutation: merge packed `out` into previous `vd` + // per canary ppc_emit_altivec.cc:2126-2188 MakePermuteMask tables. + // MakePermuteMask(r0,l0, r1,l1, r2,l2, r3,l3): result[i] = if ri==0 { prev[li] } else { out[li] } + let result = if pack == 0 { + out + } else { + // (source_reg, lane): 0=prev vd, 1=packed out + const PERM: [[[(u8, u8); 4]; 4]; 3] = [ + // pack=1 (VPACK_32): places out[3] at lane (3-shift) + [[(0,0),(0,1),(0,2),(1,3)], [(0,0),(0,1),(1,3),(0,3)], + [(0,0),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]], + // pack=2 (64-bit): places out[2..3] at lanes (2-shift)..(3-shift) + [[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)], + [(1,2),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]], + // pack=3 (64-bit): same as pack=2 except shift=3 selects out[2] at lane 3 + [[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)], + [(1,2),(1,3),(0,2),(0,3)], [(0,0),(0,1),(0,2),(1,2)]], + ]; + let prev = ctx.vr[instr.vd128()]; + let pw = prev.as_u32x4(); + let ow = out.as_u32x4(); + let sel = PERM[pack - 1][shift]; + xenia_types::Vec128::from_u32x4_array([ + if sel[0].0 == 0 { pw[sel[0].1 as usize] } else { ow[sel[0].1 as usize] }, + if sel[1].0 == 0 { pw[sel[1].1 as usize] } else { ow[sel[1].1 as usize] }, + if sel[2].0 == 0 { pw[sel[2].1 as usize] } else { ow[sel[2].1 as usize] }, + if sel[3].0 == 0 { pw[sel[3].1 as usize] } else { ow[sel[3].1 as usize] }, + ]) + }; + ctx.vr[instr.vd128()] = result; ctx.pc += 4; } PpcOpcode::vupkd3d128 => { @@ -4299,7 +4329,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - } // vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane). PpcOpcode::vpermwi128 => { - let imm = (instr.raw >> 16) & 0xFF; + let imm = instr.vx128_p_perm(); let b = ctx.vr[instr.vb128()].as_u32x4(); let mut r = [0u32; 4]; // Output lane i ← b[(imm >> (2 * (3-i))) & 3] @@ -6252,4 +6282,185 @@ mod tests { expected[4] = 0xAB; assert_eq!(ctx.vr[3].as_bytes(), expected); } + + // ===== PPCBUG-046 / PPCBUG-561: rldicl / clrldi mb_md fix ===== + + /// Encode rldicl (MD-form, opcode=30, XO=0) in host bit notation. + /// rs: source register, ra: dest register, sh: shift amount (6-bit), + /// mb: mask-begin (6-bit), rc: record bit. + fn encode_rldicl(rs: u32, ra: u32, sh: u32, mb: u32, rc: u32) -> u32 { + (30 << 26) + | (rs << 21) + | (ra << 16) + | ((sh & 0x1F) << 11) + | ((mb & 0x1F) << 6) + | (((mb >> 5) & 1) << 5) + | (((sh >> 5) & 1) << 1) + | (rc & 1) + } + + #[test] + fn clrldi_zero_extends_low_32_bits() { + // clrldi r3, r4, 32 = rldicl r3, r4, 0, 32, 0 + // After PPCBUG-046 fix: mask must be 0x00000000_FFFFFFFF (mb=32 → mask from bit 32 to 63) + // If mb=32 was decoded as mb=0, the mask would be all-ones and the result would be 0xDEAD_BEEF_CAFE_BABE (no-op) + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + ctx.gpr[4] = 0xDEAD_BEEF_CAFE_BABE_u64; + let raw = encode_rldicl(4, 3, 0, 32, 0); // sh=0, mb=32 + write_instr(&mem, 0x100, raw); + ctx.pc = 0x100; + step(&mut ctx, &mem); + assert_eq!(ctx.gpr[3], 0x0000_0000_CAFE_BABE, "clrldi must zero-extend low 32 bits"); + } + + #[test] + fn rldicl_mb32_leaves_low_32_clean() { + // Same as above but verify upper 32 are zeroed + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + ctx.gpr[5] = 0xFFFF_FFFF_1234_5678_u64; + let raw = encode_rldicl(5, 6, 0, 32, 0); + write_instr(&mem, 0x100, raw); + ctx.pc = 0x100; + step(&mut ctx, &mem); + assert_eq!(ctx.gpr[6], 0x0000_0000_1234_5678_u64); + } + + // ===== PPCBUG-275/276/562: vc_rc_bit fix for VC-form vcmpequb ===== + + /// VC-form: opcode=4 (VMX), vD at 6-10, vA at 11-15, vB at 16-20, Rc at PPC bit 21 = host bit 10, XO=6. + /// vcmpequb.: (4<<26)|(vD<<21)|(vA<<16)|(vB<<11)|(1<<10)|6 + fn encode_vcmpequb_dot(vd: u32, va: u32, vb: u32) -> u32 { + (4 << 26) | (vd << 21) | (va << 16) | (vb << 11) | (1 << 10) | 6 + } + /// vcmpequb (no dot form): same but Rc=0 + fn encode_vcmpequb(vd: u32, va: u32, vb: u32) -> u32 { + (4 << 26) | (vd << 21) | (va << 16) | (vb << 11) | 6 + } + + #[test] + fn vcmpequb_dot_all_true_sets_cr6_lt() { + // All bytes equal → all lanes 0xFF → CR6.LT=1 (all-true), CR6.EQ=0 + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + let v = xenia_types::Vec128::from_bytes([0xAAu8; 16]); + ctx.vr[1] = v; + ctx.vr[2] = v; + write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2)); + ctx.pc = 0x100; + step(&mut ctx, &mem); + assert!(ctx.cr[6].lt, "all-true: CR6.LT must be 1"); + assert!(!ctx.cr[6].eq, "all-true: CR6.EQ must be 0"); + } + + #[test] + fn vcmpequb_no_dot_does_not_update_cr6() { + // Without dot form, CR6 must be unchanged + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + ctx.cr[6] = crate::context::CrField { lt: true, gt: false, eq: true, so: false }; + let v = xenia_types::Vec128::from_bytes([0xAAu8; 16]); + ctx.vr[1] = v; + ctx.vr[2] = v; + write_instr(&mem, 0x100, encode_vcmpequb(0, 1, 2)); + ctx.pc = 0x100; + step(&mut ctx, &mem); + // CR6 unchanged: no dot form + assert!(ctx.cr[6].lt && ctx.cr[6].eq, "CR6 must be unchanged without dot"); + } + + #[test] + fn vcmpequb_dot_all_false_sets_cr6_eq() { + // No bytes equal → all lanes 0x00 → CR6.LT=0, CR6.EQ=1 (all-false) + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAAu8; 16]); + ctx.vr[2] = xenia_types::Vec128::from_bytes([0xBBu8; 16]); + write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2)); + ctx.pc = 0x100; + step(&mut ctx, &mem); + assert!(!ctx.cr[6].lt, "all-false: CR6.LT must be 0"); + assert!(ctx.cr[6].eq, "all-false: CR6.EQ must be 1"); + } + + // ---- PPCBUG-363 + PPCBUG-369: vpkd3d128 post-pack permutation ---- + // + // vpkd3d128 VD, VB, type, pack, shift: the low 2 bits of the IMM field + // select how the packed scalar/vector is merged back into the previous VD. + // pack=0 → identity (store out directly); pack=1 → 32-bit merge by shift; + // pack=2,3 → 64-bit merge by shift. + // Canary source: ppc_emit_altivec.cc:2126-2188. + // + // For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant), so + // vd128 is always in range [96, 127] for vd_lo in [0, 31]. + + fn encode_vpkd3d128(vd_lo: u32, vb_lo: u32, imm: u32, z: u32) -> u32 { + // op6=6 (all VMX128 compute ops); VD[4:0] at host 25-21; IMM at host 20-16; + // VB[4:0] at host 15-11; host bits 10,9 = 1,1 (PPC bits 21,22, key2=0b110); + // z (2-bit) at host 7-6 (PPC bits 24-25); host bit 4 = 1 (PPC bit 27, key2 low=0b01). + // decode_op6 key2 = (bits21-23<<4)|(bits26-27) = (0b110<<4)|0b01 = 0b1100001. + (6u32 << 26) | (vd_lo << 21) | (imm << 16) | (vb_lo << 11) + | (1 << 10) | (1 << 9) | (z << 6) | (1 << 4) + } + + #[test] + fn vpkd3d128_pack0_legacy_unchanged() { + // pack=0 → identity: result = out (packed value), no blend with prev vd. + // type=0 (D3dColor), pack=0 → IMM=0; z=0 (don't care for pack=0). + // vd=96 (vd_lo=0 | bits21=1,22=1→+96). + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + // vb=1: R=1.0, G=0, B=0, A=0 → D3dColor packs to word (0<<24)|(255<<16)|(0<<8)|0 = 0x00FF0000 + ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); + // prev vd=96: sentinel values that should NOT appear in result + ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD); + write_instr(&mem, 0, encode_vpkd3d128(0, 1, 0, 0)); + ctx.pc = 0; + step(&mut ctx, &mem); + let r = ctx.vr[96].as_u32x4(); + // out = [0, 0, 0, 0x00FF_0000]; pack=0 → result = out + assert_eq!(r[0], 0x0000_0000, "pack=0: lane 0 must be out[0]=0"); + assert_eq!(r[1], 0x0000_0000, "pack=0: lane 1 must be out[1]=0"); + assert_eq!(r[2], 0x0000_0000, "pack=0: lane 2 must be out[2]=0"); + assert_eq!(r[3], 0x00FF_0000, "pack=0: lane 3 must be packed D3dColor"); + } + + #[test] + fn vpkd3d128_pack1_shift0_d3d_vertex_pack() { + // pack=1, shift=0 (VPACK_32): out[3] placed at lane 3; prev[0..2] preserved. + // MakePermuteMask(0,0, 0,1, 0,2, 1,3) → [prev[0], prev[1], prev[2], out[3]] + // IMM = (type=0 D3dColor << 2) | pack=1 = 1; z=0. + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000 + ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444); + write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 0)); + ctx.pc = 0; + step(&mut ctx, &mem); + let r = ctx.vr[96].as_u32x4(); + assert_eq!(r[0], 0x1111_1111, "pack=1 shift=0: lane 0 from prev"); + assert_eq!(r[1], 0x2222_2222, "pack=1 shift=0: lane 1 from prev"); + assert_eq!(r[2], 0x3333_3333, "pack=1 shift=0: lane 2 from prev"); + assert_eq!(r[3], 0x00FF_0000, "pack=1 shift=0: lane 3 from out[3]"); + } + + #[test] + fn vpkd3d128_pack1_shift3_puts_out3_at_lane0() { + // pack=1, shift=3 (VPACK_32): out[3] placed at lane 0; prev[1..3] preserved. + // MakePermuteMask(1,3, 0,1, 0,2, 0,3) → [out[3], prev[1], prev[2], prev[3]] + // IMM = 1; z=3. + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000 + ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444); + write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 3)); + ctx.pc = 0; + step(&mut ctx, &mem); + let r = ctx.vr[96].as_u32x4(); + assert_eq!(r[0], 0x00FF_0000, "pack=1 shift=3: lane 0 from out[3]"); + assert_eq!(r[1], 0x2222_2222, "pack=1 shift=3: lane 1 from prev"); + assert_eq!(r[2], 0x3333_3333, "pack=1 shift=3: lane 2 from prev"); + assert_eq!(r[3], 0x4444_4444, "pack=1 shift=3: lane 3 from prev"); + } } diff --git a/crates/xenia-cpu/tests/disasm_goldens.rs b/crates/xenia-cpu/tests/disasm_goldens.rs index 6c39d54..97b3825 100644 --- a/crates/xenia-cpu/tests/disasm_goldens.rs +++ b/crates/xenia-cpu/tests/disasm_goldens.rs @@ -20,7 +20,7 @@ use std::path::PathBuf; use serde::{Deserialize, Serialize}; -use xenia_cpu::decoder::decode; +use xenia_cpu::decoder::{DecodedInstr, decode}; use xenia_cpu::disasm::format; #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] @@ -158,20 +158,20 @@ fn rlwinm(rs: u32, ra: u32, sh: u32, mb: u32, me: u32, rc: u32) -> u32 { } fn rldicl(rs: u32, ra: u32, sh: u32, mb: u32, rc: u32) -> u32 { - // MD-form, op30 xo=0. sh split: bits 16-20 (high 5) + bit 30 (low bit). - // mb split: bits 21-25 (low 5) + bit 26 (high bit). - let sh_hi = (sh >> 1) & 0x1F; - let sh_lo = sh & 1; + // MD-form: sh[4:0] at PPC bits 16-20 (host bits 11-15); sh[5] at PPC bit 30 (host bit 1). + // mb[4:0] at PPC bits 21-25 (host bits 6-10); mb[5] at PPC bit 26 (host bit 5). + let sh_lo = sh & 0x1F; + let sh_hi = (sh >> 5) & 1; let mb_lo = mb & 0x1F; let mb_hi = (mb >> 5) & 1; (30 << 26) | (rs << 21) | (ra << 16) - | (sh_hi << 11) + | (sh_lo << 11) | (mb_lo << 6) | (mb_hi << 5) | (0 << 2) - | (sh_lo << 1) + | (sh_hi << 1) | rc } @@ -529,3 +529,26 @@ fn vmx128_registers() { all.extend_from_slice(&vmx128_4op); assert_or_regen("vmx128_registers.json", &all); } + +#[test] +fn sradi_shift_32_decodes_to_32() { + // sradi rA, rS, 32: sh=32 → sh[4:0]=0, sh[5]=1 + // After PPCBUG-040 fix, sh64() must return 32, not 1. + let instr: DecodedInstr = decode(rldicl(3, 4, 32, 63, 0), 0); + // rldicl with mb=63 is not sradi, but tests sh64() extraction. + assert_eq!(instr.sh64(), 32, "sh64 must return 32 for sh=32 (sh5=1, sh_lo=0)"); +} + +#[test] +fn sh64_shift_1_decodes_correctly() { + // sh=1: sh[4:0]=1, sh[5]=0 → sh64() must return 1 + let instr: DecodedInstr = decode(rldicl(3, 4, 1, 0, 0), 0); + assert_eq!(instr.sh64(), 1, "sh64 must return 1 for sh=1"); +} + +#[test] +fn sh64_shift_63_decodes_correctly() { + // sh=63: sh[4:0]=31=0x1F, sh[5]=1 → sh64() must return 63 + let instr: DecodedInstr = decode(rldicl(3, 4, 63, 0, 0), 0); + assert_eq!(instr.sh64(), 63, "sh64 must return 63 for sh=63"); +} diff --git a/crates/xenia-cpu/tests/golden/extended_mnemonics.json b/crates/xenia-cpu/tests/golden/extended_mnemonics.json index d869109..efb251b 100644 --- a/crates/xenia-cpu/tests/golden/extended_mnemonics.json +++ b/crates/xenia-cpu/tests/golden/extended_mnemonics.json @@ -182,7 +182,7 @@ }, { "label": "srdi r3, r4, 8", - "raw": "0x7883E200", + "raw": "0x7883C202", "addr": "0x82000000", "mnemonic": "rldicl", "operands": "r3, r4, 56, 8", @@ -191,7 +191,7 @@ }, { "label": "rotldi r3, r4, 8", - "raw": "0x78832000", + "raw": "0x78834000", "addr": "0x82000000", "mnemonic": "rldicl", "operands": "r3, r4, 8, 0",