From d51b9346dfc75392118e0bf20a0f17962dc7acac Mon Sep 17 00:00:00 2001
From: MechaCat02
Date: Fri, 1 May 2026 21:15:06 +0200
Subject: [PATCH] fix(cpu): PPCBUG-275 276 420 421 422 423 562 600 fix vcmp Rc bit + decode dot forms

PPCBUG-562: Add vc_rc_bit() (PPC bit 21) and vx128r_rc_bit() (PPC bit 25)
to decoder.rs. The generic rc_bit() reads bit 0 (PPC bit 31); all VC-form
vcmp XO values are even so bit 0 is always 0, making CR6 permanently dead.

PPCBUG-275/276/420/421: Replace rc_bit() with vc_rc_bit() at all 8 pure
VC-form vcmp arms (vcmpequb, vcmpequh, vcmpgtub, vcmpgtsb, vcmpgtuh,
vcmpgtsh, vcmpgtuw, vcmpgtsw) and with the correct per-form accessor at
the 4 combined arms (vcmpeqfp|128, vcmpgefp|128, vcmpgtfp|128,
vcmpequw|128) and vcmpbfp|128.

PPCBUG-422: VX128_R-form 128-variants in combined arms now use
vx128r_rc_bit() instead of vc_rc_bit().

PPCBUG-423/600: In the VX128_R form the Rc bit is PPC bit 25 (host bit 6);
PPC bit 27 belongs to the opcode and is 0 for every vcmp*128. The
decode_op6 compare key (bits 22-24 + bit 27) already leaves bit 25 out, so
vcmp*fp128./vcmpequw128. decode through the same arms as the non-dot
forms and no extra key entries are needed; decode_op6 only gains a
comment saying so. Interpreter uses vx128r_rc_bit() to conditionally
update CR6.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 crates/xenia-cpu/src/decoder.rs     |  9 ++-
 crates/xenia-cpu/src/interpreter.rs | 88 ++++++++++++++++++++++++-----
 2 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/crates/xenia-cpu/src/decoder.rs b/crates/xenia-cpu/src/decoder.rs
index 80a5a7e..8299447 100644
--- a/crates/xenia-cpu/src/decoder.rs
+++ b/crates/xenia-cpu/src/decoder.rs
@@ -74,6 +74,11 @@ impl DecodedInstr {
     /// Rc bit (bit 31) - record CR0
     #[inline] pub fn rc_bit(&self) -> bool { self.raw & 1 != 0 }
 
+    /// Rc for VC-form vector compare instructions — PPC bit 21 = host bit 10.
+    #[inline] pub fn vc_rc_bit(&self) -> bool { (self.raw >> 10) & 1 != 0 }
+    /// Rc for VX128_R-form vector compare instructions — PPC bit 25 = host bit 6.
+    #[inline] pub fn vx128r_rc_bit(&self) -> bool { (self.raw >> 6) & 1 != 0 }
+
     /// OE bit (bit 21) - overflow enable
     #[inline] pub fn oe(&self) -> bool { extract_bits(self.raw, 21, 21) != 0 }
 
@@ -642,7 +647,9 @@ fn decode_op6(code: u32) -> PpcOpcode {
         _ => {}
     }
 
-    // VMX128 compare
+    // VMX128 compare (VX128_R form). The key is PPC bits 22-24 plus bit 27.
+    // PPC bit 25 is the Rc bit and is deliberately left out of the key, so the
+    // dot forms (vcmp*fp128., vcmpequw128.) match the same arms as the non-dot forms.
     let key4 = (extract_bits(code, 22, 24) << 3) | extract_bits(code, 27, 27);
     match key4 {
         0b000000 => return PpcOpcode::vcmpeqfp128,
diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 90177f2..65c6a9d 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@
-2036,7 +2036,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + let rc = if matches!(instr.opcode, PpcOpcode::vcmpeqfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } PpcOpcode::vcmpgefp | PpcOpcode::vcmpgefp128 => { @@ -2046,7 +2047,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] >= b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + let rc = if matches!(instr.opcode, PpcOpcode::vcmpgefp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } PpcOpcode::vcmpgtfp | PpcOpcode::vcmpgtfp128 => { @@ -2056,7 +2058,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + let rc = if matches!(instr.opcode, PpcOpcode::vcmpgtfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } @@ -2398,7 +2401,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + let rc = if matches!(instr.opcode, PpcOpcode::vcmpequw128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { update_cr6_from_vmask(&r, ctx); 
} ctx.pc += 4; } @@ -3528,7 +3532,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] == b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3541,7 +3545,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] == b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3554,7 +3558,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3567,7 +3571,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3580,7 +3584,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = 
crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3593,7 +3597,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); - if instr.rc_bit() { + if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } @@ -3606,7 +3610,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); } ctx.vr[instr.rd()] = v; ctx.pc += 4; } @@ -3616,7 +3620,7 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u32x4_array(r); - if instr.rc_bit() { update_cr6_from_vmask(&r, ctx); } + if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); } ctx.vr[instr.rd()] = v; ctx.pc += 4; } @@ -3638,7 +3642,8 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - if a[i].is_nan() || b[i].is_nan() || a[i] < -b[i] { lane |= 0x4000_0000; any_out = true; } r[i] = lane; } - if instr.rc_bit() { + let rc = if is_128 { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; + if rc { ctx.cr[6] = crate::context::CrField { lt: false, gt: false, eq: !any_out, so: false, }; @@ -6296,4 +6301,61 @@ mod tests { step(&mut ctx, &mem); assert_eq!(ctx.gpr[6], 0x0000_0000_1234_5678_u64); } + + // ===== PPCBUG-275/276/562: vc_rc_bit fix for VC-form vcmpequb ===== + + /// VC-form: opcode=4 (VMX), vD at 6-10, vA at 11-15, vB at 16-20, Rc at PPC bit 21 = host bit 10, XO=6. 
+    /// vcmpequb.: the plain vcmpequb word with the Rc bit (host bit 10) set.
+    fn encode_vcmpequb_dot(vd: u32, va: u32, vb: u32) -> u32 {
+        encode_vcmpequb(vd, va, vb) | (1 << 10)
+    }
+    /// vcmpequb (no dot): (4<<26)|(vD<<21)|(vA<<16)|(vB<<11)|6, Rc=0
+    fn encode_vcmpequb(vd: u32, va: u32, vb: u32) -> u32 {
+        (4 << 26) | (vd << 21) | (va << 16) | (vb << 11) | 6
+    }
+
+    #[test]
+    fn vcmpequb_dot_all_true_sets_cr6_lt() {
+        // Identical operands → every byte lane is 0xFF → CR6.LT=1 (all-true), CR6.EQ=0
+        let same = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
+        let mem = TestMem::new();
+        write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2));
+        let mut ctx = PpcContext::new();
+        ctx.pc = 0x100;
+        ctx.vr[1] = same;
+        ctx.vr[2] = same;
+        step(&mut ctx, &mem);
+        assert!(ctx.cr[6].lt, "all-true: CR6.LT must be 1");
+        assert!(!ctx.cr[6].eq, "all-true: CR6.EQ must be 0");
+    }
+
+    #[test]
+    fn vcmpequb_no_dot_does_not_update_cr6() {
+        // Rc clear: the compare must leave the seeded CR6 value alone
+        let same = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
+        let mem = TestMem::new();
+        write_instr(&mem, 0x100, encode_vcmpequb(0, 1, 2));
+        let mut ctx = PpcContext::new();
+        ctx.pc = 0x100;
+        ctx.vr[1] = same;
+        ctx.vr[2] = same;
+        ctx.cr[6] = crate::context::CrField { lt: true, gt: false, eq: true, so: false };
+        step(&mut ctx, &mem);
+        // Both seeded flags are still set, so the instruction did not write CR6
+        assert!(ctx.cr[6].lt && ctx.cr[6].eq, "CR6 must be unchanged without dot");
+    }
+
+    #[test]
+    fn vcmpequb_dot_all_false_sets_cr6_eq() {
+        // Operands differ in every byte → every lane is 0x00 → CR6.LT=0, CR6.EQ=1 (all-false)
+        let mem = TestMem::new();
+        write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2));
+        let mut ctx = PpcContext::new();
+        ctx.pc = 0x100;
+        ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
+        ctx.vr[2] = xenia_types::Vec128::from_bytes([0xBBu8; 16]);
+        step(&mut ctx, &mem);
+        assert!(!ctx.cr[6].lt, "all-false: CR6.LT must be 0");
+        assert!(ctx.cr[6].eq, "all-false: CR6.EQ must be 1");
+    }
 }