//! PPC interpreter - executes instructions one at a time. //! This is the core execution engine. Every instruction is observable //! by the debugger (pre_step/post_step hooks on every cycle). use crate::context::PpcContext; use crate::decoder::{decode, DecodedInstr}; use crate::fpscr; use crate::opcode::PpcOpcode; use crate::overflow; use crate::trap; use crate::vmx; use xenia_memory::MemoryAccess; /// Xenon reservation granule: one L2 cache line (128 bytes). /// `reserved_line = ea & !RESERVATION_MASK` in [context::PpcContext]. pub const RESERVATION_MASK: u32 = 0x7F; /// Result of executing a single instruction. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum StepResult { /// Normal execution, advance to next instruction. Continue, /// Hit a system call (sc instruction). Kernel should handle. SystemCall, /// Hit an unimplemented opcode. Unimplemented(PpcOpcode), /// Hit a trap instruction. Trap, /// Execution halted (by debugger or error). Halted, } /// Execute a single PPC instruction. pub fn step(ctx: &mut PpcContext, mem: &dyn MemoryAccess) -> StepResult { let raw = mem.read_u32(ctx.pc); let instr = decode(raw, ctx.pc); let result = execute(ctx, mem, &instr); ctx.cycle_count += 1; ctx.timebase += 1; result } /// Tier-2 perf — same semantics as [`step`], but looks the decoded /// instruction up in a PC-keyed cache first. Misses fill the cache from /// a fresh [`decode`] call; writes to the containing guest page bump /// `page_version` and naturally invalidate the entry. /// /// The cache is shared across all HW threads — PC is thread-independent /// and `DecodeCacheEntry` stays put after fill. `current_page_version` /// is wired through the caller since memory is touched just above anyway /// (the `read_u32` + the version read amortize to one touch of the page /// table). Use `GuestMemory::page_version(pc)` to source it. 
pub fn step_cached( ctx: &mut PpcContext, mem: &dyn MemoryAccess, cache: &mut crate::decoder::DecodeCache, current_page_version: u64, ) -> StepResult { let raw = mem.read_u32(ctx.pc); let instr = cache.lookup(ctx.pc, raw, current_page_version); let result = execute(ctx, mem, &instr); ctx.cycle_count += 1; ctx.timebase += 1; result } /// Tier-4 perf — execute every instruction in a pre-decoded /// [`crate::block_cache::DecodedBlock`], bumping `cycle_count` and /// `timebase` once per executed instruction. Bails out as soon as a /// non-`Continue` step result fires (system call, trap, halt, or /// unimplemented opcode), or when an instruction unexpectedly changes /// the PC mid-block (defensive — only the terminator at the tail of /// the block is allowed to do that). /// /// Caller (in `xenia-app/src/main.rs`) is responsible for choosing this /// path only when **no per-instruction observation is requested** — /// i.e., `Debugger::wants_hooks() == false` and no `--trace-*` flag is /// active. Once those gates flip, the caller falls back to /// [`step_cached`] so every PC remains observable. pub fn step_block( ctx: &mut PpcContext, mem: &dyn MemoryAccess, block: &crate::block_cache::DecodedBlock, ) -> StepResult { let mut result = StepResult::Continue; for instr in &block.instrs { let expected_next = instr.addr.wrapping_add(4); result = execute(ctx, mem, instr); ctx.cycle_count += 1; ctx.timebase += 1; if !matches!(result, StepResult::Continue) { return result; } // PC discontinuity within a block. By construction only the // terminator (last instruction) can branch — and when it does, // we want to stop here, not continue executing past it. if ctx.pc != expected_next { break; } } result } /// Execute a decoded instruction, updating context and memory. fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -> StepResult { match instr.opcode { // ===== ALU: Immediate ===== PpcOpcode::addi => { // PPCBUG-001: 32-bit ABI. 
`li rT, -1` (= addi rT, r0, -1) must produce // 0x00000000_FFFFFFFF, not 0xFFFFFFFF_FFFFFFFF (sign-extended simm16). let ra_val = if instr.ra() == 0 { 0 } else { ctx.gpr[instr.ra()] }; ctx.gpr[instr.rd()] = ra_val.wrapping_add(instr.simm16() as i64 as u64) as u32 as u64; ctx.pc += 4; } PpcOpcode::addis => { // Xbox 360 user mode is 32-bit ABI (MSR.SF=0), so addis must // produce a value whose upper 32 bits don't pollute downstream // 64-bit arithmetic. The PPC ISA in 64-bit mode sign-extends // simm16 before the shift, producing 0xFFFFFFFF_xxxx0000 for // negative simm16 (high bit set). When this value flows into // a 64-bit subfc against a zero-extended lwz value, the unsigned // 64-bit comparison yields wrong CA. Truncate to 32 bits to // simulate 32-bit ABI behavior. let ra_val = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let result = ra_val.wrapping_add((instr.simm16() as i64 as u64) << 16); ctx.gpr[instr.rd()] = result as u32 as u64; ctx.pc += 4; } PpcOpcode::addic => { // PPCBUG-002: 32-bit ABI. CA must be from a 32-bit unsigned compare; // canary's `AddDidCarry` truncates both operands to int32 first. let ra32 = ctx.gpr[instr.ra()] as u32; let imm32 = instr.simm16() as i32 as u32; let result32 = ra32.wrapping_add(imm32); ctx.xer_ca = if result32 < ra32 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; ctx.pc += 4; } PpcOpcode::addicx => { // PPCBUG-003: same fix as addic plus CR0 i32 view. let ra32 = ctx.gpr[instr.ra()] as u32; let imm32 = instr.simm16() as i32 as u32; let result32 = ra32.wrapping_add(imm32); ctx.xer_ca = if result32 < ra32 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; ctx.update_cr_signed(0, result32 as i32 as i64); ctx.pc += 4; } PpcOpcode::subficx => { // PPCBUG-005: 32-bit ABI. Sign-extended imm has bits 32-63 set for // negative SIMM, poisoning the writeback. Canary uses 32-bit form. 
let ra32 = ctx.gpr[instr.ra()] as u32; let imm32 = instr.simm16() as i32 as u32; let result32 = imm32.wrapping_sub(ra32); ctx.xer_ca = if imm32 >= ra32 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; ctx.pc += 4; } PpcOpcode::mulli => { // PPCBUG-004: 32-bit ABI. Read RA as i32 (low 32, sign-extended for // multiply), product fits in 32 bits per ISA (overflow wraps). let ra = ctx.gpr[instr.ra()] as i32 as i64; let imm = instr.simm16() as i64; ctx.gpr[instr.rd()] = (ra.wrapping_mul(imm) as u32) as u64; ctx.pc += 4; } // ===== ALU: Register ===== PpcOpcode::addx => { // PPCBUG-012+020: 32-bit ABI writeback truncation + CR0 i32 view. let ra32 = ctx.gpr[instr.ra()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; let result32 = ra32.wrapping_add(rb32); ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128); overflow::apply(ctx, true_sum != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::addcx => { // PPCBUG-013+020: 32-bit truncation; CA from u32 unsigned compare. let ra32 = ctx.gpr[instr.ra()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; let result32 = ra32.wrapping_add(rb32); ctx.xer_ca = if result32 < ra32 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128); overflow::apply(ctx, true_sum != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::addex => { // PPCBUG-014+020: 32-bit truncation; CA from u32 unsigned compare. 
let ra32 = ctx.gpr[instr.ra()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; let ca = ctx.xer_ca as u32; let result32 = ra32.wrapping_add(rb32).wrapping_add(ca); ctx.xer_ca = if result32 < ra32 || (ca != 0 && result32 == ra32) { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128) + (ca as i128); overflow::apply(ctx, true_sum != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::addzex => { // PPCBUG-015+020: 32-bit truncation. let ra32 = ctx.gpr[instr.ra()] as u32; let ca = ctx.xer_ca as u32; let result32 = ra32.wrapping_add(ca); ctx.xer_ca = if result32 < ra32 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_sum = (ra32 as i32 as i128) + (ca as i128); overflow::apply(ctx, true_sum != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::addmex => { // PPCBUG-016+020: 32-bit truncation. RT = RA + CA - 1. let ra32 = ctx.gpr[instr.ra()] as u32; let ca = ctx.xer_ca as u32; let result32 = ra32.wrapping_add(ca).wrapping_sub(1); ctx.xer_ca = if ra32 != 0 || ca != 0 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_sum = (ra32 as i32 as i128) + (ca as i128) - 1; overflow::apply(ctx, true_sum != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::subfx => { // PPCBUG-017+020: 32-bit truncation. 
let ra32 = ctx.gpr[instr.ra()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; let result32 = rb32.wrapping_sub(ra32); ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128); overflow::apply(ctx, true_diff != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::subfcx => { // PPCBUG-007: 32-bit ABI. The `rb >= ra` u64 unsigned compare is // exactly the shape that broke addis. Defensive 32-bit truncation // is required for correct CA even after upstream cleanup. let ra32 = ctx.gpr[instr.ra()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; let result32 = rb32.wrapping_sub(ra32); ctx.xer_ca = if rb32 >= ra32 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128); overflow::apply(ctx, true_diff != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::subfex => { // PPCBUG-008: 32-bit ABI. Compute in u32 space — `!ra` on u64 always // pollutes the upper 32 bits, making this an active poisoner. let ra32 = ctx.gpr[instr.ra()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; let ca = ctx.xer_ca as u32; let result32 = (!ra32).wrapping_add(rb32).wrapping_add(ca); ctx.xer_ca = if rb32 > ra32 || (rb32 == ra32 && ca != 0) { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { // RT <- !RA + RB + CA == RB - RA - 1 + CA (32-bit semantics). let true_sum = (rb32 as i32 as i128) - (ra32 as i32 as i128) - 1 + (ca as i128); overflow::apply(ctx, true_sum != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::subfzex => { // PPCBUG-018: same active-poisoning shape as subfex; operate in u32. 
let ra32 = ctx.gpr[instr.ra()] as u32; let ca = ctx.xer_ca as u32; let result32 = (!ra32).wrapping_add(ca); // RT <- !RA + CA (no -1 term). 32-bit carry-out only when // !ra32 = u32::MAX (i.e. ra32 = 0) AND ca = 1. ctx.xer_ca = if ra32 == 0 && ca != 0 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_sum = -(ra32 as i32 as i128) - 1 + (ca as i128); overflow::apply(ctx, true_sum != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::subfmex => { // PPCBUG-019: also fixes the always-true CA edge — `!ra` on u64 // is non-zero when ra32==0xFFFFFFFF and ca==0, so CA was stuck at 1. let ra32 = ctx.gpr[instr.ra()] as u32; let ca = ctx.xer_ca as u32; let result32 = (!ra32).wrapping_add(ca).wrapping_sub(1); ctx.xer_ca = if (!ra32) != 0 || ca != 0 { 1 } else { 0 }; ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { let true_sum = -(ra32 as i32 as i128) - 2 + (ca as i128); overflow::apply(ctx, true_sum != (result32 as i32) as i128); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::negx => { // PPCBUG-006: 32-bit ABI. `(!ra).wrapping_add(1)` on u64 always // sets upper 32 bits — every neg poisoned the GPR. neg_ov also // checks at 64-bit INT_MIN; should be 32-bit INT_MIN. let ra32 = ctx.gpr[instr.ra()] as u32; let result32 = (!ra32).wrapping_add(1); ctx.gpr[instr.rd()] = result32 as u64; if instr.oe() { overflow::apply(ctx, ra32 == 0x8000_0000); } if instr.rc_bit() { ctx.update_cr_signed(0, result32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::mullwx => { // PPCBUG-009: 32-bit ABI. Truncate product to u32 — overflow detection // (mullw_ov) still uses the full i64 product to catch the overflow. 
let ra = ctx.gpr[instr.ra()] as i32 as i64; let rb = ctx.gpr[instr.rb()] as i32 as i64; let product = ra.wrapping_mul(rb); ctx.gpr[instr.rd()] = product as u32 as u64; if instr.oe() { overflow::apply(ctx, overflow::mullw_ov(product)); } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::mulhwx => { // PPCBUG-020: 32-bit ABI CR0 view. let ra = ctx.gpr[instr.ra()] as i32 as i64; let rb = ctx.gpr[instr.rb()] as i32 as i64; let result = ra.wrapping_mul(rb); ctx.gpr[instr.rd()] = ((result >> 32) as u32) as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::mulhwux => { // PPCBUG-020: 32-bit ABI CR0 view. let ra = ctx.gpr[instr.ra()] as u32 as u64; let rb = ctx.gpr[instr.rb()] as u32 as u64; let result = ra.wrapping_mul(rb); ctx.gpr[instr.rd()] = (result >> 32) & 0xFFFF_FFFF; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::divwx => { // PPCBUG-010+011 coupled: 32-bit ABI. Quotient zero-extended to u64 // (canary explicitly uses ZeroExtend(v, INT64_TYPE)). CR0 view via i32. let ra = ctx.gpr[instr.ra()] as i32; let rb = ctx.gpr[instr.rb()] as i32; let ov = overflow::divw_ov_signed(ra, rb); if ov { ctx.gpr[instr.rd()] = 0; } else { ctx.gpr[instr.rd()] = (ra / rb) as u32 as u64; } if instr.oe() { overflow::apply(ctx, ov); } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::divwux => { // PPCBUG-020: 32-bit ABI CR0 view. 
let ra = ctx.gpr[instr.ra()] as u32; let rb = ctx.gpr[instr.rb()] as u32; let ov = overflow::divw_ov_unsigned(rb); if ov { ctx.gpr[instr.rd()] = 0; } else { ctx.gpr[instr.rd()] = (ra / rb) as u64; } if instr.oe() { overflow::apply(ctx, ov); } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64); } ctx.pc += 4; } // ===== 64-bit Arithmetic ===== PpcOpcode::mulldx => { let ra = ctx.gpr[instr.ra()] as i64; let rb = ctx.gpr[instr.rb()] as i64; ctx.gpr[instr.rd()] = ra.wrapping_mul(rb) as u64; if instr.oe() { overflow::apply(ctx, overflow::mulld_ov(ra, rb)); } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64); } ctx.pc += 4; } PpcOpcode::mulhdx => { let ra = ctx.gpr[instr.ra()] as i64 as i128; let rb = ctx.gpr[instr.rb()] as i64 as i128; ctx.gpr[instr.rd()] = (ra.wrapping_mul(rb) >> 64) as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64); } ctx.pc += 4; } PpcOpcode::mulhdux => { let ra = ctx.gpr[instr.ra()] as u128; let rb = ctx.gpr[instr.rb()] as u128; ctx.gpr[instr.rd()] = (ra.wrapping_mul(rb) >> 64) as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64); } ctx.pc += 4; } PpcOpcode::divdx => { let ra = ctx.gpr[instr.ra()] as i64; let rb = ctx.gpr[instr.rb()] as i64; let ov = overflow::divd_ov_signed(ra, rb); if ov { ctx.gpr[instr.rd()] = 0; } else { ctx.gpr[instr.rd()] = (ra / rb) as u64; } if instr.oe() { overflow::apply(ctx, ov); } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64); } ctx.pc += 4; } PpcOpcode::divdux => { let ra = ctx.gpr[instr.ra()]; let rb = ctx.gpr[instr.rb()]; let ov = overflow::divd_ov_unsigned(rb); if ov { ctx.gpr[instr.rd()] = 0; } else { ctx.gpr[instr.rd()] = ra / rb; } if instr.oe() { overflow::apply(ctx, ov); } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64); } ctx.pc += 4; } // ===== Logical ===== PpcOpcode::andix => { // PPCBUG-020: 32-bit ABI CR0 view. 
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & (instr.uimm16() as u64); ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); ctx.pc += 4; } PpcOpcode::andisx => { // PPCBUG-023: 32-bit ABI CR0 view. `andis. rA, rS, 0x8000` to test // sign bit of a 32-bit word now correctly classifies bit 31 = 1 as LT. ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & ((instr.uimm16() as u64) << 16); ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); ctx.pc += 4; } PpcOpcode::ori => { ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | (instr.uimm16() as u64); ctx.pc += 4; } PpcOpcode::oris => { ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | ((instr.uimm16() as u64) << 16); ctx.pc += 4; } PpcOpcode::xori => { ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ (instr.uimm16() as u64); ctx.pc += 4; } PpcOpcode::xoris => { ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ ((instr.uimm16() as u64) << 16); ctx.pc += 4; } PpcOpcode::andx => { // PPCBUG-032+020: 32-bit ABI CR0 view (latent under clean inputs). ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & ctx.gpr[instr.rb()]; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::andcx => { // PPCBUG-033: !rb on u64 flips upper 32 bits — active poisoning. let rs32 = ctx.gpr[instr.rs()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; ctx.gpr[instr.ra()] = (rs32 & !rb32) as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::orx => { // PPCBUG-032+020: 32-bit ABI CR0 view. ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | ctx.gpr[instr.rb()]; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::orcx => { // PPCBUG-028: same shape as andcx — operate in u32. 
let rs32 = ctx.gpr[instr.rs()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; ctx.gpr[instr.ra()] = (rs32 | !rb32) as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::xorx => { // PPCBUG-032+020: 32-bit ABI CR0 view. ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ ctx.gpr[instr.rb()]; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::norx => { // PPCBUG-029: `not` simplified mnemonic — every `not` poisoned the GPR. let rs32 = ctx.gpr[instr.rs()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; ctx.gpr[instr.ra()] = (!(rs32 | rb32)) as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::nandx => { // PPCBUG-030: same shape — operate in u32. let rs32 = ctx.gpr[instr.rs()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; ctx.gpr[instr.ra()] = (!(rs32 & rb32)) as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::eqvx => { // PPCBUG-031: `eqv rA, rA, rA` is a common "set to all-ones" idiom; // 64-bit form gave 0xFFFFFFFFFFFFFFFF but 32-bit ABI expects 0x00000000FFFFFFFF. let rs32 = ctx.gpr[instr.rs()] as u32; let rb32 = ctx.gpr[instr.rb()] as u32; ctx.gpr[instr.ra()] = (!(rs32 ^ rb32)) as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } // ===== Extend/Count ===== PpcOpcode::extsbx => { // PPCBUG-034: 32-bit ABI — sign-extend byte to i32, write zero-extended. // PPCBUG-036 (coupled): CR0 must view result as i32, not i64. ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i8 as i32 as u32 as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::extshx => { // PPCBUG-035: same shape as extsbx for halfwords. // PPCBUG-037 (coupled): CR0 i32 view. 
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i16 as i32 as u32 as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::extswx => { ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i32 as i64 as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::cntlzwx => { // Result is 0..=32, fits in u32 with bit 31 always zero, so the // CR0 view is benign — use the catch-all 32-bit form for consistency. ctx.gpr[instr.ra()] = (ctx.gpr[instr.rs()] as u32).leading_zeros() as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::cntlzdx => { ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()].leading_zeros() as u64; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } // ===== Shift ===== PpcOpcode::slwx => { // PPCBUG-044: 32-bit ABI CR0 view. A result with bit 31 set // (e.g. 0x80000000) is negative in i32 view but positive in i64. let sh = ctx.gpr[instr.rb()] as u32; ctx.gpr[instr.ra()] = if sh < 32 { ((ctx.gpr[instr.rs()] as u32) << sh) as u64 } else { 0 }; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::srwx => { // PPCBUG-044: 32-bit ABI CR0 view (zero-extended right shift can never // have bit 31 set, but use the canonical form for consistency). let sh = ctx.gpr[instr.rb()] as u32; ctx.gpr[instr.ra()] = if sh < 32 { ((ctx.gpr[instr.rs()] as u32) >> sh) as u64 } else { 0 }; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::srawx => { // PPCBUG-041+043 coupled: 32-bit ABI writeback truncation + CR0 i32. // CA logic is independently correct (uses u32 shifted-out test). 
let rs = ctx.gpr[instr.rs()] as i32; let sh = ctx.gpr[instr.rb()] as u32 & 0x3F; if sh == 0 { ctx.gpr[instr.ra()] = rs as u32 as u64; ctx.xer_ca = 0; } else if sh < 32 { let result = rs >> sh; ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 }; ctx.gpr[instr.ra()] = result as u32 as u64; } else { ctx.gpr[instr.ra()] = if rs < 0 { 0xFFFF_FFFFu64 } else { 0 }; ctx.xer_ca = if rs < 0 { 1 } else { 0 }; } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::srawix => { // PPCBUG-042+043 coupled: same shape as srawx for the sh-immediate form. let rs = ctx.gpr[instr.rs()] as i32; let sh = instr.sh(); if sh == 0 { ctx.gpr[instr.ra()] = rs as u32 as u64; ctx.xer_ca = 0; } else { let result = rs >> sh; ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 }; ctx.gpr[instr.ra()] = result as u32 as u64; } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::sldx => { let sh = ctx.gpr[instr.rb()] & 0x7F; ctx.gpr[instr.ra()] = if sh < 64 { ctx.gpr[instr.rs()] << sh } else { 0 }; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::srdx => { let sh = ctx.gpr[instr.rb()] & 0x7F; ctx.gpr[instr.ra()] = if sh < 64 { ctx.gpr[instr.rs()] >> sh } else { 0 }; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::sradx => { let rs = ctx.gpr[instr.rs()] as i64; let sh = ctx.gpr[instr.rb()] & 0x7F; if sh == 0 { ctx.gpr[instr.ra()] = rs as u64; ctx.xer_ca = 0; } else if sh < 64 { let result = rs >> sh; ctx.xer_ca = if rs < 0 && (rs as u64) << (64 - sh) != 0 { 1 } else { 0 }; ctx.gpr[instr.ra()] = result as u64; } else { ctx.gpr[instr.ra()] = if rs < 0 { u64::MAX } else { 0 }; ctx.xer_ca = if rs < 0 { 1 } else { 0 }; } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::sradix => { let rs = 
ctx.gpr[instr.rs()] as i64; let sh = instr.sh64(); if sh == 0 { ctx.gpr[instr.ra()] = rs as u64; ctx.xer_ca = 0; } else { let result = rs >> sh; ctx.xer_ca = if rs < 0 && (rs as u64) << (64 - sh) != 0 { 1 } else { 0 }; ctx.gpr[instr.ra()] = result as u64; } if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } // ===== Rotate ===== PpcOpcode::rlwinmx => { let rs = ctx.gpr[instr.rs()] as u32; let sh = instr.sh(); let mb = instr.mb(); let me = instr.me(); let rotated = rs.rotate_left(sh); let mask = rlw_mask(mb, me); ctx.gpr[instr.ra()] = (rotated & mask) as u64; // PPCBUG-024: 32-bit ABI CR0 view. if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::rlwimix => { let rs = ctx.gpr[instr.rs()] as u32; let sh = instr.sh(); let mb = instr.mb(); let me = instr.me(); let rotated = rs.rotate_left(sh); let mask = rlw_mask(mb, me); let ra = ctx.gpr[instr.ra()] as u32; ctx.gpr[instr.ra()] = ((rotated & mask) | (ra & !mask)) as u64; // PPCBUG-025: 32-bit ABI CR0 view. if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::rlwnmx => { let rs = ctx.gpr[instr.rs()] as u32; let sh = ctx.gpr[instr.rb()] as u32 & 0x1F; let mb = instr.mb(); let me = instr.me(); let rotated = rs.rotate_left(sh); let mask = rlw_mask(mb, me); ctx.gpr[instr.ra()] = (rotated & mask) as u64; // PPCBUG-026: 32-bit ABI CR0 view. 
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); } ctx.pc += 4; } PpcOpcode::rldiclx => { let rs = ctx.gpr[instr.rs()]; let sh = instr.sh64(); let mb = instr.mb_md(); let rotated = rs.rotate_left(sh); let mask = rld_mask_left(mb); ctx.gpr[instr.ra()] = rotated & mask; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::rldicrx => { let rs = ctx.gpr[instr.rs()]; let sh = instr.sh64(); let me = instr.mb_md(); let rotated = rs.rotate_left(sh); let mask = rld_mask_right(me); ctx.gpr[instr.ra()] = rotated & mask; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::rldicx => { let rs = ctx.gpr[instr.rs()]; let sh = instr.sh64(); let mb = instr.mb_md(); let rotated = rs.rotate_left(sh); let mask = rld_mask_left(mb) & rld_mask_right(63 - sh); ctx.gpr[instr.ra()] = rotated & mask; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::rldimix => { let rs = ctx.gpr[instr.rs()]; let sh = instr.sh64(); let mb = instr.mb_md(); let rotated = rs.rotate_left(sh); let mask = rld_mask_left(mb) & rld_mask_right(63 - sh); ctx.gpr[instr.ra()] = (rotated & mask) | (ctx.gpr[instr.ra()] & !mask); if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::rldclx => { let rs = ctx.gpr[instr.rs()]; let sh = ctx.gpr[instr.rb()] & 0x3F; let mb = instr.mb_md(); let rotated = rs.rotate_left(sh as u32); let mask = rld_mask_left(mb); ctx.gpr[instr.ra()] = rotated & mask; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } PpcOpcode::rldcrx => { let rs = ctx.gpr[instr.rs()]; let sh = ctx.gpr[instr.rb()] & 0x3F; let me = instr.mb_md(); let rotated = rs.rotate_left(sh as u32); let mask = rld_mask_right(me); ctx.gpr[instr.ra()] = rotated & mask; if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); } ctx.pc += 4; } // ===== 
Compare ===== PpcOpcode::cmpi => { let bf = instr.crfd(); if instr.l() { // 64-bit compare. Compare directly so boundary i64 values // (e.g. ra=i64::MIN, imm=1) don't mis-sign through a // wrapped subtract. let ra = ctx.gpr[instr.ra()] as i64; let imm = instr.simm16() as i64; ctx.cr[bf] = crate::context::CrField { lt: ra < imm, gt: ra > imm, eq: ra == imm, so: ctx.xer_so != 0, }; } else { let ra = ctx.gpr[instr.ra()] as i32; let imm = instr.simm16() as i32; ctx.cr[bf] = crate::context::CrField { lt: ra < imm, gt: ra > imm, eq: ra == imm, so: ctx.xer_so != 0, }; } ctx.pc += 4; } PpcOpcode::cmpli => { let bf = instr.crfd(); if instr.l() { let ra = ctx.gpr[instr.ra()]; let imm = instr.uimm16() as u64; ctx.update_cr_unsigned(bf, ra, imm); } else { let ra = ctx.gpr[instr.ra()] as u32 as u64; let imm = instr.uimm16() as u64; ctx.update_cr_unsigned(bf, ra, imm); } ctx.pc += 4; } PpcOpcode::cmp => { let bf = instr.crfd(); if instr.l() { let ra = ctx.gpr[instr.ra()] as i64; let rb = ctx.gpr[instr.rb()] as i64; ctx.cr[bf] = crate::context::CrField { lt: ra < rb, gt: ra > rb, eq: ra == rb, so: ctx.xer_so != 0, }; } else { let ra = ctx.gpr[instr.ra()] as i32; let rb = ctx.gpr[instr.rb()] as i32; ctx.cr[bf] = crate::context::CrField { lt: ra < rb, gt: ra > rb, eq: ra == rb, so: ctx.xer_so != 0, }; } ctx.pc += 4; } PpcOpcode::cmpl => { let bf = instr.crfd(); if instr.l() { ctx.update_cr_unsigned(bf, ctx.gpr[instr.ra()], ctx.gpr[instr.rb()]); } else { ctx.update_cr_unsigned(bf, ctx.gpr[instr.ra()] as u32 as u64, ctx.gpr[instr.rb()] as u32 as u64); } ctx.pc += 4; } // ===== Branch ===== PpcOpcode::bx => { let target = if instr.aa() { instr.li() as u32 } else { ctx.pc.wrapping_add(instr.li() as u32) }; if instr.lk() { ctx.lr = (ctx.pc + 4) as u64; } ctx.pc = target; } PpcOpcode::bcx => { let bo = instr.bo(); let bi = instr.bi(); // Decrement CTR if needed if bo & 0b00100 == 0 { ctx.ctr = ctx.ctr.wrapping_sub(1); } let ctr_ok = (bo & 0b00100) != 0 || (((ctx.ctr as u32) != 0) ^ ((bo 
& 0b00010) != 0)); let cond_ok = (bo & 0b10000) != 0 || (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0)); if ctr_ok && cond_ok { let target = if instr.aa() { instr.bd() as u32 } else { ctx.pc.wrapping_add(instr.bd() as u32) }; if instr.lk() { ctx.lr = (ctx.pc + 4) as u64; } ctx.pc = target; } else { if instr.lk() { ctx.lr = (ctx.pc + 4) as u64; } ctx.pc += 4; } } PpcOpcode::bclrx => { let bo = instr.bo(); let bi = instr.bi(); if bo & 0b00100 == 0 { ctx.ctr = ctx.ctr.wrapping_sub(1); } let ctr_ok = (bo & 0b00100) != 0 || (((ctx.ctr as u32) != 0) ^ ((bo & 0b00010) != 0)); let cond_ok = (bo & 0b10000) != 0 || (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0)); let next_pc = ctx.pc + 4; if ctr_ok && cond_ok { ctx.pc = (ctx.lr as u32) & !3; } else { ctx.pc = next_pc; } if instr.lk() { ctx.lr = next_pc as u64; } } PpcOpcode::bcctrx => { let bo = instr.bo(); let bi = instr.bi(); let cond_ok = (bo & 0b10000) != 0 || (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0)); if cond_ok { let next_pc = ctx.pc + 4; ctx.pc = (ctx.ctr as u32) & !3; if instr.lk() { ctx.lr = next_pc as u64; } } else { if instr.lk() { ctx.lr = (ctx.pc + 4) as u64; } ctx.pc += 4; } } // ===== System call ===== PpcOpcode::sc => { // PPCBUG-064: log non-zero LEV (`sc 2` is the Xbox 360 hypervisor-call // convention; canary dispatches it to a different handler than `sc 0`). // Routing LEV=2 requires a StepResult variant extension; deferred. 
let lev = (instr.raw >> 5) & 0x7F; if lev != 0 { tracing::warn!( "sc with LEV={} at {:#010x}: dispatched as plain SystemCall (HVcall routing not implemented)", lev, ctx.pc ); } ctx.pc += 4; return StepResult::SystemCall; } // ===== Load instructions ===== PpcOpcode::lwz => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64; ctx.pc += 4; } PpcOpcode::lwzu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lwzx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64; ctx.pc += 4; } PpcOpcode::lwzux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lbz => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64; ctx.pc += 4; } PpcOpcode::lbzu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lbzx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64; ctx.pc += 4; } PpcOpcode::lbzux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lhz => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; ctx.gpr[instr.rd()] = 
mem.read_u16(ea) as u64; ctx.pc += 4; } PpcOpcode::lhzu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lhzx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64; ctx.pc += 4; } PpcOpcode::lha => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i32 as u32 as u64; ctx.pc += 4; } PpcOpcode::lhax => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i32 as u32 as u64; ctx.pc += 4; } PpcOpcode::lhzux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lhau => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i32 as u32 as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lhaux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i32 as u32 as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::ld => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u64(ea); ctx.pc += 4; } PpcOpcode::ldx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u64(ea); ctx.pc += 4; } PpcOpcode::lwa => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = 
ea.wrapping_add(instr.ds() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64; ctx.pc += 4; } PpcOpcode::lwax => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64; ctx.pc += 4; } PpcOpcode::lwaux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::ldu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.ds() as i64 as u64) as u32; ctx.gpr[instr.rd()] = mem.read_u64(ea); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::ldux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.gpr[instr.rd()] = mem.read_u64(ea); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } // FP loads PpcOpcode::lfs => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64; ctx.pc += 4; } PpcOpcode::lfsx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64; ctx.pc += 4; } PpcOpcode::lfd => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; ctx.fpr[instr.rd()] = mem.read_f64(ea); ctx.pc += 4; } PpcOpcode::lfdx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.fpr[instr.rd()] = mem.read_f64(ea); ctx.pc += 4; } PpcOpcode::lfsu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lfsux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.fpr[instr.rd()] = 
mem.read_f32(ea) as f64; ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lfdu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; ctx.fpr[instr.rd()] = mem.read_f64(ea); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::lfdux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; ctx.fpr[instr.rd()] = mem.read_f64(ea); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } // Reservation (lwarx/stwcx) // // M3.7 — when `ctx.reservation_table` is `Some` and the table is // enabled, route reservations through the inter-thread table so // concurrent host threads can mediate reservation conflicts. // Otherwise (the default in lockstep mode), use the legacy // per-`PpcContext` fields. Both paths leave the per-ctx fields // in a coherent state so a flag flip mid-run doesn't corrupt // outstanding reservations. // // PPCBUG-108: lwarx + stwcx. atomicity is provided by `ReservationTable` // in the M3 multi-HW-thread runtime. The legacy per-ctx fallback (when // `reservation_table` is None or the table is disabled) cannot observe // stores from other host threads — a store by thread B cannot clear // `ctx_A.has_reservation`. This path is only correct in strict lockstep // (single-host-thread) mode. The M3 scheduler MUST enable the table // before spawning a second host thread. See stwcx./stdcx. for the // debug_assert that fires if a non-primary slot takes this path. PpcOpcode::lwarx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; let val = mem.read_u32(ea); ctx.gpr[instr.rd()] = val as u64; ctx.reserved_line = ea & !RESERVATION_MASK; ctx.reserved_val = val as u64; ctx.has_reservation = true; ctx.reservation_width = 4; // PPCBUG-151: word reservation if let Some(t) = &ctx.reservation_table { if t.is_enabled() { ctx.reserved_generation = t.reserve(ea, ctx.hw_id); } } ctx.pc += 4; } // PPCBUG-108: see lwarx comment above. stwcx. 
legacy path cannot observe // cross-thread reservation invalidations; only safe in lockstep mode. PpcOpcode::stwcx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; let line = ea & !RESERVATION_MASK; let table_route = ctx .reservation_table .as_ref() .filter(|t| t.is_enabled()) .cloned(); // PPCBUG-151: stwcx. requires a word (lwarx) reservation; // a doubleword (ldarx) reservation must not commit here. let width_ok = ctx.reservation_width == 4; let success = if let Some(t) = &table_route { // Table-routed: success iff the slot still holds our // reservation AND the per-ctx flag agrees (the per-ctx // flag would be cleared by an intervening write or // context switch). ctx.has_reservation && width_ok && ctx.reserved_line == line && t.try_commit(ea, ctx.reserved_generation, ctx.hw_id) } else { // Legacy per-ctx path (M2 default / lockstep). // PPCBUG-108: fires on non-primary HW slots under misconfig — // if the table is disabled while workers are active, slots // 1..N will trip this assert, surfacing the misconfiguration // early in debug builds. Note: hw_id==0 (primary slot) taking // this path while other slots run in parallel would NOT be // caught; that case requires the table to be enabled instead. debug_assert!( ctx.hw_id == 0, "PPCBUG-108: legacy per-ctx stwcx. on non-primary HW slot \ (hw_id={}) — ReservationTable must be enabled under --parallel", ctx.hw_id ); ctx.has_reservation && width_ok && ctx.reserved_line == line }; if success { mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.cr[0] = crate::context::CrField { lt: false, gt: false, eq: true, so: ctx.xer_so != 0, }; } else { ctx.cr[0] = crate::context::CrField { lt: false, gt: false, eq: false, so: ctx.xer_so != 0, }; // Failed stwcx: if we held the reservation in the table // (someone else displaced our gen), release it from the // counter so `has_active_reservers` returns to zero // when no real reserver exists. 
if let Some(t) = &table_route { t.release(ea, ctx.reserved_generation, ctx.hw_id); } } ctx.has_reservation = false; ctx.reservation_width = 0; // PPCBUG-151: always clear on exit ctx.pc += 4; } // ===== Store instructions ===== PpcOpcode::stw => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.pc += 4; } PpcOpcode::stwu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stwx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.pc += 4; } PpcOpcode::stwux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u32(ea, ctx.gpr[instr.rs()] as u32); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stb => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u8(ea, ctx.gpr[instr.rs()] as u8); ctx.pc += 4; } PpcOpcode::stbu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 
as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u8(ea, ctx.gpr[instr.rs()] as u8); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stbx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u8(ea, ctx.gpr[instr.rs()] as u8); ctx.pc += 4; } PpcOpcode::stbux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u8(ea, ctx.gpr[instr.rs()] as u8); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::sth => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u16(ea, ctx.gpr[instr.rs()] as u16); ctx.pc += 4; } PpcOpcode::sthu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u16(ea, ctx.gpr[instr.rs()] as u16); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::sthx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u16(ea, ctx.gpr[instr.rs()] as u16); ctx.pc += 4; } PpcOpcode::sthux => { let ea = 
ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u16(ea, ctx.gpr[instr.rs()] as u16); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::std => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stdx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stdu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.ds() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stdux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } // FP stores PpcOpcode::stfs => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_f32(ea, ctx.fpr[instr.rs()] as f32); ctx.pc += 4; } PpcOpcode::stfsu => { 
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_f32(ea, ctx.fpr[instr.rs()] as f32); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stfsx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_f32(ea, ctx.fpr[instr.rs()] as f32); ctx.pc += 4; } PpcOpcode::stfsux => { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_f32(ea, ctx.fpr[instr.rs()] as f32); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stfd => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_f64(ea, ctx.fpr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stfdu => { let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_f64(ea, ctx.fpr[instr.rs()]); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stfdx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_f64(ea, ctx.fpr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stfdux 
=> { let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_f64(ea, ctx.fpr[instr.rs()]); ctx.gpr[instr.ra()] = ea as u64; ctx.pc += 4; } PpcOpcode::stfiwx => { // Store FP as integer word: stores low 32 bits of FPR as-is let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u32(ea, ctx.fpr[instr.rs()].to_bits() as u32); ctx.pc += 4; } // String load/store PpcOpcode::lswi => { let mut ea = if instr.ra() == 0 { 0u32 } else { ctx.gpr[instr.ra()] as u32 }; let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 }; let mut rd = instr.rd(); let mut bytes_left = nb; while bytes_left > 0 { let mut val = 0u32; for byte_idx in 0..4 { if bytes_left == 0 { break; } let b = mem.read_u8(ea) as u32; val |= b << (24 - byte_idx * 8); ea = ea.wrapping_add(1); bytes_left -= 1; } ctx.gpr[rd] = val as u64; rd = (rd + 1) % 32; } ctx.pc += 4; } PpcOpcode::stswi => { let mut ea = if instr.ra() == 0 { 0u32 } else { ctx.gpr[instr.ra()] as u32 }; let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 }; let mut rs = instr.rs(); let mut bytes_left = nb; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { let first_line = ea & !RESERVATION_MASK; let last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK; t.invalidate_for_write(first_line); if last_line != first_line { t.invalidate_for_write(last_line); } } } while bytes_left > 0 { let val = ctx.gpr[rs] as u32; for byte_idx in 0..4 { if bytes_left == 0 { break; } mem.write_u8(ea, (val >> (24 - byte_idx * 8)) as u8); ea = ea.wrapping_add(1); bytes_left -= 1; } rs = (rs + 1) % 32; } ctx.pc += 4; } // ===== 
Special register moves ===== PpcOpcode::mfspr => { let spr = instr.spr(); ctx.gpr[instr.rd()] = match spr { crate::context::spr::XER => ctx.xer() as u64, crate::context::spr::LR => ctx.lr, crate::context::spr::CTR => ctx.ctr, crate::context::spr::DEC => ctx.dec as u64, crate::context::spr::TBL => ctx.timebase & 0xFFFF_FFFF, crate::context::spr::TBU => ctx.timebase >> 32, crate::context::spr::VRSAVE => ctx.vrsave as u64, // Xbox 360 Xenon processor signature (from canary). crate::context::spr::PVR => 0x0071_0800, // Benign SPRs — titles read these but we don't model them. crate::context::spr::SPRG0 | crate::context::spr::SPRG1 | crate::context::spr::SPRG2 | crate::context::spr::SPRG3 | crate::context::spr::HID0 | crate::context::spr::HID1 | crate::context::spr::DAR | crate::context::spr::DSISR | crate::context::spr::PIR => 0, _ => { tracing::warn!("mfspr: unimplemented SPR {}", spr); 0 } }; ctx.pc += 4; } PpcOpcode::mtspr => { let spr = instr.spr(); let val = ctx.gpr[instr.rs()]; match spr { crate::context::spr::XER => ctx.set_xer(val as u32), crate::context::spr::LR => ctx.lr = val, crate::context::spr::CTR => ctx.ctr = val as u32 as u64, crate::context::spr::DEC => ctx.dec = val as u32, crate::context::spr::TBL_WRITE => { ctx.timebase = (ctx.timebase & 0xFFFF_FFFF_0000_0000) | (val & 0xFFFF_FFFF); } crate::context::spr::TBU_WRITE => { ctx.timebase = (ctx.timebase & 0x0000_0000_FFFF_FFFF) | ((val & 0xFFFF_FFFF) << 32); } crate::context::spr::VRSAVE => ctx.vrsave = val as u32, // Benign writes — swallow silently to avoid false Unimplemented // warnings on SPRs that have no observable effect in userspace. 
crate::context::spr::SPRG0 | crate::context::spr::SPRG1 | crate::context::spr::SPRG2 | crate::context::spr::SPRG3 | crate::context::spr::HID0 | crate::context::spr::HID1 | crate::context::spr::DAR | crate::context::spr::DSISR => {} _ => { tracing::warn!("mtspr: unimplemented SPR {}", spr); } } ctx.pc += 4; } PpcOpcode::mfcr => { ctx.gpr[instr.rd()] = ctx.cr() as u64; ctx.pc += 4; } PpcOpcode::mtcrf => { let crm = instr.crm(); let val = ctx.gpr[instr.rs()] as u32; let old = ctx.cr(); let mut new = old; for i in 0..8u32 { if crm & (1 << (7 - i)) != 0 { let mask = 0xF << (28 - i * 4); new = (new & !mask) | (val & mask); } } ctx.set_cr(new); ctx.pc += 4; } PpcOpcode::mfmsr => { ctx.gpr[instr.rd()] = ctx.msr; ctx.pc += 4; } PpcOpcode::mtmsr | PpcOpcode::mtmsrd => { ctx.msr = ctx.gpr[instr.rs()]; ctx.pc += 4; } PpcOpcode::mftb => { let tbr = instr.spr(); ctx.gpr[instr.rd()] = match tbr { 268 => ctx.timebase & 0xFFFF_FFFF, 269 => ctx.timebase >> 32, _ => 0, }; ctx.pc += 4; } // CR logical PpcOpcode::crand => { cr_logical(ctx, instr, |a, b| a & b); ctx.pc += 4; } PpcOpcode::crandc => { cr_logical(ctx, instr, |a, b| a & !b); ctx.pc += 4; } PpcOpcode::creqv => { cr_logical(ctx, instr, |a, b| !(a ^ b)); ctx.pc += 4; } PpcOpcode::crnand => { cr_logical(ctx, instr, |a, b| !(a & b)); ctx.pc += 4; } PpcOpcode::crnor => { cr_logical(ctx, instr, |a, b| !(a | b)); ctx.pc += 4; } PpcOpcode::cror => { cr_logical(ctx, instr, |a, b| a | b); ctx.pc += 4; } PpcOpcode::crorc => { cr_logical(ctx, instr, |a, b| a | !b); ctx.pc += 4; } PpcOpcode::crxor => { cr_logical(ctx, instr, |a, b| a ^ b); ctx.pc += 4; } PpcOpcode::mcrf => { ctx.cr[instr.crfd()] = ctx.cr[instr.crfs()]; ctx.pc += 4; } // ===== Cache/sync (no-ops in interpreter) ===== PpcOpcode::dcbf | PpcOpcode::dcbi | PpcOpcode::dcbst | PpcOpcode::dcbt | PpcOpcode::dcbtst | PpcOpcode::icbi | PpcOpcode::sync | PpcOpcode::eieio | PpcOpcode::isync => { ctx.pc += 4; } PpcOpcode::dcbz => { // Zero 32 bytes at effective address let ea = if 
instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) as u32) & !31; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } for i in 0..8 { mem.write_u32(ea + i * 4, 0); } ctx.pc += 4; } PpcOpcode::dcbz128 => { // Zero 128 bytes let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) as u32) & !127; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } for i in 0..32 { mem.write_u32(ea + i * 4, 0); } ctx.pc += 4; } // ===== Load multiple ===== PpcOpcode::lmw => { let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; ea = ea.wrapping_add(instr.d() as i64 as u64); for r in instr.rd()..32 { ctx.gpr[r] = mem.read_u32(ea as u32) as u64; ea = ea.wrapping_add(4); } ctx.pc += 4; } PpcOpcode::stmw => { let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; ea = ea.wrapping_add(instr.d() as i64 as u64); // PPCBUG-160: stmw can span two cache lines when (32-rs)*4 > one line. // Iterate over every touched line so any reservation on a later line // is also invalidated (same guarantee as single-word stores). 
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { let start_ea = ea as u32; let last_ea = start_ea.wrapping_add((32 - instr.rs() as u32) * 4).wrapping_sub(1); let line_size = RESERVATION_MASK + 1; let mut line = start_ea & !RESERVATION_MASK; loop { t.invalidate_for_write(line); if line >= (last_ea & !RESERVATION_MASK) { break; } line = line.wrapping_add(line_size); } } } for r in instr.rs()..32 { mem.write_u32(ea as u32, ctx.gpr[r] as u32); ea = ea.wrapping_add(4); } ctx.pc += 4; } // ===== Trap ===== PpcOpcode::tw | PpcOpcode::twi | PpcOpcode::td | PpcOpcode::tdi => { // PPCBUG-063: save CIA before incrementing so a trap handler reads // the faulting instruction address, not CIA+4. // PPCBUG-065: log the SIMM type code on `twi 31, r0, IMM` (Xbox 360 // typed-trap convention used by the CRT/kernel for C++ exception // class dispatch). The audit notes this is relevant to the Sylpheed // throw investigation; routing the type code via a payload requires // a StepResult enum extension that's deferred for now. let trap_pc = ctx.pc; let a = ctx.gpr[instr.ra()]; let b = match instr.opcode { PpcOpcode::twi | PpcOpcode::tdi => instr.simm16() as i64 as u64, _ => ctx.gpr[instr.rb()], }; let width = match instr.opcode { PpcOpcode::tw | PpcOpcode::twi => trap::TrapWidth::Word, _ => trap::TrapWidth::Doubleword, }; let fired = trap::evaluate(instr.to(), a, b, width); if fired { let typed_trap_simm = if matches!(instr.opcode, PpcOpcode::twi) && instr.to() == 31 && instr.ra() == 0 { Some(instr.simm16() as u16) } else { None }; tracing::warn!( "Trap fired at {:#010x}: {:?} TO={} a={:#x} b={:#x}{}", trap_pc, instr.opcode, instr.to(), a, b, typed_trap_simm.map_or(String::new(), |t| format!(" typed_trap_simm={:#06x}", t)) ); // Leave ctx.pc at CIA (NOT NIA) so trap handlers / SEH delivery // can read the faulting instruction address from ctx.pc. 
return StepResult::Trap; } ctx.pc += 4; } // ===== Byte-reverse loads ===== PpcOpcode::lwbrx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; let val = mem.read_u32(ea); ctx.gpr[instr.rd()] = val.swap_bytes() as u64; ctx.pc += 4; } PpcOpcode::lhbrx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; let val = mem.read_u16(ea); ctx.gpr[instr.rd()] = val.swap_bytes() as u64; ctx.pc += 4; } PpcOpcode::stwbrx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u32(ea, (ctx.gpr[instr.rs()] as u32).swap_bytes()); ctx.pc += 4; } PpcOpcode::sthbrx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32; if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u16(ea, (ctx.gpr[instr.rs()] as u16).swap_bytes()); ctx.pc += 4; } // ===== VMX/VMX128: Vector Load/Store ===== PpcOpcode::lvx => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; // aligned let mut bytes = [0u8; 16]; for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes); ctx.pc += 4; } PpcOpcode::lvx128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; let mut bytes = [0u8; 16]; for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(bytes); ctx.pc += 4; } PpcOpcode::stvx => { let ea = if instr.ra() == 0 { 
0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; // PPCBUG-511: stvx was missing invalidate_for_write. if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } let bytes = ctx.vr[instr.rs()].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; } PpcOpcode::stvx128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; // PPCBUG-511: stvx128 was missing invalidate_for_write. if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } let bytes = ctx.vr[instr.vs128()].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; } // lvewx, lvebx, lvehx all load aligned 16 bytes (per xenia reference) PpcOpcode::lvebx => { // Load 1 byte from EA into vD[EA & 0xF]. PowerISA marks the // other lanes as "undefined" but real Xenon (and Canary) // preserve their prior contents, so seed from vD. let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let slot = (ea & 0xF) as usize; let mut bytes = ctx.vr[instr.rd()].as_bytes(); bytes[slot] = mem.read_u8(ea); ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes); ctx.pc += 4; } PpcOpcode::lvehx => { // Load a halfword from (EA & ~1) into vD at halfword slot // (EA & 0xF) >> 1. Other halfword lanes preserved (see lvebx). 
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let ea = ea_unaligned & !0x1u32; let slot = ((ea_unaligned & 0xF) >> 1) as usize; let mut bytes = ctx.vr[instr.rd()].as_bytes(); let h = mem.read_u16(ea); bytes[slot * 2] = (h >> 8) as u8; bytes[slot * 2 + 1] = (h & 0xFF) as u8; ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes); ctx.pc += 4; } PpcOpcode::lvewx => { // Load a word from (EA & ~3) into vD at word slot // (EA & 0xF) >> 2. Other word lanes preserved (see lvebx). let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let ea = ea_unaligned & !0x3u32; let slot = ((ea_unaligned & 0xF) >> 2) as usize; let mut bytes = ctx.vr[instr.rd()].as_bytes(); let w = mem.read_u32(ea); bytes[slot * 4] = (w >> 24) as u8; bytes[slot * 4 + 1] = (w >> 16) as u8; bytes[slot * 4 + 2] = (w >> 8) as u8; bytes[slot * 4 + 3] = (w & 0xFF) as u8; ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes); ctx.pc += 4; } PpcOpcode::stvebx => { // Store vS[EA & 0xF] (1 byte) to memory at EA. let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; // PPCBUG-512: stvebx was missing invalidate_for_write. if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } let slot = (ea & 0xF) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); mem.write_u8(ea, bytes[slot]); ctx.pc += 4; } PpcOpcode::stvehx => { // Store vS[slot] (1 halfword) at EA & ~1. slot = (EA & 0xF) >> 1. let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let ea = ea_unaligned & !0x1u32; // PPCBUG-512: stvehx was missing invalidate_for_write. 
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } let slot = ((ea_unaligned & 0xF) >> 1) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); let h = ((bytes[slot * 2] as u16) << 8) | (bytes[slot * 2 + 1] as u16); mem.write_u16(ea, h); ctx.pc += 4; } PpcOpcode::stvewx => { // Store vS[slot] (1 word) at EA & ~3. slot = (EA & 0xF) >> 2. let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32; let ea = ea_unaligned & !0x3u32; // PPCBUG-512: stvewx was missing invalidate_for_write. if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } let slot = ((ea_unaligned & 0xF) >> 2) as usize; let bytes = ctx.vr[instr.rs()].as_bytes(); let w = ((bytes[slot * 4] as u32) << 24) | ((bytes[slot * 4 + 1] as u32) << 16) | ((bytes[slot * 4 + 2] as u32) << 8) | (bytes[slot * 4 + 3] as u32); mem.write_u32(ea, w); ctx.pc += 4; } PpcOpcode::lvxl | PpcOpcode::lvxl128 => { // Same as lvx but with cache hint (ignored) let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; let mut bytes = [0u8; 16]; for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); } let vd = if matches!(instr.opcode, PpcOpcode::lvxl128) { instr.vd128() } else { instr.rd() }; ctx.vr[vd] = xenia_types::Vec128::from_bytes(bytes); ctx.pc += 4; } PpcOpcode::stvxl | PpcOpcode::stvxl128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; // PPCBUG-511: stvxl/stvxl128 were missing invalidate_for_write. 
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } let vs = if matches!(instr.opcode, PpcOpcode::stvxl128) { instr.vs128() } else { instr.rs() }; let bytes = ctx.vr[vs].as_bytes(); for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); } ctx.pc += 4; } // ===== VMX: Float Arithmetic ===== PpcOpcode::vaddfp => { // PPCBUG-435: VSCR.NJ=1 (Xbox 360 always boots with this set) requires // flush-to-zero on subnormal inputs and outputs. Canary VMX float // arithmetic flushes denormals unconditionally. let a = ctx.vr[instr.ra()].as_f32x4(); let b = ctx.vr[instr.rb()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); r[i] = vmx::flush_denorm(ai + bi); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vaddfp128 => { // PPCBUG-435: same as vaddfp. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); r[i] = vmx::flush_denorm(ai + bi); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vsubfp => { // PPCBUG-435. let a = ctx.vr[instr.ra()].as_f32x4(); let b = ctx.vr[instr.rb()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); r[i] = vmx::flush_denorm(ai - bi); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vsubfp128 => { // PPCBUG-435. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); r[i] = vmx::flush_denorm(ai - bi); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vmaddfp => { // vD = (vA * vC) + vB. 
AltiVec unconditionally flushes denormal // *inputs* to 0 regardless of VSCR[NJ] (confirmed on POWER8 hw). let a = ctx.vr[instr.ra()].as_f32x4(); let b = ctx.vr[instr.rb()].as_f32x4(); let c = ctx.vr[instr.rc()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let ci = vmx::flush_denorm(c[i]); // PPCBUG-437: flush subnormal output too. r[i] = vmx::flush_denorm(ai.mul_add(ci, bi)); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vmaddfp128 => { // ISA: (VD) <- (VA × VD) + VB. VD is both the second multiplicand and destination. // Canary InstrEmit_vmaddfp128 (ppc_emit_altivec.cc:806-809): MulAdd(VA, VD, VB). // Previous code computed ai.mul_add(bi, di) = VA×VB+VD — VB and VD roles swapped // (PPCBUG-424). Fix: ai.mul_add(di, bi) = VA×VD+VB. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let d = ctx.vr[instr.vd128()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let di = vmx::flush_denorm(d[i]); // PPCBUG-437. r[i] = vmx::flush_denorm(ai.mul_add(di, bi)); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vnmsubfp => { // vD = -(vA * vC - vB) = vB - vA * vC. Same denorm-flush rule as vmaddfp. let a = ctx.vr[instr.ra()].as_f32x4(); let b = ctx.vr[instr.rb()].as_f32x4(); let c = ctx.vr[instr.rc()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let ci = vmx::flush_denorm(c[i]); // PPCBUG-426: single FMA rounding instead of two-step (b - a*c). r[i] = vmx::flush_denorm(-ai.mul_add(ci, -bi)); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vnmsubfp128 => { // VMX128 form: vD <- -((vA * vB) - vD) = vD - (vA * vB). 
Canary // routes through `InstrEmit_vnmsubfp_` with the same arg-swap, // which flushes all inputs unconditionally. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let d = ctx.vr[instr.vd128()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let di = vmx::flush_denorm(d[i]); // PPCBUG-427: single FMA rounding. r[i] = vmx::flush_denorm(-ai.mul_add(bi, -di)); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vmulfp128 => { // PPCBUG-435 + PPCBUG-437. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); r[i] = vmx::flush_denorm(ai * bi); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vmaxfp => { let a = ctx.vr[instr.ra()].as_f32x4(); let b = ctx.vr[instr.rb()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = vmx::max_nan(a[i], b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vmaxfp128 => { let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = vmx::max_nan(a[i], b[i]); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vminfp => { let a = ctx.vr[instr.ra()].as_f32x4(); let b = ctx.vr[instr.rb()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = vmx::min_nan(a[i], b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vminfp128 => { let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = vmx::min_nan(a[i], b[i]); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vrefp | PpcOpcode::vrefp128 => { let vb 
= if matches!(instr.opcode, PpcOpcode::vrefp128) { instr.vb128() } else { instr.rb() }; let vd = if matches!(instr.opcode, PpcOpcode::vrefp128) { instr.vd128() } else { instr.rd() }; let b = ctx.vr[vb].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = 1.0 / b[i]; } ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vrsqrtefp | PpcOpcode::vrsqrtefp128 => { let vb = if matches!(instr.opcode, PpcOpcode::vrsqrtefp128) { instr.vb128() } else { instr.rb() }; let vd = if matches!(instr.opcode, PpcOpcode::vrsqrtefp128) { instr.vd128() } else { instr.rd() }; let b = ctx.vr[vb].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = 1.0 / b[i].sqrt(); } ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } // ===== VMX: Float Compare ===== PpcOpcode::vcmpeqfp | PpcOpcode::vcmpeqfp128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_f32x4(); let b = ctx.vr[vb].as_f32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); let rc = if matches!(instr.opcode, PpcOpcode::vcmpeqfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } PpcOpcode::vcmpgefp | PpcOpcode::vcmpgefp128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_f32x4(); let b = ctx.vr[vb].as_f32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] >= b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); let rc = if matches!(instr.opcode, PpcOpcode::vcmpgefp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } PpcOpcode::vcmpgtfp | PpcOpcode::vcmpgtfp128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_f32x4(); let b = ctx.vr[vb].as_f32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = 
xenia_types::Vec128::from_u32x4_array(r); let rc = if matches!(instr.opcode, PpcOpcode::vcmpgtfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } // ===== VMX: Logical ===== PpcOpcode::vand | PpcOpcode::vand128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[i] & b[i]; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vandc | PpcOpcode::vandc128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[i] & !b[i]; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vor | PpcOpcode::vor128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[i] | b[i]; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vxor | PpcOpcode::vxor128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[i] ^ b[i]; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vnor | PpcOpcode::vnor128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = !(a[i] | b[i]); } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vsel | PpcOpcode::vsel128 => { // vD = (vA & ~vC) | (vB & vC) let (va, vb, vd); let vc; if matches!(instr.opcode, PpcOpcode::vsel128) { va = instr.va128(); vb = instr.vb128(); vd = instr.vd128(); vc = vd; // for 128, vC is encoded in vD field } else { va = instr.ra(); vb = instr.rb(); vd = instr.rd(); vc = instr.rc(); } let a = ctx.vr[va].as_u32x4(); let b 
= ctx.vr[vb].as_u32x4(); let c = ctx.vr[vc].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = (a[i] & !c[i]) | (b[i] & c[i]); } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // ===== VMX: Permute/Splat/Shift ===== PpcOpcode::vperm | PpcOpcode::vperm128 => { let (va, vb, vd); let vc; if matches!(instr.opcode, PpcOpcode::vperm128) { va = instr.va128(); vb = instr.vb128(); vd = instr.vd128(); vc = instr.vc128_2(); } else { va = instr.ra(); vb = instr.rb(); vd = instr.rd(); vc = instr.rc(); } let a_bytes = ctx.vr[va].as_bytes(); let b_bytes = ctx.vr[vb].as_bytes(); let c_bytes = ctx.vr[vc].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { let idx = (c_bytes[i] & 0x1F) as usize; r[i] = if idx < 16 { a_bytes[idx] } else { b_bytes[idx - 16] }; } ctx.vr[vd] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vsldoi => { let a_bytes = ctx.vr[instr.ra()].as_bytes(); let b_bytes = ctx.vr[instr.rb()].as_bytes(); let sh = ((instr.raw >> 6) & 0xF) as usize; // SH field bits 6-9 let mut concat = [0u8; 32]; concat[..16].copy_from_slice(&a_bytes); concat[16..].copy_from_slice(&b_bytes); let mut r = [0u8; 16]; r.copy_from_slice(&concat[sh..sh + 16]); ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vsldoi128 => { let a_bytes = ctx.vr[instr.va128()].as_bytes(); let b_bytes = ctx.vr[instr.vb128()].as_bytes(); let sh = instr.vx128_5_sh() as usize; let mut concat = [0u8; 32]; concat[..16].copy_from_slice(&a_bytes); concat[16..].copy_from_slice(&b_bytes); let mut r = [0u8; 16]; let sh = sh.min(16); r.copy_from_slice(&concat[sh..sh + 16]); ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vspltw => { let uimm = ((instr.raw >> 16) & 0x3) as usize; // UIMM (2 bits for word index) let b = ctx.vr[instr.rb()].as_u32x4(); let val = b[uimm]; ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4(val, val, val, val); ctx.pc += 4; } PpcOpcode::vspltw128 => { let uimm = ((instr.raw >> 
16) & 0x3) as usize; let b = ctx.vr[instr.vb128()].as_u32x4(); let val = b[uimm]; ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4(val, val, val, val); ctx.pc += 4; } PpcOpcode::vsplth => { let uimm = ((instr.raw >> 16) & 0x7) as usize; let b = ctx.vr[instr.rb()].as_u16x8(); let val = b[uimm]; ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array([val; 8]); ctx.pc += 4; } PpcOpcode::vspltb => { let uimm = ((instr.raw >> 16) & 0xF) as usize; let b = ctx.vr[instr.rb()].as_bytes(); let val = b[uimm]; ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes([val; 16]); ctx.pc += 4; } PpcOpcode::vspltisw | PpcOpcode::vspltisw128 => { let simm = ((instr.raw >> 16) & 0x1F) as i32; let simm = if simm & 0x10 != 0 { simm | !0x1F } else { simm }; // sign extend 5-bit let val = simm as u32; let vd = if matches!(instr.opcode, PpcOpcode::vspltisw128) { instr.vd128() } else { instr.rd() }; ctx.vr[vd] = xenia_types::Vec128::from_u32x4(val, val, val, val); ctx.pc += 4; } PpcOpcode::vspltisb => { let simm = ((instr.raw >> 16) & 0x1F) as i8; let simm = if simm & 0x10 != 0 { simm | !0x1F } else { simm }; ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes([simm as u8; 16]); ctx.pc += 4; } PpcOpcode::vspltish => { let simm = ((instr.raw >> 16) & 0x1F) as i16; let simm = if simm & 0x10 != 0 { simm | !0x1F } else { simm }; ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array([simm as u16; 8]); ctx.pc += 4; } // ===== VMX: Merge/Shuffle ===== PpcOpcode::vmrghw | PpcOpcode::vmrghw128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); // Merge high words: [a0, b0, a1, b1] ctx.vr[vd] = xenia_types::Vec128::from_u32x4(a[0], b[0], a[1], b[1]); ctx.pc += 4; } PpcOpcode::vmrglw | PpcOpcode::vmrglw128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); // Merge low words: [a2, b2, a3, b3] ctx.vr[vd] = xenia_types::Vec128::from_u32x4(a[2], b[2], a[3], b[3]); ctx.pc += 
4; } // ===== VMX: Integer Arithmetic ===== PpcOpcode::vadduwm => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[i].wrapping_add(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vsubuwm => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[i].wrapping_sub(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // ===== VMX: Shift ===== PpcOpcode::vslw | PpcOpcode::vslw128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { let sh = b[i] & 0x1F; r[i] = a[i] << sh; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vsrw | PpcOpcode::vsrw128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { let sh = b[i] & 0x1F; r[i] = a[i] >> sh; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vsraw | PpcOpcode::vsraw128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { let sh = b[i] & 0x1F; r[i] = (a[i] as i32 >> sh) as u32; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vrlw | PpcOpcode::vrlw128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { let sh = b[i] & 0x1F; r[i] = a[i].rotate_left(sh); } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // VMX: Round/Convert PpcOpcode::vrfiz | PpcOpcode::vrfiz128 => { let vb = if matches!(instr.opcode, PpcOpcode::vrfiz128) { instr.vb128() } else { instr.rb() }; let vd = if matches!(instr.opcode, 
PpcOpcode::vrfiz128) { instr.vd128() } else { instr.rd() }; let b = ctx.vr[vb].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = b[i].trunc(); } ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vrfin | PpcOpcode::vrfin128 => { // PPCBUG-432: ISA round-to-nearest-even, NOT Rust's `round()` // (which is round-half-away-from-zero). let vb = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vb128() } else { instr.rb() }; let vd = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vd128() } else { instr.rd() }; let b = ctx.vr[vb].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = b[i].round_ties_even(); } ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vrfip | PpcOpcode::vrfip128 => { let vb = if matches!(instr.opcode, PpcOpcode::vrfip128) { instr.vb128() } else { instr.rb() }; let vd = if matches!(instr.opcode, PpcOpcode::vrfip128) { instr.vd128() } else { instr.rd() }; let b = ctx.vr[vb].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = b[i].ceil(); } ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vrfim | PpcOpcode::vrfim128 => { let vb = if matches!(instr.opcode, PpcOpcode::vrfim128) { instr.vb128() } else { instr.rb() }; let vd = if matches!(instr.opcode, PpcOpcode::vrfim128) { instr.vd128() } else { instr.rd() }; let b = ctx.vr[vb].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = b[i].floor(); } ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } // VMX: MFVSCR/MTVSCR — VSCR lives in word 3; only NJ (bit 16) and // SAT (bit 31) are defined. Canary stores the full Vec128 so we do // the same: mfvscr copies the register, mtvscr overwrites it. 
PpcOpcode::mfvscr => { ctx.vr[instr.rd()] = ctx.vscr; ctx.pc += 4; } PpcOpcode::mtvscr => { ctx.vscr = ctx.vr[instr.rb()]; ctx.pc += 4; } // ===== VMX: lvsl/lvsr (generate permute vectors) ===== PpcOpcode::lvsl | PpcOpcode::lvsl128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]); let sh = (ea & 0xF) as u8; let mut r = [0u8; 16]; for i in 0..16 { r[i] = sh + i as u8; } let vd = if matches!(instr.opcode, PpcOpcode::lvsl128) { instr.vd128() } else { instr.rd() }; ctx.vr[vd] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::lvsr | PpcOpcode::lvsr128 => { let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; let ea = ea.wrapping_add(ctx.gpr[instr.rb()]); let sh = (ea & 0xF) as u8; let mut r = [0u8; 16]; for i in 0..16 { r[i] = (16 - sh) + i as u8; } let vd = if matches!(instr.opcode, PpcOpcode::lvsr128) { instr.vd128() } else { instr.rd() }; ctx.vr[vd] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } // ===== VMX: Integer compare ===== PpcOpcode::vcmpequw | PpcOpcode::vcmpequw128 => { let (va, vb, vd) = vmx_reg_triple(instr); let a = ctx.vr[va].as_u32x4(); let b = ctx.vr[vb].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; } ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r); let rc = if matches!(instr.opcode, PpcOpcode::vcmpequw128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; if rc { update_cr6_from_vmask(&r, ctx); } ctx.pc += 4; } // ===== FPU: Arithmetic ===== PpcOpcode::faddx => { let a = ctx.fpr[instr.ra()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_add(ctx, a, b, false); let result = a + b; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::faddsx => { let a = ctx.fpr[instr.ra()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_add(ctx, a, b, false); let result = 
to_single(ctx, a + b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fsubx => { let a = ctx.fpr[instr.ra()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_add(ctx, a, b, true); let result = a - b; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fsubsx => { let a = ctx.fpr[instr.ra()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_add(ctx, a, b, true); let result = to_single(ctx, a - b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fmulx => { // A-form: frD = frA * frC (frC is at rc() field, bits 21-25) let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; fpscr::check_invalid_mul(ctx, a, c); let result = a * c; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fmulsx => { let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; fpscr::check_invalid_mul(ctx, a, c); let result = to_single(ctx, a * c); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fdivx => { let a = ctx.fpr[instr.ra()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_div(ctx, a, b); fpscr::check_zero_divide(ctx, a, b); let result = a / b; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && b != 0.0); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fdivsx => { let a = ctx.fpr[instr.ra()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_div(ctx, a, b); fpscr::check_zero_divide(ctx, a, 
b); let result = to_single(ctx, a / b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && b != 0.0); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } // ===== FPU: Multiply-Add ===== PpcOpcode::fmaddx => { // PPCBUG-202: VXISI from input properties (not from `a*c` which has wrong sign on overflow). let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); fpscr::check_invalid_fma_add(ctx, a, c, b, false); let result = a.mul_add(c, b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fmaddsx => { // PPCBUG-181: missing VXISI on add step. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); fpscr::check_invalid_fma_add(ctx, a, c, b, false); let result = to_single(ctx, a.mul_add(c, b)); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fmsubx => { // PPCBUG-203: missing VXISI on sub step. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); fpscr::check_invalid_fma_add(ctx, a, c, b, true); let result = a.mul_add(c, -b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fmsubsx => { // PPCBUG-182: missing VXISI on sub step. 
let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); fpscr::check_invalid_fma_add(ctx, a, c, b, true); let result = to_single(ctx, a.mul_add(c, -b)); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmaddx => { // PPCBUG-203: missing VXISI. PPCBUG-205: NaN sign preserved (no negation on NaN). let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); fpscr::check_invalid_fma_add(ctx, a, c, b, false); let fma = a.mul_add(c, b); let result = if fma.is_nan() { fma } else { -fma }; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmaddsx => { // PPCBUG-181 + PPCBUG-183: VXISI + NaN sign preservation. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); fpscr::check_invalid_fma_add(ctx, a, c, b, false); let fma = a.mul_add(c, b); let neg = if fma.is_nan() { fma } else { -fma }; let result = to_single(ctx, neg); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmsubx => { // PPCBUG-203: VXISI. PPCBUG-205: NaN sign preservation. 
let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); fpscr::check_invalid_fma_add(ctx, a, c, b, true); let fma = a.mul_add(c, -b); let result = if fma.is_nan() { fma } else { -fma }; ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnmsubsx => { // PPCBUG-182 + PPCBUG-183: VXISI + NaN sign preservation. let a = ctx.fpr[instr.ra()]; let c = ctx.fpr[instr.rc()]; let b = ctx.fpr[instr.rb()]; fpscr::check_invalid_mul(ctx, a, c); fpscr::check_invalid_fma_add(ctx, a, c, b, true); let fma = a.mul_add(c, -b); let neg = if fma.is_nan() { fma } else { -fma }; let result = to_single(ctx, neg); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } // ===== FPU: Move/Sign ===== PpcOpcode::fmrx => { ctx.fpr[instr.rd()] = ctx.fpr[instr.rb()]; if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fabsx => { ctx.fpr[instr.rd()] = ctx.fpr[instr.rb()].abs(); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnegx => { ctx.fpr[instr.rd()] = -ctx.fpr[instr.rb()]; if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fnabsx => { ctx.fpr[instr.rd()] = -(ctx.fpr[instr.rb()].abs()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } // ===== FPU: Select ===== PpcOpcode::fselx => { // frD = if frA >= 0.0 then frC else frB ctx.fpr[instr.rd()] = if ctx.fpr[instr.ra()] >= 0.0 { ctx.fpr[instr.rc()] } else { ctx.fpr[instr.rb()] }; if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } // ===== FPU: Square root / Reciprocal ===== PpcOpcode::fsqrtx => { let b = ctx.fpr[instr.rb()]; // sqrt of negative (non-zero) is invalid operation → VXSQRT. 
if b.is_sign_negative() && b != 0.0 && !b.is_nan() { fpscr::set_exception(ctx, fpscr::VXSQRT); } if fpscr::is_snan(b) { fpscr::set_exception(ctx, fpscr::VXSNAN); } let result = b.sqrt(); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, b.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fsqrtsx => { let b = ctx.fpr[instr.rb()]; if b.is_sign_negative() && b != 0.0 && !b.is_nan() { fpscr::set_exception(ctx, fpscr::VXSQRT); } if fpscr::is_snan(b) { fpscr::set_exception(ctx, fpscr::VXSNAN); } let result = to_single(ctx, b.sqrt()); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, b.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fresx => { // Single-precision reciprocal estimate: frD = 1.0 / frB. // PPCBUG-184: pre-quantize input to f32 to match canary's // `f.Recip(f.Convert(frB, FLOAT32_TYPE))` behavior. Hardware // produces a ~12-bit LUT estimate; both emulators produce a // fully-IEEE single reciprocal, but the f32 quantization at // least makes the input precision match. 
let b_full = ctx.fpr[instr.rb()]; let b = b_full as f32 as f64; if b == 0.0 { fpscr::set_exception(ctx, fpscr::ZX); } if fpscr::is_snan(b_full) { fpscr::set_exception(ctx, fpscr::VXSNAN); } let result = to_single(ctx, 1.0 / b); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, b.is_finite() && b != 0.0); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::frsqrtex => { // Reciprocal square root estimate: frD = 1.0 / sqrt(frB) let b = ctx.fpr[instr.rb()]; if b == 0.0 { fpscr::set_exception(ctx, fpscr::ZX); } if b.is_sign_negative() && b != 0.0 && !b.is_nan() { fpscr::set_exception(ctx, fpscr::VXSQRT); } if fpscr::is_snan(b) { fpscr::set_exception(ctx, fpscr::VXSNAN); } let result = 1.0 / b.sqrt(); ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, b.is_finite() && b > 0.0); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } // ===== FPU: Rounding/Conversion ===== PpcOpcode::frspx => { // Round to single precision honouring FPSCR[RN]. // PPCBUG-225: set XX on inexact rounding (almost every frsp call). let b = ctx.fpr[instr.rb()]; if fpscr::is_snan(b) { fpscr::set_exception(ctx, fpscr::VXSNAN); } let result = to_single(ctx, b); if b.is_finite() && result.is_finite() && result != b { fpscr::set_exception(ctx, fpscr::XX); } ctx.fpr[instr.rd()] = result; fpscr::update_after_op(ctx, result, b.is_finite()); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fcfidx => { // Convert from integer doubleword: frD = (double)(int64_t)frB_as_bits. // PPCBUG-224: set XX when |i64| > 2^53 (precision loss in conversion). let bits = ctx.fpr[instr.rb()].to_bits(); let i = bits as i64; let result = i as f64; if (result as i64) != i { fpscr::set_exception(ctx, fpscr::XX); } ctx.fpr[instr.rd()] = result; fpscr::set_fprf(ctx, fpscr::classify_fprf(result)); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fctidx => { // Convert to integer doubleword (round per FPSCR[RN]). 
// PPCBUG-229: set XX on inexact (fractional input). let val = ctx.fpr[instr.rb()]; let result = if val.is_nan() { fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 }); 0x8000_0000_0000_0000u64 } else if val >= (i64::MAX as f64) { fpscr::set_exception(ctx, fpscr::VXCVI); 0x7FFF_FFFF_FFFF_FFFFu64 } else if val < (i64::MIN as f64) { fpscr::set_exception(ctx, fpscr::VXCVI); 0x8000_0000_0000_0000u64 } else { if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); } fpscr::round_to_i64(ctx, val) as u64 }; ctx.fpr[instr.rd()] = f64::from_bits(result); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fctidzx => { // Convert to integer doubleword (round toward zero). // PPCBUG-229: set XX on inexact. let val = ctx.fpr[instr.rb()]; let result = if val.is_nan() { fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 }); 0x8000_0000_0000_0000u64 } else if val >= (i64::MAX as f64) { fpscr::set_exception(ctx, fpscr::VXCVI); 0x7FFF_FFFF_FFFF_FFFFu64 } else if val < (i64::MIN as f64) { fpscr::set_exception(ctx, fpscr::VXCVI); 0x8000_0000_0000_0000u64 } else { if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); } (val.trunc() as i64) as u64 }; ctx.fpr[instr.rd()] = f64::from_bits(result); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fctiwx => { // Convert to integer word (round per FPSCR[RN]). // PPCBUG-230: set XX on inexact. 
let val = ctx.fpr[instr.rb()]; let result_u32: u32 = if val.is_nan() { fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 }); 0x8000_0000 } else if val > (i32::MAX as f64) { fpscr::set_exception(ctx, fpscr::VXCVI); 0x7FFF_FFFF } else if val < (i32::MIN as f64) { fpscr::set_exception(ctx, fpscr::VXCVI); 0x8000_0000 } else { if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); } fpscr::round_to_i32(ctx, val) as u32 }; ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::fctiwzx => { // Convert to integer word (round toward zero). // PPCBUG-230: set XX on inexact. let val = ctx.fpr[instr.rb()]; let result_u32: u32 = if val.is_nan() { fpscr::set_exception(ctx, fpscr::VXCVI | if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 }); 0x8000_0000 } else if val > (i32::MAX as f64) { fpscr::set_exception(ctx, fpscr::VXCVI); 0x7FFF_FFFF } else if val < (i32::MIN as f64) { fpscr::set_exception(ctx, fpscr::VXCVI); 0x8000_0000 } else { if val != val.trunc() { fpscr::set_exception(ctx, fpscr::XX); } val.trunc() as i32 as u32 }; ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } // ===== FPU: Compare ===== PpcOpcode::fcmpu => { let fra = ctx.fpr[instr.ra()]; let frb = ctx.fpr[instr.rb()]; let crfd = instr.crfd(); if fra.is_nan() || frb.is_nan() { ctx.cr[crfd] = crate::context::CrField { lt: false, gt: false, eq: false, so: true }; // fcmpu: VXSNAN on SNaN input; no VXVC even on QNaN. 
if fpscr::is_snan(fra) || fpscr::is_snan(frb) { fpscr::set_exception(ctx, fpscr::VXSNAN); } } else if fra < frb { ctx.cr[crfd] = crate::context::CrField { lt: true, gt: false, eq: false, so: false }; } else if fra > frb { ctx.cr[crfd] = crate::context::CrField { lt: false, gt: true, eq: false, so: false }; } else { ctx.cr[crfd] = crate::context::CrField { lt: false, gt: false, eq: true, so: false }; } // Also mirror the comparison result into FPSCR[FPRF (FL/FG/FE/FU)]. let fprf = if fra.is_nan() || frb.is_nan() { 0b0_0001 } else if fra < frb { 0b0_1000 } else if fra > frb { 0b0_0100 } else { 0b0_0010 }; fpscr::set_fprf(ctx, fprf); ctx.pc += 4; } PpcOpcode::fcmpo => { // Ordered compare: like fcmpu but also sets VXVC on QNaN (or VXSNAN on SNaN). let fra = ctx.fpr[instr.ra()]; let frb = ctx.fpr[instr.rb()]; let crfd = instr.crfd(); if fra.is_nan() || frb.is_nan() { ctx.cr[crfd] = crate::context::CrField { lt: false, gt: false, eq: false, so: true }; if fpscr::is_snan(fra) || fpscr::is_snan(frb) { fpscr::set_exception(ctx, fpscr::VXSNAN | fpscr::VXVC); } else { fpscr::set_exception(ctx, fpscr::VXVC); } } else if fra < frb { ctx.cr[crfd] = crate::context::CrField { lt: true, gt: false, eq: false, so: false }; } else if fra > frb { ctx.cr[crfd] = crate::context::CrField { lt: false, gt: true, eq: false, so: false }; } else { ctx.cr[crfd] = crate::context::CrField { lt: false, gt: false, eq: true, so: false }; } let fprf = if fra.is_nan() || frb.is_nan() { 0b0_0001 } else if fra < frb { 0b0_1000 } else if fra > frb { 0b0_0100 } else { 0b0_0010 }; fpscr::set_fprf(ctx, fprf); ctx.pc += 4; } // ===== FPU: Status/Control ===== PpcOpcode::mffsx => { // Move from FPSCR: frD = FPSCR as double (low 32 bits) ctx.fpr[instr.rd()] = f64::from_bits(ctx.fpscr as u64); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::mtfsfx => { // Move to FPSCR fields: fm mask in bits 7-14, frB value let fm = (instr.raw >> 17) & 0xFF; let val = ctx.fpr[instr.rb()].to_bits() 
as u32; let mut mask = 0u32; for i in 0..8 { if fm & (1 << (7 - i)) != 0 { mask |= 0xF << (28 - i * 4); } } ctx.fpscr = (ctx.fpscr & !mask) | (val & mask); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::mtfsb0x => { // Clear FPSCR bit crbd let bit = instr.crbd(); ctx.fpscr &= !(1 << (31 - bit)); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::mtfsb1x => { // Set FPSCR bit crbd let bit = instr.crbd(); ctx.fpscr |= 1 << (31 - bit); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } PpcOpcode::mtfsfix => { // Move to FPSCR field immediate: crfD = IMM (4 bits) let crfd = instr.crfd(); let imm = (instr.raw >> 12) & 0xF; let shift = 28 - crfd as u32 * 4; ctx.fpscr = (ctx.fpscr & !(0xF << shift)) | (imm << shift); if instr.rc_bit() { update_cr1_from_fpscr(ctx); } ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4b — Unaligned vector load/store // ═════════════════════════════════════════════════════════════════ // lvlx / lvlx128 / lvlxl / lvlxl128: load left-aligned from EA. PpcOpcode::lvlx | PpcOpcode::lvlxl => { let ea = ea_indexed(ctx, instr); ctx.vr[instr.rd()] = crate::vmx::load_vector_left(mem, ea); ctx.pc += 4; } PpcOpcode::lvlx128 | PpcOpcode::lvlxl128 => { let ea = ea_indexed(ctx, instr); ctx.vr[instr.vd128()] = crate::vmx::load_vector_left(mem, ea); ctx.pc += 4; } PpcOpcode::lvrx | PpcOpcode::lvrxl => { let ea = ea_indexed(ctx, instr); ctx.vr[instr.rd()] = crate::vmx::load_vector_right(mem, ea); ctx.pc += 4; } PpcOpcode::lvrx128 | PpcOpcode::lvrxl128 => { let ea = ea_indexed(ctx, instr); ctx.vr[instr.vd128()] = crate::vmx::load_vector_right(mem, ea); ctx.pc += 4; } PpcOpcode::stvlx | PpcOpcode::stvlxl => { let ea = ea_indexed(ctx, instr); // PPCBUG-513: stvlx/stvlxl were missing invalidate_for_write. 
// store_vector_left writes [ea, (ea & !0xF)+15]; in the worst case (ea & 0xF == 0) // that is exactly 16 bytes all within the same 16-byte block, so ea+15 lands in the // same 128-byte cache line. Two-call form is kept for defensive correctness. if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { let first_line = ea & !RESERVATION_MASK; let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; t.invalidate_for_write(first_line); if last_line != first_line { t.invalidate_for_write(last_line); } } } crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stvlx128 | PpcOpcode::stvlxl128 => { let ea = ea_indexed(ctx, instr); // PPCBUG-513: stvlx128/stvlxl128 were missing invalidate_for_write. if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { let first_line = ea & !RESERVATION_MASK; let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; t.invalidate_for_write(first_line); if last_line != first_line { t.invalidate_for_write(last_line); } } } crate::vmx::store_vector_left(mem, ea, ctx.vr[instr.vs128()]); ctx.pc += 4; } PpcOpcode::stvrx | PpcOpcode::stvrxl => { let ea = ea_indexed(ctx, instr); // PPCBUG-514: stvrx/stvrxl were missing invalidate_for_write. // store_vector_right writes [ea & !0xF, ea-1] (up to 15 bytes, all within a single // 16-byte-aligned block). Two-call form is kept for defensive correctness. // stvrx at shift==0 is a no-op; the guard fires unconditionally (cheap). 
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { let first_line = ea & !RESERVATION_MASK; let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; t.invalidate_for_write(first_line); if last_line != first_line { t.invalidate_for_write(last_line); } } } crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.rs()]); ctx.pc += 4; } PpcOpcode::stvrx128 | PpcOpcode::stvrxl128 => { let ea = ea_indexed(ctx, instr); // PPCBUG-514: stvrx128/stvrxl128 were missing invalidate_for_write. if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { let first_line = ea & !RESERVATION_MASK; let last_line = ea.wrapping_add(15) & !RESERVATION_MASK; t.invalidate_for_write(first_line); if last_line != first_line { t.invalidate_for_write(last_line); } } } crate::vmx::store_vector_right(mem, ea, ctx.vr[instr.vs128()]); ctx.pc += 4; } // lvewx128 / stvewx128: VMX128 element-indexed 32-bit load/store. // Like lvewx the whole 16 bytes at the aligned EA go into VD; the // element-of-interest is implied by EA's low bits. PpcOpcode::lvewx128 => { let ea = ea_indexed(ctx, instr) & !0xF; let mut bytes = [0u8; 16]; for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(bytes); ctx.pc += 4; } PpcOpcode::stvewx128 => { // Mirror of stvewx: word-align EA, extract one 32-bit lane, write 4 bytes only. // Previous code used & !0xF (16-byte) and wrote all 16 bytes, corrupting 12 // adjacent bytes on every execution (PPCBUG-510). 
let ea_unaligned = ea_indexed(ctx, instr);
            // Word-aligned store address; the low EA bits only select the lane.
            let ea = ea_unaligned & !0x3u32;
            if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
                if t.has_active_reservers() {
                    // Pass the reservation-line address, as the other vector
                    // store arms do. A 4-byte word-aligned store cannot cross a
                    // 128-byte line, so one call is enough.
                    t.invalidate_for_write(ea & !RESERVATION_MASK);
                }
            }
            // Lane index (0..=3) within the 16-byte vector, taken from EA bits 2-3.
            let slot = ((ea_unaligned & 0xF) >> 2) as usize;
            let bytes = ctx.vr[instr.vs128()].as_bytes();
            // Assemble the selected lane most-significant byte first.
            let w = ((bytes[slot * 4] as u32) << 24)
                | ((bytes[slot * 4 + 1] as u32) << 16)
                | ((bytes[slot * 4 + 2] as u32) << 8)
                | (bytes[slot * 4 + 3] as u32);
            mem.write_u32(ea, w);
            ctx.pc += 4;
        }

        // ═════════════════════════════════════════════════════════════════
        // §4a — VMX integer add/sub (modulo and saturating), mul, avg, sum
        // ═════════════════════════════════════════════════════════════════

        // -------- modulo add/sub (byte/halfword/word) --------
        PpcOpcode::vaddubm => {
            // Per-byte wrapping add: vD[i] = vA[i] + vB[i] (mod 2^8).
            let a = ctx.vr[instr.ra()].as_bytes();
            let b = ctx.vr[instr.rb()].as_bytes();
            let mut r = [0u8; 16];
            for i in 0..16 { r[i] = a[i].wrapping_add(b[i]); }
            ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
            ctx.pc += 4;
        }
        PpcOpcode::vsububm => {
            // Per-byte wrapping subtract: vD[i] = vA[i] - vB[i] (mod 2^8).
            let a = ctx.vr[instr.ra()].as_bytes();
            let b = ctx.vr[instr.rb()].as_bytes();
            let mut r = [0u8; 16];
            for i in 0..16 { r[i] = a[i].wrapping_sub(b[i]); }
            ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
            ctx.pc += 4;
        }
        PpcOpcode::vadduhm => {
            // Per-halfword wrapping add over the eight u16 lanes.
            let a = ctx.vr[instr.ra()].as_u16x8();
            let b = ctx.vr[instr.rb()].as_u16x8();
            let mut r = [0u16; 8];
            for i in 0..8 { r[i] = a[i].wrapping_add(b[i]); }
            ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
            ctx.pc += 4;
        }
        PpcOpcode::vsubuhm => {
            // Per-halfword wrapping subtract over the eight u16 lanes.
            let a = ctx.vr[instr.ra()].as_u16x8();
            let b = ctx.vr[instr.rb()].as_u16x8();
            let mut r = [0u16; 8];
            for i in 0..8 { r[i] = a[i].wrapping_sub(b[i]); }
            ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
            ctx.pc += 4;
        }
        // vadduwm / vsubuwm are implemented above (modulo word add/sub arms).
// -------- saturating add/sub (signed + unsigned) -------- PpcOpcode::vaddubs => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; let mut sat = false; for i in 0..16 { let (v, s) = crate::vmx::sat_add_u8(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vsububs => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; let mut sat = false; for i in 0..16 { let (v, s) = crate::vmx::sat_sub_u8(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vaddsbs => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]); let mut r = [0i8; 16]; let mut sat = false; for i in 0..16 { let (v, s) = crate::vmx::sat_add_i8(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r); ctx.pc += 4; } PpcOpcode::vsubsbs => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]); let mut r = [0i8; 16]; let mut sat = false; for i in 0..16 { let (v, s) = crate::vmx::sat_sub_i8(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r); ctx.pc += 4; } PpcOpcode::vadduhs => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; let mut sat = false; for i in 0..8 { let (v, s) = crate::vmx::sat_add_u16(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vsubuhs => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; let mut sat = false; for i in 0..8 { let (v, s) = 
crate::vmx::sat_sub_u16(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vaddshs => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i16; 8]; let mut sat = false; for i in 0..8 { let (v, s) = crate::vmx::sat_add_i16(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vsubshs => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i16; 8]; let mut sat = false; for i in 0..8 { let (v, s) = crate::vmx::sat_sub_i16(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vadduws => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::sat_add_u32(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vsubuws => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::sat_sub_u32(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vaddsws => { let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]); let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::sat_add_i32(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vsubsws => { let a = 
crate::vmx::as_i32x4(ctx.vr[instr.ra()]); let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::sat_sub_i32(a[i], b[i]); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } // -------- vaddcuw / vsubcuw: per-lane carry / borrow out -------- PpcOpcode::vaddcuw => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { let (_, c) = a[i].overflowing_add(b[i]); r[i] = if c { 1 } else { 0 }; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vsubcuw => { // "Subtract Carryout": r = 1 if a >= b (no borrow), 0 otherwise. let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] >= b[i] { 1 } else { 0 }; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // -------- averages -------- PpcOpcode::vavgub => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { r[i] = crate::vmx::avg_u8(a[i], b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vavgsb => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]); let mut r = [0i8; 16]; for i in 0..16 { r[i] = crate::vmx::avg_i8(a[i], b[i]); } ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r); ctx.pc += 4; } PpcOpcode::vavguh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = crate::vmx::avg_u16(a[i], b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vavgsh => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i16; 8]; for i in 0..8 { r[i] = 
crate::vmx::avg_i16(a[i], b[i]); } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vavguw => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = crate::vmx::avg_u32(a[i], b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vavgsw => { let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]); let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; for i in 0..4 { r[i] = crate::vmx::avg_i32(a[i], b[i]); } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } // -------- multiplies (even / odd lanes — see §5 hazard note) -------- // vmuleub: even u8 lanes (BE index 0,2,4,...,14) → u16 lanes. PpcOpcode::vmuleub => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = a[2 * i] as u16 * b[2 * i] as u16; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vmuloub => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = a[2 * i + 1] as u16 * b[2 * i + 1] as u16; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vmulesb => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]); let mut r = [0i16; 8]; for i in 0..8 { r[i] = a[2 * i] as i16 * b[2 * i] as i16; } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vmulosb => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]); let mut r = [0i16; 8]; for i in 0..8 { r[i] = a[2 * i + 1] as i16 * b[2 * i + 1] as i16; } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vmuleuh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[2 * i] 
as u32 * b[2 * i] as u32; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vmulouh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[2 * i + 1] as u32 * b[2 * i + 1] as u32; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vmulesh => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; for i in 0..4 { r[i] = a[2 * i] as i32 * b[2 * i] as i32; } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vmulosh => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; for i in 0..4 { r[i] = a[2 * i + 1] as i32 * b[2 * i + 1] as i32; } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } // -------- multiply-add halfword (saturating) -------- PpcOpcode::vmhaddshs => { // vD[i] = sat_i16((vA[i] * vB[i]) >> 15 + vC[i]) let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let c = crate::vmx::as_i16x8(ctx.vr[instr.rc()]); let mut r = [0i16; 8]; let mut sat = false; for i in 0..8 { let prod = (a[i] as i32 * b[i] as i32) >> 15; let (v, s) = crate::vmx::sat_i32_to_i16(prod + c[i] as i32); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vmhraddshs => { // Rounded multiply-add: (vA[i]*vB[i] + 0x4000) >> 15 + vC[i], saturating. 
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let c = crate::vmx::as_i16x8(ctx.vr[instr.rc()]); let mut r = [0i16; 8]; let mut sat = false; for i in 0..8 { let prod = (a[i] as i32 * b[i] as i32 + 0x4000) >> 15; let (v, s) = crate::vmx::sat_i32_to_i16(prod + c[i] as i32); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vmladduhm => { // Multiply-low add (modulo): vD[i] = u16(vA[i] * vB[i] + vC[i]). let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let c = ctx.vr[instr.rc()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = a[i].wrapping_mul(b[i]).wrapping_add(c[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } // -------- VMX sum-of-products -------- // vmsumubm: vD[i:u32] = sum over j in [0..4] of vA[4i+j:u8] * vB[4i+j:u8] + vC[i]. PpcOpcode::vmsumubm => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let c = ctx.vr[instr.rc()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { let mut s = c[i]; for j in 0..4 { s = s.wrapping_add(a[4*i+j] as u32 * b[4*i+j] as u32); } r[i] = s; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vmsummbm => { // signed bytes × unsigned bytes, signed accumulator let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = ctx.vr[instr.rb()].as_bytes(); let c = crate::vmx::as_i32x4(ctx.vr[instr.rc()]); let mut r = [0i32; 4]; for i in 0..4 { let mut s = c[i]; for j in 0..4 { s = s.wrapping_add(a[4*i+j] as i32 * b[4*i+j] as i32); } r[i] = s; } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vmsumuhm => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let c = ctx.vr[instr.rc()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { let s = (a[2*i] as u32 * b[2*i] as u32) .wrapping_add(a[2*i+1] as u32 * b[2*i+1] as 
u32) .wrapping_add(c[i]); r[i] = s; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vmsumuhs => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let c = ctx.vr[instr.rc()].as_u32x4(); let mut r = [0u32; 4]; let mut sat = false; for i in 0..4 { let s = (a[2*i] as u64 * b[2*i] as u64) + (a[2*i+1] as u64 * b[2*i+1] as u64) + c[i] as u64; let (v, overflow) = if s > u32::MAX as u64 { (u32::MAX, true) } else { (s as u32, false) }; r[i] = v; sat |= overflow; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vmsumshm => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let c = crate::vmx::as_i32x4(ctx.vr[instr.rc()]); let mut r = [0i32; 4]; for i in 0..4 { let s = (a[2*i] as i32 * b[2*i] as i32) .wrapping_add(a[2*i+1] as i32 * b[2*i+1] as i32) .wrapping_add(c[i]); r[i] = s; } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vmsumshs => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let c = crate::vmx::as_i32x4(ctx.vr[instr.rc()]); let mut r = [0i32; 4]; let mut sat = false; for i in 0..4 { // Running-sum saturation: accumulate in i64, clamp once at end. 
let s = (a[2*i] as i64 * b[2*i] as i64)
                    + (a[2*i+1] as i64 * b[2*i+1] as i64)
                    + c[i] as i64;
                let (v, o) = crate::vmx::sat_i64_to_i32(s);
                r[i] = v;
                sat |= o;
            }
            if sat { ctx.set_vscr_sat(true); }
            ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
            ctx.pc += 4;
        }

        // -------- VMX sum-across --------
        PpcOpcode::vsumsws => {
            // vD[3] = sat_i32(vC[3] + sum over i in 0..4 of vA[i])
            // The accumulator operand is read from the rb field.
            let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
            let c = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
            // Sum in i64 so four i32 lanes plus the accumulator cannot overflow
            // before the single saturating clamp below.
            let s = a.iter().map(|&x| x as i64).sum::<i64>() + c[3] as i64;
            let (v, sat) = crate::vmx::sat_i64_to_i32(s);
            if sat { ctx.set_vscr_sat(true); }
            // Only lane 3 carries the result; lanes 0..=2 are zeroed.
            ctx.vr[instr.rd()] = crate::vmx::from_i32x4([0, 0, 0, v]);
            ctx.pc += 4;
        }
        PpcOpcode::vsum2sws => {
            // Two 2-word partial sums at lanes 1 and 3.
            let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
            let c = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
            let s0 = a[0] as i64 + a[1] as i64 + c[1] as i64;
            let s1 = a[2] as i64 + a[3] as i64 + c[3] as i64;
            let (v0, sat0) = crate::vmx::sat_i64_to_i32(s0);
            let (v1, sat1) = crate::vmx::sat_i64_to_i32(s1);
            if sat0 | sat1 { ctx.set_vscr_sat(true); }
            ctx.vr[instr.rd()] = crate::vmx::from_i32x4([0, v0, 0, v1]);
            ctx.pc += 4;
        }
        PpcOpcode::vsum4sbs => {
            // Per word lane: saturating sum of four signed bytes plus the
            // matching i32 accumulator lane.
            let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
            let c = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
            let mut r = [0i32; 4];
            let mut sat = false;
            for i in 0..4 {
                let s = a[4*i] as i64 + a[4*i+1] as i64 + a[4*i+2] as i64 + a[4*i+3] as i64
                    + c[i] as i64;
                let (v, o) = crate::vmx::sat_i64_to_i32(s);
                r[i] = v;
                sat |= o;
            }
            if sat { ctx.set_vscr_sat(true); }
            ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
            ctx.pc += 4;
        }
        PpcOpcode::vsum4ubs => {
            // Per word lane: sum of four unsigned bytes plus the matching u32
            // accumulator lane, clamped to u32::MAX.
            let a = ctx.vr[instr.ra()].as_bytes();
            let c = ctx.vr[instr.rb()].as_u32x4();
            let mut r = [0u32; 4];
            let mut sat = false;
            for i in 0..4 {
                let s = a[4*i] as u64 + a[4*i+1] as u64 + a[4*i+2] as u64 + a[4*i+3] as u64
                    + c[i] as u64;
                let (v, o) = if s > u32::MAX as u64 { (u32::MAX, true) } else { (s as u32, false) };
                r[i] = v;
                sat |= o;
            }
            if sat {
ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vsum4shs => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let c = crate::vmx::as_i32x4(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; let mut sat = false; for i in 0..4 { let s = a[2*i] as i64 + a[2*i+1] as i64 + c[i] as i64; let (v, o) = crate::vmx::sat_i64_to_i32(s); r[i] = v; sat |= o; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4c — VMX integer compares (all set 0xFF/0xFFFF/0xFFFFFFFF per lane) // ═════════════════════════════════════════════════════════════════ PpcOpcode::vcmpequb => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] == b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } ctx.vr[instr.rd()] = v; ctx.pc += 4; } PpcOpcode::vcmpequh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] == b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } ctx.vr[instr.rd()] = v; ctx.pc += 4; } PpcOpcode::vcmpgtub => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } ctx.vr[instr.rd()] = v; ctx.pc += 4; } 
PpcOpcode::vcmpgtsb => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]); let mut r = [0u8; 16]; for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; } let v = xenia_types::Vec128::from_bytes(r); if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } ctx.vr[instr.rd()] = v; ctx.pc += 4; } PpcOpcode::vcmpgtuh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } ctx.vr[instr.rd()] = v; ctx.pc += 4; } PpcOpcode::vcmpgtsh => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0u16; 8]; for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u16x8_array(r); if instr.vc_rc_bit() { let (t, f) = crate::vmx::cr6_flags_from_mask(v); ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false }; } ctx.vr[instr.rd()] = v; ctx.pc += 4; } PpcOpcode::vcmpgtuw => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u32x4_array(r); if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); } ctx.vr[instr.rd()] = v; ctx.pc += 4; } PpcOpcode::vcmpgtsw => { let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]); let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]); let mut r = [0u32; 4]; for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; } let v = xenia_types::Vec128::from_u32x4_array(r); if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); } 
ctx.vr[instr.rd()] = v; ctx.pc += 4; } // vcmpbfp(128): set upper/lower nibbles per lane based on bounds test. PpcOpcode::vcmpbfp | PpcOpcode::vcmpbfp128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vcmpbfp128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = ctx.vr[ra].as_f32x4(); let b = ctx.vr[rb].as_f32x4(); let mut r = [0u32; 4]; let mut any_out = false; for i in 0..4 { let mut lane: u32 = 0; if a[i].is_nan() || b[i].is_nan() || a[i] > b[i] { lane |= 0x8000_0000; any_out = true; } if a[i].is_nan() || b[i].is_nan() || a[i] < -b[i] { lane |= 0x4000_0000; any_out = true; } r[i] = lane; } let rc = if is_128 { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() }; if rc { ctx.cr[6] = crate::context::CrField { lt: false, gt: false, eq: !any_out, so: false, }; } ctx.vr[rd] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4d — VMX shifts and rotates // ═════════════════════════════════════════════════════════════════ PpcOpcode::vslb => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { r[i] = a[i] << (b[i] & 7); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vsrb => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { r[i] = a[i] >> (b[i] & 7); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vsrab => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0i8; 16]; for i in 0..16 { r[i] = a[i] >> (b[i] & 7); } ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r); ctx.pc += 4; } PpcOpcode::vrlb => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { r[i] = a[i].rotate_left((b[i] & 7) 
as u32); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vslh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = a[i] << (b[i] & 0xF); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vsrh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = a[i] >> (b[i] & 0xF); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vsrah => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0i16; 8]; for i in 0..8 { r[i] = a[i] >> (b[i] & 0xF); } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vrlh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = a[i].rotate_left((b[i] & 0xF) as u32); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } // vslw / vsrw / vsraw / vrlw (word shifts) are implemented above via // vmx_reg_triple — skip here. // Full 128-bit bit shifts (vsl/vsr): shift by the low 3 bits of vB[15]. PpcOpcode::vsl => { let a = u128::from_be_bytes(ctx.vr[instr.ra()].as_bytes()); let shift = (ctx.vr[instr.rb()].as_bytes()[15] & 7) as u32; let r = if shift == 0 { a } else { a << shift }; ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r.to_be_bytes()); ctx.pc += 4; } PpcOpcode::vsr => { let a = u128::from_be_bytes(ctx.vr[instr.ra()].as_bytes()); let shift = (ctx.vr[instr.rb()].as_bytes()[15] & 7) as u32; let r = if shift == 0 { a } else { a >> shift }; ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r.to_be_bytes()); ctx.pc += 4; } // vslo/vsro: 128-bit octet (byte) shift. vB[15] & 0x78 gives bit count / 8 * 8. 
PpcOpcode::vslo | PpcOpcode::vslo128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vslo128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = u128::from_be_bytes(ctx.vr[ra].as_bytes()); let nbytes = ((ctx.vr[rb].as_bytes()[15] >> 3) & 0xF) as u32; let r = if nbytes == 0 { a } else { a << (nbytes * 8) }; ctx.vr[rd] = xenia_types::Vec128::from_bytes(r.to_be_bytes()); ctx.pc += 4; } PpcOpcode::vsro | PpcOpcode::vsro128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vsro128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = u128::from_be_bytes(ctx.vr[ra].as_bytes()); let nbytes = ((ctx.vr[rb].as_bytes()[15] >> 3) & 0xF) as u32; let r = if nbytes == 0 { a } else { a >> (nbytes * 8) }; ctx.vr[rd] = xenia_types::Vec128::from_bytes(r.to_be_bytes()); ctx.pc += 4; } // vrlimi128: rotate-left-immediate then partial-merge into vD. // Field layout (from canary ppc_decode_data.cc VX128_4): // imm = bits(22..=23,28..=29) for shift, mask = bits(24..=27) // Simplified semantics: r = vB rotated left by `shift` words, merged // into vD using a per-word `mask` (mask bit N == 1 ⇒ use vD[N], else // use rotated[N]). Titles generally use mask=0xF (copy-all) which // makes this behave like a plain word rotate. PpcOpcode::vrlimi128 => { let shift = instr.vx128_4_z() as usize; let mask = instr.vx128_4_imm(); let b = ctx.vr[instr.vb128()].as_u32x4(); let d = ctx.vr[instr.vd128()].as_u32x4(); let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]]; let mut r = [0u32; 4]; for i in 0..4 { // mask bit 3 corresponds to word 0 (BE-first). Use rot when // the corresponding mask bit is set. 
let use_rot = (mask >> (3 - i)) & 1 == 1; r[i] = if use_rot { rot[i] } else { d[i] }; } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4e — VMX merge (interleave high / low halves) // ═════════════════════════════════════════════════════════════════ PpcOpcode::vmrghb => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..8 { r[2*i] = a[i]; r[2*i+1] = b[i]; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vmrglb => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..8 { r[2*i] = a[8+i]; r[2*i+1] = b[8+i]; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vmrghh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..4 { r[2*i] = a[i]; r[2*i+1] = b[i]; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vmrglh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..4 { r[2*i] = a[4+i]; r[2*i+1] = b[4+i]; } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4f — VMX pack / unpack (saturating and modulo + D3D + 5-6-5) // ═════════════════════════════════════════════════════════════════ // ---- Pack modulo (truncate) ---- PpcOpcode::vpkuhum | PpcOpcode::vpkuhum128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vpkuhum128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = ctx.vr[ra].as_u16x8(); let b = ctx.vr[rb].as_u16x8(); let mut r = [0u8; 16]; for i in 0..8 { r[i] = a[i] as u8; } for i in 0..8 { r[8 + i] = b[i] as u8; } 
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vpkuwum | PpcOpcode::vpkuwum128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vpkuwum128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = ctx.vr[ra].as_u32x4(); let b = ctx.vr[rb].as_u32x4(); let mut r = [0u16; 8]; for i in 0..4 { r[i] = a[i] as u16; } for i in 0..4 { r[4 + i] = b[i] as u16; } ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } // ---- Pack with saturation ---- PpcOpcode::vpkuhus | PpcOpcode::vpkuhus128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vpkuhus128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = ctx.vr[ra].as_u16x8(); let b = ctx.vr[rb].as_u16x8(); let mut r = [0u8; 16]; let mut sat = false; for i in 0..8 { let (v, s) = crate::vmx::sat_u16_to_u8(a[i]); r[i] = v; sat |= s; } for i in 0..8 { let (v, s) = crate::vmx::sat_u16_to_u8(b[i]); r[8 + i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[rd] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vpkshus | PpcOpcode::vpkshus128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vpkshus128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = crate::vmx::as_i16x8(ctx.vr[ra]); let b = crate::vmx::as_i16x8(ctx.vr[rb]); let mut r = [0u8; 16]; let mut sat = false; for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_u8(a[i]); r[i] = v; sat |= s; } for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_u8(b[i]); r[8 + i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[rd] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vpkshss | PpcOpcode::vpkshss128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vpkshss128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } 
else { (instr.ra(), instr.rb(), instr.rd()) }; let a = crate::vmx::as_i16x8(ctx.vr[ra]); let b = crate::vmx::as_i16x8(ctx.vr[rb]); let mut r = [0i8; 16]; let mut sat = false; for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_i8(a[i]); r[i] = v; sat |= s; } for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_i8(b[i]); r[8 + i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[rd] = crate::vmx::from_i8x16(r); ctx.pc += 4; } PpcOpcode::vpkuwus | PpcOpcode::vpkuwus128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vpkuwus128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = ctx.vr[ra].as_u32x4(); let b = ctx.vr[rb].as_u32x4(); let mut r = [0u16; 8]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::sat_u32_to_u16(a[i]); r[i] = v; sat |= s; } for i in 0..4 { let (v, s) = crate::vmx::sat_u32_to_u16(b[i]); r[4 + i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vpkswus | PpcOpcode::vpkswus128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vpkswus128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = crate::vmx::as_i32x4(ctx.vr[ra]); let b = crate::vmx::as_i32x4(ctx.vr[rb]); let mut r = [0u16; 8]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_u16(a[i]); r[i] = v; sat |= s; } for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_u16(b[i]); r[4 + i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vpkswss | PpcOpcode::vpkswss128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vpkswss128); let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) }; let a = crate::vmx::as_i32x4(ctx.vr[ra]); let b = 
crate::vmx::as_i32x4(ctx.vr[rb]); let mut r = [0i16; 8]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_i16(a[i]); r[i] = v; sat |= s; } for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_i16(b[i]); r[4 + i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[rd] = crate::vmx::from_i16x8(r); ctx.pc += 4; } // vpkpx: pack two u32 vectors into one u16 (5-5-5 pixel) vector. PpcOpcode::vpkpx => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u16; 8]; for i in 0..4 { r[i] = crate::vmx::pack_pixel_555(a[i]); } for i in 0..4 { r[4 + i] = crate::vmx::pack_pixel_555(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } // ---- Unpack (sign-extend) ---- PpcOpcode::vupkhsb | PpcOpcode::vupkhsb128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vupkhsb128); let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) } else { (instr.rb(), instr.rd()) }; let b = crate::vmx::as_i8x16(ctx.vr[rb]); let mut r = [0i16; 8]; for i in 0..8 { r[i] = b[i] as i16; } ctx.vr[rd] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vupklsb | PpcOpcode::vupklsb128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vupklsb128); let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) } else { (instr.rb(), instr.rd()) }; let b = crate::vmx::as_i8x16(ctx.vr[rb]); let mut r = [0i16; 8]; for i in 0..8 { r[i] = b[8 + i] as i16; } ctx.vr[rd] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vupkhsh => { let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; for i in 0..4 { r[i] = b[i] as i32; } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vupklsh => { let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; for i in 0..4 { r[i] = b[4 + i] as i32; } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vupkhpx => { let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = 
crate::vmx::unpack_pixel_555(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vupklpx => { let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = crate::vmx::unpack_pixel_555(b[4 + i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // ---- D3D pack / unpack (VMX128-only) ---- // // First-Pixels M3: fixed immediate extraction + added pack types // 1-6. The prior `(instr.raw >> 6) & 0x7` was LSB-numbered (wrong // position) and masked to only 3 bits. Canary extracts from the // VX128_3/4 `IMM` field at PPC bits 16-22 (MSB) and does // `type = IMM >> 2` to pick up the 5-bit type selector — the low // 2 bits (`pack`) select output-slot layout for `vpkd3d128`. PpcOpcode::vpkd3d128 => { use crate::vmx::D3dPackType; let uimm = crate::decoder::extract_vx128_uimm5(instr.raw); let pack = (uimm & 3) as usize; let shift = instr.vx128_4_z() as usize; let ty = D3dPackType::from_immediate(uimm >> 2); let src = ctx.vr[instr.vb128()]; let out = match ty { D3dPackType::D3dColor => crate::vmx::pack_d3dcolor(src), D3dPackType::NormShort2 => crate::vmx::pack_normshort2(src), D3dPackType::NormPacked32 => crate::vmx::pack_normpacked32(src), D3dPackType::Float16_2 => crate::vmx::pack_float16_2(src), D3dPackType::NormShort4 => crate::vmx::pack_normshort4(src), D3dPackType::Float16_4 => crate::vmx::pack_float16_4(src), D3dPackType::NormPacked64 => crate::vmx::pack_normpacked64(src), D3dPackType::Other(t) => { tracing::warn!( raw = format_args!("{:#010x}", instr.raw), uimm, ty = t, "vpkd3d128: unhandled pack type at {:#010x}", ctx.pc, ); src } }; // Post-pack permutation: merge packed `out` into previous `vd` // per canary ppc_emit_altivec.cc:2126-2188 MakePermuteMask tables. 
// MakePermuteMask(r0,l0, r1,l1, r2,l2, r3,l3): result[i] = if ri==0 { prev[li] } else { out[li] } let result = if pack == 0 { out } else { // (source_reg, lane): 0=prev vd, 1=packed out const PERM: [[[(u8, u8); 4]; 4]; 3] = [ // pack=1 (VPACK_32): places out[3] at lane (3-shift) [[(0,0),(0,1),(0,2),(1,3)], [(0,0),(0,1),(1,3),(0,3)], [(0,0),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]], // pack=2 (64-bit): places out[2..3] at lanes (2-shift)..(3-shift) [[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)], [(1,2),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]], // pack=3 (64-bit): same as pack=2 except shift=3 selects out[2] at lane 3 [[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)], [(1,2),(1,3),(0,2),(0,3)], [(0,0),(0,1),(0,2),(1,2)]], ]; let prev = ctx.vr[instr.vd128()]; let pw = prev.as_u32x4(); let ow = out.as_u32x4(); let sel = PERM[pack - 1][shift]; xenia_types::Vec128::from_u32x4_array([ if sel[0].0 == 0 { pw[sel[0].1 as usize] } else { ow[sel[0].1 as usize] }, if sel[1].0 == 0 { pw[sel[1].1 as usize] } else { ow[sel[1].1 as usize] }, if sel[2].0 == 0 { pw[sel[2].1 as usize] } else { ow[sel[2].1 as usize] }, if sel[3].0 == 0 { pw[sel[3].1 as usize] } else { ow[sel[3].1 as usize] }, ]) }; ctx.vr[instr.vd128()] = result; ctx.pc += 4; } PpcOpcode::vupkd3d128 => { use crate::vmx::D3dPackType; let uimm = crate::decoder::extract_vx128_uimm5(instr.raw); let ty = D3dPackType::from_immediate(uimm >> 2); let src = ctx.vr[instr.vb128()]; let out = match ty { D3dPackType::D3dColor => crate::vmx::unpack_d3dcolor(src), D3dPackType::NormShort2 => crate::vmx::unpack_normshort2(src), D3dPackType::NormPacked32 => crate::vmx::unpack_normpacked32(src), D3dPackType::Float16_2 => crate::vmx::unpack_float16_2(src), D3dPackType::NormShort4 => crate::vmx::unpack_normshort4(src), D3dPackType::Float16_4 => crate::vmx::unpack_float16_4(src), D3dPackType::NormPacked64 => crate::vmx::unpack_normpacked64(src), D3dPackType::Other(t) => { tracing::warn!( raw = format_args!("{:#010x}", 
instr.raw), uimm, ty = t, "vupkd3d128: unhandled pack type at {:#010x}", ctx.pc, ); src } }; ctx.vr[instr.vd128()] = out; ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4g — VMX convert (float ↔ fixed-point) // ═════════════════════════════════════════════════════════════════ // vctsxs / vctuxs: f32 → i32/u32, scaled by 2^uimm, saturating. PpcOpcode::vctsxs => { let uimm = (instr.raw >> 16) & 0x1F; let b = ctx.vr[instr.rb()].as_f32x4(); let mut r = [0i32; 4]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::cvt_f32_to_i32_sat(b[i], uimm); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vctuxs => { let uimm = (instr.raw >> 16) & 0x1F; let b = ctx.vr[instr.rb()].as_f32x4(); let mut r = [0u32; 4]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::cvt_f32_to_u32_sat(b[i], uimm); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // vcfsx / vcfux: i32/u32 → f32, scaled by 2^-uimm. PpcOpcode::vcfsx => { let uimm = (instr.raw >> 16) & 0x1F; let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]); let mut r = [0f32; 4]; for i in 0..4 { r[i] = crate::vmx::cvt_i32_to_f32(b[i], uimm); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vcfux => { let uimm = (instr.raw >> 16) & 0x1F; let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = crate::vmx::cvt_u32_to_f32(b[i], uimm); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } // VMX128 convert variants. uimm lives in bits 16-20 of the encoded form. 
PpcOpcode::vcfpsxws128 => { let uimm = (instr.raw >> 16) & 0x1F; let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0i32; 4]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::cvt_f32_to_i32_sat(b[i], uimm); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.vd128()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vcfpuxws128 => { let uimm = (instr.raw >> 16) & 0x1F; let b = ctx.vr[instr.vb128()].as_f32x4(); let mut r = [0u32; 4]; let mut sat = false; for i in 0..4 { let (v, s) = crate::vmx::cvt_f32_to_u32_sat(b[i], uimm); r[i] = v; sat |= s; } if sat { ctx.set_vscr_sat(true); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vcsxwfp128 => { let uimm = (instr.raw >> 16) & 0x1F; let b = crate::vmx::as_i32x4(ctx.vr[instr.vb128()]); let mut r = [0f32; 4]; for i in 0..4 { r[i] = crate::vmx::cvt_i32_to_f32(b[i], uimm); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vcuxwfp128 => { let uimm = (instr.raw >> 16) & 0x1F; let b = ctx.vr[instr.vb128()].as_u32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = crate::vmx::cvt_u32_to_f32(b[i], uimm); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4h — VMX vector FPU (exp / log) // ═════════════════════════════════════════════════════════════════ PpcOpcode::vexptefp | PpcOpcode::vexptefp128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vexptefp128); let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) } else { (instr.rb(), instr.rd()) }; let b = ctx.vr[rb].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = b[i].exp2(); } ctx.vr[rd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } PpcOpcode::vlogefp | PpcOpcode::vlogefp128 => { let is_128 = matches!(instr.opcode, PpcOpcode::vlogefp128); let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) } else { (instr.rb(), 
instr.rd()) }; let b = ctx.vr[rb].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { r[i] = b[i].log2(); } ctx.vr[rd] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4i — VMX integer max / min // ═════════════════════════════════════════════════════════════════ PpcOpcode::vmaxub => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { r[i] = a[i].max(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vminub => { let a = ctx.vr[instr.ra()].as_bytes(); let b = ctx.vr[instr.rb()].as_bytes(); let mut r = [0u8; 16]; for i in 0..16 { r[i] = a[i].min(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r); ctx.pc += 4; } PpcOpcode::vmaxsb => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]); let mut r = [0i8; 16]; for i in 0..16 { r[i] = a[i].max(b[i]); } ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r); ctx.pc += 4; } PpcOpcode::vminsb => { let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]); let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]); let mut r = [0i8; 16]; for i in 0..16 { r[i] = a[i].min(b[i]); } ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r); ctx.pc += 4; } PpcOpcode::vmaxuh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = a[i].max(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vminuh => { let a = ctx.vr[instr.ra()].as_u16x8(); let b = ctx.vr[instr.rb()].as_u16x8(); let mut r = [0u16; 8]; for i in 0..8 { r[i] = a[i].min(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r); ctx.pc += 4; } PpcOpcode::vmaxsh => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i16; 8]; for i in 0..8 { r[i] = a[i].max(b[i]); } 
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vminsh => { let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]); let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]); let mut r = [0i16; 8]; for i in 0..8 { r[i] = a[i].min(b[i]); } ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r); ctx.pc += 4; } PpcOpcode::vmaxuw => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[i].max(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vminuw => { let a = ctx.vr[instr.ra()].as_u32x4(); let b = ctx.vr[instr.rb()].as_u32x4(); let mut r = [0u32; 4]; for i in 0..4 { r[i] = a[i].min(b[i]); } ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } PpcOpcode::vmaxsw => { let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]); let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; for i in 0..4 { r[i] = a[i].max(b[i]); } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } PpcOpcode::vminsw => { let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]); let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]); let mut r = [0i32; 4]; for i in 0..4 { r[i] = a[i].min(b[i]); } ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4j — VMX128 FMA / permute // ═════════════════════════════════════════════════════════════════ // vmaddcfp128: ISA (VD) <- (VA × VD) + VB — same operation as vmaddfp128 PpcOpcode::vmaddcfp128 => { // ISA: (VD) <- (VA × VD) + VB. Canary InstrEmit_vmaddcfp128 (cc:819): MulAdd(VA, VD, VB). // Previous code computed di.mul_add(bi, ai) = VD×VB+VA — both operands wrong // (PPCBUG-425). Fix: ai.mul_add(di, bi) = VA×VD+VB. 
let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let d = ctx.vr[instr.vd128()].as_f32x4(); let mut r = [0f32; 4]; for i in 0..4 { let ai = vmx::flush_denorm(a[i]); let bi = vmx::flush_denorm(b[i]); let di = vmx::flush_denorm(d[i]); // PPCBUG-437: flush subnormal output too. r[i] = vmx::flush_denorm(ai.mul_add(di, bi)); } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r); ctx.pc += 4; } // vmsum3fp128: horizontal sum of (vA * vB) over lanes 0..3, broadcast to all 4 output lanes. // Canary `InstrEmit_vmsum3fp128` flushes the *output* denormal // unconditionally (not the inputs) — see ppc_emit_altivec.cc:1067-1075. PpcOpcode::vmsum3fp128 => { // PPCBUG-436: flush per-product intermediates (not just the final sum). let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let p0 = vmx::flush_denorm(a[0] * b[0]); let p1 = vmx::flush_denorm(a[1] * b[1]); let p2 = vmx::flush_denorm(a[2] * b[2]); let s = vmx::flush_denorm(p0 + p1 + p2); ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s); ctx.pc += 4; } PpcOpcode::vmsum4fp128 => { // PPCBUG-436. let a = ctx.vr[instr.va128()].as_f32x4(); let b = ctx.vr[instr.vb128()].as_f32x4(); let p0 = vmx::flush_denorm(a[0] * b[0]); let p1 = vmx::flush_denorm(a[1] * b[1]); let p2 = vmx::flush_denorm(a[2] * b[2]); let p3 = vmx::flush_denorm(a[3] * b[3]); let s = vmx::flush_denorm(p0 + p1 + p2 + p3); ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s); ctx.pc += 4; } // vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane). 
PpcOpcode::vpermwi128 => { let imm = instr.vx128_p_perm(); let b = ctx.vr[instr.vb128()].as_u32x4(); let mut r = [0u32; 4]; // Output lane i ← b[(imm >> (2 * (3-i))) & 3] for i in 0..4 { let sel = ((imm >> (2 * (3 - i))) & 3) as usize; r[i] = b[sel]; } ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4k — Scalar reservation / byte-reverse (doubleword) // ═════════════════════════════════════════════════════════════════ // M3.7 — same table-vs-legacy split as lwarx/stwcx. // PPCBUG-108: ldarx + stdcx. have the same cross-thread atomicity // limitation as lwarx/stwcx. in the legacy per-ctx fallback path. // See the lwarx block comment for the full explanation. The M3 // scheduler must enable `ReservationTable` before spawning a second // host thread. stdcx. carries the debug_assert (see below). PpcOpcode::ldarx => { let ea = ea_indexed(ctx, instr); let val = mem.read_u64(ea); ctx.gpr[instr.rd()] = val; ctx.reserved_line = ea & !RESERVATION_MASK; ctx.reserved_val = val; ctx.has_reservation = true; ctx.reservation_width = 8; // PPCBUG-151: doubleword reservation if let Some(t) = &ctx.reservation_table { if t.is_enabled() { ctx.reserved_generation = t.reserve(ea, ctx.hw_id); } } ctx.pc += 4; } // PPCBUG-108: see ldarx comment above. stdcx. legacy path cannot observe // cross-thread reservation invalidations; only safe in lockstep mode. PpcOpcode::stdcx => { let ea = ea_indexed(ctx, instr); let line = ea & !RESERVATION_MASK; let table_route = ctx .reservation_table .as_ref() .filter(|t| t.is_enabled()) .cloned(); // PPCBUG-151: stdcx. requires a doubleword (ldarx) reservation; // a word (lwarx) reservation must not commit here. 
let width_ok = ctx.reservation_width == 8; let success = if let Some(t) = &table_route { ctx.has_reservation && width_ok && ctx.reserved_line == line && t.try_commit(ea, ctx.reserved_generation, ctx.hw_id) } else { // Legacy per-ctx path (M2 default / lockstep). // PPCBUG-108: same sentinel as stwcx. — fires on non-primary // HW slots if the table is disabled under --parallel. debug_assert!( ctx.hw_id == 0, "PPCBUG-108: legacy per-ctx stdcx. on non-primary HW slot \ (hw_id={}) — ReservationTable must be enabled under --parallel", ctx.hw_id ); ctx.has_reservation && width_ok && ctx.reserved_line == line }; if success { mem.write_u64(ea, ctx.gpr[instr.rs()]); ctx.cr[0] = crate::context::CrField { lt: false, gt: false, eq: true, so: ctx.xer_so != 0, }; } else { ctx.cr[0] = crate::context::CrField { lt: false, gt: false, eq: false, so: ctx.xer_so != 0, }; if let Some(t) = &table_route { t.release(ea, ctx.reserved_generation, ctx.hw_id); } } ctx.has_reservation = false; ctx.reservation_width = 0; // PPCBUG-151: always clear on exit ctx.pc += 4; } PpcOpcode::ldbrx => { let ea = ea_indexed(ctx, instr); ctx.gpr[instr.rd()] = mem.read_u64(ea).swap_bytes(); ctx.pc += 4; } PpcOpcode::stdbrx => { let ea = ea_indexed(ctx, instr); if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { t.invalidate_for_write(ea); } } mem.write_u64(ea, ctx.gpr[instr.rs()].swap_bytes()); ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4l — Scalar string load / store (register-length) // ═════════════════════════════════════════════════════════════════ PpcOpcode::lswx => { let mut ea = ea_indexed(ctx, instr); let nb = ctx.xer() & 0x7F; // XER[25..31] let mut rd = instr.rd(); let mut bytes_left = nb; while bytes_left > 0 { let mut val = 0u32; for byte_idx in 0..4 { if bytes_left == 0 { break; } let b = mem.read_u8(ea) as u32; val |= b << (24 - byte_idx * 8); ea = ea.wrapping_add(1); bytes_left -= 1; } 
ctx.gpr[rd] = val as u64; rd = (rd + 1) % 32; } ctx.pc += 4; } PpcOpcode::stswx => { let mut ea = ea_indexed(ctx, instr); let nb = ctx.xer() & 0x7F; let mut rs = instr.rs(); let mut bytes_left = nb; if nb > 0 { if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) { if t.has_active_reservers() { let first_line = ea & !RESERVATION_MASK; let last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK; t.invalidate_for_write(first_line); if last_line != first_line { t.invalidate_for_write(last_line); } } } } while bytes_left > 0 { let val = ctx.gpr[rs] as u32; for byte_idx in 0..4 { if bytes_left == 0 { break; } mem.write_u8(ea, (val >> (24 - byte_idx * 8)) as u8); ea = ea.wrapping_add(1); bytes_left -= 1; } rs = (rs + 1) % 32; } ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // §4m — mcrxr: move XER condition bits to CR field, clear XER[SO/OV/CA] // ═════════════════════════════════════════════════════════════════ PpcOpcode::mcrxr => { let crfd = instr.crfd(); ctx.cr[crfd] = crate::context::CrField { lt: ctx.xer_so != 0, gt: ctx.xer_ov != 0, eq: ctx.xer_ca != 0, so: false, }; ctx.xer_so = 0; ctx.xer_ov = 0; ctx.xer_ca = 0; ctx.pc += 4; } // ═════════════════════════════════════════════════════════════════ // mcrfs — move FPSCR field to CR field and clear corresponding // FPSCR exception bits. CR field crfD ← FPSCR[(crfS*4)..(crfS*4+3)] // and then FPSCR bits in that nibble that are exception bits are // cleared (FX, OX, UX, ZX, XX, VXSNAN, VXISI, VXIDI, VXZDZ, VXIMZ, // VXVC, VXSOFT, VXSQRT, VXCVI are cleared; FEX/VX are read-only // summaries and are recomputed later). 
// ═════════════════════════════════════════════════════════════════ PpcOpcode::mcrfs => { let crfd = instr.crfd(); let crfs = instr.crfs(); let shift = 28 - (crfs as u32 * 4); let nibble = ((ctx.fpscr >> shift) & 0xF) as u8; ctx.cr[crfd] = crate::context::CrField::from_u8(nibble); // Clearable exception bits: 0 (FX), 3 (OX), 4 (UX), 5 (ZX), // 6 (XX), 7 (VXSNAN), 8 (VXISI), 9 (VXIDI), 10 (VXZDZ), // 11 (VXIMZ), 12 (VXVC), 21 (VXSOFT), 22 (VXSQRT), 23 (VXCVI). // (Bit positions are PowerISA MSB-0; here 'FPSCR bit n' means // the bit at (31-n) in our little-endian u32.) const CLEARABLE_MASK: u32 = (1 << 31) | (1 << (31 - 3)) | (1 << (31 - 4)) | (1 << (31 - 5)) | (1 << (31 - 6)) | (1 << (31 - 7)) | (1 << (31 - 8)) | (1 << (31 - 9)) | (1 << (31 - 10)) | (1 << (31 - 11)) | (1 << (31 - 12)) | (1 << (31 - 21)) | (1 << (31 - 22)) | (1 << (31 - 23)); let nibble_mask = 0xFu32 << shift; ctx.fpscr &= !(nibble_mask & CLEARABLE_MASK); ctx.pc += 4; } // Anything not yet implemented _ => { tracing::warn!("Unimplemented opcode at {:#010x}: {:?} [{:08X}]", ctx.pc, instr.opcode, instr.raw); ctx.pc += 4; return StepResult::Unimplemented(instr.opcode); } } StepResult::Continue } /// Compute an X-form indexed effective address: EA = (rA==0 ? 0 : GPR[rA]) + GPR[rB]. #[inline] fn ea_indexed(ctx: &PpcContext, instr: &DecodedInstr) -> u32 { let a = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; a.wrapping_add(ctx.gpr[instr.rb()]) as u32 } /// Helper for CR logical operations. fn cr_logical(ctx: &mut PpcContext, instr: &DecodedInstr, op: fn(bool, bool) -> bool) { let a = ctx.get_cr_bit(instr.crba()); let b = ctx.get_cr_bit(instr.crbb()); ctx.set_cr_bit(instr.crbd(), op(a, b)); } /// Generate 32-bit rotate mask for rlwinm/rlwimi/rlwnm. fn rlw_mask(mb: u32, me: u32) -> u32 { if mb <= me { (u32::MAX >> mb) & (u32::MAX << (31 - me)) } else { (u32::MAX >> mb) | (u32::MAX << (31 - me)) } } /// Generate 64-bit mask clearing bits 0..mb-1 (left mask for rldicl). 
fn rld_mask_left(mb: u32) -> u64 { if mb == 0 { u64::MAX } else { u64::MAX >> mb } } /// Generate 64-bit mask clearing bits me+1..63 (right mask for rldicr). fn rld_mask_right(me: u32) -> u64 { if me >= 63 { u64::MAX } else { u64::MAX << (63 - me) } } /// Extract VMX register indices, handling both standard (opcode 4) and 128-bit forms. #[inline] fn vmx_reg_triple(instr: &DecodedInstr) -> (usize, usize, usize) { // Check if this is a VMX128 form (opcode 4 with extended register fields) // Standard Altivec: vD=rd, vA=ra, vB=rb // VMX128: vD=vd128, vA=va128, vB=vb128 let is_128 = matches!( instr.opcode, PpcOpcode::vand128 | PpcOpcode::vandc128 | PpcOpcode::vor128 | PpcOpcode::vxor128 | PpcOpcode::vnor128 | PpcOpcode::vsel128 | PpcOpcode::vcmpeqfp128 | PpcOpcode::vcmpgefp128 | PpcOpcode::vcmpgtfp128 | PpcOpcode::vmrghw128 | PpcOpcode::vmrglw128 | PpcOpcode::vslw128 | PpcOpcode::vsrw128 | PpcOpcode::vsraw128 | PpcOpcode::vrlw128 | PpcOpcode::vcmpequw128 ); if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) } else { (instr.ra(), instr.rb(), instr.rd()) } } /// Update CR6 from vector compare result mask (used when Rc=1 on vector compares). /// CR6: bit 0 (LT) = all elements true, bit 2 (EQ) = all elements false #[inline] fn update_cr6_from_vmask(r: &[u32; 4], ctx: &mut PpcContext) { let all_true = r.iter().all(|&v| v == 0xFFFF_FFFF); let all_false = r.iter().all(|&v| v == 0); ctx.cr[6].lt = all_true; ctx.cr[6].gt = false; ctx.cr[6].eq = all_false; ctx.cr[6].so = false; } /// Round a double to single precision and back (matches xenia's ToSingle). #[inline] /// Round an f64 to single precision, honouring FPSCR[RN]. fn to_single(ctx: &PpcContext, val: f64) -> f64 { fpscr::round_to_single(ctx, val) } /// Update CR1 from FPSCR (used when Rc=1 on FPU instructions). /// CR1 = FPSCR[FX, FEX, VX, OX] (bits 0-3). #[inline] fn update_cr1_from_fpscr(ctx: &mut PpcContext) { fpscr::update_cr1(ctx); } #[cfg(test)] mod tests { use super::*; /// Simple test memory (64KB). 
Backed by `Box<[Cell]>` so the /// MemoryAccess writes can take `&self`. struct TestMem { data: Box<[std::cell::Cell]>, } impl TestMem { fn new() -> Self { Self { data: (0..65536u32).map(|_| std::cell::Cell::new(0)).collect(), } } } impl MemoryAccess for TestMem { fn read_u8(&self, addr: u32) -> u8 { self.data[addr as usize].get() } fn read_u16(&self, addr: u32) -> u16 { let a = addr as usize; u16::from_be_bytes([self.data[a].get(), self.data[a+1].get()]) } fn read_u32(&self, addr: u32) -> u32 { let a = addr as usize; u32::from_be_bytes([ self.data[a].get(), self.data[a+1].get(), self.data[a+2].get(), self.data[a+3].get(), ]) } fn read_u64(&self, addr: u32) -> u64 { let a = addr as usize; u64::from_be_bytes([ self.data[a].get(), self.data[a+1].get(), self.data[a+2].get(), self.data[a+3].get(), self.data[a+4].get(), self.data[a+5].get(), self.data[a+6].get(), self.data[a+7].get(), ]) } fn write_u8(&self, addr: u32, val: u8) { self.data[addr as usize].set(val); } fn write_u16(&self, addr: u32, val: u16) { let a = addr as usize; let bytes = val.to_be_bytes(); self.data[a].set(bytes[0]); self.data[a+1].set(bytes[1]); } fn write_u32(&self, addr: u32, val: u32) { let a = addr as usize; let bytes = val.to_be_bytes(); for (i, b) in bytes.iter().enumerate() { self.data[a+i].set(*b); } } fn write_u64(&self, addr: u32, val: u64) { let a = addr as usize; let bytes = val.to_be_bytes(); for (i, b) in bytes.iter().enumerate() { self.data[a+i].set(*b); } } fn translate(&self, _addr: u32) -> Option<*const u8> { None } fn translate_mut(&self, _addr: u32) -> Option<*mut u8> { None } } fn write_instr(mem: &TestMem, addr: u32, raw: u32) { mem.write_u32(addr, raw); } #[test] fn test_addi() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // addi r3, r0, 42 write_instr(&mut mem, 0, (14 << 26) | (3 << 21) | (0 << 16) | 42); ctx.pc = 0; let result = step(&mut ctx, &mut mem); assert_eq!(result, StepResult::Continue); assert_eq!(ctx.gpr[3], 42); assert_eq!(ctx.pc, 4); } 
#[test] fn test_addis() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // addis r3, r0, 1 => r3 = 0x10000 write_instr(&mut mem, 0, (15 << 26) | (3 << 21) | (0 << 16) | 1); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[3], 0x10000); } #[test] fn test_lwz_stw() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // Store 0xDEADBEEF at address 0x100 mem.write_u32(0x100, 0xDEADBEEF); // addi r1, r0, 0x100 write_instr(&mut mem, 0, (14 << 26) | (1 << 21) | (0 << 16) | 0x100); // lwz r3, 0(r1) write_instr(&mut mem, 4, (32 << 26) | (3 << 21) | (1 << 16) | 0); ctx.pc = 0; step(&mut ctx, &mut mem); step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[3], 0xDEADBEEF); } #[test] fn test_branch() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // b +0x10 (from addr 0x100) write_instr(&mut mem, 0x100, (18 << 26) | (4 << 2)); // LI=4, shifted=0x10 ctx.pc = 0x100; step(&mut ctx, &mut mem); assert_eq!(ctx.pc, 0x110); } #[test] fn test_bl_updates_lr() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // bl +0x10 (from addr 0x200) write_instr(&mut mem, 0x200, (18 << 26) | (4 << 2) | 1); // LK=1 ctx.pc = 0x200; step(&mut ctx, &mut mem); assert_eq!(ctx.pc, 0x210); assert_eq!(ctx.lr, 0x204); } #[test] fn test_cmp_and_bc() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 10; // cmpi cr0, 0, r3, 10 (32-bit compare) write_instr(&mut mem, 0, (11 << 26) | (0 << 23) | (0 << 21) | (3 << 16) | (10u32 & 0xFFFF)); // bc 12,2,+8 (branch if CR0.EQ, bo=12, bi=2) write_instr(&mut mem, 4, (16 << 26) | (12 << 21) | (2 << 16) | (2 << 2)); ctx.pc = 0; step(&mut ctx, &mut mem); // cmpi assert!(ctx.cr[0].eq); step(&mut ctx, &mut mem); // bc - should branch assert_eq!(ctx.pc, 12); // 4 + 8 } #[test] fn test_rlwinm() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0xFF00_FF00; // rlwinm r4, r3, 8, 0, 31 (rotate left 8, full mask = shift left 8) let raw = (21 << 26) | (3 << 21) | (4 << 
16) | (8 << 11) | (0 << 6) | (31 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[4], 0x00FF_00FF); } #[test] fn test_ori_nop() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // ori r0, r0, 0 (NOP) write_instr(&mut mem, 0, 0x60000000); ctx.pc = 0; ctx.gpr[0] = 0xDEAD; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[0], 0xDEAD); assert_eq!(ctx.pc, 4); } #[test] fn test_fadd() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.fpr[1] = 3.14; ctx.fpr[2] = 2.86; // fadd f3, f1, f2: opcode 63, subop 21 (bits 1-5), frD=3, frA=1, frB=2 // 63<<26 | 3<<21 | 1<<16 | 2<<11 | 21<<1 let raw = (63 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert!((ctx.fpr[3] - 6.0).abs() < 1e-10); } #[test] fn test_fmul() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.fpr[1] = 3.0; ctx.fpr[2] = 4.0; // fmul f3, f1, f2: opcode 63, subop 25, frD=3, frA=1, frC=2 (bits 21-25) // 63<<26 | 3<<21 | 1<<16 | 0<<11 | 2<<6 | 25<<1 let raw = (63 << 26) | (3 << 21) | (1 << 16) | (0 << 11) | (2 << 6) | (25 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert!((ctx.fpr[3] - 12.0).abs() < 1e-10); } #[test] fn test_fcmpu() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.fpr[1] = 5.0; ctx.fpr[2] = 3.0; // fcmpu cr0, f1, f2: opcode 63, subop 0 (X-form), crfD=0, frA=1, frB=2 // 63<<26 | 0<<23 | 0<<21 | 1<<16 | 2<<11 | 0<<1 let raw = (63 << 26) | (0 << 23) | (0 << 21) | (1 << 16) | (2 << 11) | (0 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert!(ctx.cr[0].gt); // 5.0 > 3.0 assert!(!ctx.cr[0].lt); assert!(!ctx.cr[0].eq); } #[test] fn test_fctiwzx() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.fpr[1] = 42.7; // fctiwz f2, f1: opcode 63, subop 15 (X-form), frD=2, frB=1 // 63<<26 | 2<<21 | 0<<16 | 1<<11 | 15<<1 let raw = (63 << 26) 
| (2 << 21) | (0 << 16) | (1 << 11) | (15 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); // Result stored as bits in FPR: should be 42 as int let bits = ctx.fpr[2].to_bits(); assert_eq!(bits as u32, 42); } #[test] fn test_fmadd() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.fpr[1] = 2.0; // frA ctx.fpr[2] = 3.0; // frB (addend) ctx.fpr[3] = 5.0; // frC (multiplier) // fmadd f4, f1, f3, f2: frD=4, frA=1, frB=2, frC=3 // opcode 63, subop 29 (bits 1-5) // 63<<26 | 4<<21 | 1<<16 | 2<<11 | 3<<6 | 29<<1 let raw = (63 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (29 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); // (2.0 * 5.0) + 3.0 = 13.0 assert!((ctx.fpr[4] - 13.0).abs() < 1e-10); } #[test] fn test_ctx_default_state_matches_canary() { let ctx = PpcContext::new(); // LR initialized to halt sentinel so a top-level blr drops out cleanly. assert_eq!(ctx.lr, crate::context::LR_HALT_SENTINEL); // VSCR starts with NJ bit set (denormals flush to zero). assert!(ctx.vscr_nj()); assert!(!ctx.vscr_sat()); // VRSAVE defaults to "save all" per canary. assert_eq!(ctx.vrsave, 0xFFFF_FFFF); } #[test] fn test_vaddubs_saturates_and_sets_vscr_sat() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // Fill vA with 0xF0, vB with 0x20 → 0x110, saturates to 0xFF per lane. ctx.vr[2] = xenia_types::Vec128::from_bytes([0xF0; 16]); ctx.vr[3] = xenia_types::Vec128::from_bytes([0x20; 16]); // vaddubs vD=4, vA=2, vB=3. XO=512 (PPC: opcode 4, VA-form). 
let raw: u32 = (4u32 << 26) | (4u32 << 21) | (2u32 << 16) | (3u32 << 11) | 512u32; write_instr(&mut mem, 0, raw); ctx.pc = 0; let r = step(&mut ctx, &mut mem); assert_eq!(r, StepResult::Continue); assert_eq!(ctx.vr[4].as_bytes(), [0xFFu8; 16]); assert!(ctx.vscr_sat(), "SAT should be set after saturation"); } #[test] fn test_ldarx_stdcx_pair() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); mem.write_u64(0x1000, 0xDEADBEEF_CAFEBABE); ctx.gpr[4] = 0x1000; ctx.gpr[5] = 0; // ldarx r3, r4, r5: (31 << 26) | (3<<21) | (4<<16) | (5<<11) | (84<<1) let raw_ld: u32 = (31u32 << 26) | (3u32 << 21) | (4u32 << 16) | (5u32 << 11) | (84u32 << 1); write_instr(&mut mem, 0, raw_ld); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[3], 0xDEADBEEF_CAFEBABE); assert!(ctx.has_reservation); // stdcx. r6, r4, r5: (31 << 26) | (6<<21) | (4<<16) | (5<<11) | (214<<1) | 1 ctx.gpr[6] = 0x1111_1111_2222_2222; let raw_st: u32 = (31u32 << 26) | (6u32 << 21) | (4u32 << 16) | (5u32 << 11) | (214u32 << 1) | 1; write_instr(&mut mem, 4, raw_st); step(&mut ctx, &mut mem); assert!(ctx.cr[0].eq, "stdcx. 
should succeed and set CR0.EQ"); assert_eq!(mem.read_u64(0x1000), 0x1111_1111_2222_2222); assert!(!ctx.has_reservation); } #[test] fn test_mcrxr_moves_xer_condition_bits_and_clears_them() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.xer_so = 1; ctx.xer_ov = 0; ctx.xer_ca = 1; // mcrxr crfD=3: (31 << 26) | (3<<23) | (512<<1) let raw: u32 = (31u32 << 26) | (3u32 << 23) | (512u32 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert!(ctx.cr[3].lt, "LT should mirror old XER[SO]"); assert!(!ctx.cr[3].gt, "GT should mirror old XER[OV]"); assert!(ctx.cr[3].eq, "EQ should mirror old XER[CA]"); assert_eq!(ctx.xer_so, 0); assert_eq!(ctx.xer_ov, 0); assert_eq!(ctx.xer_ca, 0); } // ---------- Phase 2 fixes: OE / overflow ---------- fn addx_raw(rd: u32, ra: u32, rb: u32, oe: bool, rc: bool) -> u32 { (31 << 26) | (rd << 21) | (ra << 16) | (rb << 11) | ((oe as u32) << 10) | (266 << 1) | (rc as u32) } #[test] fn addo_sets_xer_ov_on_signed_overflow_and_stickies_so() { // PPCBUG-012: 32-bit ABI. INT32_MAX + 1 overflows to INT32_MIN. 
let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = i32::MAX as u32 as u64; ctx.gpr[4] = 1; write_instr(&mut mem, 0, addx_raw(5, 3, 4, true, false)); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x8000_0000u64); assert_eq!(ctx.xer_ov, 1, "OV must be set on signed overflow"); assert_eq!(ctx.xer_so, 1, "SO must be stickied from OV"); } #[test] fn addo_clears_xer_ov_when_no_overflow_but_keeps_sticky_so() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.xer_ov = 1; // stale from a previous overflow ctx.xer_so = 1; ctx.gpr[3] = 1; ctx.gpr[4] = 2; write_instr(&mut mem, 0, addx_raw(5, 3, 4, true, false)); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 3); assert_eq!(ctx.xer_ov, 0, "OV must clear when no overflow"); assert_eq!(ctx.xer_so, 1, "SO is sticky; stays set"); } #[test] fn add_without_oe_does_not_touch_xer() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = i64::MAX as u64; ctx.gpr[4] = 1; write_instr(&mut mem, 0, addx_raw(5, 3, 4, false, false)); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.xer_ov, 0); assert_eq!(ctx.xer_so, 0); } #[test] fn addx_rc_uses_32bit_compare_in_xbox_abi() { // PPCBUG-012+020: 32-bit ABI. r3 + r4 = 0xFFFFFFFF (low 32). As i32 // this is -1 (CR0.LT). The previous 64-bit compare wrongly classified // this as positive (CR0.GT) for Xbox 360 binaries. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0x0000_0000_FFFF_FFFF; ctx.gpr[4] = 0; write_instr(&mut mem, 0, addx_raw(5, 3, 4, false, true)); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFF); assert!(ctx.cr[0].lt, "32-bit ABI: 0xFFFFFFFF as i32 is -1, CR0.LT"); assert!(!ctx.cr[0].gt); assert!(!ctx.cr[0].eq); } #[test] fn subfo_sets_xer_ov_on_int32_min_minus_one() { // PPCBUG-017: 32-bit ABI subfo overflow detection. r4=INT32_MIN, r3=1 // → result = INT32_MIN - 1 → wraps to INT32_MAX with OV=1. 
let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 1; ctx.gpr[4] = 0x8000_0000u64; let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x7FFF_FFFFu64); assert_eq!(ctx.xer_ov, 1); assert_eq!(ctx.xer_so, 1); } #[test] fn subfo_no_spurious_ov_when_result_has_bit31_set() { // PPCBUG-017 review-fix regression: subfo r5, r3, r4 with r3=1, r4=0x80000001 // → result = 0x80000000. This is i32::MIN — a legitimate negative value // with no 32-bit overflow (true_diff = -2147483648, fits in i32). // The legacy `sum_overflow_64` predicate compared against the u64 view // of result (= +2147483648), spuriously flagging OV=1. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 1; ctx.gpr[4] = 0x8000_0001u64; // subfo r5, r3, r4 let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x8000_0000u64); assert_eq!(ctx.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV"); } #[test] fn subfco_no_spurious_ov_when_result_has_bit31_set() { // PPCBUG-007 same review-fix: subfcx OE handler must use 32-bit predicate. 
let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 1; ctx.gpr[4] = 0x8000_0001u64; // subfco r5, r3, r4 (XO=8, OE=1) let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (8 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x8000_0000u64); assert_eq!(ctx.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV"); } #[test] fn mullwo_sets_xer_ov_when_product_overflows_32_bits() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // mullwo r5, r3, r4 (XO=235, OE=1) ctx.gpr[3] = i32::MAX as u64; ctx.gpr[4] = 2u64; let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (235 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.xer_ov, 1); assert_eq!(ctx.xer_so, 1); } #[test] fn divwo_sets_xer_ov_on_divide_by_zero() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // divwo r5, r3, r4 (XO=491, OE=1) ctx.gpr[3] = 10; ctx.gpr[4] = 0; let raw = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (491 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.xer_ov, 1); assert_eq!(ctx.gpr[5], 0); // undefined in spec; canary uses 0 } #[test] fn nego_sets_ov_only_on_int_min() { // PPCBUG-006: 32-bit ABI. INT_MIN is 0x80000000 (low 32), not 0x8000000000000000. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // nego r5, r3 (XO=104, OE=1) ctx.gpr[3] = 0x8000_0000; let raw = (31 << 26) | (5 << 21) | (3 << 16) | (1 << 10) | (104 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.xer_ov, 1); // -INT_MIN wraps to INT_MIN (low 32 bits) with upper 32 bits zero. assert_eq!(ctx.gpr[5], 0x0000_0000_8000_0000); } #[test] fn neg_clean_input_no_upper_bits() { // PPCBUG-006 regression: neg r3=5 must produce 0x00000000_FFFFFFFB, // not 0xFFFFFFFF_FFFFFFFB (the 64-bit !ra-then-add-1 result). 
let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 5; let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (104 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFB); } #[test] fn norx_not_simplified_keeps_upper_bits_clean() { // PPCBUG-029: `not rA, rB` (norx with rs==rb) is the canonical not // simplified mnemonic. 64-bit !val poisons upper 32 bits of every // execution; under the 32-bit ABI we must truncate. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0x0000_0000_0000_00FF; // norx r5, r3, r3 (XO=124) let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (3 << 11) | (124 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FF00, "upper 32 bits must be zero"); } #[test] fn eqvx_self_self_self_sets_low32_to_all_ones() { // PPCBUG-031: `eqv rA, rA, rA` is a common "set-to-all-ones" idiom. // 64-bit !(0^0) gives u64::MAX (0xFFFFFFFF_FFFFFFFF); 32-bit ABI // expects 0x00000000_FFFFFFFF. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0; // eqvx r3, r3, r3 (XO=284) let raw = (31u32 << 26) | (3 << 21) | (3 << 16) | (3 << 11) | (284 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[3], 0x0000_0000_FFFF_FFFF); } #[test] fn andcx_bit_clear_keeps_upper_clean() { // PPCBUG-033: `andc rA, rS, rB` = rS & !rB. 64-bit !rB poisons. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0xFFFF_FFFF; // rS ctx.gpr[4] = 0x000F; // rB (low bits to clear) // andcx r5, r3, r4 (XO=60) let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (60 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFF0); } #[test] fn subfex_clean_inputs_no_upper_bits() { // PPCBUG-008: 32-bit ABI. RT = !RA + RB + CA. 
RA=5, RB=10, CA=1 // → !5u32 = 0xFFFFFFFA, +10 = 0x100000004, +1 = 0x100000005, low32 = 5. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 5; ctx.gpr[4] = 10; ctx.xer_ca = 1; // subfex r5, r3, r4 (XO=136) let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (136 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 5); assert_eq!(ctx.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)"); } #[test] fn andisx_sign_bit_set_classifies_lt() { // PPCBUG-023: andis. r4, r3, 0x8000 with r3=0xFFFFFFFF should produce // result=0x80000000 with CR0.LT=1 (i32 view). let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0xFFFF_FFFFu64; // andis. r4, r3, 0x8000: opcode 29, uimm16 = 0x8000 let raw = (29u32 << 26) | (3 << 21) | (4 << 16) | 0x8000; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[4], 0x8000_0000u64); assert!(ctx.cr[0].lt, "result=0x80000000 → i32 view negative → CR0.LT"); } #[test] fn slwx_high_bit_result_classifies_lt() { // PPCBUG-044: slwx producing 0x80000000 must classify as CR0.LT under // the 32-bit ABI, not CR0.GT (which 64-bit view would give). let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0x4000_0000u64; ctx.gpr[4] = 1; // slwx. r5, r3, r4 (XO=24, Rc=1) let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (24 << 1) | 1; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x8000_0000u64); assert!(ctx.cr[0].lt, "0x80000000 as i32 is negative"); } #[test] fn lha_negative_halfword_zero_extends_upper() { // PPCBUG-095: memory 0x8000 must yield gpr[rD] = 0x00000000_FFFF8000. 
let mut ctx = PpcContext::new(); let mem = TestMem::new(); mem.write_u16(0x100, 0x8000); ctx.gpr[3] = 0x100; // lha r5, 0(r3): opcode 42 let raw = (42u32 << 26) | (5 << 21) | (3 << 16) | 0; write_instr(&mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_8000u64); } #[test] fn lhaux_negative_halfword_clean_writeback() { // PPCBUG-098: indexed update form. Memory 0xFFFF → rD = 0x00000000_FFFFFFFF; // rA must update to the EA. let mut ctx = PpcContext::new(); let mem = TestMem::new(); mem.write_u16(0x200, 0xFFFF); ctx.gpr[3] = 0x100; // ra ctx.gpr[4] = 0x100; // rb // lhaux r5, r3, r4 (XO=375) let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (375 << 1); write_instr(&mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFFu64); assert_eq!(ctx.gpr[3], 0x200, "rA updated to EA"); } #[test] fn lwa_high_bit_set_zero_extends_upper() { // PPCBUG-105: memory 0x80000000 must yield rD = 0x00000000_80000000 // under 32-bit ABI (no sign extension to bits 32-63). let mut ctx = PpcContext::new(); let mem = TestMem::new(); mem.write_u32(0x100, 0x8000_0000); ctx.gpr[3] = 0x100; // lwa r5, 0(r3): opcode 58, XO=2 (DS-form, ds=0) let raw = (58u32 << 26) | (5 << 21) | (3 << 16) | 2; write_instr(&mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mem); assert_eq!(ctx.gpr[5], 0x0000_0000_8000_0000u64); } #[test] fn mullwx_overflow_truncates_to_32() { // PPCBUG-009: mullwo r5, r3, r4 with ra=0x10000, rb=0x10000 → product // 0x100000000 (overflow). Low 32 = 0; OE must fire. 
let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0x10000; ctx.gpr[4] = 0x10000; // mullwo r5, r3, r4 (XO=235, OE=1) let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (235 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0, "low 32 bits = 0"); assert_eq!(ctx.xer_ov, 1, "overflow detected"); } #[test] fn divwx_negative_quotient_zero_extends() { // PPCBUG-010+011: -10 / 3 = -3 must produce 0x00000000_FFFFFFFD, // not 0xFFFFFFFF_FFFFFFFD. CR0.LT must still fire (i32 view of FFFFFFFD is negative). let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = (-10i32) as u32 as u64; ctx.gpr[4] = 3; // divwx. r5, r3, r4 (XO=491, Rc=1) let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (491 << 1) | 1; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFDu64); assert!(ctx.cr[0].lt, "CR0.LT must fire for negative i32 quotient"); } #[test] fn srawx_negative_value_zero_extends_upper() { // PPCBUG-041+043: srawx of negative i32 by 1 produces a negative i32; // writeback must zero-extend to u64 (not sign-extend). let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0x8000_0000u64; // i32::MIN ctx.gpr[4] = 1; // srawx. r5, r3, r4 (XO=792, Rc=1) let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (792 << 1) | 1; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_C000_0000u64); assert!(ctx.cr[0].lt); } #[test] fn srawix_high_count_negative_input_yields_low32_all_ones() { // PPCBUG-042+043: srawi with count=31 on negative input → low 32 bits // all ones (0xFFFFFFFF), upper 32 zero (was u64::MAX before fix). 
let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0x8000_0000u64; // srawix r5, r3, 31 (XO=824) let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (31 << 11) | (824 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FFFFu64); } #[test] fn addi_li_neg_one_zero_extends_upper() { // PPCBUG-001: `li r3, -1` (= addi r3, r0, -1) must produce // 0x00000000_FFFFFFFF, not 0xFFFFFFFF_FFFFFFFF. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // addi r3, r0, -1: opcode 14, simm16 = 0xFFFF let raw = (14u32 << 26) | (3 << 21) | (0 << 16) | 0xFFFF; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[3], 0x0000_0000_FFFF_FFFFu64); } #[test] fn addic_carry_uses_32bit_compare() { // PPCBUG-002: addic ra=0xFFFFFFFF_00000001, simm=-1 (0xFFFF). // 32-bit: 0x00000001 + 0xFFFFFFFF = 0x00000000 with CA=1. // 64-bit (buggy): result < ra → since 64-bit ra has high bits set, // the buggy form would compare against the polluted u64 and could // give wrong CA. Truncated form ignores upper 32 bits. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0xFFFFFFFF_00000001u64; // addic r4, r3, -1: opcode 12 let raw = (12u32 << 26) | (4 << 21) | (3 << 16) | 0xFFFF; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); // Result low 32: 0x00000001 + 0xFFFFFFFF = 0x00000000 with carry. assert_eq!(ctx.gpr[4], 0); assert_eq!(ctx.xer_ca, 1, "32-bit compare must see CA=1"); } #[test] fn mulli_overflow_wraps_to_32() { // PPCBUG-004: mulli must truncate to 32 bits even when the upper 32 bits // of RA are polluted (e.g. by upstream bugs). Pre-fix: ra = u64::MAX as // i64 = -1, * 2 = -2, written to GPR as `0xFFFFFFFF_FFFFFFFE`. Post-fix: // truncated to `0xFFFFFFFE`. Discriminating regression test. 
let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = u64::MAX; // mulli r4, r3, 2: opcode 7 let raw = (7u32 << 26) | (4 << 21) | (3 << 16) | 2; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[4], 0xFFFF_FFFEu64, "low 32 bits = -2 in i32; upper 32 zero"); } #[test] fn subficx_neg_simm_zero_extends() { // PPCBUG-005: subfic r4, r3, -1 with r3=5: imm-ra = 0xFFFFFFFF - 5 = 0xFFFFFFFA. // Buggy form: imm sign-extended to u64 0xFFFFFFFFFFFFFFFF - 5 = poisoned. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 5; // subfic r4, r3, -1: opcode 8, simm = 0xFFFF let raw = (8u32 << 26) | (4 << 21) | (3 << 16) | 0xFFFF; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[4], 0x0000_0000_FFFF_FFFAu64); assert_eq!(ctx.xer_ca, 1, "0xFFFFFFFF >= 5 → CA=1"); } #[test] fn subfcx_addis_incident_case() { // PPCBUG-007: regression for the exact case that revealed the addis bug. // After P1's addis fix this works coincidentally; P4 batch 3 makes // subfcx itself robust to 64-bit GPR pollution. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // ra polluted in upper 32 bits, low 32 = 0x828F3F98 ctx.gpr[3] = 0xFFFF_FFFF_828F_3F98u64; // rb clean low 32 = 0x828F3F68 ctx.gpr[4] = 0x0000_0000_828F_3F68u64; // subfcx r5, r3, r4 (XO=8): result = rb - ra = 0xFFFFFFD0 (low 32) let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (8 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); // 32-bit unsigned: 0x828F3F68 < 0x828F3F98 → CA=0 assert_eq!(ctx.xer_ca, 0, "32-bit unsigned: rb < ra → CA=0"); // result = 0x828F3F68 - 0x828F3F98 = 0xFFFFFFD0 (low 32, upper 32 zero) assert_eq!(ctx.gpr[5], 0xFFFF_FFD0u64); } #[test] fn extsbx_negative_byte_zero_extends_upper() { // PPCBUG-034+036 coupled: extsb of 0x80 (negative byte) must produce // 0x00000000_FFFFFF80, NOT 0xFFFFFFFF_FFFFFF80. 
CR0.LT must still fire // (i32 view of 0xFFFFFF80 is negative). let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0x80; // extsbx. r5, r3 (XO=954, Rc=1) let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (954 << 1) | 1; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_FF80); assert!(ctx.cr[0].lt, "CR0.LT must fire for negative i32"); assert!(!ctx.cr[0].gt); } #[test] fn extshx_negative_halfword_zero_extends_upper() { // PPCBUG-035+037 coupled: extsh of 0x8000 must produce 0x00000000_FFFF8000. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0x8000; // extshx. r5, r3 (XO=922, Rc=1) let raw = (31u32 << 26) | (3 << 21) | (5 << 16) | (922 << 1) | 1; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.gpr[5], 0x0000_0000_FFFF_8000); assert!(ctx.cr[0].lt); } #[test] fn subfmex_ra_max_ca_zero_clears_ca() { // PPCBUG-019: `subfme` with RA=u32::MAX and CA=0 should set CA=0 // (because !u32::MAX = 0). The buggy code's `!ra != 0` predicate // on u64 was always true (because !u64-cast-of-u32::MAX has high // bits flipped non-zero), wrongly setting CA=1. let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 0xFFFF_FFFFu64; ctx.xer_ca = 0; // subfmex r5, r3 (XO=232) let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (232 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.xer_ca, 0, "RA=u32::MAX, CA=0 → !RA32==0, CA=0"); } // ---------- Phase 2 fixes: trap TO-field ---------- #[test] fn tw_with_to_zero_never_fires() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[3] = 5; ctx.gpr[4] = 5; // tw 0, r3, r4 (XO=4). TO in bits 6-10. 
let raw = (31 << 26) | (0 << 21) | (3 << 16) | (4 << 11) | (4 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    let r = step(&mut ctx, &mut mem);
    assert_eq!(r, StepResult::Continue, "TO=0 must never trap");
    assert_eq!(ctx.pc, 4);
}

/// `tw` with TO=4 (EQ only) returns `StepResult::Trap` when r3 == r4.
#[test]
fn tw_eq_fires_on_equal() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.gpr[3] = 5;
    ctx.gpr[4] = 5;
    // TO=4 (EQ only)
    let raw = (31 << 26) | (4 << 21) | (3 << 16) | (4 << 11) | (4 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    let r = step(&mut ctx, &mut mem);
    assert_eq!(r, StepResult::Trap);
}

/// `tw` with TO=4 (EQ only) continues normally when r3 != r4.
#[test]
fn tw_eq_does_not_fire_on_unequal() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.gpr[3] = 5;
    ctx.gpr[4] = 7;
    // TO=4 (EQ only)
    let raw = (31 << 26) | (4 << 21) | (3 << 16) | (4 << 11) | (4 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    let r = step(&mut ctx, &mut mem);
    assert_eq!(r, StepResult::Continue);
}

/// `twi` must ignore the upper 32 bits of the GPR when comparing.
#[test]
fn twi_compares_low_32_bits_only() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.gpr[3] = 0xDEAD_BEEF_0000_0005; // low 32 = 5
    // twi 4, r3, 5: primary=3, TO=4, RA=3, SI=5
    let raw = (3 << 26) | (4 << 21) | (3 << 16) | (5u32 & 0xFFFF);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    let r = step(&mut ctx, &mut mem);
    assert_eq!(r, StepResult::Trap, "word-width compare matches low 32");
}

// ---------- Phase 2 fixes: mcrfs ----------

// ---------- Phase 2h: FPU / FPSCR ----------

#[test]
fn fmsub_inf_minus_inf_sets_vxisi() {
    // PPCBUG-203 regression: fmsub with a*c = +∞, -b = -∞ (b=+∞) →
    // +∞ + (-∞) → VXISI. Pre-fix had no add-step VXISI check.
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = f64::INFINITY;
    ctx.fpr[2] = f64::INFINITY; // b
    ctx.fpr[3] = 1.0;
    // fmsub f4, f1, f3, f2 → 1*∞ - ∞ = VXISI
    // A-form: opcode=63, XO=28 (fmsub double): (63<<26)|(rd<<21)|(ra<<16)|(rb<<11)|(rc<<6)|(28<<1)
    let raw = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (28 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::VXISI, 0, "fmsub ∞-∞ must set VXISI");
}

#[test]
fn fnmadd_nan_input_preserves_nan_sign() {
    // PPCBUG-205 regression: ISA forbids negating a NaN result.
    // a*c+b producing a NaN → result must be the NaN unchanged, not -NaN.
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let qnan = f64::NAN;
    ctx.fpr[1] = qnan;
    ctx.fpr[2] = 1.0;
    ctx.fpr[3] = 2.0;
    // fnmadd f4, f1, f3, f2 (XO=31)
    let raw = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (31 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Result must be NaN with the same sign bit as the input NaN.
    let r = ctx.fpr[4];
    assert!(r.is_nan(), "result must be NaN");
    assert_eq!(
        r.is_sign_negative(),
        qnan.is_sign_negative(),
        "fnmadd must preserve NaN sign (no negation on NaN)"
    );
}

/// `fadd` of +∞ and -∞ must raise VXISI together with the FX and VX summaries.
#[test]
fn fadd_inf_minus_inf_sets_vxisi() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = f64::INFINITY;
    ctx.fpr[2] = f64::NEG_INFINITY;
    // fadd f3, f1, f2 → inf + (-inf) = VXISI
    let raw = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // VXISI bit is PPC bit 8 → mask 1<<23
    assert_ne!(ctx.fpscr & fpscr::VXISI, 0);
    // FX sticky is set on any new exception → mask 1<<31
    assert_ne!(ctx.fpscr & fpscr::FX, 0);
    // VX summary set → 1<<29
    assert_ne!(ctx.fpscr & fpscr::VX, 0);
}

/// `fdiv` of 0/0 must raise VXZDZ.
#[test]
fn fdiv_zero_over_zero_sets_vxzdz() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = 0.0;
    ctx.fpr[2] = 0.0;
    // fdiv f3, f1, f2 (opcode 63, subop 18)
    let raw = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (18 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::VXZDZ, 0);
}

/// `fdiv` of a finite non-zero value by zero must raise ZX.
#[test]
fn fdiv_finite_over_zero_sets_zx() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = 1.0;
    ctx.fpr[2] = 0.0;
    // fdiv f3, f1, f2
    let raw = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (18 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::ZX, 0);
}

/// `fadd` must classify its result into FPSCR[FPRF].
#[test]
fn fadd_sets_fprf_from_result() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = 2.5;
    ctx.fpr[2] = 3.5;
    let raw = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Result = +6.0 → FPRF = POS_NORMAL = 0b0_0100
    let fprf = ((ctx.fpscr & fpscr::FPRF_MASK) >> 12) as u8;
    assert_eq!(fprf, fpscr::fprf::POS_NORMAL);
}

/// `frsp` must round according to FPSCR[RN]; here RN selects toward-zero.
#[test]
fn frsp_honours_fpscr_rn_toward_zero() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    // A value just above the midpoint between the adjacent f32 values 1.0
    // and 1.0 + 2^-23: round-to-nearest would go UP to 1.0 + 2^-23 while
    // round-toward-zero goes DOWN to 1.0. (1.0 + one double ULP would round
    // to 1.0 in both modes and could not tell them apart.)
    let v = f64::from_bits(0x3FF0_0000_1000_0001); // 1.0 + 2^-24 + 2^-52
    ctx.fpr[1] = v;
    ctx.fpscr = 0x1; // RN = 01 → toward zero
    // frsp f3, f1 (opcode 63, subop 12)
    let raw = (63u32 << 26) | (3 << 21) | (1 << 11) | (12 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Result rounded toward zero should be exactly 1.0_f64
    assert_eq!(ctx.fpr[3], 1.0_f64);
}

/// `fcmpu` with a NaN operand reports "unordered" in both the CR field and FPRF.
#[test]
fn fcmpu_sets_so_on_nan_and_fprf_unordered() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = f64::NAN;
    ctx.fpr[2] = 1.0;
    // fcmpu crfD=4, f1, f2 : (63<<26) | (crfd<<23) | (ra<<16) | (rb<<11) | (0<<1)
    let raw = (63u32 << 26) | (4 << 23) | (1 << 16) | (2 << 11);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.cr[4].so, "unordered → SO set");
    assert!(!ctx.cr[4].lt && !ctx.cr[4].gt && !ctx.cr[4].eq);
    // FPRF unordered = 0b0_0001
    let fprf = ((ctx.fpscr & fpscr::FPRF_MASK) >> 12) as u8;
    assert_eq!(fprf, 0b0_0001);
}

/// `fcmpo` (ordered compare) on a quiet NaN must raise VXVC.
#[test]
fn fcmpo_on_qnan_sets_vxvc() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = f64::NAN; // QNaN (Rust's NAN)
    ctx.fpr[2] = 1.0;
    // fcmpo (opcode 63, subop 32)
    let raw = (63u32 << 26) | (4 << 23) | (1 << 16) | (2 << 11) | (32 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::VXVC, 0);
}

// ---------- Phase 2i: VMX NaN propagation ----------

/// `vmaxfp` must propagate a NaN lane from vA instead of picking vB's value.
#[test]
fn vmaxfp_propagates_nan() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let mut a = [1.0f32, 2.0, 3.0, 4.0];
    let b = [5.0f32, 6.0, 7.0, 8.0];
    a[1] = f32::NAN;
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array(a);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array(b);
    // vmaxfp vD=4, vA=2, vB=3 (opcode 4, XO=1034)
    let raw = (4u32 << 26) | (4 << 21) | (2 << 16) | (3 << 11) | 1034;
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    let r = ctx.vr[4].as_f32x4();
    // lane 1 must be NaN, not 6.0 (the b side)
    assert!(r[1].is_nan());
    // Other lanes should pick the max correctly
    assert_eq!(r[0], 5.0);
    assert_eq!(r[2], 7.0);
    assert_eq!(r[3], 8.0);
}

/// `vminfp` must propagate a NaN lane from vB.
#[test]
fn vminfp_propagates_nan() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let a = [1.0f32, 2.0, 3.0, 4.0];
    let mut b = [5.0f32, 6.0, 7.0, 8.0];
    b[2] = f32::NAN;
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array(a);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array(b);
    // vminfp XO=1098
    let raw = (4u32 << 26) | (4 << 21) | (2 << 16) | (3 << 11) | 1098;
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    let r = ctx.vr[4].as_f32x4();
    assert!(r[2].is_nan());
}

// ---------- Phase 2j: VMX denorm flush ----------

/// `vmaddfp` must flush denormal inputs to zero before computing.
#[test]
fn vmaddfp_flushes_denormal_inputs() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    // Smallest positive denormal f32 = f32::from_bits(1)
    let denorm = f32::from_bits(1);
    let a = [denorm; 4];
    let b = [0.0f32; 4];
    let c = [1.0f32; 4];
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array(a);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array(b);
    ctx.vr[4] = xenia_types::Vec128::from_f32x4_array(c);
    // vmaddfp vD=5, vA=2, vB=3, vC=4 (A-form: opcode 4, XO=46, vC at rc field)
    // layout: (4<<26) | (5<<21) | (2<<16) | (3<<11) | (4<<6) | 46
    let raw = (4u32 << 26) | (5 << 21) | (2 << 16) | (3 << 11) | (4 << 6) | 46;
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    let r = ctx.vr[5].as_f32x4();
    // denorm*1 + 0 should be flushed: denorm→0, so result is 0.
    assert_eq!(r, [0.0f32; 4]);
}

/// VMX128 variant `vmaddfp128 vD, vA, vB` (primary op 5, key2 = 0b001101)
/// reuses vD as the accumulator: `vD <- (vA × vD) + vB`. Canary
/// `ppc_emit_altivec.cc:786-810` flushes *all three* inputs
/// unconditionally before the fused multiply-add.
#[test]
fn vmaddfp128_flushes_denormal_inputs() {
    // Every operand register holds the smallest positive denormal; after the
    // flush the instruction must compute 0*0+0 and leave vD all-zero.
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let denorm = f32::from_bits(1);
    // VA=v1, VD=v2, VB=v3 — all carry denormals.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
    // vmaddfp128 vD=v2, vA=v1, vB=v3: op6=5, vd_lo=2, va_lo=1, vb_lo=3, key2=0b001101.
    // VA×VD+VB: all three flushed → 0*0+0 = 0.
    let raw: u32 = (5u32 << 26) | (2 << 21) | (1 << 16) | (3 << 11) | (3 << 6) | (1 << 4);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]);
}

// ---- PPCBUG-424+425: vmaddfp128/vmaddcfp128 operand swap ----
// ISA for both: (VD) <- (VA × VD) + VB. Previous code computed VA×VB+VD and VD×VB+VA.
// Test uses distinct VA, VB, VD registers so the swap is visible.
// Encoding: op6=5, key2=0b001101 (vmaddfp128) / 0b010001 (vmaddcfp128).
// VA=v1=[2.0], VB=v2=[10.0], VD=v3=[3.0] → expected 2.0×3.0+10.0 = 16.0.
// Buggy vmaddfp128: 2.0×10.0+3.0 = 23.0. Buggy vmaddcfp128: 3.0×10.0+2.0 = 32.0.
#[test] fn vmaddfp128_operand_order_va_times_vd_plus_vb() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([2.0f32; 4]); // VA=v1 ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([10.0f32; 4]); // VB=v2 ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([3.0f32; 4]); // VD=v3 (also destination) // vmaddfp128 vD=v3, vA=v1, vB=v2 — op5, key2=0b001101 (bits22-25=3, bit27=1) let raw: u32 = (5u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (1 << 4); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.vr[3].as_f32x4(), [16.0f32; 4], "VA×VD+VB = 2*3+10 = 16"); } #[test] fn vmaddcfp128_operand_order_va_times_vd_plus_vb() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([2.0f32; 4]); // VA=v1 ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([10.0f32; 4]); // VB=v2 ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([3.0f32; 4]); // VD=v3 // vmaddcfp128 vD=v3, vA=v1, vB=v2 — op5, key2=0b010001 (bits22-25=4, bit27=1) let raw: u32 = (5u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (4 << 6) | (1 << 4); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(ctx.vr[3].as_f32x4(), [16.0f32; 4], "VA×VD+VB = 2*3+10 = 16"); } /// VMX128 `vnmsubfp128 vD, vA, vB` (key2 = 0b010101). Canary /// `ppc_emit_altivec.cc:1133-1160` flushes all three inputs in the /// helper. Semantics: `vD <- -((vA * vB) - vD) = vD - vA*vB`. #[test] fn vnmsubfp128_flushes_denormal_inputs() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); let denorm = f32::from_bits(1); ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm; 4]); ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([1.0f32; 4]); // vnmsubfp128 vD=v2, vA=v2, vB=v3: key2 = 0b010101 (21) encoded // via bits 22-25 = 0101 and bit 27 = 1. 
let raw: u32 = 0x1440_1950; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); // Without flush: denorm - denorm*1.0 = 0 (but the intermediate // values propagate subnormals through the compute); with flush // everything is 0 cleanly. assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]); } /// VMX128 `vmsum4fp128 vD, vA, vB` computes the 4-lane dot product /// and broadcasts the result. Canary /// `ppc_emit_altivec.cc:1077-1084` flushes the *output* denormal /// (not the inputs). A dot product that sums to a subnormal must /// read back as 0. #[test] fn vmsum4fp128_flushes_denormal_output() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); let denorm = f32::from_bits(1); // Dot product = denorm * 1.0 + 0 + 0 + 0 = denorm. ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm, 0.0, 0.0, 0.0]); ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([1.0f32, 0.0, 0.0, 0.0]); // vmsum4fp128 vD=v2, vA=v2, vB=v3: key2 = 0b011101 (29). let raw: u32 = 0x1440_19D0; write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); // Subnormal output must flush to 0 and broadcast across all lanes. 
assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]); } // ---------- Phase 2k: lve*x / stve*x element masking ---------- #[test] fn lvebx_loads_byte_into_ea_slot() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); mem.write_u8(0x1003, 0xAB); ctx.gpr[4] = 0x1003; ctx.gpr[5] = 0; // lvebx v1, r4, r5 : (31<<26) | (1<<21) | (4<<16) | (5<<11) | (7<<1) let raw = (31u32 << 26) | (1 << 21) | (4 << 16) | (5 << 11) | (7 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); let bytes = ctx.vr[1].as_bytes(); // Byte at slot 3 (EA & 0xF = 3) assert_eq!(bytes[3], 0xAB); // Other bytes zero for i in 0..16 { if i != 3 { assert_eq!(bytes[i], 0, "byte {} should be zero", i); } } } #[test] fn stvewx_stores_only_word_slot() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); // Prepare surrounding memory with a sentinel value so we can check non-overwrite. for i in 0..16 { mem.write_u8(0x1000 + i, 0x55); } // vS lanes: (big-endian view) word0=0xDEADBEEF, word1..3=0. let mut src = [0u8; 16]; src[0] = 0xDE; src[1] = 0xAD; src[2] = 0xBE; src[3] = 0xEF; ctx.vr[1] = xenia_types::Vec128::from_bytes(src); // EA = 0x1000 (slot 0): store word0 at 0x1000. ctx.gpr[4] = 0x1000; ctx.gpr[5] = 0; // stvewx v1, r4, r5 : (31<<26) | (1<<21) | (4<<16) | (5<<11) | (199<<1) let raw = (31u32 << 26) | (1 << 21) | (4 << 16) | (5 << 11) | (199 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); assert_eq!(mem.read_u32(0x1000), 0xDEAD_BEEF); // Adjacent bytes untouched (still 0x55). 
for i in 4..16 { assert_eq!(mem.data[0x1000 + i as usize].get(), 0x55, "byte {} was overwritten", 0x1000+i); } } // ---------- Phase 2l: reservation cache-line granule ---------- #[test] fn stwcx_succeeds_within_same_cache_line() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); mem.write_u32(0x1004, 0xAAAA_AAAA); ctx.gpr[4] = 0x1004; ctx.gpr[5] = 0; // lwarx r3, r4, r5 : (31<<26)|(3<<21)|(4<<16)|(5<<11)|(20<<1) let ld = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); write_instr(&mut mem, 0, ld); ctx.pc = 0; step(&mut ctx, &mut mem); assert!(ctx.has_reservation); // Now stwcx. to a DIFFERENT address in the SAME cache line (offset within 128 bytes). ctx.gpr[4] = 0x1008; // 4 bytes over; same line. ctx.gpr[6] = 0xBBBB_BBBB; let st = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; write_instr(&mut mem, 4, st); step(&mut ctx, &mut mem); // Matches cache line → succeeds. assert!(ctx.cr[0].eq); assert_eq!(mem.read_u32(0x1008), 0xBBBB_BBBB); } #[test] fn stwcx_fails_across_cache_lines() { let mut ctx = PpcContext::new(); let mut mem = TestMem::new(); ctx.gpr[4] = 0x1000; ctx.gpr[5] = 0; let ld = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); write_instr(&mut mem, 0, ld); ctx.pc = 0; step(&mut ctx, &mut mem); // Different cache line (0x1080). ctx.gpr[4] = 0x1080; ctx.gpr[6] = 0xCCCC_CCCC; let st = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; write_instr(&mut mem, 4, st); step(&mut ctx, &mut mem); assert!(!ctx.cr[0].eq, "should fail across cache line"); assert_eq!(mem.read_u32(0x1080), 0, "memory not written on failure"); } // ---------- PPCBUG-107/140: invalidate_for_write via plain stw ---------- /// PPCBUG-107/140: A plain `stw` to a reserved line must invalidate the /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). 
#[test] fn lwarx_then_plain_stw_invalidates_reservation() { let table = std::sync::Arc::new(crate::ReservationTable::new()); table.enable(); let mut ctx = PpcContext::new(); ctx.reservation_table = Some(table.clone()); ctx.hw_id = 0; let mut mem = TestMem::new(); // Set up registers: r4=0x1000 (target addr), r5=0 (index), r6=plain store val, r7=stwcx val. ctx.gpr[4] = 0x1000; ctx.gpr[5] = 0; ctx.gpr[6] = 0xBBBB_BBBB; ctx.gpr[7] = 0xCCCC_CCCC; // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); write_instr(&mut mem, 0, lwarx); // Instr 1: stw r6, 0(r4) (opcode 36, D-form) let stw_plain = (36u32 << 26) | (6 << 21) | (4 << 16) | 0; write_instr(&mut mem, 4, stw_plain); // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; write_instr(&mut mem, 8, stwcx); // Execute lwarx — reserves 0x1000's cache line. ctx.pc = 0; step(&mut ctx, &mut mem); assert!(ctx.has_reservation, "lwarx must set has_reservation"); // Execute plain stw — must call invalidate_for_write and clear the reservation. step(&mut ctx, &mut mem); assert_eq!(mem.read_u32(0x1000), 0xBBBB_BBBB, "plain stw must land"); // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). step(&mut ctx, &mut mem); assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stw"); // Memory must still hold the value from the plain stw, not from stwcx.. assert_eq!(mem.read_u32(0x1000), 0xBBBB_BBBB, "stwcx. must not overwrite on failure"); } /// Regression: without any intervening store, `lwarx` + `stwcx.` must still /// succeed (CR0.EQ=1). Ensures the fix didn't accidentally break the happy path. 
#[test] fn lwarx_then_stwcx_succeeds_without_intervening_store() { let table = std::sync::Arc::new(crate::ReservationTable::new()); table.enable(); let mut ctx = PpcContext::new(); ctx.reservation_table = Some(table.clone()); ctx.hw_id = 0; let mut mem = TestMem::new(); ctx.gpr[4] = 0x1000; ctx.gpr[5] = 0; ctx.gpr[7] = 0xDEAD_BEEF; // Instr 0: lwarx r3, r4, r5 let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); write_instr(&mut mem, 0, lwarx); // Instr 1: stwcx. r7, r4, r5 let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; write_instr(&mut mem, 4, stwcx); ctx.pc = 0; step(&mut ctx, &mut mem); assert!(ctx.has_reservation, "lwarx must set has_reservation"); step(&mut ctx, &mut mem); assert!(ctx.cr[0].eq, "stwcx. must succeed when reservation is intact"); assert_eq!(mem.read_u32(0x1000), 0xDEAD_BEEF, "stwcx. must write on success"); } // ---------- PPCBUG-130: invalidate_for_write via plain stb ---------- /// PPCBUG-130: A plain `stb` to a reserved line must invalidate the /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). #[test] fn lwarx_then_plain_stb_invalidates_reservation() { let table = std::sync::Arc::new(crate::ReservationTable::new()); table.enable(); let mut ctx = PpcContext::new(); ctx.reservation_table = Some(table.clone()); ctx.hw_id = 0; let mut mem = TestMem::new(); // r4=0x1000 (target addr), r5=0 (index), r6=byte store val, r7=stwcx val. ctx.gpr[4] = 0x1000; ctx.gpr[5] = 0; ctx.gpr[6] = 0xAB; ctx.gpr[7] = 0xCCCC_CCCC; // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); write_instr(&mut mem, 0, lwarx); // Instr 1: stb r6, 0(r4) (opcode 38, D-form) let stb_plain = (38u32 << 26) | (6 << 21) | (4 << 16) | 0; write_instr(&mut mem, 4, stb_plain); // Instr 2: stwcx. 
r7, r4, r5 (opcode 31, XO 150, Rc=1) let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; write_instr(&mut mem, 8, stwcx); // Execute lwarx — reserves 0x1000's cache line. ctx.pc = 0; step(&mut ctx, &mut mem); assert!(ctx.has_reservation, "lwarx must set has_reservation"); // Execute plain stb — must call invalidate_for_write and clear the reservation. step(&mut ctx, &mut mem); assert_eq!(mem.read_u8(0x1000), 0xAB, "plain stb must land"); // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). step(&mut ctx, &mut mem); assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stb"); assert_eq!(mem.read_u8(0x1000), 0xAB, "stwcx. must not overwrite on failure"); } // ---------- PPCBUG-150: invalidate_for_write via plain std ---------- /// PPCBUG-150: A plain `std` to a reserved line must invalidate the /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). #[test] fn lwarx_then_plain_std_invalidates_reservation() { let table = std::sync::Arc::new(crate::ReservationTable::new()); table.enable(); let mut ctx = PpcContext::new(); ctx.reservation_table = Some(table.clone()); ctx.hw_id = 0; let mut mem = TestMem::new(); // r4=0x1000 (target addr), r5=0 (index), r6=doubleword store val, r7=stwcx val. ctx.gpr[4] = 0x1000; ctx.gpr[5] = 0; ctx.gpr[6] = 0xDEADBEEF_CAFEBABEu64; ctx.gpr[7] = 0xCCCC_CCCC; // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20) let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); write_instr(&mut mem, 0, lwarx); // Instr 1: std r6, 0(r4) (opcode 62, DS-form, XO=0b00) let std_plain = (62u32 << 26) | (6 << 21) | (4 << 16) | 0; write_instr(&mut mem, 4, std_plain); // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1) let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; write_instr(&mut mem, 8, stwcx); // Execute lwarx — reserves 0x1000's cache line. 
ctx.pc = 0; step(&mut ctx, &mut mem); assert!(ctx.has_reservation, "lwarx must set has_reservation"); // Execute plain std — must call invalidate_for_write and clear the reservation. step(&mut ctx, &mut mem); assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "plain std must land"); // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0). step(&mut ctx, &mut mem); assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain std"); assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. must not overwrite on failure"); } // ---------- PPCBUG-160: stmw multi-line invalidation ---------- /// PPCBUG-160: `stmw r28, 0x78(r4)` with r4=0x1000 writes four words at /// 0x1078, 0x107C, 0x1080, 0x1084 — crossing the cache-line boundary at /// 0x1080 (RESERVATION_MASK=0x7F, line size=128 bytes). /// /// A reservation on the *second* line (0x1080) must be invalidated even /// though the store starts in the first line (0x1000-0x107F). This /// verifies the multi-line loop added to the stmw arm. #[test] fn lwarx_then_plain_stmw_spans_two_lines_and_invalidates() { let table = std::sync::Arc::new(crate::ReservationTable::new()); table.enable(); let mut ctx = PpcContext::new(); ctx.reservation_table = Some(table.clone()); ctx.hw_id = 0; let mut mem = TestMem::new(); // r4=0x1080 (reservation address — second line), r5=0 (index for lwarx/stwcx.) // r8=0x1000 (stmw base), r28-r31 = store values ctx.gpr[4] = 0x1080; ctx.gpr[5] = 0; ctx.gpr[8] = 0x1000; ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. 
value ctx.gpr[28] = 0xAAAA_0001; ctx.gpr[29] = 0xBBBB_0002; ctx.gpr[30] = 0xCCCC_0003; ctx.gpr[31] = 0xDDDD_0004; // Instr 0: lwarx r3, r4, r5 — reserves line containing 0x1080 let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1); write_instr(&mut mem, 0, lwarx); // Instr 1: stmw r28, 0x78(r8) — writes 4 words at 0x1078,0x107C,0x1080,0x1084 // opcode=47, rs=28, ra=8, d=0x0078 let stmw = (47u32 << 26) | (28 << 21) | (8 << 16) | 0x0078; write_instr(&mut mem, 4, stmw); // Instr 2: stwcx. r7, r4, r5 — should fail (line 2 reservation invalidated) let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1; write_instr(&mut mem, 8, stwcx); // Execute lwarx — reserves 0x1080's cache line. ctx.pc = 0; step(&mut ctx, &mut mem); assert!(ctx.has_reservation, "lwarx must set has_reservation"); // Execute stmw — must invalidate both lines including the one reserved at 0x1080. step(&mut ctx, &mut mem); assert_eq!(mem.read_u32(0x1078), 0xAAAA_0001, "stmw word 0 must land"); assert_eq!(mem.read_u32(0x107C), 0xBBBB_0002, "stmw word 1 must land"); assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stmw word 2 (line 2) must land"); assert_eq!(mem.read_u32(0x1084), 0xDDDD_0004, "stmw word 3 must land"); // Execute stwcx. — reservation on line 2 (0x1080) was invalidated; must fail. step(&mut ctx, &mut mem); assert!(!ctx.cr[0].eq, "stwcx. must fail: stmw invalidated the reservation on line 2"); assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stwcx. must not overwrite on failure"); } // ---------- PPCBUG-167: invalidate_for_write via plain stfd ---------- /// PPCBUG-167: A plain `stfd` to a reserved line must invalidate the /// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0). /// Also verifies big-endian byte layout of the stored double. 
#[test]
fn lwarx_then_plain_stfd_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1000 (target addr), r5=0 (index), r7=stwcx val.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[7] = 0xCCCC_CCCC;
    // FPR 5 holds a specific bit pattern.
    ctx.fpr[5] = f64::from_bits(0xCAFEBABE_DEADBEEFu64);

    // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);
    // Instr 1: stfd f5, 0(r4) (opcode 54, D-form); displacement field is 0.
    let stfd_plain = (54u32 << 26) | (5 << 21) | (4 << 16);
    write_instr(&mut mem, 4, stfd_plain);
    // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 8, stwcx);

    // Execute lwarx — reserves 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Execute plain stfd — must call invalidate_for_write and clear the reservation.
    step(&mut ctx, &mut mem);
    // write_f64 delegates to write_u64, which writes big-endian; verify layout.
    assert_eq!(
        mem.read_u64(0x1000),
        0xCAFEBABE_DEADBEEFu64,
        "stfd must store FPR bit pattern in big-endian order"
    );

    // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
    step(&mut ctx, &mut mem);
    assert!(
        !ctx.cr[0].eq,
        "stwcx. must fail after reservation was invalidated by plain stfd"
    );
    assert_eq!(
        mem.read_u64(0x1000),
        0xCAFEBABE_DEADBEEFu64,
        "stwcx. must not overwrite on failure"
    );
}

// ---------- Phase 2m: SPR DEC + TBL/TBU write ----------

/// `mfspr rD, DEC` must return the context's `dec` field.
#[test]
fn mfspr_dec_returns_dec_field() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.dec = 0x1234_5678;
    // mfspr r3, DEC (22). SPR encoded with halves swapped: (22 & 0x1F)<<5 | (22>>5)&0x1F = 0x2C0 in bits 11..20.
    // The decoder does the un-swap, so the raw SPR field stores the swapped form.
    let spr_swapped = ((22u32 & 0x1F) << 5) | ((22u32 >> 5) & 0x1F);
    let raw = (31u32 << 26) | (3 << 21) | (spr_swapped << 11) | (339 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.gpr[3], 0x1234_5678);
}

/// `mtspr TBL_WRITE` must replace only the low 32 bits of the timebase.
#[test]
fn mtspr_tbl_write_updates_low_half() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.timebase = 0xAAAA_BBBB_CCCC_DDDD;
    ctx.gpr[3] = 0x1111_2222;
    // mtspr TBL_WRITE (284), r3
    let spr_swapped = ((284u32 & 0x1F) << 5) | ((284u32 >> 5) & 0x1F);
    let raw = (31u32 << 26) | (3 << 21) | (spr_swapped << 11) | (467 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // step() post-increments timebase by 1, so the observable low half is value+1.
    assert_eq!(ctx.timebase & 0xFFFF_FFFF, 0x1111_2222u64 + 1);
    assert_eq!(ctx.timebase >> 32, 0xAAAA_BBBB);
}

// PPCBUG-053: bcx CTR zero-test must use 32-bit comparison. When prior
// 64-bit pollution (e.g. via negx → mtctr) leaves CTR upper 32 bits
// non-zero, the 64-bit `ctx.ctr != 0` would loop forever even when the
// 32-bit counter has decremented to zero.
#[test]
fn bcx_bdnz_uses_32bit_ctr_compare() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.ctr = 0x0000_0001_0000_0001;
    // bdnz +8: BO=16 (decrement, branch if CTR!=0, ignore CR), BI=0, BD/4=2
    let raw = (16u32 << 26) | (16 << 21) | (0 << 16) | (2 << 2);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // After decrement: low 32 = 0, high 32 = 1. 32-bit test says zero → no branch.
    assert_eq!(ctx.ctr, 0x0000_0001_0000_0000);
    assert_eq!(ctx.pc, 4);
}

/// Same 32-bit CTR zero-test as above, for the branch-to-LR form.
#[test]
fn bclrx_uses_32bit_ctr_compare() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.ctr = 0x0000_0001_0000_0001;
    ctx.lr = 0x100;
    // bdnzlr: opcode 19, BO=16 (decrement, branch if CTR!=0), BI=0, XO=16
    let raw = (19u32 << 26) | (16 << 21) | (0 << 16) | (16 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // 32-bit CTR=0 after decrement → don't branch to LR.
    assert_eq!(ctx.ctr, 0x0000_0001_0000_0000);
    assert_eq!(ctx.pc, 4);
}

// PPCBUG-054: mtspr CTR must truncate the source GPR to 32 bits, matching
// canary's `f.Truncate(ctr, INT32_TYPE)`. Prevents upstream 64-bit GPR
// pollution from poisoning the 32-bit CTR counter independently of the
// bcx zero-test fix.
#[test]
fn mtspr_ctr_truncates_to_32_bits() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.gpr[3] = 0xFFFF_FFFF_8000_0001;
    // mtspr CTR (9), r3
    let spr_swapped = ((9u32 & 0x1F) << 5) | ((9u32 >> 5) & 0x1F);
    let raw = (31u32 << 26) | (3 << 21) | (spr_swapped << 11) | (467 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.ctr, 0x8000_0001);
}

// ---------- Block-cache parity tests ----------
//
// These confirm that running a program through the basic-block
// cache (crate::block_cache::BlockCache + step_block) produces a
// bit-identical PpcContext to running it through step_cached
// (per-instruction). If this ever fails the block cache is not
// safe to engage in production.

/// Encode `addi rd, ra, simm` (primary opcode 14, D-form).
fn enc_addi_t(rd: u32, ra: u32, simm: i16) -> u32 {
    (14 << 26) | (rd << 21) | (ra << 16) | (simm as u16 as u32)
}

/// Encode `lwz rd, d(ra)` (primary opcode 32, D-form).
fn enc_lwz_t(rd: u32, ra: u32, d: i16) -> u32 {
    (32 << 26) | (rd << 21) | (ra << 16) | (d as u16 as u32)
}

/// Encode `stw rs, d(ra)` (primary opcode 36, D-form).
fn enc_stw_t(rs: u32, ra: u32, d: i16) -> u32 {
    (36 << 26) | (rs << 21) | (ra << 16) | (d as u16 as u32)
}

/// Encode a relative `b` whose target is `li_words` instructions away
/// (negative values branch backwards).
fn enc_b_t(li_words: i32) -> u32 {
    // Branch: opcode 18, AA=0, LK=0, LI = li_words << 2 (signed).
    let li = (li_words as u32) & 0x00FF_FFFF;
    (18u32 << 26) | (li << 2)
}

/// Snapshot of the parts of `PpcContext` that block_matches_per_instr
/// is asked to keep identical between dispatch paths. Comparing the
/// whole struct is impractical (vector regs, fp regs, large arrays);
/// the GPR file + pc + lr + ctr + cr + cycle counter cover everything the
/// interpreter touches in the test programs below.
#[derive(Debug, PartialEq, Eq)]
struct CtxSnap {
    gpr: [u64; 32],
    pc: u32,
    lr: u64,
    ctr: u64,
    cycle_count: u64,
    cr_packed: u32,
}

impl CtxSnap {
    /// Copy the compared fields out of `ctx`.
    fn from(ctx: &PpcContext) -> Self {
        Self {
            gpr: ctx.gpr,
            pc: ctx.pc,
            lr: ctx.lr,
            ctr: ctx.ctr,
            cycle_count: ctx.cycle_count,
            cr_packed: ctx.cr(),
        }
    }
}

/// Load `prog` at address 0, seed the GPRs listed in `init_gpr`, and run
/// exactly `iters` instructions through [`step_cached`].
fn run_per_instruction(prog: &[u32], iters: u32, init_gpr: &[(usize, u64)]) -> CtxSnap {
    let mut ctx = PpcContext::new();
    for &(i, v) in init_gpr {
        ctx.gpr[i] = v;
    }
    let mut mem = TestMem::new();
    for (i, &raw) in prog.iter().enumerate() {
        write_instr(&mut mem, (i as u32) * 4, raw);
    }
    let mut cache = crate::decoder::DecodeCache::new();
    ctx.pc = 0;
    for _ in 0..iters {
        // Run one instruction at a time. Memory has constant
        // page_version (default trait impl returns 1) so the cache
        // entries stay valid forever.
        let r = step_cached(&mut ctx, &mut mem, &mut cache, 1);
        assert!(matches!(r, StepResult::Continue));
    }
    CtxSnap::from(&ctx)
}

/// Load `prog` at address 0, seed the GPRs listed in `init_gpr`, and run
/// whole blocks through [`step_block`] until at least `iters` instructions
/// have executed (measured via `cycle_count`).
fn run_block(prog: &[u32], iters: u32, init_gpr: &[(usize, u64)]) -> CtxSnap {
    let mut ctx = PpcContext::new();
    for &(i, v) in init_gpr {
        ctx.gpr[i] = v;
    }
    let mut mem = TestMem::new();
    for (i, &raw) in prog.iter().enumerate() {
        write_instr(&mut mem, (i as u32) * 4, raw);
    }
    let mut bc = crate::block_cache::BlockCache::new();
    ctx.pc = 0;
    let mut total_steps = 0u32;
    // Iterate by *blocks* until we've covered at least `iters`
    // instructions. The block path runs N instructions per call
    // where N is the block length; we still want to compare on a
    // per-instruction footing, so accumulate cycle_count.
    while total_steps < iters {
        // Detach the block reference from the borrow of `bc` via a raw
        // pointer so it can be handed to step_block below.
        // NOTE(review): step_block takes `&dyn MemoryAccess`, so a plain
        // borrow may be enough here — confirm against the
        // `lookup_or_build` signature before removing the indirection.
        let block_ptr: *const crate::block_cache::DecodedBlock = {
            let b: &crate::block_cache::DecodedBlock = bc.lookup_or_build(ctx.pc, &mem);
            b
        };
        // SAFETY: the BlockCache::lookup_or_build contract is that
        // the returned reference stays valid until the next
        // lookup_or_build on the same cache. We don't call
        // lookup_or_build inside step_block and we drop the raw
        // pointer at the end of the iteration, so no aliasing.
        let block: &crate::block_cache::DecodedBlock = unsafe { &*block_ptr };
        let n_before = ctx.cycle_count;
        let r = step_block(&mut ctx, &mut mem, block);
        assert!(matches!(r, StepResult::Continue));
        let stepped = (ctx.cycle_count - n_before) as u32;
        // A block that executes nothing would never advance `total_steps`
        // and hang the test; fail loudly instead.
        assert!(stepped > 0, "step_block made no progress at pc={:#x}", ctx.pc);
        total_steps += stepped;
    }
    CtxSnap::from(&ctx)
}

#[test]
fn block_dispatch_matches_per_instruction_alu_loop() {
    // 4-instruction loop: r3 += 1, r3 += 2, r3 += 3, b -12 (back to start).
    let prog = [
        enc_addi_t(3, 3, 1),
        enc_addi_t(3, 3, 2),
        enc_addi_t(3, 3, 3),
        enc_b_t(-3), // -3 words → back to instr 0
    ];
    let init = [(3usize, 0u64)];
    let snap_a = run_per_instruction(&prog, 100, &init);
    let snap_b = run_block(&prog, 100, &init);
    assert_eq!(snap_a, snap_b);
}

#[test]
fn block_dispatch_matches_per_instruction_loadstore_loop() {
    // r4 = 0x800 (data pointer), r3 = 1
    // loop:
    //   stw r3, 0(r4)
    //   lwz r5, 0(r4)
    //   addi r3, r5, 1
    //   b -12
    let prog = [
        enc_stw_t(3, 4, 0),
        enc_lwz_t(5, 4, 0),
        enc_addi_t(3, 5, 1),
        enc_b_t(-3),
    ];
    let init = [(3usize, 1u64), (4usize, 0x800u64)];
    let snap_a = run_per_instruction(&prog, 200, &init);
    let snap_b = run_block(&prog, 200, &init);
    assert_eq!(snap_a, snap_b);
}

#[test]
fn mcrfs_moves_fpscr_nibble_and_clears_exception_bits() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    // Set FPSCR bit 0 (FX) = 1 and bit 3 (OX) = 1.
In our layout: // FX at (31-0) = 31 // OX at (31-3) = 28 ctx.fpscr = (1u32 << 31) | (1u32 << 28); // mcrfs crfD=2, crfS=0: (63 << 26) | (crfD<<23) | (crfS<<18) | (64<<1) let raw = (63 << 26) | (2 << 23) | (0 << 18) | (64 << 1); write_instr(&mut mem, 0, raw); ctx.pc = 0; step(&mut ctx, &mut mem); // FPSCR bits 0..3 of crfS=0 → ctx.cr[2] should have FX(lt)=1 and OX(so)=0 // and the FEX/VX nibble positions are bits 1,2 // Nibble contents: FX=1, FEX=0, VX=0, OX=1 → 0b1001 = 9 assert_eq!(ctx.cr[2].as_u8(), 0b1001); // FX and OX are clearable → FPSCR now has those nibble bits cleared assert_eq!(ctx.fpscr & (1 << 31), 0, "FX cleared"); assert_eq!(ctx.fpscr & (1 << 28), 0, "OX cleared"); } /// Regression: `subfze` is `RT ← !RA + CA` (no -1 term), so 64-bit /// carry-out only happens when `RA == 0 && CA == 1`. The previous /// predicate (`!ra != 0 || ca != 0`) was copy-pasted from `subfme` /// and reported CA=1 in nearly every case. #[test] fn test_subfze_carry_only_when_ra_zero_and_ca_one() { // subfze rD, rA: opcode 31, XO=200 (bits 22-30), OE=0, Rc=0. // Encoding: (31<<26) | (rd<<21) | (ra<<16) | (200<<1) let raw = (31u32 << 26) | (3 << 21) | (4 << 16) | (200 << 1); // Case 1: ra=0, ca=1 → CA=1 (the only carry case) { let mut ctx = PpcContext::new(); let mem = TestMem::new(); write_instr(&mem, 0, raw); ctx.pc = 0; ctx.gpr[4] = 0; ctx.xer_ca = 1; step(&mut ctx, &mem); assert_eq!(ctx.xer_ca, 1, "ra=0, ca=1 should produce CA=1"); assert_eq!(ctx.gpr[3], 0, "result = !0 + 1 = 0 (wraps)"); } // Case 2: ra=0, ca=0 → CA=0 (old buggy code reported CA=1) { let mut ctx = PpcContext::new(); let mem = TestMem::new(); write_instr(&mem, 0, raw); ctx.pc = 0; ctx.gpr[4] = 0; ctx.xer_ca = 0; step(&mut ctx, &mem); assert_eq!(ctx.xer_ca, 0, "ra=0, ca=0 should produce CA=0"); // PPCBUG-018: 32-bit ABI. !0u32 + 0 = u32::MAX, with upper 32 bits zero. 
assert_eq!(ctx.gpr[3], 0xFFFF_FFFFu64, "result = !0u32 + 0 = u32::MAX"); } // Case 3: ra=1, ca=0 → CA=0 (old buggy code reported CA=1) { let mut ctx = PpcContext::new(); let mem = TestMem::new(); write_instr(&mem, 0, raw); ctx.pc = 0; ctx.gpr[4] = 1; ctx.xer_ca = 0; step(&mut ctx, &mem); assert_eq!(ctx.xer_ca, 0, "ra=1, ca=0 should produce CA=0"); // PPCBUG-018: 32-bit ABI. !1u32 + 0 = u32::MAX - 1, with upper 32 bits zero. assert_eq!(ctx.gpr[3], 0xFFFF_FFFEu64, "result = !1u32 + 0 = u32::MAX - 1"); } // Case 4: ra=u32::MAX, ca=1 → CA=0; result = !u32::MAX + 1 = 1. { let mut ctx = PpcContext::new(); let mem = TestMem::new(); write_instr(&mem, 0, raw); ctx.pc = 0; ctx.gpr[4] = 0xFFFF_FFFFu64; ctx.xer_ca = 1; step(&mut ctx, &mem); assert_eq!(ctx.xer_ca, 0, "ra=u32::MAX, ca=1 should produce CA=0"); assert_eq!(ctx.gpr[3], 1, "result = !u32::MAX + 1 = 1"); } } /// Regression: `cmp` (L=1) must not derive LT/GT from the sign of a /// (potentially overflowing) 64-bit subtract. The old code used /// `update_cr_signed(bf, ra.wrapping_sub(rb))` which mis-signed the /// result for boundary i64 values like `ra=i64::MIN, rb=1`. 
#[test]
fn test_cmp_signed_at_i64_boundaries() {
    // cmp BF=0, L=1, RA, RB: (31<<26) | (1<<21) | (ra<<16) | (rb<<11)
    // (XO=0; Rc field is reserved on cmp, leave 0)
    let raw = |ra: u32, rb: u32| (31u32 << 26) | (1 << 21) | (ra << 16) | (rb << 11);

    // i64::MIN < 1 → LT must be set
    {
        let mut ctx = PpcContext::new();
        let mem = TestMem::new();
        write_instr(&mem, 0, raw(3, 4));
        ctx.pc = 0;
        ctx.gpr[3] = i64::MIN as u64;
        ctx.gpr[4] = 1;
        step(&mut ctx, &mem);
        assert!(ctx.cr[0].lt, "i64::MIN < 1 must be LT");
        assert!(!ctx.cr[0].gt);
        assert!(!ctx.cr[0].eq);
    }

    // i64::MAX > -1 → GT must be set (the symmetric overflow corner)
    {
        let mut ctx = PpcContext::new();
        let mem = TestMem::new();
        write_instr(&mem, 0, raw(3, 4));
        ctx.pc = 0;
        ctx.gpr[3] = i64::MAX as u64;
        ctx.gpr[4] = (-1i64) as u64;
        step(&mut ctx, &mem);
        assert!(!ctx.cr[0].lt);
        assert!(ctx.cr[0].gt, "i64::MAX > -1 must be GT");
        assert!(!ctx.cr[0].eq);
    }

    // Equal at the extreme is still EQ
    {
        let mut ctx = PpcContext::new();
        let mem = TestMem::new();
        write_instr(&mem, 0, raw(3, 4));
        ctx.pc = 0;
        ctx.gpr[3] = i64::MIN as u64;
        ctx.gpr[4] = i64::MIN as u64;
        step(&mut ctx, &mem);
        assert!(!ctx.cr[0].lt);
        assert!(!ctx.cr[0].gt);
        assert!(ctx.cr[0].eq, "i64::MIN == i64::MIN must be EQ");
    }
}

// ---------- PPCBUG-511/513: invalidate_for_write via VMX stores ----------

/// PPCBUG-511: A plain `stvx` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
#[test]
fn lwarx_then_plain_stvx_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1000 (reservation + store address), r5=0 (index for lwarx/stwcx.), r7=stwcx val.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[7] = 0xCCCC_CCCC;

    // VR 0: recognizable pattern to confirm the store lands.
    ctx.vr[0] = xenia_types::Vec128::from_bytes([0xAA; 16]);

    // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);

    // Instr 1: stvx v0, r0, r4 (opcode 31, XO 231; rA=0 → base=0, EA = 0 + r4 = 0x1000, aligned)
    // (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=4<<11) | (231<<1)
    let stvx = (31u32 << 26) | (0 << 21) | (0 << 16) | (4 << 11) | (231 << 1);
    write_instr(&mut mem, 4, stvx);

    // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 8, stwcx);

    // Execute lwarx — reserves 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Execute stvx — must call invalidate_for_write and clear the reservation.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u8(0x1000), 0xAA, "stvx must write the VR bytes");

    // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvx");
    // The byte written by stvx must survive: a failed stwcx. stores nothing.
    assert_eq!(mem.read_u8(0x1000), 0xAA, "stwcx. must not overwrite on failure");
}

/// PPCBUG-513: A plain `stvlx` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
/// stvlx with EA=0x1003 writes bytes 0x1003-0x100F (13 bytes from VR0's high lanes).
#[test]
fn lwarx_then_plain_stvlx_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // Reserve at 0x1000 (same cache line as the stvlx target 0x1003).
    ctx.gpr[4] = 0x1000; // lwarx/stwcx. reservation address
    ctx.gpr[5] = 0; // index register (0 for lwarx/stwcx.)
    ctx.gpr[6] = 0x1003; // stvlx EA: rb=6, ra=0 → ea = 0 + 0x1003 = 0x1003
    ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value

    // VR 0: recognizable pattern.
    ctx.vr[0] = xenia_types::Vec128::from_bytes([0xBB; 16]);

    // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);

    // Instr 1: stvlx v0, r0, r6 (opcode 31, XO 647; rA=0 → base=0, EA = r6 = 0x1003)
    // store_vector_left writes shift=3 skipped bytes, then bytes 3..15 of VR0 → 0x1003..0x100F
    // (31<<26) | (vs=0<<21) | (ra=0<<16) | (rb=6<<11) | (647<<1)
    let stvlx = (31u32 << 26) | (0 << 21) | (0 << 16) | (6 << 11) | (647 << 1);
    write_instr(&mut mem, 4, stvlx);

    // Instr 2: stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 8, stwcx);

    // Execute lwarx — reserves 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Execute stvlx — must call invalidate_for_write and clear the reservation.
    step(&mut ctx, &mut mem);
    // store_vector_left(ea=0x1003): shift=3, n=13 → writes bytes 0x1003-0x100F = 0xBB.
    assert_eq!(mem.read_u8(0x1003), 0xBB, "stvlx must write VR bytes starting at EA");
    assert_eq!(mem.read_u8(0x100F), 0xBB, "stvlx must write up to (ea & !0xF)+15");

    // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvlx");
}

// ---------- PPCBUG-151: cross-width reservation pairs must fail ----------

/// PPCBUG-151: `lwarx` (width=4) followed by `stdcx.` (requires width=8)
/// must fail with CR0.EQ=0. Memory must remain unchanged.
#[test]
fn lwarx_then_stdcx_cross_width_fails() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1000 (target addr), r5=0 (index), r6=value to (attempt to) store.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xDEAD_BEEF_CAFE_BABEu64;

    // Instr 0: lwarx r3, r4, r5 (opcode 31, XO 20, Rc=0)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);

    // Instr 1: stdcx. r6, r4, r5 (opcode 31, XO 214, Rc=1)
    let stdcx = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (214 << 1) | 1;
    write_instr(&mut mem, 4, stdcx);

    // Execute lwarx — must set a word reservation (width=4).
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    assert_eq!(ctx.reservation_width, 4, "lwarx must set reservation_width=4");

    // Execute stdcx. — width mismatch (needs 8, got 4); must fail.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stdcx. must fail when reservation was set by lwarx (cross-width)");

    // Memory at 0x1000-0x1007 must be unchanged (still zero).
    assert_eq!(mem.read_u64(0x1000), 0, "stdcx. must not write on cross-width failure");

    // Width must be cleared on exit.
    assert_eq!(ctx.reservation_width, 0, "stdcx. must clear reservation_width on exit");
}

/// PPCBUG-151: `ldarx` (width=8) followed by `stwcx.` (requires width=4)
/// must fail with CR0.EQ=0. Memory must remain unchanged.
#[test]
fn ldarx_then_stwcx_cross_width_fails() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1000 (target addr), r5=0 (index), r6=value to (attempt to) store.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xCCCC_CCCCu64;

    // Instr 0: ldarx r3, r4, r5 (opcode 31, XO 84, Rc=0)
    let ldarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (84 << 1);
    write_instr(&mut mem, 0, ldarx);

    // Instr 1: stwcx. r6, r4, r5 (opcode 31, XO 150, Rc=1)
    let stwcx = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 4, stwcx);

    // Execute ldarx — must set a doubleword reservation (width=8).
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "ldarx must set has_reservation");
    assert_eq!(ctx.reservation_width, 8, "ldarx must set reservation_width=8");

    // Execute stwcx. — width mismatch (needs 4, got 8); must fail.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail when reservation was set by ldarx (cross-width)");

    // Memory at 0x1000 must be unchanged (still zero).
    assert_eq!(mem.read_u32(0x1000), 0, "stwcx. must not write on cross-width failure");

    // Width must be cleared on exit.
    assert_eq!(ctx.reservation_width, 0, "stwcx. must clear reservation_width on exit");
}

/// Regression: `lvebx` must preserve the prior contents of the
/// destination VR for lanes other than the loaded byte. Previously
/// the handler started from a zeroed buffer.
#[test]
fn test_lvebx_preserves_other_lanes() {
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();

    // Pre-seed vr[3] with a recognizable pattern.
    let pattern: [u8; 16] = [
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    ];
    ctx.vr[3] = xenia_types::Vec128::from_bytes(pattern);

    // Place a byte at memory address 0x1004; EA & 0xF == 4 → slot 4.
    mem.write_u8(0x1004, 0xAB);

    // r4 = 0x1000, r5 = 4 → EA = 0x1004
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 4;

    // lvebx vD=3, rA=4, rB=5: opcode 31, XO=7 → (31<<26)|(3<<21)|(4<<16)|(5<<11)|(7<<1)
    let raw = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (7 << 1);
    write_instr(&mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mem);

    // Expected: lane 4 holds 0xAB, every other lane unchanged.
    let mut expected = pattern;
    expected[4] = 0xAB;
    assert_eq!(ctx.vr[3].as_bytes(), expected);
}

// ===== PPCBUG-046 / PPCBUG-561: rldicl / clrldi mb_md fix =====

/// Encode rldicl (MD-form, opcode=30, XO=0) in host bit notation.
/// rs: source register, ra: dest register, sh: shift amount (6-bit),
/// mb: mask-begin (6-bit), rc: record bit.
///
/// The 6-bit `sh` and `mb` values are split: their low five bits go in the
/// main fields (host bits 11-15 and 6-10) and their sixth bit is stored
/// separately (host bit 1 for `sh`, host bit 5 for `mb`).
fn encode_rldicl(rs: u32, ra: u32, sh: u32, mb: u32, rc: u32) -> u32 {
    (30 << 26)
        | (rs << 21)
        | (ra << 16)
        | ((sh & 0x1F) << 11)
        | ((mb & 0x1F) << 6)
        | (((mb >> 5) & 1) << 5)
        | (((sh >> 5) & 1) << 1)
        | (rc & 1)
}

#[test]
fn clrldi_zero_extends_low_32_bits() {
    // clrldi r3, r4, 32 = rldicl r3, r4, 0, 32, 0
    // After PPCBUG-046 fix: mask must be 0x00000000_FFFFFFFF (mb=32 → mask from bit 32 to 63)
    // If mb=32 was decoded as mb=0, the mask would be all-ones and the result would be 0xDEAD_BEEF_CAFE_BABE (no-op)
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    ctx.gpr[4] = 0xDEAD_BEEF_CAFE_BABE_u64;
    let raw = encode_rldicl(4, 3, 0, 32, 0); // sh=0, mb=32
    write_instr(&mem, 0x100, raw);
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    assert_eq!(ctx.gpr[3], 0x0000_0000_CAFE_BABE, "clrldi must zero-extend low 32 bits");
}

#[test]
fn rldicl_mb32_leaves_low_32_clean() {
    // Same as above but verify upper 32 are zeroed
    // (all-ones upper half in, different register pair r5 → r6).
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    ctx.gpr[5] = 0xFFFF_FFFF_1234_5678_u64;
    let raw = encode_rldicl(5, 6, 0, 32, 0);
    write_instr(&mem, 0x100, raw);
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    assert_eq!(ctx.gpr[6], 0x0000_0000_1234_5678_u64);
}

// ===== PPCBUG-275/276/562: vc_rc_bit fix for VC-form vcmpequb =====

/// VC-form: opcode=4 (VMX), vD at 6-10, vA at 11-15, vB at 16-20, Rc at PPC bit 21 = host bit 10, XO=6.
/// vcmpequb.: (4<<26)|(vD<<21)|(vA<<16)|(vB<<11)|(1<<10)|6
fn encode_vcmpequb_dot(vd: u32, va: u32, vb: u32) -> u32 {
    (4 << 26) | (vd << 21) | (va << 16) | (vb << 11) | (1 << 10) | 6
}

/// vcmpequb (no dot form): same but Rc=0
fn encode_vcmpequb(vd: u32, va: u32, vb: u32) -> u32 {
    (4 << 26) | (vd << 21) | (va << 16) | (vb << 11) | 6
}

#[test]
fn vcmpequb_dot_all_true_sets_cr6_lt() {
    // All bytes equal → all lanes 0xFF → CR6.LT=1 (all-true), CR6.EQ=0
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    let v = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[1] = v;
    ctx.vr[2] = v;
    write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2));
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    assert!(ctx.cr[6].lt, "all-true: CR6.LT must be 1");
    assert!(!ctx.cr[6].eq, "all-true: CR6.EQ must be 0");
}

#[test]
fn vcmpequb_no_dot_does_not_update_cr6() {
    // Without dot form, CR6 must be unchanged
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // Seed CR6 with LT=1 and EQ=1, a combination neither dot-form test
    // above/below expects, so any CR6 write by the instruction would show.
    ctx.cr[6] = crate::context::CrField { lt: true, gt: false, eq: true, so: false };
    let v = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[1] = v;
    ctx.vr[2] = v;
    write_instr(&mem, 0x100, encode_vcmpequb(0, 1, 2));
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    // CR6 unchanged: no dot form
    assert!(ctx.cr[6].lt && ctx.cr[6].eq, "CR6 must be unchanged without dot");
}

#[test]
fn vcmpequb_dot_all_false_sets_cr6_eq() {
    // No bytes equal → all lanes 0x00 → CR6.LT=0, CR6.EQ=1 (all-false)
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[2] = xenia_types::Vec128::from_bytes([0xBBu8; 16]);
    write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2));
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    assert!(!ctx.cr[6].lt, "all-false: CR6.LT must be 0");
    assert!(ctx.cr[6].eq, "all-false: CR6.EQ must be 1");
}

// ---- PPCBUG-363 + PPCBUG-369: vpkd3d128 post-pack permutation ----
//
// vpkd3d128 VD, VB, type, pack, shift: the low 2 bits of the IMM field
// select how the packed scalar/vector is merged back into the previous VD.
// pack=0 → identity (store out directly); pack=1 → 32-bit merge by shift;
// pack=2,3 → 64-bit merge by shift.
// Canary source: ppc_emit_altivec.cc:2126-2188.
//
// For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant), so
// vd128 is always in range [96, 127] for vd_lo in [0, 31].

/// Assemble a `vpkd3d128` instruction word for the tests below.
/// `vd` is split into its low five bits and next two bits, `vb_lo` is the
/// low part of the source register number, `imm` is the 5-bit IMM field
/// and `z` the 2-bit shift selector.
fn encode_vpkd3d128(vd: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
    // op6=6, FormatVX128_4 layout (canary):
    //   VD low at PPC 6-10 (host 21-25); VD high (2 bits) at PPC 28-29 (host 2-3).
    //   IMM at PPC 11-15; VB low at PPC 16-20.
    //   z (2-bit) at PPC 24-25 (host 6-7).
    //   key2 = 0b1100001 over bits 21-23 + 26-27:
    //     bits 21-23 = 0b110 → bit 21=1, bit 22=1, bit 23=0
    //     bits 26-27 = 0b01 → bit 26=0, bit 27=1
    let vd_lo = vd & 0x1F;
    let vd_hi = (vd >> 5) & 0x3;
    (6u32 << 26)
        | (vd_lo << 21)
        | (vd_hi << 2)
        | (imm << 16)
        | (vb_lo << 11)
        | (1 << 10) // bit 21 (key2)
        | (1 << 9) // bit 22 (key2)
        | (z << 6) // z at PPC 24-25
        | (1 << 4) // bit 27 (key2)
}

#[test]
fn vpkd3d128_pack0_legacy_unchanged() {
    // pack=0 → identity: result = out (packed value), no blend with prev vd.
    // type=0 (D3dColor), pack=0 → IMM=0; z=0 (don't care for pack=0).
    // vd=96 (vd_lo=0 | bits21=1,22=1→+96).
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // vb=1: R=1.0, G=0, B=0, A=0 → D3dColor packs to word (0<<24)|(255<<16)|(0<<8)|0 = 0x00FF0000
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
    // prev vd=96: sentinel values that should NOT appear in result
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD);
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 0, 0));
    ctx.pc = 0;
    step(&mut ctx, &mem);
    let r = ctx.vr[96].as_u32x4();
    // out = [0, 0, 0, 0x00FF_0000]; pack=0 → result = out
    assert_eq!(r[0], 0x0000_0000, "pack=0: lane 0 must be out[0]=0");
    assert_eq!(r[1], 0x0000_0000, "pack=0: lane 1 must be out[1]=0");
    assert_eq!(r[2], 0x0000_0000, "pack=0: lane 2 must be out[2]=0");
    assert_eq!(r[3], 0x00FF_0000, "pack=0: lane 3 must be packed D3dColor");
}

#[test]
fn vpkd3d128_pack1_shift0_d3d_vertex_pack() {
    // pack=1, shift=0 (VPACK_32): out[3] placed at lane 3; prev[0..2] preserved.
    // MakePermuteMask(0,0, 0,1, 0,2, 1,3) → [prev[0], prev[1], prev[2], out[3]]
    // IMM = (type=0 D3dColor << 2) | pack=1 = 1; z=0.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 0));
    ctx.pc = 0;
    step(&mut ctx, &mem);
    let r = ctx.vr[96].as_u32x4();
    assert_eq!(r[0], 0x1111_1111, "pack=1 shift=0: lane 0 from prev");
    assert_eq!(r[1], 0x2222_2222, "pack=1 shift=0: lane 1 from prev");
    assert_eq!(r[2], 0x3333_3333, "pack=1 shift=0: lane 2 from prev");
    assert_eq!(r[3], 0x00FF_0000, "pack=1 shift=0: lane 3 from out[3]");
}

#[test]
fn vpkd3d128_pack1_shift3_puts_out3_at_lane0() {
    // pack=1, shift=3 (VPACK_32): out[3] placed at lane 0; prev[1..3] preserved.
    // MakePermuteMask(1,3, 0,1, 0,2, 0,3) → [out[3], prev[1], prev[2], prev[3]]
    // IMM = 1; z=3.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 3));
    ctx.pc = 0;
    step(&mut ctx, &mem);
    let r = ctx.vr[96].as_u32x4();
    assert_eq!(r[0], 0x00FF_0000, "pack=1 shift=3: lane 0 from out[3]");
    assert_eq!(r[1], 0x2222_2222, "pack=1 shift=3: lane 1 from prev");
    assert_eq!(r[2], 0x3333_3333, "pack=1 shift=3: lane 2 from prev");
    assert_eq!(r[3], 0x4444_4444, "pack=1 shift=3: lane 3 from prev");
}

// ---- PPCBUG-510: stvewx128 should write one word (4 bytes), not 16 ----

/// Assemble a `stvewx128` instruction word: `vs_lo` is the low five bits
/// of the source vector register number, `ra`/`rb` are the address GPRs.
fn encode_stvewx128(vs_lo: u32, ra: u32, rb: u32) -> u32 {
    // stvewx128 is a VMX128 load/store at op6=4.
    // decode_op4 key1 = (bits21-27 << 4) | bits30-31 = 0b00110000011 for stvewx128.
    // bits21-27 = 0b0011000 (host bits 10-4), bits30-31 = 0b11 (host bits 1-0).
    // VS128[4:0] at host bits 25-21; RA at host bits 20-16; RB at host bits 15-11.
    // VS128[5] at host bit 3 (PPC bit 28); VS128[6] at host bit 1 (PPC bit 30).
    (4u32 << 26)
        | (vs_lo << 21) // VS128[4:0]
        | (ra << 16) // RA
        | (rb << 11) // RB
        | (0b0011000 << 4) // bits 21-27 of key1 pattern
        | 0b11 // bits 30-31 of key1 pattern
}

#[test]
fn stvewx128_writes_one_word_at_word_aligned_ea() {
    // PPCBUG-510: old code wrote all 16 bytes at ea & !0xF, corrupting 12 adjacent bytes.
    // Fix: word-align EA, extract lane from (ea & 0xF) >> 2, write 4 bytes only.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // Register selection: the data is seeded in ctx.vr[3], so the encoded
    // VS128 must decode to 3. encode_stvewx128 places vs_lo=3 in host bits
    // 25-21. Per the original author's note, the decoder forms
    //   vs128 = bits6-10 | (bit21<<5) | (bit22<<6)
    // and PPC bits 21-22 (host bits 10-9) are zero in this encoding, so
    // vs128 = vs_lo = 3.
    // NOTE(review): encode_stvewx128's own comment instead locates
    // VS128[5]/VS128[6] at host bits 3/1, and host bit 1 IS set by the
    // key pattern — confirm against the decoder which layout is correct.
    let raw = encode_stvewx128(3, 1, 2);
    ctx.vr[3] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    ctx.gpr[1] = 0x1000; // base
    ctx.gpr[2] = 0x008; // offset → EA = 0x1008 → word-aligned EA = 0x1008, slot = (0x8 & 0xF)>>2 = 2
    write_instr(&mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mem);
    assert_eq!(ctx.pc, 4, "PC must advance");
    // Slot 2 → lane 2 = 0x3333_3333
    assert_eq!(mem.read_u32(0x1008), 0x3333_3333, "only lane 2 word at ea");
    // Adjacent words must be untouched (mem is zero-init)
    assert_eq!(mem.read_u32(0x1000), 0x0000_0000, "byte below must be untouched");
    assert_eq!(mem.read_u32(0x100C), 0x0000_0000, "byte above must be untouched");
}
}