Phase 6 batch 1 — trap/sc semantics. - PPCBUG-063 trap PC: previously ctx.pc was incremented to CIA+4 BEFORE StepResult::Trap returned, forcing handlers to .wrapping_sub(4) to recover the faulting instruction address. Now ctx.pc stays at CIA on trap, matching SRR0 semantics on real hardware. Critical for any future SEH/exception-delivery path (e.g. the Sylpheed C++ throw work). - PPCBUG-065 typed-trap logging: `twi 31, r0, IMM` is the Xbox 360 CRT/kernel typed-trap convention encoding C++ exception class via SIMM. The trace now logs the SIMM type code when this pattern fires. Routing the type code via a StepResult payload requires an enum extension (multiple consumer sites) that's deferred. - PPCBUG-064 sc LEV logging: `sc 2` is the Xbox 360 hypervisor-call convention; canary dispatches it to a different handler than `sc 0`. Now logs a warning when LEV != 0. Routing LEV=2 to a HypervisorCall variant also requires a StepResult enum extension; deferred. The two enum-extension follow-ups can land as a structural sub-batch once a clear consumer (SEH dispatch, hypervisor-call HLE) is in place. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
7239 lines
314 KiB
Rust
7239 lines
314 KiB
Rust
//! PPC interpreter - executes instructions one at a time.
|
||
//! This is the core execution engine. Every instruction is observable
|
||
//! by the debugger (pre_step/post_step hooks on every cycle).
|
||
|
||
use crate::context::PpcContext;
|
||
use crate::decoder::{decode, DecodedInstr};
|
||
use crate::fpscr;
|
||
use crate::opcode::PpcOpcode;
|
||
use crate::overflow;
|
||
use crate::trap;
|
||
use crate::vmx;
|
||
use xenia_memory::MemoryAccess;
|
||
|
||
/// Low-bit mask for the Xenon reservation granule.
///
/// The granule is one L2 cache line of 128 bytes, so the mask covers the
/// seven offset bits inside a line. [`PpcContext`] keeps
/// `reserved_line = ea & !RESERVATION_MASK`.
pub const RESERVATION_MASK: u32 = 128 - 1;
|
||
|
||
/// Result of executing a single instruction.
|
||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||
pub enum StepResult {
|
||
/// Normal execution, advance to next instruction.
|
||
Continue,
|
||
/// Hit a system call (sc instruction). Kernel should handle.
|
||
SystemCall,
|
||
/// Hit an unimplemented opcode.
|
||
Unimplemented(PpcOpcode),
|
||
/// Hit a trap instruction.
|
||
Trap,
|
||
/// Execution halted (by debugger or error).
|
||
Halted,
|
||
}
|
||
|
||
/// Execute a single PPC instruction.
|
||
pub fn step(ctx: &mut PpcContext, mem: &dyn MemoryAccess) -> StepResult {
|
||
let raw = mem.read_u32(ctx.pc);
|
||
let instr = decode(raw, ctx.pc);
|
||
|
||
let result = execute(ctx, mem, &instr);
|
||
|
||
ctx.cycle_count += 1;
|
||
ctx.timebase += 1;
|
||
|
||
result
|
||
}
|
||
|
||
/// Tier-2 perf — same semantics as [`step`], but looks the decoded
|
||
/// instruction up in a PC-keyed cache first. Misses fill the cache from
|
||
/// a fresh [`decode`] call; writes to the containing guest page bump
|
||
/// `page_version` and naturally invalidate the entry.
|
||
///
|
||
/// The cache is shared across all HW threads — PC is thread-independent
|
||
/// and `DecodeCacheEntry` stays put after fill. `current_page_version`
|
||
/// is wired through the caller since memory is touched just above anyway
|
||
/// (the `read_u32` + the version read amortize to one touch of the page
|
||
/// table). Use `GuestMemory::page_version(pc)` to source it.
|
||
pub fn step_cached(
|
||
ctx: &mut PpcContext,
|
||
mem: &dyn MemoryAccess,
|
||
cache: &mut crate::decoder::DecodeCache,
|
||
current_page_version: u64,
|
||
) -> StepResult {
|
||
let raw = mem.read_u32(ctx.pc);
|
||
let instr = cache.lookup(ctx.pc, raw, current_page_version);
|
||
|
||
let result = execute(ctx, mem, &instr);
|
||
|
||
ctx.cycle_count += 1;
|
||
ctx.timebase += 1;
|
||
|
||
result
|
||
}
|
||
|
||
/// Tier-4 perf — execute every instruction in a pre-decoded
|
||
/// [`crate::block_cache::DecodedBlock`], bumping `cycle_count` and
|
||
/// `timebase` once per executed instruction. Bails out as soon as a
|
||
/// non-`Continue` step result fires (system call, trap, halt, or
|
||
/// unimplemented opcode), or when an instruction unexpectedly changes
|
||
/// the PC mid-block (defensive — only the terminator at the tail of
|
||
/// the block is allowed to do that).
|
||
///
|
||
/// Caller (in `xenia-app/src/main.rs`) is responsible for choosing this
|
||
/// path only when **no per-instruction observation is requested** —
|
||
/// i.e., `Debugger::wants_hooks() == false` and no `--trace-*` flag is
|
||
/// active. Once those gates flip, the caller falls back to
|
||
/// [`step_cached`] so every PC remains observable.
|
||
pub fn step_block(
|
||
ctx: &mut PpcContext,
|
||
mem: &dyn MemoryAccess,
|
||
block: &crate::block_cache::DecodedBlock,
|
||
) -> StepResult {
|
||
let mut result = StepResult::Continue;
|
||
for instr in &block.instrs {
|
||
let expected_next = instr.addr.wrapping_add(4);
|
||
result = execute(ctx, mem, instr);
|
||
ctx.cycle_count += 1;
|
||
ctx.timebase += 1;
|
||
if !matches!(result, StepResult::Continue) {
|
||
return result;
|
||
}
|
||
// PC discontinuity within a block. By construction only the
|
||
// terminator (last instruction) can branch — and when it does,
|
||
// we want to stop here, not continue executing past it.
|
||
if ctx.pc != expected_next {
|
||
break;
|
||
}
|
||
}
|
||
result
|
||
}
|
||
|
||
/// Execute a decoded instruction, updating context and memory.
|
||
fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -> StepResult {
|
||
match instr.opcode {
|
||
// ===== ALU: Immediate =====
|
||
PpcOpcode::addi => {
|
||
// PPCBUG-001: 32-bit ABI. `li rT, -1` (= addi rT, r0, -1) must produce
|
||
// 0x00000000_FFFFFFFF, not 0xFFFFFFFF_FFFFFFFF (sign-extended simm16).
|
||
let ra_val = if instr.ra() == 0 { 0 } else { ctx.gpr[instr.ra()] };
|
||
ctx.gpr[instr.rd()] = ra_val.wrapping_add(instr.simm16() as i64 as u64) as u32 as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::addis => {
|
||
// Xbox 360 user mode is 32-bit ABI (MSR.SF=0), so addis must
|
||
// produce a value whose upper 32 bits don't pollute downstream
|
||
// 64-bit arithmetic. The PPC ISA in 64-bit mode sign-extends
|
||
// simm16 before the shift, producing 0xFFFFFFFF_xxxx0000 for
|
||
// negative simm16 (high bit set). When this value flows into
|
||
// a 64-bit subfc against a zero-extended lwz value, the unsigned
|
||
// 64-bit comparison yields wrong CA. Truncate to 32 bits to
|
||
// simulate 32-bit ABI behavior.
|
||
let ra_val = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let result = ra_val.wrapping_add((instr.simm16() as i64 as u64) << 16);
|
||
ctx.gpr[instr.rd()] = result as u32 as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::addic => {
|
||
// PPCBUG-002: 32-bit ABI. CA must be from a 32-bit unsigned compare;
|
||
// canary's `AddDidCarry` truncates both operands to int32 first.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let imm32 = instr.simm16() as i32 as u32;
|
||
let result32 = ra32.wrapping_add(imm32);
|
||
ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::addicx => {
|
||
// PPCBUG-003: same fix as addic plus CR0 i32 view.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let imm32 = instr.simm16() as i32 as u32;
|
||
let result32 = ra32.wrapping_add(imm32);
|
||
ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::subficx => {
|
||
// PPCBUG-005: 32-bit ABI. Sign-extended imm has bits 32-63 set for
|
||
// negative SIMM, poisoning the writeback. Canary uses 32-bit form.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let imm32 = instr.simm16() as i32 as u32;
|
||
let result32 = imm32.wrapping_sub(ra32);
|
||
ctx.xer_ca = if imm32 >= ra32 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mulli => {
|
||
// PPCBUG-004: 32-bit ABI. Read RA as i32 (low 32, sign-extended for
|
||
// multiply), product fits in 32 bits per ISA (overflow wraps).
|
||
let ra = ctx.gpr[instr.ra()] as i32 as i64;
|
||
let imm = instr.simm16() as i64;
|
||
ctx.gpr[instr.rd()] = (ra.wrapping_mul(imm) as u32) as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== ALU: Register =====
|
||
PpcOpcode::addx => {
|
||
// PPCBUG-012+020: 32-bit ABI writeback truncation + CR0 i32 view.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
let result32 = ra32.wrapping_add(rb32);
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128);
|
||
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::addcx => {
|
||
// PPCBUG-013+020: 32-bit truncation; CA from u32 unsigned compare.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
let result32 = ra32.wrapping_add(rb32);
|
||
ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128);
|
||
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::addex => {
|
||
// PPCBUG-014+020: 32-bit truncation; CA from u32 unsigned compare.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
let ca = ctx.xer_ca as u32;
|
||
let result32 = ra32.wrapping_add(rb32).wrapping_add(ca);
|
||
ctx.xer_ca = if result32 < ra32 || (ca != 0 && result32 == ra32) { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128) + (ca as i128);
|
||
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::addzex => {
|
||
// PPCBUG-015+020: 32-bit truncation.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let ca = ctx.xer_ca as u32;
|
||
let result32 = ra32.wrapping_add(ca);
|
||
ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_sum = (ra32 as i32 as i128) + (ca as i128);
|
||
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::addmex => {
|
||
// PPCBUG-016+020: 32-bit truncation. RT = RA + CA - 1.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let ca = ctx.xer_ca as u32;
|
||
let result32 = ra32.wrapping_add(ca).wrapping_sub(1);
|
||
ctx.xer_ca = if ra32 != 0 || ca != 0 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_sum = (ra32 as i32 as i128) + (ca as i128) - 1;
|
||
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::subfx => {
|
||
// PPCBUG-017+020: 32-bit truncation.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
let result32 = rb32.wrapping_sub(ra32);
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
|
||
overflow::apply(ctx, true_diff != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::subfcx => {
|
||
// PPCBUG-007: 32-bit ABI. The `rb >= ra` u64 unsigned compare is
|
||
// exactly the shape that broke addis. Defensive 32-bit truncation
|
||
// is required for correct CA even after upstream cleanup.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
let result32 = rb32.wrapping_sub(ra32);
|
||
ctx.xer_ca = if rb32 >= ra32 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
|
||
overflow::apply(ctx, true_diff != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::subfex => {
|
||
// PPCBUG-008: 32-bit ABI. Compute in u32 space — `!ra` on u64 always
|
||
// pollutes the upper 32 bits, making this an active poisoner.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
let ca = ctx.xer_ca as u32;
|
||
let result32 = (!ra32).wrapping_add(rb32).wrapping_add(ca);
|
||
ctx.xer_ca = if rb32 > ra32 || (rb32 == ra32 && ca != 0) { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
// RT <- !RA + RB + CA == RB - RA - 1 + CA (32-bit semantics).
|
||
let true_sum = (rb32 as i32 as i128) - (ra32 as i32 as i128) - 1 + (ca as i128);
|
||
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::subfzex => {
|
||
// PPCBUG-018: same active-poisoning shape as subfex; operate in u32.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let ca = ctx.xer_ca as u32;
|
||
let result32 = (!ra32).wrapping_add(ca);
|
||
// RT <- !RA + CA (no -1 term). 32-bit carry-out only when
|
||
// !ra32 = u32::MAX (i.e. ra32 = 0) AND ca = 1.
|
||
ctx.xer_ca = if ra32 == 0 && ca != 0 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_sum = -(ra32 as i32 as i128) - 1 + (ca as i128);
|
||
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::subfmex => {
|
||
// PPCBUG-019: also fixes the always-true CA edge — `!ra` on u64
|
||
// is non-zero when ra32==0xFFFFFFFF and ca==0, so CA was stuck at 1.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let ca = ctx.xer_ca as u32;
|
||
let result32 = (!ra32).wrapping_add(ca).wrapping_sub(1);
|
||
ctx.xer_ca = if (!ra32) != 0 || ca != 0 { 1 } else { 0 };
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
let true_sum = -(ra32 as i32 as i128) - 2 + (ca as i128);
|
||
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::negx => {
|
||
// PPCBUG-006: 32-bit ABI. `(!ra).wrapping_add(1)` on u64 always
|
||
// sets upper 32 bits — every neg poisoned the GPR. neg_ov also
|
||
// checks at 64-bit INT_MIN; should be 32-bit INT_MIN.
|
||
let ra32 = ctx.gpr[instr.ra()] as u32;
|
||
let result32 = (!ra32).wrapping_add(1);
|
||
ctx.gpr[instr.rd()] = result32 as u64;
|
||
if instr.oe() {
|
||
overflow::apply(ctx, ra32 == 0x8000_0000);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, result32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mullwx => {
|
||
// PPCBUG-009: 32-bit ABI. Truncate product to u32 — overflow detection
|
||
// (mullw_ov) still uses the full i64 product to catch the overflow.
|
||
let ra = ctx.gpr[instr.ra()] as i32 as i64;
|
||
let rb = ctx.gpr[instr.rb()] as i32 as i64;
|
||
let product = ra.wrapping_mul(rb);
|
||
ctx.gpr[instr.rd()] = product as u32 as u64;
|
||
if instr.oe() {
|
||
overflow::apply(ctx, overflow::mullw_ov(product));
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mulhwx => {
|
||
// PPCBUG-020: 32-bit ABI CR0 view.
|
||
let ra = ctx.gpr[instr.ra()] as i32 as i64;
|
||
let rb = ctx.gpr[instr.rb()] as i32 as i64;
|
||
let result = ra.wrapping_mul(rb);
|
||
ctx.gpr[instr.rd()] = ((result >> 32) as u32) as u64;
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mulhwux => {
|
||
// PPCBUG-020: 32-bit ABI CR0 view.
|
||
let ra = ctx.gpr[instr.ra()] as u32 as u64;
|
||
let rb = ctx.gpr[instr.rb()] as u32 as u64;
|
||
let result = ra.wrapping_mul(rb);
|
||
ctx.gpr[instr.rd()] = (result >> 32) & 0xFFFF_FFFF;
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::divwx => {
|
||
// PPCBUG-010+011 coupled: 32-bit ABI. Quotient zero-extended to u64
|
||
// (canary explicitly uses ZeroExtend(v, INT64_TYPE)). CR0 view via i32.
|
||
let ra = ctx.gpr[instr.ra()] as i32;
|
||
let rb = ctx.gpr[instr.rb()] as i32;
|
||
let ov = overflow::divw_ov_signed(ra, rb);
|
||
if ov {
|
||
ctx.gpr[instr.rd()] = 0;
|
||
} else {
|
||
ctx.gpr[instr.rd()] = (ra / rb) as u32 as u64;
|
||
}
|
||
if instr.oe() {
|
||
overflow::apply(ctx, ov);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::divwux => {
|
||
// PPCBUG-020: 32-bit ABI CR0 view.
|
||
let ra = ctx.gpr[instr.ra()] as u32;
|
||
let rb = ctx.gpr[instr.rb()] as u32;
|
||
let ov = overflow::divw_ov_unsigned(rb);
|
||
if ov {
|
||
ctx.gpr[instr.rd()] = 0;
|
||
} else {
|
||
ctx.gpr[instr.rd()] = (ra / rb) as u64;
|
||
}
|
||
if instr.oe() {
|
||
overflow::apply(ctx, ov);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== 64-bit Arithmetic =====
|
||
PpcOpcode::mulldx => {
|
||
let ra = ctx.gpr[instr.ra()] as i64;
|
||
let rb = ctx.gpr[instr.rb()] as i64;
|
||
ctx.gpr[instr.rd()] = ra.wrapping_mul(rb) as u64;
|
||
if instr.oe() {
|
||
overflow::apply(ctx, overflow::mulld_ov(ra, rb));
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mulhdx => {
|
||
let ra = ctx.gpr[instr.ra()] as i64 as i128;
|
||
let rb = ctx.gpr[instr.rb()] as i64 as i128;
|
||
ctx.gpr[instr.rd()] = (ra.wrapping_mul(rb) >> 64) as u64;
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mulhdux => {
|
||
let ra = ctx.gpr[instr.ra()] as u128;
|
||
let rb = ctx.gpr[instr.rb()] as u128;
|
||
ctx.gpr[instr.rd()] = (ra.wrapping_mul(rb) >> 64) as u64;
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::divdx => {
|
||
let ra = ctx.gpr[instr.ra()] as i64;
|
||
let rb = ctx.gpr[instr.rb()] as i64;
|
||
let ov = overflow::divd_ov_signed(ra, rb);
|
||
if ov {
|
||
ctx.gpr[instr.rd()] = 0;
|
||
} else {
|
||
ctx.gpr[instr.rd()] = (ra / rb) as u64;
|
||
}
|
||
if instr.oe() {
|
||
overflow::apply(ctx, ov);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::divdux => {
|
||
let ra = ctx.gpr[instr.ra()];
|
||
let rb = ctx.gpr[instr.rb()];
|
||
let ov = overflow::divd_ov_unsigned(rb);
|
||
if ov {
|
||
ctx.gpr[instr.rd()] = 0;
|
||
} else {
|
||
ctx.gpr[instr.rd()] = ra / rb;
|
||
}
|
||
if instr.oe() {
|
||
overflow::apply(ctx, ov);
|
||
}
|
||
if instr.rc_bit() {
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Logical =====
|
||
PpcOpcode::andix => {
|
||
// PPCBUG-020: 32-bit ABI CR0 view.
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & (instr.uimm16() as u64);
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::andisx => {
|
||
// PPCBUG-023: 32-bit ABI CR0 view. `andis. rA, rS, 0x8000` to test
|
||
// sign bit of a 32-bit word now correctly classifies bit 31 = 1 as LT.
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & ((instr.uimm16() as u64) << 16);
|
||
ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::ori => {
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | (instr.uimm16() as u64);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::oris => {
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | ((instr.uimm16() as u64) << 16);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::xori => {
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ (instr.uimm16() as u64);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::xoris => {
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ ((instr.uimm16() as u64) << 16);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::andx => {
|
||
// PPCBUG-032+020: 32-bit ABI CR0 view (latent under clean inputs).
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & ctx.gpr[instr.rb()];
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::andcx => {
|
||
// PPCBUG-033: !rb on u64 flips upper 32 bits — active poisoning.
|
||
let rs32 = ctx.gpr[instr.rs()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
ctx.gpr[instr.ra()] = (rs32 & !rb32) as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::orx => {
|
||
// PPCBUG-032+020: 32-bit ABI CR0 view.
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | ctx.gpr[instr.rb()];
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::orcx => {
|
||
// PPCBUG-028: same shape as andcx — operate in u32.
|
||
let rs32 = ctx.gpr[instr.rs()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
ctx.gpr[instr.ra()] = (rs32 | !rb32) as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::xorx => {
|
||
// PPCBUG-032+020: 32-bit ABI CR0 view.
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ ctx.gpr[instr.rb()];
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::norx => {
|
||
// PPCBUG-029: `not` simplified mnemonic — every `not` poisoned the GPR.
|
||
let rs32 = ctx.gpr[instr.rs()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
ctx.gpr[instr.ra()] = (!(rs32 | rb32)) as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::nandx => {
|
||
// PPCBUG-030: same shape — operate in u32.
|
||
let rs32 = ctx.gpr[instr.rs()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
ctx.gpr[instr.ra()] = (!(rs32 & rb32)) as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::eqvx => {
|
||
// PPCBUG-031: `eqv rA, rA, rA` is a common "set to all-ones" idiom;
|
||
// 64-bit form gave 0xFFFFFFFFFFFFFFFF but 32-bit ABI expects 0x00000000FFFFFFFF.
|
||
let rs32 = ctx.gpr[instr.rs()] as u32;
|
||
let rb32 = ctx.gpr[instr.rb()] as u32;
|
||
ctx.gpr[instr.ra()] = (!(rs32 ^ rb32)) as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Extend/Count =====
|
||
PpcOpcode::extsbx => {
|
||
// PPCBUG-034: 32-bit ABI — sign-extend byte to i32, write zero-extended.
|
||
// PPCBUG-036 (coupled): CR0 must view result as i32, not i64.
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i8 as i32 as u32 as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::extshx => {
|
||
// PPCBUG-035: same shape as extsbx for halfwords.
|
||
// PPCBUG-037 (coupled): CR0 i32 view.
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i16 as i32 as u32 as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::extswx => {
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i32 as i64 as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::cntlzwx => {
|
||
// Result is 0..=32, fits in u32 with bit 31 always zero, so the
|
||
// CR0 view is benign — use the catch-all 32-bit form for consistency.
|
||
ctx.gpr[instr.ra()] = (ctx.gpr[instr.rs()] as u32).leading_zeros() as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::cntlzdx => {
|
||
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()].leading_zeros() as u64;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Shift =====
|
||
PpcOpcode::slwx => {
|
||
// PPCBUG-044: 32-bit ABI CR0 view. A result with bit 31 set
|
||
// (e.g. 0x80000000) is negative in i32 view but positive in i64.
|
||
let sh = ctx.gpr[instr.rb()] as u32;
|
||
ctx.gpr[instr.ra()] = if sh < 32 {
|
||
((ctx.gpr[instr.rs()] as u32) << sh) as u64
|
||
} else { 0 };
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::srwx => {
|
||
// PPCBUG-044: 32-bit ABI CR0 view (zero-extended right shift can never
|
||
// have bit 31 set, but use the canonical form for consistency).
|
||
let sh = ctx.gpr[instr.rb()] as u32;
|
||
ctx.gpr[instr.ra()] = if sh < 32 {
|
||
((ctx.gpr[instr.rs()] as u32) >> sh) as u64
|
||
} else { 0 };
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::srawx => {
|
||
// PPCBUG-041+043 coupled: 32-bit ABI writeback truncation + CR0 i32.
|
||
// CA logic is independently correct (uses u32 shifted-out test).
|
||
let rs = ctx.gpr[instr.rs()] as i32;
|
||
let sh = ctx.gpr[instr.rb()] as u32 & 0x3F;
|
||
if sh == 0 {
|
||
ctx.gpr[instr.ra()] = rs as u32 as u64;
|
||
ctx.xer_ca = 0;
|
||
} else if sh < 32 {
|
||
let result = rs >> sh;
|
||
ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 };
|
||
ctx.gpr[instr.ra()] = result as u32 as u64;
|
||
} else {
|
||
ctx.gpr[instr.ra()] = if rs < 0 { 0xFFFF_FFFFu64 } else { 0 };
|
||
ctx.xer_ca = if rs < 0 { 1 } else { 0 };
|
||
}
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::srawix => {
|
||
// PPCBUG-042+043 coupled: same shape as srawx for the sh-immediate form.
|
||
let rs = ctx.gpr[instr.rs()] as i32;
|
||
let sh = instr.sh();
|
||
if sh == 0 {
|
||
ctx.gpr[instr.ra()] = rs as u32 as u64;
|
||
ctx.xer_ca = 0;
|
||
} else {
|
||
let result = rs >> sh;
|
||
ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 };
|
||
ctx.gpr[instr.ra()] = result as u32 as u64;
|
||
}
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::sldx => {
|
||
let sh = ctx.gpr[instr.rb()] & 0x7F;
|
||
ctx.gpr[instr.ra()] = if sh < 64 {
|
||
ctx.gpr[instr.rs()] << sh
|
||
} else { 0 };
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::srdx => {
|
||
let sh = ctx.gpr[instr.rb()] & 0x7F;
|
||
ctx.gpr[instr.ra()] = if sh < 64 {
|
||
ctx.gpr[instr.rs()] >> sh
|
||
} else { 0 };
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::sradx => {
|
||
let rs = ctx.gpr[instr.rs()] as i64;
|
||
let sh = ctx.gpr[instr.rb()] & 0x7F;
|
||
if sh == 0 {
|
||
ctx.gpr[instr.ra()] = rs as u64;
|
||
ctx.xer_ca = 0;
|
||
} else if sh < 64 {
|
||
let result = rs >> sh;
|
||
ctx.xer_ca = if rs < 0 && (rs as u64) << (64 - sh) != 0 { 1 } else { 0 };
|
||
ctx.gpr[instr.ra()] = result as u64;
|
||
} else {
|
||
ctx.gpr[instr.ra()] = if rs < 0 { u64::MAX } else { 0 };
|
||
ctx.xer_ca = if rs < 0 { 1 } else { 0 };
|
||
}
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::sradix => {
|
||
let rs = ctx.gpr[instr.rs()] as i64;
|
||
let sh = instr.sh64();
|
||
if sh == 0 {
|
||
ctx.gpr[instr.ra()] = rs as u64;
|
||
ctx.xer_ca = 0;
|
||
} else {
|
||
let result = rs >> sh;
|
||
ctx.xer_ca = if rs < 0 && (rs as u64) << (64 - sh) != 0 { 1 } else { 0 };
|
||
ctx.gpr[instr.ra()] = result as u64;
|
||
}
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Rotate =====
|
||
PpcOpcode::rlwinmx => {
|
||
let rs = ctx.gpr[instr.rs()] as u32;
|
||
let sh = instr.sh();
|
||
let mb = instr.mb();
|
||
let me = instr.me();
|
||
let rotated = rs.rotate_left(sh);
|
||
let mask = rlw_mask(mb, me);
|
||
ctx.gpr[instr.ra()] = (rotated & mask) as u64;
|
||
// PPCBUG-024: 32-bit ABI CR0 view.
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::rlwimix => {
|
||
let rs = ctx.gpr[instr.rs()] as u32;
|
||
let sh = instr.sh();
|
||
let mb = instr.mb();
|
||
let me = instr.me();
|
||
let rotated = rs.rotate_left(sh);
|
||
let mask = rlw_mask(mb, me);
|
||
let ra = ctx.gpr[instr.ra()] as u32;
|
||
ctx.gpr[instr.ra()] = ((rotated & mask) | (ra & !mask)) as u64;
|
||
// PPCBUG-025: 32-bit ABI CR0 view.
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::rlwnmx => {
|
||
let rs = ctx.gpr[instr.rs()] as u32;
|
||
let sh = ctx.gpr[instr.rb()] as u32 & 0x1F;
|
||
let mb = instr.mb();
|
||
let me = instr.me();
|
||
let rotated = rs.rotate_left(sh);
|
||
let mask = rlw_mask(mb, me);
|
||
ctx.gpr[instr.ra()] = (rotated & mask) as u64;
|
||
// PPCBUG-026: 32-bit ABI CR0 view.
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::rldiclx => {
|
||
let rs = ctx.gpr[instr.rs()];
|
||
let sh = instr.sh64();
|
||
let mb = instr.mb_md();
|
||
let rotated = rs.rotate_left(sh);
|
||
let mask = rld_mask_left(mb);
|
||
ctx.gpr[instr.ra()] = rotated & mask;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::rldicrx => {
|
||
let rs = ctx.gpr[instr.rs()];
|
||
let sh = instr.sh64();
|
||
let me = instr.mb_md();
|
||
let rotated = rs.rotate_left(sh);
|
||
let mask = rld_mask_right(me);
|
||
ctx.gpr[instr.ra()] = rotated & mask;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::rldicx => {
|
||
let rs = ctx.gpr[instr.rs()];
|
||
let sh = instr.sh64();
|
||
let mb = instr.mb_md();
|
||
let rotated = rs.rotate_left(sh);
|
||
let mask = rld_mask_left(mb) & rld_mask_right(63 - sh);
|
||
ctx.gpr[instr.ra()] = rotated & mask;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::rldimix => {
|
||
let rs = ctx.gpr[instr.rs()];
|
||
let sh = instr.sh64();
|
||
let mb = instr.mb_md();
|
||
let rotated = rs.rotate_left(sh);
|
||
let mask = rld_mask_left(mb) & rld_mask_right(63 - sh);
|
||
ctx.gpr[instr.ra()] = (rotated & mask) | (ctx.gpr[instr.ra()] & !mask);
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::rldclx => {
|
||
let rs = ctx.gpr[instr.rs()];
|
||
let sh = ctx.gpr[instr.rb()] & 0x3F;
|
||
let mb = instr.mb_md();
|
||
let rotated = rs.rotate_left(sh as u32);
|
||
let mask = rld_mask_left(mb);
|
||
ctx.gpr[instr.ra()] = rotated & mask;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::rldcrx => {
|
||
let rs = ctx.gpr[instr.rs()];
|
||
let sh = ctx.gpr[instr.rb()] & 0x3F;
|
||
let me = instr.mb_md();
|
||
let rotated = rs.rotate_left(sh as u32);
|
||
let mask = rld_mask_right(me);
|
||
ctx.gpr[instr.ra()] = rotated & mask;
|
||
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Compare =====
|
||
PpcOpcode::cmpi => {
|
||
let bf = instr.crfd();
|
||
if instr.l() {
|
||
// 64-bit compare. Compare directly so boundary i64 values
|
||
// (e.g. ra=i64::MIN, imm=1) don't mis-sign through a
|
||
// wrapped subtract.
|
||
let ra = ctx.gpr[instr.ra()] as i64;
|
||
let imm = instr.simm16() as i64;
|
||
ctx.cr[bf] = crate::context::CrField {
|
||
lt: ra < imm,
|
||
gt: ra > imm,
|
||
eq: ra == imm,
|
||
so: ctx.xer_so != 0,
|
||
};
|
||
} else {
|
||
let ra = ctx.gpr[instr.ra()] as i32;
|
||
let imm = instr.simm16() as i32;
|
||
ctx.cr[bf] = crate::context::CrField {
|
||
lt: ra < imm,
|
||
gt: ra > imm,
|
||
eq: ra == imm,
|
||
so: ctx.xer_so != 0,
|
||
};
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::cmpli => {
|
||
let bf = instr.crfd();
|
||
if instr.l() {
|
||
let ra = ctx.gpr[instr.ra()];
|
||
let imm = instr.uimm16() as u64;
|
||
ctx.update_cr_unsigned(bf, ra, imm);
|
||
} else {
|
||
let ra = ctx.gpr[instr.ra()] as u32 as u64;
|
||
let imm = instr.uimm16() as u64;
|
||
ctx.update_cr_unsigned(bf, ra, imm);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::cmp => {
|
||
let bf = instr.crfd();
|
||
if instr.l() {
|
||
let ra = ctx.gpr[instr.ra()] as i64;
|
||
let rb = ctx.gpr[instr.rb()] as i64;
|
||
ctx.cr[bf] = crate::context::CrField {
|
||
lt: ra < rb,
|
||
gt: ra > rb,
|
||
eq: ra == rb,
|
||
so: ctx.xer_so != 0,
|
||
};
|
||
} else {
|
||
let ra = ctx.gpr[instr.ra()] as i32;
|
||
let rb = ctx.gpr[instr.rb()] as i32;
|
||
ctx.cr[bf] = crate::context::CrField {
|
||
lt: ra < rb,
|
||
gt: ra > rb,
|
||
eq: ra == rb,
|
||
so: ctx.xer_so != 0,
|
||
};
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::cmpl => {
|
||
let bf = instr.crfd();
|
||
if instr.l() {
|
||
ctx.update_cr_unsigned(bf, ctx.gpr[instr.ra()], ctx.gpr[instr.rb()]);
|
||
} else {
|
||
ctx.update_cr_unsigned(bf, ctx.gpr[instr.ra()] as u32 as u64, ctx.gpr[instr.rb()] as u32 as u64);
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Branch =====
|
||
PpcOpcode::bx => {
|
||
let target = if instr.aa() {
|
||
instr.li() as u32
|
||
} else {
|
||
ctx.pc.wrapping_add(instr.li() as u32)
|
||
};
|
||
if instr.lk() {
|
||
ctx.lr = (ctx.pc + 4) as u64;
|
||
}
|
||
ctx.pc = target;
|
||
}
|
||
PpcOpcode::bcx => {
|
||
let bo = instr.bo();
|
||
let bi = instr.bi();
|
||
|
||
// Decrement CTR if needed
|
||
if bo & 0b00100 == 0 {
|
||
ctx.ctr = ctx.ctr.wrapping_sub(1);
|
||
}
|
||
|
||
let ctr_ok = (bo & 0b00100) != 0
|
||
|| (((ctx.ctr as u32) != 0) ^ ((bo & 0b00010) != 0));
|
||
let cond_ok = (bo & 0b10000) != 0
|
||
|| (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0));
|
||
|
||
if ctr_ok && cond_ok {
|
||
let target = if instr.aa() {
|
||
instr.bd() as u32
|
||
} else {
|
||
ctx.pc.wrapping_add(instr.bd() as u32)
|
||
};
|
||
if instr.lk() {
|
||
ctx.lr = (ctx.pc + 4) as u64;
|
||
}
|
||
ctx.pc = target;
|
||
} else {
|
||
if instr.lk() {
|
||
ctx.lr = (ctx.pc + 4) as u64;
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
}
|
||
PpcOpcode::bclrx => {
|
||
let bo = instr.bo();
|
||
let bi = instr.bi();
|
||
|
||
if bo & 0b00100 == 0 {
|
||
ctx.ctr = ctx.ctr.wrapping_sub(1);
|
||
}
|
||
|
||
let ctr_ok = (bo & 0b00100) != 0
|
||
|| (((ctx.ctr as u32) != 0) ^ ((bo & 0b00010) != 0));
|
||
let cond_ok = (bo & 0b10000) != 0
|
||
|| (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0));
|
||
|
||
let next_pc = ctx.pc + 4;
|
||
if ctr_ok && cond_ok {
|
||
ctx.pc = (ctx.lr as u32) & !3;
|
||
} else {
|
||
ctx.pc = next_pc;
|
||
}
|
||
if instr.lk() {
|
||
ctx.lr = next_pc as u64;
|
||
}
|
||
}
|
||
PpcOpcode::bcctrx => {
|
||
let bo = instr.bo();
|
||
let bi = instr.bi();
|
||
|
||
let cond_ok = (bo & 0b10000) != 0
|
||
|| (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0));
|
||
|
||
if cond_ok {
|
||
let next_pc = ctx.pc + 4;
|
||
ctx.pc = (ctx.ctr as u32) & !3;
|
||
if instr.lk() {
|
||
ctx.lr = next_pc as u64;
|
||
}
|
||
} else {
|
||
if instr.lk() {
|
||
ctx.lr = (ctx.pc + 4) as u64;
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
}
|
||
|
||
// ===== System call =====
|
||
PpcOpcode::sc => {
|
||
// PPCBUG-064: log non-zero LEV (`sc 2` is the Xbox 360 hypervisor-call
|
||
// convention; canary dispatches it to a different handler than `sc 0`).
|
||
// Routing LEV=2 requires a StepResult variant extension; deferred.
|
||
let lev = (instr.raw >> 5) & 0x7F;
|
||
if lev != 0 {
|
||
tracing::warn!(
|
||
"sc with LEV={} at {:#010x}: dispatched as plain SystemCall (HVcall routing not implemented)",
|
||
lev, ctx.pc
|
||
);
|
||
}
|
||
ctx.pc += 4;
|
||
return StepResult::SystemCall;
|
||
}
|
||
|
||
// ===== Load instructions =====
|
||
PpcOpcode::lwz => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lwzu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lwzx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lwzux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lbz => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lbzu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64;
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lbzx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lbzux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64;
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lhz => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lhzu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64;
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lhzx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lha => {
    // lha: load halfword algebraic, EA = (RA|0) + sign-extended D.
    let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
    // The ISA defines RT <- EXTS(MEM(EA, 2)): sign-extend across the whole
    // 64-bit register. Stopping at 32 bits left the upper word zero for
    // negative values, which breaks later 64-bit compares and arithmetic.
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
    ctx.pc += 4;
}
|
||
PpcOpcode::lhax => {
    // lhax: load halfword algebraic indexed, EA = (RA|0) + RB.
    let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
    // The ISA defines RT <- EXTS(MEM(EA, 2)): sign-extend across the whole
    // 64-bit register. Stopping at 32 bits left the upper word zero for
    // negative values.
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
    ctx.pc += 4;
}
|
||
PpcOpcode::lhzux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64;
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lhau => {
    // lhau: load halfword algebraic with update, EA = RA + sign-extended D.
    let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
    // The ISA defines RT <- EXTS(MEM(EA, 2)): sign-extend across the whole
    // 64-bit register. Stopping at 32 bits left the upper word zero for
    // negative values.
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
    // Update form: write the effective address back to RA after the load.
    ctx.gpr[instr.ra()] = ea as u64;
    ctx.pc += 4;
}
|
||
PpcOpcode::lhaux => {
    // lhaux: load halfword algebraic with update indexed, EA = RA + RB.
    let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
    // The ISA defines RT <- EXTS(MEM(EA, 2)): sign-extend across the whole
    // 64-bit register. Stopping at 32 bits left the upper word zero for
    // negative values.
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
    // Update form: write the effective address back to RA after the load.
    ctx.gpr[instr.ra()] = ea as u64;
    ctx.pc += 4;
}
|
||
PpcOpcode::ld => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u64(ea);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::ldx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u64(ea);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lwa => {
    // lwa: load word algebraic (DS-form), EA = (RA|0) + sign-extended DS.
    let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32;
    // The ISA defines RT <- EXTS(MEM(EA, 4)). Zero-extending made lwa
    // behave exactly like lwz, so negative words read back as large
    // positive 64-bit values.
    ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
    ctx.pc += 4;
}
|
||
PpcOpcode::lwax => {
    // lwax: load word algebraic indexed, EA = (RA|0) + RB.
    let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
    // The ISA defines RT <- EXTS(MEM(EA, 4)). Zero-extending made lwax
    // behave exactly like lwzx, so negative words read back as large
    // positive 64-bit values.
    ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
    ctx.pc += 4;
}
|
||
PpcOpcode::lwaux => {
    // lwaux: load word algebraic with update indexed, EA = RA + RB.
    let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
    // The ISA defines RT <- EXTS(MEM(EA, 4)). Zero-extending made lwaux
    // behave exactly like lwzux, so negative words read back as large
    // positive 64-bit values.
    ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
    // Update form: write the effective address back to RA after the load.
    ctx.gpr[instr.ra()] = ea as u64;
    ctx.pc += 4;
}
|
||
PpcOpcode::ldu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.ds() as i64 as u64) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u64(ea);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::ldux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.gpr[instr.rd()] = mem.read_u64(ea);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// FP loads
|
||
PpcOpcode::lfs => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lfsx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lfd => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.fpr[instr.rd()] = mem.read_f64(ea);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lfdx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.fpr[instr.rd()] = mem.read_f64(ea);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lfsu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64;
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lfsux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64;
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lfdu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
ctx.fpr[instr.rd()] = mem.read_f64(ea);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lfdux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
ctx.fpr[instr.rd()] = mem.read_f64(ea);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// Reservation (lwarx/stwcx)
|
||
//
|
||
// M3.7 — when `ctx.reservation_table` is `Some` and the table is
|
||
// enabled, route reservations through the inter-thread table so
|
||
// concurrent host threads can mediate reservation conflicts.
|
||
// Otherwise (the default in lockstep mode), use the legacy
|
||
// per-`PpcContext` fields. Both paths leave the per-ctx fields
|
||
// in a coherent state so a flag flip mid-run doesn't corrupt
|
||
// outstanding reservations.
|
||
//
|
||
// PPCBUG-108: lwarx + stwcx. atomicity is provided by `ReservationTable`
|
||
// in the M3 multi-HW-thread runtime. The legacy per-ctx fallback (when
|
||
// `reservation_table` is None or the table is disabled) cannot observe
|
||
// stores from other host threads — a store by thread B cannot clear
|
||
// `ctx_A.has_reservation`. This path is only correct in strict lockstep
|
||
// (single-host-thread) mode. The M3 scheduler MUST enable the table
|
||
// before spawning a second host thread. See stwcx./stdcx. for the
|
||
// debug_assert that fires if a non-primary slot takes this path.
|
||
PpcOpcode::lwarx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let val = mem.read_u32(ea);
|
||
ctx.gpr[instr.rd()] = val as u64;
|
||
ctx.reserved_line = ea & !RESERVATION_MASK;
|
||
ctx.reserved_val = val as u64;
|
||
ctx.has_reservation = true;
|
||
ctx.reservation_width = 4; // PPCBUG-151: word reservation
|
||
if let Some(t) = &ctx.reservation_table {
|
||
if t.is_enabled() {
|
||
ctx.reserved_generation = t.reserve(ea, ctx.hw_id);
|
||
}
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
// PPCBUG-108: see lwarx comment above. stwcx. legacy path cannot observe
|
||
// cross-thread reservation invalidations; only safe in lockstep mode.
|
||
PpcOpcode::stwcx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let line = ea & !RESERVATION_MASK;
|
||
let table_route = ctx
|
||
.reservation_table
|
||
.as_ref()
|
||
.filter(|t| t.is_enabled())
|
||
.cloned();
|
||
// PPCBUG-151: stwcx. requires a word (lwarx) reservation;
|
||
// a doubleword (ldarx) reservation must not commit here.
|
||
let width_ok = ctx.reservation_width == 4;
|
||
let success = if let Some(t) = &table_route {
|
||
// Table-routed: success iff the slot still holds our
|
||
// reservation AND the per-ctx flag agrees (the per-ctx
|
||
// flag would be cleared by an intervening write or
|
||
// context switch).
|
||
ctx.has_reservation
|
||
&& width_ok
|
||
&& ctx.reserved_line == line
|
||
&& t.try_commit(ea, ctx.reserved_generation, ctx.hw_id)
|
||
} else {
|
||
// Legacy per-ctx path (M2 default / lockstep).
|
||
// PPCBUG-108: fires on non-primary HW slots under misconfig —
|
||
// if the table is disabled while workers are active, slots
|
||
// 1..N will trip this assert, surfacing the misconfiguration
|
||
// early in debug builds. Note: hw_id==0 (primary slot) taking
|
||
// this path while other slots run in parallel would NOT be
|
||
// caught; that case requires the table to be enabled instead.
|
||
debug_assert!(
|
||
ctx.hw_id == 0,
|
||
"PPCBUG-108: legacy per-ctx stwcx. on non-primary HW slot \
|
||
(hw_id={}) — ReservationTable must be enabled under --parallel",
|
||
ctx.hw_id
|
||
);
|
||
ctx.has_reservation && width_ok && ctx.reserved_line == line
|
||
};
|
||
if success {
|
||
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
|
||
ctx.cr[0] = crate::context::CrField {
|
||
lt: false,
|
||
gt: false,
|
||
eq: true,
|
||
so: ctx.xer_so != 0,
|
||
};
|
||
} else {
|
||
ctx.cr[0] = crate::context::CrField {
|
||
lt: false,
|
||
gt: false,
|
||
eq: false,
|
||
so: ctx.xer_so != 0,
|
||
};
|
||
// Failed stwcx: if we held the reservation in the table
|
||
// (someone else displaced our gen), release it from the
|
||
// counter so `has_active_reservers` returns to zero
|
||
// when no real reserver exists.
|
||
if let Some(t) = &table_route {
|
||
t.release(ea, ctx.reserved_generation, ctx.hw_id);
|
||
}
|
||
}
|
||
ctx.has_reservation = false;
|
||
ctx.reservation_width = 0; // PPCBUG-151: always clear on exit
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Store instructions =====
|
||
PpcOpcode::stw => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stwu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stwx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stwux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stb => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u8(ea, ctx.gpr[instr.rs()] as u8);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stbu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u8(ea, ctx.gpr[instr.rs()] as u8);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stbx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u8(ea, ctx.gpr[instr.rs()] as u8);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stbux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u8(ea, ctx.gpr[instr.rs()] as u8);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::sth => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u16(ea, ctx.gpr[instr.rs()] as u16);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::sthu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u16(ea, ctx.gpr[instr.rs()] as u16);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::sthx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u16(ea, ctx.gpr[instr.rs()] as u16);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::sthux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u16(ea, ctx.gpr[instr.rs()] as u16);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::std => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u64(ea, ctx.gpr[instr.rs()]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stdx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u64(ea, ctx.gpr[instr.rs()]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stdu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.ds() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u64(ea, ctx.gpr[instr.rs()]);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stdux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u64(ea, ctx.gpr[instr.rs()]);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// FP stores
|
||
PpcOpcode::stfs => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stfsu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stfsx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stfsux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stfd => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stfdu => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stfdx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stfdux => {
|
||
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_f64(ea, ctx.fpr[instr.rs()]);
|
||
ctx.gpr[instr.ra()] = ea as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stfiwx => {
|
||
// Store FP as integer word: stores low 32 bits of FPR as-is
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u32(ea, ctx.fpr[instr.rs()].to_bits() as u32);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// String load/store
|
||
PpcOpcode::lswi => {
|
||
let mut ea = if instr.ra() == 0 { 0u32 } else { ctx.gpr[instr.ra()] as u32 };
|
||
let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 };
|
||
let mut rd = instr.rd();
|
||
let mut bytes_left = nb;
|
||
while bytes_left > 0 {
|
||
let mut val = 0u32;
|
||
for byte_idx in 0..4 {
|
||
if bytes_left == 0 { break; }
|
||
let b = mem.read_u8(ea) as u32;
|
||
val |= b << (24 - byte_idx * 8);
|
||
ea = ea.wrapping_add(1);
|
||
bytes_left -= 1;
|
||
}
|
||
ctx.gpr[rd] = val as u64;
|
||
rd = (rd + 1) % 32;
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stswi => {
|
||
let mut ea = if instr.ra() == 0 { 0u32 } else { ctx.gpr[instr.ra()] as u32 };
|
||
let nb = if instr.rb() == 0 { 32 } else { instr.rb() as u32 };
|
||
let mut rs = instr.rs();
|
||
let mut bytes_left = nb;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() {
|
||
let first_line = ea & !RESERVATION_MASK;
|
||
let last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK;
|
||
t.invalidate_for_write(first_line);
|
||
if last_line != first_line { t.invalidate_for_write(last_line); }
|
||
}
|
||
}
|
||
while bytes_left > 0 {
|
||
let val = ctx.gpr[rs] as u32;
|
||
for byte_idx in 0..4 {
|
||
if bytes_left == 0 { break; }
|
||
mem.write_u8(ea, (val >> (24 - byte_idx * 8)) as u8);
|
||
ea = ea.wrapping_add(1);
|
||
bytes_left -= 1;
|
||
}
|
||
rs = (rs + 1) % 32;
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Special register moves =====
|
||
PpcOpcode::mfspr => {
|
||
let spr = instr.spr();
|
||
ctx.gpr[instr.rd()] = match spr {
|
||
crate::context::spr::XER => ctx.xer() as u64,
|
||
crate::context::spr::LR => ctx.lr,
|
||
crate::context::spr::CTR => ctx.ctr,
|
||
crate::context::spr::DEC => ctx.dec as u64,
|
||
crate::context::spr::TBL => ctx.timebase & 0xFFFF_FFFF,
|
||
crate::context::spr::TBU => ctx.timebase >> 32,
|
||
crate::context::spr::VRSAVE => ctx.vrsave as u64,
|
||
// Xbox 360 Xenon processor signature (from canary).
|
||
crate::context::spr::PVR => 0x0071_0800,
|
||
// Benign SPRs — titles read these but we don't model them.
|
||
crate::context::spr::SPRG0
|
||
| crate::context::spr::SPRG1
|
||
| crate::context::spr::SPRG2
|
||
| crate::context::spr::SPRG3
|
||
| crate::context::spr::HID0
|
||
| crate::context::spr::HID1
|
||
| crate::context::spr::DAR
|
||
| crate::context::spr::DSISR
|
||
| crate::context::spr::PIR => 0,
|
||
_ => {
|
||
tracing::warn!("mfspr: unimplemented SPR {}", spr);
|
||
0
|
||
}
|
||
};
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mtspr => {
|
||
let spr = instr.spr();
|
||
let val = ctx.gpr[instr.rs()];
|
||
match spr {
|
||
crate::context::spr::XER => ctx.set_xer(val as u32),
|
||
crate::context::spr::LR => ctx.lr = val,
|
||
crate::context::spr::CTR => ctx.ctr = val as u32 as u64,
|
||
crate::context::spr::DEC => ctx.dec = val as u32,
|
||
crate::context::spr::TBL_WRITE => {
|
||
ctx.timebase = (ctx.timebase & 0xFFFF_FFFF_0000_0000) | (val & 0xFFFF_FFFF);
|
||
}
|
||
crate::context::spr::TBU_WRITE => {
|
||
ctx.timebase = (ctx.timebase & 0x0000_0000_FFFF_FFFF) | ((val & 0xFFFF_FFFF) << 32);
|
||
}
|
||
crate::context::spr::VRSAVE => ctx.vrsave = val as u32,
|
||
// Benign writes — swallow silently to avoid false Unimplemented
|
||
// warnings on SPRs that have no observable effect in userspace.
|
||
crate::context::spr::SPRG0
|
||
| crate::context::spr::SPRG1
|
||
| crate::context::spr::SPRG2
|
||
| crate::context::spr::SPRG3
|
||
| crate::context::spr::HID0
|
||
| crate::context::spr::HID1
|
||
| crate::context::spr::DAR
|
||
| crate::context::spr::DSISR => {}
|
||
_ => {
|
||
tracing::warn!("mtspr: unimplemented SPR {}", spr);
|
||
}
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mfcr => {
|
||
ctx.gpr[instr.rd()] = ctx.cr() as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mtcrf => {
|
||
let crm = instr.crm();
|
||
let val = ctx.gpr[instr.rs()] as u32;
|
||
let old = ctx.cr();
|
||
let mut new = old;
|
||
for i in 0..8u32 {
|
||
if crm & (1 << (7 - i)) != 0 {
|
||
let mask = 0xF << (28 - i * 4);
|
||
new = (new & !mask) | (val & mask);
|
||
}
|
||
}
|
||
ctx.set_cr(new);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mfmsr => {
|
||
ctx.gpr[instr.rd()] = ctx.msr;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mtmsr | PpcOpcode::mtmsrd => {
|
||
ctx.msr = ctx.gpr[instr.rs()];
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::mftb => {
|
||
let tbr = instr.spr();
|
||
ctx.gpr[instr.rd()] = match tbr {
|
||
268 => ctx.timebase & 0xFFFF_FFFF,
|
||
269 => ctx.timebase >> 32,
|
||
_ => 0,
|
||
};
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// CR logical
|
||
PpcOpcode::crand => { cr_logical(ctx, instr, |a, b| a & b); ctx.pc += 4; }
|
||
PpcOpcode::crandc => { cr_logical(ctx, instr, |a, b| a & !b); ctx.pc += 4; }
|
||
PpcOpcode::creqv => { cr_logical(ctx, instr, |a, b| !(a ^ b)); ctx.pc += 4; }
|
||
PpcOpcode::crnand => { cr_logical(ctx, instr, |a, b| !(a & b)); ctx.pc += 4; }
|
||
PpcOpcode::crnor => { cr_logical(ctx, instr, |a, b| !(a | b)); ctx.pc += 4; }
|
||
PpcOpcode::cror => { cr_logical(ctx, instr, |a, b| a | b); ctx.pc += 4; }
|
||
PpcOpcode::crorc => { cr_logical(ctx, instr, |a, b| a | !b); ctx.pc += 4; }
|
||
PpcOpcode::crxor => { cr_logical(ctx, instr, |a, b| a ^ b); ctx.pc += 4; }
|
||
PpcOpcode::mcrf => {
|
||
ctx.cr[instr.crfd()] = ctx.cr[instr.crfs()];
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== Cache/sync (no-ops in interpreter) =====
|
||
PpcOpcode::dcbf | PpcOpcode::dcbi | PpcOpcode::dcbst |
|
||
PpcOpcode::dcbt | PpcOpcode::dcbtst | PpcOpcode::icbi |
|
||
PpcOpcode::sync | PpcOpcode::eieio | PpcOpcode::isync => {
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::dcbz => {
    // Data Cache Block Zero: clear the 32-byte-aligned block that
    // contains EA = (rA|0) + rB.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let block = (base.wrapping_add(ctx.gpr[instr.rb()]) as u32) & !31;
    // Notify the reservation table of the write, but only when one is
    // attached, enabled, and currently has reservers.
    if let Some(table) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
        if table.has_active_reservers() {
            table.invalidate_for_write(block);
        }
    }
    // Eight word stores cover the 32 bytes.
    for offset in (0..32u32).step_by(4) {
        mem.write_u32(block + offset, 0);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::dcbz128 => {
    // 128-byte variant of dcbz: clear the 128-byte-aligned block that
    // contains EA = (rA|0) + rB.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let block = (base.wrapping_add(ctx.gpr[instr.rb()]) as u32) & !127;
    // Notify the reservation table of the write, but only when one is
    // attached, enabled, and currently has reservers.
    if let Some(table) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
        if table.has_active_reservers() {
            table.invalidate_for_write(block);
        }
    }
    // Thirty-two word stores cover the 128 bytes.
    for offset in (0..128u32).step_by(4) {
        mem.write_u32(block + offset, 0);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== Load multiple =====
|
||
PpcOpcode::lmw => {
    // Load Multiple Word: fill GPR[rd]..GPR[31] with consecutive
    // zero-extended words starting at EA = (rA|0) + d.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let start = base.wrapping_add(instr.d() as i64 as u64);
    for (n, reg) in (instr.rd()..32).enumerate() {
        // Word n of the run sits 4*n bytes past the start address.
        let addr = start.wrapping_add(4 * n as u64);
        ctx.gpr[reg] = mem.read_u32(addr as u32) as u64;
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::stmw => {
    // Store Multiple Word: write the low 32 bits of GPR[rs]..GPR[31] to
    // consecutive words starting at EA = (rA|0) + d.
    let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    ea = ea.wrapping_add(instr.d() as i64 as u64);
    // PPCBUG-160: the stored range can straddle a reservation-line
    // boundary, so walk every touched line and invalidate each one
    // (same guarantee as single-word stores).
    if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
        if t.has_active_reservers() {
            let start_ea = ea as u32;
            let last_ea = start_ea.wrapping_add((32 - instr.rs() as u32) * 4).wrapping_sub(1);
            let line_size = RESERVATION_MASK + 1;
            let last_line = last_ea & !RESERVATION_MASK;
            let mut line = start_ea & !RESERVATION_MASK;
            loop {
                t.invalidate_for_write(line);
                // Stop on equality rather than `>=`: when the range wraps
                // past 0xFFFF_FFFF the last line is numerically below the
                // first, and `>=` would exit before invalidating it. Both
                // values are line-aligned and `line` advances one line per
                // step with wrapping, so it always reaches `last_line`.
                if line == last_line { break; }
                line = line.wrapping_add(line_size);
            }
        }
    }
    for r in instr.rs()..32 {
        mem.write_u32(ea as u32, ctx.gpr[r] as u32);
        ea = ea.wrapping_add(4);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== Trap =====
|
||
PpcOpcode::tw | PpcOpcode::twi | PpcOpcode::td | PpcOpcode::tdi => {
    // Conditional trap. The left operand is GPR[rA]; the right operand is
    // the sign-extended immediate for twi/tdi and GPR[rB] for tw/td.
    // tw/twi compare as words, td/tdi as doublewords.
    //
    // PPCBUG-063: when the trap fires, ctx.pc is left at the trapping
    // instruction (CIA), not CIA+4, so handlers / SEH delivery can read
    // the faulting address straight from ctx.pc.
    let faulting_pc = ctx.pc;
    let lhs = ctx.gpr[instr.ra()];
    let rhs = if matches!(instr.opcode, PpcOpcode::twi | PpcOpcode::tdi) {
        instr.simm16() as i64 as u64
    } else {
        ctx.gpr[instr.rb()]
    };
    let width = if matches!(instr.opcode, PpcOpcode::tw | PpcOpcode::twi) {
        trap::TrapWidth::Word
    } else {
        trap::TrapWidth::Doubleword
    };
    if trap::evaluate(instr.to(), lhs, rhs, width) {
        // PPCBUG-065: `twi 31, r0, IMM` is the Xbox 360 typed-trap
        // convention (CRT/kernel C++ exception class dispatch); append the
        // SIMM type code to the log line for that pattern only. Carrying
        // the code in a StepResult payload needs an enum extension and is
        // deferred.
        let is_typed_trap = matches!(instr.opcode, PpcOpcode::twi)
            && instr.to() == 31
            && instr.ra() == 0;
        let typed_suffix = if is_typed_trap {
            format!(" typed_trap_simm={:#06x}", instr.simm16() as u16)
        } else {
            String::new()
        };
        tracing::warn!(
            "Trap fired at {:#010x}: {:?} TO={} a={:#x} b={:#x}{}",
            faulting_pc, instr.opcode, instr.to(), lhs, rhs,
            typed_suffix
        );
        return StepResult::Trap;
    }
    // Condition not met: fall through to the next instruction.
    ctx.pc += 4;
}
|
||
|
||
// ===== Byte-reverse loads =====
|
||
PpcOpcode::lwbrx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let val = mem.read_u32(ea);
|
||
ctx.gpr[instr.rd()] = val.swap_bytes() as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lhbrx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let val = mem.read_u16(ea);
|
||
ctx.gpr[instr.rd()] = val.swap_bytes() as u64;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stwbrx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u32(ea, (ctx.gpr[instr.rs()] as u32).swap_bytes());
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::sthbrx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u16(ea, (ctx.gpr[instr.rs()] as u16).swap_bytes());
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== VMX/VMX128: Vector Load/Store =====
|
||
PpcOpcode::lvx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; // aligned
|
||
let mut bytes = [0u8; 16];
|
||
for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lvx128 => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
|
||
let mut bytes = [0u8; 16];
|
||
for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); }
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(bytes);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stvx => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
|
||
// PPCBUG-511: stvx was missing invalidate_for_write.
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
let bytes = ctx.vr[instr.rs()].as_bytes();
|
||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stvx128 => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
|
||
// PPCBUG-511: stvx128 was missing invalidate_for_write.
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
let bytes = ctx.vr[instr.vs128()].as_bytes();
|
||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||
ctx.pc += 4;
|
||
}
|
||
// lvewx, lvebx, lvehx all load aligned 16 bytes (per xenia reference)
|
||
PpcOpcode::lvebx => {
|
||
// Load 1 byte from EA into vD[EA & 0xF]. PowerISA marks the
|
||
// other lanes as "undefined" but real Xenon (and Canary)
|
||
// preserve their prior contents, so seed from vD.
|
||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let slot = (ea & 0xF) as usize;
|
||
let mut bytes = ctx.vr[instr.rd()].as_bytes();
|
||
bytes[slot] = mem.read_u8(ea);
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lvehx => {
|
||
// Load a halfword from (EA & ~1) into vD at halfword slot
|
||
// (EA & 0xF) >> 1. Other halfword lanes preserved (see lvebx).
|
||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let ea = ea_unaligned & !0x1u32;
|
||
let slot = ((ea_unaligned & 0xF) >> 1) as usize;
|
||
let mut bytes = ctx.vr[instr.rd()].as_bytes();
|
||
let h = mem.read_u16(ea);
|
||
bytes[slot * 2] = (h >> 8) as u8;
|
||
bytes[slot * 2 + 1] = (h & 0xFF) as u8;
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lvewx => {
|
||
// Load a word from (EA & ~3) into vD at word slot
|
||
// (EA & 0xF) >> 2. Other word lanes preserved (see lvebx).
|
||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let ea = ea_unaligned & !0x3u32;
|
||
let slot = ((ea_unaligned & 0xF) >> 2) as usize;
|
||
let mut bytes = ctx.vr[instr.rd()].as_bytes();
|
||
let w = mem.read_u32(ea);
|
||
bytes[slot * 4] = (w >> 24) as u8;
|
||
bytes[slot * 4 + 1] = (w >> 16) as u8;
|
||
bytes[slot * 4 + 2] = (w >> 8) as u8;
|
||
bytes[slot * 4 + 3] = (w & 0xFF) as u8;
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stvebx => {
|
||
// Store vS[EA & 0xF] (1 byte) to memory at EA.
|
||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
// PPCBUG-512: stvebx was missing invalidate_for_write.
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
let slot = (ea & 0xF) as usize;
|
||
let bytes = ctx.vr[instr.rs()].as_bytes();
|
||
mem.write_u8(ea, bytes[slot]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stvehx => {
|
||
// Store vS[slot] (1 halfword) at EA & ~1. slot = (EA & 0xF) >> 1.
|
||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let ea = ea_unaligned & !0x1u32;
|
||
// PPCBUG-512: stvehx was missing invalidate_for_write.
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
let slot = ((ea_unaligned & 0xF) >> 1) as usize;
|
||
let bytes = ctx.vr[instr.rs()].as_bytes();
|
||
let h = ((bytes[slot * 2] as u16) << 8) | (bytes[slot * 2 + 1] as u16);
|
||
mem.write_u16(ea, h);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stvewx => {
|
||
// Store vS[slot] (1 word) at EA & ~3. slot = (EA & 0xF) >> 2.
|
||
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
|
||
let ea = ea_unaligned & !0x3u32;
|
||
// PPCBUG-512: stvewx was missing invalidate_for_write.
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
let slot = ((ea_unaligned & 0xF) >> 2) as usize;
|
||
let bytes = ctx.vr[instr.rs()].as_bytes();
|
||
let w = ((bytes[slot * 4] as u32) << 24)
|
||
| ((bytes[slot * 4 + 1] as u32) << 16)
|
||
| ((bytes[slot * 4 + 2] as u32) << 8)
|
||
| (bytes[slot * 4 + 3] as u32);
|
||
mem.write_u32(ea, w);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::lvxl | PpcOpcode::lvxl128 => {
|
||
// Same as lvx but with cache hint (ignored)
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
|
||
let mut bytes = [0u8; 16];
|
||
for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); }
|
||
let vd = if matches!(instr.opcode, PpcOpcode::lvxl128) { instr.vd128() } else { instr.rd() };
|
||
ctx.vr[vd] = xenia_types::Vec128::from_bytes(bytes);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stvxl | PpcOpcode::stvxl128 => {
|
||
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
|
||
// PPCBUG-511: stvxl/stvxl128 were missing invalidate_for_write.
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
let vs = if matches!(instr.opcode, PpcOpcode::stvxl128) { instr.vs128() } else { instr.rs() };
|
||
let bytes = ctx.vr[vs].as_bytes();
|
||
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== VMX: Float Arithmetic =====
|
||
PpcOpcode::vaddfp => {
|
||
// PPCBUG-435: VSCR.NJ=1 (Xbox 360 always boots with this set) requires
|
||
// flush-to-zero on subnormal inputs and outputs. Canary VMX float
|
||
// arithmetic flushes denormals unconditionally.
|
||
let a = ctx.vr[instr.ra()].as_f32x4();
|
||
let b = ctx.vr[instr.rb()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
r[i] = vmx::flush_denorm(ai + bi);
|
||
}
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vaddfp128 => {
|
||
// PPCBUG-435: same as vaddfp.
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
r[i] = vmx::flush_denorm(ai + bi);
|
||
}
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsubfp => {
|
||
// PPCBUG-435.
|
||
let a = ctx.vr[instr.ra()].as_f32x4();
|
||
let b = ctx.vr[instr.rb()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
r[i] = vmx::flush_denorm(ai - bi);
|
||
}
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsubfp128 => {
|
||
// PPCBUG-435.
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
r[i] = vmx::flush_denorm(ai - bi);
|
||
}
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaddfp => {
|
||
// vD = (vA * vC) + vB. AltiVec unconditionally flushes denormal
|
||
// *inputs* to 0 regardless of VSCR[NJ] (confirmed on POWER8 hw).
|
||
let a = ctx.vr[instr.ra()].as_f32x4();
|
||
let b = ctx.vr[instr.rb()].as_f32x4();
|
||
let c = ctx.vr[instr.rc()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
let ci = vmx::flush_denorm(c[i]);
|
||
// PPCBUG-437: flush subnormal output too.
|
||
r[i] = vmx::flush_denorm(ai.mul_add(ci, bi));
|
||
}
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaddfp128 => {
|
||
// ISA: (VD) <- (VA × VD) + VB. VD is both the second multiplicand and destination.
|
||
// Canary InstrEmit_vmaddfp128 (ppc_emit_altivec.cc:806-809): MulAdd(VA, VD, VB).
|
||
// Previous code computed ai.mul_add(bi, di) = VA×VB+VD — VB and VD roles swapped
|
||
// (PPCBUG-424). Fix: ai.mul_add(di, bi) = VA×VD+VB.
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let d = ctx.vr[instr.vd128()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
let di = vmx::flush_denorm(d[i]);
|
||
// PPCBUG-437.
|
||
r[i] = vmx::flush_denorm(ai.mul_add(di, bi));
|
||
}
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vnmsubfp => {
|
||
// vD = -(vA * vC - vB) = vB - vA * vC. Same denorm-flush rule as vmaddfp.
|
||
let a = ctx.vr[instr.ra()].as_f32x4();
|
||
let b = ctx.vr[instr.rb()].as_f32x4();
|
||
let c = ctx.vr[instr.rc()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
let ci = vmx::flush_denorm(c[i]);
|
||
// PPCBUG-426: single FMA rounding instead of two-step (b - a*c).
|
||
r[i] = vmx::flush_denorm(-ai.mul_add(ci, -bi));
|
||
}
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vnmsubfp128 => {
|
||
// VMX128 form: vD <- -((vA * vB) - vD) = vD - (vA * vB). Canary
|
||
// routes through `InstrEmit_vnmsubfp_` with the same arg-swap,
|
||
// which flushes all inputs unconditionally.
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let d = ctx.vr[instr.vd128()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
let di = vmx::flush_denorm(d[i]);
|
||
// PPCBUG-427: single FMA rounding.
|
||
r[i] = vmx::flush_denorm(-ai.mul_add(bi, -di));
|
||
}
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmulfp128 => {
|
||
// PPCBUG-435 + PPCBUG-437.
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
r[i] = vmx::flush_denorm(ai * bi);
|
||
}
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaxfp => {
|
||
let a = ctx.vr[instr.ra()].as_f32x4();
|
||
let b = ctx.vr[instr.rb()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = vmx::max_nan(a[i], b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaxfp128 => {
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = vmx::max_nan(a[i], b[i]); }
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vminfp => {
|
||
let a = ctx.vr[instr.ra()].as_f32x4();
|
||
let b = ctx.vr[instr.rb()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = vmx::min_nan(a[i], b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vminfp128 => {
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = vmx::min_nan(a[i], b[i]); }
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vrefp | PpcOpcode::vrefp128 => {
|
||
let vb = if matches!(instr.opcode, PpcOpcode::vrefp128) { instr.vb128() } else { instr.rb() };
|
||
let vd = if matches!(instr.opcode, PpcOpcode::vrefp128) { instr.vd128() } else { instr.rd() };
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = 1.0 / b[i]; }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vrsqrtefp | PpcOpcode::vrsqrtefp128 => {
|
||
let vb = if matches!(instr.opcode, PpcOpcode::vrsqrtefp128) { instr.vb128() } else { instr.rb() };
|
||
let vd = if matches!(instr.opcode, PpcOpcode::vrsqrtefp128) { instr.vd128() } else { instr.rd() };
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = 1.0 / b[i].sqrt(); }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== VMX: Float Compare =====
|
||
PpcOpcode::vcmpeqfp | PpcOpcode::vcmpeqfp128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_f32x4();
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = if a[i] == b[i] { 0xFFFF_FFFF } else { 0 }; }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
let rc = if matches!(instr.opcode, PpcOpcode::vcmpeqfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
|
||
if rc { update_cr6_from_vmask(&r, ctx); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpgefp | PpcOpcode::vcmpgefp128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_f32x4();
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = if a[i] >= b[i] { 0xFFFF_FFFF } else { 0 }; }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
let rc = if matches!(instr.opcode, PpcOpcode::vcmpgefp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
|
||
if rc { update_cr6_from_vmask(&r, ctx); }
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpgtfp | PpcOpcode::vcmpgtfp128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_f32x4();
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFF_FFFF } else { 0 }; }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
let rc = if matches!(instr.opcode, PpcOpcode::vcmpgtfp128) { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
|
||
if rc { update_cr6_from_vmask(&r, ctx); }
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== VMX: Logical =====
|
||
PpcOpcode::vand | PpcOpcode::vand128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[i] & b[i]; }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vandc | PpcOpcode::vandc128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[i] & !b[i]; }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vor | PpcOpcode::vor128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[i] | b[i]; }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vxor | PpcOpcode::vxor128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[i] ^ b[i]; }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vnor | PpcOpcode::vnor128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = !(a[i] | b[i]); }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsel | PpcOpcode::vsel128 => {
|
||
// vD = (vA & ~vC) | (vB & vC)
|
||
let (va, vb, vd);
|
||
let vc;
|
||
if matches!(instr.opcode, PpcOpcode::vsel128) {
|
||
va = instr.va128();
|
||
vb = instr.vb128();
|
||
vd = instr.vd128();
|
||
vc = vd; // for 128, vC is encoded in vD field
|
||
} else {
|
||
va = instr.ra();
|
||
vb = instr.rb();
|
||
vd = instr.rd();
|
||
vc = instr.rc();
|
||
}
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let c = ctx.vr[vc].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = (a[i] & !c[i]) | (b[i] & c[i]); }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== VMX: Permute/Splat/Shift =====
|
||
PpcOpcode::vperm | PpcOpcode::vperm128 => {
    // Byte permute: each result byte is picked from the 32-byte
    // concatenation vA || vB by the low 5 bits of the matching byte of vC.
    let (va, vb, vc, vd) = if matches!(instr.opcode, PpcOpcode::vperm128) {
        (instr.va128(), instr.vb128(), instr.vc128_2(), instr.vd128())
    } else {
        (instr.ra(), instr.rb(), instr.rc(), instr.rd())
    };
    let first = ctx.vr[va].as_bytes();
    let second = ctx.vr[vb].as_bytes();
    let control = ctx.vr[vc].as_bytes();
    let mut out = [0u8; 16];
    for (dst, &selector) in out.iter_mut().zip(control.iter()) {
        let idx = usize::from(selector & 0x1F);
        // Indices 0..=15 address vA, 16..=31 address vB.
        *dst = match idx.checked_sub(16) {
            Some(k) => second[k],
            None => first[idx],
        };
    }
    ctx.vr[vd] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsldoi => {
    // Shift Left Double by Octet Immediate: take 16 bytes from the
    // 32-byte concatenation vA || vB, starting at byte offset SH.
    let shift = ((instr.raw >> 6) & 0xF) as usize; // SH field bits 6-9
    let upper = ctx.vr[instr.ra()].as_bytes();
    let lower = ctx.vr[instr.rb()].as_bytes();
    let mut out = [0u8; 16];
    // SH is at most 15, so the window always has 16 bytes available.
    let window = upper.iter().chain(lower.iter()).skip(shift);
    for (dst, &src) in out.iter_mut().zip(window) {
        *dst = src;
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsldoi128 => {
|
||
let a_bytes = ctx.vr[instr.va128()].as_bytes();
|
||
let b_bytes = ctx.vr[instr.vb128()].as_bytes();
|
||
let sh = instr.vx128_5_sh() as usize;
|
||
let mut concat = [0u8; 32];
|
||
concat[..16].copy_from_slice(&a_bytes);
|
||
concat[16..].copy_from_slice(&b_bytes);
|
||
let mut r = [0u8; 16];
|
||
let sh = sh.min(16);
|
||
r.copy_from_slice(&concat[sh..sh + 16]);
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vspltw => {
|
||
let uimm = ((instr.raw >> 16) & 0x3) as usize; // UIMM (2 bits for word index)
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let val = b[uimm];
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4(val, val, val, val);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vspltw128 => {
|
||
let uimm = ((instr.raw >> 16) & 0x3) as usize;
|
||
let b = ctx.vr[instr.vb128()].as_u32x4();
|
||
let val = b[uimm];
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4(val, val, val, val);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsplth => {
|
||
let uimm = ((instr.raw >> 16) & 0x7) as usize;
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let val = b[uimm];
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array([val; 8]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vspltb => {
|
||
let uimm = ((instr.raw >> 16) & 0xF) as usize;
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let val = b[uimm];
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes([val; 16]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vspltisw | PpcOpcode::vspltisw128 => {
    // Splat the sign-extended 5-bit immediate (raw bits 16..=20) into all
    // four word lanes of the destination.
    let field = ((instr.raw >> 16) & 0x1F) as i32;
    // Flip-then-subtract sign extension: maps 16..=31 to -16..=-1 and
    // leaves 0..=15 unchanged.
    let lane = ((field ^ 0x10) - 0x10) as u32;
    let vd = match instr.opcode {
        PpcOpcode::vspltisw128 => instr.vd128(),
        _ => instr.rd(),
    };
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4(lane, lane, lane, lane);
    ctx.pc += 4;
}
|
||
PpcOpcode::vspltisb => {
|
||
let simm = ((instr.raw >> 16) & 0x1F) as i8;
|
||
let simm = if simm & 0x10 != 0 { simm | !0x1F } else { simm };
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes([simm as u8; 16]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vspltish => {
|
||
let simm = ((instr.raw >> 16) & 0x1F) as i16;
|
||
let simm = if simm & 0x10 != 0 { simm | !0x1F } else { simm };
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array([simm as u16; 8]);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== VMX: Merge/Shuffle =====
|
||
PpcOpcode::vmrghw | PpcOpcode::vmrghw128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
// Merge high words: [a0, b0, a1, b1]
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4(a[0], b[0], a[1], b[1]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmrglw | PpcOpcode::vmrglw128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
// Merge low words: [a2, b2, a3, b3]
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4(a[2], b[2], a[3], b[3]);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== VMX: Integer Arithmetic =====
|
||
PpcOpcode::vadduwm => {
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[i].wrapping_add(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsubuwm => {
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[i].wrapping_sub(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ===== VMX: Shift =====
|
||
PpcOpcode::vslw | PpcOpcode::vslw128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 {
|
||
let sh = b[i] & 0x1F;
|
||
r[i] = a[i] << sh;
|
||
}
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsrw | PpcOpcode::vsrw128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 {
|
||
let sh = b[i] & 0x1F;
|
||
r[i] = a[i] >> sh;
|
||
}
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsraw | PpcOpcode::vsraw128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 {
|
||
let sh = b[i] & 0x1F;
|
||
r[i] = (a[i] as i32 >> sh) as u32;
|
||
}
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vrlw | PpcOpcode::vrlw128 => {
|
||
let (va, vb, vd) = vmx_reg_triple(instr);
|
||
let a = ctx.vr[va].as_u32x4();
|
||
let b = ctx.vr[vb].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 {
|
||
let sh = b[i] & 0x1F;
|
||
r[i] = a[i].rotate_left(sh);
|
||
}
|
||
ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// VMX: Round/Convert
|
||
PpcOpcode::vrfiz | PpcOpcode::vrfiz128 => {
|
||
let vb = if matches!(instr.opcode, PpcOpcode::vrfiz128) { instr.vb128() } else { instr.rb() };
|
||
let vd = if matches!(instr.opcode, PpcOpcode::vrfiz128) { instr.vd128() } else { instr.rd() };
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = b[i].trunc(); }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vrfin | PpcOpcode::vrfin128 => {
|
||
// PPCBUG-432: ISA round-to-nearest-even, NOT Rust's `round()`
|
||
// (which is round-half-away-from-zero).
|
||
let vb = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vb128() } else { instr.rb() };
|
||
let vd = if matches!(instr.opcode, PpcOpcode::vrfin128) { instr.vd128() } else { instr.rd() };
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = b[i].round_ties_even(); }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vrfip | PpcOpcode::vrfip128 => {
|
||
let vb = if matches!(instr.opcode, PpcOpcode::vrfip128) { instr.vb128() } else { instr.rb() };
|
||
let vd = if matches!(instr.opcode, PpcOpcode::vrfip128) { instr.vd128() } else { instr.rd() };
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = b[i].ceil(); }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vrfim | PpcOpcode::vrfim128 => {
|
||
let vb = if matches!(instr.opcode, PpcOpcode::vrfim128) { instr.vb128() } else { instr.rb() };
|
||
let vd = if matches!(instr.opcode, PpcOpcode::vrfim128) { instr.vd128() } else { instr.rd() };
|
||
let b = ctx.vr[vb].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = b[i].floor(); }
|
||
ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// VMX: MFVSCR/MTVSCR — VSCR lives in word 3; only NJ (bit 16) and
|
||
// SAT (bit 31) are defined. Canary stores the full Vec128 so we do
|
||
// the same: mfvscr copies the register, mtvscr overwrites it.
|
||
PpcOpcode::mfvscr => {
    // Copy the whole 128-bit VSCR image into VD.
    let vscr_image = ctx.vscr;
    ctx.vr[instr.rd()] = vscr_image;
    ctx.pc += 4;
}
|
||
PpcOpcode::mtvscr => {
    // Overwrite the whole 128-bit VSCR image with the contents of VB.
    let new_vscr = ctx.vr[instr.rb()];
    ctx.vscr = new_vscr;
    ctx.pc += 4;
}
|
||
|
||
// ===== VMX: lvsl/lvsr (generate permute vectors) =====
|
||
PpcOpcode::lvsl | PpcOpcode::lvsl128 => {
    // Build the permute control vector whose byte i is `sh + i`, where
    // `sh` is the low four bits of EA = (rA|0) + rB. No memory is accessed.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let sh = (base.wrapping_add(ctx.gpr[instr.rb()]) & 0xF) as u8;
    let control: [u8; 16] = std::array::from_fn(|i| sh + i as u8);
    let vd = match instr.opcode {
        PpcOpcode::lvsl128 => instr.vd128(),
        _ => instr.rd(),
    };
    ctx.vr[vd] = xenia_types::Vec128::from_bytes(control);
    ctx.pc += 4;
}
|
||
PpcOpcode::lvsr | PpcOpcode::lvsr128 => {
    // Build the permute control vector whose byte i is `(16 - sh) + i`,
    // where `sh` is the low four bits of EA = (rA|0) + rB.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let sh = (base.wrapping_add(ctx.gpr[instr.rb()]) & 0xF) as u8;
    let first = 16 - sh;
    let control: [u8; 16] = std::array::from_fn(|i| first + i as u8);
    let vd = match instr.opcode {
        PpcOpcode::lvsr128 => instr.vd128(),
        _ => instr.rd(),
    };
    ctx.vr[vd] = xenia_types::Vec128::from_bytes(control);
    ctx.pc += 4;
}
|
||
|
||
// ===== VMX: Integer compare =====
|
||
PpcOpcode::vcmpequw | PpcOpcode::vcmpequw128 => {
    // Per-word equality compare: a lane becomes all ones when equal,
    // all zeros otherwise. With the record bit set, CR6 is updated from
    // the resulting mask.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let mask: [u32; 4] =
        std::array::from_fn(|lane| if lhs[lane] == rhs[lane] { 0xFFFF_FFFF } else { 0 });
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(mask);
    // The record bit sits in a different field for the VMX128 encoding.
    let record = match instr.opcode {
        PpcOpcode::vcmpequw128 => instr.vx128r_rc_bit(),
        _ => instr.vc_rc_bit(),
    };
    if record {
        update_cr6_from_vmask(&mask, ctx);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== FPU: Arithmetic =====
|
||
PpcOpcode::faddx => {
    // frD = frA + frB (double precision), with FPSCR bookkeeping.
    let (fra, frb) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_add(ctx, fra, frb, false);
    let sum = fra + frb;
    ctx.fpr[instr.rd()] = sum;
    let finite_inputs = fra.is_finite() && frb.is_finite();
    fpscr::update_after_op(ctx, sum, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::faddsx => {
    // frD = frA + frB, narrowed through `to_single`, with FPSCR bookkeeping.
    let (fra, frb) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_add(ctx, fra, frb, false);
    let sum = to_single(ctx, fra + frb);
    ctx.fpr[instr.rd()] = sum;
    let finite_inputs = fra.is_finite() && frb.is_finite();
    fpscr::update_after_op(ctx, sum, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fsubx => {
    // frD = frA - frB (double precision). The `true` flag presumably
    // tells the invalid-operation check that this is a subtraction.
    let (fra, frb) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_add(ctx, fra, frb, true);
    let diff = fra - frb;
    ctx.fpr[instr.rd()] = diff;
    let finite_inputs = fra.is_finite() && frb.is_finite();
    fpscr::update_after_op(ctx, diff, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fsubsx => {
    // frD = frA - frB, narrowed through `to_single`, with FPSCR bookkeeping.
    let (fra, frb) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_add(ctx, fra, frb, true);
    let diff = to_single(ctx, fra - frb);
    ctx.fpr[instr.rd()] = diff;
    let finite_inputs = fra.is_finite() && frb.is_finite();
    fpscr::update_after_op(ctx, diff, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fmulx => {
    // A-form multiply: frD = frA * frC. The second factor is read via
    // the `rc()` field accessor, not `rb()`.
    let (fra, frc) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rc()]);
    fpscr::check_invalid_mul(ctx, fra, frc);
    let product = fra * frc;
    ctx.fpr[instr.rd()] = product;
    let finite_inputs = fra.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, product, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fmulsx => {
    // frD = frA * frC, narrowed through `to_single`, with FPSCR bookkeeping.
    let (fra, frc) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rc()]);
    fpscr::check_invalid_mul(ctx, fra, frc);
    let product = to_single(ctx, fra * frc);
    ctx.fpr[instr.rd()] = product;
    let finite_inputs = fra.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, product, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fdivx => {
    // frD = frA / frB (double precision). Invalid-operation and
    // zero-divide conditions are checked before the quotient is formed.
    let (fra, frb) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_div(ctx, fra, frb);
    fpscr::check_zero_divide(ctx, fra, frb);
    let quotient = fra / frb;
    ctx.fpr[instr.rd()] = quotient;
    let ordinary_inputs = fra.is_finite() && frb.is_finite() && frb != 0.0;
    fpscr::update_after_op(ctx, quotient, ordinary_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fdivsx => {
    // frD = frA / frB, narrowed through `to_single`, with FPSCR bookkeeping.
    let (fra, frb) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_div(ctx, fra, frb);
    fpscr::check_zero_divide(ctx, fra, frb);
    let quotient = to_single(ctx, fra / frb);
    ctx.fpr[instr.rd()] = quotient;
    let ordinary_inputs = fra.is_finite() && frb.is_finite() && frb != 0.0;
    fpscr::update_after_op(ctx, quotient, ordinary_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== FPU: Multiply-Add =====
|
||
PpcOpcode::fmaddx => {
    // frD = frA * frC + frB, computed as a fused multiply-add.
    // PPCBUG-202: VXISI is derived from the input operands, not from an
    // intermediate `a * c`, which would carry the wrong sign on overflow.
    let fra = ctx.fpr[instr.ra()];
    let frc = ctx.fpr[instr.rc()];
    let frb = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, fra, frc);
    fpscr::check_invalid_fma_add(ctx, fra, frc, frb, false);
    let fused = fra.mul_add(frc, frb);
    ctx.fpr[instr.rd()] = fused;
    let finite_inputs = fra.is_finite() && frb.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, fused, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fmaddsx => {
    // frD = single(frA * frC + frB).
    // PPCBUG-181: the add step must also be checked for VXISI.
    let fra = ctx.fpr[instr.ra()];
    let frc = ctx.fpr[instr.rc()];
    let frb = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, fra, frc);
    fpscr::check_invalid_fma_add(ctx, fra, frc, frb, false);
    let fused = to_single(ctx, fra.mul_add(frc, frb));
    ctx.fpr[instr.rd()] = fused;
    let finite_inputs = fra.is_finite() && frb.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, fused, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fmsubx => {
    // frD = frA * frC - frB, computed as a fused multiply-add with -frB.
    // PPCBUG-203: the subtract step must also be checked for VXISI.
    let fra = ctx.fpr[instr.ra()];
    let frc = ctx.fpr[instr.rc()];
    let frb = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, fra, frc);
    fpscr::check_invalid_fma_add(ctx, fra, frc, frb, true);
    let fused = fra.mul_add(frc, -frb);
    ctx.fpr[instr.rd()] = fused;
    let finite_inputs = fra.is_finite() && frb.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, fused, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fmsubsx => {
    // frD = single(frA * frC - frB).
    // PPCBUG-182: the subtract step must also be checked for VXISI.
    let fra = ctx.fpr[instr.ra()];
    let frc = ctx.fpr[instr.rc()];
    let frb = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, fra, frc);
    fpscr::check_invalid_fma_add(ctx, fra, frc, frb, true);
    let fused = to_single(ctx, fra.mul_add(frc, -frb));
    ctx.fpr[instr.rd()] = fused;
    let finite_inputs = fra.is_finite() && frb.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, fused, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fnmaddx => {
    // frD = -(frA * frC + frB).
    // PPCBUG-203: VXISI check on the add step.
    // PPCBUG-205: a NaN result is passed through without flipping its sign.
    let fra = ctx.fpr[instr.ra()];
    let frc = ctx.fpr[instr.rc()];
    let frb = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, fra, frc);
    fpscr::check_invalid_fma_add(ctx, fra, frc, frb, false);
    let fused = fra.mul_add(frc, frb);
    let negated = if fused.is_nan() { fused } else { -fused };
    ctx.fpr[instr.rd()] = negated;
    let finite_inputs = fra.is_finite() && frb.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, negated, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fnmaddsx => {
    // frD = single(-(frA * frC + frB)).
    // PPCBUG-181 + PPCBUG-183: VXISI check on the add step, and a NaN
    // result keeps its sign (no negation).
    let fra = ctx.fpr[instr.ra()];
    let frc = ctx.fpr[instr.rc()];
    let frb = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, fra, frc);
    fpscr::check_invalid_fma_add(ctx, fra, frc, frb, false);
    let fused = fra.mul_add(frc, frb);
    let negated = if fused.is_nan() { fused } else { -fused };
    let narrowed = to_single(ctx, negated);
    ctx.fpr[instr.rd()] = narrowed;
    let finite_inputs = fra.is_finite() && frb.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, narrowed, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fnmsubx => {
    // frD = -(frA * frC - frB).
    // PPCBUG-203: VXISI check on the subtract step.
    // PPCBUG-205: a NaN result is passed through without flipping its sign.
    let fra = ctx.fpr[instr.ra()];
    let frc = ctx.fpr[instr.rc()];
    let frb = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, fra, frc);
    fpscr::check_invalid_fma_add(ctx, fra, frc, frb, true);
    let fused = fra.mul_add(frc, -frb);
    let negated = if fused.is_nan() { fused } else { -fused };
    ctx.fpr[instr.rd()] = negated;
    let finite_inputs = fra.is_finite() && frb.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, negated, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fnmsubsx => {
    // frD = single(-(frA * frC - frB)).
    // PPCBUG-182 + PPCBUG-183: VXISI check on the subtract step, and a
    // NaN result keeps its sign (no negation).
    let fra = ctx.fpr[instr.ra()];
    let frc = ctx.fpr[instr.rc()];
    let frb = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, fra, frc);
    fpscr::check_invalid_fma_add(ctx, fra, frc, frb, true);
    let fused = fra.mul_add(frc, -frb);
    let negated = if fused.is_nan() { fused } else { -fused };
    let narrowed = to_single(ctx, negated);
    ctx.fpr[instr.rd()] = narrowed;
    let finite_inputs = fra.is_finite() && frb.is_finite() && frc.is_finite();
    fpscr::update_after_op(ctx, narrowed, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== FPU: Move/Sign =====
|
||
PpcOpcode::fmrx => {
    // Register move: frD = frB, value copied unchanged.
    let moved = ctx.fpr[instr.rb()];
    ctx.fpr[instr.rd()] = moved;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fabsx => {
    // frD = |frB|.
    let magnitude = ctx.fpr[instr.rb()].abs();
    ctx.fpr[instr.rd()] = magnitude;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fnegx => {
    // frD = -frB.
    let negated = -ctx.fpr[instr.rb()];
    ctx.fpr[instr.rd()] = negated;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fnabsx => {
    // frD = -|frB|.
    let magnitude = ctx.fpr[instr.rb()].abs();
    ctx.fpr[instr.rd()] = -magnitude;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== FPU: Select =====
|
||
PpcOpcode::fselx => {
    // Select: frD = frC when frA >= 0.0, otherwise frB. The comparison
    // is false for NaN, so a NaN in frA selects frB.
    let chosen = if ctx.fpr[instr.ra()] >= 0.0 {
        instr.rc()
    } else {
        instr.rb()
    };
    ctx.fpr[instr.rd()] = ctx.fpr[chosen];
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== FPU: Square root / Reciprocal =====
|
||
PpcOpcode::fsqrtx => {
    // frD = sqrt(frB) in double precision.
    let operand = ctx.fpr[instr.rb()];
    // A strictly negative operand (excluding -0.0 and NaN) is an invalid
    // square root; `< 0.0` is false for both -0.0 and NaN.
    if operand < 0.0 {
        fpscr::set_exception(ctx, fpscr::VXSQRT);
    }
    if fpscr::is_snan(operand) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let root = operand.sqrt();
    ctx.fpr[instr.rd()] = root;
    fpscr::update_after_op(ctx, root, operand.is_finite());
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fsqrtsx => {
    // frD = single(sqrt(frB)).
    let operand = ctx.fpr[instr.rb()];
    // Strictly negative operands raise VXSQRT; -0.0 and NaN do not
    // satisfy `< 0.0`.
    if operand < 0.0 {
        fpscr::set_exception(ctx, fpscr::VXSQRT);
    }
    if fpscr::is_snan(operand) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let root = to_single(ctx, operand.sqrt());
    ctx.fpr[instr.rd()] = root;
    fpscr::update_after_op(ctx, root, operand.is_finite());
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fresx => {
    // Single-precision reciprocal estimate: frD = 1.0 / frB.
    // PPCBUG-184: the operand is first quantised to f32, mirroring
    // canary's `f.Recip(f.Convert(frB, FLOAT32_TYPE))`. Real hardware
    // yields a ~12-bit table estimate; this computes a full IEEE single
    // reciprocal, so only the input precision is matched.
    let raw_operand = ctx.fpr[instr.rb()];
    let operand = f64::from(raw_operand as f32);
    if operand == 0.0 {
        fpscr::set_exception(ctx, fpscr::ZX);
    }
    // The SNaN test looks at the original, un-quantised operand.
    if fpscr::is_snan(raw_operand) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let estimate = to_single(ctx, 1.0 / operand);
    ctx.fpr[instr.rd()] = estimate;
    let ordinary_input = operand.is_finite() && operand != 0.0;
    fpscr::update_after_op(ctx, estimate, ordinary_input);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::frsqrtex => {
    // Reciprocal square-root estimate: frD = 1.0 / sqrt(frB).
    let operand = ctx.fpr[instr.rb()];
    if operand == 0.0 {
        fpscr::set_exception(ctx, fpscr::ZX);
    }
    // Strictly negative operands raise VXSQRT; -0.0 and NaN do not
    // satisfy `< 0.0`.
    if operand < 0.0 {
        fpscr::set_exception(ctx, fpscr::VXSQRT);
    }
    if fpscr::is_snan(operand) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let estimate = 1.0 / operand.sqrt();
    ctx.fpr[instr.rd()] = estimate;
    let ordinary_input = operand.is_finite() && operand > 0.0;
    fpscr::update_after_op(ctx, estimate, ordinary_input);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== FPU: Rounding/Conversion =====
|
||
PpcOpcode::frspx => {
    // Round frB to single precision via `to_single` (which receives the
    // context, presumably to honour FPSCR[RN]).
    let operand = ctx.fpr[instr.rb()];
    if fpscr::is_snan(operand) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let narrowed = to_single(ctx, operand);
    // PPCBUG-225: a finite value that changed while narrowing is inexact.
    let inexact = operand.is_finite() && narrowed.is_finite() && narrowed != operand;
    if inexact {
        fpscr::set_exception(ctx, fpscr::XX);
    }
    ctx.fpr[instr.rd()] = narrowed;
    fpscr::update_after_op(ctx, narrowed, operand.is_finite());
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fcfidx => {
    // Convert from integer doubleword: the 64 raw bits of frB are
    // reinterpreted as a signed integer and converted to f64 in frD.
    let i = ctx.fpr[instr.rb()].to_bits() as i64;
    let result = i as f64;
    // PPCBUG-224: raise XX when the conversion loses precision.
    // The round trip is compared in i128 because `result as i64`
    // saturates: i64::MAX converts to 2^63, which would saturate back to
    // i64::MAX and hide the inexact conversion. `result` is integral and
    // at most 2^63 in magnitude, so the i128 cast is exact.
    if (result as i128) != i128::from(i) {
        fpscr::set_exception(ctx, fpscr::XX);
    }
    ctx.fpr[instr.rd()] = result;
    fpscr::set_fprf(ctx, fpscr::classify_fprf(result));
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fctidx => {
    // Convert frB to a signed 64-bit integer, rounding through
    // `fpscr::round_to_i64`; the integer bit pattern is stored in frD.
    let src = ctx.fpr[instr.rb()];
    let bits: u64 = if src.is_nan() {
        // NaN yields the most negative integer; an SNaN also raises VXSNAN.
        let mut flags = fpscr::VXCVI;
        if fpscr::is_snan(src) {
            flags |= fpscr::VXSNAN;
        }
        fpscr::set_exception(ctx, flags);
        0x8000_0000_0000_0000
    } else if src >= (i64::MAX as f64) {
        // Too large: saturate to i64::MAX.
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x7FFF_FFFF_FFFF_FFFF
    } else if src < (i64::MIN as f64) {
        // Too small: saturate to i64::MIN.
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x8000_0000_0000_0000
    } else {
        // PPCBUG-229: a fractional input makes the conversion inexact.
        if src != src.trunc() {
            fpscr::set_exception(ctx, fpscr::XX);
        }
        fpscr::round_to_i64(ctx, src) as u64
    };
    ctx.fpr[instr.rd()] = f64::from_bits(bits);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fctidzx => {
    // Convert frB to a signed 64-bit integer, rounding toward zero; the
    // integer bit pattern is stored in frD.
    let src = ctx.fpr[instr.rb()];
    let bits: u64 = if src.is_nan() {
        // NaN yields the most negative integer; an SNaN also raises VXSNAN.
        let mut flags = fpscr::VXCVI;
        if fpscr::is_snan(src) {
            flags |= fpscr::VXSNAN;
        }
        fpscr::set_exception(ctx, flags);
        0x8000_0000_0000_0000
    } else if src >= (i64::MAX as f64) {
        // Too large: saturate to i64::MAX.
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x7FFF_FFFF_FFFF_FFFF
    } else if src < (i64::MIN as f64) {
        // Too small: saturate to i64::MIN.
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x8000_0000_0000_0000
    } else {
        // PPCBUG-229: a fractional input makes the conversion inexact.
        let whole = src.trunc();
        if src != whole {
            fpscr::set_exception(ctx, fpscr::XX);
        }
        (whole as i64) as u64
    };
    ctx.fpr[instr.rd()] = f64::from_bits(bits);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fctiwx => {
    // Convert frB to a signed 32-bit integer, rounding through
    // `fpscr::round_to_i32`; the word is stored zero-extended in the
    // low half of frD's bit pattern.
    //
    // NOTE(review): the range tests run on the unrounded value, so an
    // input just outside the i32 range that would round back into range
    // is reported as VXCVI — confirm against the ISA's "rounded value"
    // wording before changing.
    let src = ctx.fpr[instr.rb()];
    let word: u32 = if src.is_nan() {
        // NaN yields the most negative integer; an SNaN also raises VXSNAN.
        let mut flags = fpscr::VXCVI;
        if fpscr::is_snan(src) {
            flags |= fpscr::VXSNAN;
        }
        fpscr::set_exception(ctx, flags);
        0x8000_0000
    } else if src > (i32::MAX as f64) {
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x7FFF_FFFF
    } else if src < (i32::MIN as f64) {
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x8000_0000
    } else {
        // PPCBUG-230: a fractional input makes the conversion inexact.
        if src != src.trunc() {
            fpscr::set_exception(ctx, fpscr::XX);
        }
        fpscr::round_to_i32(ctx, src) as u32
    };
    ctx.fpr[instr.rd()] = f64::from_bits(word as u64);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::fctiwzx => {
    // Convert frB to a signed 32-bit integer, rounding toward zero; the
    // word is stored zero-extended in the low half of frD's bit pattern.
    let val = ctx.fpr[instr.rb()];
    let result_u32: u32 = if val.is_nan() {
        // NaN yields the most negative integer; an SNaN also raises VXSNAN.
        let snan = if fpscr::is_snan(val) { fpscr::VXSNAN } else { 0 };
        fpscr::set_exception(ctx, fpscr::VXCVI | snan);
        0x8000_0000
    } else {
        // The range check applies to the value *after* rounding toward
        // zero. Testing the raw input flagged e.g. 2147483647.5 and
        // -2147483648.5 as VXCVI even though they truncate to
        // representable integers and are merely inexact (XX).
        let truncated = val.trunc();
        if truncated > (i32::MAX as f64) {
            fpscr::set_exception(ctx, fpscr::VXCVI);
            0x7FFF_FFFF
        } else if truncated < (i32::MIN as f64) {
            fpscr::set_exception(ctx, fpscr::VXCVI);
            0x8000_0000
        } else {
            // PPCBUG-230: a fractional input makes the conversion inexact.
            if val != truncated {
                fpscr::set_exception(ctx, fpscr::XX);
            }
            truncated as i32 as u32
        }
    };
    ctx.fpr[instr.rd()] = f64::from_bits(result_u32 as u64);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
|
||
// ===== FPU: Compare =====
|
||
PpcOpcode::fcmpu => {
    // Unordered compare of frA with frB. The outcome is written to CR
    // field crfD and mirrored into FPSCR via `set_fprf`.
    let fra = ctx.fpr[instr.ra()];
    let frb = ctx.fpr[instr.rb()];
    // `partial_cmp` is `None` exactly when either operand is a NaN.
    let ordering = fra.partial_cmp(&frb);
    let (lt, gt, eq, so, fprf) = match ordering {
        None => (false, false, false, true, 0b0_0001),
        Some(std::cmp::Ordering::Less) => (true, false, false, false, 0b0_1000),
        Some(std::cmp::Ordering::Greater) => (false, true, false, false, 0b0_0100),
        Some(std::cmp::Ordering::Equal) => (false, false, true, false, 0b0_0010),
    };
    ctx.cr[instr.crfd()] = crate::context::CrField { lt, gt, eq, so };
    // fcmpu raises VXSNAN for a signalling NaN only; a quiet NaN raises
    // nothing (no VXVC).
    if ordering.is_none() && (fpscr::is_snan(fra) || fpscr::is_snan(frb)) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    fpscr::set_fprf(ctx, fprf);
    ctx.pc += 4;
}
|
||
PpcOpcode::fcmpo => {
    // Ordered compare of frA with frB. Same CR/FPSCR result as fcmpu,
    // but any NaN operand raises VXVC (plus VXSNAN for a signalling NaN).
    let fra = ctx.fpr[instr.ra()];
    let frb = ctx.fpr[instr.rb()];
    // `partial_cmp` is `None` exactly when either operand is a NaN.
    let ordering = fra.partial_cmp(&frb);
    let (lt, gt, eq, so, fprf) = match ordering {
        None => (false, false, false, true, 0b0_0001),
        Some(std::cmp::Ordering::Less) => (true, false, false, false, 0b0_1000),
        Some(std::cmp::Ordering::Greater) => (false, true, false, false, 0b0_0100),
        Some(std::cmp::Ordering::Equal) => (false, false, true, false, 0b0_0010),
    };
    ctx.cr[instr.crfd()] = crate::context::CrField { lt, gt, eq, so };
    if ordering.is_none() {
        let flags = if fpscr::is_snan(fra) || fpscr::is_snan(frb) {
            fpscr::VXSNAN | fpscr::VXVC
        } else {
            fpscr::VXVC
        };
        fpscr::set_exception(ctx, flags);
    }
    fpscr::set_fprf(ctx, fprf);
    ctx.pc += 4;
}
|
||
|
||
// ===== FPU: Status/Control =====
|
||
PpcOpcode::mffsx => {
    // Move from FPSCR: the register value is zero-extended to 64 bits
    // and stored as frD's raw bit pattern.
    let image = u64::from(ctx.fpscr);
    ctx.fpr[instr.rd()] = f64::from_bits(image);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::mtfsfx => {
    // Move to FPSCR fields: each set bit of the 8-bit FM mask selects one
    // 4-bit FPSCR field to be replaced from the low 32 bits of frB.
    let fm = (instr.raw >> 17) & 0xFF;
    let source = ctx.fpr[instr.rb()].to_bits() as u32;
    // FM's most significant bit controls the most significant nibble.
    let field_mask = (0..8u32)
        .filter(|&field| fm & (1 << (7 - field)) != 0)
        .fold(0u32, |acc, field| acc | (0xF << (28 - field * 4)));
    ctx.fpscr = (ctx.fpscr & !field_mask) | (source & field_mask);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::mtfsb0x => {
    // Clear one FPSCR bit; crbD counts from the most significant bit.
    let bit_mask: u32 = 1 << (31 - instr.crbd());
    ctx.fpscr &= !bit_mask;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::mtfsb1x => {
    // Set one FPSCR bit; crbD counts from the most significant bit.
    let bit_mask: u32 = 1 << (31 - instr.crbd());
    ctx.fpscr |= bit_mask;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
PpcOpcode::mtfsfix => {
    // Move to FPSCR field immediate: replace the 4-bit field selected by
    // crfD with the 4-bit immediate from the instruction word.
    let imm = (instr.raw >> 12) & 0xF;
    // Field 0 is the most significant nibble.
    let shift = 28 - instr.crfd() as u32 * 4;
    let cleared = ctx.fpscr & !(0xF << shift);
    ctx.fpscr = cleared | (imm << shift);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4b — Unaligned vector load/store
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// lvlx / lvlx128 / lvlxl / lvlxl128: load left-aligned from EA.
|
||
PpcOpcode::lvlx | PpcOpcode::lvlxl => {
    // Left-aligned unaligned load into VD; both opcodes share this path.
    let loaded = crate::vmx::load_vector_left(mem, ea_indexed(ctx, instr));
    ctx.vr[instr.rd()] = loaded;
    ctx.pc += 4;
}
|
||
PpcOpcode::lvlx128 | PpcOpcode::lvlxl128 => {
    // VMX128 form of the left-aligned load; the destination comes from
    // the 128-register VD field.
    let loaded = crate::vmx::load_vector_left(mem, ea_indexed(ctx, instr));
    ctx.vr[instr.vd128()] = loaded;
    ctx.pc += 4;
}
|
||
PpcOpcode::lvrx | PpcOpcode::lvrxl => {
    // Right-aligned unaligned load into VD; both opcodes share this path.
    let loaded = crate::vmx::load_vector_right(mem, ea_indexed(ctx, instr));
    ctx.vr[instr.rd()] = loaded;
    ctx.pc += 4;
}
|
||
PpcOpcode::lvrx128 | PpcOpcode::lvrxl128 => {
    // VMX128 form of the right-aligned load; the destination comes from
    // the 128-register VD field.
    let loaded = crate::vmx::load_vector_right(mem, ea_indexed(ctx, instr));
    ctx.vr[instr.vd128()] = loaded;
    ctx.pc += 4;
}
|
||
PpcOpcode::stvlx | PpcOpcode::stvlxl => {
    // Left-aligned unaligned store of VS starting at EA.
    let ea = ea_indexed(ctx, instr);
    // PPCBUG-513: the store must invalidate reservations it may touch.
    // The reservation lines containing `ea` and `ea + 15` are both
    // invalidated; this is deliberately conservative and can cover one
    // more line than the store itself writes.
    match ctx.reservation_table.as_ref() {
        Some(table) if table.is_enabled() && table.has_active_reservers() => {
            let line_lo = ea & !RESERVATION_MASK;
            let line_hi = ea.wrapping_add(15) & !RESERVATION_MASK;
            table.invalidate_for_write(line_lo);
            if line_hi != line_lo {
                table.invalidate_for_write(line_hi);
            }
        }
        _ => {}
    }
    let value = ctx.vr[instr.rs()];
    crate::vmx::store_vector_left(mem, ea, value);
    ctx.pc += 4;
}
|
||
PpcOpcode::stvlx128 | PpcOpcode::stvlxl128 => {
    // VMX128 form of the left-aligned store; the source comes from the
    // 128-register VS field.
    let ea = ea_indexed(ctx, instr);
    // PPCBUG-513: invalidate the reservation lines containing `ea` and
    // `ea + 15` before writing.
    match ctx.reservation_table.as_ref() {
        Some(table) if table.is_enabled() && table.has_active_reservers() => {
            let line_lo = ea & !RESERVATION_MASK;
            let line_hi = ea.wrapping_add(15) & !RESERVATION_MASK;
            table.invalidate_for_write(line_lo);
            if line_hi != line_lo {
                table.invalidate_for_write(line_hi);
            }
        }
        _ => {}
    }
    let value = ctx.vr[instr.vs128()];
    crate::vmx::store_vector_left(mem, ea, value);
    ctx.pc += 4;
}
|
||
PpcOpcode::stvrx | PpcOpcode::stvrxl => {
    // Right-aligned unaligned store of VS ending just below EA.
    let ea = ea_indexed(ctx, instr);
    // PPCBUG-514: the store must invalidate reservations it may touch.
    // The invalidation runs unconditionally, even when the store itself
    // turns out to write nothing, and conservatively covers the lines
    // containing `ea` and `ea + 15`.
    match ctx.reservation_table.as_ref() {
        Some(table) if table.is_enabled() && table.has_active_reservers() => {
            let line_lo = ea & !RESERVATION_MASK;
            let line_hi = ea.wrapping_add(15) & !RESERVATION_MASK;
            table.invalidate_for_write(line_lo);
            if line_hi != line_lo {
                table.invalidate_for_write(line_hi);
            }
        }
        _ => {}
    }
    let value = ctx.vr[instr.rs()];
    crate::vmx::store_vector_right(mem, ea, value);
    ctx.pc += 4;
}
|
||
PpcOpcode::stvrx128 | PpcOpcode::stvrxl128 => {
    // VMX128 form of the right-aligned store; the source comes from the
    // 128-register VS field.
    let ea = ea_indexed(ctx, instr);
    // PPCBUG-514: invalidate the reservation lines containing `ea` and
    // `ea + 15` before writing.
    match ctx.reservation_table.as_ref() {
        Some(table) if table.is_enabled() && table.has_active_reservers() => {
            let line_lo = ea & !RESERVATION_MASK;
            let line_hi = ea.wrapping_add(15) & !RESERVATION_MASK;
            table.invalidate_for_write(line_lo);
            if line_hi != line_lo {
                table.invalidate_for_write(line_hi);
            }
        }
        _ => {}
    }
    let value = ctx.vr[instr.vs128()];
    crate::vmx::store_vector_right(mem, ea, value);
    ctx.pc += 4;
}
|
||
// lvewx128 / stvewx128: VMX128 element-indexed 32-bit load/store.
|
||
// Like lvewx the whole 16 bytes at the aligned EA go into VD; the
|
||
// element-of-interest is implied by EA's low bits.
|
||
PpcOpcode::lvewx128 => {
    // Load the full 16-byte block containing EA into VD, one byte at a
    // time, starting at the 16-byte-aligned address.
    let base = ea_indexed(ctx, instr) & !0xF;
    let block: [u8; 16] = std::array::from_fn(|offset| mem.read_u8(base + offset as u32));
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(block);
    ctx.pc += 4;
}
|
||
PpcOpcode::stvewx128 => {
    // Store one 32-bit element of VS: EA is word-aligned, the lane is
    // chosen by EA's position inside its 16-byte block, and exactly four
    // bytes are written.
    // PPCBUG-510: earlier code aligned to 16 bytes and wrote the whole
    // vector, corrupting the 12 neighbouring bytes.
    let ea_unaligned = ea_indexed(ctx, instr);
    let ea = ea_unaligned & !0x3u32;
    if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
        if t.has_active_reservers() {
            // Pass the reservation-line address, as the other vector
            // stores do; a word-aligned store never crosses a line.
            t.invalidate_for_write(ea & !RESERVATION_MASK);
        }
    }
    let slot = ((ea_unaligned & 0xF) >> 2) as usize;
    let bytes = ctx.vr[instr.vs128()].as_bytes();
    // Assemble the lane big-endian from its four bytes.
    let w = u32::from_be_bytes([
        bytes[slot * 4],
        bytes[slot * 4 + 1],
        bytes[slot * 4 + 2],
        bytes[slot * 4 + 3],
    ]);
    mem.write_u32(ea, w);
    ctx.pc += 4;
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4a — VMX integer add/sub (modulo and saturating), mul, avg, sum
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// -------- modulo add/sub (byte/halfword/word) --------
|
||
PpcOpcode::vaddubm => {
    // Byte-wise modulo add: VD[i] = VA[i] + VB[i], wrapping at 8 bits.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let sums: [u8; 16] = std::array::from_fn(|lane| lhs[lane].wrapping_add(rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(sums);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsububm => {
    // Byte-wise modulo subtract: VD[i] = VA[i] - VB[i], wrapping at 8 bits.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let diffs: [u8; 16] = std::array::from_fn(|lane| lhs[lane].wrapping_sub(rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(diffs);
    ctx.pc += 4;
}
|
||
PpcOpcode::vadduhm => {
    // Halfword-wise modulo add, wrapping at 16 bits.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let sums: [u16; 8] = std::array::from_fn(|lane| lhs[lane].wrapping_add(rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(sums);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsubuhm => {
    // Halfword-wise modulo subtract, wrapping at 16 bits.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let diffs: [u16; 8] = std::array::from_fn(|lane| lhs[lane].wrapping_sub(rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(diffs);
    ctx.pc += 4;
}
|
||
// vadduwm / vsubuwm are implemented above (modulo word add/sub arms).
|
||
|
||
// -------- saturating add/sub (signed + unsigned) --------
|
||
PpcOpcode::vaddubs => {
    // Unsigned byte saturating add; VSCR[SAT] is set if any lane saturated.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let mut saturated = false;
    let lanes: [u8; 16] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_add_u8(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsububs => {
    // Unsigned byte saturating subtract; VSCR[SAT] is set if any lane saturated.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let mut saturated = false;
    let lanes: [u8; 16] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_sub_u8(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vaddsbs => {
    // Signed byte saturating add; VSCR[SAT] is set if any lane saturated.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
    let mut saturated = false;
    let lanes: [i8; 16] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_add_i8(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i8x16(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsubsbs => {
    // Signed byte saturating subtract; VSCR[SAT] is set if any lane saturated.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
    let mut saturated = false;
    let lanes: [i8; 16] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_sub_i8(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i8x16(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vadduhs => {
    // Unsigned halfword saturating add; VSCR[SAT] is set if any lane saturated.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let mut saturated = false;
    let lanes: [u16; 8] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_add_u16(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsubuhs => {
    // Unsigned halfword saturating subtract; VSCR[SAT] is set if any lane saturated.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let mut saturated = false;
    let lanes: [u16; 8] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_sub_u16(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vaddshs => {
    // Signed halfword saturating add; VSCR[SAT] is set if any lane saturated.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let mut saturated = false;
    let lanes: [i16; 8] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_add_i16(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsubshs => {
    // Signed halfword saturating subtract; VSCR[SAT] is set if any lane saturated.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let mut saturated = false;
    let lanes: [i16; 8] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_sub_i16(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vadduws => {
    // Unsigned word saturating add; VSCR[SAT] is set if any lane saturated.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let mut saturated = false;
    let lanes: [u32; 4] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_add_u32(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vsubuws => {
    // Unsigned word saturating subtract; VSCR[SAT] is set if any lane saturated.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let mut saturated = false;
    let lanes: [u32; 4] = std::array::from_fn(|lane| {
        let (value, clipped) = crate::vmx::sat_sub_u32(lhs[lane], rhs[lane]);
        saturated |= clipped;
        value
    });
    if saturated {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(lanes);
    ctx.pc += 4;
}
|
||
PpcOpcode::vaddsws => {
|
||
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let (v, s) = crate::vmx::sat_add_i32(a[i], b[i]);
|
||
r[i] = v; sat |= s;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsubsws => {
|
||
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let (v, s) = crate::vmx::sat_sub_i32(a[i], b[i]);
|
||
r[i] = v; sat |= s;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// -------- vaddcuw / vsubcuw: per-lane carry / borrow out --------
|
||
PpcOpcode::vaddcuw => {
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 {
|
||
let (_, c) = a[i].overflowing_add(b[i]);
|
||
r[i] = if c { 1 } else { 0 };
|
||
}
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsubcuw => {
|
||
// "Subtract Carryout": r = 1 if a >= b (no borrow), 0 otherwise.
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = if a[i] >= b[i] { 1 } else { 0 }; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// -------- averages --------
|
||
PpcOpcode::vavgub => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = crate::vmx::avg_u8(a[i], b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vavgsb => {
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
|
||
let mut r = [0i8; 16];
|
||
for i in 0..16 { r[i] = crate::vmx::avg_i8(a[i], b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vavguh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = crate::vmx::avg_u16(a[i], b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vavgsh => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let mut r = [0i16; 8];
|
||
for i in 0..8 { r[i] = crate::vmx::avg_i16(a[i], b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vavguw => {
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = crate::vmx::avg_u32(a[i], b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vavgsw => {
|
||
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 { r[i] = crate::vmx::avg_i32(a[i], b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// -------- multiplies (even / odd lanes — see §5 hazard note) --------
|
||
// vmuleub: even u8 lanes (BE index 0,2,4,...,14) → u16 lanes.
|
||
PpcOpcode::vmuleub => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = a[2 * i] as u16 * b[2 * i] as u16; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmuloub => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = a[2 * i + 1] as u16 * b[2 * i + 1] as u16; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmulesb => {
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
|
||
let mut r = [0i16; 8];
|
||
for i in 0..8 { r[i] = a[2 * i] as i16 * b[2 * i] as i16; }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmulosb => {
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
|
||
let mut r = [0i16; 8];
|
||
for i in 0..8 { r[i] = a[2 * i + 1] as i16 * b[2 * i + 1] as i16; }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmuleuh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[2 * i] as u32 * b[2 * i] as u32; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmulouh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[2 * i + 1] as u32 * b[2 * i + 1] as u32; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmulesh => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 { r[i] = a[2 * i] as i32 * b[2 * i] as i32; }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmulosh => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 { r[i] = a[2 * i + 1] as i32 * b[2 * i + 1] as i32; }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// -------- multiply-add halfword (saturating) --------
|
||
PpcOpcode::vmhaddshs => {
|
||
// vD[i] = sat_i16(((vA[i] * vB[i]) >> 15) + vC[i]) — shift the product first, then add vC[i].
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let c = crate::vmx::as_i16x8(ctx.vr[instr.rc()]);
|
||
let mut r = [0i16; 8]; let mut sat = false;
|
||
for i in 0..8 {
|
||
let prod = (a[i] as i32 * b[i] as i32) >> 15;
|
||
let (v, s) = crate::vmx::sat_i32_to_i16(prod + c[i] as i32);
|
||
r[i] = v; sat |= s;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmhraddshs => {
|
||
// Rounded multiply-add: ((vA[i]*vB[i] + 0x4000) >> 15) + vC[i], saturating to i16.
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let c = crate::vmx::as_i16x8(ctx.vr[instr.rc()]);
|
||
let mut r = [0i16; 8]; let mut sat = false;
|
||
for i in 0..8 {
|
||
let prod = (a[i] as i32 * b[i] as i32 + 0x4000) >> 15;
|
||
let (v, s) = crate::vmx::sat_i32_to_i16(prod + c[i] as i32);
|
||
r[i] = v; sat |= s;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmladduhm => {
|
||
// Multiply-low add (modulo): vD[i] = u16(vA[i] * vB[i] + vC[i]).
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let c = ctx.vr[instr.rc()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 {
|
||
r[i] = a[i].wrapping_mul(b[i]).wrapping_add(c[i]);
|
||
}
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// -------- VMX sum-of-products --------
|
||
// vmsumubm: vD[i:u32] = sum over j in [0..4] of vA[4i+j:u8] * vB[4i+j:u8] + vC[i].
|
||
PpcOpcode::vmsumubm => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let c = ctx.vr[instr.rc()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 {
|
||
let mut s = c[i];
|
||
for j in 0..4 {
|
||
s = s.wrapping_add(a[4*i+j] as u32 * b[4*i+j] as u32);
|
||
}
|
||
r[i] = s;
|
||
}
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmsummbm => {
|
||
// signed bytes × unsigned bytes, signed accumulator
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let c = crate::vmx::as_i32x4(ctx.vr[instr.rc()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 {
|
||
let mut s = c[i];
|
||
for j in 0..4 {
|
||
s = s.wrapping_add(a[4*i+j] as i32 * b[4*i+j] as i32);
|
||
}
|
||
r[i] = s;
|
||
}
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmsumuhm => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let c = ctx.vr[instr.rc()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 {
|
||
let s = (a[2*i] as u32 * b[2*i] as u32)
|
||
.wrapping_add(a[2*i+1] as u32 * b[2*i+1] as u32)
|
||
.wrapping_add(c[i]);
|
||
r[i] = s;
|
||
}
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmsumuhs => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let c = ctx.vr[instr.rc()].as_u32x4();
|
||
let mut r = [0u32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let s = (a[2*i] as u64 * b[2*i] as u64)
|
||
+ (a[2*i+1] as u64 * b[2*i+1] as u64)
|
||
+ c[i] as u64;
|
||
let (v, overflow) = if s > u32::MAX as u64 { (u32::MAX, true) } else { (s as u32, false) };
|
||
r[i] = v; sat |= overflow;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmsumshm => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let c = crate::vmx::as_i32x4(ctx.vr[instr.rc()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 {
|
||
let s = (a[2*i] as i32 * b[2*i] as i32)
|
||
.wrapping_add(a[2*i+1] as i32 * b[2*i+1] as i32)
|
||
.wrapping_add(c[i]);
|
||
r[i] = s;
|
||
}
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmsumshs => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let c = crate::vmx::as_i32x4(ctx.vr[instr.rc()]);
|
||
let mut r = [0i32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
// Running-sum saturation: accumulate in i64, clamp once at end.
|
||
let s = (a[2*i] as i64 * b[2*i] as i64)
|
||
+ (a[2*i+1] as i64 * b[2*i+1] as i64)
|
||
+ c[i] as i64;
|
||
let (v, o) = crate::vmx::sat_i64_to_i32(s);
|
||
r[i] = v; sat |= o;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// -------- VMX sum-across --------
|
||
PpcOpcode::vsumsws => {
|
||
// vD[3] = sat_i32(vB[3] + sum over i in 0..4 of vA[i]); lanes 0-2 of vD are zeroed.
|
||
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
|
||
let c = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let s = a.iter().map(|&x| x as i64).sum::<i64>() + c[3] as i64;
|
||
let (v, sat) = crate::vmx::sat_i64_to_i32(s);
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4([0, 0, 0, v]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsum2sws => {
|
||
// Two 2-word partial sums at lanes 1 and 3.
|
||
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
|
||
let c = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let s0 = a[0] as i64 + a[1] as i64 + c[1] as i64;
|
||
let s1 = a[2] as i64 + a[3] as i64 + c[3] as i64;
|
||
let (v0, sat0) = crate::vmx::sat_i64_to_i32(s0);
|
||
let (v1, sat1) = crate::vmx::sat_i64_to_i32(s1);
|
||
if sat0 | sat1 { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4([0, v0, 0, v1]);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsum4sbs => {
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let c = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let s = a[4*i] as i64 + a[4*i+1] as i64 + a[4*i+2] as i64 + a[4*i+3] as i64 + c[i] as i64;
|
||
let (v, o) = crate::vmx::sat_i64_to_i32(s);
|
||
r[i] = v; sat |= o;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsum4ubs => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let c = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let s = a[4*i] as u64 + a[4*i+1] as u64 + a[4*i+2] as u64 + a[4*i+3] as u64 + c[i] as u64;
|
||
let (v, o) = if s > u32::MAX as u64 { (u32::MAX, true) } else { (s as u32, false) };
|
||
r[i] = v; sat |= o;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsum4shs => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let c = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let s = a[2*i] as i64 + a[2*i+1] as i64 + c[i] as i64;
|
||
let (v, o) = crate::vmx::sat_i64_to_i32(s);
|
||
r[i] = v; sat |= o;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4c — VMX integer compares (all set 0xFF/0xFFFF/0xFFFFFFFF per lane)
|
||
// ═════════════════════════════════════════════════════════════════
|
||
PpcOpcode::vcmpequb => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = if a[i] == b[i] { 0xFF } else { 0 }; }
|
||
let v = xenia_types::Vec128::from_bytes(r);
|
||
if instr.vc_rc_bit() {
|
||
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
|
||
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
|
||
}
|
||
ctx.vr[instr.rd()] = v;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpequh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = if a[i] == b[i] { 0xFFFF } else { 0 }; }
|
||
let v = xenia_types::Vec128::from_u16x8_array(r);
|
||
if instr.vc_rc_bit() {
|
||
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
|
||
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
|
||
}
|
||
ctx.vr[instr.rd()] = v;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpgtub => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; }
|
||
let v = xenia_types::Vec128::from_bytes(r);
|
||
if instr.vc_rc_bit() {
|
||
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
|
||
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
|
||
}
|
||
ctx.vr[instr.rd()] = v;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpgtsb => {
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = if a[i] > b[i] { 0xFF } else { 0 }; }
|
||
let v = xenia_types::Vec128::from_bytes(r);
|
||
if instr.vc_rc_bit() {
|
||
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
|
||
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
|
||
}
|
||
ctx.vr[instr.rd()] = v;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpgtuh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; }
|
||
let v = xenia_types::Vec128::from_u16x8_array(r);
|
||
if instr.vc_rc_bit() {
|
||
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
|
||
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
|
||
}
|
||
ctx.vr[instr.rd()] = v;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpgtsh => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = if a[i] > b[i] { 0xFFFF } else { 0 }; }
|
||
let v = xenia_types::Vec128::from_u16x8_array(r);
|
||
if instr.vc_rc_bit() {
|
||
let (t, f) = crate::vmx::cr6_flags_from_mask(v);
|
||
ctx.cr[6] = crate::context::CrField { lt: t, gt: false, eq: f, so: false };
|
||
}
|
||
ctx.vr[instr.rd()] = v;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpgtuw => {
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; }
|
||
let v = xenia_types::Vec128::from_u32x4_array(r);
|
||
if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); }
|
||
ctx.vr[instr.rd()] = v;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcmpgtsw => {
|
||
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = if a[i] > b[i] { 0xFFFFFFFF } else { 0 }; }
|
||
let v = xenia_types::Vec128::from_u32x4_array(r);
|
||
if instr.vc_rc_bit() { update_cr6_from_vmask(&r, ctx); }
|
||
ctx.vr[instr.rd()] = v;
|
||
ctx.pc += 4;
|
||
}
|
||
// vcmpbfp(128): per lane, set 0x8000_0000 if a > b or 0x4000_0000 if a < -b (both when either input is NaN).
|
||
PpcOpcode::vcmpbfp | PpcOpcode::vcmpbfp128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vcmpbfp128);
|
||
let (ra, rb, rd) = if is_128 {
|
||
(instr.va128(), instr.vb128(), instr.vd128())
|
||
} else {
|
||
(instr.ra(), instr.rb(), instr.rd())
|
||
};
|
||
let a = ctx.vr[ra].as_f32x4();
|
||
let b = ctx.vr[rb].as_f32x4();
|
||
let mut r = [0u32; 4];
|
||
let mut any_out = false;
|
||
for i in 0..4 {
|
||
let mut lane: u32 = 0;
|
||
if a[i].is_nan() || b[i].is_nan() || a[i] > b[i] { lane |= 0x8000_0000; any_out = true; }
|
||
if a[i].is_nan() || b[i].is_nan() || a[i] < -b[i] { lane |= 0x4000_0000; any_out = true; }
|
||
r[i] = lane;
|
||
}
|
||
let rc = if is_128 { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
|
||
if rc {
|
||
ctx.cr[6] = crate::context::CrField {
|
||
lt: false, gt: false, eq: !any_out, so: false,
|
||
};
|
||
}
|
||
ctx.vr[rd] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4d — VMX shifts and rotates
|
||
// ═════════════════════════════════════════════════════════════════
|
||
PpcOpcode::vslb => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = a[i] << (b[i] & 7); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsrb => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = a[i] >> (b[i] & 7); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsrab => {
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0i8; 16];
|
||
for i in 0..16 { r[i] = a[i] >> (b[i] & 7); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vrlb => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = a[i].rotate_left((b[i] & 7) as u32); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vslh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = a[i] << (b[i] & 0xF); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsrh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = a[i] >> (b[i] & 0xF); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsrah => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0i16; 8];
|
||
for i in 0..8 { r[i] = a[i] >> (b[i] & 0xF); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vrlh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = a[i].rotate_left((b[i] & 0xF) as u32); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
// vslw / vsrw / vsraw / vrlw (word shifts) are implemented above via
|
||
// vmx_reg_triple — skip here.
|
||
|
||
// Full 128-bit bit shifts (vsl/vsr): shift by the low 3 bits of vB[15].
|
||
PpcOpcode::vsl => {
|
||
let a = u128::from_be_bytes(ctx.vr[instr.ra()].as_bytes());
|
||
let shift = (ctx.vr[instr.rb()].as_bytes()[15] & 7) as u32;
|
||
let r = if shift == 0 { a } else { a << shift };
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r.to_be_bytes());
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsr => {
|
||
let a = u128::from_be_bytes(ctx.vr[instr.ra()].as_bytes());
|
||
let shift = (ctx.vr[instr.rb()].as_bytes()[15] & 7) as u32;
|
||
let r = if shift == 0 { a } else { a >> shift };
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r.to_be_bytes());
|
||
ctx.pc += 4;
|
||
}
|
||
// vslo/vsro: 128-bit shift by whole octets (bytes). (vB[15] >> 3) & 0xF is the byte count; the shift is count * 8 bits.
|
||
PpcOpcode::vslo | PpcOpcode::vslo128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vslo128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = u128::from_be_bytes(ctx.vr[ra].as_bytes());
|
||
let nbytes = ((ctx.vr[rb].as_bytes()[15] >> 3) & 0xF) as u32;
|
||
let r = if nbytes == 0 { a } else { a << (nbytes * 8) };
|
||
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r.to_be_bytes());
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vsro | PpcOpcode::vsro128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vsro128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = u128::from_be_bytes(ctx.vr[ra].as_bytes());
|
||
let nbytes = ((ctx.vr[rb].as_bytes()[15] >> 3) & 0xF) as u32;
|
||
let r = if nbytes == 0 { a } else { a >> (nbytes * 8) };
|
||
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r.to_be_bytes());
|
||
ctx.pc += 4;
|
||
}
|
||
// vrlimi128: rotate-left-immediate then partial-merge into vD.
|
||
// Field layout (from canary ppc_decode_data.cc VX128_4):
|
||
// imm = bits(22..=23,28..=29) for shift, mask = bits(24..=27)
|
||
// Simplified semantics: r = vB rotated left by `shift` words, merged
|
||
// into vD using a per-word `mask` (mask bit for word N == 0 ⇒ keep vD[N], else
|
||
// use rotated[N]). Titles generally use mask=0xF (copy-all) which
|
||
// makes this behave like a plain word rotate.
|
||
PpcOpcode::vrlimi128 => {
|
||
let shift = instr.vx128_4_z() as usize;
|
||
let mask = instr.vx128_4_imm();
|
||
let b = ctx.vr[instr.vb128()].as_u32x4();
|
||
let d = ctx.vr[instr.vd128()].as_u32x4();
|
||
let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]];
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 {
|
||
// mask bit 3 corresponds to word 0 (BE-first). Use rot when
|
||
// the corresponding mask bit is set.
|
||
let use_rot = (mask >> (3 - i)) & 1 == 1;
|
||
r[i] = if use_rot { rot[i] } else { d[i] };
|
||
}
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4e — VMX merge (interleave high / low halves)
|
||
// ═════════════════════════════════════════════════════════════════
|
||
PpcOpcode::vmrghb => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..8 { r[2*i] = a[i]; r[2*i+1] = b[i]; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmrglb => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..8 { r[2*i] = a[8+i]; r[2*i+1] = b[8+i]; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmrghh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..4 { r[2*i] = a[i]; r[2*i+1] = b[i]; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmrglh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..4 { r[2*i] = a[4+i]; r[2*i+1] = b[4+i]; }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4f — VMX pack / unpack (saturating and modulo + D3D + 5-6-5)
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// ---- Pack modulo (truncate) ----
|
||
PpcOpcode::vpkuhum | PpcOpcode::vpkuhum128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vpkuhum128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = ctx.vr[ra].as_u16x8();
|
||
let b = ctx.vr[rb].as_u16x8();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..8 { r[i] = a[i] as u8; }
|
||
for i in 0..8 { r[8 + i] = b[i] as u8; }
|
||
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vpkuwum | PpcOpcode::vpkuwum128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vpkuwum128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = ctx.vr[ra].as_u32x4();
|
||
let b = ctx.vr[rb].as_u32x4();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..4 { r[i] = a[i] as u16; }
|
||
for i in 0..4 { r[4 + i] = b[i] as u16; }
|
||
ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
// ---- Pack with saturation ----
|
||
PpcOpcode::vpkuhus | PpcOpcode::vpkuhus128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vpkuhus128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = ctx.vr[ra].as_u16x8();
|
||
let b = ctx.vr[rb].as_u16x8();
|
||
let mut r = [0u8; 16]; let mut sat = false;
|
||
for i in 0..8 { let (v, s) = crate::vmx::sat_u16_to_u8(a[i]); r[i] = v; sat |= s; }
|
||
for i in 0..8 { let (v, s) = crate::vmx::sat_u16_to_u8(b[i]); r[8 + i] = v; sat |= s; }
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vpkshus | PpcOpcode::vpkshus128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vpkshus128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = crate::vmx::as_i16x8(ctx.vr[ra]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[rb]);
|
||
let mut r = [0u8; 16]; let mut sat = false;
|
||
for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_u8(a[i]); r[i] = v; sat |= s; }
|
||
for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_u8(b[i]); r[8 + i] = v; sat |= s; }
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vpkshss | PpcOpcode::vpkshss128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vpkshss128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = crate::vmx::as_i16x8(ctx.vr[ra]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[rb]);
|
||
let mut r = [0i8; 16]; let mut sat = false;
|
||
for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_i8(a[i]); r[i] = v; sat |= s; }
|
||
for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_i8(b[i]); r[8 + i] = v; sat |= s; }
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[rd] = crate::vmx::from_i8x16(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vpkuwus | PpcOpcode::vpkuwus128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vpkuwus128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = ctx.vr[ra].as_u32x4();
|
||
let b = ctx.vr[rb].as_u32x4();
|
||
let mut r = [0u16; 8]; let mut sat = false;
|
||
for i in 0..4 { let (v, s) = crate::vmx::sat_u32_to_u16(a[i]); r[i] = v; sat |= s; }
|
||
for i in 0..4 { let (v, s) = crate::vmx::sat_u32_to_u16(b[i]); r[4 + i] = v; sat |= s; }
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vpkswus | PpcOpcode::vpkswus128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vpkswus128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = crate::vmx::as_i32x4(ctx.vr[ra]);
|
||
let b = crate::vmx::as_i32x4(ctx.vr[rb]);
|
||
let mut r = [0u16; 8]; let mut sat = false;
|
||
for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_u16(a[i]); r[i] = v; sat |= s; }
|
||
for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_u16(b[i]); r[4 + i] = v; sat |= s; }
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vpkswss | PpcOpcode::vpkswss128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vpkswss128);
|
||
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
|
||
else { (instr.ra(), instr.rb(), instr.rd()) };
|
||
let a = crate::vmx::as_i32x4(ctx.vr[ra]);
|
||
let b = crate::vmx::as_i32x4(ctx.vr[rb]);
|
||
let mut r = [0i16; 8]; let mut sat = false;
|
||
for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_i16(a[i]); r[i] = v; sat |= s; }
|
||
for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_i16(b[i]); r[4 + i] = v; sat |= s; }
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[rd] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
// vpkpx: pack two u32 vectors into one u16 (5-5-5 pixel) vector.
|
||
PpcOpcode::vpkpx => {
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..4 { r[i] = crate::vmx::pack_pixel_555(a[i]); }
|
||
for i in 0..4 { r[4 + i] = crate::vmx::pack_pixel_555(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ---- Unpack (sign-extend) ----
|
||
PpcOpcode::vupkhsb | PpcOpcode::vupkhsb128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vupkhsb128);
|
||
let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) }
|
||
else { (instr.rb(), instr.rd()) };
|
||
let b = crate::vmx::as_i8x16(ctx.vr[rb]);
|
||
let mut r = [0i16; 8];
|
||
for i in 0..8 { r[i] = b[i] as i16; }
|
||
ctx.vr[rd] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vupklsb | PpcOpcode::vupklsb128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vupklsb128);
|
||
let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) }
|
||
else { (instr.rb(), instr.rd()) };
|
||
let b = crate::vmx::as_i8x16(ctx.vr[rb]);
|
||
let mut r = [0i16; 8];
|
||
for i in 0..8 { r[i] = b[8 + i] as i16; }
|
||
ctx.vr[rd] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vupkhsh => {
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 { r[i] = b[i] as i32; }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vupklsh => {
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 { r[i] = b[4 + i] as i32; }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vupkhpx => {
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = crate::vmx::unpack_pixel_555(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vupklpx => {
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = crate::vmx::unpack_pixel_555(b[4 + i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ---- D3D pack / unpack (VMX128-only) ----
|
||
//
|
||
// First-Pixels M3: fixed immediate extraction + added pack types
|
||
// 1-6. The prior `(instr.raw >> 6) & 0x7` was LSB-numbered (wrong
|
||
// position) and masked to only 3 bits. Canary extracts from the
|
||
// VX128_3/4 `IMM` field at PPC bits 16-22 (MSB) and does
|
||
// `type = IMM >> 2` to pick up the 5-bit type selector — the low
|
||
// 2 bits (`pack`) select output-slot layout for `vpkd3d128`.
|
||
PpcOpcode::vpkd3d128 => {
|
||
use crate::vmx::D3dPackType;
|
||
let uimm = crate::decoder::extract_vx128_uimm5(instr.raw);
|
||
let pack = (uimm & 3) as usize;
|
||
let shift = instr.vx128_4_z() as usize;
|
||
let ty = D3dPackType::from_immediate(uimm >> 2);
|
||
let src = ctx.vr[instr.vb128()];
|
||
let out = match ty {
|
||
D3dPackType::D3dColor => crate::vmx::pack_d3dcolor(src),
|
||
D3dPackType::NormShort2 => crate::vmx::pack_normshort2(src),
|
||
D3dPackType::NormPacked32 => crate::vmx::pack_normpacked32(src),
|
||
D3dPackType::Float16_2 => crate::vmx::pack_float16_2(src),
|
||
D3dPackType::NormShort4 => crate::vmx::pack_normshort4(src),
|
||
D3dPackType::Float16_4 => crate::vmx::pack_float16_4(src),
|
||
D3dPackType::NormPacked64 => crate::vmx::pack_normpacked64(src),
|
||
D3dPackType::Other(t) => {
|
||
tracing::warn!(
|
||
raw = format_args!("{:#010x}", instr.raw),
|
||
uimm,
|
||
ty = t,
|
||
"vpkd3d128: unhandled pack type at {:#010x}",
|
||
ctx.pc,
|
||
);
|
||
src
|
||
}
|
||
};
|
||
// Post-pack permutation: merge packed `out` into previous `vd`
|
||
// per canary ppc_emit_altivec.cc:2126-2188 MakePermuteMask tables.
|
||
// MakePermuteMask(r0,l0, r1,l1, r2,l2, r3,l3): result[i] = if ri==0 { prev[li] } else { out[li] }
|
||
let result = if pack == 0 {
|
||
out
|
||
} else {
|
||
// (source_reg, lane): 0=prev vd, 1=packed out
|
||
const PERM: [[[(u8, u8); 4]; 4]; 3] = [
|
||
// pack=1 (VPACK_32): places out[3] at lane (3-shift)
|
||
[[(0,0),(0,1),(0,2),(1,3)], [(0,0),(0,1),(1,3),(0,3)],
|
||
[(0,0),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]],
|
||
// pack=2 (64-bit): places out[2..3] at lanes (2-shift)..(3-shift)
|
||
[[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)],
|
||
[(1,2),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]],
|
||
// pack=3 (64-bit): same as pack=2 except shift=3 selects out[2] at lane 3
|
||
[[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)],
|
||
[(1,2),(1,3),(0,2),(0,3)], [(0,0),(0,1),(0,2),(1,2)]],
|
||
];
|
||
let prev = ctx.vr[instr.vd128()];
|
||
let pw = prev.as_u32x4();
|
||
let ow = out.as_u32x4();
|
||
let sel = PERM[pack - 1][shift];
|
||
xenia_types::Vec128::from_u32x4_array([
|
||
if sel[0].0 == 0 { pw[sel[0].1 as usize] } else { ow[sel[0].1 as usize] },
|
||
if sel[1].0 == 0 { pw[sel[1].1 as usize] } else { ow[sel[1].1 as usize] },
|
||
if sel[2].0 == 0 { pw[sel[2].1 as usize] } else { ow[sel[2].1 as usize] },
|
||
if sel[3].0 == 0 { pw[sel[3].1 as usize] } else { ow[sel[3].1 as usize] },
|
||
])
|
||
};
|
||
ctx.vr[instr.vd128()] = result;
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vupkd3d128 => {
|
||
use crate::vmx::D3dPackType;
|
||
let uimm = crate::decoder::extract_vx128_uimm5(instr.raw);
|
||
let ty = D3dPackType::from_immediate(uimm >> 2);
|
||
let src = ctx.vr[instr.vb128()];
|
||
let out = match ty {
|
||
D3dPackType::D3dColor => crate::vmx::unpack_d3dcolor(src),
|
||
D3dPackType::NormShort2 => crate::vmx::unpack_normshort2(src),
|
||
D3dPackType::NormPacked32 => crate::vmx::unpack_normpacked32(src),
|
||
D3dPackType::Float16_2 => crate::vmx::unpack_float16_2(src),
|
||
D3dPackType::NormShort4 => crate::vmx::unpack_normshort4(src),
|
||
D3dPackType::Float16_4 => crate::vmx::unpack_float16_4(src),
|
||
D3dPackType::NormPacked64 => crate::vmx::unpack_normpacked64(src),
|
||
D3dPackType::Other(t) => {
|
||
tracing::warn!(
|
||
raw = format_args!("{:#010x}", instr.raw),
|
||
uimm,
|
||
ty = t,
|
||
"vupkd3d128: unhandled pack type at {:#010x}",
|
||
ctx.pc,
|
||
);
|
||
src
|
||
}
|
||
};
|
||
ctx.vr[instr.vd128()] = out;
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4g — VMX convert (float ↔ fixed-point)
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// vctsxs / vctuxs: f32 → i32/u32, scaled by 2^uimm, saturating.
|
||
PpcOpcode::vctsxs => {
|
||
let uimm = (instr.raw >> 16) & 0x1F;
|
||
let b = ctx.vr[instr.rb()].as_f32x4();
|
||
let mut r = [0i32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let (v, s) = crate::vmx::cvt_f32_to_i32_sat(b[i], uimm);
|
||
r[i] = v; sat |= s;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vctuxs => {
|
||
let uimm = (instr.raw >> 16) & 0x1F;
|
||
let b = ctx.vr[instr.rb()].as_f32x4();
|
||
let mut r = [0u32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let (v, s) = crate::vmx::cvt_f32_to_u32_sat(b[i], uimm);
|
||
r[i] = v; sat |= s;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
// vcfsx / vcfux: i32/u32 → f32, scaled by 2^-uimm.
|
||
PpcOpcode::vcfsx => {
|
||
let uimm = (instr.raw >> 16) & 0x1F;
|
||
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = crate::vmx::cvt_i32_to_f32(b[i], uimm); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcfux => {
|
||
let uimm = (instr.raw >> 16) & 0x1F;
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = crate::vmx::cvt_u32_to_f32(b[i], uimm); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
// VMX128 convert variants. uimm lives in bits 16-20 of the encoded form.
|
||
PpcOpcode::vcfpsxws128 => {
|
||
let uimm = (instr.raw >> 16) & 0x1F;
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let mut r = [0i32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let (v, s) = crate::vmx::cvt_f32_to_i32_sat(b[i], uimm);
|
||
r[i] = v; sat |= s;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.vd128()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcfpuxws128 => {
|
||
let uimm = (instr.raw >> 16) & 0x1F;
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let mut r = [0u32; 4]; let mut sat = false;
|
||
for i in 0..4 {
|
||
let (v, s) = crate::vmx::cvt_f32_to_u32_sat(b[i], uimm);
|
||
r[i] = v; sat |= s;
|
||
}
|
||
if sat { ctx.set_vscr_sat(true); }
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcsxwfp128 => {
|
||
let uimm = (instr.raw >> 16) & 0x1F;
|
||
let b = crate::vmx::as_i32x4(ctx.vr[instr.vb128()]);
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = crate::vmx::cvt_i32_to_f32(b[i], uimm); }
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vcuxwfp128 => {
|
||
let uimm = (instr.raw >> 16) & 0x1F;
|
||
let b = ctx.vr[instr.vb128()].as_u32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = crate::vmx::cvt_u32_to_f32(b[i], uimm); }
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4h — VMX vector FPU (exp / log)
|
||
// ═════════════════════════════════════════════════════════════════
|
||
PpcOpcode::vexptefp | PpcOpcode::vexptefp128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vexptefp128);
|
||
let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) }
|
||
else { (instr.rb(), instr.rd()) };
|
||
let b = ctx.vr[rb].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = b[i].exp2(); }
|
||
ctx.vr[rd] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vlogefp | PpcOpcode::vlogefp128 => {
|
||
let is_128 = matches!(instr.opcode, PpcOpcode::vlogefp128);
|
||
let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) }
|
||
else { (instr.rb(), instr.rd()) };
|
||
let b = ctx.vr[rb].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 { r[i] = b[i].log2(); }
|
||
ctx.vr[rd] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4i — VMX integer max / min
|
||
// ═════════════════════════════════════════════════════════════════
|
||
PpcOpcode::vmaxub => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = a[i].max(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vminub => {
|
||
let a = ctx.vr[instr.ra()].as_bytes();
|
||
let b = ctx.vr[instr.rb()].as_bytes();
|
||
let mut r = [0u8; 16];
|
||
for i in 0..16 { r[i] = a[i].min(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaxsb => {
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
|
||
let mut r = [0i8; 16];
|
||
for i in 0..16 { r[i] = a[i].max(b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vminsb => {
|
||
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
|
||
let mut r = [0i8; 16];
|
||
for i in 0..16 { r[i] = a[i].min(b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaxuh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = a[i].max(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vminuh => {
|
||
let a = ctx.vr[instr.ra()].as_u16x8();
|
||
let b = ctx.vr[instr.rb()].as_u16x8();
|
||
let mut r = [0u16; 8];
|
||
for i in 0..8 { r[i] = a[i].min(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaxsh => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let mut r = [0i16; 8];
|
||
for i in 0..8 { r[i] = a[i].max(b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vminsh => {
|
||
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
|
||
let mut r = [0i16; 8];
|
||
for i in 0..8 { r[i] = a[i].min(b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaxuw => {
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[i].max(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vminuw => {
|
||
let a = ctx.vr[instr.ra()].as_u32x4();
|
||
let b = ctx.vr[instr.rb()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
for i in 0..4 { r[i] = a[i].min(b[i]); }
|
||
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmaxsw => {
|
||
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 { r[i] = a[i].max(b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vminsw => {
|
||
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
|
||
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
|
||
let mut r = [0i32; 4];
|
||
for i in 0..4 { r[i] = a[i].min(b[i]); }
|
||
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4j — VMX128 FMA / permute
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// vmaddcfp128: ISA (VD) <- (VA × VD) + VB — a fused multiply-add like vmaddfp128, but with VD as the second multiplicand and VB as the addend.
|
||
PpcOpcode::vmaddcfp128 => {
|
||
// ISA: (VD) <- (VA × VD) + VB. Canary InstrEmit_vmaddcfp128 (cc:819): MulAdd(VA, VD, VB).
|
||
// Previous code computed di.mul_add(bi, ai) = VD×VB+VA — both operands wrong
|
||
// (PPCBUG-425). Fix: ai.mul_add(di, bi) = VA×VD+VB.
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let d = ctx.vr[instr.vd128()].as_f32x4();
|
||
let mut r = [0f32; 4];
|
||
for i in 0..4 {
|
||
let ai = vmx::flush_denorm(a[i]);
|
||
let bi = vmx::flush_denorm(b[i]);
|
||
let di = vmx::flush_denorm(d[i]);
|
||
// PPCBUG-437: flush subnormal output too.
|
||
r[i] = vmx::flush_denorm(ai.mul_add(di, bi));
|
||
}
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
// vmsum3fp128: horizontal sum of (vA * vB) over the first three lanes (0..=2), broadcast to all 4 output lanes.
|
||
// Canary `InstrEmit_vmsum3fp128` flushes the *output* denormal
|
||
// unconditionally (not the inputs) — see ppc_emit_altivec.cc:1067-1075.
|
||
PpcOpcode::vmsum3fp128 => {
|
||
// PPCBUG-436: flush per-product intermediates (not just the final sum).
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let p0 = vmx::flush_denorm(a[0] * b[0]);
|
||
let p1 = vmx::flush_denorm(a[1] * b[1]);
|
||
let p2 = vmx::flush_denorm(a[2] * b[2]);
|
||
let s = vmx::flush_denorm(p0 + p1 + p2);
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s);
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::vmsum4fp128 => {
|
||
// PPCBUG-436.
|
||
let a = ctx.vr[instr.va128()].as_f32x4();
|
||
let b = ctx.vr[instr.vb128()].as_f32x4();
|
||
let p0 = vmx::flush_denorm(a[0] * b[0]);
|
||
let p1 = vmx::flush_denorm(a[1] * b[1]);
|
||
let p2 = vmx::flush_denorm(a[2] * b[2]);
|
||
let p3 = vmx::flush_denorm(a[3] * b[3]);
|
||
let s = vmx::flush_denorm(p0 + p1 + p2 + p3);
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s);
|
||
ctx.pc += 4;
|
||
}
|
||
// vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane).
|
||
PpcOpcode::vpermwi128 => {
|
||
let imm = instr.vx128_p_perm();
|
||
let b = ctx.vr[instr.vb128()].as_u32x4();
|
||
let mut r = [0u32; 4];
|
||
// Output lane i ← b[(imm >> (2 * (3-i))) & 3]
|
||
for i in 0..4 {
|
||
let sel = ((imm >> (2 * (3 - i))) & 3) as usize;
|
||
r[i] = b[sel];
|
||
}
|
||
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4k — Scalar reservation / byte-reverse (doubleword)
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// M3.7 — same table-vs-legacy split as lwarx/stwcx.
|
||
// PPCBUG-108: ldarx + stdcx. have the same cross-thread atomicity
|
||
// limitation as lwarx/stwcx. in the legacy per-ctx fallback path.
|
||
// See the lwarx block comment for the full explanation. The M3
|
||
// scheduler must enable `ReservationTable` before spawning a second
|
||
// host thread. stdcx. carries the debug_assert (see below).
|
||
PpcOpcode::ldarx => {
|
||
let ea = ea_indexed(ctx, instr);
|
||
let val = mem.read_u64(ea);
|
||
ctx.gpr[instr.rd()] = val;
|
||
ctx.reserved_line = ea & !RESERVATION_MASK;
|
||
ctx.reserved_val = val;
|
||
ctx.has_reservation = true;
|
||
ctx.reservation_width = 8; // PPCBUG-151: doubleword reservation
|
||
if let Some(t) = &ctx.reservation_table {
|
||
if t.is_enabled() {
|
||
ctx.reserved_generation = t.reserve(ea, ctx.hw_id);
|
||
}
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
// PPCBUG-108: see ldarx comment above. stdcx. legacy path cannot observe
|
||
// cross-thread reservation invalidations; only safe in lockstep mode.
|
||
PpcOpcode::stdcx => {
|
||
let ea = ea_indexed(ctx, instr);
|
||
let line = ea & !RESERVATION_MASK;
|
||
let table_route = ctx
|
||
.reservation_table
|
||
.as_ref()
|
||
.filter(|t| t.is_enabled())
|
||
.cloned();
|
||
// PPCBUG-151: stdcx. requires a doubleword (ldarx) reservation;
|
||
// a word (lwarx) reservation must not commit here.
|
||
let width_ok = ctx.reservation_width == 8;
|
||
let success = if let Some(t) = &table_route {
|
||
ctx.has_reservation
|
||
&& width_ok
|
||
&& ctx.reserved_line == line
|
||
&& t.try_commit(ea, ctx.reserved_generation, ctx.hw_id)
|
||
} else {
|
||
// Legacy per-ctx path (M2 default / lockstep).
|
||
// PPCBUG-108: same sentinel as stwcx. — fires on non-primary
|
||
// HW slots if the table is disabled under --parallel.
|
||
debug_assert!(
|
||
ctx.hw_id == 0,
|
||
"PPCBUG-108: legacy per-ctx stdcx. on non-primary HW slot \
|
||
(hw_id={}) — ReservationTable must be enabled under --parallel",
|
||
ctx.hw_id
|
||
);
|
||
ctx.has_reservation && width_ok && ctx.reserved_line == line
|
||
};
|
||
if success {
|
||
mem.write_u64(ea, ctx.gpr[instr.rs()]);
|
||
ctx.cr[0] = crate::context::CrField {
|
||
lt: false,
|
||
gt: false,
|
||
eq: true,
|
||
so: ctx.xer_so != 0,
|
||
};
|
||
} else {
|
||
ctx.cr[0] = crate::context::CrField {
|
||
lt: false,
|
||
gt: false,
|
||
eq: false,
|
||
so: ctx.xer_so != 0,
|
||
};
|
||
if let Some(t) = &table_route {
|
||
t.release(ea, ctx.reserved_generation, ctx.hw_id);
|
||
}
|
||
}
|
||
ctx.has_reservation = false;
|
||
ctx.reservation_width = 0; // PPCBUG-151: always clear on exit
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::ldbrx => {
|
||
let ea = ea_indexed(ctx, instr);
|
||
ctx.gpr[instr.rd()] = mem.read_u64(ea).swap_bytes();
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stdbrx => {
|
||
let ea = ea_indexed(ctx, instr);
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() { t.invalidate_for_write(ea); }
|
||
}
|
||
mem.write_u64(ea, ctx.gpr[instr.rs()].swap_bytes());
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4l — Scalar string load / store (register-length)
|
||
// ═════════════════════════════════════════════════════════════════
|
||
PpcOpcode::lswx => {
|
||
let mut ea = ea_indexed(ctx, instr);
|
||
let nb = ctx.xer() & 0x7F; // XER[25..31]
|
||
let mut rd = instr.rd();
|
||
let mut bytes_left = nb;
|
||
while bytes_left > 0 {
|
||
let mut val = 0u32;
|
||
for byte_idx in 0..4 {
|
||
if bytes_left == 0 { break; }
|
||
let b = mem.read_u8(ea) as u32;
|
||
val |= b << (24 - byte_idx * 8);
|
||
ea = ea.wrapping_add(1);
|
||
bytes_left -= 1;
|
||
}
|
||
ctx.gpr[rd] = val as u64;
|
||
rd = (rd + 1) % 32;
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
PpcOpcode::stswx => {
|
||
let mut ea = ea_indexed(ctx, instr);
|
||
let nb = ctx.xer() & 0x7F;
|
||
let mut rs = instr.rs();
|
||
let mut bytes_left = nb;
|
||
if nb > 0 {
|
||
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
|
||
if t.has_active_reservers() {
|
||
let first_line = ea & !RESERVATION_MASK;
|
||
let last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK;
|
||
t.invalidate_for_write(first_line);
|
||
if last_line != first_line { t.invalidate_for_write(last_line); }
|
||
}
|
||
}
|
||
}
|
||
while bytes_left > 0 {
|
||
let val = ctx.gpr[rs] as u32;
|
||
for byte_idx in 0..4 {
|
||
if bytes_left == 0 { break; }
|
||
mem.write_u8(ea, (val >> (24 - byte_idx * 8)) as u8);
|
||
ea = ea.wrapping_add(1);
|
||
bytes_left -= 1;
|
||
}
|
||
rs = (rs + 1) % 32;
|
||
}
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// §4m — mcrxr: move XER condition bits to CR field, clear XER[SO/OV/CA]
|
||
// ═════════════════════════════════════════════════════════════════
|
||
PpcOpcode::mcrxr => {
|
||
let crfd = instr.crfd();
|
||
ctx.cr[crfd] = crate::context::CrField {
|
||
lt: ctx.xer_so != 0,
|
||
gt: ctx.xer_ov != 0,
|
||
eq: ctx.xer_ca != 0,
|
||
so: false,
|
||
};
|
||
ctx.xer_so = 0;
|
||
ctx.xer_ov = 0;
|
||
ctx.xer_ca = 0;
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// ═════════════════════════════════════════════════════════════════
|
||
// mcrfs — move FPSCR field to CR field and clear corresponding
|
||
// FPSCR exception bits. CR field crfD ← FPSCR[(crfS*4)..(crfS*4+3)]
|
||
// and then FPSCR bits in that nibble that are exception bits are
|
||
// cleared (FX, OX, UX, ZX, XX, VXSNAN, VXISI, VXIDI, VXZDZ, VXIMZ,
|
||
// VXVC, VXSOFT, VXSQRT, VXCVI are cleared; FEX/VX are read-only
|
||
// summaries and are recomputed later).
|
||
// ═════════════════════════════════════════════════════════════════
|
||
PpcOpcode::mcrfs => {
|
||
let crfd = instr.crfd();
|
||
let crfs = instr.crfs();
|
||
let shift = 28 - (crfs as u32 * 4);
|
||
let nibble = ((ctx.fpscr >> shift) & 0xF) as u8;
|
||
ctx.cr[crfd] = crate::context::CrField::from_u8(nibble);
|
||
// Clearable exception bits: 0 (FX), 3 (OX), 4 (UX), 5 (ZX),
|
||
// 6 (XX), 7 (VXSNAN), 8 (VXISI), 9 (VXIDI), 10 (VXZDZ),
|
||
// 11 (VXIMZ), 12 (VXVC), 21 (VXSOFT), 22 (VXSQRT), 23 (VXCVI).
|
||
// (Bit positions are PowerISA MSB-0; here 'FPSCR bit n' means
|
||
// bit (31-n) of our u32, counting from the least-significant bit.)
|
||
const CLEARABLE_MASK: u32 =
|
||
(1 << 31) | (1 << (31 - 3)) | (1 << (31 - 4)) |
|
||
(1 << (31 - 5)) | (1 << (31 - 6)) | (1 << (31 - 7)) |
|
||
(1 << (31 - 8)) | (1 << (31 - 9)) | (1 << (31 - 10)) |
|
||
(1 << (31 - 11)) | (1 << (31 - 12)) |
|
||
(1 << (31 - 21)) | (1 << (31 - 22)) | (1 << (31 - 23));
|
||
let nibble_mask = 0xFu32 << shift;
|
||
ctx.fpscr &= !(nibble_mask & CLEARABLE_MASK);
|
||
ctx.pc += 4;
|
||
}
|
||
|
||
// Anything not yet implemented
|
||
_ => {
|
||
tracing::warn!("Unimplemented opcode at {:#010x}: {:?} [{:08X}]", ctx.pc, instr.opcode, instr.raw);
|
||
ctx.pc += 4;
|
||
return StepResult::Unimplemented(instr.opcode);
|
||
}
|
||
}
|
||
StepResult::Continue
|
||
}
|
||
|
||
/// Compute an X-form indexed effective address: EA = (rA==0 ? 0 : GPR[rA]) + GPR[rB].
|
||
#[inline]
|
||
fn ea_indexed(ctx: &PpcContext, instr: &DecodedInstr) -> u32 {
|
||
let a = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||
a.wrapping_add(ctx.gpr[instr.rb()]) as u32
|
||
}
|
||
|
||
/// Helper for CR logical operations.
|
||
fn cr_logical(ctx: &mut PpcContext, instr: &DecodedInstr, op: fn(bool, bool) -> bool) {
|
||
let a = ctx.get_cr_bit(instr.crba());
|
||
let b = ctx.get_cr_bit(instr.crbb());
|
||
ctx.set_cr_bit(instr.crbd(), op(a, b));
|
||
}
|
||
|
||
/// Build the 32-bit mask used by rlwinm/rlwimi/rlwnm.
///
/// `mb` and `me` are MSB-0 bit positions. When `mb <= me` the mask covers
/// bits `mb..=me`; otherwise it wraps around and covers `mb..=31` together
/// with `0..=me`.
fn rlw_mask(mb: u32, me: u32) -> u32 {
    // Ones from bit `mb` down to the least-significant bit.
    let from_mb = u32::MAX >> mb;
    // Ones from the most-significant bit down to bit `me`.
    let through_me = u32::MAX << (31 - me);
    if mb > me {
        from_mb | through_me
    } else {
        from_mb & through_me
    }
}
|
||
|
||
/// Build the 64-bit rldicl mask: MSB-0 bits `0..mb` cleared, bits `mb..=63` set.
fn rld_mask_left(mb: u32) -> u64 {
    // A shift by zero already yields the all-ones mask, so `mb == 0` needs
    // no special case.
    u64::MAX >> mb
}
|
||
|
||
/// Build the 64-bit rldicr mask: MSB-0 bits `0..=me` set, bits `me+1..=63` cleared.
///
/// Any `me` of 63 or more yields the all-ones mask.
fn rld_mask_right(me: u32) -> u64 {
    match 63u32.checked_sub(me) {
        // `cleared` is the number of low-order bits to zero out.
        Some(cleared) => u64::MAX << cleared,
        None => u64::MAX,
    }
}
|
||
|
||
/// Extract VMX register indices, handling both standard (opcode 4) and 128-bit forms.
|
||
#[inline]
|
||
fn vmx_reg_triple(instr: &DecodedInstr) -> (usize, usize, usize) {
|
||
// Check if this is a VMX128 form (opcode 4 with extended register fields)
|
||
// Standard Altivec: vD=rd, vA=ra, vB=rb
|
||
// VMX128: vD=vd128, vA=va128, vB=vb128
|
||
let is_128 = matches!(
|
||
instr.opcode,
|
||
PpcOpcode::vand128 | PpcOpcode::vandc128 | PpcOpcode::vor128 |
|
||
PpcOpcode::vxor128 | PpcOpcode::vnor128 | PpcOpcode::vsel128 |
|
||
PpcOpcode::vcmpeqfp128 | PpcOpcode::vcmpgefp128 | PpcOpcode::vcmpgtfp128 |
|
||
PpcOpcode::vmrghw128 | PpcOpcode::vmrglw128 |
|
||
PpcOpcode::vslw128 | PpcOpcode::vsrw128 | PpcOpcode::vsraw128 | PpcOpcode::vrlw128 |
|
||
PpcOpcode::vcmpequw128
|
||
);
|
||
if is_128 {
|
||
(instr.va128(), instr.vb128(), instr.vd128())
|
||
} else {
|
||
(instr.ra(), instr.rb(), instr.rd())
|
||
}
|
||
}
|
||
|
||
/// Update CR6 from vector compare result mask (used when Rc=1 on vector compares).
|
||
/// CR6: bit 0 (LT) = all elements true, bit 2 (EQ) = all elements false
|
||
#[inline]
|
||
fn update_cr6_from_vmask(r: &[u32; 4], ctx: &mut PpcContext) {
|
||
let all_true = r.iter().all(|&v| v == 0xFFFF_FFFF);
|
||
let all_false = r.iter().all(|&v| v == 0);
|
||
ctx.cr[6].lt = all_true;
|
||
ctx.cr[6].gt = false;
|
||
ctx.cr[6].eq = all_false;
|
||
ctx.cr[6].so = false;
|
||
}
|
||
|
||
/// Round a double to single precision and back (matches xenia's ToSingle).
|
||
#[inline]
|
||
/// Round an f64 to single precision, honouring FPSCR[RN].
|
||
fn to_single(ctx: &PpcContext, val: f64) -> f64 {
|
||
fpscr::round_to_single(ctx, val)
|
||
}
|
||
|
||
/// Update CR1 from FPSCR (used when Rc=1 on FPU instructions).
|
||
/// CR1 = FPSCR[FX, FEX, VX, OX] (bits 0-3).
|
||
#[inline]
|
||
fn update_cr1_from_fpscr(ctx: &mut PpcContext) {
|
||
fpscr::update_cr1(ctx);
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
/// Flat 64 KiB scratch memory for interpreter unit tests.
///
/// Each byte lives in its own `Cell` so that the `MemoryAccess` write
/// methods can mutate the buffer through `&self`.
struct TestMem {
    data: Box<[std::cell::Cell<u8>]>,
}

impl TestMem {
    /// Create a zero-filled 65536-byte memory.
    fn new() -> Self {
        let data = vec![0u8; 0x1_0000]
            .into_iter()
            .map(std::cell::Cell::new)
            .collect();
        Self { data }
    }
}
|
||
|
||
/// Big-endian `MemoryAccess` implementation over the flat test buffer.
///
/// Multi-byte accesses touch consecutive bytes starting at `addr`, most
/// significant byte first. Indexing past the end of the buffer panics.
/// Host-pointer translation is not supported and always returns `None`.
impl MemoryAccess for TestMem {
    fn read_u8(&self, addr: u32) -> u8 {
        self.data[addr as usize].get()
    }

    fn read_u16(&self, addr: u32) -> u16 {
        let base = addr as usize;
        u16::from_be_bytes(std::array::from_fn(|i| self.data[base + i].get()))
    }

    fn read_u32(&self, addr: u32) -> u32 {
        let base = addr as usize;
        u32::from_be_bytes(std::array::from_fn(|i| self.data[base + i].get()))
    }

    fn read_u64(&self, addr: u32) -> u64 {
        let base = addr as usize;
        u64::from_be_bytes(std::array::from_fn(|i| self.data[base + i].get()))
    }

    fn write_u8(&self, addr: u32, val: u8) {
        self.data[addr as usize].set(val);
    }

    fn write_u16(&self, addr: u32, val: u16) {
        // Store byte by byte in ascending address order.
        for (slot, byte) in (addr as usize..).zip(val.to_be_bytes()) {
            self.data[slot].set(byte);
        }
    }

    fn write_u32(&self, addr: u32, val: u32) {
        for (slot, byte) in (addr as usize..).zip(val.to_be_bytes()) {
            self.data[slot].set(byte);
        }
    }

    fn write_u64(&self, addr: u32, val: u64) {
        for (slot, byte) in (addr as usize..).zip(val.to_be_bytes()) {
            self.data[slot].set(byte);
        }
    }

    fn translate(&self, _addr: u32) -> Option<*const u8> {
        None
    }

    fn translate_mut(&self, _addr: u32) -> Option<*mut u8> {
        None
    }
}
|
||
|
||
fn write_instr(mem: &TestMem, addr: u32, raw: u32) {
|
||
mem.write_u32(addr, raw);
|
||
}
|
||
|
||
/// `addi` with rA = 0 loads the immediate and advances the PC by one word.
#[test]
fn test_addi() {
    let mut cpu = PpcContext::new();
    let mut ram = TestMem::new();
    // addi r3, r0, 42
    let addi: u32 = (14 << 26) | (3 << 21) | (0 << 16) | 42;
    write_instr(&mut ram, 0, addi);
    cpu.pc = 0;

    let outcome = step(&mut cpu, &mut ram);

    assert_eq!(outcome, StepResult::Continue);
    assert_eq!(cpu.gpr[3], 42);
    assert_eq!(cpu.pc, 4);
}
|
||
|
||
/// `addis` places its immediate in the upper halfword.
#[test]
fn test_addis() {
    let mut cpu = PpcContext::new();
    let mut ram = TestMem::new();
    // addis r3, r0, 1  =>  r3 = 0x10000
    let addis: u32 = (15 << 26) | (3 << 21) | (0 << 16) | 1;
    write_instr(&mut ram, 0, addis);
    cpu.pc = 0;

    step(&mut cpu, &mut ram);

    assert_eq!(cpu.gpr[3], 0x10000);
}
|
||
|
||
/// `lwz` reads back a word that was seeded directly into test memory.
#[test]
fn test_lwz_stw() {
    let mut cpu = PpcContext::new();
    let mut ram = TestMem::new();
    // Seed 0xDEADBEEF at address 0x100.
    ram.write_u32(0x100, 0xDEADBEEF);
    // addi r1, r0, 0x100
    let load_base: u32 = (14 << 26) | (1 << 21) | (0 << 16) | 0x100;
    write_instr(&mut ram, 0, load_base);
    // lwz r3, 0(r1)
    let load_word: u32 = (32 << 26) | (3 << 21) | (1 << 16) | 0;
    write_instr(&mut ram, 4, load_word);
    cpu.pc = 0;

    // Execute both instructions back to back.
    for _ in 0..2 {
        step(&mut cpu, &mut ram);
    }

    assert_eq!(cpu.gpr[3], 0xDEADBEEF);
}
|
||
|
||
/// An unconditional relative branch adds its displacement to the current PC.
#[test]
fn test_branch() {
    let mut cpu = PpcContext::new();
    let mut ram = TestMem::new();
    // b +0x10 (from addr 0x100): LI = 4, which is 0x10 once shifted left by 2.
    let branch: u32 = (18 << 26) | (4 << 2);
    write_instr(&mut ram, 0x100, branch);
    cpu.pc = 0x100;

    step(&mut cpu, &mut ram);

    assert_eq!(cpu.pc, 0x110);
}
|
||
|
||
/// `bl` branches and records the address of the following instruction in LR.
#[test]
fn test_bl_updates_lr() {
    let mut cpu = PpcContext::new();
    let mut ram = TestMem::new();
    // bl +0x10 (from addr 0x200): same encoding as `b`, with LK = 1.
    let branch_link: u32 = (18 << 26) | (4 << 2) | 1;
    write_instr(&mut ram, 0x200, branch_link);
    cpu.pc = 0x200;

    step(&mut cpu, &mut ram);

    assert_eq!(cpu.pc, 0x210);
    assert_eq!(cpu.lr, 0x204);
}
|
||
|
||
/// A compare that sets CR0.EQ followed by a conditional branch that takes it.
#[test]
fn test_cmp_and_bc() {
    let mut cpu = PpcContext::new();
    let mut ram = TestMem::new();
    cpu.gpr[3] = 10;
    // cmpi cr0, 0, r3, 10 (32-bit compare)
    let compare: u32 = (11 << 26) | (0 << 23) | (0 << 21) | (3 << 16) | (10u32 & 0xFFFF);
    write_instr(&mut ram, 0, compare);
    // bc 12,2,+8 (branch if CR0.EQ: bo = 12, bi = 2)
    let branch_if_eq: u32 = (16 << 26) | (12 << 21) | (2 << 16) | (2 << 2);
    write_instr(&mut ram, 4, branch_if_eq);
    cpu.pc = 0;

    // cmpi
    step(&mut cpu, &mut ram);
    assert!(cpu.cr[0].eq);

    // bc: should be taken, landing at 4 + 8.
    step(&mut cpu, &mut ram);
    assert_eq!(cpu.pc, 12);
}
|
||
|
||
/// `rlwinm` with a full mask (MB = 0, ME = 31) is a pure 32-bit rotate.
#[test]
fn test_rlwinm() {
    let mut cpu = PpcContext::new();
    let mut ram = TestMem::new();
    cpu.gpr[3] = 0xFF00_FF00;
    // rlwinm r4, r3, 8, 0, 31: rotate left by 8 and keep every bit, so the
    // top byte wraps around into the bottom byte.
    let rotate: u32 = (21 << 26) | (3 << 21) | (4 << 16) | (8 << 11) | (0 << 6) | (31 << 1);
    write_instr(&mut ram, 0, rotate);
    cpu.pc = 0;

    step(&mut cpu, &mut ram);

    assert_eq!(cpu.gpr[4], 0x00FF_00FF);
}
|
||
|
||
/// `ori r0, r0, 0` (the canonical NOP) leaves r0 untouched and advances the PC.
#[test]
fn test_ori_nop() {
    let mut cpu = PpcContext::new();
    let mut ram = TestMem::new();
    // ori r0, r0, 0 (NOP)
    let nop: u32 = 0x60000000;
    write_instr(&mut ram, 0, nop);
    cpu.pc = 0;
    cpu.gpr[0] = 0xDEAD;

    step(&mut cpu, &mut ram);

    assert_eq!(cpu.gpr[0], 0xDEAD);
    assert_eq!(cpu.pc, 4);
}
|
||
|
||
/// `fadd f3, f1, f2` with 3.14 + 2.86 must produce 6.0 (within 1e-10).
#[test]
fn test_fadd() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = 3.14;
    cpu.fpr[2] = 2.86;
    cpu.pc = 0;
    // A-form: primary opcode 63, frD=3, frA=1, frB=2, sub-opcode 21 in bits 1-5.
    let insn: u32 = (63 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    let error = (cpu.fpr[3] - 6.0).abs();
    assert!(error < 1e-10);
}
|
||
|
||
/// `fmul f3, f1, f2` with 3.0 * 4.0 must produce 12.0 (within 1e-10).
#[test]
fn test_fmul() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = 3.0;
    cpu.fpr[2] = 4.0;
    cpu.pc = 0;
    // A-form: primary opcode 63, frD=3, frA=1, frB unused (0),
    // frC=2 in the `<< 6` field, sub-opcode 25.
    let insn: u32 = (63 << 26)
        | (3 << 21)
        | (1 << 16)
        | (0 << 11)
        | (2 << 6)
        | (25 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    let error = (cpu.fpr[3] - 12.0).abs();
    assert!(error < 1e-10);
}
|
||
|
||
/// `fcmpu cr0, f1, f2` with 5.0 vs 3.0 must set only CR0.GT.
#[test]
fn test_fcmpu() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = 5.0;
    cpu.fpr[2] = 3.0;
    cpu.pc = 0;
    // X-form: primary opcode 63, crfD=0, frA=1, frB=2, sub-opcode 0.
    let insn: u32 = (63 << 26)
        | (0 << 23)
        | (0 << 21)
        | (1 << 16)
        | (2 << 11)
        | (0 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    let cr0 = &cpu.cr[0];
    assert!(cr0.gt); // 5.0 > 3.0
    assert!(!cr0.lt);
    assert!(!cr0.eq);
}
|
||
|
||
/// `fctiwz f2, f1` on 42.7 must store the integer 42 in the low 32 bits of
/// the destination FPR's bit pattern.
#[test]
fn test_fctiwzx() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = 42.7;
    cpu.pc = 0;
    // X-form: primary opcode 63, frD=2, frA unused (0), frB=1, sub-opcode 15.
    let insn: u32 = (63 << 26) | (2 << 21) | (0 << 16) | (1 << 11) | (15 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    // The converted integer lives in the raw bits of the FPR, not its f64 value.
    let low_word = cpu.fpr[2].to_bits() as u32;
    assert_eq!(low_word, 42);
}
|
||
|
||
/// `fmadd f4, f1, f3, f2` computes frA * frC + frB: (2.0 * 5.0) + 3.0 = 13.0.
#[test]
fn test_fmadd() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = 2.0; // frA
    cpu.fpr[3] = 5.0; // frC (multiplier)
    cpu.fpr[2] = 3.0; // frB (addend)
    cpu.pc = 0;
    // A-form: primary opcode 63, frD=4, frA=1, frB=2, frC=3, sub-opcode 29.
    let insn: u32 = (63 << 26)
        | (4 << 21)
        | (1 << 16)
        | (2 << 11)
        | (3 << 6)
        | (29 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    let error = (cpu.fpr[4] - 13.0).abs();
    assert!(error < 1e-10);
}
|
||
|
||
/// Checks the reset values of a freshly constructed `PpcContext`: LR, the
/// VSCR NJ/SAT bits, and VRSAVE.
#[test]
fn test_ctx_default_state_matches_canary() {
    let fresh = PpcContext::new();
    // LR starts at the halt sentinel so a top-level `blr` drops out cleanly.
    assert_eq!(fresh.lr, crate::context::LR_HALT_SENTINEL);
    // VSCR: NJ set (denormals flush to zero), SAT clear.
    assert!(fresh.vscr_nj());
    assert!(!fresh.vscr_sat());
    // VRSAVE defaults to "save all" per canary.
    assert_eq!(fresh.vrsave, 0xFFFF_FFFF);
}
|
||
|
||
/// `vaddubs` with every lane 0xF0 + 0x20 (= 0x110) must clamp each byte to
/// 0xFF and raise VSCR[SAT].
#[test]
fn test_vaddubs_saturates_and_sets_vscr_sat() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.vr[2] = xenia_types::Vec128::from_bytes([0xF0; 16]);
    cpu.vr[3] = xenia_types::Vec128::from_bytes([0x20; 16]);
    cpu.pc = 0;
    // Primary opcode 4, vD=4, vA=2, vB=3, extended opcode 512.
    let insn: u32 = (4u32 << 26)
        | (4u32 << 21)
        | (2u32 << 16)
        | (3u32 << 11)
        | 512u32;
    write_instr(&mut bus, 0, insn);
    let outcome = step(&mut cpu, &mut bus);
    assert_eq!(outcome, StepResult::Continue);
    assert_eq!(cpu.vr[4].as_bytes(), [0xFFu8; 16]);
    assert!(cpu.vscr_sat(), "SAT should be set after saturation");
}
|
||
|
||
/// A `ldarx` / `stdcx.` pair on the same doubleword: the load must take a
/// reservation, and the conditional store must succeed, set CR0.EQ, write
/// memory and drop the reservation.
#[test]
fn test_ldarx_stdcx_pair() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    bus.write_u64(0x1000, 0xDEADBEEF_CAFEBABE);
    cpu.gpr[4] = 0x1000; // base
    cpu.gpr[5] = 0; // index
    cpu.gpr[6] = 0x1111_1111_2222_2222; // value the stdcx. will store

    // ldarx r3, r4, r5: primary opcode 31, extended opcode 84.
    let load_reserve: u32 =
        (31u32 << 26) | (3u32 << 21) | (4u32 << 16) | (5u32 << 11) | (84u32 << 1);
    write_instr(&mut bus, 0, load_reserve);
    cpu.pc = 0;
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[3], 0xDEADBEEF_CAFEBABE);
    assert!(cpu.has_reservation);

    // stdcx. r6, r4, r5: primary opcode 31, extended opcode 214, Rc bit set.
    let store_cond: u32 =
        (31u32 << 26) | (6u32 << 21) | (4u32 << 16) | (5u32 << 11) | (214u32 << 1) | 1;
    write_instr(&mut bus, 4, store_cond);
    step(&mut cpu, &mut bus);
    assert!(cpu.cr[0].eq, "stdcx. should succeed and set CR0.EQ");
    assert_eq!(bus.read_u64(0x1000), 0x1111_1111_2222_2222);
    assert!(!cpu.has_reservation);
}
|
||
|
||
/// `mcrxr crf3` must copy XER[SO, OV, CA] into CR3's LT/GT/EQ bits and then
/// zero all three XER bits.
#[test]
fn test_mcrxr_moves_xer_condition_bits_and_clears_them() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.xer_so = 1;
    cpu.xer_ov = 0;
    cpu.xer_ca = 1;
    cpu.pc = 0;
    // X-form: primary opcode 31, crfD=3, extended opcode 512.
    let insn: u32 = (31u32 << 26) | (3u32 << 23) | (512u32 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);

    // CR3 mirrors the pre-instruction XER bits.
    assert!(cpu.cr[3].lt, "LT should mirror old XER[SO]");
    assert!(!cpu.cr[3].gt, "GT should mirror old XER[OV]");
    assert!(cpu.cr[3].eq, "EQ should mirror old XER[CA]");
    // The source bits are consumed.
    assert_eq!(cpu.xer_so, 0);
    assert_eq!(cpu.xer_ov, 0);
    assert_eq!(cpu.xer_ca, 0);
}
|
||
|
||
// ---------- Phase 2 fixes: OE / overflow ----------
|
||
|
||
/// Builds the raw XO-form encoding of `add[o][.] rd, ra, rb`.
///
/// Primary opcode 31 and extended opcode 266 are fixed; `oe` sets the OE bit
/// (bit 10) and `rc` sets the Rc bit (bit 0).
fn addx_raw(rd: u32, ra: u32, rb: u32, oe: bool, rc: bool) -> u32 {
    let primary = 31 << 26;
    let registers = (rd << 21) | (ra << 16) | (rb << 11);
    let oe_bit = u32::from(oe) << 10;
    let extended = 266 << 1;
    let rc_bit = u32::from(rc);
    primary | registers | oe_bit | extended | rc_bit
}
|
||
|
||
/// PPCBUG-012 (32-bit ABI): `addo` of INT32_MAX + 1 wraps to INT32_MIN, so
/// XER[OV] must be raised and XER[SO] must latch with it.
#[test]
fn addo_sets_xer_ov_on_signed_overflow_and_stickies_so() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = i32::MAX as u32 as u64;
    cpu.gpr[4] = 1;
    cpu.pc = 0;
    // addo r5, r3, r4 (OE=1, Rc=0).
    let insn = addx_raw(5, 3, 4, true, false);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x8000_0000u64);
    assert_eq!(cpu.xer_ov, 1, "OV must be set on signed overflow");
    assert_eq!(cpu.xer_so, 1, "SO must be stickied from OV");
}
|
||
|
||
/// A non-overflowing `addo` must clear a stale XER[OV] while leaving the
/// sticky XER[SO] set.
#[test]
fn addo_clears_xer_ov_when_no_overflow_but_keeps_sticky_so() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    // Pretend an earlier instruction overflowed.
    cpu.xer_ov = 1;
    cpu.xer_so = 1;
    cpu.gpr[3] = 1;
    cpu.gpr[4] = 2;
    cpu.pc = 0;
    // addo r5, r3, r4 (OE=1, Rc=0).
    let insn = addx_raw(5, 3, 4, true, false);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 3);
    assert_eq!(cpu.xer_ov, 0, "OV must clear when no overflow");
    assert_eq!(cpu.xer_so, 1, "SO is sticky; stays set");
}
|
||
|
||
/// Plain `add` (OE=0) must leave XER[OV] and XER[SO] alone even when the
/// operands overflow.
///
/// The operands are INT32_MAX + 1, which is a signed overflow under the
/// 32-bit ABI (the same inputs raise OV in the `addo` test). The previous
/// input, `i64::MAX`, has low 32 bits 0xFFFFFFFF (-1), and -1 + 1 does not
/// overflow in the 32-bit view, so the test would have passed even if the
/// OE=0 form wrongly updated XER.
#[test]
fn add_without_oe_does_not_touch_xer() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.gpr[3] = i32::MAX as u32 as u64;
    ctx.gpr[4] = 1;
    // add r5, r3, r4 (OE=0, Rc=0).
    write_instr(&mut mem, 0, addx_raw(5, 3, 4, false, false));
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.xer_ov, 0);
    assert_eq!(ctx.xer_so, 0);
}
|
||
|
||
/// PPCBUG-012+020 (32-bit ABI): `add.` producing 0xFFFFFFFF in the low word
/// must classify the result as -1 (CR0.LT). A 64-bit compare would wrongly
/// report it as positive (CR0.GT) for Xbox 360 binaries.
#[test]
fn addx_rc_uses_32bit_compare_in_xbox_abi() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x0000_0000_FFFF_FFFF;
    cpu.gpr[4] = 0;
    cpu.pc = 0;
    // add. r5, r3, r4 (OE=0, Rc=1).
    let insn = addx_raw(5, 3, 4, false, true);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFF);
    assert!(cpu.cr[0].lt, "32-bit ABI: 0xFFFFFFFF as i32 is -1, CR0.LT");
    assert!(!cpu.cr[0].gt);
    assert!(!cpu.cr[0].eq);
}
|
||
|
||
/// PPCBUG-017 (32-bit ABI): `subfo` computing INT32_MIN - 1 must wrap to
/// INT32_MAX and raise both XER[OV] and XER[SO].
#[test]
fn subfo_sets_xer_ov_on_int32_min_minus_one() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 1; // rA (subtrahend)
    cpu.gpr[4] = 0x8000_0000u64; // rB = INT32_MIN (minuend)
    cpu.pc = 0;
    // subfo r5, r3, r4: primary opcode 31, OE=1, extended opcode 40.
    let insn: u32 = (31 << 26)
        | (5 << 21)
        | (3 << 16)
        | (4 << 11)
        | (1 << 10)
        | (40 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x7FFF_FFFFu64);
    assert_eq!(cpu.xer_ov, 1);
    assert_eq!(cpu.xer_so, 1);
}
|
||
|
||
/// PPCBUG-017 review-fix regression: `subfo r5, r3, r4` with r3=1 and
/// r4=0x80000001 yields 0x80000000, which is i32::MIN and fits in i32, so
/// there is no 32-bit overflow. The legacy `sum_overflow_64` predicate
/// compared against the u64 view of the result (+2147483648) and wrongly
/// flagged OV=1.
#[test]
fn subfo_no_spurious_ov_when_result_has_bit31_set() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 1;
    cpu.gpr[4] = 0x8000_0001u64;
    cpu.pc = 0;
    // subfo r5, r3, r4: primary opcode 31, OE=1, extended opcode 40.
    let insn: u32 = (31 << 26)
        | (5 << 21)
        | (3 << 16)
        | (4 << 11)
        | (1 << 10)
        | (40 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x8000_0000u64);
    assert_eq!(cpu.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV");
}
|
||
|
||
/// PPCBUG-007, same review fix as the `subfo` case: the `subfco` OE handler
/// must use the 32-bit overflow predicate, so a legitimate i32::MIN result
/// does not raise OV.
#[test]
fn subfco_no_spurious_ov_when_result_has_bit31_set() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 1;
    cpu.gpr[4] = 0x8000_0001u64;
    cpu.pc = 0;
    // subfco r5, r3, r4: primary opcode 31, OE=1, extended opcode 8.
    let insn: u32 = (31u32 << 26)
        | (5 << 21)
        | (3 << 16)
        | (4 << 11)
        | (1 << 10)
        | (8 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x8000_0000u64);
    assert_eq!(cpu.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV");
}
|
||
|
||
/// `mullwo` of INT32_MAX * 2 does not fit in 32 bits, so XER[OV] and
/// XER[SO] must both be set.
#[test]
fn mullwo_sets_xer_ov_when_product_overflows_32_bits() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = i32::MAX as u64;
    cpu.gpr[4] = 2u64;
    cpu.pc = 0;
    // mullwo r5, r3, r4: primary opcode 31, OE=1, extended opcode 235.
    let insn: u32 = (31 << 26)
        | (5 << 21)
        | (3 << 16)
        | (4 << 11)
        | (1 << 10)
        | (235 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.xer_ov, 1);
    assert_eq!(cpu.xer_so, 1);
}
|
||
|
||
/// `divwo` with a zero divisor must raise XER[OV]. The quotient is
/// undefined by the spec; this implementation follows canary and writes 0.
#[test]
fn divwo_sets_xer_ov_on_divide_by_zero() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 10; // dividend
    cpu.gpr[4] = 0; // divisor
    cpu.pc = 0;
    // divwo r5, r3, r4: primary opcode 31, OE=1, extended opcode 491.
    let insn: u32 = (31 << 26)
        | (5 << 21)
        | (3 << 16)
        | (4 << 11)
        | (1 << 10)
        | (491 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.xer_ov, 1);
    assert_eq!(cpu.gpr[5], 0); // undefined in spec; canary uses 0
}
|
||
|
||
/// PPCBUG-006 (32-bit ABI): INT_MIN is 0x80000000 in the low word, not
/// 0x8000000000000000. `nego` of it must raise OV and wrap back to INT_MIN
/// with the upper 32 bits zero.
#[test]
fn nego_sets_ov_only_on_int_min() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x8000_0000;
    cpu.pc = 0;
    // nego r5, r3: primary opcode 31, OE=1, extended opcode 104.
    let insn: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (1 << 10) | (104 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.xer_ov, 1);
    assert_eq!(cpu.gpr[5], 0x0000_0000_8000_0000);
}
|
||
|
||
/// PPCBUG-006 regression: `neg` of 5 must give 0x00000000_FFFFFFFB, not the
/// 64-bit `!ra + 1` result 0xFFFFFFFF_FFFFFFFB.
#[test]
fn neg_clean_input_no_upper_bits() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 5;
    cpu.pc = 0;
    // neg r5, r3: primary opcode 31, extended opcode 104, OE=0.
    let insn: u32 = (31u32 << 26) | (5 << 21) | (3 << 16) | (104 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFB);
}
|
||
|
||
/// PPCBUG-029: `not rA, rB` is the simplified mnemonic for `nor` with
/// rS == rB. A 64-bit `!val` would poison the upper 32 bits on every
/// execution; under the 32-bit ABI the result must be truncated.
#[test]
fn norx_not_simplified_keeps_upper_bits_clean() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x0000_0000_0000_00FF;
    cpu.pc = 0;
    // nor r5, r3, r3: primary opcode 31, rS=3, rA=5, rB=3, extended opcode 124.
    let insn: u32 = (31u32 << 26) | (3 << 21) | (5 << 16) | (3 << 11) | (124 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FF00, "upper 32 bits must be zero");
}
|
||
|
||
/// PPCBUG-031: `eqv rA, rA, rA` is a common "set to all ones" idiom. A
/// 64-bit `!(0 ^ 0)` gives u64::MAX; the 32-bit ABI expects
/// 0x00000000_FFFFFFFF.
#[test]
fn eqvx_self_self_self_sets_low32_to_all_ones() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0;
    cpu.pc = 0;
    // eqv r3, r3, r3: primary opcode 31, extended opcode 284.
    let insn: u32 = (31u32 << 26) | (3 << 21) | (3 << 16) | (3 << 11) | (284 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[3], 0x0000_0000_FFFF_FFFF);
}
|
||
|
||
/// PPCBUG-033: `andc rA, rS, rB` computes rS & !rB. A 64-bit `!rB` would
/// poison the upper half; the result must stay within the low 32 bits.
#[test]
fn andcx_bit_clear_keeps_upper_clean() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0xFFFF_FFFF; // rS
    cpu.gpr[4] = 0x000F; // rB: the low bits to clear
    cpu.pc = 0;
    // andc r5, r3, r4: primary opcode 31, rS=3, rA=5, rB=4, extended opcode 60.
    let insn: u32 = (31u32 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (60 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFF0);
}
|
||
|
||
/// PPCBUG-008 (32-bit ABI): `subfe` computes RT = !RA + RB + CA. With RA=5,
/// RB=10, CA=1: !5u32 = 0xFFFFFFFA, +10 = 0x1_00000004, +1 = 0x1_00000005,
/// so the low word is 5 and a carry is produced.
#[test]
fn subfex_clean_inputs_no_upper_bits() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 5;
    cpu.gpr[4] = 10;
    cpu.xer_ca = 1;
    cpu.pc = 0;
    // subfe r5, r3, r4: primary opcode 31, extended opcode 136.
    let insn: u32 = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (136 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 5);
    assert_eq!(cpu.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
}
|
||
|
||
/// PPCBUG-023: `andis. r4, r3, 0x8000` with r3=0xFFFFFFFF must produce
/// 0x80000000 and report CR0.LT, since the i32 view of the result is negative.
#[test]
fn andisx_sign_bit_set_classifies_lt() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0xFFFF_FFFFu64;
    cpu.pc = 0;
    // andis.: primary opcode 29, rS=3, rA=4, UIMM=0x8000.
    let insn: u32 = (29u32 << 26) | (3 << 21) | (4 << 16) | 0x8000;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[4], 0x8000_0000u64);
    assert!(cpu.cr[0].lt, "result=0x80000000 → i32 view negative → CR0.LT");
}
|
||
|
||
/// PPCBUG-044: `slw.` producing 0x80000000 must classify as CR0.LT under
/// the 32-bit ABI, not CR0.GT as a 64-bit view would.
#[test]
fn slwx_high_bit_result_classifies_lt() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x4000_0000u64; // value to shift
    cpu.gpr[4] = 1; // shift amount
    cpu.pc = 0;
    // slw. r5, r3, r4: primary opcode 31, rS=3, rA=5, rB=4, extended opcode 24, Rc=1.
    let insn: u32 = (31u32 << 26)
        | (3 << 21)
        | (5 << 16)
        | (4 << 11)
        | (24 << 1)
        | 1;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x8000_0000u64);
    assert!(cpu.cr[0].lt, "0x80000000 as i32 is negative");
}
|
||
|
||
/// PPCBUG-095: `lha` of the halfword 0x8000 must sign-extend to 32 bits
/// only, leaving rD = 0x00000000_FFFF8000.
#[test]
fn lha_negative_halfword_zero_extends_upper() {
    let mut cpu = PpcContext::new();
    let bus = TestMem::new();
    bus.write_u16(0x100, 0x8000);
    cpu.gpr[3] = 0x100; // base address
    cpu.pc = 0;
    // lha r5, 0(r3): primary opcode 42, displacement 0.
    let insn: u32 = (42u32 << 26) | (5 << 21) | (3 << 16) | 0;
    write_instr(&bus, 0, insn);
    step(&mut cpu, &bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_8000u64);
}
|
||
|
||
/// PPCBUG-098: indexed update form. `lhaux` of the halfword 0xFFFF must give
/// rD = 0x00000000_FFFFFFFF and write the effective address back to rA.
#[test]
fn lhaux_negative_halfword_clean_writeback() {
    let mut cpu = PpcContext::new();
    let bus = TestMem::new();
    bus.write_u16(0x200, 0xFFFF);
    cpu.gpr[3] = 0x100; // rA
    cpu.gpr[4] = 0x100; // rB
    cpu.pc = 0;
    // lhaux r5, r3, r4: primary opcode 31, extended opcode 375.
    let insn: u32 = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (375 << 1);
    write_instr(&bus, 0, insn);
    step(&mut cpu, &bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFFu64);
    assert_eq!(cpu.gpr[3], 0x200, "rA updated to EA");
}
|
||
|
||
/// PPCBUG-105: under the 32-bit ABI, `lwa` of the word 0x80000000 must give
/// rD = 0x00000000_80000000, with no sign extension into bits 32-63.
#[test]
fn lwa_high_bit_set_zero_extends_upper() {
    let mut cpu = PpcContext::new();
    let bus = TestMem::new();
    bus.write_u32(0x100, 0x8000_0000);
    cpu.gpr[3] = 0x100; // base address
    cpu.pc = 0;
    // lwa r5, 0(r3): DS-form, primary opcode 58, DS=0, XO=2 in the low two bits.
    let insn: u32 = (58u32 << 26) | (5 << 21) | (3 << 16) | 2;
    write_instr(&bus, 0, insn);
    step(&mut cpu, &bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_8000_0000u64);
}
|
||
|
||
/// PPCBUG-009: `mullwo` of 0x10000 * 0x10000 gives 0x1_00000000. The low
/// 32 bits are zero and the overflow must be reported through XER[OV].
#[test]
fn mullwx_overflow_truncates_to_32() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x10000;
    cpu.gpr[4] = 0x10000;
    cpu.pc = 0;
    // mullwo r5, r3, r4: primary opcode 31, OE=1, extended opcode 235.
    let insn: u32 = (31u32 << 26)
        | (5 << 21)
        | (3 << 16)
        | (4 << 11)
        | (1 << 10)
        | (235 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0, "low 32 bits = 0");
    assert_eq!(cpu.xer_ov, 1, "overflow detected");
}
|
||
|
||
/// PPCBUG-010+011: `divw.` of -10 / 3 = -3 must be written back as
/// 0x00000000_FFFFFFFD, not 0xFFFFFFFF_FFFFFFFD. CR0.LT must still fire
/// because the i32 view of the quotient is negative.
#[test]
fn divwx_negative_quotient_zero_extends() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = (-10i32) as u32 as u64; // dividend
    cpu.gpr[4] = 3; // divisor
    cpu.pc = 0;
    // divw. r5, r3, r4: primary opcode 31, extended opcode 491, Rc=1.
    let insn: u32 = (31u32 << 26)
        | (5 << 21)
        | (3 << 16)
        | (4 << 11)
        | (491 << 1)
        | 1;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFDu64);
    assert!(cpu.cr[0].lt, "CR0.LT must fire for negative i32 quotient");
}
|
||
|
||
/// PPCBUG-041+043: `sraw.` of a negative i32 by 1 yields a negative i32.
/// The writeback must zero-extend to u64 rather than sign-extend.
#[test]
fn srawx_negative_value_zero_extends_upper() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x8000_0000u64; // i32::MIN
    cpu.gpr[4] = 1; // shift amount
    cpu.pc = 0;
    // sraw. r5, r3, r4: primary opcode 31, rS=3, rA=5, rB=4, extended opcode 792, Rc=1.
    let insn: u32 = (31u32 << 26)
        | (3 << 21)
        | (5 << 16)
        | (4 << 11)
        | (792 << 1)
        | 1;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_C000_0000u64);
    assert!(cpu.cr[0].lt);
}
|
||
|
||
/// PPCBUG-042+043: `srawi` by 31 on a negative input must fill the low 32
/// bits with ones and leave the upper 32 zero (it was u64::MAX before the fix).
#[test]
fn srawix_high_count_negative_input_yields_low32_all_ones() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x8000_0000u64;
    cpu.pc = 0;
    // srawi r5, r3, 31: primary opcode 31, rS=3, rA=5, SH=31, extended opcode 824.
    let insn: u32 = (31u32 << 26) | (3 << 21) | (5 << 16) | (31 << 11) | (824 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFFu64);
}
|
||
|
||
/// PPCBUG-001: `li r3, -1` (i.e. `addi r3, r0, -1`) must produce
/// 0x00000000_FFFFFFFF, not 0xFFFFFFFF_FFFFFFFF.
#[test]
fn addi_li_neg_one_zero_extends_upper() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.pc = 0;
    // addi r3, r0, -1: primary opcode 14, rD=3, rA=0, SIMM=0xFFFF.
    let insn: u32 = (14u32 << 26) | (3 << 21) | (0 << 16) | 0xFFFF;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[3], 0x0000_0000_FFFF_FFFFu64);
}
|
||
|
||
/// PPCBUG-002: `addic` with rA = 0xFFFFFFFF_00000001 and SIMM = -1.
///
/// In the 32-bit view, 0x00000001 + 0xFFFFFFFF = 0x00000000 with CA=1. A
/// carry test of the form `result < ra` done on the full u64 would compare
/// against the polluted upper half and could yield the wrong CA. The
/// truncated form ignores the upper 32 bits.
#[test]
fn addic_carry_uses_32bit_compare() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0xFFFFFFFF_00000001u64;
    cpu.pc = 0;
    // addic r4, r3, -1: primary opcode 12, rD=4, rA=3, SIMM=0xFFFF.
    let insn: u32 = (12u32 << 26) | (4 << 21) | (3 << 16) | 0xFFFF;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    // Low word wraps to zero and carries out.
    assert_eq!(cpu.gpr[4], 0);
    assert_eq!(cpu.xer_ca, 1, "32-bit compare must see CA=1");
}
|
||
|
||
/// PPCBUG-004: `mulli` must truncate to 32 bits even when the upper half of
/// RA is polluted (e.g. by upstream bugs).
///
/// Pre-fix, rA = u64::MAX was read as i64 -1, multiplied by 2 to -2 and
/// written back as 0xFFFFFFFF_FFFFFFFE. Post-fix the result is 0xFFFFFFFE,
/// which makes this a discriminating regression test.
#[test]
fn mulli_overflow_wraps_to_32() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = u64::MAX;
    cpu.pc = 0;
    // mulli r4, r3, 2: primary opcode 7, rD=4, rA=3, SIMM=2.
    let insn: u32 = (7u32 << 26) | (4 << 21) | (3 << 16) | 2;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[4], 0xFFFF_FFFEu64, "low 32 bits = -2 in i32; upper 32 zero");
}
|
||
|
||
/// PPCBUG-005: `subfic r4, r3, -1` with r3=5 computes
/// 0xFFFFFFFF - 5 = 0xFFFFFFFA. The buggy form sign-extended the immediate
/// to a full u64 before subtracting, poisoning the upper half.
#[test]
fn subficx_neg_simm_zero_extends() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 5;
    cpu.pc = 0;
    // subfic r4, r3, -1: primary opcode 8, rD=4, rA=3, SIMM=0xFFFF.
    let insn: u32 = (8u32 << 26) | (4 << 21) | (3 << 16) | 0xFFFF;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[4], 0x0000_0000_FFFF_FFFAu64);
    assert_eq!(cpu.xer_ca, 1, "0xFFFFFFFF >= 5 → CA=1");
}
|
||
|
||
/// PPCBUG-007: regression for the exact case that revealed the addis bug.
///
/// After P1's addis fix this works coincidentally; P4 batch 3 makes `subfc`
/// itself robust to 64-bit GPR pollution. Here rA carries garbage in its
/// upper half and only the low words may take part in the subtraction and
/// the carry calculation.
#[test]
fn subfcx_addis_incident_case() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0xFFFF_FFFF_828F_3F98u64; // rA: polluted upper half, low 32 = 0x828F3F98
    cpu.gpr[4] = 0x0000_0000_828F_3F68u64; // rB: clean, low 32 = 0x828F3F68
    cpu.pc = 0;
    // subfc r5, r3, r4: primary opcode 31, extended opcode 8; result = rB - rA.
    let insn: u32 = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (8 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    // Unsigned 32-bit view: 0x828F3F68 < 0x828F3F98, so no carry.
    assert_eq!(cpu.xer_ca, 0, "32-bit unsigned: rb < ra → CA=0");
    // 0x828F3F68 - 0x828F3F98 = 0xFFFFFFD0 in the low word, upper word zero.
    assert_eq!(cpu.gpr[5], 0xFFFF_FFD0u64);
}
|
||
|
||
/// PPCBUG-034+036 (coupled): `extsb.` of 0x80, a negative byte, must produce
/// 0x00000000_FFFFFF80 and NOT 0xFFFFFFFF_FFFFFF80. CR0.LT must still fire
/// because the i32 view of 0xFFFFFF80 is negative.
#[test]
fn extsbx_negative_byte_zero_extends_upper() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x80;
    cpu.pc = 0;
    // extsb. r5, r3: primary opcode 31, rS=3, rA=5, extended opcode 954, Rc=1.
    let insn: u32 = (31u32 << 26) | (3 << 21) | (5 << 16) | (954 << 1) | 1;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FF80);
    assert!(cpu.cr[0].lt, "CR0.LT must fire for negative i32");
    assert!(!cpu.cr[0].gt);
}
|
||
|
||
/// PPCBUG-035+037 (coupled): `extsh.` of 0x8000 must produce
/// 0x00000000_FFFF8000 and set CR0.LT.
#[test]
fn extshx_negative_halfword_zero_extends_upper() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0x8000;
    cpu.pc = 0;
    // extsh. r5, r3: primary opcode 31, rS=3, rA=5, extended opcode 922, Rc=1.
    let insn: u32 = (31u32 << 26) | (3 << 21) | (5 << 16) | (922 << 1) | 1;
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_8000);
    assert!(cpu.cr[0].lt);
}
|
||
|
||
/// PPCBUG-019: `subfme` with RA = u32::MAX and CA = 0 must leave CA = 0,
/// because !u32::MAX is 0.
///
/// The buggy code evaluated `!ra != 0` on a u64, which was always true
/// (inverting the u64 cast of u32::MAX leaves the high bits non-zero) and
/// so wrongly set CA = 1.
#[test]
fn subfmex_ra_max_ca_zero_clears_ca() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0xFFFF_FFFFu64;
    cpu.xer_ca = 0;
    cpu.pc = 0;
    // subfme r5, r3: primary opcode 31, extended opcode 232.
    let insn: u32 = (31u32 << 26) | (5 << 21) | (3 << 16) | (232 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_eq!(cpu.xer_ca, 0, "RA=u32::MAX, CA=0 → !RA32==0, CA=0");
}
|
||
|
||
// ---------- Phase 2 fixes: trap TO-field ----------
|
||
|
||
/// `tw` with TO=0 selects no trap conditions, so it must fall through and
/// advance PC even when the operands are equal.
#[test]
fn tw_with_to_zero_never_fires() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 5;
    cpu.gpr[4] = 5;
    cpu.pc = 0;
    // tw 0, r3, r4: primary opcode 31, TO=0 (bits 6-10), extended opcode 4.
    let insn: u32 = (31 << 26) | (0 << 21) | (3 << 16) | (4 << 11) | (4 << 1);
    write_instr(&mut bus, 0, insn);
    let outcome = step(&mut cpu, &mut bus);
    assert_eq!(outcome, StepResult::Continue, "TO=0 must never trap");
    assert_eq!(cpu.pc, 4);
}
|
||
|
||
/// `tw` with TO=4 (EQ only) must trap when both operands are equal.
#[test]
fn tw_eq_fires_on_equal() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 5;
    cpu.gpr[4] = 5;
    cpu.pc = 0;
    // tw 4, r3, r4: primary opcode 31, TO=4, extended opcode 4.
    let insn: u32 = (31 << 26) | (4 << 21) | (3 << 16) | (4 << 11) | (4 << 1);
    write_instr(&mut bus, 0, insn);
    let outcome = step(&mut cpu, &mut bus);
    assert_eq!(outcome, StepResult::Trap);
}
|
||
|
||
/// `tw` with TO=4 (EQ only) must not trap when the operands differ.
#[test]
fn tw_eq_does_not_fire_on_unequal() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 5;
    cpu.gpr[4] = 7;
    cpu.pc = 0;
    // tw 4, r3, r4: primary opcode 31, TO=4, extended opcode 4.
    let insn: u32 = (31 << 26) | (4 << 21) | (3 << 16) | (4 << 11) | (4 << 1);
    write_instr(&mut bus, 0, insn);
    let outcome = step(&mut cpu, &mut bus);
    assert_eq!(outcome, StepResult::Continue);
}
|
||
|
||
/// `twi` is a word-width compare: garbage in the upper half of rA must not
/// stop an EQ trap when the low 32 bits match the immediate.
#[test]
fn twi_compares_low_32_bits_only() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.gpr[3] = 0xDEAD_BEEF_0000_0005; // low 32 = 5
    cpu.pc = 0;
    // twi 4, r3, 5: primary opcode 3, TO=4 (EQ), rA=3, SI=5.
    let insn: u32 = (3 << 26) | (4 << 21) | (3 << 16) | (5u32 & 0xFFFF);
    write_instr(&mut bus, 0, insn);
    let outcome = step(&mut cpu, &mut bus);
    assert_eq!(outcome, StepResult::Trap, "word-width compare matches low 32");
}
|
||
|
||
// ---------- Phase 2 fixes: mcrfs ----------
|
||
|
||
// ---------- Phase 2h: FPU / FPSCR ----------
|
||
|
||
/// PPCBUG-203 regression: `fmsub` where a*c = +∞ and b = +∞ evaluates
/// +∞ + (-∞), which must raise VXISI. Pre-fix there was no VXISI check on
/// the add step.
#[test]
fn fmsub_inf_minus_inf_sets_vxisi() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = f64::INFINITY; // a
    cpu.fpr[3] = 1.0; // c
    cpu.fpr[2] = f64::INFINITY; // b
    cpu.pc = 0;
    // fmsub f4, f1, f3, f2 → 1*∞ - ∞.
    // A-form: primary opcode 63, frD=4, frA=1, frB=2, frC=3, XO=28.
    let insn: u32 = (63u32 << 26)
        | (4 << 21)
        | (1 << 16)
        | (2 << 11)
        | (3 << 6)
        | (28 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_ne!(cpu.fpscr & fpscr::VXISI, 0, "fmsub ∞-∞ must set VXISI");
}
|
||
|
||
/// PPCBUG-205 regression: the ISA forbids negating a NaN result. When
/// a*c + b produces a NaN, `fnmadd` must return it unchanged, not -NaN.
#[test]
fn fnmadd_nan_input_preserves_nan_sign() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    let qnan = f64::NAN;
    cpu.fpr[1] = qnan;
    cpu.fpr[2] = 1.0;
    cpu.fpr[3] = 2.0;
    cpu.pc = 0;
    // fnmadd f4, f1, f3, f2: primary opcode 63, frD=4, frA=1, frB=2, frC=3, XO=31.
    let insn: u32 = (63u32 << 26)
        | (4 << 21)
        | (1 << 16)
        | (2 << 11)
        | (3 << 6)
        | (31 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    // The output must be a NaN carrying the same sign bit as the input NaN.
    let result = cpu.fpr[4];
    assert!(result.is_nan(), "result must be NaN");
    assert_eq!(
        result.is_sign_negative(),
        qnan.is_sign_negative(),
        "fnmadd must preserve NaN sign (no negation on NaN)"
    );
}
|
||
|
||
/// `fadd` of +∞ and -∞ is an invalid operation: it must raise VXISI
/// together with the FX sticky bit and the VX summary bit.
#[test]
fn fadd_inf_minus_inf_sets_vxisi() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = f64::INFINITY;
    cpu.fpr[2] = f64::NEG_INFINITY;
    cpu.pc = 0;
    // fadd f3, f1, f2: primary opcode 63, sub-opcode 21.
    let insn: u32 = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    let status = cpu.fpscr;
    // VXISI is PPC bit 8 → mask 1<<23.
    assert_ne!(status & fpscr::VXISI, 0);
    // FX is sticky and set on any new exception → mask 1<<31.
    assert_ne!(status & fpscr::FX, 0);
    // VX is the invalid-operation summary → mask 1<<29.
    assert_ne!(status & fpscr::VX, 0);
}
|
||
|
||
/// `fdiv` of 0.0 / 0.0 must raise the VXZDZ invalid-operation bit.
#[test]
fn fdiv_zero_over_zero_sets_vxzdz() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = 0.0; // dividend
    cpu.fpr[2] = 0.0; // divisor
    cpu.pc = 0;
    // fdiv f3, f1, f2: primary opcode 63, sub-opcode 18.
    let insn: u32 = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (18 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_ne!(cpu.fpscr & fpscr::VXZDZ, 0);
}
|
||
|
||
/// `fdiv` of a finite non-zero value by 0.0 must raise the ZX
/// (zero-divide) bit.
#[test]
fn fdiv_finite_over_zero_sets_zx() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = 1.0; // dividend
    cpu.fpr[2] = 0.0; // divisor
    cpu.pc = 0;
    // fdiv f3, f1, f2: primary opcode 63, sub-opcode 18.
    let insn: u32 = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (18 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    assert_ne!(cpu.fpscr & fpscr::ZX, 0);
}
|
||
|
||
/// After `fadd` of 2.5 + 3.5 (= +6.0) the FPRF field of FPSCR must
/// classify the result as a positive normal number.
#[test]
fn fadd_sets_fprf_from_result() {
    let mut cpu = PpcContext::new();
    let mut bus = TestMem::new();
    cpu.fpr[1] = 2.5;
    cpu.fpr[2] = 3.5;
    cpu.pc = 0;
    // fadd f3, f1, f2: primary opcode 63, sub-opcode 21.
    let insn: u32 = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1);
    write_instr(&mut bus, 0, insn);
    step(&mut cpu, &mut bus);
    // Isolate FPRF and shift it down to bit 0; POS_NORMAL is 0b0_0100.
    let class_bits = (cpu.fpscr & fpscr::FPRF_MASK) >> 12;
    assert_eq!(class_bits as u8, fpscr::fprf::POS_NORMAL);
}
|
||
|
||
/// `frsp` must round according to FPSCR[RN]. With RN = 0b01 (toward zero) an
/// operand just above the midpoint between two adjacent f32 values must be
/// truncated down, whereas round-to-nearest would round it up.
#[test]
fn frsp_honours_fpscr_rn_toward_zero() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    // v = 1 + 2^-24 + 2^-52. The f32 ULP at 1.0 is 2^-23, so v sits just
    // above the halfway point: round-to-nearest yields 1 + 2^-23, while
    // round-toward-zero yields exactly 1.0. (The earlier operand 1 + 2^-52
    // rounded to 1.0 in both modes and therefore could not detect an
    // implementation that ignores RN.)
    let v = f64::from_bits(0x3FF0_0000_1000_0001);
    ctx.fpr[1] = v;
    ctx.fpscr = 0x1; // RN = 01 → toward zero
    // frsp f3, f1 (opcode 63, subop 12)
    let raw = (63u32 << 26) | (3 << 21) | (1 << 11) | (12 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Result rounded toward zero should be exactly 1.0_f64
    assert_eq!(ctx.fpr[3], 1.0_f64);
}

/// `fcmpu` with a NaN operand compares unordered: the target CR field is
/// expected to have only SO set and FPRF to hold the unordered code.
#[test]
fn fcmpu_sets_so_on_nan_and_fprf_unordered() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = f64::NAN;
    ctx.fpr[2] = 1.0;
    // fcmpu crfD=4, f1, f2 : (63<<26) | (crfd<<23) | (ra<<16) | (rb<<11) | (0<<1)
    let raw = (63u32 << 26) | (4 << 23) | (1 << 16) | (2 << 11);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.cr[4].so, "unordered → SO set");
    assert!(!ctx.cr[4].lt && !ctx.cr[4].gt && !ctx.cr[4].eq);
    // FPRF unordered = 0b0_0001
    let fprf = ((ctx.fpscr & fpscr::FPRF_MASK) >> 12) as u8;
    assert_eq!(fprf, 0b0_0001);
}

/// `fcmpo` (ordered compare) with a quiet NaN operand is expected to set
/// FPSCR[VXVC].
#[test]
fn fcmpo_on_qnan_sets_vxvc() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.fpr[1] = f64::NAN; // QNaN (Rust's NAN)
    ctx.fpr[2] = 1.0;
    // fcmpo (opcode 63, subop 32)
    let raw = (63u32 << 26) | (4 << 23) | (1 << 16) | (2 << 11) | (32 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::VXVC, 0);
}

// ---------- Phase 2i: VMX NaN propagation ----------

/// `vmaxfp` must yield NaN in a lane where an input is NaN, and the ordinary
/// maximum in every other lane.
#[test]
fn vmaxfp_propagates_nan() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let mut a = [1.0f32, 2.0, 3.0, 4.0];
    let b = [5.0f32, 6.0, 7.0, 8.0];
    a[1] = f32::NAN;
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array(a);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array(b);
    // vmaxfp vD=4, vA=2, vB=3 (opcode 4, XO=1034)
    let raw = (4u32 << 26) | (4 << 21) | (2 << 16) | (3 << 11) | 1034;
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    let r = ctx.vr[4].as_f32x4();
    // lane 1 must be NaN, not 6.0 (the b side)
    assert!(r[1].is_nan());
    // Other lanes should pick the max correctly
    assert_eq!(r[0], 5.0);
    assert_eq!(r[2], 7.0);
    assert_eq!(r[3], 8.0);
}

/// `vminfp` must yield NaN in a lane where an input is NaN (here lane 2 of
/// the vB operand).
#[test]
fn vminfp_propagates_nan() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let a = [1.0f32, 2.0, 3.0, 4.0];
    let mut b = [5.0f32, 6.0, 7.0, 8.0];
    b[2] = f32::NAN;
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array(a);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array(b);
    // vminfp vD=4, vA=2, vB=3 (opcode 4, XO=1098)
    let raw = (4u32 << 26) | (4 << 21) | (2 << 16) | (3 << 11) | 1098;
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    let r = ctx.vr[4].as_f32x4();
    assert!(r[2].is_nan());
}

// ---------- Phase 2j: VMX denorm flush ----------

/// `vmaddfp` with a denormal multiplicand: the test expects every result
/// lane to be zero once denormals are flushed.
#[test]
fn vmaddfp_flushes_denormal_inputs() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    // Smallest positive denormal f32 = f32::from_bits(1)
    let denorm = f32::from_bits(1);
    let a = [denorm; 4];
    let b = [0.0f32; 4];
    let c = [1.0f32; 4];
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array(a);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array(b);
    ctx.vr[4] = xenia_types::Vec128::from_f32x4_array(c);
    // vmaddfp vD=5, vA=2, vB=3, vC=4 (A-form: opcode 4, XO=46, vC at rc field)
    // layout: (4<<26) | (5<<21) | (2<<16) | (3<<11) | (4<<6) | 46
    let raw = (4u32 << 26) | (5 << 21) | (2 << 16) | (3 << 11) | (4 << 6) | 46;
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    let r = ctx.vr[5].as_f32x4();
    // denorm*1 + 0 should be flushed: denorm→0, so result is 0.
    assert_eq!(r, [0.0f32; 4]);
}

/// VMX128 variant `vmaddfp128 vD, vA, vB` (primary op 5, key2 = 0b001101)
/// reuses vD as the accumulator: `vD <- (vA × vD) + vB`. Canary
/// `ppc_emit_altivec.cc:786-810` flushes *all three* inputs
/// unconditionally before the fused multiply-add.
#[test]
fn vmaddfp128_flushes_denormal_inputs() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let denorm = f32::from_bits(1);
    // VA=v1, VD=v2, VB=v3 — all carry denormals.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
    // vmaddfp128 vD=v2, vA=v1, vB=v3: op6=5, vd_lo=2, va_lo=1, vb_lo=3, key2=0b001101.
    // VA×VD+VB: all three flushed → 0*0+0 = 0.
    let raw: u32 = (5u32 << 26) | (2 << 21) | (1 << 16) | (3 << 11) | (3 << 6) | (1 << 4);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]);
}

// ---- PPCBUG-424+425: vmaddfp128/vmaddcfp128 operand swap ----
// ISA for both: (VD) <- (VA × VD) + VB. Previous code computed VA×VB+VD and VD×VB+VA.
// Test uses distinct VA, VB, VD registers so the swap is visible.
// Encoding: op6=5, key2=0b001101 (vmaddfp128) / 0b010001 (vmaddcfp128).
// VA=v1=[2.0], VB=v2=[10.0], VD=v3=[3.0] → expected 2.0×3.0+10.0 = 16.0.
// Buggy vmaddfp128: 2.0×10.0+3.0 = 23.0. Buggy vmaddcfp128: 3.0×10.0+2.0 = 32.0.

/// `vmaddfp128` must compute `VA × VD + VB`; with VA=2, VD=3, VB=10 every
/// lane of the destination must be 16.
#[test]
fn vmaddfp128_operand_order_va_times_vd_plus_vb() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([2.0f32; 4]); // VA=v1
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([10.0f32; 4]); // VB=v2
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([3.0f32; 4]); // VD=v3 (also destination)
    // vmaddfp128 vD=v3, vA=v1, vB=v2 — op5, key2=0b001101 (bits22-25=3, bit27=1)
    let raw: u32 = (5u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (1 << 4);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.vr[3].as_f32x4(), [16.0f32; 4], "VA×VD+VB = 2*3+10 = 16");
}

/// `vmaddcfp128` must compute `VA × VD + VB`; with VA=2, VD=3, VB=10 every
/// lane of the destination must be 16 (a swapped-operand implementation
/// would produce a different value).
#[test]
fn vmaddcfp128_operand_order_va_times_vd_plus_vb() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([2.0f32; 4]); // VA=v1
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([10.0f32; 4]); // VB=v2
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([3.0f32; 4]); // VD=v3
    // vmaddcfp128 vD=v3, vA=v1, vB=v2 — op5, key2=0b010001 (bits22-25=4, bit27=1)
    let raw: u32 = (5u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (4 << 6) | (1 << 4);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.vr[3].as_f32x4(), [16.0f32; 4], "VA×VD+VB = 2*3+10 = 16");
}

/// VMX128 `vnmsubfp128 vD, vA, vB` (key2 = 0b010101). Canary
/// `ppc_emit_altivec.cc:1133-1160` flushes all three inputs in the
/// helper. Semantics: `vD <- -((vA * vB) - vD) = vD - vA*vB`.
#[test]
fn vnmsubfp128_flushes_denormal_inputs() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let denorm = f32::from_bits(1);
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([1.0f32; 4]);
    // vnmsubfp128 vD=v2, vA=v2, vB=v3: key2 = 0b010101 (21) encoded
    // via bits 22-25 = 0101 and bit 27 = 1.
    //
    // NOTE(review): bits 16-20 of `raw` are clear, so the low VA field
    // selects v0 rather than v2 as stated above. If v0 starts out zeroed the
    // result is vD - 0*vB = vD, which is what makes a missing flush
    // observable; with vA=v2 the expression cancels to 0 either way.
    // Confirm the intended operand and update this description.
    let raw: u32 = 0x1440_1950;
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Without flush: denorm - denorm*1.0 = 0 (but the intermediate
    // values propagate subnormals through the compute); with flush
    // everything is 0 cleanly.
    assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]);
}

/// VMX128 `vmsum4fp128 vD, vA, vB` computes the 4-lane dot product
/// and broadcasts the result. Canary
/// `ppc_emit_altivec.cc:1077-1084` flushes the *output* denormal
/// (not the inputs). A dot product that sums to a subnormal must
/// read back as 0.
#[test]
fn vmsum4fp128_flushes_denormal_output() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let denorm = f32::from_bits(1);
    // Dot product = denorm * 1.0 + 0 + 0 + 0 = denorm.
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm, 0.0, 0.0, 0.0]);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([1.0f32, 0.0, 0.0, 0.0]);
    // vmsum4fp128 vD=v2, vA=v2, vB=v3: key2 = 0b011101 (29), i.e.
    // bits 22-25 = 0111 and bit 27 = 1. Built field by field (= 0x1442_19D0):
    // the previous literal 0x1440_19D0 left the VA field at 0, so the dot
    // product used v0 instead of the denormal-carrying v2 and never produced
    // a subnormal to flush.
    let raw: u32 = (5u32 << 26) | (2 << 21) | (2 << 16) | (3 << 11) | (7 << 6) | (1 << 4);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Subnormal output must flush to 0 and broadcast across all lanes.
    assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]);
}

// ---------- Phase 2k: lve*x / stve*x element masking ----------

/// `lvebx` loads one byte into the vector slot selected by `EA & 0xF`; the
/// test expects all other bytes of the destination to be zero.
#[test]
fn lvebx_loads_byte_into_ea_slot() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    mem.write_u8(0x1003, 0xAB);
    ctx.gpr[4] = 0x1003;
    ctx.gpr[5] = 0;
    // lvebx v1, r4, r5 : (31<<26) | (1<<21) | (4<<16) | (5<<11) | (7<<1)
    let raw = (31u32 << 26) | (1 << 21) | (4 << 16) | (5 << 11) | (7 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    let bytes = ctx.vr[1].as_bytes();
    // Byte at slot 3 (EA & 0xF = 3)
    assert_eq!(bytes[3], 0xAB);
    // Other bytes zero
    for (i, b) in bytes.iter().enumerate().filter(|&(i, _)| i != 3) {
        assert_eq!(*b, 0, "byte {} should be zero", i);
    }
}

/// `stvewx` must store only the addressed 32-bit element; the remaining
/// bytes of the 16-byte region keep their sentinel value.
#[test]
fn stvewx_stores_only_word_slot() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    // Prepare surrounding memory with a sentinel value so we can check non-overwrite.
    for i in 0..16 {
        mem.write_u8(0x1000 + i, 0x55);
    }
    // vS lanes: (big-endian view) word0=0xDEADBEEF, word1..3=0.
    let mut src = [0u8; 16];
    src[..4].copy_from_slice(&0xDEAD_BEEFu32.to_be_bytes());
    ctx.vr[1] = xenia_types::Vec128::from_bytes(src);
    // EA = 0x1000 (slot 0): store word0 at 0x1000.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    // stvewx v1, r4, r5 : (31<<26) | (1<<21) | (4<<16) | (5<<11) | (199<<1)
    let raw = (31u32 << 26) | (1 << 21) | (4 << 16) | (5 << 11) | (199 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u32(0x1000), 0xDEAD_BEEF);
    // Adjacent bytes untouched (still 0x55).
    for i in 4..16 {
        assert_eq!(
            mem.data[0x1000 + i as usize].get(),
            0x55,
            "byte {} was overwritten",
            0x1000 + i
        );
    }
}

// ---------- Phase 2l: reservation cache-line granule ----------

/// A `stwcx.` to a different address inside the cache line reserved by
/// `lwarx` must succeed (CR0.EQ set) and perform the store.
#[test]
fn stwcx_succeeds_within_same_cache_line() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    mem.write_u32(0x1004, 0xAAAA_AAAA);
    ctx.gpr[4] = 0x1004;
    ctx.gpr[5] = 0;
    // lwarx r3, r4, r5 : (31<<26)|(3<<21)|(4<<16)|(5<<11)|(20<<1)
    let ld = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, ld);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation);
    // Now stwcx. to a DIFFERENT address in the SAME cache line (offset within 128 bytes).
    ctx.gpr[4] = 0x1008; // 4 bytes over; same line.
    ctx.gpr[6] = 0xBBBB_BBBB;
    // stwcx. r6, r4, r5 (opcode 31, XO 150, Rc=1), placed at the next PC.
    let st = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 4, st);
    step(&mut ctx, &mut mem);
    // Matches cache line → succeeds.
    assert!(ctx.cr[0].eq);
    assert_eq!(mem.read_u32(0x1008), 0xBBBB_BBBB);
}

/// A `stwcx.` to an address in a different cache line than the one reserved
/// by `lwarx` must fail (CR0.EQ clear) and leave memory untouched.
#[test]
fn stwcx_fails_across_cache_lines() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    // lwarx r3, r4, r5 (opcode 31, XO 20)
    let ld = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, ld);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Different cache line (0x1080).
    ctx.gpr[4] = 0x1080;
    ctx.gpr[6] = 0xCCCC_CCCC;
    // stwcx. r6, r4, r5 (opcode 31, XO 150, Rc=1), placed at the next PC.
    let st = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 4, st);
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "should fail across cache line");
    assert_eq!(mem.read_u32(0x1080), 0, "memory not written on failure");
}

// ---------- PPCBUG-107/140: invalidate_for_write via plain stw ----------

/// PPCBUG-107/140: A plain `stw` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
#[test]
fn lwarx_then_plain_stw_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // Set up registers: r4=0x1000 (target addr), r5=0 (index), r6=plain store val, r7=stwcx val.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xBBBB_BBBB;
    ctx.gpr[7] = 0xCCCC_CCCC;

    // Instr 0: lwarx r3, r4, r5  (opcode 31, XO 20)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);
    // Instr 1: stw r6, 0(r4)  (opcode 36, D-form, displacement 0)
    let stw_plain = (36u32 << 26) | (6 << 21) | (4 << 16);
    write_instr(&mut mem, 4, stw_plain);
    // Instr 2: stwcx. r7, r4, r5  (opcode 31, XO 150, Rc=1)
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 8, stwcx);

    // Execute lwarx — reserves 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Execute plain stw — must call invalidate_for_write and clear the reservation.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u32(0x1000), 0xBBBB_BBBB, "plain stw must land");

    // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stw");
    // Memory must still hold the value from the plain stw, not from stwcx..
    assert_eq!(mem.read_u32(0x1000), 0xBBBB_BBBB, "stwcx. must not overwrite on failure");
}

/// Regression: without any intervening store, `lwarx` + `stwcx.` must still
/// succeed (CR0.EQ=1). Ensures the fix didn't accidentally break the happy path.
#[test]
fn lwarx_then_stwcx_succeeds_without_intervening_store() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1000 (target addr), r5=0 (index), r7=stwcx val.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[7] = 0xDEAD_BEEF;

    // Instr 0: lwarx r3, r4, r5
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);
    // Instr 1: stwcx. r7, r4, r5
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 4, stwcx);

    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    step(&mut ctx, &mut mem);
    assert!(ctx.cr[0].eq, "stwcx. must succeed when reservation is intact");
    assert_eq!(mem.read_u32(0x1000), 0xDEAD_BEEF, "stwcx. must write on success");
}

// ---------- PPCBUG-130: invalidate_for_write via plain stb ----------

/// PPCBUG-130: A plain `stb` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
#[test]
fn lwarx_then_plain_stb_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1000 (target addr), r5=0 (index), r6=byte store val, r7=stwcx val.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xAB;
    ctx.gpr[7] = 0xCCCC_CCCC;

    // Instr 0: lwarx r3, r4, r5  (opcode 31, XO 20)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);
    // Instr 1: stb r6, 0(r4)  (opcode 38, D-form, displacement 0)
    let stb_plain = (38u32 << 26) | (6 << 21) | (4 << 16);
    write_instr(&mut mem, 4, stb_plain);
    // Instr 2: stwcx. r7, r4, r5  (opcode 31, XO 150, Rc=1)
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 8, stwcx);

    // Execute lwarx — reserves 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Execute plain stb — must call invalidate_for_write and clear the reservation.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u8(0x1000), 0xAB, "plain stb must land");

    // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stb");
    assert_eq!(mem.read_u8(0x1000), 0xAB, "stwcx. must not overwrite on failure");
}

// ---------- PPCBUG-150: invalidate_for_write via plain std ----------

/// PPCBUG-150: A plain `std` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
#[test]
fn lwarx_then_plain_std_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1000 (target addr), r5=0 (index), r6=doubleword store val, r7=stwcx val.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xDEADBEEF_CAFEBABEu64;
    ctx.gpr[7] = 0xCCCC_CCCC;

    // Instr 0: lwarx r3, r4, r5  (opcode 31, XO 20)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);
    // Instr 1: std r6, 0(r4)  (opcode 62, DS-form, XO=0b00, displacement 0)
    let std_plain = (62u32 << 26) | (6 << 21) | (4 << 16);
    write_instr(&mut mem, 4, std_plain);
    // Instr 2: stwcx. r7, r4, r5  (opcode 31, XO 150, Rc=1)
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 8, stwcx);

    // Execute lwarx — reserves 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Execute plain std — must call invalidate_for_write and clear the reservation.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "plain std must land");

    // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain std");
    assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. must not overwrite on failure");
}

// ---------- PPCBUG-160: stmw multi-line invalidation ----------

/// PPCBUG-160: `stmw r28, 0x78(r8)` with r8=0x1000 writes four words at
/// 0x1078, 0x107C, 0x1080, 0x1084 — crossing the cache-line boundary at
/// 0x1080 (RESERVATION_MASK=0x7F, line size=128 bytes).
///
/// A reservation on the *second* line (0x1080) must be invalidated even
/// though the store starts in the first line (0x1000-0x107F). This
/// verifies the multi-line loop added to the stmw arm.
#[test]
fn lwarx_then_plain_stmw_spans_two_lines_and_invalidates() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1080 (reservation address — second line), r5=0 (index for lwarx/stwcx.)
    // r8=0x1000 (stmw base), r28-r31 = store values
    ctx.gpr[4] = 0x1080;
    ctx.gpr[5] = 0;
    ctx.gpr[8] = 0x1000;
    ctx.gpr[7] = 0xCCCC_CCCC; // stwcx. value
    ctx.gpr[28] = 0xAAAA_0001;
    ctx.gpr[29] = 0xBBBB_0002;
    ctx.gpr[30] = 0xCCCC_0003;
    ctx.gpr[31] = 0xDDDD_0004;

    // Instr 0: lwarx r3, r4, r5 — reserves line containing 0x1080
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);
    // Instr 1: stmw r28, 0x78(r8) — writes 4 words at 0x1078,0x107C,0x1080,0x1084
    // opcode=47, rs=28, ra=8, d=0x0078
    let stmw = (47u32 << 26) | (28 << 21) | (8 << 16) | 0x0078;
    write_instr(&mut mem, 4, stmw);
    // Instr 2: stwcx. r7, r4, r5 — should fail (line 2 reservation invalidated)
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 8, stwcx);

    // Execute lwarx — reserves 0x1080's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Execute stmw — must invalidate both lines including the one reserved at 0x1080.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u32(0x1078), 0xAAAA_0001, "stmw word 0 must land");
    assert_eq!(mem.read_u32(0x107C), 0xBBBB_0002, "stmw word 1 must land");
    assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stmw word 2 (line 2) must land");
    assert_eq!(mem.read_u32(0x1084), 0xDDDD_0004, "stmw word 3 must land");

    // Execute stwcx. — reservation on line 2 (0x1080) was invalidated; must fail.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail: stmw invalidated the reservation on line 2");
    assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stwcx. must not overwrite on failure");
}

// ---------- PPCBUG-167: invalidate_for_write via plain stfd ----------

/// PPCBUG-167: A plain `stfd` to a reserved line must invalidate the
/// reservation so that a subsequent `stwcx.` fails (CR0.EQ=0).
/// Also verifies big-endian byte layout of the stored double.
#[test]
fn lwarx_then_plain_stfd_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    let mut mem = TestMem::new();

    // r4=0x1000 (target addr), r5=0 (index), r7=stwcx val.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[7] = 0xCCCC_CCCC;
    // FPR 5 holds a specific bit pattern.
    ctx.fpr[5] = f64::from_bits(0xCAFEBABE_DEADBEEFu64);

    // Instr 0: lwarx r3, r4, r5  (opcode 31, XO 20)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, lwarx);
    // Instr 1: stfd f5, 0(r4)  (opcode 54, D-form, displacement 0)
    let stfd_plain = (54u32 << 26) | (5 << 21) | (4 << 16);
    write_instr(&mut mem, 4, stfd_plain);
    // Instr 2: stwcx. r7, r4, r5  (opcode 31, XO 150, Rc=1)
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 8, stwcx);

    // Execute lwarx — reserves 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Execute plain stfd — must call invalidate_for_write and clear the reservation.
    step(&mut ctx, &mut mem);
    // write_f64 delegates to write_u64, which writes big-endian; verify layout.
    assert_eq!(
        mem.read_u64(0x1000),
        0xCAFEBABE_DEADBEEFu64,
        "stfd must store FPR bit pattern in big-endian order"
    );

    // Execute stwcx. — reservation was invalidated; must fail (CR0.EQ=0).
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stfd");
    assert_eq!(mem.read_u64(0x1000), 0xCAFEBABE_DEADBEEFu64, "stwcx. must not overwrite on failure");
}

// ---------- Phase 2m: SPR DEC + TBL/TBU write ----------

/// `mfspr rD, DEC` must copy `ctx.dec` into the destination GPR.
#[test]
fn mfspr_dec_returns_dec_field() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.dec = 0x1234_5678;
    // mfspr r3, DEC (22). SPR encoded with halves swapped: (22 & 0x1F)<<5 | (22>>5)&0x1F = 0x2C0 in bits 11..20.
    // The decoder does the un-swap, so the raw SPR field stores the swapped form.
    let spr_swapped = ((22u32 & 0x1F) << 5) | ((22u32 >> 5) & 0x1F);
    let raw = (31u32 << 26) | (3 << 21) | (spr_swapped << 11) | (339 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.gpr[3], 0x1234_5678);
}

/// `mtspr TBL_WRITE, rS` must replace only the low 32 bits of the timebase
/// and leave the upper half untouched.
#[test]
fn mtspr_tbl_write_updates_low_half() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.timebase = 0xAAAA_BBBB_CCCC_DDDD;
    ctx.gpr[3] = 0x1111_2222;
    // mtspr TBL_WRITE (284), r3 — SPR number stored with its 5-bit halves swapped.
    let spr_swapped = ((284u32 & 0x1F) << 5) | ((284u32 >> 5) & 0x1F);
    let raw = (31u32 << 26) | (3 << 21) | (spr_swapped << 11) | (467 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // step() post-increments timebase by 1, so the observable low half is value+1.
    assert_eq!(ctx.timebase & 0xFFFF_FFFF, 0x1111_2222u64 + 1);
    assert_eq!(ctx.timebase >> 32, 0xAAAA_BBBB);
}

/// PPCBUG-053: bcx CTR zero-test must use 32-bit comparison. When prior
/// 64-bit pollution (e.g. via negx → mtctr) leaves CTR upper 32 bits
/// non-zero, the 64-bit `ctx.ctr != 0` would loop forever even when the
/// 32-bit counter has decremented to zero.
#[test]
fn bcx_bdnz_uses_32bit_ctr_compare() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.ctr = 0x0000_0001_0000_0001;
    // bdnz +8: BO=16 (decrement, branch if CTR!=0, ignore CR), BI=0, BD/4=2
    let raw = (16u32 << 26) | (16 << 21) | (0 << 16) | (2 << 2);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // After decrement: low 32 = 0, high 32 = 1. 32-bit test says zero → no branch.
    assert_eq!(ctx.ctr, 0x0000_0001_0000_0000);
    assert_eq!(ctx.pc, 4);
}

/// `bdnzlr` must test only the low 32 bits of CTR after the decrement: with
/// the low word reaching zero the branch to LR is not taken even though the
/// upper word is non-zero.
#[test]
fn bclrx_uses_32bit_ctr_compare() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.ctr = 0x0000_0001_0000_0001;
    ctx.lr = 0x100;
    // bdnzlr: opcode 19, BO=16 (decrement, branch if CTR!=0), BI=0, XO=16
    let raw = (19u32 << 26) | (16 << 21) | (0 << 16) | (16 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // 32-bit CTR=0 after decrement → don't branch to LR.
    assert_eq!(ctx.ctr, 0x0000_0001_0000_0000);
    assert_eq!(ctx.pc, 4);
}

/// PPCBUG-054: mtspr CTR must truncate the source GPR to 32 bits, matching
/// canary's `f.Truncate(ctr, INT32_TYPE)`. Prevents upstream 64-bit GPR
/// pollution from poisoning the 32-bit CTR counter independently of the
/// bcx zero-test fix.
#[test]
fn mtspr_ctr_truncates_to_32_bits() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    ctx.gpr[3] = 0xFFFF_FFFF_8000_0001;
    // mtspr CTR (9), r3 — SPR number stored with its 5-bit halves swapped.
    let spr_swapped = ((9u32 & 0x1F) << 5) | ((9u32 >> 5) & 0x1F);
    let raw = (31u32 << 26) | (3 << 21) | (spr_swapped << 11) | (467 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.ctr, 0x8000_0001);
}

// ---------- Block-cache parity tests ----------
//
// These confirm that running a program through the basic-block
// cache (crate::block_cache::BlockCache + step_block) produces a
// bit-identical PpcContext to running it through step_cached
// (per-instruction). If this ever fails the block cache is not
// safe to engage in production.

/// Encodes `addi rd, ra, simm` (primary opcode 14, D-form).
///
/// `rd` and `ra` are register numbers (0..=31); `simm` is placed in the low
/// 16 bits as its two's-complement bit pattern.
fn enc_addi_t(rd: u32, ra: u32, simm: i16) -> u32 {
    // An out-of-range register would spill into the neighbouring field.
    debug_assert!(rd < 32 && ra < 32, "register index out of range");
    (14 << 26) | (rd << 21) | (ra << 16) | (simm as u16 as u32)
}
/// Encodes `lwz rd, d(ra)` (primary opcode 32, D-form).
///
/// `rd` and `ra` are register numbers (0..=31); `d` is placed in the low
/// 16 bits as its two's-complement bit pattern.
fn enc_lwz_t(rd: u32, ra: u32, d: i16) -> u32 {
    // An out-of-range register would spill into the neighbouring field.
    debug_assert!(rd < 32 && ra < 32, "register index out of range");
    (32 << 26) | (rd << 21) | (ra << 16) | (d as u16 as u32)
}
/// Encodes `stw rs, d(ra)` (primary opcode 36, D-form).
///
/// `rs` and `ra` are register numbers (0..=31); `d` is placed in the low
/// 16 bits as its two's-complement bit pattern.
fn enc_stw_t(rs: u32, ra: u32, d: i16) -> u32 {
    // An out-of-range register would spill into the neighbouring field.
    debug_assert!(rs < 32 && ra < 32, "register index out of range");
    (36 << 26) | (rs << 21) | (ra << 16) | (d as u16 as u32)
}
/// Encodes a relative `b` (primary opcode 18, AA=0, LK=0).
///
/// `li_words` is the signed displacement in instruction words; it is
/// truncated to the 24-bit LI field and shifted into bits 2..=25.
fn enc_b_t(li_words: i32) -> u32 {
    // Branch: opcode 18, AA=0, LK=0, LI = li_words << 2 (signed).
    let li = (li_words as u32) & 0x00FF_FFFF;
    (18u32 << 26) | (li << 2)
}

/// Snapshot of the parts of `PpcContext` that the block-dispatch parity
/// tests require to be identical between dispatch paths. Comparing the
/// whole struct is impractical (vector regs, fp regs, large arrays);
/// the GPR file + pc + lr + ctr + cr + cycle counter cover everything the
/// interpreter touches in the test programs below.
#[derive(Debug, PartialEq, Eq)]
struct CtxSnap {
    /// General-purpose register file.
    gpr: [u64; 32],
    /// Program counter.
    pc: u32,
    /// Link register.
    lr: u64,
    /// Count register.
    ctr: u64,
    /// Value of `PpcContext::cycle_count` at snapshot time.
    cycle_count: u64,
    /// Condition register as returned by `PpcContext::cr()`.
    cr_packed: u32,
}

impl CtxSnap {
|
||
fn from(ctx: &PpcContext) -> Self {
|
||
Self {
|
||
gpr: ctx.gpr,
|
||
pc: ctx.pc,
|
||
lr: ctx.lr,
|
||
ctr: ctx.ctr,
|
||
cycle_count: ctx.cycle_count,
|
||
cr_packed: ctx.cr(),
|
||
}
|
||
}
|
||
}
|
||
|
||
fn run_per_instruction(prog: &[u32], iters: u32, init_gpr: &[(usize, u64)]) -> CtxSnap {
|
||
let mut ctx = PpcContext::new();
|
||
for &(i, v) in init_gpr {
|
||
ctx.gpr[i] = v;
|
||
}
|
||
let mut mem = TestMem::new();
|
||
for (i, &raw) in prog.iter().enumerate() {
|
||
write_instr(&mut mem, (i as u32) * 4, raw);
|
||
}
|
||
let mut cache = crate::decoder::DecodeCache::new();
|
||
ctx.pc = 0;
|
||
for _ in 0..iters {
|
||
// Run one instruction at a time. Memory has constant
|
||
// page_version (default trait impl returns 1) so the cache
|
||
// entries stay valid forever.
|
||
let r = step_cached(&mut ctx, &mut mem, &mut cache, 1);
|
||
assert!(matches!(r, StepResult::Continue));
|
||
}
|
||
CtxSnap::from(&ctx)
|
||
}
|
||
|
||
fn run_block(prog: &[u32], iters: u32, init_gpr: &[(usize, u64)]) -> CtxSnap {
|
||
let mut ctx = PpcContext::new();
|
||
for &(i, v) in init_gpr {
|
||
ctx.gpr[i] = v;
|
||
}
|
||
let mut mem = TestMem::new();
|
||
for (i, &raw) in prog.iter().enumerate() {
|
||
write_instr(&mut mem, (i as u32) * 4, raw);
|
||
}
|
||
let mut bc = crate::block_cache::BlockCache::new();
|
||
ctx.pc = 0;
|
||
let mut total_steps = 0u32;
|
||
// Iterate by *blocks* until we've covered at least `iters`
|
||
// instructions. The block path runs N instructions per call
|
||
// where N is the block length; we still want to compare on a
|
||
// per-instruction footing, so accumulate cycle_count.
|
||
while total_steps < iters {
|
||
// Borrow bc only long enough to copy the slice we need —
|
||
// step_block needs &mut MemoryAccess so we can't hold a
|
||
// shared borrow on bc across the call.
|
||
let block_ptr: *const crate::block_cache::DecodedBlock = {
|
||
let b: &crate::block_cache::DecodedBlock = bc.lookup_or_build(ctx.pc, &mem);
|
||
b
|
||
};
|
||
// Safety: the BlockCache::lookup_or_build contract is that
|
||
// the returned reference stays valid until the next
|
||
// lookup_or_build on the same cache. We don't call
|
||
// lookup_or_build inside step_block and we drop the raw
|
||
// pointer at the end of the iteration, so no aliasing.
|
||
let block: &crate::block_cache::DecodedBlock = unsafe { &*block_ptr };
|
||
let n_before = ctx.cycle_count;
|
||
let r = step_block(&mut ctx, &mut mem, block);
|
||
assert!(matches!(r, StepResult::Continue));
|
||
let stepped = (ctx.cycle_count - n_before) as u32;
|
||
total_steps += stepped;
|
||
}
|
||
CtxSnap::from(&ctx)
|
||
}
|
||
|
||
#[test]
fn block_dispatch_matches_per_instruction_alu_loop() {
    const ITERS: u32 = 100;

    // Four-instruction loop: r3 += 1, r3 += 2, r3 += 3, then branch three
    // words (12 bytes) back to the first instruction.
    let mut prog: Vec<u32> = (1..=3).map(|imm| enc_addi_t(3, 3, imm)).collect();
    prog.push(enc_b_t(-3));

    let init = [(3usize, 0u64)];
    let per_instruction = run_per_instruction(&prog, ITERS, &init);
    let per_block = run_block(&prog, ITERS, &init);

    // Both execution paths must leave identical architectural state.
    assert_eq!(per_instruction, per_block);
}
|
||
|
||
#[test]
fn block_dispatch_matches_per_instruction_loadstore_loop() {
    const ITERS: u32 = 200;

    // Initial state: r3 = 1, r4 = 0x800 (data pointer).
    // Loop body:
    //   stw  r3, 0(r4)
    //   lwz  r5, 0(r4)
    //   addi r3, r5, 1
    //   b    -12          (three words back to the stw)
    let store_word = enc_stw_t(3, 4, 0);
    let load_word = enc_lwz_t(5, 4, 0);
    let bump = enc_addi_t(3, 5, 1);
    let back_edge = enc_b_t(-3);
    let prog = [store_word, load_word, bump, back_edge];

    let init = [(3usize, 1u64), (4usize, 0x800u64)];
    let per_instruction = run_per_instruction(&prog, ITERS, &init);
    let per_block = run_block(&prog, ITERS, &init);

    // Both execution paths must leave identical architectural state.
    assert_eq!(per_instruction, per_block);
}
|
||
|
||
#[test]
fn mcrfs_moves_fpscr_nibble_and_clears_exception_bits() {
    // FPSCR bit n (PPC numbering) lives at host bit (31 - n) in `ctx.fpscr`.
    const FX: u32 = 1 << 31; // FPSCR bit 0
    const OX: u32 = 1 << 28; // FPSCR bit 3

    // mcrfs crfD=2, crfS=0: (63 << 26) | (crfD << 23) | (crfS << 18) | (64 << 1)
    let raw: u32 = (63 << 26) | (2 << 23) | (0 << 18) | (64 << 1);
    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, raw);

    let mut ctx = PpcContext::new();
    ctx.fpscr = FX | OX;
    ctx.pc = 0;
    step(&mut ctx, &mut mem);

    // FPSCR field 0 is copied into CR field 2.
    // Nibble contents: FX=1, FEX=0, VX=0, OX=1 → 0b1001.
    assert_eq!(ctx.cr[2].as_u8(), 0b1001);
    // FX and OX are clearable, so both must now read as zero in FPSCR.
    assert_eq!(ctx.fpscr & FX, 0, "FX cleared");
    assert_eq!(ctx.fpscr & OX, 0, "OX cleared");
}
|
||
|
||
/// Regression: `subfze` computes `RT ← !RA + CA` (there is no -1 term), so a
/// carry-out is only produced when `RA == 0 && CA == 1`. The earlier
/// predicate (`!ra != 0 || ca != 0`) had been copy-pasted from `subfme` and
/// reported CA=1 in nearly every case.
#[test]
fn test_subfze_carry_only_when_ra_zero_and_ca_one() {
    // subfze rD=3, rA=4: opcode 31, XO=200, OE=0, Rc=0.
    // Encoding: (31<<26) | (rd<<21) | (ra<<16) | (200<<1)
    let raw = (31u32 << 26) | (3 << 21) | (4 << 16) | (200 << 1);

    // Executes the single instruction on a fresh context with r4 = `ra` and
    // XER[CA] = `carry_in`, returning the context for inspection.
    let run_case = |ra: u64, carry_in: bool| {
        let mut ctx = PpcContext::new();
        let mem = TestMem::new();
        write_instr(&mem, 0, raw);
        ctx.pc = 0;
        ctx.gpr[4] = ra;
        ctx.xer_ca = if carry_in { 1 } else { 0 };
        step(&mut ctx, &mem);
        ctx
    };

    // Case 1: ra=0, ca=1 → CA=1 (the only carry case).
    let ctx = run_case(0, true);
    assert_eq!(ctx.xer_ca, 1, "ra=0, ca=1 should produce CA=1");
    assert_eq!(ctx.gpr[3], 0, "result = !0 + 1 = 0 (wraps)");

    // Case 2: ra=0, ca=0 → CA=0 (the old buggy code reported CA=1).
    let ctx = run_case(0, false);
    assert_eq!(ctx.xer_ca, 0, "ra=0, ca=0 should produce CA=0");
    // PPCBUG-018: 32-bit ABI. !0u32 + 0 = u32::MAX, upper 32 bits zero.
    assert_eq!(ctx.gpr[3], 0xFFFF_FFFFu64, "result = !0u32 + 0 = u32::MAX");

    // Case 3: ra=1, ca=0 → CA=0 (the old buggy code reported CA=1).
    let ctx = run_case(1, false);
    assert_eq!(ctx.xer_ca, 0, "ra=1, ca=0 should produce CA=0");
    // PPCBUG-018: 32-bit ABI. !1u32 + 0 = u32::MAX - 1, upper 32 bits zero.
    assert_eq!(ctx.gpr[3], 0xFFFF_FFFEu64, "result = !1u32 + 0 = u32::MAX - 1");

    // Case 4: ra=u32::MAX, ca=1 → CA=0; result = !u32::MAX + 1 = 1.
    let ctx = run_case(0xFFFF_FFFFu64, true);
    assert_eq!(ctx.xer_ca, 0, "ra=u32::MAX, ca=1 should produce CA=0");
    assert_eq!(ctx.gpr[3], 1, "result = !u32::MAX + 1 = 1");
}
|
||
|
||
/// Regression: `cmp` with L=1 must not take LT/GT from the sign of a 64-bit
/// subtraction, because that subtraction can overflow. The old code used
/// `update_cr_signed(bf, ra.wrapping_sub(rb))`, which mis-signed the result
/// for boundary i64 values such as `ra=i64::MIN, rb=1`.
#[test]
fn test_cmp_signed_at_i64_boundaries() {
    // cmp BF=0, L=1, RA, RB: (31<<26) | (1<<21) | (ra<<16) | (rb<<11)
    // (XO=0; the Rc position is reserved on cmp and left 0.)
    let raw = |ra: u32, rb: u32| (31u32 << 26) | (1 << 21) | (ra << 16) | (rb << 11);

    // Runs `cmp cr0, 1, r3, r4` on a fresh context with r3 = `lhs` and
    // r4 = `rhs`, returning the context for inspection.
    let compare = |lhs: u64, rhs: u64| {
        let mut ctx = PpcContext::new();
        let mem = TestMem::new();
        write_instr(&mem, 0, raw(3, 4));
        ctx.pc = 0;
        ctx.gpr[3] = lhs;
        ctx.gpr[4] = rhs;
        step(&mut ctx, &mem);
        ctx
    };

    // i64::MIN < 1 → LT must be set.
    let ctx = compare(i64::MIN as u64, 1);
    assert!(ctx.cr[0].lt, "i64::MIN < 1 must be LT");
    assert!(!ctx.cr[0].gt);
    assert!(!ctx.cr[0].eq);

    // i64::MAX > -1 → GT must be set (the symmetric overflow corner).
    let ctx = compare(i64::MAX as u64, (-1i64) as u64);
    assert!(!ctx.cr[0].lt);
    assert!(ctx.cr[0].gt, "i64::MAX > -1 must be GT");
    assert!(!ctx.cr[0].eq);

    // Equal operands at the extreme still compare EQ.
    let ctx = compare(i64::MIN as u64, i64::MIN as u64);
    assert!(!ctx.cr[0].lt);
    assert!(!ctx.cr[0].gt);
    assert!(ctx.cr[0].eq, "i64::MIN == i64::MIN must be EQ");
}
|
||
|
||
// ---------- PPCBUG-511/513: invalidate_for_write via VMX stores ----------
|
||
|
||
/// PPCBUG-511: a plain `stvx` that hits a reserved line has to drop the
/// reservation, so the `stwcx.` that follows must fail (CR0.EQ=0).
#[test]
fn lwarx_then_plain_stvx_invalidates_reservation() {
    // Program (one word each at 0, 4, 8):
    //   lwarx  r3, r4, r5   (opcode 31, XO 20)
    //   stvx   v0, r0, r4   (opcode 31, XO 231; rA=0 → EA = r4 = 0x1000, aligned)
    //   stwcx. r7, r4, r5   (opcode 31, XO 150, Rc=1)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let stvx = (31u32 << 26) | (4 << 11) | (231 << 1);
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;

    let mut mem = TestMem::new();
    for &(addr, word) in [(0u32, lwarx), (4, stvx), (8, stwcx)].iter() {
        write_instr(&mut mem, addr, word);
    }

    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.hw_id = 0;
    ctx.reservation_table = Some(std::sync::Arc::clone(&table));
    // r4 = reservation and store address, r5 = index (0), r7 = stwcx. value.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[7] = 0xCCCC_CCCC;
    // VR0 carries a recognizable pattern so the vector store can be observed.
    ctx.vr[0] = xenia_types::Vec128::from_bytes([0xAA; 16]);
    ctx.pc = 0;

    // lwarx: takes the reservation on the line containing 0x1000.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // stvx: plain store into the reserved line.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u8(0x1000), 0xAA, "stvx must write the VR bytes");

    // stwcx.: the reservation is gone, so it must fail and leave memory alone.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvx");
    assert_eq!(mem.read_u8(0x1000), 0xAA, "stwcx. must not overwrite on failure");
}
|
||
|
||
/// PPCBUG-513: a plain `stvlx` that hits a reserved line has to drop the
/// reservation, so the `stwcx.` that follows must fail (CR0.EQ=0).
/// With EA=0x1003 the store covers 0x1003..=0x100F (13 bytes).
#[test]
fn lwarx_then_plain_stvlx_invalidates_reservation() {
    // Program (one word each at 0, 4, 8):
    //   lwarx  r3, r4, r5   (opcode 31, XO 20)
    //   stvlx  v0, r0, r6   (opcode 31, XO 647; rA=0 → EA = r6 = 0x1003)
    //   stwcx. r7, r4, r5   (opcode 31, XO 150, Rc=1)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let stvlx = (31u32 << 26) | (6 << 11) | (647 << 1);
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;

    let mut mem = TestMem::new();
    for &(addr, word) in [(0u32, lwarx), (4, stvlx), (8, stwcx)].iter() {
        write_instr(&mut mem, addr, word);
    }

    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.hw_id = 0;
    ctx.reservation_table = Some(std::sync::Arc::clone(&table));
    // The reservation is taken at 0x1000; the stvlx target 0x1003 lies in the
    // same cache line.
    ctx.gpr[4] = 0x1000; // lwarx / stwcx. address
    ctx.gpr[5] = 0; // index register for lwarx / stwcx.
    ctx.gpr[6] = 0x1003; // stvlx EA (rA=0, rB=r6)
    ctx.gpr[7] = 0xCCCC_CCCC; // value stwcx. would store
    // VR0 carries a recognizable pattern so the vector store can be observed.
    ctx.vr[0] = xenia_types::Vec128::from_bytes([0xBB; 16]);
    ctx.pc = 0;

    // lwarx: takes the reservation on the line containing 0x1000.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // stvlx: plain store into the reserved line; first and last written
    // bytes (0x1003 and 0x100F) must both show the 0xBB pattern.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u8(0x1003), 0xBB, "stvlx must write VR bytes starting at EA");
    assert_eq!(mem.read_u8(0x100F), 0xBB, "stvlx must write up to (ea & !0xF)+15");

    // stwcx.: the reservation is gone, so it must fail.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvlx");
}
|
||
|
||
// NOTE: `test_lvebx_preserves_other_lanes` (defined after the PPCBUG-151
// tests below) is the regression test for `lvebx` preserving the prior
// contents of the destination VR for lanes other than the loaded byte.
|
||
// ---------- PPCBUG-151: cross-width reservation pairs must fail ----------
|
||
|
||
/// PPCBUG-151: a reservation taken by `lwarx` (width=4) must not satisfy a
/// `stdcx.` (which needs width=8). The store must fail with CR0.EQ=0 and
/// leave memory unchanged.
#[test]
fn lwarx_then_stdcx_cross_width_fails() {
    // Program (one word each at 0 and 4):
    //   lwarx  r3, r4, r5   (opcode 31, XO 20, Rc=0)
    //   stdcx. r6, r4, r5   (opcode 31, XO 214, Rc=1)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let stdcx = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (214 << 1) | 1;

    let mut mem = TestMem::new();
    for &(addr, word) in [(0u32, lwarx), (4, stdcx)].iter() {
        write_instr(&mut mem, addr, word);
    }

    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.hw_id = 0;
    ctx.reservation_table = Some(std::sync::Arc::clone(&table));
    // r4 = target address, r5 = index (0), r6 = value stdcx. tries to store.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xDEAD_BEEF_CAFE_BABEu64;
    ctx.pc = 0;

    // lwarx: must record a word-sized reservation.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    assert_eq!(ctx.reservation_width, 4, "lwarx must set reservation_width=4");

    // stdcx.: needs width 8 but only width 4 is held, so it must fail.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stdcx. must fail when reservation was set by lwarx (cross-width)");
    // The doubleword at 0x1000 must still be zero.
    assert_eq!(mem.read_u64(0x1000), 0, "stdcx. must not write on cross-width failure");
    // The recorded width is reset once the conditional store completes.
    assert_eq!(ctx.reservation_width, 0, "stdcx. must clear reservation_width on exit");
}
|
||
|
||
/// PPCBUG-151: a reservation taken by `ldarx` (width=8) must not satisfy a
/// `stwcx.` (which needs width=4). The store must fail with CR0.EQ=0 and
/// leave memory unchanged.
#[test]
fn ldarx_then_stwcx_cross_width_fails() {
    // Program (one word each at 0 and 4):
    //   ldarx  r3, r4, r5   (opcode 31, XO 84, Rc=0)
    //   stwcx. r6, r4, r5   (opcode 31, XO 150, Rc=1)
    let ldarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (84 << 1);
    let stwcx = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;

    let mut mem = TestMem::new();
    for &(addr, word) in [(0u32, ldarx), (4, stwcx)].iter() {
        write_instr(&mut mem, addr, word);
    }

    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();

    let mut ctx = PpcContext::new();
    ctx.hw_id = 0;
    ctx.reservation_table = Some(std::sync::Arc::clone(&table));
    // r4 = target address, r5 = index (0), r6 = value stwcx. tries to store.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xCCCC_CCCCu64;
    ctx.pc = 0;

    // ldarx: must record a doubleword-sized reservation.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "ldarx must set has_reservation");
    assert_eq!(ctx.reservation_width, 8, "ldarx must set reservation_width=8");

    // stwcx.: needs width 4 but width 8 is held, so it must fail.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail when reservation was set by ldarx (cross-width)");
    // The word at 0x1000 must still be zero.
    assert_eq!(mem.read_u32(0x1000), 0, "stwcx. must not write on cross-width failure");
    // The recorded width is reset once the conditional store completes.
    assert_eq!(ctx.reservation_width, 0, "stwcx. must clear reservation_width on exit");
}
|
||
|
||
/// Regression: `lvebx` must preserve the prior contents of the destination
/// VR for lanes other than the loaded byte. Previously the handler started
/// from a zeroed buffer.
#[test]
fn test_lvebx_preserves_other_lanes() {
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // Pre-seed vr[3] with a recognizable pattern so a clobbered lane shows up.
    let pattern: [u8; 16] = [
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    ];
    ctx.vr[3] = xenia_types::Vec128::from_bytes(pattern);
    // Place a byte at memory address 0x1004; EA & 0xF == 4 → slot 4.
    mem.write_u8(0x1004, 0xAB);
    // r4 = 0x1000, r5 = 4 → EA = 0x1004
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 4;
    // lvebx vD=3, rA=4, rB=5: opcode 31, XO=7 → (31<<26)|(3<<21)|(4<<16)|(5<<11)|(7<<1)
    let raw = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (7 << 1);
    write_instr(&mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mem);
    // Expected: lane 4 holds 0xAB, every other lane unchanged.
    let mut expected = pattern;
    expected[4] = 0xAB;
    assert_eq!(ctx.vr[3].as_bytes(), expected);
}
|
||
|
||
// ===== PPCBUG-046 / PPCBUG-561: rldicl / clrldi mb_md fix =====
|
||
|
||
/// Builds the 32-bit encoding of `rldicl` (MD-form, primary opcode 30, XO=0),
/// with bit positions given in host numbering (bit 0 = least significant).
///
/// * `rs` - source register, `ra` - destination register.
/// * `sh` - 6-bit shift amount; its low five bits go to bits 11..=15 and its
///   top bit to bit 1.
/// * `mb` - 6-bit mask-begin; its low five bits go to bits 6..=10 and its top
///   bit to bit 5.
/// * `rc` - record bit (only the lowest bit is used).
fn encode_rldicl(rs: u32, ra: u32, sh: u32, mb: u32, rc: u32) -> u32 {
    const PRIMARY_OPCODE: u32 = 30;

    let sh_low = sh & 0x1F;
    let sh_high = (sh >> 5) & 1;
    let mb_low = mb & 0x1F;
    let mb_high = (mb >> 5) & 1;

    // (field value, left shift) pairs, OR-ed together into the final word.
    let fields: [(u32, u32); 8] = [
        (PRIMARY_OPCODE, 26),
        (rs, 21),
        (ra, 16),
        (sh_low, 11),
        (mb_low, 6),
        (mb_high, 5),
        (sh_high, 1),
        (rc & 1, 0),
    ];
    fields
        .iter()
        .fold(0u32, |word, &(value, shift)| word | (value << shift))
}
|
||
|
||
#[test]
fn clrldi_zero_extends_low_32_bits() {
    // `clrldi r3, r4, 32` is `rldicl r3, r4, 0, 32` with Rc=0.
    // After the PPCBUG-046 fix the mask must be 0x00000000_FFFFFFFF (mb=32 →
    // bits 32..63). If mb=32 were decoded as mb=0 the mask would be all-ones
    // and the instruction would degenerate into a plain copy of r4.
    let instr = encode_rldicl(4, 3, 0, 32, 0); // sh=0, mb=32
    let mem = TestMem::new();
    write_instr(&mem, 0x100, instr);

    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    ctx.gpr[4] = 0xDEAD_BEEF_CAFE_BABE_u64;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[3], 0x0000_0000_CAFE_BABE, "clrldi must zero-extend low 32 bits");
}
|
||
|
||
#[test]
fn rldicl_mb32_leaves_low_32_clean() {
    // Same instruction shape as the clrldi test, with different registers and
    // an all-ones upper half: the upper 32 bits must come out zero while the
    // lower 32 bits pass through untouched.
    let instr = encode_rldicl(5, 6, 0, 32, 0);
    let mem = TestMem::new();
    write_instr(&mem, 0x100, instr);

    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    ctx.gpr[5] = 0xFFFF_FFFF_1234_5678_u64;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[6], 0x0000_0000_1234_5678_u64);
}
|
||
|
||
// ===== PPCBUG-275/276/562: vc_rc_bit fix for VC-form vcmpequb =====
|
||
|
||
/// Encodes `vcmpequb.` (record form) as a VC-form word: primary opcode 4,
/// vD at PPC bits 6-10, vA at 11-15, vB at 16-20, Rc at PPC bit 21 (host
/// bit 10), XO=6.
/// Layout: (4<<26)|(vD<<21)|(vA<<16)|(vB<<11)|(1<<10)|6
fn encode_vcmpequb_dot(vd: u32, va: u32, vb: u32) -> u32 {
    const PRIMARY_OPCODE: u32 = 4 << 26;
    const RC_BIT: u32 = 1 << 10;
    const XO: u32 = 6;

    let operands = (vd << 21) | (va << 16) | (vb << 11);
    PRIMARY_OPCODE | operands | RC_BIT | XO
}
|
||
/// Encodes `vcmpequb` without the record bit: the same VC-form layout as the
/// dot form (primary opcode 4, XO=6) but with Rc=0.
fn encode_vcmpequb(vd: u32, va: u32, vb: u32) -> u32 {
    const PRIMARY_OPCODE: u32 = 4 << 26;
    const XO: u32 = 6;

    let operands = (vd << 21) | (va << 16) | (vb << 11);
    PRIMARY_OPCODE | operands | XO
}
|
||
|
||
#[test]
fn vcmpequb_dot_all_true_sets_cr6_lt() {
    // Every byte pair compares equal → every result lane is 0xFF, which the
    // record form reports as CR6.LT=1 (all true) and CR6.EQ=0.
    let mem = TestMem::new();
    write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2));

    let mut ctx = PpcContext::new();
    let same = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[1] = same;
    ctx.vr[2] = same;
    ctx.pc = 0x100;
    step(&mut ctx, &mem);

    assert!(ctx.cr[6].lt, "all-true: CR6.LT must be 1");
    assert!(!ctx.cr[6].eq, "all-true: CR6.EQ must be 0");
}
|
||
|
||
#[test]
fn vcmpequb_no_dot_does_not_update_cr6() {
    // The non-record form must leave CR6 exactly as it was before.
    let mem = TestMem::new();
    write_instr(&mem, 0x100, encode_vcmpequb(0, 1, 2));

    let mut ctx = PpcContext::new();
    // Seed CR6 with LT=1 and EQ=1, a combination the all-equal compare below
    // would not produce if it (wrongly) wrote CR6.
    ctx.cr[6] = crate::context::CrField { lt: true, gt: false, eq: true, so: false };
    let same = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[1] = same;
    ctx.vr[2] = same;
    ctx.pc = 0x100;
    step(&mut ctx, &mem);

    // Both seeded flags must still be set.
    assert!(ctx.cr[6].lt && ctx.cr[6].eq, "CR6 must be unchanged without dot");
}
|
||
|
||
#[test]
fn vcmpequb_dot_all_false_sets_cr6_eq() {
    // No byte pair compares equal → every result lane is 0x00, which the
    // record form reports as CR6.LT=0 and CR6.EQ=1 (all false).
    let mem = TestMem::new();
    write_instr(&mem, 0x100, encode_vcmpequb_dot(0, 1, 2));

    let mut ctx = PpcContext::new();
    ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[2] = xenia_types::Vec128::from_bytes([0xBBu8; 16]);
    ctx.pc = 0x100;
    step(&mut ctx, &mem);

    assert!(!ctx.cr[6].lt, "all-false: CR6.LT must be 0");
    assert!(ctx.cr[6].eq, "all-false: CR6.EQ must be 1");
}
|
||
|
||
// ---- PPCBUG-363 + PPCBUG-369: vpkd3d128 post-pack permutation ----
|
||
//
|
||
// vpkd3d128 VD, VB, type, pack, shift: the low 2 bits of the IMM field
|
||
// select how the packed scalar/vector is merged back into the previous VD.
|
||
// pack=0 → identity (store out directly); pack=1 → 32-bit merge by shift;
|
||
// pack=2,3 → 64-bit merge by shift.
|
||
// Canary source: ppc_emit_altivec.cc:2126-2188.
|
||
//
|
||
// For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant), so
|
||
// vd128 is always in range [96, 127] for vd_lo in [0, 31].
|
||
|
||
/// Builds a `vpkd3d128` instruction word (primary opcode 6).
///
/// Field placement, in host bit numbering (bit 0 = least significant):
/// * `vd`: low five bits at bits 21..=25, next two bits (`vd >> 5`) at
///   bits 2..=3.
/// * `imm` at bit 16 upward, `vb_lo` at bit 11 upward, `z` at bit 6 upward
///   (none of the three is masked here).
/// * Host bits 10, 9 and 4 (PPC bits 21, 22 and 27) are always set; the
///   original author describes them as the key2 discriminant bits.
fn encode_vpkd3d128(vd: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
    const PRIMARY_OPCODE: u32 = 6 << 26;
    // PPC bits 21, 22 and 27.
    const KEY_BITS: u32 = (1 << 10) | (1 << 9) | (1 << 4);

    let vd_low5 = vd & 0x1F;
    let vd_high2 = (vd >> 5) & 0x3;

    let register_fields = (vd_low5 << 21) | (vd_high2 << 2) | (vb_lo << 11);
    let immediate_fields = (imm << 16) | (z << 6);

    PRIMARY_OPCODE | register_fields | immediate_fields | KEY_BITS
}
|
||
|
||
#[test]
fn vpkd3d128_pack0_legacy_unchanged() {
    // pack=0 is the identity merge: the packed value is stored directly and
    // nothing of the previous VD survives.
    // type=0 (D3dColor), pack=0 → IMM=0; z=0 (irrelevant when pack=0).
    let mem = TestMem::new();
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 0, 0));

    let mut ctx = PpcContext::new();
    // Source v1: R=1.0, G=0, B=0, A=0 → D3dColor packs to the word
    // (0<<24)|(255<<16)|(0<<8)|0 = 0x00FF0000.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
    // Destination v96 starts with sentinels that must NOT survive.
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD);
    ctx.pc = 0;
    step(&mut ctx, &mem);

    // out = [0, 0, 0, 0x00FF_0000]; with pack=0 the result equals out.
    let r = ctx.vr[96].as_u32x4();
    assert_eq!(r[0], 0x0000_0000, "pack=0: lane 0 must be out[0]=0");
    assert_eq!(r[1], 0x0000_0000, "pack=0: lane 1 must be out[1]=0");
    assert_eq!(r[2], 0x0000_0000, "pack=0: lane 2 must be out[2]=0");
    assert_eq!(r[3], 0x00FF_0000, "pack=0: lane 3 must be packed D3dColor");
}
|
||
|
||
#[test]
fn vpkd3d128_pack1_shift0_d3d_vertex_pack() {
    // pack=1, shift=0 (VPACK_32): out[3] lands in lane 3 and prev[0..2] are
    // kept. MakePermuteMask(0,0, 0,1, 0,2, 1,3) → [prev[0], prev[1], prev[2], out[3]].
    // IMM = (type=0 D3dColor << 2) | pack=1 = 1; z=0.
    let mem = TestMem::new();
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 0));

    let mut ctx = PpcContext::new();
    // Source packs to out[3] = 0x00FF_0000.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
    // Previous destination contents, distinct per lane.
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    ctx.pc = 0;
    step(&mut ctx, &mem);

    let r = ctx.vr[96].as_u32x4();
    assert_eq!(r[0], 0x1111_1111, "pack=1 shift=0: lane 0 from prev");
    assert_eq!(r[1], 0x2222_2222, "pack=1 shift=0: lane 1 from prev");
    assert_eq!(r[2], 0x3333_3333, "pack=1 shift=0: lane 2 from prev");
    assert_eq!(r[3], 0x00FF_0000, "pack=1 shift=0: lane 3 from out[3]");
}
|
||
|
||
#[test]
fn vpkd3d128_pack1_shift3_puts_out3_at_lane0() {
    // pack=1, shift=3 (VPACK_32): out[3] lands in lane 0 and prev[1..3] are
    // kept. MakePermuteMask(1,3, 0,1, 0,2, 0,3) → [out[3], prev[1], prev[2], prev[3]].
    // IMM = 1; z=3.
    let mem = TestMem::new();
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 3));

    let mut ctx = PpcContext::new();
    // Source packs to out[3] = 0x00FF_0000.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
    // Previous destination contents, distinct per lane.
    ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    ctx.pc = 0;
    step(&mut ctx, &mem);

    let r = ctx.vr[96].as_u32x4();
    assert_eq!(r[0], 0x00FF_0000, "pack=1 shift=3: lane 0 from out[3]");
    assert_eq!(r[1], 0x2222_2222, "pack=1 shift=3: lane 1 from prev");
    assert_eq!(r[2], 0x3333_3333, "pack=1 shift=3: lane 2 from prev");
    assert_eq!(r[3], 0x4444_4444, "pack=1 shift=3: lane 3 from prev");
}
|
||
|
||
// ---- PPCBUG-510: stvewx128 should write one word (4 bytes), not 16 ----
|
||
|
||
/// Builds a `stvewx128` instruction word (VMX128 load/store, primary
/// opcode 4).
///
/// Field placement, in host bit numbering (bit 0 = least significant):
/// * `vs_lo` (VS128 low five bits) at bits 21..=25.
/// * `ra` at bits 16..=20, `rb` at bits 11..=15.
/// * The constant pattern 0b0011000 at bits 4..=10 and 0b11 at bits 0..=1,
///   which the original author gives as the `decode_op4` key1 for
///   `stvewx128`.
///
/// The upper VS128 bits are not parameterized by this helper.
fn encode_stvewx128(vs_lo: u32, ra: u32, rb: u32) -> u32 {
    const PRIMARY_OPCODE: u32 = 4 << 26;
    // PPC bits 21-27 and 30-31 of the key1 pattern.
    const KEY_PATTERN: u32 = (0b0011000 << 4) | 0b11;

    let operands = (vs_lo << 21) | (ra << 16) | (rb << 11);
    PRIMARY_OPCODE | operands | KEY_PATTERN
}
|
||
|
||
#[test]
fn stvewx128_writes_one_word_at_word_aligned_ea() {
    // PPCBUG-510: the old code wrote all 16 bytes at `ea & !0xF`, corrupting
    // the 12 bytes around the target word. The fix word-aligns the EA, picks
    // the lane from `(ea & 0xF) >> 2`, and writes exactly 4 bytes.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // The encoder only sets the low five VS128 bits, so vs_lo=3 is meant to
    // select v3, the register seeded below.
    // NOTE(review): this assumes the decoder derives no extra VS128 bits from
    // the fixed key pattern; confirm against the decoder.
    let raw = encode_stvewx128(3, 1, 2);
    ctx.vr[3] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    ctx.gpr[1] = 0x1000; // base
    ctx.gpr[2] = 0x008; // offset → EA = 0x1008 (already word-aligned), slot = (0x8 & 0xF) >> 2 = 2
    write_instr(&mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mem);
    assert_eq!(ctx.pc, 4, "PC must advance");
    // Slot 2 → lane 2 = 0x3333_3333
    assert_eq!(mem.read_u32(0x1008), 0x3333_3333, "only lane 2 word at ea");
    // Every other word of the 16-byte line must be untouched (memory starts
    // zeroed). 0x1004 is the word directly below the target and was not
    // checked before.
    assert_eq!(mem.read_u32(0x1000), 0x0000_0000, "byte below must be untouched");
    assert_eq!(mem.read_u32(0x1004), 0x0000_0000, "word immediately below ea must be untouched");
    assert_eq!(mem.read_u32(0x100C), 0x0000_0000, "byte above must be untouched");
}
|
||
}
|