Files
xenia-rs/crates/xenia-cpu/src/interpreter.rs
MechaCat02 9ab986ec09 fix(cpu): SWAPBUG-001 — revert addi 32-bit truncation
The addi opcode was truncating its result to 32 bits per the post-P4-batch3
"32-bit ABI" rationale (commit bf8208e). Hunk-level bisection during the
2026-05 audit (M11) isolated this single cast as the cause of the
post-P8 swap regression: swaps dropped 2 → 1 and the renderer lost a
frame. PowerISA mandates sign-extension to 64 bits; canary does not
truncate addi. The truncation was a canary-divergent over-extension
of the addis fix (which IS canary-divergent by design, see
addis at interpreter.rs:121-134).

The addi_li_neg_one_zero_extends_upper test encoded the wrong invariant.
Replaced with a sign-extension test asserting canonical PowerISA
behavior (gpr[3] == 0xFFFF_FFFF_FFFF_FFFF for `li r3, -1`).

Verification at -n 100M lockstep:
  swaps:                1 → 2     (gate met)
  draws:                0 → 0     (unchanged — gated by Phase C+D+E)
  instructions:         ~100M (unchanged)
  imports:              11.4M → 987k    (game escapes retry loop)
  packets:              281M → 57M      (same)
  interrupts_delivered: 629 → 630
Tests: 551 passing (unchanged). Lockstep determinism: byte-identical
across two 100M runs except packets (±5%, GPU-thread-race noise floor).

Closes SWAPBUG-001 / PPCBUG-001.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 13:37:51 +02:00

8165 lines
349 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! PPC interpreter - executes instructions one at a time.
//! This is the core execution engine. Every instruction is observable
//! by the debugger (pre_step/post_step hooks on every cycle).
use crate::context::PpcContext;
use crate::decoder::{decode, DecodedInstr};
use crate::fpscr;
use crate::opcode::PpcOpcode;
use crate::overflow;
use crate::trap;
use crate::vmx;
use xenia_memory::MemoryAccess;
/// Xenon reservation granule: one L2 cache line (128 bytes).
/// `reserved_line = ea & !RESERVATION_MASK` in [context::PpcContext].
///
/// The value is `0x7F` (128 - 1), i.e. the low seven bits of an effective
/// address: the byte offset inside a 128-byte granule. Clearing those bits
/// with `!RESERVATION_MASK` yields the 128-byte-aligned base of the granule.
pub const RESERVATION_MASK: u32 = 0x7F;
/// Result of executing a single instruction.
///
/// Returned by the stepping entry points in this module so the caller can
/// decide whether to keep running or to hand control to another component.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StepResult {
/// Normal execution, advance to next instruction.
Continue,
/// Hit a system call (sc instruction). Kernel should handle.
/// The interpreter has already advanced `pc` past the `sc` when this
/// value is returned.
SystemCall,
/// Hit an unimplemented opcode. The payload is the opcode that the
/// interpreter could not execute.
Unimplemented(PpcOpcode),
/// Hit a trap instruction.
Trap,
/// Execution halted (by debugger or error).
Halted,
}
/// Fetch, decode and execute exactly one PPC instruction at `ctx.pc`.
///
/// The instruction word is read from `mem` at the current program counter,
/// decoded from scratch via [`decode`], and dispatched to the interpreter
/// core. `cycle_count` and `timebase` are each bumped by one afterwards,
/// whatever the outcome of the instruction was.
///
/// Returns the [`StepResult`] produced by the executed instruction.
pub fn step(ctx: &mut PpcContext, mem: &dyn MemoryAccess) -> StepResult {
    // Snapshot the PC once: it is both the fetch address and the address
    // handed to the decoder.
    let fetch_pc = ctx.pc;
    let decoded = decode(mem.read_u32(fetch_pc), fetch_pc);

    let outcome = execute(ctx, mem, &decoded);

    // One instruction retired: advance both counters in lockstep.
    ctx.cycle_count += 1;
    ctx.timebase += 1;

    outcome
}
/// Tier-2 perf variant of [`step`]: identical semantics, but the decoded
/// instruction comes from a PC-keyed decode cache instead of a fresh
/// [`decode`] call on every step.
///
/// The raw instruction word is still read from `mem` on every call and is
/// passed to the cache lookup together with `current_page_version`. On a
/// miss the cache fills itself from a fresh decode; guest writes to the
/// containing page bump that page's version, which invalidates the stale
/// entry on the next lookup.
///
/// The cache is shared across all HW threads, since the PC is
/// thread-independent and entries stay put once filled. The caller supplies
/// `current_page_version` (source it from `GuestMemory::page_version(pc)`);
/// the page is being touched for the `read_u32` anyway, so the two accesses
/// amortize to a single page-table touch.
///
/// `cycle_count` and `timebase` are each incremented by one after the
/// instruction executes. Returns the instruction's [`StepResult`].
pub fn step_cached(
    ctx: &mut PpcContext,
    mem: &dyn MemoryAccess,
    cache: &mut crate::decoder::DecodeCache,
    current_page_version: u64,
) -> StepResult {
    let fetch_pc = ctx.pc;
    let word = mem.read_u32(fetch_pc);
    let decoded = cache.lookup(fetch_pc, word, current_page_version);

    let outcome = execute(ctx, mem, &decoded);

    // One instruction retired: advance both counters in lockstep.
    ctx.cycle_count += 1;
    ctx.timebase += 1;

    outcome
}
/// Tier-4 perf — run the instructions of a pre-decoded
/// [`crate::block_cache::DecodedBlock`] back to back.
///
/// `cycle_count` and `timebase` are bumped once for every instruction that
/// is actually executed, including one that ends the block early.
///
/// Execution of the block stops at the first of:
/// * an instruction whose result is anything other than
///   [`StepResult::Continue`] (system call, trap, halt, or unimplemented
///   opcode) — that result is returned immediately;
/// * an instruction that leaves `ctx.pc` somewhere other than its own
///   address + 4. By construction only the terminator at the tail of the
///   block should do that; the check is defensive so a mid-block branch
///   never causes the instructions after it to run;
/// * the end of the block.
///
/// In the latter two cases (and for an empty block) the function returns
/// [`StepResult::Continue`].
///
/// Caller (in `xenia-app/src/main.rs`) is responsible for choosing this
/// path only when **no per-instruction observation is requested** —
/// i.e., `Debugger::wants_hooks() == false` and no `--trace-*` flag is
/// active. Once those gates flip, the caller falls back to
/// [`step_cached`] so every PC remains observable.
pub fn step_block(
    ctx: &mut PpcContext,
    mem: &dyn MemoryAccess,
    block: &crate::block_cache::DecodedBlock,
) -> StepResult {
    for decoded in &block.instrs {
        // Where the PC lands if this instruction simply falls through.
        let fallthrough_pc = decoded.addr.wrapping_add(4);

        let outcome = execute(ctx, mem, decoded);
        ctx.cycle_count += 1;
        ctx.timebase += 1;

        // Anything but a plain `Continue` must be surfaced to the caller
        // right away.
        if outcome != StepResult::Continue {
            return outcome;
        }

        // The PC went somewhere else (a taken branch): the remaining
        // pre-decoded instructions no longer describe what runs next.
        if ctx.pc != fallthrough_pc {
            return StepResult::Continue;
        }
    }

    // Ran off the end of the block (or the block was empty).
    StepResult::Continue
}
/// Execute a decoded instruction, updating context and memory.
fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -> StepResult {
match instr.opcode {
// ===== ALU: Immediate =====
PpcOpcode::addi => {
let ra_val = if instr.ra() == 0 { 0 } else { ctx.gpr[instr.ra()] };
ctx.gpr[instr.rd()] = ra_val.wrapping_add(instr.simm16() as i64 as u64);
ctx.pc += 4;
}
PpcOpcode::addis => {
// Xbox 360 user mode is 32-bit ABI (MSR.SF=0), so addis must
// produce a value whose upper 32 bits don't pollute downstream
// 64-bit arithmetic. The PPC ISA in 64-bit mode sign-extends
// simm16 before the shift, producing 0xFFFFFFFF_xxxx0000 for
// negative simm16 (high bit set). When this value flows into
// a 64-bit subfc against a zero-extended lwz value, the unsigned
// 64-bit comparison yields wrong CA. Truncate to 32 bits to
// simulate 32-bit ABI behavior.
let ra_val = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let result = ra_val.wrapping_add((instr.simm16() as i64 as u64) << 16);
ctx.gpr[instr.rd()] = result as u32 as u64;
ctx.pc += 4;
}
PpcOpcode::addic => {
// PPCBUG-002: 32-bit ABI. CA must be from a 32-bit unsigned compare;
// canary's `AddDidCarry` truncates both operands to int32 first.
let ra32 = ctx.gpr[instr.ra()] as u32;
let imm32 = instr.simm16() as i32 as u32;
let result32 = ra32.wrapping_add(imm32);
ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
ctx.pc += 4;
}
PpcOpcode::addicx => {
// PPCBUG-003: same fix as addic plus CR0 i32 view.
let ra32 = ctx.gpr[instr.ra()] as u32;
let imm32 = instr.simm16() as i32 as u32;
let result32 = ra32.wrapping_add(imm32);
ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
ctx.update_cr_signed(0, result32 as i32 as i64);
ctx.pc += 4;
}
PpcOpcode::subficx => {
// PPCBUG-005: 32-bit ABI. Sign-extended imm has bits 32-63 set for
// negative SIMM, poisoning the writeback. Canary uses 32-bit form.
let ra32 = ctx.gpr[instr.ra()] as u32;
let imm32 = instr.simm16() as i32 as u32;
let result32 = imm32.wrapping_sub(ra32);
ctx.xer_ca = if imm32 >= ra32 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
ctx.pc += 4;
}
PpcOpcode::mulli => {
// PPCBUG-004: 32-bit ABI. Read RA as i32 (low 32, sign-extended for
// multiply), product fits in 32 bits per ISA (overflow wraps).
let ra = ctx.gpr[instr.ra()] as i32 as i64;
let imm = instr.simm16() as i64;
ctx.gpr[instr.rd()] = (ra.wrapping_mul(imm) as u32) as u64;
ctx.pc += 4;
}
// ===== ALU: Register =====
PpcOpcode::addx => {
// PPCBUG-012+020: 32-bit ABI writeback truncation + CR0 i32 view.
let ra32 = ctx.gpr[instr.ra()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
let result32 = ra32.wrapping_add(rb32);
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128);
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::addcx => {
// PPCBUG-013+020: 32-bit truncation; CA from u32 unsigned compare.
let ra32 = ctx.gpr[instr.ra()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
let result32 = ra32.wrapping_add(rb32);
ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128);
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::addex => {
// PPCBUG-014+020: 32-bit truncation; CA from u32 unsigned compare.
let ra32 = ctx.gpr[instr.ra()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
let ca = ctx.xer_ca as u32;
let result32 = ra32.wrapping_add(rb32).wrapping_add(ca);
ctx.xer_ca = if result32 < ra32 || (ca != 0 && result32 == ra32) { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_sum = (ra32 as i32 as i128) + (rb32 as i32 as i128) + (ca as i128);
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::addzex => {
// PPCBUG-015+020: 32-bit truncation.
let ra32 = ctx.gpr[instr.ra()] as u32;
let ca = ctx.xer_ca as u32;
let result32 = ra32.wrapping_add(ca);
ctx.xer_ca = if result32 < ra32 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_sum = (ra32 as i32 as i128) + (ca as i128);
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::addmex => {
// PPCBUG-016+020: 32-bit truncation. RT = RA + CA - 1.
let ra32 = ctx.gpr[instr.ra()] as u32;
let ca = ctx.xer_ca as u32;
let result32 = ra32.wrapping_add(ca).wrapping_sub(1);
ctx.xer_ca = if ra32 != 0 || ca != 0 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_sum = (ra32 as i32 as i128) + (ca as i128) - 1;
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::subfx => {
// PPCBUG-017+020: 32-bit truncation.
let ra32 = ctx.gpr[instr.ra()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
let result32 = rb32.wrapping_sub(ra32);
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
overflow::apply(ctx, true_diff != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::subfcx => {
// PPCBUG-007: 32-bit ABI. The `rb >= ra` u64 unsigned compare is
// exactly the shape that broke addis. Defensive 32-bit truncation
// is required for correct CA even after upstream cleanup.
let ra32 = ctx.gpr[instr.ra()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
let result32 = rb32.wrapping_sub(ra32);
ctx.xer_ca = if rb32 >= ra32 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_diff = (rb32 as i32 as i128) - (ra32 as i32 as i128);
overflow::apply(ctx, true_diff != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::subfex => {
// PPCBUG-008: 32-bit ABI. Compute in u32 space — `!ra` on u64 always
// pollutes the upper 32 bits, making this an active poisoner.
let ra32 = ctx.gpr[instr.ra()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
let ca = ctx.xer_ca as u32;
let result32 = (!ra32).wrapping_add(rb32).wrapping_add(ca);
ctx.xer_ca = if rb32 > ra32 || (rb32 == ra32 && ca != 0) { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
// RT <- !RA + RB + CA == RB - RA - 1 + CA (32-bit semantics).
let true_sum = (rb32 as i32 as i128) - (ra32 as i32 as i128) - 1 + (ca as i128);
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::subfzex => {
// PPCBUG-018: same active-poisoning shape as subfex; operate in u32.
let ra32 = ctx.gpr[instr.ra()] as u32;
let ca = ctx.xer_ca as u32;
let result32 = (!ra32).wrapping_add(ca);
// RT <- !RA + CA (no -1 term). 32-bit carry-out only when
// !ra32 = u32::MAX (i.e. ra32 = 0) AND ca = 1.
ctx.xer_ca = if ra32 == 0 && ca != 0 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_sum = -(ra32 as i32 as i128) - 1 + (ca as i128);
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::subfmex => {
// PPCBUG-019: also fixes the always-true CA edge — `!ra` on u64
// is non-zero when ra32==0xFFFFFFFF and ca==0, so CA was stuck at 1.
let ra32 = ctx.gpr[instr.ra()] as u32;
let ca = ctx.xer_ca as u32;
let result32 = (!ra32).wrapping_add(ca).wrapping_sub(1);
ctx.xer_ca = if (!ra32) != 0 || ca != 0 { 1 } else { 0 };
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
let true_sum = -(ra32 as i32 as i128) - 2 + (ca as i128);
overflow::apply(ctx, true_sum != (result32 as i32) as i128);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::negx => {
// PPCBUG-006: 32-bit ABI. `(!ra).wrapping_add(1)` on u64 always
// sets upper 32 bits — every neg poisoned the GPR. neg_ov also
// checks at 64-bit INT_MIN; should be 32-bit INT_MIN.
let ra32 = ctx.gpr[instr.ra()] as u32;
let result32 = (!ra32).wrapping_add(1);
ctx.gpr[instr.rd()] = result32 as u64;
if instr.oe() {
overflow::apply(ctx, ra32 == 0x8000_0000);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, result32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::mullwx => {
// PPCBUG-009: 32-bit ABI. Truncate product to u32 — overflow detection
// (mullw_ov) still uses the full i64 product to catch the overflow.
let ra = ctx.gpr[instr.ra()] as i32 as i64;
let rb = ctx.gpr[instr.rb()] as i32 as i64;
let product = ra.wrapping_mul(rb);
ctx.gpr[instr.rd()] = product as u32 as u64;
if instr.oe() {
overflow::apply(ctx, overflow::mullw_ov(product));
}
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::mulhwx => {
// PPCBUG-020: 32-bit ABI CR0 view.
let ra = ctx.gpr[instr.ra()] as i32 as i64;
let rb = ctx.gpr[instr.rb()] as i32 as i64;
let result = ra.wrapping_mul(rb);
ctx.gpr[instr.rd()] = ((result >> 32) as u32) as u64;
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::mulhwux => {
// PPCBUG-020: 32-bit ABI CR0 view.
let ra = ctx.gpr[instr.ra()] as u32 as u64;
let rb = ctx.gpr[instr.rb()] as u32 as u64;
let result = ra.wrapping_mul(rb);
ctx.gpr[instr.rd()] = (result >> 32) & 0xFFFF_FFFF;
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::divwx => {
// PPCBUG-010+011 coupled: 32-bit ABI. Quotient zero-extended to u64
// (canary explicitly uses ZeroExtend(v, INT64_TYPE)). CR0 view via i32.
let ra = ctx.gpr[instr.ra()] as i32;
let rb = ctx.gpr[instr.rb()] as i32;
let ov = overflow::divw_ov_signed(ra, rb);
if ov {
ctx.gpr[instr.rd()] = 0;
} else {
ctx.gpr[instr.rd()] = (ra / rb) as u32 as u64;
}
if instr.oe() {
overflow::apply(ctx, ov);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
}
ctx.pc += 4;
}
PpcOpcode::divwux => {
// PPCBUG-020: 32-bit ABI CR0 view.
let ra = ctx.gpr[instr.ra()] as u32;
let rb = ctx.gpr[instr.rb()] as u32;
let ov = overflow::divw_ov_unsigned(rb);
if ov {
ctx.gpr[instr.rd()] = 0;
} else {
ctx.gpr[instr.rd()] = (ra / rb) as u64;
}
if instr.oe() {
overflow::apply(ctx, ov);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as u32 as i32 as i64);
}
ctx.pc += 4;
}
// ===== 64-bit Arithmetic =====
PpcOpcode::mulldx => {
let ra = ctx.gpr[instr.ra()] as i64;
let rb = ctx.gpr[instr.rb()] as i64;
ctx.gpr[instr.rd()] = ra.wrapping_mul(rb) as u64;
if instr.oe() {
overflow::apply(ctx, overflow::mulld_ov(ra, rb));
}
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
}
ctx.pc += 4;
}
PpcOpcode::mulhdx => {
let ra = ctx.gpr[instr.ra()] as i64 as i128;
let rb = ctx.gpr[instr.rb()] as i64 as i128;
ctx.gpr[instr.rd()] = (ra.wrapping_mul(rb) >> 64) as u64;
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
}
ctx.pc += 4;
}
PpcOpcode::mulhdux => {
let ra = ctx.gpr[instr.ra()] as u128;
let rb = ctx.gpr[instr.rb()] as u128;
ctx.gpr[instr.rd()] = (ra.wrapping_mul(rb) >> 64) as u64;
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
}
ctx.pc += 4;
}
PpcOpcode::divdx => {
let ra = ctx.gpr[instr.ra()] as i64;
let rb = ctx.gpr[instr.rb()] as i64;
let ov = overflow::divd_ov_signed(ra, rb);
if ov {
ctx.gpr[instr.rd()] = 0;
} else {
ctx.gpr[instr.rd()] = (ra / rb) as u64;
}
if instr.oe() {
overflow::apply(ctx, ov);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
}
ctx.pc += 4;
}
PpcOpcode::divdux => {
let ra = ctx.gpr[instr.ra()];
let rb = ctx.gpr[instr.rb()];
let ov = overflow::divd_ov_unsigned(rb);
if ov {
ctx.gpr[instr.rd()] = 0;
} else {
ctx.gpr[instr.rd()] = ra / rb;
}
if instr.oe() {
overflow::apply(ctx, ov);
}
if instr.rc_bit() {
ctx.update_cr_signed(0, ctx.gpr[instr.rd()] as i64);
}
ctx.pc += 4;
}
// ===== Logical =====
PpcOpcode::andix => {
// PPCBUG-020: 32-bit ABI CR0 view.
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & (instr.uimm16() as u64);
ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64);
ctx.pc += 4;
}
PpcOpcode::andisx => {
// PPCBUG-023: 32-bit ABI CR0 view. `andis. rA, rS, 0x8000` to test
// sign bit of a 32-bit word now correctly classifies bit 31 = 1 as LT.
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & ((instr.uimm16() as u64) << 16);
ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64);
ctx.pc += 4;
}
PpcOpcode::ori => {
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | (instr.uimm16() as u64);
ctx.pc += 4;
}
PpcOpcode::oris => {
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | ((instr.uimm16() as u64) << 16);
ctx.pc += 4;
}
PpcOpcode::xori => {
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ (instr.uimm16() as u64);
ctx.pc += 4;
}
PpcOpcode::xoris => {
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ ((instr.uimm16() as u64) << 16);
ctx.pc += 4;
}
PpcOpcode::andx => {
// PPCBUG-032+020: 32-bit ABI CR0 view (latent under clean inputs).
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] & ctx.gpr[instr.rb()];
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::andcx => {
// PPCBUG-033: !rb on u64 flips upper 32 bits — active poisoning.
let rs32 = ctx.gpr[instr.rs()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
ctx.gpr[instr.ra()] = (rs32 & !rb32) as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::orx => {
// PPCBUG-032+020: 32-bit ABI CR0 view.
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] | ctx.gpr[instr.rb()];
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::orcx => {
// PPCBUG-028: same shape as andcx — operate in u32.
let rs32 = ctx.gpr[instr.rs()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
ctx.gpr[instr.ra()] = (rs32 | !rb32) as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::xorx => {
// PPCBUG-032+020: 32-bit ABI CR0 view.
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] ^ ctx.gpr[instr.rb()];
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::norx => {
// PPCBUG-029: `not` simplified mnemonic — every `not` poisoned the GPR.
let rs32 = ctx.gpr[instr.rs()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
ctx.gpr[instr.ra()] = (!(rs32 | rb32)) as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::nandx => {
// PPCBUG-030: same shape — operate in u32.
let rs32 = ctx.gpr[instr.rs()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
ctx.gpr[instr.ra()] = (!(rs32 & rb32)) as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::eqvx => {
// PPCBUG-031: `eqv rA, rA, rA` is a common "set to all-ones" idiom;
// 64-bit form gave 0xFFFFFFFFFFFFFFFF but 32-bit ABI expects 0x00000000FFFFFFFF.
let rs32 = ctx.gpr[instr.rs()] as u32;
let rb32 = ctx.gpr[instr.rb()] as u32;
ctx.gpr[instr.ra()] = (!(rs32 ^ rb32)) as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
// ===== Extend/Count =====
PpcOpcode::extsbx => {
// PPCBUG-034: 32-bit ABI — sign-extend byte to i32, write zero-extended.
// PPCBUG-036 (coupled): CR0 must view result as i32, not i64.
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i8 as i32 as u32 as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::extshx => {
// PPCBUG-035: same shape as extsbx for halfwords.
// PPCBUG-037 (coupled): CR0 i32 view.
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i16 as i32 as u32 as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::extswx => {
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()] as i32 as i64 as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::cntlzwx => {
// Result is 0..=32, fits in u32 with bit 31 always zero, so the
// CR0 view is benign — use the catch-all 32-bit form for consistency.
ctx.gpr[instr.ra()] = (ctx.gpr[instr.rs()] as u32).leading_zeros() as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::cntlzdx => {
ctx.gpr[instr.ra()] = ctx.gpr[instr.rs()].leading_zeros() as u64;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
// ===== Shift =====
PpcOpcode::slwx => {
// PPCBUG-044: 32-bit ABI CR0 view. A result with bit 31 set
// (e.g. 0x80000000) is negative in i32 view but positive in i64.
let sh = ctx.gpr[instr.rb()] as u32;
ctx.gpr[instr.ra()] = if sh < 32 {
((ctx.gpr[instr.rs()] as u32) << sh) as u64
} else { 0 };
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::srwx => {
// PPCBUG-044: 32-bit ABI CR0 view (zero-extended right shift can never
// have bit 31 set, but use the canonical form for consistency).
let sh = ctx.gpr[instr.rb()] as u32;
ctx.gpr[instr.ra()] = if sh < 32 {
((ctx.gpr[instr.rs()] as u32) >> sh) as u64
} else { 0 };
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::srawx => {
// PPCBUG-041+043 coupled: 32-bit ABI writeback truncation + CR0 i32.
// CA logic is independently correct (uses u32 shifted-out test).
let rs = ctx.gpr[instr.rs()] as i32;
let sh = ctx.gpr[instr.rb()] as u32 & 0x3F;
if sh == 0 {
ctx.gpr[instr.ra()] = rs as u32 as u64;
ctx.xer_ca = 0;
} else if sh < 32 {
let result = rs >> sh;
ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 };
ctx.gpr[instr.ra()] = result as u32 as u64;
} else {
ctx.gpr[instr.ra()] = if rs < 0 { 0xFFFF_FFFFu64 } else { 0 };
ctx.xer_ca = if rs < 0 { 1 } else { 0 };
}
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::srawix => {
// PPCBUG-042+043 coupled: same shape as srawx for the sh-immediate form.
let rs = ctx.gpr[instr.rs()] as i32;
let sh = instr.sh();
if sh == 0 {
ctx.gpr[instr.ra()] = rs as u32 as u64;
ctx.xer_ca = 0;
} else {
let result = rs >> sh;
ctx.xer_ca = if rs < 0 && (rs as u32) << (32 - sh) != 0 { 1 } else { 0 };
ctx.gpr[instr.ra()] = result as u32 as u64;
}
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::sldx => {
let sh = ctx.gpr[instr.rb()] & 0x7F;
ctx.gpr[instr.ra()] = if sh < 64 {
ctx.gpr[instr.rs()] << sh
} else { 0 };
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::srdx => {
let sh = ctx.gpr[instr.rb()] & 0x7F;
ctx.gpr[instr.ra()] = if sh < 64 {
ctx.gpr[instr.rs()] >> sh
} else { 0 };
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::sradx => {
let rs = ctx.gpr[instr.rs()] as i64;
let sh = ctx.gpr[instr.rb()] & 0x7F;
if sh == 0 {
ctx.gpr[instr.ra()] = rs as u64;
ctx.xer_ca = 0;
} else if sh < 64 {
let result = rs >> sh;
ctx.xer_ca = if rs < 0 && (rs as u64) << (64 - sh) != 0 { 1 } else { 0 };
ctx.gpr[instr.ra()] = result as u64;
} else {
ctx.gpr[instr.ra()] = if rs < 0 { u64::MAX } else { 0 };
ctx.xer_ca = if rs < 0 { 1 } else { 0 };
}
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::sradix => {
let rs = ctx.gpr[instr.rs()] as i64;
let sh = instr.sh64();
if sh == 0 {
ctx.gpr[instr.ra()] = rs as u64;
ctx.xer_ca = 0;
} else {
let result = rs >> sh;
ctx.xer_ca = if rs < 0 && (rs as u64) << (64 - sh) != 0 { 1 } else { 0 };
ctx.gpr[instr.ra()] = result as u64;
}
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
// ===== Rotate =====
PpcOpcode::rlwinmx => {
let rs = ctx.gpr[instr.rs()] as u32;
let sh = instr.sh();
let mb = instr.mb();
let me = instr.me();
let rotated = rs.rotate_left(sh);
let mask = rlw_mask(mb, me);
ctx.gpr[instr.ra()] = (rotated & mask) as u64;
// PPCBUG-024: 32-bit ABI CR0 view.
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::rlwimix => {
let rs = ctx.gpr[instr.rs()] as u32;
let sh = instr.sh();
let mb = instr.mb();
let me = instr.me();
let rotated = rs.rotate_left(sh);
let mask = rlw_mask(mb, me);
let ra = ctx.gpr[instr.ra()] as u32;
ctx.gpr[instr.ra()] = ((rotated & mask) | (ra & !mask)) as u64;
// PPCBUG-025: 32-bit ABI CR0 view.
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::rlwnmx => {
let rs = ctx.gpr[instr.rs()] as u32;
let sh = ctx.gpr[instr.rb()] as u32 & 0x1F;
let mb = instr.mb();
let me = instr.me();
let rotated = rs.rotate_left(sh);
let mask = rlw_mask(mb, me);
ctx.gpr[instr.ra()] = (rotated & mask) as u64;
// PPCBUG-026: 32-bit ABI CR0 view.
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as u32 as i32 as i64); }
ctx.pc += 4;
}
PpcOpcode::rldiclx => {
let rs = ctx.gpr[instr.rs()];
let sh = instr.sh64();
let mb = instr.mb_md();
let rotated = rs.rotate_left(sh);
let mask = rld_mask_left(mb);
ctx.gpr[instr.ra()] = rotated & mask;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::rldicrx => {
let rs = ctx.gpr[instr.rs()];
let sh = instr.sh64();
let me = instr.mb_md();
let rotated = rs.rotate_left(sh);
let mask = rld_mask_right(me);
ctx.gpr[instr.ra()] = rotated & mask;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::rldicx => {
let rs = ctx.gpr[instr.rs()];
let sh = instr.sh64();
let mb = instr.mb_md();
let rotated = rs.rotate_left(sh);
let mask = rld_mask_left(mb) & rld_mask_right(63 - sh);
ctx.gpr[instr.ra()] = rotated & mask;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::rldimix => {
let rs = ctx.gpr[instr.rs()];
let sh = instr.sh64();
let mb = instr.mb_md();
let rotated = rs.rotate_left(sh);
let mask = rld_mask_left(mb) & rld_mask_right(63 - sh);
ctx.gpr[instr.ra()] = (rotated & mask) | (ctx.gpr[instr.ra()] & !mask);
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::rldclx => {
let rs = ctx.gpr[instr.rs()];
let sh = ctx.gpr[instr.rb()] & 0x3F;
let mb = instr.mb_md();
let rotated = rs.rotate_left(sh as u32);
let mask = rld_mask_left(mb);
ctx.gpr[instr.ra()] = rotated & mask;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
PpcOpcode::rldcrx => {
let rs = ctx.gpr[instr.rs()];
let sh = ctx.gpr[instr.rb()] & 0x3F;
let me = instr.mb_md();
let rotated = rs.rotate_left(sh as u32);
let mask = rld_mask_right(me);
ctx.gpr[instr.ra()] = rotated & mask;
if instr.rc_bit() { ctx.update_cr_signed(0, ctx.gpr[instr.ra()] as i64); }
ctx.pc += 4;
}
// ===== Compare =====
PpcOpcode::cmpi => {
let bf = instr.crfd();
if instr.l() {
// 64-bit compare. Compare directly so boundary i64 values
// (e.g. ra=i64::MIN, imm=1) don't mis-sign through a
// wrapped subtract.
let ra = ctx.gpr[instr.ra()] as i64;
let imm = instr.simm16() as i64;
ctx.cr[bf] = crate::context::CrField {
lt: ra < imm,
gt: ra > imm,
eq: ra == imm,
so: ctx.xer_so != 0,
};
} else {
let ra = ctx.gpr[instr.ra()] as i32;
let imm = instr.simm16() as i32;
ctx.cr[bf] = crate::context::CrField {
lt: ra < imm,
gt: ra > imm,
eq: ra == imm,
so: ctx.xer_so != 0,
};
}
ctx.pc += 4;
}
PpcOpcode::cmpli => {
let bf = instr.crfd();
if instr.l() {
let ra = ctx.gpr[instr.ra()];
let imm = instr.uimm16() as u64;
ctx.update_cr_unsigned(bf, ra, imm);
} else {
let ra = ctx.gpr[instr.ra()] as u32 as u64;
let imm = instr.uimm16() as u64;
ctx.update_cr_unsigned(bf, ra, imm);
}
ctx.pc += 4;
}
PpcOpcode::cmp => {
let bf = instr.crfd();
if instr.l() {
let ra = ctx.gpr[instr.ra()] as i64;
let rb = ctx.gpr[instr.rb()] as i64;
ctx.cr[bf] = crate::context::CrField {
lt: ra < rb,
gt: ra > rb,
eq: ra == rb,
so: ctx.xer_so != 0,
};
} else {
let ra = ctx.gpr[instr.ra()] as i32;
let rb = ctx.gpr[instr.rb()] as i32;
ctx.cr[bf] = crate::context::CrField {
lt: ra < rb,
gt: ra > rb,
eq: ra == rb,
so: ctx.xer_so != 0,
};
}
ctx.pc += 4;
}
PpcOpcode::cmpl => {
let bf = instr.crfd();
if instr.l() {
ctx.update_cr_unsigned(bf, ctx.gpr[instr.ra()], ctx.gpr[instr.rb()]);
} else {
ctx.update_cr_unsigned(bf, ctx.gpr[instr.ra()] as u32 as u64, ctx.gpr[instr.rb()] as u32 as u64);
}
ctx.pc += 4;
}
// ===== Branch =====
PpcOpcode::bx => {
let target = if instr.aa() {
instr.li() as u32
} else {
ctx.pc.wrapping_add(instr.li() as u32)
};
if instr.lk() {
ctx.lr = (ctx.pc + 4) as u64;
}
ctx.pc = target;
}
PpcOpcode::bcx => {
let bo = instr.bo();
let bi = instr.bi();
// Decrement CTR if needed
if bo & 0b00100 == 0 {
ctx.ctr = ctx.ctr.wrapping_sub(1);
}
let ctr_ok = (bo & 0b00100) != 0
|| (((ctx.ctr as u32) != 0) ^ ((bo & 0b00010) != 0));
let cond_ok = (bo & 0b10000) != 0
|| (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0));
if ctr_ok && cond_ok {
let target = if instr.aa() {
instr.bd() as u32
} else {
ctx.pc.wrapping_add(instr.bd() as u32)
};
if instr.lk() {
ctx.lr = (ctx.pc + 4) as u64;
}
ctx.pc = target;
} else {
if instr.lk() {
ctx.lr = (ctx.pc + 4) as u64;
}
ctx.pc += 4;
}
}
PpcOpcode::bclrx => {
let bo = instr.bo();
let bi = instr.bi();
if bo & 0b00100 == 0 {
ctx.ctr = ctx.ctr.wrapping_sub(1);
}
let ctr_ok = (bo & 0b00100) != 0
|| (((ctx.ctr as u32) != 0) ^ ((bo & 0b00010) != 0));
let cond_ok = (bo & 0b10000) != 0
|| (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0));
let next_pc = ctx.pc + 4;
if ctr_ok && cond_ok {
ctx.pc = (ctx.lr as u32) & !3;
} else {
ctx.pc = next_pc;
}
if instr.lk() {
ctx.lr = next_pc as u64;
}
}
PpcOpcode::bcctrx => {
let bo = instr.bo();
let bi = instr.bi();
let cond_ok = (bo & 0b10000) != 0
|| (ctx.get_cr_bit(bi) == ((bo & 0b01000) != 0));
if cond_ok {
let next_pc = ctx.pc + 4;
ctx.pc = (ctx.ctr as u32) & !3;
if instr.lk() {
ctx.lr = next_pc as u64;
}
} else {
if instr.lk() {
ctx.lr = (ctx.pc + 4) as u64;
}
ctx.pc += 4;
}
}
// ===== System call =====
PpcOpcode::sc => {
// PPCBUG-064: log non-zero LEV (`sc 2` is the Xbox 360 hypervisor-call
// convention; canary dispatches it to a different handler than `sc 0`).
// Routing LEV=2 requires a StepResult variant extension; deferred.
let lev = (instr.raw >> 5) & 0x7F;
if lev != 0 {
tracing::warn!(
"sc with LEV={} at {:#010x}: dispatched as plain SystemCall (HVcall routing not implemented)",
lev, ctx.pc
);
}
ctx.pc += 4;
return StepResult::SystemCall;
}
// ===== Load instructions =====
PpcOpcode::lwz => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
ctx.pc += 4;
}
PpcOpcode::lwzu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::lwzx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
ctx.pc += 4;
}
PpcOpcode::lwzux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u32(ea) as u64;
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::lbz => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64;
ctx.pc += 4;
}
PpcOpcode::lbzu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64;
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::lbzx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64;
ctx.pc += 4;
}
PpcOpcode::lbzux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u8(ea) as u64;
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::lhz => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64;
ctx.pc += 4;
}
PpcOpcode::lhzu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64;
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::lhzx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64;
ctx.pc += 4;
}
PpcOpcode::lha => {
    // Load Halfword Algebraic: the loaded halfword is sign-extended to the
    // full 64-bit register, matching the lwa/lwax/lwaux arms in this file.
    // The previous `as i16 as i32 as u32 as u64` chain sign-extended to
    // 32 bits only and zeroed the upper word.
    let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
    ctx.pc += 4;
}
PpcOpcode::lhax => {
    // Indexed form of lha: sign-extend the halfword to 64 bits.
    let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
    ctx.pc += 4;
}
PpcOpcode::lhzux => {
    // Zero-extending halfword load with update: rA receives the EA.
    let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as u64;
    ctx.gpr[instr.ra()] = ea as u64;
    ctx.pc += 4;
}
PpcOpcode::lhau => {
    // lha with update: sign-extend to 64 bits, then write the EA to rA.
    let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
    ctx.gpr[instr.ra()] = ea as u64;
    ctx.pc += 4;
}
PpcOpcode::lhaux => {
    // lhax with update: sign-extend to 64 bits, then write the EA to rA.
    let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
    ctx.gpr[instr.rd()] = mem.read_u16(ea) as i16 as i64 as u64;
    ctx.gpr[instr.ra()] = ea as u64;
    ctx.pc += 4;
}
PpcOpcode::ld => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u64(ea);
ctx.pc += 4;
}
PpcOpcode::ldx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u64(ea);
ctx.pc += 4;
}
PpcOpcode::lwa => {
// ISA "Load Word and Algebraic" — sign-extend to 64 bits per
// PowerISA. lwa is a 64-bit-mode opcode; canary uses
// `SignExtend(..., INT64_TYPE)`. P4-batch-5 mistakenly converted
// this to zero-extend "for 32-bit-ABI hazard mitigation"; the
// post-P8 end-to-end review caught the deviation. Restored to
// ISA-spec behavior (PPCBUG-105 reverted).
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
ctx.pc += 4;
}
PpcOpcode::lwax => {
// See PpcOpcode::lwa above: ISA sign-extend.
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
ctx.pc += 4;
}
PpcOpcode::lwaux => {
// See PpcOpcode::lwa above: ISA sign-extend.
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u32(ea) as i32 as i64 as u64;
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::ldu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.ds() as i64 as u64) as u32;
ctx.gpr[instr.rd()] = mem.read_u64(ea);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::ldux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.gpr[instr.rd()] = mem.read_u64(ea);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
// FP loads
PpcOpcode::lfs => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64;
ctx.pc += 4;
}
PpcOpcode::lfsx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64;
ctx.pc += 4;
}
PpcOpcode::lfd => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
ctx.fpr[instr.rd()] = mem.read_f64(ea);
ctx.pc += 4;
}
PpcOpcode::lfdx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.fpr[instr.rd()] = mem.read_f64(ea);
ctx.pc += 4;
}
PpcOpcode::lfsu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64;
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::lfsux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.fpr[instr.rd()] = mem.read_f32(ea) as f64;
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::lfdu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
ctx.fpr[instr.rd()] = mem.read_f64(ea);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::lfdux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
ctx.fpr[instr.rd()] = mem.read_f64(ea);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
// Reservation (lwarx/stwcx)
//
// M3.7 — when `ctx.reservation_table` is `Some` and the table is
// enabled, route reservations through the inter-thread table so
// concurrent host threads can mediate reservation conflicts.
// Otherwise (the default in lockstep mode), use the legacy
// per-`PpcContext` fields. Both paths leave the per-ctx fields
// in a coherent state so a flag flip mid-run doesn't corrupt
// outstanding reservations.
//
// PPCBUG-108: lwarx + stwcx. atomicity is provided by `ReservationTable`
// in the M3 multi-HW-thread runtime. The legacy per-ctx fallback (when
// `reservation_table` is None or the table is disabled) cannot observe
// stores from other host threads — a store by thread B cannot clear
// `ctx_A.has_reservation`. This path is only correct in strict lockstep
// (single-host-thread) mode. The M3 scheduler MUST enable the table
// before spawning a second host thread. See stwcx./stdcx. for the
// debug_assert that fires if a non-primary slot takes this path.
PpcOpcode::lwarx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
let val = mem.read_u32(ea);
ctx.gpr[instr.rd()] = val as u64;
ctx.reserved_line = ea & !RESERVATION_MASK;
ctx.reserved_val = val as u64;
ctx.has_reservation = true;
ctx.reservation_width = 4; // PPCBUG-151: word reservation
if let Some(t) = &ctx.reservation_table {
if t.is_enabled() {
ctx.reserved_generation = t.reserve(ea, ctx.hw_id);
}
}
ctx.pc += 4;
}
// PPCBUG-108: see lwarx comment above. stwcx. legacy path cannot observe
// cross-thread reservation invalidations; only safe in lockstep mode.
PpcOpcode::stwcx => {
    let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
    let line = ea & !RESERVATION_MASK;
    // Borrow the table instead of cloning the handle on every stwcx.:
    // only the `reservation_table` field is borrowed, so the other `ctx`
    // fields written below remain accessible (same pattern as lwarx).
    let table_route = ctx
        .reservation_table
        .as_ref()
        .filter(|t| t.is_enabled());
    // PPCBUG-151: stwcx. requires a word (lwarx) reservation;
    // a doubleword (ldarx) reservation must not commit here.
    let width_ok = ctx.reservation_width == 4;
    let success = if let Some(t) = table_route {
        // Table-routed: success iff the slot still holds our
        // reservation AND the per-ctx flag agrees (the per-ctx
        // flag would be cleared by an intervening write or
        // context switch).
        ctx.has_reservation
            && width_ok
            && ctx.reserved_line == line
            && t.try_commit(ea, ctx.reserved_generation, ctx.hw_id)
    } else {
        // Legacy per-ctx path (M2 default / lockstep).
        // PPCBUG-108: fires on non-primary HW slots under misconfig —
        // if the table is disabled while workers are active, slots
        // 1..N will trip this assert, surfacing the misconfiguration
        // early in debug builds. Note: hw_id==0 (primary slot) taking
        // this path while other slots run in parallel would NOT be
        // caught; that case requires the table to be enabled instead.
        debug_assert!(
            ctx.hw_id == 0,
            "PPCBUG-108: legacy per-ctx stwcx. on non-primary HW slot \
             (hw_id={}) — ReservationTable must be enabled under --parallel",
            ctx.hw_id
        );
        ctx.has_reservation && width_ok && ctx.reserved_line == line
    };
    if success {
        mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
    } else if let Some(t) = table_route {
        // Failed stwcx: if we held the reservation in the table
        // (someone else displaced our gen), release it from the
        // counter so `has_active_reservers` returns to zero
        // when no real reserver exists.
        t.release(ea, ctx.reserved_generation, ctx.hw_id);
    }
    // CR0 = 0b00 || success || SO.
    ctx.cr[0] = crate::context::CrField {
        lt: false,
        gt: false,
        eq: success,
        so: ctx.xer_so != 0,
    };
    ctx.has_reservation = false;
    ctx.reservation_width = 0; // PPCBUG-151: always clear on exit
    ctx.pc += 4;
}
// ===== Store instructions =====
PpcOpcode::stw => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
ctx.pc += 4;
}
PpcOpcode::stwu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::stwx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
ctx.pc += 4;
}
PpcOpcode::stwux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u32(ea, ctx.gpr[instr.rs()] as u32);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::stb => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u8(ea, ctx.gpr[instr.rs()] as u8);
ctx.pc += 4;
}
PpcOpcode::stbu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u8(ea, ctx.gpr[instr.rs()] as u8);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::stbx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u8(ea, ctx.gpr[instr.rs()] as u8);
ctx.pc += 4;
}
PpcOpcode::stbux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u8(ea, ctx.gpr[instr.rs()] as u8);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::sth => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u16(ea, ctx.gpr[instr.rs()] as u16);
ctx.pc += 4;
}
PpcOpcode::sthu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u16(ea, ctx.gpr[instr.rs()] as u16);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::sthx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u16(ea, ctx.gpr[instr.rs()] as u16);
ctx.pc += 4;
}
PpcOpcode::sthux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u16(ea, ctx.gpr[instr.rs()] as u16);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::std => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.ds() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u64(ea, ctx.gpr[instr.rs()]);
ctx.pc += 4;
}
PpcOpcode::stdx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u64(ea, ctx.gpr[instr.rs()]);
ctx.pc += 4;
}
PpcOpcode::stdu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.ds() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u64(ea, ctx.gpr[instr.rs()]);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::stdux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u64(ea, ctx.gpr[instr.rs()]);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
// FP stores
PpcOpcode::stfs => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
ctx.pc += 4;
}
PpcOpcode::stfsu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::stfsx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
ctx.pc += 4;
}
PpcOpcode::stfsux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f32(ea, ctx.fpr[instr.rs()] as f32);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::stfd => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f64(ea, ctx.fpr[instr.rs()]);
ctx.pc += 4;
}
PpcOpcode::stfdu => {
let ea = ctx.gpr[instr.ra()].wrapping_add(instr.d() as i64 as u64) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f64(ea, ctx.fpr[instr.rs()]);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::stfdx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f64(ea, ctx.fpr[instr.rs()]);
ctx.pc += 4;
}
PpcOpcode::stfdux => {
let ea = ctx.gpr[instr.ra()].wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_f64(ea, ctx.fpr[instr.rs()]);
ctx.gpr[instr.ra()] = ea as u64;
ctx.pc += 4;
}
PpcOpcode::stfiwx => {
// Store FP as integer word: stores low 32 bits of FPR as-is
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u32(ea, ctx.fpr[instr.rs()].to_bits() as u32);
ctx.pc += 4;
}
// String load/store
PpcOpcode::lswi => {
let mut ea = if instr.ra() == 0 { 0u32 } else { ctx.gpr[instr.ra()] as u32 };
let nb = if instr.nb() == 0 { 32 } else { instr.nb() };
let mut rd = instr.rd();
let mut bytes_left = nb;
while bytes_left > 0 {
let mut val = 0u32;
for byte_idx in 0..4 {
if bytes_left == 0 { break; }
let b = mem.read_u8(ea) as u32;
val |= b << (24 - byte_idx * 8);
ea = ea.wrapping_add(1);
bytes_left -= 1;
}
ctx.gpr[rd] = val as u64;
rd = (rd + 1) % 32;
}
ctx.pc += 4;
}
PpcOpcode::stswi => {
let mut ea = if instr.ra() == 0 { 0u32 } else { ctx.gpr[instr.ra()] as u32 };
let nb = if instr.nb() == 0 { 32 } else { instr.nb() };
let mut rs = instr.rs();
let mut bytes_left = nb;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() {
let first_line = ea & !RESERVATION_MASK;
let last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK;
t.invalidate_for_write(first_line);
if last_line != first_line { t.invalidate_for_write(last_line); }
}
}
while bytes_left > 0 {
let val = ctx.gpr[rs] as u32;
for byte_idx in 0..4 {
if bytes_left == 0 { break; }
mem.write_u8(ea, (val >> (24 - byte_idx * 8)) as u8);
ea = ea.wrapping_add(1);
bytes_left -= 1;
}
rs = (rs + 1) % 32;
}
ctx.pc += 4;
}
// ===== Special register moves =====
PpcOpcode::mfspr => {
let spr = instr.spr();
ctx.gpr[instr.rd()] = match spr {
crate::context::spr::XER => ctx.xer() as u64,
crate::context::spr::LR => ctx.lr,
crate::context::spr::CTR => ctx.ctr,
crate::context::spr::DEC => ctx.dec as u64,
crate::context::spr::TBL => ctx.timebase & 0xFFFF_FFFF,
crate::context::spr::TBU => ctx.timebase >> 32,
crate::context::spr::VRSAVE => ctx.vrsave as u64,
// Xbox 360 Xenon processor signature (from canary).
crate::context::spr::PVR => 0x0071_0800,
// Benign SPRs — titles read these but we don't model them.
crate::context::spr::SPRG0
| crate::context::spr::SPRG1
| crate::context::spr::SPRG2
| crate::context::spr::SPRG3
| crate::context::spr::HID0
| crate::context::spr::HID1
| crate::context::spr::DAR
| crate::context::spr::DSISR
| crate::context::spr::PIR => 0,
_ => {
tracing::warn!("mfspr: unimplemented SPR {}", spr);
0
}
};
ctx.pc += 4;
}
PpcOpcode::mtspr => {
let spr = instr.spr();
let val = ctx.gpr[instr.rs()];
match spr {
crate::context::spr::XER => ctx.set_xer(val as u32),
crate::context::spr::LR => ctx.lr = val,
crate::context::spr::CTR => ctx.ctr = val as u32 as u64,
crate::context::spr::DEC => ctx.dec = val as u32,
crate::context::spr::TBL_WRITE => {
ctx.timebase = (ctx.timebase & 0xFFFF_FFFF_0000_0000) | (val & 0xFFFF_FFFF);
}
crate::context::spr::TBU_WRITE => {
ctx.timebase = (ctx.timebase & 0x0000_0000_FFFF_FFFF) | ((val & 0xFFFF_FFFF) << 32);
}
crate::context::spr::VRSAVE => ctx.vrsave = val as u32,
// Benign writes — swallow silently to avoid false Unimplemented
// warnings on SPRs that have no observable effect in userspace.
crate::context::spr::SPRG0
| crate::context::spr::SPRG1
| crate::context::spr::SPRG2
| crate::context::spr::SPRG3
| crate::context::spr::HID0
| crate::context::spr::HID1
| crate::context::spr::DAR
| crate::context::spr::DSISR => {}
_ => {
tracing::warn!("mtspr: unimplemented SPR {}", spr);
}
}
ctx.pc += 4;
}
PpcOpcode::mfcr => {
ctx.gpr[instr.rd()] = ctx.cr() as u64;
ctx.pc += 4;
}
PpcOpcode::mtcrf => {
    // Move To CR Fields: each set bit of CRM (most-significant bit maps
    // to CR field 0) selects one 4-bit CR field to replace from rS.
    let field_select = instr.crm();
    let source = ctx.gpr[instr.rs()] as u32;
    // Union of the nibble masks for every selected field.
    let write_mask = (0..8u32)
        .filter(|&field| field_select & (1 << (7 - field)) != 0)
        .fold(0u32, |acc, field| acc | (0xF << (28 - field * 4)));
    let merged = (ctx.cr() & !write_mask) | (source & write_mask);
    ctx.set_cr(merged);
    ctx.pc += 4;
}
PpcOpcode::mfmsr => {
ctx.gpr[instr.rd()] = ctx.msr;
ctx.pc += 4;
}
PpcOpcode::mtmsr | PpcOpcode::mtmsrd => {
// PPCBUG-078: mtmsrd L=1 is a partial-MSR-write — only MSR[EE]
// (u64 bit 15) and MSR[RI] (u64 bit 0) are modified; all other
// MSR bits preserved. Used by kernel code to re-enable external
// interrupts without disturbing the rest of the MSR.
let l = (instr.raw >> (31 - 15)) & 1;
let rs = ctx.gpr[instr.rs()];
if matches!(instr.opcode, PpcOpcode::mtmsrd) && l == 1 {
let mask: u64 = (1u64 << 15) | 1u64;
ctx.msr = (ctx.msr & !mask) | (rs & mask);
} else {
ctx.msr = rs;
}
ctx.pc += 4;
}
PpcOpcode::mftb => {
    // Move From Time Base. On a 64-bit implementation TBR 268 returns
    // the entire 64-bit time base in rD (previously this was masked to
    // the low 32 bits, losing the upper word for single-instruction
    // 64-bit reads). TBR 269 returns the upper word, zero-extended.
    // Any other TBR encoding reads as zero.
    let tbr = instr.spr();
    ctx.gpr[instr.rd()] = match tbr {
        268 => ctx.timebase,
        269 => ctx.timebase >> 32,
        _ => 0,
    };
    ctx.pc += 4;
}
// CR logical
PpcOpcode::crand => { cr_logical(ctx, instr, |a, b| a & b); ctx.pc += 4; }
PpcOpcode::crandc => { cr_logical(ctx, instr, |a, b| a & !b); ctx.pc += 4; }
PpcOpcode::creqv => { cr_logical(ctx, instr, |a, b| !(a ^ b)); ctx.pc += 4; }
PpcOpcode::crnand => { cr_logical(ctx, instr, |a, b| !(a & b)); ctx.pc += 4; }
PpcOpcode::crnor => { cr_logical(ctx, instr, |a, b| !(a | b)); ctx.pc += 4; }
PpcOpcode::cror => { cr_logical(ctx, instr, |a, b| a | b); ctx.pc += 4; }
PpcOpcode::crorc => { cr_logical(ctx, instr, |a, b| a | !b); ctx.pc += 4; }
PpcOpcode::crxor => { cr_logical(ctx, instr, |a, b| a ^ b); ctx.pc += 4; }
PpcOpcode::mcrf => {
ctx.cr[instr.crfd()] = ctx.cr[instr.crfs()];
ctx.pc += 4;
}
// ===== Cache/sync (no-ops in interpreter) =====
PpcOpcode::dcbf | PpcOpcode::dcbi | PpcOpcode::dcbst |
PpcOpcode::dcbt | PpcOpcode::dcbtst | PpcOpcode::icbi |
PpcOpcode::sync | PpcOpcode::eieio | PpcOpcode::isync => {
ctx.pc += 4;
}
PpcOpcode::dcbz => {
// Zero 32 bytes at effective address
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) as u32) & !31;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
for i in 0..8 {
mem.write_u32(ea + i * 4, 0);
}
ctx.pc += 4;
}
PpcOpcode::dcbz128 => {
// Zero 128 bytes
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) as u32) & !127;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
for i in 0..32 {
mem.write_u32(ea + i * 4, 0);
}
ctx.pc += 4;
}
// ===== Load multiple =====
PpcOpcode::lmw => {
    // PPCBUG-125: PowerISA marks `lmw` invalid when rA is in [rT..31];
    // canary skips the write to rA in that case to preserve the EA base.
    let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
    let start = base.wrapping_add(instr.d() as i64 as u64);
    // Register rD+k is loaded from start + 4*k; the slot belonging to rA
    // is skipped but still occupies its 4 bytes of the sequence.
    for (slot, r) in (instr.rd()..32).enumerate() {
        if r == instr.ra() {
            continue;
        }
        let ea = start.wrapping_add(4 * slot as u64);
        ctx.gpr[r] = mem.read_u32(ea as u32) as u64;
    }
    ctx.pc += 4;
}
PpcOpcode::stmw => {
let mut ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
ea = ea.wrapping_add(instr.d() as i64 as u64);
// PPCBUG-160: stmw can span two cache lines when (32-rs)*4 > one line.
// Iterate over every touched line so any reservation on a later line
// is also invalidated (same guarantee as single-word stores).
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() {
let start_ea = ea as u32;
let last_ea = start_ea.wrapping_add((32 - instr.rs() as u32) * 4).wrapping_sub(1);
let line_size = RESERVATION_MASK + 1;
let mut line = start_ea & !RESERVATION_MASK;
loop {
t.invalidate_for_write(line);
if line >= (last_ea & !RESERVATION_MASK) { break; }
line = line.wrapping_add(line_size);
}
}
}
for r in instr.rs()..32 {
mem.write_u32(ea as u32, ctx.gpr[r] as u32);
ea = ea.wrapping_add(4);
}
ctx.pc += 4;
}
// ===== Trap =====
PpcOpcode::tw | PpcOpcode::twi | PpcOpcode::td | PpcOpcode::tdi => {
// PPCBUG-063: save CIA before incrementing so a trap handler reads
// the faulting instruction address, not CIA+4.
// PPCBUG-065: log the SIMM type code on `twi 31, r0, IMM` (Xbox 360
// typed-trap convention used by the CRT/kernel for C++ exception
// class dispatch). The audit notes this is relevant to the Sylpheed
// throw investigation; routing the type code via a payload requires
// a StepResult enum extension that's deferred for now.
let trap_pc = ctx.pc;
let a = ctx.gpr[instr.ra()];
let b = match instr.opcode {
PpcOpcode::twi | PpcOpcode::tdi => instr.simm16() as i64 as u64,
_ => ctx.gpr[instr.rb()],
};
let width = match instr.opcode {
PpcOpcode::tw | PpcOpcode::twi => trap::TrapWidth::Word,
_ => trap::TrapWidth::Doubleword,
};
let fired = trap::evaluate(instr.to(), a, b, width);
if fired {
let typed_trap_simm = if matches!(instr.opcode, PpcOpcode::twi)
&& instr.to() == 31 && instr.ra() == 0 {
Some(instr.simm16() as u16)
} else { None };
tracing::warn!(
"Trap fired at {:#010x}: {:?} TO={} a={:#x} b={:#x}{}",
trap_pc, instr.opcode, instr.to(), a, b,
typed_trap_simm.map_or(String::new(), |t| format!(" typed_trap_simm={:#06x}", t))
);
// Leave ctx.pc at CIA (NOT NIA) so trap handlers / SEH delivery
// can read the faulting instruction address from ctx.pc.
return StepResult::Trap;
}
ctx.pc += 4;
}
// ===== Byte-reverse loads =====
PpcOpcode::lwbrx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
let val = mem.read_u32(ea);
ctx.gpr[instr.rd()] = val.swap_bytes() as u64;
ctx.pc += 4;
}
PpcOpcode::lhbrx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
let val = mem.read_u16(ea);
ctx.gpr[instr.rd()] = val.swap_bytes() as u64;
ctx.pc += 4;
}
PpcOpcode::stwbrx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u32(ea, (ctx.gpr[instr.rs()] as u32).swap_bytes());
ctx.pc += 4;
}
PpcOpcode::sthbrx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = ea.wrapping_add(ctx.gpr[instr.rb()]) as u32;
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u16(ea, (ctx.gpr[instr.rs()] as u16).swap_bytes());
ctx.pc += 4;
}
// ===== VMX/VMX128: Vector Load/Store =====
PpcOpcode::lvx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32; // aligned
let mut bytes = [0u8; 16];
for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes);
ctx.pc += 4;
}
PpcOpcode::lvx128 => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
let mut bytes = [0u8; 16];
for i in 0..16 { bytes[i] = mem.read_u8(ea + i as u32); }
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(bytes);
ctx.pc += 4;
}
PpcOpcode::stvx => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
// PPCBUG-511: stvx was missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let bytes = ctx.vr[instr.rs()].as_bytes();
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
ctx.pc += 4;
}
PpcOpcode::stvx128 => {
let ea = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = (ea.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
// PPCBUG-511: stvx128 was missing invalidate_for_write.
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
let bytes = ctx.vr[instr.vs128()].as_bytes();
for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
ctx.pc += 4;
}
// lvebx, lvehx, lvewx: each loads a single element (byte / halfword / word) into its lane of vD; the other lanes keep their prior contents.
PpcOpcode::lvebx => {
// Load 1 byte from EA into vD[EA & 0xF]. PowerISA marks the
// other lanes as "undefined" but real Xenon (and Canary)
// preserve their prior contents, so seed from vD.
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
let slot = (ea & 0xF) as usize;
let mut bytes = ctx.vr[instr.rd()].as_bytes();
bytes[slot] = mem.read_u8(ea);
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes);
ctx.pc += 4;
}
PpcOpcode::lvehx => {
// Load a halfword from (EA & ~1) into vD at halfword slot
// (EA & 0xF) >> 1. Other halfword lanes preserved (see lvebx).
let base = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
let ea_unaligned = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
let ea = ea_unaligned & !0x1u32;
let slot = ((ea_unaligned & 0xF) >> 1) as usize;
let mut bytes = ctx.vr[instr.rd()].as_bytes();
let h = mem.read_u16(ea);
bytes[slot * 2] = (h >> 8) as u8;
bytes[slot * 2 + 1] = (h & 0xFF) as u8;
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(bytes);
ctx.pc += 4;
}
PpcOpcode::lvewx => {
    // Load a word from (EA & ~3) into word lane (EA & 0xF) >> 2 of vD,
    // stored big-endian. Other lanes are preserved (see lvebx).
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let raw_addr = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
    let addr = raw_addr & !0x3u32;
    let first = 4 * (((raw_addr & 0xF) >> 2) as usize);
    let vd = instr.rd();
    let mut lanes = ctx.vr[vd].as_bytes();
    let [b0, b1, b2, b3] = mem.read_u32(addr).to_be_bytes();
    lanes[first] = b0;
    lanes[first + 1] = b1;
    lanes[first + 2] = b2;
    lanes[first + 3] = b3;
    ctx.vr[vd] = xenia_types::Vec128::from_bytes(lanes);
    ctx.pc += 4;
}
PpcOpcode::stvebx => {
    // Store one byte: byte lane (EA & 0xF) of vS is written to EA.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let addr = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
    // PPCBUG-512: stvebx used to skip the reservation-table notification.
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            table.invalidate_for_write(addr);
        }
    }
    let lane = (addr & 0xF) as usize;
    let byte = ctx.vr[instr.rs()].as_bytes()[lane];
    mem.write_u8(addr, byte);
    ctx.pc += 4;
}
PpcOpcode::stvehx => {
    // Store one halfword: halfword lane (EA & 0xF) >> 1 of vS is written
    // to EA & ~1, assembled big-endian from the register bytes.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let raw_addr = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
    let addr = raw_addr & !0x1u32;
    // PPCBUG-512: stvehx used to skip the reservation-table notification.
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            table.invalidate_for_write(addr);
        }
    }
    let first = 2 * (((raw_addr & 0xF) >> 1) as usize);
    let src = ctx.vr[instr.rs()].as_bytes();
    let half = u16::from_be_bytes([src[first], src[first + 1]]);
    mem.write_u16(addr, half);
    ctx.pc += 4;
}
PpcOpcode::stvewx => {
    // Store one word: word lane (EA & 0xF) >> 2 of vS is written to
    // EA & ~3, assembled big-endian from the register bytes.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let raw_addr = base.wrapping_add(ctx.gpr[instr.rb()]) as u32;
    let addr = raw_addr & !0x3u32;
    // PPCBUG-512: stvewx used to skip the reservation-table notification.
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            table.invalidate_for_write(addr);
        }
    }
    let first = 4 * (((raw_addr & 0xF) >> 2) as usize);
    let src = ctx.vr[instr.rs()].as_bytes();
    let word = u32::from_be_bytes([
        src[first],
        src[first + 1],
        src[first + 2],
        src[first + 3],
    ]);
    mem.write_u32(addr, word);
    ctx.pc += 4;
}
PpcOpcode::lvxl | PpcOpcode::lvxl128 => {
    // Identical to lvx; the "last use" cache hint is ignored. Reads 16
    // bytes from the 16-byte-aligned effective address into vD.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let addr = (base.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
    // from_fn walks the indices in ascending order, so the reads happen
    // in address order.
    let data: [u8; 16] = std::array::from_fn(|i| mem.read_u8(addr + i as u32));
    let vd = match instr.opcode {
        PpcOpcode::lvxl128 => instr.vd128(),
        _ => instr.rd(),
    };
    ctx.vr[vd] = xenia_types::Vec128::from_bytes(data);
    ctx.pc += 4;
}
PpcOpcode::stvxl | PpcOpcode::stvxl128 => {
    // Writes the 16 bytes of vS to the 16-byte-aligned effective address.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let addr = (base.wrapping_add(ctx.gpr[instr.rb()]) & !0xF) as u32;
    // PPCBUG-511: stvxl/stvxl128 used to skip the reservation-table
    // notification.
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            table.invalidate_for_write(addr);
        }
    }
    let vs = match instr.opcode {
        PpcOpcode::stvxl128 => instr.vs128(),
        _ => instr.rs(),
    };
    let src = ctx.vr[vs].as_bytes();
    for (offset, &byte) in src.iter().enumerate() {
        mem.write_u8(addr + offset as u32, byte);
    }
    ctx.pc += 4;
}
// ===== VMX: Float Arithmetic =====
PpcOpcode::vaddfp => {
    // PPCBUG-435: VSCR.NJ=1 (Xbox 360 always boots with this set) requires
    // flush-to-zero on subnormal inputs and outputs. Canary VMX float
    // arithmetic flushes denormals unconditionally, so each lane flushes
    // both operands and then the sum.
    let lhs = ctx.vr[instr.ra()].as_f32x4();
    let rhs = ctx.vr[instr.rb()].as_f32x4();
    let sum: [f32; 4] = std::array::from_fn(|lane| {
        let x = vmx::flush_denorm(lhs[lane]);
        let y = vmx::flush_denorm(rhs[lane]);
        vmx::flush_denorm(x + y)
    });
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(sum);
    ctx.pc += 4;
}
PpcOpcode::vaddfp128 => {
    // PPCBUG-435: VMX128 encoding of vaddfp; operands and the per-lane
    // sum are all passed through the denormal flush.
    let lhs = ctx.vr[instr.va128()].as_f32x4();
    let rhs = ctx.vr[instr.vb128()].as_f32x4();
    let sum: [f32; 4] = std::array::from_fn(|lane| {
        let x = vmx::flush_denorm(lhs[lane]);
        let y = vmx::flush_denorm(rhs[lane]);
        vmx::flush_denorm(x + y)
    });
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(sum);
    ctx.pc += 4;
}
PpcOpcode::vsubfp => {
    // PPCBUG-435: per-lane vA - vB with denormal inputs and outputs
    // flushed.
    let lhs = ctx.vr[instr.ra()].as_f32x4();
    let rhs = ctx.vr[instr.rb()].as_f32x4();
    let diff: [f32; 4] = std::array::from_fn(|lane| {
        let x = vmx::flush_denorm(lhs[lane]);
        let y = vmx::flush_denorm(rhs[lane]);
        vmx::flush_denorm(x - y)
    });
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(diff);
    ctx.pc += 4;
}
PpcOpcode::vsubfp128 => {
    // PPCBUG-435: VMX128 encoding of vsubfp; per-lane vA - vB with
    // denormal inputs and outputs flushed.
    let lhs = ctx.vr[instr.va128()].as_f32x4();
    let rhs = ctx.vr[instr.vb128()].as_f32x4();
    let diff: [f32; 4] = std::array::from_fn(|lane| {
        let x = vmx::flush_denorm(lhs[lane]);
        let y = vmx::flush_denorm(rhs[lane]);
        vmx::flush_denorm(x - y)
    });
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(diff);
    ctx.pc += 4;
}
PpcOpcode::vmaddfp => {
    // vD = (vA * vC) + vB, computed as a fused multiply-add per lane.
    // AltiVec unconditionally flushes denormal *inputs* to 0 regardless
    // of VSCR[NJ] (confirmed on POWER8 hw).
    let va = ctx.vr[instr.ra()].as_f32x4();
    let vb = ctx.vr[instr.rb()].as_f32x4();
    let vc = ctx.vr[instr.rc()].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| {
        let multiplicand = vmx::flush_denorm(va[lane]);
        let addend = vmx::flush_denorm(vb[lane]);
        let multiplier = vmx::flush_denorm(vc[lane]);
        // PPCBUG-437: the result is flushed as well.
        vmx::flush_denorm(multiplicand.mul_add(multiplier, addend))
    });
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmaddfp128 => {
    // ISA: (VD) <- (VA × VD) + VB. VD is both the second multiplicand
    // and the destination. Canary InstrEmit_vmaddfp128
    // (ppc_emit_altivec.cc:806-809) emits MulAdd(VA, VD, VB).
    // PPCBUG-424: an earlier version computed VA×VB+VD, i.e. with the
    // VB and VD roles swapped.
    let va = ctx.vr[instr.va128()].as_f32x4();
    let vb = ctx.vr[instr.vb128()].as_f32x4();
    let vd_in = ctx.vr[instr.vd128()].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| {
        let multiplicand = vmx::flush_denorm(va[lane]);
        let addend = vmx::flush_denorm(vb[lane]);
        let multiplier = vmx::flush_denorm(vd_in[lane]);
        // PPCBUG-437: the result is flushed as well.
        vmx::flush_denorm(multiplicand.mul_add(multiplier, addend))
    });
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vnmsubfp => {
    // vD = -(vA * vC - vB) = vB - vA * vC, with the same denormal
    // flushing as vmaddfp.
    let va = ctx.vr[instr.ra()].as_f32x4();
    let vb = ctx.vr[instr.rb()].as_f32x4();
    let vc = ctx.vr[instr.rc()].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| {
        let x = vmx::flush_denorm(va[lane]);
        let y = vmx::flush_denorm(vb[lane]);
        let z = vmx::flush_denorm(vc[lane]);
        // PPCBUG-426: one fused rounding step rather than the two-step
        // (b - a*c).
        let fused = x.mul_add(z, -y);
        vmx::flush_denorm(-fused)
    });
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vnmsubfp128 => {
    // VMX128 form: vD <- -((vA * vB) - vD) = vD - (vA * vB). Canary
    // routes this through `InstrEmit_vnmsubfp_` with the same argument
    // swap, which flushes all inputs unconditionally.
    let va = ctx.vr[instr.va128()].as_f32x4();
    let vb = ctx.vr[instr.vb128()].as_f32x4();
    let vd_in = ctx.vr[instr.vd128()].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| {
        let x = vmx::flush_denorm(va[lane]);
        let y = vmx::flush_denorm(vb[lane]);
        let acc = vmx::flush_denorm(vd_in[lane]);
        // PPCBUG-427: one fused rounding step.
        let fused = x.mul_add(y, -acc);
        vmx::flush_denorm(-fused)
    });
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmulfp128 => {
    // PPCBUG-435 + PPCBUG-437: per-lane vA * vB with denormal inputs and
    // the product flushed.
    let lhs = ctx.vr[instr.va128()].as_f32x4();
    let rhs = ctx.vr[instr.vb128()].as_f32x4();
    let product: [f32; 4] = std::array::from_fn(|lane| {
        let x = vmx::flush_denorm(lhs[lane]);
        let y = vmx::flush_denorm(rhs[lane]);
        vmx::flush_denorm(x * y)
    });
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(product);
    ctx.pc += 4;
}
PpcOpcode::vmaxfp => {
    // Per-lane maximum of vA and vB, delegated to vmx::max_nan.
    let lhs = ctx.vr[instr.ra()].as_f32x4();
    let rhs = ctx.vr[instr.rb()].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| vmx::max_nan(lhs[lane], rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmaxfp128 => {
    // VMX128 encoding of vmaxfp: per-lane vmx::max_nan(vA, vB).
    let lhs = ctx.vr[instr.va128()].as_f32x4();
    let rhs = ctx.vr[instr.vb128()].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| vmx::max_nan(lhs[lane], rhs[lane]));
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vminfp => {
    // Per-lane minimum of vA and vB, delegated to vmx::min_nan.
    let lhs = ctx.vr[instr.ra()].as_f32x4();
    let rhs = ctx.vr[instr.rb()].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| vmx::min_nan(lhs[lane], rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vminfp128 => {
    // VMX128 encoding of vminfp: per-lane vmx::min_nan(vA, vB).
    let lhs = ctx.vr[instr.va128()].as_f32x4();
    let rhs = ctx.vr[instr.vb128()].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| vmx::min_nan(lhs[lane], rhs[lane]));
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vrefp | PpcOpcode::vrefp128 => {
    // Per-lane reciprocal of vB, computed as a full-precision 1/x.
    // NOTE(review): unlike the PPCBUG-435 arithmetic arms, neither the
    // input nor the result goes through vmx::flush_denorm here — confirm
    // whether that is intentional.
    let (vb, vd) = match instr.opcode {
        PpcOpcode::vrefp128 => (instr.vb128(), instr.vd128()),
        _ => (instr.rb(), instr.rd()),
    };
    let src = ctx.vr[vb].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| 1.0 / src[lane]);
    ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vrsqrtefp | PpcOpcode::vrsqrtefp128 => {
    // Per-lane reciprocal square root of vB, computed as a
    // full-precision 1/sqrt(x).
    // NOTE(review): no vmx::flush_denorm on input or output, unlike the
    // PPCBUG-435 arithmetic arms — confirm whether that is intentional.
    let (vb, vd) = match instr.opcode {
        PpcOpcode::vrsqrtefp128 => (instr.vb128(), instr.vd128()),
        _ => (instr.rb(), instr.rd()),
    };
    let src = ctx.vr[vb].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| {
        let root = src[lane].sqrt();
        1.0 / root
    });
    ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
// ===== VMX: Float Compare =====
PpcOpcode::vcmpeqfp | PpcOpcode::vcmpeqfp128 => {
    // Per-lane float equality: a lane becomes all-ones when vA == vB,
    // zero otherwise. With the record bit set, CR6 is updated from the
    // resulting mask.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_f32x4();
    let rhs = ctx.vr[vb].as_f32x4();
    let mask: [u32; 4] =
        std::array::from_fn(|lane| if lhs[lane] == rhs[lane] { 0xFFFF_FFFF } else { 0 });
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(mask);
    let record = match instr.opcode {
        PpcOpcode::vcmpeqfp128 => instr.vx128r_rc_bit(),
        _ => instr.vc_rc_bit(),
    };
    if record {
        update_cr6_from_vmask(&mask, ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::vcmpgefp | PpcOpcode::vcmpgefp128 => {
    // Per-lane float compare: a lane becomes all-ones when vA >= vB,
    // zero otherwise. With the record bit set, CR6 is updated from the
    // resulting mask.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_f32x4();
    let rhs = ctx.vr[vb].as_f32x4();
    let mask: [u32; 4] =
        std::array::from_fn(|lane| if lhs[lane] >= rhs[lane] { 0xFFFF_FFFF } else { 0 });
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(mask);
    let record = match instr.opcode {
        PpcOpcode::vcmpgefp128 => instr.vx128r_rc_bit(),
        _ => instr.vc_rc_bit(),
    };
    if record {
        update_cr6_from_vmask(&mask, ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::vcmpgtfp | PpcOpcode::vcmpgtfp128 => {
    // Per-lane float compare: a lane becomes all-ones when vA > vB,
    // zero otherwise. With the record bit set, CR6 is updated from the
    // resulting mask.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_f32x4();
    let rhs = ctx.vr[vb].as_f32x4();
    let mask: [u32; 4] =
        std::array::from_fn(|lane| if lhs[lane] > rhs[lane] { 0xFFFF_FFFF } else { 0 });
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(mask);
    let record = match instr.opcode {
        PpcOpcode::vcmpgtfp128 => instr.vx128r_rc_bit(),
        _ => instr.vc_rc_bit(),
    };
    if record {
        update_cr6_from_vmask(&mask, ctx);
    }
    ctx.pc += 4;
}
// ===== VMX: Logical =====
PpcOpcode::vand | PpcOpcode::vand128 => {
    // vD = vA & vB, evaluated word by word.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| lhs[w] & rhs[w]);
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vandc | PpcOpcode::vandc128 => {
    // vD = vA & !vB, evaluated word by word.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| lhs[w] & !rhs[w]);
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vor | PpcOpcode::vor128 => {
    // vD = vA | vB, evaluated word by word.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| lhs[w] | rhs[w]);
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vxor | PpcOpcode::vxor128 => {
    // vD = vA ^ vB, evaluated word by word.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| lhs[w] ^ rhs[w]);
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vnor | PpcOpcode::vnor128 => {
    // vD = !(vA | vB), evaluated word by word.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| !(lhs[w] | rhs[w]));
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vsel | PpcOpcode::vsel128 => {
    // Bitwise select: vD = (vA & ~vC) | (vB & vC). In the VMX128 form
    // the mask register vC is the one named by the vD field.
    let (va, vb, vc, vd) = match instr.opcode {
        PpcOpcode::vsel128 => {
            let dest = instr.vd128();
            (instr.va128(), instr.vb128(), dest, dest)
        }
        _ => (instr.ra(), instr.rb(), instr.rc(), instr.rd()),
    };
    let when_clear = ctx.vr[va].as_u32x4();
    let when_set = ctx.vr[vb].as_u32x4();
    let mask = ctx.vr[vc].as_u32x4();
    let out: [u32; 4] =
        std::array::from_fn(|w| (when_clear[w] & !mask[w]) | (when_set[w] & mask[w]));
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
// ===== VMX: Permute/Splat/Shift =====
PpcOpcode::vperm | PpcOpcode::vperm128 => {
    // Byte permute: the low five bits of each control byte in vC index
    // into the 32-byte concatenation vA:vB.
    let (va, vb, vc, vd) = match instr.opcode {
        PpcOpcode::vperm128 => {
            (instr.va128(), instr.vb128(), instr.vc128_2(), instr.vd128())
        }
        _ => (instr.ra(), instr.rb(), instr.rc(), instr.rd()),
    };
    let first = ctx.vr[va].as_bytes();
    let second = ctx.vr[vb].as_bytes();
    let control = ctx.vr[vc].as_bytes();
    let out: [u8; 16] = std::array::from_fn(|i| {
        let sel = (control[i] & 0x1F) as usize;
        match sel {
            0..=15 => first[sel],
            _ => second[sel - 16],
        }
    });
    ctx.vr[vd] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
PpcOpcode::vsldoi => {
    // vD = bytes [sh, sh + 16) of the 32-byte concatenation vA:vB.
    // The 4-bit SH field is read from bits 6-9 of the raw instruction.
    let first = ctx.vr[instr.ra()].as_bytes();
    let second = ctx.vr[instr.rb()].as_bytes();
    let shift = ((instr.raw >> 6) & 0xF) as usize;
    let out: [u8; 16] = std::array::from_fn(|i| {
        let pos = shift + i;
        if pos < 16 { first[pos] } else { second[pos - 16] }
    });
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
PpcOpcode::vsldoi128 => {
    // VMX128 form of vsldoi: vD = bytes [sh, sh + 16) of vA:vB. The
    // decoded shift is clamped to 16 so the window never runs past the
    // 32-byte concatenation.
    let first = ctx.vr[instr.va128()].as_bytes();
    let second = ctx.vr[instr.vb128()].as_bytes();
    let shift = (instr.vx128_5_sh() as usize).min(16);
    let out: [u8; 16] = std::array::from_fn(|i| {
        let pos = shift + i;
        if pos < 16 { first[pos] } else { second[pos - 16] }
    });
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
PpcOpcode::vspltw => {
    // Broadcast word lane UIMM (two bits taken from raw >> 16) of vB to
    // all four words of vD.
    let lane = ((instr.raw >> 16) & 0x3) as usize;
    let word = ctx.vr[instr.rb()].as_u32x4()[lane];
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4(word, word, word, word);
    ctx.pc += 4;
}
PpcOpcode::vspltw128 => {
    // VMX128 form of vspltw: broadcast word lane UIMM of vB to all four
    // words of vD.
    let lane = ((instr.raw >> 16) & 0x3) as usize;
    let word = ctx.vr[instr.vb128()].as_u32x4()[lane];
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4(word, word, word, word);
    ctx.pc += 4;
}
PpcOpcode::vsplth => {
    // Broadcast halfword lane UIMM (three bits taken from raw >> 16) of
    // vB to all eight halfwords of vD.
    let lane = ((instr.raw >> 16) & 0x7) as usize;
    let half = ctx.vr[instr.rb()].as_u16x8()[lane];
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array([half; 8]);
    ctx.pc += 4;
}
PpcOpcode::vspltb => {
    // Broadcast byte lane UIMM (four bits taken from raw >> 16) of vB to
    // all sixteen bytes of vD.
    let lane = ((instr.raw >> 16) & 0xF) as usize;
    let byte = ctx.vr[instr.rb()].as_bytes()[lane];
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes([byte; 16]);
    ctx.pc += 4;
}
PpcOpcode::vspltisw | PpcOpcode::vspltisw128 => {
    // Broadcast the sign-extended 5-bit immediate to all four words.
    let field = ((instr.raw >> 16) & 0x1F) as i32;
    // Shift bit 4 up to the sign position and arithmetic-shift it back:
    // a branch-free 5-bit sign extension.
    let word = ((field << 27) >> 27) as u32;
    let vd = match instr.opcode {
        PpcOpcode::vspltisw128 => instr.vd128(),
        _ => instr.rd(),
    };
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4(word, word, word, word);
    ctx.pc += 4;
}
PpcOpcode::vspltisb => {
    // Broadcast the sign-extended 5-bit immediate to all sixteen bytes.
    let field = ((instr.raw >> 16) & 0x1F) as i8;
    // Move bit 4 into the i8 sign bit, then arithmetic-shift back.
    let byte = ((field << 3) >> 3) as u8;
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes([byte; 16]);
    ctx.pc += 4;
}
PpcOpcode::vspltish => {
    // Broadcast the sign-extended 5-bit immediate to all eight halfwords.
    let field = ((instr.raw >> 16) & 0x1F) as i16;
    // Move bit 4 into the i16 sign bit, then arithmetic-shift back.
    let half = ((field << 11) >> 11) as u16;
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array([half; 8]);
    ctx.pc += 4;
}
// ===== VMX: Merge/Shuffle =====
PpcOpcode::vmrghw | PpcOpcode::vmrghw128 => {
    // Merge high words: interleave words 0 and 1 of vA and vB, giving
    // [a0, b0, a1, b1].
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let (a0, a1) = (lhs[0], lhs[1]);
    let (b0, b1) = (rhs[0], rhs[1]);
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4(a0, b0, a1, b1);
    ctx.pc += 4;
}
PpcOpcode::vmrglw | PpcOpcode::vmrglw128 => {
    // Merge low words: interleave words 2 and 3 of vA and vB, giving
    // [a2, b2, a3, b3].
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let (a2, a3) = (lhs[2], lhs[3]);
    let (b2, b3) = (rhs[2], rhs[3]);
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4(a2, b2, a3, b3);
    ctx.pc += 4;
}
// ===== VMX: Integer Arithmetic =====
PpcOpcode::vadduwm => {
    // Per-word modulo (wrapping) add: vD = vA + vB.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| lhs[w].wrapping_add(rhs[w]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vsubuwm => {
    // Per-word modulo (wrapping) subtract: vD = vA - vB.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| lhs[w].wrapping_sub(rhs[w]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
// ===== VMX: Shift =====
PpcOpcode::vslw | PpcOpcode::vslw128 => {
    // Per-word logical left shift of vA by the low five bits of the
    // matching word in vB.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let value = ctx.vr[va].as_u32x4();
    let count = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| value[w] << (count[w] & 0x1F));
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vsrw | PpcOpcode::vsrw128 => {
    // Per-word logical right shift of vA by the low five bits of the
    // matching word in vB.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let value = ctx.vr[va].as_u32x4();
    let count = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| value[w] >> (count[w] & 0x1F));
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vsraw | PpcOpcode::vsraw128 => {
    // Per-word arithmetic (sign-propagating) right shift of vA by the
    // low five bits of the matching word in vB.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let value = ctx.vr[va].as_u32x4();
    let count = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| {
        let signed = value[w] as i32;
        (signed >> (count[w] & 0x1F)) as u32
    });
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vrlw | PpcOpcode::vrlw128 => {
    // Per-word rotate left of vA by the low five bits of the matching
    // word in vB.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let value = ctx.vr[va].as_u32x4();
    let count = ctx.vr[vb].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|w| value[w].rotate_left(count[w] & 0x1F));
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
// VMX: Round/Convert
PpcOpcode::vrfiz | PpcOpcode::vrfiz128 => {
    // Round each float lane of vB toward zero.
    let (vb, vd) = match instr.opcode {
        PpcOpcode::vrfiz128 => (instr.vb128(), instr.vd128()),
        _ => (instr.rb(), instr.rd()),
    };
    let src = ctx.vr[vb].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| src[lane].trunc());
    ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vrfin | PpcOpcode::vrfin128 => {
    // Round each float lane of vB to the nearest integer.
    // PPCBUG-432: the ISA wants ties-to-even, NOT Rust's `round()`
    // (which rounds halves away from zero).
    let (vb, vd) = match instr.opcode {
        PpcOpcode::vrfin128 => (instr.vb128(), instr.vd128()),
        _ => (instr.rb(), instr.rd()),
    };
    let src = ctx.vr[vb].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| src[lane].round_ties_even());
    ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vrfip | PpcOpcode::vrfip128 => {
    // Round each float lane of vB toward +infinity.
    let (vb, vd) = match instr.opcode {
        PpcOpcode::vrfip128 => (instr.vb128(), instr.vd128()),
        _ => (instr.rb(), instr.rd()),
    };
    let src = ctx.vr[vb].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| src[lane].ceil());
    ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vrfim | PpcOpcode::vrfim128 => {
    // Round each float lane of vB toward -infinity.
    let (vb, vd) = match instr.opcode {
        PpcOpcode::vrfim128 => (instr.vb128(), instr.vd128()),
        _ => (instr.rb(), instr.rd()),
    };
    let src = ctx.vr[vb].as_f32x4();
    let out: [f32; 4] = std::array::from_fn(|lane| src[lane].floor());
    ctx.vr[vd] = xenia_types::Vec128::from_f32x4_array(out);
    ctx.pc += 4;
}
// VMX: MFVSCR/MTVSCR — VSCR lives in word 3; only NJ (bit 16) and
// SAT (bit 31) are defined. As in Canary, ctx.vscr holds a full Vec128:
// mtvscr overwrites it with vB, while mfvscr returns only word 3 with
// the other three words zeroed (PPCBUG-080).
PpcOpcode::mfvscr => {
    // PPCBUG-080: the ISA places VSCR in the rightmost word of vD with
    // bytes 0-11 zeroed. An earlier version copied all 128 bits of
    // ctx.vscr, leaking stale upper data to the guest.
    let [_, _, _, vscr] = ctx.vscr.as_u32x4();
    let widened = [0, 0, 0, vscr];
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(widened);
    ctx.pc += 4;
}
PpcOpcode::mtvscr => {
    // Copy the whole of vB into the context's VSCR storage.
    let src = instr.rb();
    ctx.vscr = ctx.vr[src];
    ctx.pc += 4;
}
// ===== VMX: lvsl/lvsr (generate permute vectors) =====
PpcOpcode::lvsl | PpcOpcode::lvsl128 => {
    // Build a permute control vector for a left shift: byte i of vD is
    // sh + i, where sh is the low four bits of (rA|0) + rB.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let addr = base.wrapping_add(ctx.gpr[instr.rb()]);
    let start = (addr & 0xF) as u8;
    let control: [u8; 16] = std::array::from_fn(|i| start + i as u8);
    let vd = match instr.opcode {
        PpcOpcode::lvsl128 => instr.vd128(),
        _ => instr.rd(),
    };
    ctx.vr[vd] = xenia_types::Vec128::from_bytes(control);
    ctx.pc += 4;
}
PpcOpcode::lvsr | PpcOpcode::lvsr128 => {
    // Build a permute control vector for a right shift: byte i of vD is
    // (16 - sh) + i, where sh is the low four bits of (rA|0) + rB.
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let addr = base.wrapping_add(ctx.gpr[instr.rb()]);
    let start = 16 - (addr & 0xF) as u8;
    let control: [u8; 16] = std::array::from_fn(|i| start + i as u8);
    let vd = match instr.opcode {
        PpcOpcode::lvsr128 => instr.vd128(),
        _ => instr.rd(),
    };
    ctx.vr[vd] = xenia_types::Vec128::from_bytes(control);
    ctx.pc += 4;
}
// ===== VMX: Integer compare =====
PpcOpcode::vcmpequw | PpcOpcode::vcmpequw128 => {
    // Per-word integer equality: a word becomes all-ones when vA == vB,
    // zero otherwise. With the record bit set, CR6 is updated from the
    // resulting mask.
    let (va, vb, vd) = vmx_reg_triple(instr);
    let lhs = ctx.vr[va].as_u32x4();
    let rhs = ctx.vr[vb].as_u32x4();
    let mask: [u32; 4] =
        std::array::from_fn(|w| if lhs[w] == rhs[w] { 0xFFFF_FFFF } else { 0 });
    ctx.vr[vd] = xenia_types::Vec128::from_u32x4_array(mask);
    let record = match instr.opcode {
        PpcOpcode::vcmpequw128 => instr.vx128r_rc_bit(),
        _ => instr.vc_rc_bit(),
    };
    if record {
        update_cr6_from_vmask(&mask, ctx);
    }
    ctx.pc += 4;
}
// ===== FPU: Arithmetic =====
PpcOpcode::faddx => {
    // frD = frA + frB (double precision). Invalid-operation checks run
    // before the add; FPSCR is updated from the result afterwards.
    let (lhs, rhs) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_add(ctx, lhs, rhs, false);
    let sum = lhs + rhs;
    ctx.fpr[instr.rd()] = sum;
    let finite_inputs = lhs.is_finite() && rhs.is_finite();
    fpscr::update_after_op(ctx, sum, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::faddsx => {
    // frD = frA + frB, with the sum passed through to_single.
    let (lhs, rhs) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_add(ctx, lhs, rhs, false);
    let sum = to_single(ctx, lhs + rhs);
    ctx.fpr[instr.rd()] = sum;
    let finite_inputs = lhs.is_finite() && rhs.is_finite();
    fpscr::update_after_op(ctx, sum, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fsubx => {
    // frD = frA - frB (double precision). The invalid-operation check is
    // told this is a subtraction via its final `true` argument.
    let (lhs, rhs) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_add(ctx, lhs, rhs, true);
    let diff = lhs - rhs;
    ctx.fpr[instr.rd()] = diff;
    let finite_inputs = lhs.is_finite() && rhs.is_finite();
    fpscr::update_after_op(ctx, diff, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fsubsx => {
    // frD = frA - frB, with the difference passed through to_single.
    let (lhs, rhs) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_add(ctx, lhs, rhs, true);
    let diff = to_single(ctx, lhs - rhs);
    ctx.fpr[instr.rd()] = diff;
    let finite_inputs = lhs.is_finite() && rhs.is_finite();
    fpscr::update_after_op(ctx, diff, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fmulx => {
    // A-form: frD = frA * frC. The second operand comes from the rc()
    // field (bits 21-25), not rb().
    let (lhs, rhs) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rc()]);
    fpscr::check_invalid_mul(ctx, lhs, rhs);
    let product = lhs * rhs;
    ctx.fpr[instr.rd()] = product;
    let finite_inputs = lhs.is_finite() && rhs.is_finite();
    fpscr::update_after_op(ctx, product, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fmulsx => {
    // frD = frA * frC, with the product passed through to_single.
    let (lhs, rhs) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rc()]);
    fpscr::check_invalid_mul(ctx, lhs, rhs);
    let product = to_single(ctx, lhs * rhs);
    ctx.fpr[instr.rd()] = product;
    let finite_inputs = lhs.is_finite() && rhs.is_finite();
    fpscr::update_after_op(ctx, product, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fdivx => {
    // frD = frA / frB (double precision). Invalid-operation and
    // zero-divide checks both run before the division.
    let (num, den) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_div(ctx, num, den);
    fpscr::check_zero_divide(ctx, num, den);
    let quotient = num / den;
    ctx.fpr[instr.rd()] = quotient;
    let ordinary = num.is_finite() && den.is_finite() && den != 0.0;
    fpscr::update_after_op(ctx, quotient, ordinary);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fdivsx => {
    // frD = frA / frB, with the quotient passed through to_single.
    let (num, den) = (ctx.fpr[instr.ra()], ctx.fpr[instr.rb()]);
    fpscr::check_invalid_div(ctx, num, den);
    fpscr::check_zero_divide(ctx, num, den);
    let quotient = to_single(ctx, num / den);
    ctx.fpr[instr.rd()] = quotient;
    let ordinary = num.is_finite() && den.is_finite() && den != 0.0;
    fpscr::update_after_op(ctx, quotient, ordinary);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
// ===== FPU: Multiply-Add =====
PpcOpcode::fmaddx => {
    // frD = frA * frC + frB as a fused multiply-add.
    // PPCBUG-202: VXISI is derived from the input operands, not from the
    // intermediate `a*c` (which has the wrong sign on overflow).
    let mul_lhs = ctx.fpr[instr.ra()];
    let mul_rhs = ctx.fpr[instr.rc()];
    let addend = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, mul_lhs, mul_rhs);
    fpscr::check_invalid_fma_add(ctx, mul_lhs, mul_rhs, addend, false);
    let fused = mul_lhs.mul_add(mul_rhs, addend);
    ctx.fpr[instr.rd()] = fused;
    let finite_inputs = mul_lhs.is_finite() && addend.is_finite() && mul_rhs.is_finite();
    fpscr::update_after_op(ctx, fused, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fmaddsx => {
    // frD = frA * frC + frB, fused, then passed through to_single.
    // PPCBUG-181: the add step used to miss its VXISI check.
    let mul_lhs = ctx.fpr[instr.ra()];
    let mul_rhs = ctx.fpr[instr.rc()];
    let addend = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, mul_lhs, mul_rhs);
    fpscr::check_invalid_fma_add(ctx, mul_lhs, mul_rhs, addend, false);
    let fused = to_single(ctx, mul_lhs.mul_add(mul_rhs, addend));
    ctx.fpr[instr.rd()] = fused;
    let finite_inputs = mul_lhs.is_finite() && addend.is_finite() && mul_rhs.is_finite();
    fpscr::update_after_op(ctx, fused, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fmsubx => {
    // frD = frA * frC - frB as a fused multiply-add with a negated
    // addend.
    // PPCBUG-203: the subtract step used to miss its VXISI check.
    let mul_lhs = ctx.fpr[instr.ra()];
    let mul_rhs = ctx.fpr[instr.rc()];
    let subtrahend = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, mul_lhs, mul_rhs);
    fpscr::check_invalid_fma_add(ctx, mul_lhs, mul_rhs, subtrahend, true);
    let fused = mul_lhs.mul_add(mul_rhs, -subtrahend);
    ctx.fpr[instr.rd()] = fused;
    let finite_inputs =
        mul_lhs.is_finite() && subtrahend.is_finite() && mul_rhs.is_finite();
    fpscr::update_after_op(ctx, fused, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fmsubsx => {
    // frD = frA * frC - frB, fused, then passed through to_single.
    // PPCBUG-182: the subtract step used to miss its VXISI check.
    let mul_lhs = ctx.fpr[instr.ra()];
    let mul_rhs = ctx.fpr[instr.rc()];
    let subtrahend = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, mul_lhs, mul_rhs);
    fpscr::check_invalid_fma_add(ctx, mul_lhs, mul_rhs, subtrahend, true);
    let fused = to_single(ctx, mul_lhs.mul_add(mul_rhs, -subtrahend));
    ctx.fpr[instr.rd()] = fused;
    let finite_inputs =
        mul_lhs.is_finite() && subtrahend.is_finite() && mul_rhs.is_finite();
    fpscr::update_after_op(ctx, fused, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fnmaddx => {
    // frD = -(frA * frC + frB).
    // PPCBUG-203: VXISI check on the add step.
    // PPCBUG-205: a NaN result keeps its sign (it is not negated).
    let mul_lhs = ctx.fpr[instr.ra()];
    let mul_rhs = ctx.fpr[instr.rc()];
    let addend = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, mul_lhs, mul_rhs);
    fpscr::check_invalid_fma_add(ctx, mul_lhs, mul_rhs, addend, false);
    let fused = mul_lhs.mul_add(mul_rhs, addend);
    let negated = match fused.is_nan() {
        true => fused,
        false => -fused,
    };
    ctx.fpr[instr.rd()] = negated;
    let finite_inputs = mul_lhs.is_finite() && addend.is_finite() && mul_rhs.is_finite();
    fpscr::update_after_op(ctx, negated, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fnmaddsx => {
    // frD = -(round_to_single(frA * frC + frB)).
    // PPCBUG-181 + PPCBUG-183: VXISI + NaN sign preservation.
    let a = ctx.fpr[instr.ra()];
    let c = ctx.fpr[instr.rc()];
    let b = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, a, c);
    fpscr::check_invalid_fma_add(ctx, a, c, b, false);
    let fma = a.mul_add(c, b);
    // PowerISA rounds the multiply-add result to the target precision
    // under FPSCR[RN] and only then negates it. Negating first (as this
    // arm used to) rounds the wrong way when RN selects a directed mode
    // (toward +inf / -inf); symmetric modes are unaffected.
    let rounded = to_single(ctx, fma);
    // A NaN result keeps its sign bit (no negation).
    let result = if fma.is_nan() { rounded } else { -rounded };
    ctx.fpr[instr.rd()] = result;
    fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
    if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
    ctx.pc += 4;
}
PpcOpcode::fnmsubx => {
    // frD = -(frA * frC - frB).
    // PPCBUG-203: VXISI check on the subtract step.
    // PPCBUG-205: a NaN result keeps its sign (it is not negated).
    let mul_lhs = ctx.fpr[instr.ra()];
    let mul_rhs = ctx.fpr[instr.rc()];
    let subtrahend = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, mul_lhs, mul_rhs);
    fpscr::check_invalid_fma_add(ctx, mul_lhs, mul_rhs, subtrahend, true);
    let fused = mul_lhs.mul_add(mul_rhs, -subtrahend);
    let negated = match fused.is_nan() {
        true => fused,
        false => -fused,
    };
    ctx.fpr[instr.rd()] = negated;
    let finite_inputs =
        mul_lhs.is_finite() && subtrahend.is_finite() && mul_rhs.is_finite();
    fpscr::update_after_op(ctx, negated, finite_inputs);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fnmsubsx => {
    // frD = -(round_to_single(frA * frC - frB)).
    // PPCBUG-182 + PPCBUG-183: VXISI + NaN sign preservation.
    let a = ctx.fpr[instr.ra()];
    let c = ctx.fpr[instr.rc()];
    let b = ctx.fpr[instr.rb()];
    fpscr::check_invalid_mul(ctx, a, c);
    fpscr::check_invalid_fma_add(ctx, a, c, b, true);
    let fma = a.mul_add(c, -b);
    // PowerISA rounds the multiply-subtract result to the target
    // precision under FPSCR[RN] and only then negates it. Negating first
    // (as this arm used to) rounds the wrong way when RN selects a
    // directed mode (toward +inf / -inf); symmetric modes are unaffected.
    let rounded = to_single(ctx, fma);
    // A NaN result keeps its sign bit (no negation).
    let result = if fma.is_nan() { rounded } else { -rounded };
    ctx.fpr[instr.rd()] = result;
    fpscr::update_after_op(ctx, result, a.is_finite() && b.is_finite() && c.is_finite());
    if instr.rc_bit() { update_cr1_from_fpscr(ctx); }
    ctx.pc += 4;
}
// ===== FPU: Move/Sign =====
PpcOpcode::fmrx => {
    // Register move: frD = frB, value unchanged.
    let value = ctx.fpr[instr.rb()];
    ctx.fpr[instr.rd()] = value;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fabsx => {
    // frD = |frB|.
    let magnitude = ctx.fpr[instr.rb()].abs();
    ctx.fpr[instr.rd()] = magnitude;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fnegx => {
    // frD = -frB.
    let value = ctx.fpr[instr.rb()];
    ctx.fpr[instr.rd()] = -value;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fnabsx => {
    // frD = -|frB|.
    let magnitude = ctx.fpr[instr.rb()].abs();
    ctx.fpr[instr.rd()] = -magnitude;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
// ===== FPU: Select =====
PpcOpcode::fselx => {
    // frD = frC when frA >= 0.0, otherwise frB. A NaN in frA fails the
    // comparison and therefore selects frB.
    let chosen = if ctx.fpr[instr.ra()] >= 0.0 {
        instr.rc()
    } else {
        instr.rb()
    };
    ctx.fpr[instr.rd()] = ctx.fpr[chosen];
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
// ===== FPU: Square root / Reciprocal =====
PpcOpcode::fsqrtx => {
    // frD = sqrt(frB) (double precision).
    let operand = ctx.fpr[instr.rb()];
    // A strictly negative operand (not -0.0, not NaN) is an invalid
    // operation → VXSQRT.
    if operand < 0.0 {
        fpscr::set_exception(ctx, fpscr::VXSQRT);
    }
    if fpscr::is_snan(operand) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let root = operand.sqrt();
    ctx.fpr[instr.rd()] = root;
    fpscr::update_after_op(ctx, root, operand.is_finite());
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fsqrtsx => {
    // frD = sqrt(frB), with the root passed through to_single.
    let operand = ctx.fpr[instr.rb()];
    // A strictly negative operand (not -0.0, not NaN) is an invalid
    // operation → VXSQRT.
    if operand < 0.0 {
        fpscr::set_exception(ctx, fpscr::VXSQRT);
    }
    if fpscr::is_snan(operand) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let root = to_single(ctx, operand.sqrt());
    ctx.fpr[instr.rd()] = root;
    fpscr::update_after_op(ctx, root, operand.is_finite());
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fresx => {
    // Single-precision reciprocal estimate: frD = 1.0 / frB.
    // PPCBUG-184: the operand is first quantized to f32 to match
    // canary's `f.Recip(f.Convert(frB, FLOAT32_TYPE))`. Hardware produces
    // a ~12-bit LUT estimate; both emulators produce a fully-IEEE single
    // reciprocal, but quantizing at least makes the input precision match.
    let original = ctx.fpr[instr.rb()];
    let quantized = f64::from(original as f32);
    // Zero-divide is judged on the quantized operand; the SNaN test uses
    // the original register value.
    if quantized == 0.0 {
        fpscr::set_exception(ctx, fpscr::ZX);
    }
    if fpscr::is_snan(original) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let estimate = to_single(ctx, 1.0 / quantized);
    ctx.fpr[instr.rd()] = estimate;
    let ordinary = quantized.is_finite() && quantized != 0.0;
    fpscr::update_after_op(ctx, estimate, ordinary);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::frsqrtex => {
    // Reciprocal square root estimate: frD = 1.0 / sqrt(frB), computed
    // at full double precision.
    let operand = ctx.fpr[instr.rb()];
    // ±0.0 raises zero-divide.
    if operand == 0.0 {
        fpscr::set_exception(ctx, fpscr::ZX);
    }
    // A strictly negative operand (not -0.0, not NaN) → VXSQRT.
    if operand < 0.0 {
        fpscr::set_exception(ctx, fpscr::VXSQRT);
    }
    if fpscr::is_snan(operand) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let root = operand.sqrt();
    let estimate = 1.0 / root;
    ctx.fpr[instr.rd()] = estimate;
    let ordinary = operand.is_finite() && operand > 0.0;
    fpscr::update_after_op(ctx, estimate, ordinary);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
// ===== FPU: Rounding/Conversion =====
PpcOpcode::frspx => {
    // Round frB to single precision (via `to_single`, which honours FPSCR[RN]).
    // PPCBUG-225: XX is raised whenever the rounding changed a finite value,
    // which is the case for almost every frsp call.
    let source = ctx.fpr[instr.rb()];
    if fpscr::is_snan(source) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    let rounded = to_single(ctx, source);
    let inexact = source.is_finite() && rounded.is_finite() && rounded != source;
    if inexact {
        fpscr::set_exception(ctx, fpscr::XX);
    }
    ctx.fpr[instr.rd()] = rounded;
    fpscr::update_after_op(ctx, rounded, source.is_finite());
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fcfidx => {
    // Convert from integer doubleword: frD = (double)(int64_t)frB_as_bits.
    // PPCBUG-224: set XX when the conversion loses precision (only possible
    // once |i64| exceeds 2^53).
    let source = ctx.fpr[instr.rb()].to_bits() as i64;
    let converted = source as f64;
    // The round-trip comparison is done in i128. Casting back to i64 would
    // saturate: i64::MAX converts to 2^63, and `2^63 as i64` clamps to
    // i64::MAX again, so the inexact conversion would go unreported.
    if (converted as i128) != i128::from(source) {
        fpscr::set_exception(ctx, fpscr::XX);
    }
    ctx.fpr[instr.rd()] = converted;
    fpscr::set_fprf(ctx, fpscr::classify_fprf(converted));
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fctidx => {
    // Convert frB to a signed 64-bit integer, rounding via
    // `fpscr::round_to_i64` (per FPSCR[RN]); the integer bits are stored in frD.
    // PPCBUG-229: XX is raised for inexact (fractional) inputs.
    let operand = ctx.fpr[instr.rb()];
    let upper_limit = i64::MAX as f64;
    let lower_limit = i64::MIN as f64;
    let converted: u64 = if operand.is_nan() {
        // NaN → VXCVI (plus VXSNAN for a signalling NaN), most-negative result.
        let flags = if fpscr::is_snan(operand) {
            fpscr::VXCVI | fpscr::VXSNAN
        } else {
            fpscr::VXCVI
        };
        fpscr::set_exception(ctx, flags);
        0x8000_0000_0000_0000u64
    } else if operand >= upper_limit {
        // Too large: saturate to i64::MAX.
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x7FFF_FFFF_FFFF_FFFFu64
    } else if operand < lower_limit {
        // Too small: saturate to i64::MIN.
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x8000_0000_0000_0000u64
    } else {
        if operand.trunc() != operand {
            fpscr::set_exception(ctx, fpscr::XX);
        }
        fpscr::round_to_i64(ctx, operand) as u64
    };
    ctx.fpr[instr.rd()] = f64::from_bits(converted);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fctidzx => {
    // Convert frB to a signed 64-bit integer, always rounding toward zero.
    // PPCBUG-229: XX is raised for inexact (fractional) inputs.
    let operand = ctx.fpr[instr.rb()];
    let upper_limit = i64::MAX as f64;
    let lower_limit = i64::MIN as f64;
    let converted: u64 = if operand.is_nan() {
        let flags = if fpscr::is_snan(operand) {
            fpscr::VXCVI | fpscr::VXSNAN
        } else {
            fpscr::VXCVI
        };
        fpscr::set_exception(ctx, flags);
        0x8000_0000_0000_0000u64
    } else if operand >= upper_limit {
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x7FFF_FFFF_FFFF_FFFFu64
    } else if operand < lower_limit {
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x8000_0000_0000_0000u64
    } else {
        let truncated = operand.trunc();
        if truncated != operand {
            fpscr::set_exception(ctx, fpscr::XX);
        }
        (truncated as i64) as u64
    };
    ctx.fpr[instr.rd()] = f64::from_bits(converted);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fctiwx => {
    // Convert frB to a signed 32-bit integer word, rounding via
    // `fpscr::round_to_i32` (per FPSCR[RN]). The word is stored zero-extended
    // in the low half of frD's bit pattern.
    // PPCBUG-230: XX is raised for inexact (fractional) inputs.
    // NOTE(review): the range test below runs on the unrounded operand, so a
    // value like 2147483647.25 reports VXCVI even when the rounded result
    // would fit — confirm whether that is intended.
    let operand = ctx.fpr[instr.rb()];
    let word: u32 = if operand.is_nan() {
        let flags = if fpscr::is_snan(operand) {
            fpscr::VXCVI | fpscr::VXSNAN
        } else {
            fpscr::VXCVI
        };
        fpscr::set_exception(ctx, flags);
        0x8000_0000
    } else if operand > f64::from(i32::MAX) {
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x7FFF_FFFF
    } else if operand < f64::from(i32::MIN) {
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x8000_0000
    } else {
        if operand.trunc() != operand {
            fpscr::set_exception(ctx, fpscr::XX);
        }
        fpscr::round_to_i32(ctx, operand) as u32
    };
    ctx.fpr[instr.rd()] = f64::from_bits(u64::from(word));
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::fctiwzx => {
    // Convert frB to a signed 32-bit integer word, rounding toward zero.
    // The word is stored zero-extended in the low half of frD's bit pattern.
    // PPCBUG-230: XX is raised for inexact (fractional) inputs.
    //
    // The overflow test is applied to the *truncated* value: an operand such
    // as 2147483647.5 or -2147483648.5 truncates to a representable i32, so
    // it is merely inexact (XX), not an invalid conversion (VXCVI). The
    // stored result is the same either way; only the FPSCR flag differs.
    let operand = ctx.fpr[instr.rb()];
    let truncated = operand.trunc();
    let word: u32 = if operand.is_nan() {
        // NaN → VXCVI (plus VXSNAN for a signalling NaN), most-negative result.
        let flags = if fpscr::is_snan(operand) {
            fpscr::VXCVI | fpscr::VXSNAN
        } else {
            fpscr::VXCVI
        };
        fpscr::set_exception(ctx, flags);
        0x8000_0000
    } else if truncated > f64::from(i32::MAX) {
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x7FFF_FFFF
    } else if truncated < f64::from(i32::MIN) {
        fpscr::set_exception(ctx, fpscr::VXCVI);
        0x8000_0000
    } else {
        if truncated != operand {
            fpscr::set_exception(ctx, fpscr::XX);
        }
        truncated as i32 as u32
    };
    ctx.fpr[instr.rd()] = f64::from_bits(u64::from(word));
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
// ===== FPU: Compare =====
PpcOpcode::fcmpu => {
    // Unordered compare of frA with frB. The outcome goes to CR field crfD
    // and is mirrored into FPSCR[FPRF] (FL/FG/FE/FU).
    let fra = ctx.fpr[instr.ra()];
    let frb = ctx.fpr[instr.rb()];
    // `partial_cmp` yields None exactly when either operand is a NaN.
    let ordering = fra.partial_cmp(&frb);
    let (lt, gt, eq, so, fprf) = match ordering {
        None => (false, false, false, true, 0b0_0001),
        Some(std::cmp::Ordering::Less) => (true, false, false, false, 0b0_1000),
        Some(std::cmp::Ordering::Greater) => (false, true, false, false, 0b0_0100),
        Some(std::cmp::Ordering::Equal) => (false, false, true, false, 0b0_0010),
    };
    ctx.cr[instr.crfd()] = crate::context::CrField { lt, gt, eq, so };
    // fcmpu: VXSNAN on an SNaN input; no VXVC even for a QNaN.
    if ordering.is_none() && (fpscr::is_snan(fra) || fpscr::is_snan(frb)) {
        fpscr::set_exception(ctx, fpscr::VXSNAN);
    }
    fpscr::set_fprf(ctx, fprf);
    ctx.pc += 4;
}
PpcOpcode::fcmpo => {
    // Ordered compare: same result encoding as fcmpu, but any NaN operand
    // also raises VXVC (together with VXSNAN when the NaN is signalling).
    let fra = ctx.fpr[instr.ra()];
    let frb = ctx.fpr[instr.rb()];
    let ordering = fra.partial_cmp(&frb);
    let (lt, gt, eq, so, fprf) = match ordering {
        None => (false, false, false, true, 0b0_0001),
        Some(std::cmp::Ordering::Less) => (true, false, false, false, 0b0_1000),
        Some(std::cmp::Ordering::Greater) => (false, true, false, false, 0b0_0100),
        Some(std::cmp::Ordering::Equal) => (false, false, true, false, 0b0_0010),
    };
    ctx.cr[instr.crfd()] = crate::context::CrField { lt, gt, eq, so };
    if ordering.is_none() {
        let flags = if fpscr::is_snan(fra) || fpscr::is_snan(frb) {
            fpscr::VXSNAN | fpscr::VXVC
        } else {
            fpscr::VXVC
        };
        fpscr::set_exception(ctx, flags);
    }
    fpscr::set_fprf(ctx, fprf);
    ctx.pc += 4;
}
// ===== FPU: Status/Control =====
PpcOpcode::mffsx => {
    // Move from FPSCR: frD's bit pattern becomes the 32-bit FPSCR value,
    // zero-extended to 64 bits.
    let image = u64::from(ctx.fpscr);
    ctx.fpr[instr.rd()] = f64::from_bits(image);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::mtfsfx => {
    // Move to FPSCR fields: the 8-bit FM mask (instruction bits 7-14) selects
    // which 4-bit FPSCR fields are replaced by the low 32 bits of frB.
    let field_select = (instr.raw >> 17) & 0xFF;
    let source = ctx.fpr[instr.rb()].to_bits() as u32;
    // FM's most-significant bit governs the top nibble, and so on downwards.
    let mask = (0..8u32)
        .filter(|&field| field_select & (0x80 >> field) != 0)
        .fold(0u32, |acc, field| acc | (0xF000_0000 >> (field * 4)));
    ctx.fpscr = (source & mask) | (ctx.fpscr & !mask);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::mtfsb0x => {
    // Clear FPSCR bit crbD (big-endian bit numbering: bit 0 is the MSB).
    let bit_mask: u32 = 1 << (31 - instr.crbd());
    ctx.fpscr &= !bit_mask;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::mtfsb1x => {
    // Set FPSCR bit crbD (big-endian bit numbering: bit 0 is the MSB).
    let bit_mask: u32 = 1 << (31 - instr.crbd());
    ctx.fpscr |= bit_mask;
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
PpcOpcode::mtfsfix => {
    // Move to FPSCR field immediate: 4-bit field crfD is replaced by IMM.
    let nibble = (instr.raw >> 12) & 0xF;
    // Field 0 is the most-significant nibble of FPSCR.
    let offset = 28 - instr.crfd() as u32 * 4;
    let cleared = ctx.fpscr & !(0xF << offset);
    ctx.fpscr = cleared | (nibble << offset);
    if instr.rc_bit() {
        update_cr1_from_fpscr(ctx);
    }
    ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4b — Unaligned vector load/store
// ═════════════════════════════════════════════════════════════════
// lvlx / lvlx128 / lvlxl / lvlxl128: load left-aligned from EA.
PpcOpcode::lvlx | PpcOpcode::lvlxl => {
    // Left-aligned unaligned load into the register named by the rd field.
    let addr = ea_indexed(ctx, instr);
    let loaded = crate::vmx::load_vector_left(mem, addr);
    ctx.vr[instr.rd()] = loaded;
    ctx.pc += 4;
}
PpcOpcode::lvlx128 | PpcOpcode::lvlxl128 => {
    // VMX128 form of lvlx: destination comes from the vd128 field.
    let addr = ea_indexed(ctx, instr);
    let loaded = crate::vmx::load_vector_left(mem, addr);
    ctx.vr[instr.vd128()] = loaded;
    ctx.pc += 4;
}
PpcOpcode::lvrx | PpcOpcode::lvrxl => {
    // Right-aligned unaligned load into the register named by the rd field.
    let addr = ea_indexed(ctx, instr);
    let loaded = crate::vmx::load_vector_right(mem, addr);
    ctx.vr[instr.rd()] = loaded;
    ctx.pc += 4;
}
PpcOpcode::lvrx128 | PpcOpcode::lvrxl128 => {
    // VMX128 form of lvrx: destination comes from the vd128 field.
    let addr = ea_indexed(ctx, instr);
    let loaded = crate::vmx::load_vector_right(mem, addr);
    ctx.vr[instr.vd128()] = loaded;
    ctx.pc += 4;
}
PpcOpcode::stvlx | PpcOpcode::stvlxl => {
    let addr = ea_indexed(ctx, instr);
    // PPCBUG-513: stvlx/stvlxl previously skipped invalidate_for_write.
    // store_vector_left writes [ea, (ea & !0xF)+15]; even in the widest case
    // (ea & 0xF == 0) that is 16 bytes inside one 16-byte block, so ea+15
    // falls in the same 128-byte cache line. Both the first and the last
    // line are still checked, purely as a defensive measure.
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            let head_line = addr & !RESERVATION_MASK;
            let tail_line = addr.wrapping_add(15) & !RESERVATION_MASK;
            table.invalidate_for_write(head_line);
            if tail_line != head_line {
                table.invalidate_for_write(tail_line);
            }
        }
    }
    let value = ctx.vr[instr.rs()];
    crate::vmx::store_vector_left(mem, addr, value);
    ctx.pc += 4;
}
PpcOpcode::stvlx128 | PpcOpcode::stvlxl128 => {
    let addr = ea_indexed(ctx, instr);
    // PPCBUG-513: stvlx128/stvlxl128 previously skipped invalidate_for_write.
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            let head_line = addr & !RESERVATION_MASK;
            let tail_line = addr.wrapping_add(15) & !RESERVATION_MASK;
            table.invalidate_for_write(head_line);
            if tail_line != head_line {
                table.invalidate_for_write(tail_line);
            }
        }
    }
    let value = ctx.vr[instr.vs128()];
    crate::vmx::store_vector_left(mem, addr, value);
    ctx.pc += 4;
}
PpcOpcode::stvrx | PpcOpcode::stvrxl => {
    let addr = ea_indexed(ctx, instr);
    // PPCBUG-514: stvrx/stvrxl previously skipped invalidate_for_write.
    // store_vector_right writes [ea & !0xF, ea-1] (at most 15 bytes, all in
    // one 16-byte-aligned block). Both the first and the last line are still
    // checked as a defensive measure. With shift == 0 the store itself is a
    // no-op, but the (cheap) invalidation runs regardless.
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            let head_line = addr & !RESERVATION_MASK;
            let tail_line = addr.wrapping_add(15) & !RESERVATION_MASK;
            table.invalidate_for_write(head_line);
            if tail_line != head_line {
                table.invalidate_for_write(tail_line);
            }
        }
    }
    let value = ctx.vr[instr.rs()];
    crate::vmx::store_vector_right(mem, addr, value);
    ctx.pc += 4;
}
PpcOpcode::stvrx128 | PpcOpcode::stvrxl128 => {
    let addr = ea_indexed(ctx, instr);
    // PPCBUG-514: stvrx128/stvrxl128 previously skipped invalidate_for_write.
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            let head_line = addr & !RESERVATION_MASK;
            let tail_line = addr.wrapping_add(15) & !RESERVATION_MASK;
            table.invalidate_for_write(head_line);
            if tail_line != head_line {
                table.invalidate_for_write(tail_line);
            }
        }
    }
    let value = ctx.vr[instr.vs128()];
    crate::vmx::store_vector_right(mem, addr, value);
    ctx.pc += 4;
}
// lvewx128 / stvewx128: VMX128 element-indexed 32-bit load/store.
// Like lvewx the whole 16 bytes at the aligned EA go into VD; the
// element-of-interest is implied by EA's low bits.
PpcOpcode::lvewx128 => {
    // Load the whole 16-byte block containing EA; bytes are read one at a
    // time in ascending address order.
    let base = ea_indexed(ctx, instr) & !0xF;
    let block: [u8; 16] = std::array::from_fn(|offset| mem.read_u8(base + offset as u32));
    ctx.vr[instr.vd128()] = xenia_types::Vec128::from_bytes(block);
    ctx.pc += 4;
}
PpcOpcode::stvewx128 => {
    // Counterpart of stvewx: word-align EA, pick the single 32-bit lane that
    // EA addresses, and write just those 4 bytes.
    // PPCBUG-510: an earlier version masked with !0xF and stored all
    // 16 bytes, clobbering 12 neighbouring bytes on every execution.
    let raw_ea = ea_indexed(ctx, instr);
    let word_ea = raw_ea & !0x3u32;
    if let Some(table) = ctx.reservation_table.as_ref() {
        if table.is_enabled() && table.has_active_reservers() {
            table.invalidate_for_write(word_ea);
        }
    }
    // Lane index within the 16-byte vector, derived from EA's low bits.
    let lane_start = (((raw_ea & 0xF) >> 2) as usize) * 4;
    let src = ctx.vr[instr.vs128()].as_bytes();
    // Assemble the lane's four bytes most-significant first.
    let word = u32::from_be_bytes([
        src[lane_start],
        src[lane_start + 1],
        src[lane_start + 2],
        src[lane_start + 3],
    ]);
    mem.write_u32(word_ea, word);
    ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4a — VMX integer add/sub (modulo and saturating), mul, avg, sum
// ═════════════════════════════════════════════════════════════════
// -------- modulo add/sub (byte/halfword/word) --------
PpcOpcode::vaddubm => {
    // Per-byte modulo (wrapping) add.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let out: [u8; 16] = std::array::from_fn(|lane| lhs[lane].wrapping_add(rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
PpcOpcode::vsububm => {
    // Per-byte modulo (wrapping) subtract.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let out: [u8; 16] = std::array::from_fn(|lane| lhs[lane].wrapping_sub(rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
PpcOpcode::vadduhm => {
    // Per-halfword modulo (wrapping) add.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let out: [u16; 8] = std::array::from_fn(|lane| lhs[lane].wrapping_add(rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(out);
    ctx.pc += 4;
}
PpcOpcode::vsubuhm => {
    // Per-halfword modulo (wrapping) subtract.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let out: [u16; 8] = std::array::from_fn(|lane| lhs[lane].wrapping_sub(rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(out);
    ctx.pc += 4;
}
// vadduwm / vsubuwm are implemented above (modulo word add/sub arms).
// -------- saturating add/sub (signed + unsigned) --------
PpcOpcode::vaddubs => {
    // Unsigned byte saturating add; VSCR[SAT] is set if any lane clipped.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let mut clipped = false;
    let out: [u8; 16] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_add_u8(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
PpcOpcode::vsububs => {
    // Unsigned byte saturating subtract; VSCR[SAT] is set if any lane clipped.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let mut clipped = false;
    let out: [u8; 16] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_sub_u8(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
PpcOpcode::vaddsbs => {
    // Signed byte saturating add; VSCR[SAT] is set if any lane clipped.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
    let mut clipped = false;
    let out: [i8; 16] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_add_i8(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i8x16(out);
    ctx.pc += 4;
}
PpcOpcode::vsubsbs => {
    // Signed byte saturating subtract; VSCR[SAT] is set if any lane clipped.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
    let mut clipped = false;
    let out: [i8; 16] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_sub_i8(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i8x16(out);
    ctx.pc += 4;
}
PpcOpcode::vadduhs => {
    // Unsigned halfword saturating add; VSCR[SAT] is set if any lane clipped.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let mut clipped = false;
    let out: [u16; 8] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_add_u16(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(out);
    ctx.pc += 4;
}
PpcOpcode::vsubuhs => {
    // Unsigned halfword saturating subtract; VSCR[SAT] is set if any lane clipped.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let mut clipped = false;
    let out: [u16; 8] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_sub_u16(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(out);
    ctx.pc += 4;
}
PpcOpcode::vaddshs => {
    // Signed halfword saturating add; VSCR[SAT] is set if any lane clipped.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let mut clipped = false;
    let out: [i16; 8] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_add_i16(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(out);
    ctx.pc += 4;
}
PpcOpcode::vsubshs => {
    // Signed halfword saturating subtract; VSCR[SAT] is set if any lane clipped.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let mut clipped = false;
    let out: [i16; 8] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_sub_i16(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(out);
    ctx.pc += 4;
}
PpcOpcode::vadduws => {
    // Unsigned word saturating add; VSCR[SAT] is set if any lane clipped.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let mut clipped = false;
    let out: [u32; 4] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_add_u32(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vsubuws => {
    // Unsigned word saturating subtract; VSCR[SAT] is set if any lane clipped.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let mut clipped = false;
    let out: [u32; 4] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_sub_u32(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vaddsws => {
    // Signed word saturating add; VSCR[SAT] is set if any lane clipped.
    let lhs = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
    let mut clipped = false;
    let out: [i32; 4] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_add_i32(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
PpcOpcode::vsubsws => {
    // Signed word saturating subtract; VSCR[SAT] is set if any lane clipped.
    let lhs = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
    let mut clipped = false;
    let out: [i32; 4] = std::array::from_fn(|lane| {
        let (value, hit) = crate::vmx::sat_sub_i32(lhs[lane], rhs[lane]);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
// -------- vaddcuw / vsubcuw: per-lane carry / borrow out --------
PpcOpcode::vaddcuw => {
    // Add carry-out: each word lane becomes 1 if the unsigned add of the
    // corresponding lanes overflows 32 bits, otherwise 0.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let carries: [u32; 4] =
        std::array::from_fn(|lane| u32::from(lhs[lane].overflowing_add(rhs[lane]).1));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(carries);
    ctx.pc += 4;
}
PpcOpcode::vsubcuw => {
    // "Subtract Carryout": each lane is 1 when a >= b (no borrow), else 0.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let carries: [u32; 4] = std::array::from_fn(|lane| u32::from(lhs[lane] >= rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(carries);
    ctx.pc += 4;
}
// -------- averages --------
PpcOpcode::vavgub => {
    // Unsigned byte average, one lane at a time via `crate::vmx::avg_u8`.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let out: [u8; 16] = std::array::from_fn(|lane| crate::vmx::avg_u8(lhs[lane], rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(out);
    ctx.pc += 4;
}
PpcOpcode::vavgsb => {
    // Signed byte average via `crate::vmx::avg_i8`.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
    let out: [i8; 16] = std::array::from_fn(|lane| crate::vmx::avg_i8(lhs[lane], rhs[lane]));
    ctx.vr[instr.rd()] = crate::vmx::from_i8x16(out);
    ctx.pc += 4;
}
PpcOpcode::vavguh => {
    // Unsigned halfword average via `crate::vmx::avg_u16`.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let out: [u16; 8] = std::array::from_fn(|lane| crate::vmx::avg_u16(lhs[lane], rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(out);
    ctx.pc += 4;
}
PpcOpcode::vavgsh => {
    // Signed halfword average via `crate::vmx::avg_i16`.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let out: [i16; 8] = std::array::from_fn(|lane| crate::vmx::avg_i16(lhs[lane], rhs[lane]));
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(out);
    ctx.pc += 4;
}
PpcOpcode::vavguw => {
    // Unsigned word average via `crate::vmx::avg_u32`.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|lane| crate::vmx::avg_u32(lhs[lane], rhs[lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vavgsw => {
    // Signed word average via `crate::vmx::avg_i32`.
    let lhs = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
    let out: [i32; 4] = std::array::from_fn(|lane| crate::vmx::avg_i32(lhs[lane], rhs[lane]));
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
// -------- multiplies (even / odd lanes — see §5 hazard note) --------
// vmuleub: even u8 lanes (BE index 0,2,4,...,14) → u16 lanes.
PpcOpcode::vmuleub => {
    // Even unsigned bytes (indices 0,2,...,14) multiplied into u16 lanes.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let out: [u16; 8] =
        std::array::from_fn(|lane| u16::from(lhs[2 * lane]) * u16::from(rhs[2 * lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmuloub => {
    // Odd unsigned bytes (indices 1,3,...,15) multiplied into u16 lanes.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let out: [u16; 8] =
        std::array::from_fn(|lane| u16::from(lhs[2 * lane + 1]) * u16::from(rhs[2 * lane + 1]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmulesb => {
    // Even signed bytes multiplied into i16 lanes.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
    let out: [i16; 8] =
        std::array::from_fn(|lane| i16::from(lhs[2 * lane]) * i16::from(rhs[2 * lane]));
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(out);
    ctx.pc += 4;
}
PpcOpcode::vmulosb => {
    // Odd signed bytes multiplied into i16 lanes.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
    let out: [i16; 8] =
        std::array::from_fn(|lane| i16::from(lhs[2 * lane + 1]) * i16::from(rhs[2 * lane + 1]));
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(out);
    ctx.pc += 4;
}
PpcOpcode::vmuleuh => {
    // Even unsigned halfwords multiplied into u32 lanes.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let out: [u32; 4] =
        std::array::from_fn(|lane| u32::from(lhs[2 * lane]) * u32::from(rhs[2 * lane]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmulouh => {
    // Odd unsigned halfwords multiplied into u32 lanes.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let out: [u32; 4] =
        std::array::from_fn(|lane| u32::from(lhs[2 * lane + 1]) * u32::from(rhs[2 * lane + 1]));
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmulesh => {
    // Even signed halfwords multiplied into i32 lanes.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let out: [i32; 4] =
        std::array::from_fn(|lane| i32::from(lhs[2 * lane]) * i32::from(rhs[2 * lane]));
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
PpcOpcode::vmulosh => {
    // Odd signed halfwords multiplied into i32 lanes.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let out: [i32; 4] =
        std::array::from_fn(|lane| i32::from(lhs[2 * lane + 1]) * i32::from(rhs[2 * lane + 1]));
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
// -------- multiply-add halfword (saturating) --------
PpcOpcode::vmhaddshs => {
    // Multiply-high add, saturating:
    // vD[i] = sat_i16(((vA[i] * vB[i]) >> 15) + vC[i]).
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let addend = crate::vmx::as_i16x8(ctx.vr[instr.rc()]);
    let mut clipped = false;
    let out: [i16; 8] = std::array::from_fn(|lane| {
        let high = (i32::from(lhs[lane]) * i32::from(rhs[lane])) >> 15;
        let (value, hit) = crate::vmx::sat_i32_to_i16(high + i32::from(addend[lane]));
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(out);
    ctx.pc += 4;
}
PpcOpcode::vmhraddshs => {
    // Rounded variant: 0x4000 is added to the product before the >> 15,
    // then vC[i] is added and the sum saturated to i16.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let addend = crate::vmx::as_i16x8(ctx.vr[instr.rc()]);
    let mut clipped = false;
    let out: [i16; 8] = std::array::from_fn(|lane| {
        let high = (i32::from(lhs[lane]) * i32::from(rhs[lane]) + 0x4000) >> 15;
        let (value, hit) = crate::vmx::sat_i32_to_i16(high + i32::from(addend[lane]));
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i16x8(out);
    ctx.pc += 4;
}
PpcOpcode::vmladduhm => {
    // Multiply-low add (modulo): vD[i] = u16(vA[i] * vB[i] + vC[i]).
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let addend = ctx.vr[instr.rc()].as_u16x8();
    let out: [u16; 8] = std::array::from_fn(|lane| {
        lhs[lane].wrapping_mul(rhs[lane]).wrapping_add(addend[lane])
    });
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(out);
    ctx.pc += 4;
}
// -------- VMX sum-of-products --------
// vmsumubm: vD[i:u32] = sum over j in [0..4] of vA[4i+j:u8] * vB[4i+j:u8] + vC[i].
PpcOpcode::vmsumubm => {
    // Each u32 lane: vC[i] plus the four u8*u8 products of its bytes, modulo 2^32.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let acc = ctx.vr[instr.rc()].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|word| {
        (0..4).fold(acc[word], |sum, k| {
            let idx = 4 * word + k;
            sum.wrapping_add(u32::from(lhs[idx]) * u32::from(rhs[idx]))
        })
    });
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmsummbm => {
    // Mixed-sign form: signed bytes from vA times unsigned bytes from vB,
    // accumulated (wrapping) onto the signed words of vC.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let acc = crate::vmx::as_i32x4(ctx.vr[instr.rc()]);
    let out: [i32; 4] = std::array::from_fn(|word| {
        (0..4).fold(acc[word], |sum, k| {
            let idx = 4 * word + k;
            sum.wrapping_add(i32::from(lhs[idx]) * i32::from(rhs[idx]))
        })
    });
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
PpcOpcode::vmsumuhm => {
    // Each u32 lane: two u16*u16 products plus vC[i], modulo 2^32.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let acc = ctx.vr[instr.rc()].as_u32x4();
    let out: [u32; 4] = std::array::from_fn(|word| {
        let even = u32::from(lhs[2 * word]) * u32::from(rhs[2 * word]);
        let odd = u32::from(lhs[2 * word + 1]) * u32::from(rhs[2 * word + 1]);
        even.wrapping_add(odd).wrapping_add(acc[word])
    });
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmsumuhs => {
    // Saturating form of vmsumuhm: the sum is formed in u64 and clamped to
    // u32::MAX; VSCR[SAT] is set if any lane clamped.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let acc = ctx.vr[instr.rc()].as_u32x4();
    let mut clipped = false;
    let out: [u32; 4] = std::array::from_fn(|word| {
        let total = u64::from(lhs[2 * word]) * u64::from(rhs[2 * word])
            + u64::from(lhs[2 * word + 1]) * u64::from(rhs[2 * word + 1])
            + u64::from(acc[word]);
        let overflowed = total > u64::from(u32::MAX);
        clipped |= overflowed;
        if overflowed { u32::MAX } else { total as u32 }
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vmsumshm => {
    // Each i32 lane: two i16*i16 products plus vC[i], wrapping.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let acc = crate::vmx::as_i32x4(ctx.vr[instr.rc()]);
    let out: [i32; 4] = std::array::from_fn(|word| {
        let even = i32::from(lhs[2 * word]) * i32::from(rhs[2 * word]);
        let odd = i32::from(lhs[2 * word + 1]) * i32::from(rhs[2 * word + 1]);
        even.wrapping_add(odd).wrapping_add(acc[word])
    });
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
PpcOpcode::vmsumshs => {
    // Saturating form of vmsumshm; VSCR[SAT] is set if any lane clamped.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let acc = crate::vmx::as_i32x4(ctx.vr[instr.rc()]);
    let mut clipped = false;
    let out: [i32; 4] = std::array::from_fn(|word| {
        // The full sum is formed in i64 and clamped exactly once at the end.
        let total = i64::from(lhs[2 * word]) * i64::from(rhs[2 * word])
            + i64::from(lhs[2 * word + 1]) * i64::from(rhs[2 * word + 1])
            + i64::from(acc[word]);
        let (value, hit) = crate::vmx::sat_i64_to_i32(total);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
// -------- VMX sum-across --------
PpcOpcode::vsumsws => {
    // vD[3] = sat_i32(vB[3] + vA[0] + vA[1] + vA[2] + vA[3]); lanes 0..2 are zeroed.
    let terms = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
    let base = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
    let total = terms
        .iter()
        .fold(i64::from(base[3]), |sum, &term| sum + i64::from(term));
    let (value, clipped) = crate::vmx::sat_i64_to_i32(total);
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4([0, 0, 0, value]);
    ctx.pc += 4;
}
PpcOpcode::vsum2sws => {
    // Two partial sums of two words each, written to lanes 1 and 3
    // (each also adds the matching lane of vB); lanes 0 and 2 are zeroed.
    let terms = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
    let base = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
    let upper_total = i64::from(terms[0]) + i64::from(terms[1]) + i64::from(base[1]);
    let lower_total = i64::from(terms[2]) + i64::from(terms[3]) + i64::from(base[3]);
    let (upper, upper_clipped) = crate::vmx::sat_i64_to_i32(upper_total);
    let (lower, lower_clipped) = crate::vmx::sat_i64_to_i32(lower_total);
    if upper_clipped || lower_clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4([0, upper, 0, lower]);
    ctx.pc += 4;
}
PpcOpcode::vsum4sbs => {
    // Each i32 lane: sum of its four signed bytes of vA plus vB[i], saturated.
    let terms = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let base = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
    let mut clipped = false;
    let out: [i32; 4] = std::array::from_fn(|word| {
        let total = (0..4).fold(i64::from(base[word]), |sum, k| {
            sum + i64::from(terms[4 * word + k])
        });
        let (value, hit) = crate::vmx::sat_i64_to_i32(total);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
PpcOpcode::vsum4ubs => {
    // Each u32 lane: sum of its four unsigned bytes of vA plus vB[i],
    // clamped to u32::MAX.
    let terms = ctx.vr[instr.ra()].as_bytes();
    let base = ctx.vr[instr.rb()].as_u32x4();
    let mut clipped = false;
    let out: [u32; 4] = std::array::from_fn(|word| {
        let total = (0..4).fold(u64::from(base[word]), |sum, k| {
            sum + u64::from(terms[4 * word + k])
        });
        let overflowed = total > u64::from(u32::MAX);
        clipped |= overflowed;
        if overflowed { u32::MAX } else { total as u32 }
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(out);
    ctx.pc += 4;
}
PpcOpcode::vsum4shs => {
    // Each i32 lane: sum of its two signed halfwords of vA plus vB[i], saturated.
    let terms = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let base = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
    let mut clipped = false;
    let out: [i32; 4] = std::array::from_fn(|word| {
        let total = i64::from(terms[2 * word])
            + i64::from(terms[2 * word + 1])
            + i64::from(base[word]);
        let (value, hit) = crate::vmx::sat_i64_to_i32(total);
        clipped |= hit;
        value
    });
    if clipped {
        ctx.set_vscr_sat(true);
    }
    ctx.vr[instr.rd()] = crate::vmx::from_i32x4(out);
    ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4c — VMX integer compares (all set 0xFF/0xFFFF/0xFFFFFFFF per lane)
// ═════════════════════════════════════════════════════════════════
PpcOpcode::vcmpequb => {
    // Byte equality: each lane becomes 0xFF on a match, 0 otherwise. With
    // the record bit set, CR6's lt/eq come from `cr6_flags_from_mask`.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let lanes: [u8; 16] =
        std::array::from_fn(|lane| if lhs[lane] == rhs[lane] { 0xFF } else { 0 });
    let mask = xenia_types::Vec128::from_bytes(lanes);
    if instr.vc_rc_bit() {
        let (lt, eq) = crate::vmx::cr6_flags_from_mask(mask);
        ctx.cr[6] = crate::context::CrField { lt, gt: false, eq, so: false };
    }
    ctx.vr[instr.rd()] = mask;
    ctx.pc += 4;
}
PpcOpcode::vcmpequh => {
    // Halfword equality: 0xFFFF on a match, 0 otherwise.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let lanes: [u16; 8] =
        std::array::from_fn(|lane| if lhs[lane] == rhs[lane] { 0xFFFF } else { 0 });
    let mask = xenia_types::Vec128::from_u16x8_array(lanes);
    if instr.vc_rc_bit() {
        let (lt, eq) = crate::vmx::cr6_flags_from_mask(mask);
        ctx.cr[6] = crate::context::CrField { lt, gt: false, eq, so: false };
    }
    ctx.vr[instr.rd()] = mask;
    ctx.pc += 4;
}
PpcOpcode::vcmpgtub => {
    // Unsigned byte greater-than: 0xFF where vA > vB, 0 otherwise.
    let lhs = ctx.vr[instr.ra()].as_bytes();
    let rhs = ctx.vr[instr.rb()].as_bytes();
    let lanes: [u8; 16] =
        std::array::from_fn(|lane| if lhs[lane] > rhs[lane] { 0xFF } else { 0 });
    let mask = xenia_types::Vec128::from_bytes(lanes);
    if instr.vc_rc_bit() {
        let (lt, eq) = crate::vmx::cr6_flags_from_mask(mask);
        ctx.cr[6] = crate::context::CrField { lt, gt: false, eq, so: false };
    }
    ctx.vr[instr.rd()] = mask;
    ctx.pc += 4;
}
PpcOpcode::vcmpgtsb => {
    // Signed byte greater-than: 0xFF where vA > vB, 0 otherwise.
    let lhs = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
    let lanes: [u8; 16] =
        std::array::from_fn(|lane| if lhs[lane] > rhs[lane] { 0xFF } else { 0 });
    let mask = xenia_types::Vec128::from_bytes(lanes);
    if instr.vc_rc_bit() {
        let (lt, eq) = crate::vmx::cr6_flags_from_mask(mask);
        ctx.cr[6] = crate::context::CrField { lt, gt: false, eq, so: false };
    }
    ctx.vr[instr.rd()] = mask;
    ctx.pc += 4;
}
PpcOpcode::vcmpgtuh => {
    // Unsigned halfword greater-than: 0xFFFF where vA > vB, 0 otherwise.
    let lhs = ctx.vr[instr.ra()].as_u16x8();
    let rhs = ctx.vr[instr.rb()].as_u16x8();
    let lanes: [u16; 8] =
        std::array::from_fn(|lane| if lhs[lane] > rhs[lane] { 0xFFFF } else { 0 });
    let mask = xenia_types::Vec128::from_u16x8_array(lanes);
    if instr.vc_rc_bit() {
        let (lt, eq) = crate::vmx::cr6_flags_from_mask(mask);
        ctx.cr[6] = crate::context::CrField { lt, gt: false, eq, so: false };
    }
    ctx.vr[instr.rd()] = mask;
    ctx.pc += 4;
}
PpcOpcode::vcmpgtsh => {
    // Signed halfword greater-than: 0xFFFF where vA > vB, 0 otherwise.
    let lhs = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
    let lanes: [u16; 8] =
        std::array::from_fn(|lane| if lhs[lane] > rhs[lane] { 0xFFFF } else { 0 });
    let mask = xenia_types::Vec128::from_u16x8_array(lanes);
    if instr.vc_rc_bit() {
        let (lt, eq) = crate::vmx::cr6_flags_from_mask(mask);
        ctx.cr[6] = crate::context::CrField { lt, gt: false, eq, so: false };
    }
    ctx.vr[instr.rd()] = mask;
    ctx.pc += 4;
}
PpcOpcode::vcmpgtuw => {
    // Unsigned word greater-than: 0xFFFFFFFF where vA > vB, 0 otherwise.
    // With the record bit set, CR6 is updated by `update_cr6_from_vmask`.
    let lhs = ctx.vr[instr.ra()].as_u32x4();
    let rhs = ctx.vr[instr.rb()].as_u32x4();
    let lanes: [u32; 4] =
        std::array::from_fn(|lane| if lhs[lane] > rhs[lane] { 0xFFFFFFFF } else { 0 });
    if instr.vc_rc_bit() {
        update_cr6_from_vmask(&lanes, ctx);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(lanes);
    ctx.pc += 4;
}
PpcOpcode::vcmpgtsw => {
    // Signed word greater-than: 0xFFFFFFFF where vA > vB, 0 otherwise.
    let lhs = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
    let rhs = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
    let lanes: [u32; 4] =
        std::array::from_fn(|lane| if lhs[lane] > rhs[lane] { 0xFFFFFFFF } else { 0 });
    if instr.vc_rc_bit() {
        update_cr6_from_vmask(&lanes, ctx);
    }
    ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(lanes);
    ctx.pc += 4;
}
// vcmpbfp(128): set upper/lower nibbles per lane based on bounds test.
PpcOpcode::vcmpbfp | PpcOpcode::vcmpbfp128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vcmpbfp128);
let (ra, rb, rd) = if is_128 {
(instr.va128(), instr.vb128(), instr.vd128())
} else {
(instr.ra(), instr.rb(), instr.rd())
};
let a = ctx.vr[ra].as_f32x4();
let b = ctx.vr[rb].as_f32x4();
let mut r = [0u32; 4];
let mut any_out = false;
for i in 0..4 {
let mut lane: u32 = 0;
if a[i].is_nan() || b[i].is_nan() || a[i] > b[i] { lane |= 0x8000_0000; any_out = true; }
if a[i].is_nan() || b[i].is_nan() || a[i] < -b[i] { lane |= 0x4000_0000; any_out = true; }
r[i] = lane;
}
let rc = if is_128 { instr.vx128r_rc_bit() } else { instr.vc_rc_bit() };
if rc {
ctx.cr[6] = crate::context::CrField {
lt: false, gt: false, eq: !any_out, so: false,
};
}
ctx.vr[rd] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4d — VMX shifts and rotates
// ═════════════════════════════════════════════════════════════════
PpcOpcode::vslb => {
let a = ctx.vr[instr.ra()].as_bytes();
let b = ctx.vr[instr.rb()].as_bytes();
let mut r = [0u8; 16];
for i in 0..16 { r[i] = a[i] << (b[i] & 7); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vsrb => {
let a = ctx.vr[instr.ra()].as_bytes();
let b = ctx.vr[instr.rb()].as_bytes();
let mut r = [0u8; 16];
for i in 0..16 { r[i] = a[i] >> (b[i] & 7); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vsrab => {
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
let b = ctx.vr[instr.rb()].as_bytes();
let mut r = [0i8; 16];
for i in 0..16 { r[i] = a[i] >> (b[i] & 7); }
ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r);
ctx.pc += 4;
}
PpcOpcode::vrlb => {
let a = ctx.vr[instr.ra()].as_bytes();
let b = ctx.vr[instr.rb()].as_bytes();
let mut r = [0u8; 16];
for i in 0..16 { r[i] = a[i].rotate_left((b[i] & 7) as u32); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vslh => {
let a = ctx.vr[instr.ra()].as_u16x8();
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u16; 8];
for i in 0..8 { r[i] = a[i] << (b[i] & 0xF); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
PpcOpcode::vsrh => {
let a = ctx.vr[instr.ra()].as_u16x8();
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u16; 8];
for i in 0..8 { r[i] = a[i] >> (b[i] & 0xF); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
PpcOpcode::vsrah => {
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0i16; 8];
for i in 0..8 { r[i] = a[i] >> (b[i] & 0xF); }
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
ctx.pc += 4;
}
PpcOpcode::vrlh => {
let a = ctx.vr[instr.ra()].as_u16x8();
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u16; 8];
for i in 0..8 { r[i] = a[i].rotate_left((b[i] & 0xF) as u32); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
// vslw / vsrw / vsraw / vrlw (word shifts) are implemented above via
// vmx_reg_triple — skip here.
// Full 128-bit bit shifts (vsl/vsr): shift by the low 3 bits of vB[15].
PpcOpcode::vsl => {
let a = u128::from_be_bytes(ctx.vr[instr.ra()].as_bytes());
let shift = (ctx.vr[instr.rb()].as_bytes()[15] & 7) as u32;
let r = if shift == 0 { a } else { a << shift };
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r.to_be_bytes());
ctx.pc += 4;
}
PpcOpcode::vsr => {
let a = u128::from_be_bytes(ctx.vr[instr.ra()].as_bytes());
let shift = (ctx.vr[instr.rb()].as_bytes()[15] & 7) as u32;
let r = if shift == 0 { a } else { a >> shift };
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r.to_be_bytes());
ctx.pc += 4;
}
// vslo/vsro: 128-bit octet (byte) shift. vB[15] & 0x78 gives bit count / 8 * 8.
PpcOpcode::vslo | PpcOpcode::vslo128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vslo128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = u128::from_be_bytes(ctx.vr[ra].as_bytes());
let nbytes = ((ctx.vr[rb].as_bytes()[15] >> 3) & 0xF) as u32;
let r = if nbytes == 0 { a } else { a << (nbytes * 8) };
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r.to_be_bytes());
ctx.pc += 4;
}
PpcOpcode::vsro | PpcOpcode::vsro128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vsro128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = u128::from_be_bytes(ctx.vr[ra].as_bytes());
let nbytes = ((ctx.vr[rb].as_bytes()[15] >> 3) & 0xF) as u32;
let r = if nbytes == 0 { a } else { a >> (nbytes * 8) };
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r.to_be_bytes());
ctx.pc += 4;
}
// vrlimi128: rotate-left-immediate then partial-merge into vD.
// Field layout (from canary ppc_decode_data.cc VX128_4):
// imm = bits(22..=23,28..=29) for shift, mask = bits(24..=27)
// Simplified semantics: r = vB rotated left by `shift` words, merged
// into vD using a per-word `mask` (mask bit set ⇒ take the rotated
// word, mask bit clear ⇒ keep the existing vD word; mask bit 3 maps
// to word 0). Titles generally use mask=0xF (copy-all), which makes
// this behave like a plain word rotate.
PpcOpcode::vrlimi128 => {
let shift = instr.vx128_4_z() as usize;
let mask = instr.vx128_4_imm();
let b = ctx.vr[instr.vb128()].as_u32x4();
let d = ctx.vr[instr.vd128()].as_u32x4();
let rot = [b[shift % 4], b[(shift + 1) % 4], b[(shift + 2) % 4], b[(shift + 3) % 4]];
let mut r = [0u32; 4];
for i in 0..4 {
// mask bit 3 corresponds to word 0 (BE-first). Use rot when
// the corresponding mask bit is set.
let use_rot = (mask >> (3 - i)) & 1 == 1;
r[i] = if use_rot { rot[i] } else { d[i] };
}
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4e — VMX merge (interleave high / low halves)
// ═════════════════════════════════════════════════════════════════
PpcOpcode::vmrghb => {
let a = ctx.vr[instr.ra()].as_bytes();
let b = ctx.vr[instr.rb()].as_bytes();
let mut r = [0u8; 16];
for i in 0..8 { r[2*i] = a[i]; r[2*i+1] = b[i]; }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vmrglb => {
let a = ctx.vr[instr.ra()].as_bytes();
let b = ctx.vr[instr.rb()].as_bytes();
let mut r = [0u8; 16];
for i in 0..8 { r[2*i] = a[8+i]; r[2*i+1] = b[8+i]; }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vmrghh => {
let a = ctx.vr[instr.ra()].as_u16x8();
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u16; 8];
for i in 0..4 { r[2*i] = a[i]; r[2*i+1] = b[i]; }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
PpcOpcode::vmrglh => {
let a = ctx.vr[instr.ra()].as_u16x8();
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u16; 8];
for i in 0..4 { r[2*i] = a[4+i]; r[2*i+1] = b[4+i]; }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4f — VMX pack / unpack (saturating and modulo + D3D + 5-5-5 pixel)
// ═════════════════════════════════════════════════════════════════
// ---- Pack modulo (truncate) ----
PpcOpcode::vpkuhum | PpcOpcode::vpkuhum128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vpkuhum128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = ctx.vr[ra].as_u16x8();
let b = ctx.vr[rb].as_u16x8();
let mut r = [0u8; 16];
for i in 0..8 { r[i] = a[i] as u8; }
for i in 0..8 { r[8 + i] = b[i] as u8; }
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vpkuwum | PpcOpcode::vpkuwum128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vpkuwum128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = ctx.vr[ra].as_u32x4();
let b = ctx.vr[rb].as_u32x4();
let mut r = [0u16; 8];
for i in 0..4 { r[i] = a[i] as u16; }
for i in 0..4 { r[4 + i] = b[i] as u16; }
ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
// ---- Pack with saturation ----
PpcOpcode::vpkuhus | PpcOpcode::vpkuhus128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vpkuhus128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = ctx.vr[ra].as_u16x8();
let b = ctx.vr[rb].as_u16x8();
let mut r = [0u8; 16]; let mut sat = false;
for i in 0..8 { let (v, s) = crate::vmx::sat_u16_to_u8(a[i]); r[i] = v; sat |= s; }
for i in 0..8 { let (v, s) = crate::vmx::sat_u16_to_u8(b[i]); r[8 + i] = v; sat |= s; }
if sat { ctx.set_vscr_sat(true); }
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vpkshus | PpcOpcode::vpkshus128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vpkshus128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = crate::vmx::as_i16x8(ctx.vr[ra]);
let b = crate::vmx::as_i16x8(ctx.vr[rb]);
let mut r = [0u8; 16]; let mut sat = false;
for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_u8(a[i]); r[i] = v; sat |= s; }
for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_u8(b[i]); r[8 + i] = v; sat |= s; }
if sat { ctx.set_vscr_sat(true); }
ctx.vr[rd] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vpkshss | PpcOpcode::vpkshss128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vpkshss128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = crate::vmx::as_i16x8(ctx.vr[ra]);
let b = crate::vmx::as_i16x8(ctx.vr[rb]);
let mut r = [0i8; 16]; let mut sat = false;
for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_i8(a[i]); r[i] = v; sat |= s; }
for i in 0..8 { let (v, s) = crate::vmx::sat_i16_to_i8(b[i]); r[8 + i] = v; sat |= s; }
if sat { ctx.set_vscr_sat(true); }
ctx.vr[rd] = crate::vmx::from_i8x16(r);
ctx.pc += 4;
}
PpcOpcode::vpkuwus | PpcOpcode::vpkuwus128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vpkuwus128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = ctx.vr[ra].as_u32x4();
let b = ctx.vr[rb].as_u32x4();
let mut r = [0u16; 8]; let mut sat = false;
for i in 0..4 { let (v, s) = crate::vmx::sat_u32_to_u16(a[i]); r[i] = v; sat |= s; }
for i in 0..4 { let (v, s) = crate::vmx::sat_u32_to_u16(b[i]); r[4 + i] = v; sat |= s; }
if sat { ctx.set_vscr_sat(true); }
ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
PpcOpcode::vpkswus | PpcOpcode::vpkswus128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vpkswus128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = crate::vmx::as_i32x4(ctx.vr[ra]);
let b = crate::vmx::as_i32x4(ctx.vr[rb]);
let mut r = [0u16; 8]; let mut sat = false;
for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_u16(a[i]); r[i] = v; sat |= s; }
for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_u16(b[i]); r[4 + i] = v; sat |= s; }
if sat { ctx.set_vscr_sat(true); }
ctx.vr[rd] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
PpcOpcode::vpkswss | PpcOpcode::vpkswss128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vpkswss128);
let (ra, rb, rd) = if is_128 { (instr.va128(), instr.vb128(), instr.vd128()) }
else { (instr.ra(), instr.rb(), instr.rd()) };
let a = crate::vmx::as_i32x4(ctx.vr[ra]);
let b = crate::vmx::as_i32x4(ctx.vr[rb]);
let mut r = [0i16; 8]; let mut sat = false;
for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_i16(a[i]); r[i] = v; sat |= s; }
for i in 0..4 { let (v, s) = crate::vmx::sat_i32_to_i16(b[i]); r[4 + i] = v; sat |= s; }
if sat { ctx.set_vscr_sat(true); }
ctx.vr[rd] = crate::vmx::from_i16x8(r);
ctx.pc += 4;
}
// vpkpx: pack two u32 vectors into one u16 (5-5-5 pixel) vector.
PpcOpcode::vpkpx => {
let a = ctx.vr[instr.ra()].as_u32x4();
let b = ctx.vr[instr.rb()].as_u32x4();
let mut r = [0u16; 8];
for i in 0..4 { r[i] = crate::vmx::pack_pixel_555(a[i]); }
for i in 0..4 { r[4 + i] = crate::vmx::pack_pixel_555(b[i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
// ---- Unpack (sign-extend) ----
PpcOpcode::vupkhsb | PpcOpcode::vupkhsb128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vupkhsb128);
let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) }
else { (instr.rb(), instr.rd()) };
let b = crate::vmx::as_i8x16(ctx.vr[rb]);
let mut r = [0i16; 8];
for i in 0..8 { r[i] = b[i] as i16; }
ctx.vr[rd] = crate::vmx::from_i16x8(r);
ctx.pc += 4;
}
PpcOpcode::vupklsb | PpcOpcode::vupklsb128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vupklsb128);
let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) }
else { (instr.rb(), instr.rd()) };
let b = crate::vmx::as_i8x16(ctx.vr[rb]);
let mut r = [0i16; 8];
for i in 0..8 { r[i] = b[8 + i] as i16; }
ctx.vr[rd] = crate::vmx::from_i16x8(r);
ctx.pc += 4;
}
PpcOpcode::vupkhsh => {
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
let mut r = [0i32; 4];
for i in 0..4 { r[i] = b[i] as i32; }
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
ctx.pc += 4;
}
PpcOpcode::vupklsh => {
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
let mut r = [0i32; 4];
for i in 0..4 { r[i] = b[4 + i] as i32; }
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
ctx.pc += 4;
}
PpcOpcode::vupkhpx => {
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u32; 4];
for i in 0..4 { r[i] = crate::vmx::unpack_pixel_555(b[i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
PpcOpcode::vupklpx => {
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u32; 4];
for i in 0..4 { r[i] = crate::vmx::unpack_pixel_555(b[4 + i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
// ---- D3D pack / unpack (VMX128-only) ----
//
// First-Pixels M3: fixed immediate extraction + added pack types
// 1-6. The prior `(instr.raw >> 6) & 0x7` was LSB-numbered (wrong
// position) and masked to only 3 bits. Canary extracts from the
// VX128_3/4 `IMM` field at PPC bits 16-22 (MSB) and does
// `type = IMM >> 2` to pick up the 5-bit type selector — the low
// 2 bits (`pack`) select output-slot layout for `vpkd3d128`.
PpcOpcode::vpkd3d128 => {
use crate::vmx::D3dPackType;
let uimm = crate::decoder::extract_vx128_uimm5(instr.raw);
let pack = (uimm & 3) as usize;
let shift = instr.vx128_4_z() as usize;
let ty = D3dPackType::from_immediate(uimm >> 2);
let src = ctx.vr[instr.vb128()];
let out = match ty {
D3dPackType::D3dColor => crate::vmx::pack_d3dcolor(src),
D3dPackType::NormShort2 => crate::vmx::pack_normshort2(src),
D3dPackType::NormPacked32 => crate::vmx::pack_normpacked32(src),
D3dPackType::Float16_2 => crate::vmx::pack_float16_2(src),
D3dPackType::NormShort4 => crate::vmx::pack_normshort4(src),
D3dPackType::Float16_4 => crate::vmx::pack_float16_4(src),
D3dPackType::NormPacked64 => crate::vmx::pack_normpacked64(src),
D3dPackType::Other(t) => {
tracing::warn!(
raw = format_args!("{:#010x}", instr.raw),
uimm,
ty = t,
"vpkd3d128: unhandled pack type at {:#010x}",
ctx.pc,
);
src
}
};
// Post-pack permutation: merge packed `out` into previous `vd`
// per canary ppc_emit_altivec.cc:2126-2188 MakePermuteMask tables.
// MakePermuteMask(r0,l0, r1,l1, r2,l2, r3,l3): result[i] = if ri==0 { prev[li] } else { out[li] }
let result = if pack == 0 {
out
} else {
// (source_reg, lane): 0=prev vd, 1=packed out
const PERM: [[[(u8, u8); 4]; 4]; 3] = [
// pack=1 (VPACK_32): places out[3] at lane (3-shift)
[[(0,0),(0,1),(0,2),(1,3)], [(0,0),(0,1),(1,3),(0,3)],
[(0,0),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]],
// pack=2 (64-bit): places out[2..3] at lanes (2-shift)..(3-shift)
[[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)],
[(1,2),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]],
// pack=3 (64-bit): same as pack=2 except shift=3 selects out[2] at lane 3
[[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)],
[(1,2),(1,3),(0,2),(0,3)], [(0,0),(0,1),(0,2),(1,2)]],
];
let prev = ctx.vr[instr.vd128()];
let pw = prev.as_u32x4();
let ow = out.as_u32x4();
let sel = PERM[pack - 1][shift];
xenia_types::Vec128::from_u32x4_array([
if sel[0].0 == 0 { pw[sel[0].1 as usize] } else { ow[sel[0].1 as usize] },
if sel[1].0 == 0 { pw[sel[1].1 as usize] } else { ow[sel[1].1 as usize] },
if sel[2].0 == 0 { pw[sel[2].1 as usize] } else { ow[sel[2].1 as usize] },
if sel[3].0 == 0 { pw[sel[3].1 as usize] } else { ow[sel[3].1 as usize] },
])
};
ctx.vr[instr.vd128()] = result;
ctx.pc += 4;
}
PpcOpcode::vupkd3d128 => {
use crate::vmx::D3dPackType;
let uimm = crate::decoder::extract_vx128_uimm5(instr.raw);
let ty = D3dPackType::from_immediate(uimm >> 2);
let src = ctx.vr[instr.vb128()];
let out = match ty {
D3dPackType::D3dColor => crate::vmx::unpack_d3dcolor(src),
D3dPackType::NormShort2 => crate::vmx::unpack_normshort2(src),
D3dPackType::NormPacked32 => crate::vmx::unpack_normpacked32(src),
D3dPackType::Float16_2 => crate::vmx::unpack_float16_2(src),
D3dPackType::NormShort4 => crate::vmx::unpack_normshort4(src),
D3dPackType::Float16_4 => crate::vmx::unpack_float16_4(src),
D3dPackType::NormPacked64 => crate::vmx::unpack_normpacked64(src),
D3dPackType::Other(t) => {
tracing::warn!(
raw = format_args!("{:#010x}", instr.raw),
uimm,
ty = t,
"vupkd3d128: unhandled pack type at {:#010x}",
ctx.pc,
);
src
}
};
ctx.vr[instr.vd128()] = out;
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4g — VMX convert (float ↔ fixed-point)
// ═════════════════════════════════════════════════════════════════
// vctsxs / vctuxs: f32 → i32/u32, scaled by 2^uimm, saturating.
PpcOpcode::vctsxs => {
let uimm = (instr.raw >> 16) & 0x1F;
let b = ctx.vr[instr.rb()].as_f32x4();
let mut r = [0i32; 4]; let mut sat = false;
for i in 0..4 {
let (v, s) = crate::vmx::cvt_f32_to_i32_sat(b[i], uimm);
r[i] = v; sat |= s;
}
if sat { ctx.set_vscr_sat(true); }
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
ctx.pc += 4;
}
PpcOpcode::vctuxs => {
let uimm = (instr.raw >> 16) & 0x1F;
let b = ctx.vr[instr.rb()].as_f32x4();
let mut r = [0u32; 4]; let mut sat = false;
for i in 0..4 {
let (v, s) = crate::vmx::cvt_f32_to_u32_sat(b[i], uimm);
r[i] = v; sat |= s;
}
if sat { ctx.set_vscr_sat(true); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
// vcfsx / vcfux: i32/u32 → f32, scaled by 2^-uimm.
PpcOpcode::vcfsx => {
let uimm = (instr.raw >> 16) & 0x1F;
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
let mut r = [0f32; 4];
for i in 0..4 { r[i] = crate::vmx::cvt_i32_to_f32(b[i], uimm); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;
}
PpcOpcode::vcfux => {
let uimm = (instr.raw >> 16) & 0x1F;
let b = ctx.vr[instr.rb()].as_u32x4();
let mut r = [0f32; 4];
for i in 0..4 { r[i] = crate::vmx::cvt_u32_to_f32(b[i], uimm); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;
}
// VMX128 convert variants. uimm lives in bits 16-20 of the encoded form.
PpcOpcode::vcfpsxws128 => {
let uimm = (instr.raw >> 16) & 0x1F;
let b = ctx.vr[instr.vb128()].as_f32x4();
let mut r = [0i32; 4]; let mut sat = false;
for i in 0..4 {
let (v, s) = crate::vmx::cvt_f32_to_i32_sat(b[i], uimm);
r[i] = v; sat |= s;
}
if sat { ctx.set_vscr_sat(true); }
ctx.vr[instr.vd128()] = crate::vmx::from_i32x4(r);
ctx.pc += 4;
}
PpcOpcode::vcfpuxws128 => {
let uimm = (instr.raw >> 16) & 0x1F;
let b = ctx.vr[instr.vb128()].as_f32x4();
let mut r = [0u32; 4]; let mut sat = false;
for i in 0..4 {
let (v, s) = crate::vmx::cvt_f32_to_u32_sat(b[i], uimm);
r[i] = v; sat |= s;
}
if sat { ctx.set_vscr_sat(true); }
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
PpcOpcode::vcsxwfp128 => {
let uimm = (instr.raw >> 16) & 0x1F;
let b = crate::vmx::as_i32x4(ctx.vr[instr.vb128()]);
let mut r = [0f32; 4];
for i in 0..4 { r[i] = crate::vmx::cvt_i32_to_f32(b[i], uimm); }
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;
}
PpcOpcode::vcuxwfp128 => {
let uimm = (instr.raw >> 16) & 0x1F;
let b = ctx.vr[instr.vb128()].as_u32x4();
let mut r = [0f32; 4];
for i in 0..4 { r[i] = crate::vmx::cvt_u32_to_f32(b[i], uimm); }
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4h — VMX vector FPU (exp / log)
// ═════════════════════════════════════════════════════════════════
PpcOpcode::vexptefp | PpcOpcode::vexptefp128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vexptefp128);
let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) }
else { (instr.rb(), instr.rd()) };
let b = ctx.vr[rb].as_f32x4();
let mut r = [0f32; 4];
for i in 0..4 { r[i] = b[i].exp2(); }
ctx.vr[rd] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;
}
PpcOpcode::vlogefp | PpcOpcode::vlogefp128 => {
let is_128 = matches!(instr.opcode, PpcOpcode::vlogefp128);
let (rb, rd) = if is_128 { (instr.vb128(), instr.vd128()) }
else { (instr.rb(), instr.rd()) };
let b = ctx.vr[rb].as_f32x4();
let mut r = [0f32; 4];
for i in 0..4 { r[i] = b[i].log2(); }
ctx.vr[rd] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4i — VMX integer max / min
// ═════════════════════════════════════════════════════════════════
PpcOpcode::vmaxub => {
let a = ctx.vr[instr.ra()].as_bytes();
let b = ctx.vr[instr.rb()].as_bytes();
let mut r = [0u8; 16];
for i in 0..16 { r[i] = a[i].max(b[i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vminub => {
let a = ctx.vr[instr.ra()].as_bytes();
let b = ctx.vr[instr.rb()].as_bytes();
let mut r = [0u8; 16];
for i in 0..16 { r[i] = a[i].min(b[i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_bytes(r);
ctx.pc += 4;
}
PpcOpcode::vmaxsb => {
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
let mut r = [0i8; 16];
for i in 0..16 { r[i] = a[i].max(b[i]); }
ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r);
ctx.pc += 4;
}
PpcOpcode::vminsb => {
let a = crate::vmx::as_i8x16(ctx.vr[instr.ra()]);
let b = crate::vmx::as_i8x16(ctx.vr[instr.rb()]);
let mut r = [0i8; 16];
for i in 0..16 { r[i] = a[i].min(b[i]); }
ctx.vr[instr.rd()] = crate::vmx::from_i8x16(r);
ctx.pc += 4;
}
PpcOpcode::vmaxuh => {
let a = ctx.vr[instr.ra()].as_u16x8();
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u16; 8];
for i in 0..8 { r[i] = a[i].max(b[i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
PpcOpcode::vminuh => {
let a = ctx.vr[instr.ra()].as_u16x8();
let b = ctx.vr[instr.rb()].as_u16x8();
let mut r = [0u16; 8];
for i in 0..8 { r[i] = a[i].min(b[i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u16x8_array(r);
ctx.pc += 4;
}
PpcOpcode::vmaxsh => {
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
let mut r = [0i16; 8];
for i in 0..8 { r[i] = a[i].max(b[i]); }
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
ctx.pc += 4;
}
PpcOpcode::vminsh => {
let a = crate::vmx::as_i16x8(ctx.vr[instr.ra()]);
let b = crate::vmx::as_i16x8(ctx.vr[instr.rb()]);
let mut r = [0i16; 8];
for i in 0..8 { r[i] = a[i].min(b[i]); }
ctx.vr[instr.rd()] = crate::vmx::from_i16x8(r);
ctx.pc += 4;
}
PpcOpcode::vmaxuw => {
let a = ctx.vr[instr.ra()].as_u32x4();
let b = ctx.vr[instr.rb()].as_u32x4();
let mut r = [0u32; 4];
for i in 0..4 { r[i] = a[i].max(b[i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
PpcOpcode::vminuw => {
let a = ctx.vr[instr.ra()].as_u32x4();
let b = ctx.vr[instr.rb()].as_u32x4();
let mut r = [0u32; 4];
for i in 0..4 { r[i] = a[i].min(b[i]); }
ctx.vr[instr.rd()] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
PpcOpcode::vmaxsw => {
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
let mut r = [0i32; 4];
for i in 0..4 { r[i] = a[i].max(b[i]); }
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
ctx.pc += 4;
}
PpcOpcode::vminsw => {
let a = crate::vmx::as_i32x4(ctx.vr[instr.ra()]);
let b = crate::vmx::as_i32x4(ctx.vr[instr.rb()]);
let mut r = [0i32; 4];
for i in 0..4 { r[i] = a[i].min(b[i]); }
ctx.vr[instr.rd()] = crate::vmx::from_i32x4(r);
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4j — VMX128 FMA / permute
// ═════════════════════════════════════════════════════════════════
// vmaddcfp128: ISA (VD) <- (VA × VD) + VB — a fused multiply-add in which
// VD is the second multiplicand and VB is the addend.
PpcOpcode::vmaddcfp128 => {
// ISA: (VD) <- (VA × VD) + VB. Canary InstrEmit_vmaddcfp128 (cc:819): MulAdd(VA, VD, VB).
// Previous code computed di.mul_add(bi, ai) = VD×VB+VA — both operands wrong
// (PPCBUG-425). Fix: ai.mul_add(di, bi) = VA×VD+VB.
let a = ctx.vr[instr.va128()].as_f32x4();
let b = ctx.vr[instr.vb128()].as_f32x4();
let d = ctx.vr[instr.vd128()].as_f32x4();
let mut r = [0f32; 4];
for i in 0..4 {
let ai = vmx::flush_denorm(a[i]);
let bi = vmx::flush_denorm(b[i]);
let di = vmx::flush_denorm(d[i]);
// PPCBUG-437: flush subnormal output too.
r[i] = vmx::flush_denorm(ai.mul_add(di, bi));
}
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4_array(r);
ctx.pc += 4;
}
// vmsum3fp128: horizontal sum of (vA * vB) over lanes 0, 1 and 2 (lane 3 is
// ignored), broadcast to all 4 output lanes.
// Canary `InstrEmit_vmsum3fp128` flushes the *output* denormal
// unconditionally (not the inputs) — see ppc_emit_altivec.cc:1067-1075.
PpcOpcode::vmsum3fp128 => {
// PPCBUG-436: flush per-product intermediates (not just the final sum).
let a = ctx.vr[instr.va128()].as_f32x4();
let b = ctx.vr[instr.vb128()].as_f32x4();
let p0 = vmx::flush_denorm(a[0] * b[0]);
let p1 = vmx::flush_denorm(a[1] * b[1]);
let p2 = vmx::flush_denorm(a[2] * b[2]);
let s = vmx::flush_denorm(p0 + p1 + p2);
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s);
ctx.pc += 4;
}
PpcOpcode::vmsum4fp128 => {
// PPCBUG-436.
let a = ctx.vr[instr.va128()].as_f32x4();
let b = ctx.vr[instr.vb128()].as_f32x4();
let p0 = vmx::flush_denorm(a[0] * b[0]);
let p1 = vmx::flush_denorm(a[1] * b[1]);
let p2 = vmx::flush_denorm(a[2] * b[2]);
let p3 = vmx::flush_denorm(a[3] * b[3]);
let s = vmx::flush_denorm(p0 + p1 + p2 + p3);
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_f32x4(s, s, s, s);
ctx.pc += 4;
}
// vpermwi128: permute words of vB using an 8-bit immediate (2 bits per output lane).
PpcOpcode::vpermwi128 => {
let imm = instr.vx128_p_perm();
let b = ctx.vr[instr.vb128()].as_u32x4();
let mut r = [0u32; 4];
// Output lane i ← b[(imm >> (2 * (3-i))) & 3]
for i in 0..4 {
let sel = ((imm >> (2 * (3 - i))) & 3) as usize;
r[i] = b[sel];
}
ctx.vr[instr.vd128()] = xenia_types::Vec128::from_u32x4_array(r);
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4k — Scalar reservation / byte-reverse (doubleword)
// ═════════════════════════════════════════════════════════════════
// M3.7 — same table-vs-legacy split as lwarx/stwcx.
// PPCBUG-108: ldarx + stdcx. have the same cross-thread atomicity
// limitation as lwarx/stwcx. in the legacy per-ctx fallback path.
// See the lwarx block comment for the full explanation. The M3
// scheduler must enable `ReservationTable` before spawning a second
// host thread. stdcx. carries the debug_assert (see below).
PpcOpcode::ldarx => {
let ea = ea_indexed(ctx, instr);
let val = mem.read_u64(ea);
ctx.gpr[instr.rd()] = val;
ctx.reserved_line = ea & !RESERVATION_MASK;
ctx.reserved_val = val;
ctx.has_reservation = true;
ctx.reservation_width = 8; // PPCBUG-151: doubleword reservation
if let Some(t) = &ctx.reservation_table {
if t.is_enabled() {
ctx.reserved_generation = t.reserve(ea, ctx.hw_id);
}
}
ctx.pc += 4;
}
// PPCBUG-108: see ldarx comment above. stdcx. legacy path cannot observe
// cross-thread reservation invalidations; only safe in lockstep mode.
PpcOpcode::stdcx => {
let ea = ea_indexed(ctx, instr);
let line = ea & !RESERVATION_MASK;
let table_route = ctx
.reservation_table
.as_ref()
.filter(|t| t.is_enabled())
.cloned();
// PPCBUG-151: stdcx. requires a doubleword (ldarx) reservation;
// a word (lwarx) reservation must not commit here.
let width_ok = ctx.reservation_width == 8;
let success = if let Some(t) = &table_route {
ctx.has_reservation
&& width_ok
&& ctx.reserved_line == line
&& t.try_commit(ea, ctx.reserved_generation, ctx.hw_id)
} else {
// Legacy per-ctx path (M2 default / lockstep).
// PPCBUG-108: same sentinel as stwcx. — fires on non-primary
// HW slots if the table is disabled under --parallel.
debug_assert!(
ctx.hw_id == 0,
"PPCBUG-108: legacy per-ctx stdcx. on non-primary HW slot \
(hw_id={}) — ReservationTable must be enabled under --parallel",
ctx.hw_id
);
ctx.has_reservation && width_ok && ctx.reserved_line == line
};
if success {
mem.write_u64(ea, ctx.gpr[instr.rs()]);
ctx.cr[0] = crate::context::CrField {
lt: false,
gt: false,
eq: true,
so: ctx.xer_so != 0,
};
} else {
ctx.cr[0] = crate::context::CrField {
lt: false,
gt: false,
eq: false,
so: ctx.xer_so != 0,
};
if let Some(t) = &table_route {
t.release(ea, ctx.reserved_generation, ctx.hw_id);
}
}
ctx.has_reservation = false;
ctx.reservation_width = 0; // PPCBUG-151: always clear on exit
ctx.pc += 4;
}
PpcOpcode::ldbrx => {
let ea = ea_indexed(ctx, instr);
ctx.gpr[instr.rd()] = mem.read_u64(ea).swap_bytes();
ctx.pc += 4;
}
PpcOpcode::stdbrx => {
let ea = ea_indexed(ctx, instr);
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() { t.invalidate_for_write(ea); }
}
mem.write_u64(ea, ctx.gpr[instr.rs()].swap_bytes());
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4l — Scalar string load / store (register-length)
// ═════════════════════════════════════════════════════════════════
PpcOpcode::lswx => {
let mut ea = ea_indexed(ctx, instr);
let nb = ctx.xer() & 0x7F; // XER[25..31]
let mut rd = instr.rd();
let mut bytes_left = nb;
while bytes_left > 0 {
let mut val = 0u32;
for byte_idx in 0..4 {
if bytes_left == 0 { break; }
let b = mem.read_u8(ea) as u32;
val |= b << (24 - byte_idx * 8);
ea = ea.wrapping_add(1);
bytes_left -= 1;
}
ctx.gpr[rd] = val as u64;
rd = (rd + 1) % 32;
}
ctx.pc += 4;
}
PpcOpcode::stswx => {
let mut ea = ea_indexed(ctx, instr);
let nb = ctx.xer() & 0x7F;
let mut rs = instr.rs();
let mut bytes_left = nb;
if nb > 0 {
if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
if t.has_active_reservers() {
let first_line = ea & !RESERVATION_MASK;
let last_line = ea.wrapping_add(nb - 1) & !RESERVATION_MASK;
t.invalidate_for_write(first_line);
if last_line != first_line { t.invalidate_for_write(last_line); }
}
}
}
while bytes_left > 0 {
let val = ctx.gpr[rs] as u32;
for byte_idx in 0..4 {
if bytes_left == 0 { break; }
mem.write_u8(ea, (val >> (24 - byte_idx * 8)) as u8);
ea = ea.wrapping_add(1);
bytes_left -= 1;
}
rs = (rs + 1) % 32;
}
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// §4m — mcrxr: move XER condition bits to CR field, clear XER[SO/OV/CA]
// ═════════════════════════════════════════════════════════════════
PpcOpcode::mcrxr => {
let crfd = instr.crfd();
ctx.cr[crfd] = crate::context::CrField {
lt: ctx.xer_so != 0,
gt: ctx.xer_ov != 0,
eq: ctx.xer_ca != 0,
so: false,
};
ctx.xer_so = 0;
ctx.xer_ov = 0;
ctx.xer_ca = 0;
ctx.pc += 4;
}
// ═════════════════════════════════════════════════════════════════
// mcrfs — move FPSCR field to CR field and clear corresponding
// FPSCR exception bits. CR field crfD ← FPSCR[(crfS*4)..(crfS*4+3)]
// and then FPSCR bits in that nibble that are exception bits are
// cleared (FX, OX, UX, ZX, XX, VXSNAN, VXISI, VXIDI, VXZDZ, VXIMZ,
// VXVC, VXSOFT, VXSQRT, VXCVI are cleared; FEX/VX are read-only
// summaries and are recomputed later).
// ═════════════════════════════════════════════════════════════════
PpcOpcode::mcrfs => {
let crfd = instr.crfd();
let crfs = instr.crfs();
let shift = 28 - (crfs as u32 * 4);
let nibble = ((ctx.fpscr >> shift) & 0xF) as u8;
ctx.cr[crfd] = crate::context::CrField::from_u8(nibble);
// Clearable exception bits: 0 (FX), 3 (OX), 4 (UX), 5 (ZX),
// 6 (XX), 7 (VXSNAN), 8 (VXISI), 9 (VXIDI), 10 (VXZDZ),
// 11 (VXIMZ), 12 (VXVC), 21 (VXSOFT), 22 (VXSQRT), 23 (VXCVI).
// (Bit positions are PowerISA MSB-0; here 'FPSCR bit n' means
// the bit at (31-n) in our little-endian u32.)
const CLEARABLE_MASK: u32 =
(1 << 31) | (1 << (31 - 3)) | (1 << (31 - 4)) |
(1 << (31 - 5)) | (1 << (31 - 6)) | (1 << (31 - 7)) |
(1 << (31 - 8)) | (1 << (31 - 9)) | (1 << (31 - 10)) |
(1 << (31 - 11)) | (1 << (31 - 12)) |
(1 << (31 - 21)) | (1 << (31 - 22)) | (1 << (31 - 23));
let nibble_mask = 0xFu32 << shift;
ctx.fpscr &= !(nibble_mask & CLEARABLE_MASK);
// PPCBUG-068: recompute the VX summary bit. If any VX* exception
// bit remains set, VX must remain set; if all are cleared, VX
// must clear. (FEX recomputation omitted — xenia doesn't model
// enabled-exception dispatch.)
if ctx.fpscr & fpscr::VX_ALL != 0 {
ctx.fpscr |= fpscr::VX;
} else {
ctx.fpscr &= !fpscr::VX;
}
ctx.pc += 4;
}
// Anything not yet implemented
_ => {
tracing::warn!("Unimplemented opcode at {:#010x}: {:?} [{:08X}]", ctx.pc, instr.opcode, instr.raw);
ctx.pc += 4;
return StepResult::Unimplemented(instr.opcode);
}
}
StepResult::Continue
}
/// X-form indexed effective address: `EA = (rA == 0 ? 0 : GPR[rA]) + GPR[rB]`.
///
/// The sum is formed with wrapping 64-bit arithmetic and then truncated to
/// its low 32 bits.
#[inline]
fn ea_indexed(ctx: &PpcContext, instr: &DecodedInstr) -> u32 {
    // An rA field of zero contributes a literal 0 rather than GPR[0].
    let base = match instr.ra() {
        0 => 0u64,
        ra => ctx.gpr[ra],
    };
    let index = ctx.gpr[instr.rb()];
    base.wrapping_add(index) as u32
}
/// Shared body of the CR logical instructions: reads the CR bits selected by
/// the instruction's `crba` and `crbb` fields, combines them with `op`, and
/// stores the outcome in the CR bit selected by `crbd`.
fn cr_logical(ctx: &mut PpcContext, instr: &DecodedInstr, op: fn(bool, bool) -> bool) {
    let lhs = ctx.get_cr_bit(instr.crba());
    let rhs = ctx.get_cr_bit(instr.crbb());
    let combined = op(lhs, rhs);
    ctx.set_cr_bit(instr.crbd(), combined);
}
/// Build the 32-bit mask used by rlwinm/rlwimi/rlwnm.
///
/// Bit numbers are MSB-0 (bit 0 is the most significant bit). When
/// `mb <= me` the mask covers bits `mb..=me`; otherwise it wraps around and
/// covers bits `mb..=31` together with bits `0..=me`.
fn rlw_mask(mb: u32, me: u32) -> u32 {
    // Ones from bit `mb` down to bit 31.
    let from_mb = u32::MAX >> mb;
    // Ones from bit 0 down to bit `me`.
    let through_me = u32::MAX << (31 - me);
    if mb > me {
        // Wrapped mask: union of the two runs.
        from_mb | through_me
    } else {
        // Contiguous mask: intersection of the two runs.
        from_mb & through_me
    }
}
/// Build the 64-bit "left" mask for rldicl: bits `0..mb` (MSB-0 numbering)
/// are cleared and bits `mb..=63` are set.
fn rld_mask_left(mb: u32) -> u64 {
    // A shift by zero already yields all ones, so no special case is needed.
    u64::MAX >> mb
}
/// Build the 64-bit "right" mask for rldicr: bits `0..=me` (MSB-0 numbering)
/// are set and bits `me+1..=63` are cleared. Any `me >= 63` yields all ones.
fn rld_mask_right(me: u32) -> u64 {
    // Saturating at zero folds every `me >= 63` into a shift by 0.
    let cleared_low_bits = 63u32.saturating_sub(me);
    u64::MAX << cleared_low_bits
}
/// Extract the three vector register indices of a VMX instruction.
///
/// The returned tuple is ordered `(vA, vB, vD)`. For the `*128` opcodes
/// listed below the indices come from the `va128`/`vb128`/`vd128` fields;
/// every other opcode uses the plain `ra`/`rb`/`rd` fields.
#[inline]
fn vmx_reg_triple(instr: &DecodedInstr) -> (usize, usize, usize) {
    match instr.opcode {
        // VMX128 encodings carry their register numbers in the 128-form fields.
        PpcOpcode::vand128
        | PpcOpcode::vandc128
        | PpcOpcode::vor128
        | PpcOpcode::vxor128
        | PpcOpcode::vnor128
        | PpcOpcode::vsel128
        | PpcOpcode::vcmpeqfp128
        | PpcOpcode::vcmpgefp128
        | PpcOpcode::vcmpgtfp128
        | PpcOpcode::vmrghw128
        | PpcOpcode::vmrglw128
        | PpcOpcode::vslw128
        | PpcOpcode::vsrw128
        | PpcOpcode::vsraw128
        | PpcOpcode::vrlw128
        | PpcOpcode::vcmpequw128 => (instr.va128(), instr.vb128(), instr.vd128()),
        // Standard encodings: vA = ra, vB = rb, vD = rd.
        _ => (instr.ra(), instr.rb(), instr.rd()),
    }
}
/// Set CR6 from a vector-compare result mask (used when Rc=1 on vector compares).
///
/// CR6.LT is set when every word of `r` is `0xFFFF_FFFF` (all elements true),
/// CR6.EQ is set when every word is zero (all elements false); GT and SO are
/// always cleared.
#[inline]
fn update_cr6_from_vmask(r: &[u32; 4], ctx: &mut PpcContext) {
    // AND of all words is all-ones only if each word is all-ones;
    // OR of all words is zero only if each word is zero.
    let (and_all, or_all) = r
        .iter()
        .fold((u32::MAX, 0u32), |(conj, disj), &word| (conj & word, disj | word));
    let field = &mut ctx.cr[6];
    field.lt = and_all == 0xFFFF_FFFF;
    field.gt = false;
    field.eq = or_all == 0;
    field.so = false;
}
/// Round an f64 to single precision and back (matches xenia's ToSingle),
/// honouring FPSCR[RN].
///
/// Thin wrapper: the actual work is done by `fpscr::round_to_single`, which
/// receives `ctx` and `val` unchanged.
#[inline]
fn to_single(ctx: &PpcContext, val: f64) -> f64 {
fpscr::round_to_single(ctx, val)
}
/// Update CR1 from FPSCR (used when Rc=1 on FPU instructions).
/// CR1 = FPSCR[FX, FEX, VX, OX] (bits 0-3).
///
/// Thin wrapper: forwards to `fpscr::update_cr1`, which performs the copy.
#[inline]
fn update_cr1_from_fpscr(ctx: &mut PpcContext) {
fpscr::update_cr1(ctx);
}
#[cfg(test)]
mod tests {
use super::*;
/// Flat 64 KiB scratch memory for the interpreter tests.
///
/// Each byte lives in its own `Cell<u8>` so that writes can go through a
/// shared `&self` reference, which is what the `MemoryAccess` write methods
/// take.
struct TestMem {
    data: Box<[std::cell::Cell<u8>]>,
}

impl TestMem {
    /// Create a zero-filled memory of 65536 bytes.
    fn new() -> Self {
        let data = std::iter::repeat_with(|| std::cell::Cell::new(0u8))
            .take(65536)
            .collect();
        Self { data }
    }
}
/// Big-endian, bounds-checked accessors over the flat test buffer. Any access
/// that reaches past the end of `data` panics via slice indexing.
impl MemoryAccess for TestMem {
    fn read_u8(&self, addr: u32) -> u8 {
        self.data[addr as usize].get()
    }

    fn read_u16(&self, addr: u32) -> u16 {
        let base = addr as usize;
        // Bytes are gathered lowest address first and decoded as big-endian.
        u16::from_be_bytes(std::array::from_fn(|i| self.data[base + i].get()))
    }

    fn read_u32(&self, addr: u32) -> u32 {
        let base = addr as usize;
        u32::from_be_bytes(std::array::from_fn(|i| self.data[base + i].get()))
    }

    fn read_u64(&self, addr: u32) -> u64 {
        let base = addr as usize;
        u64::from_be_bytes(std::array::from_fn(|i| self.data[base + i].get()))
    }

    fn write_u8(&self, addr: u32, val: u8) {
        self.data[addr as usize].set(val);
    }

    fn write_u16(&self, addr: u32, val: u16) {
        let base = addr as usize;
        // Most significant byte goes to the lowest address.
        for (offset, byte) in val.to_be_bytes().iter().copied().enumerate() {
            self.data[base + offset].set(byte);
        }
    }

    fn write_u32(&self, addr: u32, val: u32) {
        let base = addr as usize;
        for (offset, byte) in val.to_be_bytes().iter().copied().enumerate() {
            self.data[base + offset].set(byte);
        }
    }

    fn write_u64(&self, addr: u32, val: u64) {
        let base = addr as usize;
        for (offset, byte) in val.to_be_bytes().iter().copied().enumerate() {
            self.data[base + offset].set(byte);
        }
    }

    // The test memory never hands out raw host pointers.
    fn translate(&self, _addr: u32) -> Option<*const u8> {
        None
    }

    fn translate_mut(&self, _addr: u32) -> Option<*mut u8> {
        None
    }
}
/// Place the 32-bit instruction word `raw` at `addr` in the test memory.
/// Delegates to `write_u32`, so the word is stored big-endian.
fn write_instr(mem: &TestMem, addr: u32, raw: u32) {
mem.write_u32(addr, raw);
}
#[test]
fn test_addi() {
    // Program: a single `addi r3, r0, 42` at address 0.
    let mem = TestMem::new();
    write_instr(&mem, 0, (14 << 26) | (3 << 21) | (0 << 16) | 42);
    let mut cpu = PpcContext::new();
    cpu.pc = 0;
    let outcome = step(&mut cpu, &mem);
    assert_eq!(outcome, StepResult::Continue);
    assert_eq!(cpu.gpr[3], 42);
    assert_eq!(cpu.pc, 4);
}
#[test]
fn test_addis() {
    // `addis r3, r0, 1` is expected to leave 0x10000 in r3.
    let mem = TestMem::new();
    let word: u32 = (15 << 26) | (3 << 21) | (0 << 16) | 1;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[3], 0x10000);
}
#[test]
fn test_lwz_stw() {
    let mem = TestMem::new();
    // Data word that the load is expected to fetch from 0x100.
    mem.write_u32(0x100, 0xDEADBEEF);
    let program: [u32; 2] = [
        (14 << 26) | (1 << 21) | (0 << 16) | 0x100, // addi r1, r0, 0x100
        (32 << 26) | (3 << 21) | (1 << 16) | 0,     // lwz  r3, 0(r1)
    ];
    for (slot, &word) in program.iter().enumerate() {
        write_instr(&mem, (slot as u32) * 4, word);
    }
    let mut cpu = PpcContext::new();
    cpu.pc = 0;
    // Execute both instructions back to back.
    for _ in 0..program.len() {
        step(&mut cpu, &mem);
    }
    assert_eq!(cpu.gpr[3], 0xDEADBEEF);
}
#[test]
fn test_branch() {
    // `b +0x10` located at 0x100: LI field = 4, which is 0x10 once shifted.
    let mem = TestMem::new();
    write_instr(&mem, 0x100, (18 << 26) | (4 << 2));
    let mut cpu = PpcContext::new();
    cpu.pc = 0x100;
    step(&mut cpu, &mem);
    assert_eq!(cpu.pc, 0x110);
}
#[test]
fn test_bl_updates_lr() {
    // `bl +0x10` located at 0x200: same encoding as `b`, with LK=1.
    let mem = TestMem::new();
    write_instr(&mem, 0x200, (18 << 26) | (4 << 2) | 1);
    let mut cpu = PpcContext::new();
    cpu.pc = 0x200;
    step(&mut cpu, &mem);
    // The branch is taken and LR receives the address after the `bl`.
    assert_eq!(cpu.pc, 0x210);
    assert_eq!(cpu.lr, 0x204);
}
#[test]
fn test_cmp_and_bc() {
    let mem = TestMem::new();
    // cmpi cr0, 0, r3, 10 (32-bit compare)
    let cmpi: u32 = (11 << 26) | (0 << 23) | (0 << 21) | (3 << 16) | (10u32 & 0xFFFF);
    // bc 12,2,+8 (bo=12, bi=2: branch when CR0.EQ is set)
    let bc: u32 = (16 << 26) | (12 << 21) | (2 << 16) | (2 << 2);
    write_instr(&mem, 0, cmpi);
    write_instr(&mem, 4, bc);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 10;
    cpu.pc = 0;
    // First step: the compare sets CR0.EQ because r3 == 10.
    step(&mut cpu, &mem);
    assert!(cpu.cr[0].eq);
    // Second step: the conditional branch is taken, 4 + 8 = 12.
    step(&mut cpu, &mem);
    assert_eq!(cpu.pc, 12);
}
#[test]
fn test_rlwinm() {
    // rlwinm r4, r3, 8, 0, 31: rotate left by 8 with the full mask (MB=0,
    // ME=31), so the top byte wraps around into the low byte.
    let mem = TestMem::new();
    let word: u32 = (21 << 26) | (3 << 21) | (4 << 16) | (8 << 11) | (0 << 6) | (31 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0xFF00_FF00;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[4], 0x00FF_00FF);
}
#[test]
fn test_ori_nop() {
    // 0x60000000 encodes `ori r0, r0, 0`, i.e. a NOP.
    let mem = TestMem::new();
    write_instr(&mem, 0, 0x60000000);
    let mut cpu = PpcContext::new();
    cpu.gpr[0] = 0xDEAD;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    // r0 is untouched and execution moves on to the next word.
    assert_eq!(cpu.gpr[0], 0xDEAD);
    assert_eq!(cpu.pc, 4);
}
#[test]
fn test_fadd() {
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // Both operands are exactly representable in binary floating point, so
    // the expected sum is exactly 6.0. The literal 3.14 is deliberately
    // avoided: clippy's deny-by-default `approx_constant` lint treats it as
    // a truncated PI.
    ctx.fpr[1] = 3.25;
    ctx.fpr[2] = 2.75;
    // fadd f3, f1, f2: opcode 63, subop 21 (bits 1-5), frD=3, frA=1, frB=2
    // 63<<26 | 3<<21 | 1<<16 | 2<<11 | 21<<1
    let raw: u32 = (63 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1);
    write_instr(&mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mem);
    assert!((ctx.fpr[3] - 6.0).abs() < 1e-10);
}
#[test]
fn test_fmul() {
    // fmul f3, f1, f2: opcode 63, subop 25; frD=3, frA=1, and the second
    // factor travels in the frC field (shifted by 6), frB field left at 0.
    let mem = TestMem::new();
    let word: u32 = (63 << 26) | (3 << 21) | (1 << 16) | (0 << 11) | (2 << 6) | (25 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.fpr[1] = 3.0;
    cpu.fpr[2] = 4.0;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert!((cpu.fpr[3] - 12.0).abs() < 1e-10);
}
#[test]
fn test_fcmpu() {
    // fcmpu cr0, f1, f2: opcode 63, subop 0 (X-form), crfD=0, frA=1, frB=2
    let mem = TestMem::new();
    let word: u32 = (63 << 26) | (0 << 23) | (0 << 21) | (1 << 16) | (2 << 11) | (0 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.fpr[1] = 5.0;
    cpu.fpr[2] = 3.0;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    // 5.0 > 3.0, so only GT may be set in CR0.
    assert!(cpu.cr[0].gt);
    assert!(!cpu.cr[0].lt);
    assert!(!cpu.cr[0].eq);
}
#[test]
fn test_fctiwzx() {
    // fctiwz f2, f1: opcode 63, subop 15 (X-form), frD=2, frB=1
    let mem = TestMem::new();
    let word: u32 = (63 << 26) | (2 << 21) | (0 << 16) | (1 << 11) | (15 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.fpr[1] = 42.7;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    // The integer result sits in the raw bit pattern of f2; its low word
    // should be 42.
    let pattern = cpu.fpr[2].to_bits();
    assert_eq!(pattern as u32, 42);
}
#[test]
fn test_fmadd() {
    // fmadd f4, f1, f3, f2: opcode 63, subop 29 (bits 1-5),
    // frD=4, frA=1, frB=2 (addend), frC=3 (multiplier).
    let mem = TestMem::new();
    let word: u32 = (63 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (29 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.fpr[1] = 2.0; // frA
    cpu.fpr[2] = 3.0; // frB
    cpu.fpr[3] = 5.0; // frC
    cpu.pc = 0;
    step(&mut cpu, &mem);
    // (2.0 * 5.0) + 3.0 = 13.0
    assert!((cpu.fpr[4] - 13.0).abs() < 1e-10);
}
#[test]
fn test_ctx_default_state_matches_canary() {
    let fresh = PpcContext::new();
    // LR starts at the halt sentinel, so a top-level blr drops out cleanly.
    assert_eq!(fresh.lr, crate::context::LR_HALT_SENTINEL);
    // VSCR: NJ set (denormals flush to zero), SAT clear.
    assert!(fresh.vscr_nj());
    assert!(!fresh.vscr_sat());
    // VRSAVE defaults to "save all" per canary.
    assert_eq!(fresh.vrsave, 0xFFFF_FFFF);
}
#[test]
fn test_vaddubs_saturates_and_sets_vscr_sat() {
    // vaddubs vD=4, vA=2, vB=3: opcode 4 with XO=512 in the low bits.
    let mem = TestMem::new();
    let word: u32 = (4 << 26) | (4 << 21) | (2 << 16) | (3 << 11) | 512;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    // Every lane computes 0xF0 + 0x20 = 0x110, which saturates to 0xFF.
    cpu.vr[2] = xenia_types::Vec128::from_bytes([0xF0; 16]);
    cpu.vr[3] = xenia_types::Vec128::from_bytes([0x20; 16]);
    cpu.pc = 0;
    let outcome = step(&mut cpu, &mem);
    assert_eq!(outcome, StepResult::Continue);
    assert_eq!(cpu.vr[4].as_bytes(), [0xFFu8; 16]);
    assert!(cpu.vscr_sat(), "SAT should be set after saturation");
}
#[test]
fn test_ldarx_stdcx_pair() {
    let mem = TestMem::new();
    mem.write_u64(0x1000, 0xDEADBEEF_CAFEBABE);
    // ldarx r3, r4, r5 (XO=84)
    let load_word: u32 = (31 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (84 << 1);
    // stdcx. r6, r4, r5 (XO=214, Rc=1)
    let store_word: u32 = (31 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (214 << 1) | 1;

    let mut cpu = PpcContext::new();
    cpu.gpr[4] = 0x1000;
    cpu.gpr[5] = 0;

    // Load-and-reserve: the value arrives in r3 and a reservation is held.
    write_instr(&mem, 0, load_word);
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[3], 0xDEADBEEF_CAFEBABE);
    assert!(cpu.has_reservation);

    // Store-conditional to the same address: succeeds, writes r6, and
    // releases the reservation.
    cpu.gpr[6] = 0x1111_1111_2222_2222;
    write_instr(&mem, 4, store_word);
    step(&mut cpu, &mem);
    assert!(cpu.cr[0].eq, "stdcx. should succeed and set CR0.EQ");
    assert_eq!(mem.read_u64(0x1000), 0x1111_1111_2222_2222);
    assert!(!cpu.has_reservation);
}
#[test]
fn test_mcrxr_moves_xer_condition_bits_and_clears_them() {
    // mcrxr crfD=3: opcode 31, XO=512
    let mem = TestMem::new();
    let word: u32 = (31 << 26) | (3 << 23) | (512 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.xer_so = 1;
    cpu.xer_ov = 0;
    cpu.xer_ca = 1;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    // CR3 receives the old SO/OV/CA values in its LT/GT/EQ bits ...
    assert!(cpu.cr[3].lt, "LT should mirror old XER[SO]");
    assert!(!cpu.cr[3].gt, "GT should mirror old XER[OV]");
    assert!(cpu.cr[3].eq, "EQ should mirror old XER[CA]");
    // ... and all three XER bits end up cleared.
    assert_eq!(cpu.xer_so, 0);
    assert_eq!(cpu.xer_ov, 0);
    assert_eq!(cpu.xer_ca, 0);
}
// ---------- Phase 2 fixes: OE / overflow ----------
/// Encode an `add`-family instruction word: primary opcode 31, XO=266, the
/// three register fields `rd`/`ra`/`rb`, the OE flag at bit 10 and the Rc
/// flag at bit 0 (LSB-0 numbering).
fn addx_raw(rd: u32, ra: u32, rb: u32, oe: bool, rc: bool) -> u32 {
    let mut word = 31u32 << 26;
    word |= rd << 21;
    word |= ra << 16;
    word |= rb << 11;
    word |= u32::from(oe) << 10;
    word |= 266 << 1;
    word |= u32::from(rc);
    word
}
#[test]
fn addo_sets_xer_ov_on_signed_overflow_and_stickies_so() {
    // PPCBUG-012 (32-bit ABI): INT32_MAX + 1 overflows to INT32_MIN.
    let mem = TestMem::new();
    write_instr(&mem, 0, addx_raw(5, 3, 4, true, false));
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = i32::MAX as u32 as u64;
    cpu.gpr[4] = 1;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x8000_0000u64);
    assert_eq!(cpu.xer_ov, 1, "OV must be set on signed overflow");
    assert_eq!(cpu.xer_so, 1, "SO must be stickied from OV");
}
#[test]
fn addo_clears_xer_ov_when_no_overflow_but_keeps_sticky_so() {
    let mem = TestMem::new();
    write_instr(&mem, 0, addx_raw(5, 3, 4, true, false));
    let mut cpu = PpcContext::new();
    // Pretend an earlier instruction overflowed: both OV and SO are stale-set.
    cpu.xer_ov = 1;
    cpu.xer_so = 1;
    cpu.gpr[3] = 1;
    cpu.gpr[4] = 2;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 3);
    assert_eq!(cpu.xer_ov, 0, "OV must clear when no overflow");
    assert_eq!(cpu.xer_so, 1, "SO is sticky; stays set");
}
#[test]
fn add_without_oe_does_not_touch_xer() {
    // Plain `add` (OE=0): XER[OV] and XER[SO] must stay at zero even for
    // operands at the top of the signed 64-bit range.
    let mem = TestMem::new();
    write_instr(&mem, 0, addx_raw(5, 3, 4, false, false));
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = i64::MAX as u64;
    cpu.gpr[4] = 1;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.xer_ov, 0);
    assert_eq!(cpu.xer_so, 0);
}
#[test]
fn addx_rc_uses_32bit_compare_in_xbox_abi() {
    // PPCBUG-012+020 (32-bit ABI): r3 + r4 has low word 0xFFFFFFFF, which is
    // -1 when viewed as i32, so `add.` must report CR0.LT. A 64-bit compare
    // would classify the same value as positive (CR0.GT).
    let mem = TestMem::new();
    write_instr(&mem, 0, addx_raw(5, 3, 4, false, true));
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x0000_0000_FFFF_FFFF;
    cpu.gpr[4] = 0;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFF);
    assert!(cpu.cr[0].lt, "32-bit ABI: 0xFFFFFFFF as i32 is -1, CR0.LT");
    assert!(!cpu.cr[0].gt);
    assert!(!cpu.cr[0].eq);
}
#[test]
fn subfo_sets_xer_ov_on_int32_min_minus_one() {
    // PPCBUG-017 (32-bit ABI): subfo r5, r3, r4 with r4 = INT32_MIN and
    // r3 = 1 computes INT32_MIN - 1, which wraps to INT32_MAX with OV=1.
    let mem = TestMem::new();
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 1;
    cpu.gpr[4] = 0x8000_0000u64;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x7FFF_FFFFu64);
    assert_eq!(cpu.xer_ov, 1);
    assert_eq!(cpu.xer_so, 1);
}
#[test]
fn subfo_no_spurious_ov_when_result_has_bit31_set() {
    // PPCBUG-017 review-fix regression. subfo r5, r3, r4 with r3 = 1 and
    // r4 = 0x80000001 yields 0x80000000 = i32::MIN: a legitimate negative
    // result whose true difference (-2147483648) fits in an i32, so OV must
    // stay clear. A predicate that looks at the u64 view of the result
    // (+2147483648) would flag OV spuriously.
    let mem = TestMem::new();
    // subfo r5, r3, r4 (XO=40, OE=1)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (40 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 1;
    cpu.gpr[4] = 0x8000_0001u64;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x8000_0000u64);
    assert_eq!(cpu.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV");
}
#[test]
fn subfco_no_spurious_ov_when_result_has_bit31_set() {
    // PPCBUG-007, same review-fix as the subfo case: the subfc OE path must
    // judge overflow on the 32-bit result.
    let mem = TestMem::new();
    // subfco r5, r3, r4 (XO=8, OE=1)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (8 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 1;
    cpu.gpr[4] = 0x8000_0001u64;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x8000_0000u64);
    assert_eq!(cpu.xer_ov, 0, "legitimate i32::MIN result must NOT trigger OV");
}
#[test]
fn mullwo_sets_xer_ov_when_product_overflows_32_bits() {
    // mullwo r5, r3, r4 (XO=235, OE=1): INT32_MAX * 2 does not fit in 32 bits.
    let mem = TestMem::new();
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (235 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = i32::MAX as u64;
    cpu.gpr[4] = 2u64;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.xer_ov, 1);
    assert_eq!(cpu.xer_so, 1);
}
#[test]
fn divwo_sets_xer_ov_on_divide_by_zero() {
    // divwo r5, r3, r4 (XO=491, OE=1) with a zero divisor.
    let mem = TestMem::new();
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (491 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 10;
    cpu.gpr[4] = 0;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.xer_ov, 1);
    // The quotient is undefined in the spec; canary uses 0.
    assert_eq!(cpu.gpr[5], 0);
}
#[test]
fn nego_sets_ov_only_on_int_min() {
    // PPCBUG-006 (32-bit ABI): INT_MIN is 0x80000000 in the low word, not
    // 0x8000000000000000.
    let mem = TestMem::new();
    // nego r5, r3 (XO=104, OE=1)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (1 << 10) | (104 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x8000_0000;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.xer_ov, 1);
    // Negating INT_MIN wraps back to INT_MIN in the low word; upper word zero.
    assert_eq!(cpu.gpr[5], 0x0000_0000_8000_0000);
}
#[test]
fn neg_clean_input_no_upper_bits() {
    // PPCBUG-006 regression: neg of 5 must give 0x00000000_FFFFFFFB, not the
    // 64-bit `!ra + 1` value 0xFFFFFFFF_FFFFFFFB.
    let mem = TestMem::new();
    // neg r5, r3 (XO=104)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (104 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 5;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFB);
}
#[test]
fn norx_not_simplified_keeps_upper_bits_clean() {
    // PPCBUG-029: `not rA, rB` is nor with rS == rB. A 64-bit complement
    // would set the upper 32 bits on every execution; under the 32-bit ABI
    // the result has to be truncated.
    let mem = TestMem::new();
    // norx r5, r3, r3 (XO=124)
    let word: u32 = (31 << 26) | (3 << 21) | (5 << 16) | (3 << 11) | (124 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x0000_0000_0000_00FF;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FF00, "upper 32 bits must be zero");
}
#[test]
fn eqvx_self_self_self_sets_low32_to_all_ones() {
    // PPCBUG-031: `eqv rA, rA, rA` is a common "set to all ones" idiom.
    // The 64-bit !(0 ^ 0) is u64::MAX; the 32-bit ABI expects
    // 0x00000000_FFFFFFFF.
    let mem = TestMem::new();
    // eqvx r3, r3, r3 (XO=284)
    let word: u32 = (31 << 26) | (3 << 21) | (3 << 16) | (3 << 11) | (284 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[3], 0x0000_0000_FFFF_FFFF);
}
#[test]
fn andcx_bit_clear_keeps_upper_clean() {
    // PPCBUG-033: `andc rA, rS, rB` = rS & !rB; a 64-bit !rB must not leak
    // into the upper word of the result.
    let mem = TestMem::new();
    // andcx r5, r3, r4 (XO=60)
    let word: u32 = (31 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (60 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0xFFFF_FFFF; // rS
    cpu.gpr[4] = 0x000F; // rB: the low bits to clear
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFF0);
}
#[test]
fn subfex_clean_inputs_no_upper_bits() {
    // PPCBUG-008 (32-bit ABI): RT = !RA + RB + CA. With RA=5, RB=10, CA=1:
    // !5u32 = 0xFFFFFFFA, +10 = 0x1_00000004, +1 = 0x1_00000005, low word 5.
    let mem = TestMem::new();
    // subfex r5, r3, r4 (XO=136)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (136 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 5;
    cpu.gpr[4] = 10;
    cpu.xer_ca = 1;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 5);
    assert_eq!(cpu.xer_ca, 1, "rb>=ra → CA=1 (10 > 5)");
}
#[test]
fn andisx_sign_bit_set_classifies_lt() {
    // PPCBUG-023: andis. r4, r3, 0x8000 with r3 = 0xFFFFFFFF produces
    // 0x80000000, which must be reported as CR0.LT (i32 view).
    let mem = TestMem::new();
    // andis. r4, r3, 0x8000: opcode 29, uimm16 = 0x8000
    let word: u32 = (29 << 26) | (3 << 21) | (4 << 16) | 0x8000;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0xFFFF_FFFFu64;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[4], 0x8000_0000u64);
    assert!(cpu.cr[0].lt, "result=0x80000000 → i32 view negative → CR0.LT");
}
#[test]
fn slwx_high_bit_result_classifies_lt() {
    // PPCBUG-044: a slw. result of 0x80000000 must be classified CR0.LT
    // under the 32-bit ABI, not CR0.GT as a 64-bit view would give.
    let mem = TestMem::new();
    // slwx. r5, r3, r4 (XO=24, Rc=1)
    let word: u32 = (31 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (24 << 1) | 1;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x4000_0000u64;
    cpu.gpr[4] = 1;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x8000_0000u64);
    assert!(cpu.cr[0].lt, "0x80000000 as i32 is negative");
}
#[test]
fn lha_negative_halfword_zero_extends_upper() {
    // PPCBUG-095: a halfword of 0x8000 in memory must load as
    // gpr[rD] = 0x00000000_FFFF8000.
    let ram = TestMem::new();
    ram.write_u16(0x100, 0x8000);
    // lha r5, 0(r3): opcode 42
    let word: u32 = (42 << 26) | (5 << 21) | (3 << 16) | 0;
    write_instr(&ram, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x100;
    cpu.pc = 0;
    step(&mut cpu, &ram);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_8000u64);
}
#[test]
fn lhaux_negative_halfword_clean_writeback() {
    // PPCBUG-098: indexed update form. A halfword of 0xFFFF must load as
    // rD = 0x00000000_FFFFFFFF, and rA must be updated to the EA.
    let ram = TestMem::new();
    ram.write_u16(0x200, 0xFFFF);
    // lhaux r5, r3, r4 (XO=375)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (375 << 1);
    write_instr(&ram, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x100; // rA
    cpu.gpr[4] = 0x100; // rB; rA + rB = 0x200
    cpu.pc = 0;
    step(&mut cpu, &ram);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFFu64);
    assert_eq!(cpu.gpr[3], 0x200, "rA updated to EA");
}
#[test]
fn lwa_sign_extends_to_i64() {
    // "Load Word and Algebraic": the loaded i32 is sign-extended to i64, so
    // memory 0x80000000 (i32 = -2147483648) gives rD = 0xFFFFFFFF_80000000.
    // PPCBUG-105 (a zero-extending variant) was reverted after the post-P8
    // review because it deviated from PowerISA; canary uses
    // SignExtend(INT64_TYPE).
    let ram = TestMem::new();
    ram.write_u32(0x100, 0x8000_0000);
    // lwa r5, 0(r3): opcode 58, XO=2 (DS-form, ds=0)
    let word: u32 = (58 << 26) | (5 << 21) | (3 << 16) | 2;
    write_instr(&ram, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x100;
    cpu.pc = 0;
    step(&mut cpu, &ram);
    assert_eq!(cpu.gpr[5], 0xFFFF_FFFF_8000_0000u64,
        "lwa must sign-extend negative word to i64 per PowerISA");
}
#[test]
fn mullwx_overflow_truncates_to_32() {
    // PPCBUG-009: mullwo of 0x10000 by 0x10000 has the product 0x1_00000000.
    // The low word is 0 and the OE overflow flag must fire.
    let mem = TestMem::new();
    // mullwo r5, r3, r4 (XO=235, OE=1)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (1 << 10) | (235 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x10000;
    cpu.gpr[4] = 0x10000;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0, "low 32 bits = 0");
    assert_eq!(cpu.xer_ov, 1, "overflow detected");
}
#[test]
fn divwx_negative_quotient_zero_extends() {
    // PPCBUG-010+011: -10 / 3 = -3 must be written as 0x00000000_FFFFFFFD,
    // not 0xFFFFFFFF_FFFFFFFD, while CR0.LT still fires because the i32 view
    // of 0xFFFFFFFD is negative.
    let mem = TestMem::new();
    // divwx. r5, r3, r4 (XO=491, Rc=1)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (491 << 1) | 1;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = (-10i32) as u32 as u64;
    cpu.gpr[4] = 3;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFDu64);
    assert!(cpu.cr[0].lt, "CR0.LT must fire for negative i32 quotient");
}
#[test]
fn srawx_negative_value_zero_extends_upper() {
    // PPCBUG-041+043: sraw of a negative i32 by 1 stays negative as an i32,
    // but the register writeback is zero-extended to u64, not sign-extended.
    let mem = TestMem::new();
    // srawx. r5, r3, r4 (XO=792, Rc=1)
    let word: u32 = (31 << 26) | (3 << 21) | (5 << 16) | (4 << 11) | (792 << 1) | 1;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x8000_0000u64; // i32::MIN
    cpu.gpr[4] = 1;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_C000_0000u64);
    assert!(cpu.cr[0].lt);
}
#[test]
fn srawix_high_count_negative_input_yields_low32_all_ones() {
    // PPCBUG-042+043: srawi by 31 on a negative input fills the low word
    // with ones (0xFFFFFFFF) and leaves the upper word zero, rather than
    // producing u64::MAX.
    let mem = TestMem::new();
    // srawix r5, r3, 31 (XO=824)
    let word: u32 = (31 << 26) | (3 << 21) | (5 << 16) | (31 << 11) | (824 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x8000_0000u64;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FFFFu64);
}
#[test]
fn addi_li_neg_one_sign_extends_per_powerisa() {
    // SWAPBUG-001 / PPCBUG-001 revert: `li r3, -1` (= addi r3, r0, -1) must
    // sign-extend simm16 to 64 bits per PowerISA, i.e. produce
    // 0xFFFFFFFF_FFFFFFFF. The reverted form truncated to 32 bits, which
    // was canary-divergent and broke the swap path.
    let mem = TestMem::new();
    // addi r3, r0, -1: opcode 14, simm16 = 0xFFFF
    let word: u32 = (14 << 26) | (3 << 21) | (0 << 16) | 0xFFFF;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[3], 0xFFFF_FFFF_FFFF_FFFFu64);
}
#[test]
fn addic_carry_uses_32bit_compare() {
    // PPCBUG-002: addic with ra = 0xFFFFFFFF_00000001 and simm = -1 (0xFFFF).
    // On the low words, 0x00000001 + 0xFFFFFFFF = 0x00000000 with CA=1.
    // A 64-bit `result < ra` check would compare against the polluted upper
    // bits of ra and could report the wrong carry; the truncated form
    // ignores the upper 32 bits.
    let mem = TestMem::new();
    // addic r4, r3, -1: opcode 12
    let word: u32 = (12 << 26) | (4 << 21) | (3 << 16) | 0xFFFF;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0xFFFFFFFF_00000001u64;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    // Low word of the sum is zero, and the addition carried out.
    assert_eq!(cpu.gpr[4], 0);
    assert_eq!(cpu.xer_ca, 1, "32-bit compare must see CA=1");
}
#[test]
fn mulli_overflow_wraps_to_32() {
    // PPCBUG-004: mulli must truncate to 32 bits even when the upper word
    // of RA is polluted. With ra = u64::MAX (-1 as i64), a 64-bit multiply
    // by 2 would write 0xFFFFFFFF_FFFFFFFE; the truncated result is
    // 0xFFFFFFFE. This input discriminates between the two behaviours.
    let mem = TestMem::new();
    // mulli r4, r3, 2: opcode 7
    let word: u32 = (7 << 26) | (4 << 21) | (3 << 16) | 2;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = u64::MAX;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[4], 0xFFFF_FFFEu64, "low 32 bits = -2 in i32; upper 32 zero");
}
#[test]
fn subficx_neg_simm_zero_extends() {
    // PPCBUG-005: subfic r4, r3, -1 with r3 = 5 computes imm - ra on 32 bits:
    // 0xFFFFFFFF - 5 = 0xFFFFFFFA. Sign-extending the immediate to a full
    // u64 before subtracting would pollute the upper word.
    let mem = TestMem::new();
    // subfic r4, r3, -1: opcode 8, simm = 0xFFFF
    let word: u32 = (8 << 26) | (4 << 21) | (3 << 16) | 0xFFFF;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 5;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[4], 0x0000_0000_FFFF_FFFAu64);
    assert_eq!(cpu.xer_ca, 1, "0xFFFFFFFF >= 5 → CA=1");
}
#[test]
fn subfcx_addis_incident_case() {
    // PPCBUG-007: regression for the exact operands that exposed the addis
    // bug. After P1's addis fix this case passed only coincidentally; P4
    // batch 3 makes subfc itself robust against 64-bit GPR pollution.
    let mem = TestMem::new();
    // subfcx r5, r3, r4 (XO=8): result = rb - ra = 0xFFFFFFD0 (low 32)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (8 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    // ra: polluted upper word, low word 0x828F3F98.
    cpu.gpr[3] = 0xFFFF_FFFF_828F_3F98u64;
    // rb: clean, low word 0x828F3F68.
    cpu.gpr[4] = 0x0000_0000_828F_3F68u64;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    // Unsigned 32-bit view: 0x828F3F68 < 0x828F3F98, so there is no carry.
    assert_eq!(cpu.xer_ca, 0, "32-bit unsigned: rb < ra → CA=0");
    // 0x828F3F68 - 0x828F3F98 = 0xFFFFFFD0 in the low word, upper word zero.
    assert_eq!(cpu.gpr[5], 0xFFFF_FFD0u64);
}
#[test]
fn extsbx_negative_byte_zero_extends_upper() {
    // PPCBUG-034+036 (coupled): extsb of 0x80, a negative byte, must give
    // 0x00000000_FFFFFF80 and NOT 0xFFFFFFFF_FFFFFF80, while CR0.LT still
    // fires because the i32 view of 0xFFFFFF80 is negative.
    let mem = TestMem::new();
    // extsbx. r5, r3 (XO=954, Rc=1)
    let word: u32 = (31 << 26) | (3 << 21) | (5 << 16) | (954 << 1) | 1;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x80;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_FF80);
    assert!(cpu.cr[0].lt, "CR0.LT must fire for negative i32");
    assert!(!cpu.cr[0].gt);
}
#[test]
fn extshx_negative_halfword_zero_extends_upper() {
    // PPCBUG-035+037 (coupled): extsh of 0x8000 must give 0x00000000_FFFF8000.
    let mem = TestMem::new();
    // extshx. r5, r3 (XO=922, Rc=1)
    let word: u32 = (31 << 26) | (3 << 21) | (5 << 16) | (922 << 1) | 1;
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0x8000;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.gpr[5], 0x0000_0000_FFFF_8000);
    assert!(cpu.cr[0].lt);
}
#[test]
fn subfmex_ra_max_ca_zero_clears_ca() {
    // PPCBUG-019: subfme with RA = u32::MAX and CA = 0 must leave CA = 0,
    // because the 32-bit !RA is 0. A `!ra != 0` predicate evaluated on the
    // u64 register is always true here (complementing the u64 view of
    // u32::MAX leaves the upper bits non-zero) and would wrongly set CA=1.
    let mem = TestMem::new();
    // subfmex r5, r3 (XO=232)
    let word: u32 = (31 << 26) | (5 << 21) | (3 << 16) | (232 << 1);
    write_instr(&mem, 0, word);
    let mut cpu = PpcContext::new();
    cpu.gpr[3] = 0xFFFF_FFFFu64;
    cpu.xer_ca = 0;
    cpu.pc = 0;
    step(&mut cpu, &mem);
    assert_eq!(cpu.xer_ca, 0, "RA=u32::MAX, CA=0 → !RA32==0, CA=0");
}
// ---------- Phase 2 fixes: trap TO-field ----------
#[test]
fn tw_with_to_zero_never_fires() {
    // A `tw` whose TO field (bits 6-10) is zero selects no trap condition,
    // so execution must simply fall through to the next instruction.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 5;
    ctx.gpr[4] = 5;
    // tw 0, r3, r4 — primary 31, XO=4.
    write_instr(&mut mem, 0, (31 << 26) | (0 << 21) | (3 << 16) | (4 << 11) | (4 << 1));
    let outcome = step(&mut ctx, &mut mem);
    assert_eq!(outcome, StepResult::Continue, "TO=0 must never trap");
    assert_eq!(ctx.pc, 4);
}
#[test]
fn tw_eq_fires_on_equal() {
    // With TO=4 (EQ only) and equal operands the step must report a trap.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 5;
    ctx.gpr[4] = 5;
    // tw 4, r3, r4 — primary 31, XO=4.
    write_instr(&mut mem, 0, (31 << 26) | (4 << 21) | (3 << 16) | (4 << 11) | (4 << 1));
    let outcome = step(&mut ctx, &mut mem);
    assert_eq!(outcome, StepResult::Trap);
}
#[test]
fn tw_eq_does_not_fire_on_unequal() {
    // With TO=4 (EQ only) and differing operands (5 vs 7) no trap may fire.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 5;
    ctx.gpr[4] = 7;
    // tw 4, r3, r4 — primary 31, XO=4.
    write_instr(&mut mem, 0, (31 << 26) | (4 << 21) | (3 << 16) | (4 << 11) | (4 << 1));
    let outcome = step(&mut ctx, &mut mem);
    assert_eq!(outcome, StepResult::Continue);
}
#[test]
fn twi_compares_low_32_bits_only() {
    // r3 carries garbage in its upper half; only the low word (5) may take
    // part in the comparison against the immediate 5.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 0xDEAD_BEEF_0000_0005;
    // twi 4, r3, 5 — primary 3, TO=4 (EQ), RA=3, SI=5.
    let simm = 5u32 & 0xFFFF;
    write_instr(&mut mem, 0, (3 << 26) | (4 << 21) | (3 << 16) | simm);
    let outcome = step(&mut ctx, &mut mem);
    assert_eq!(outcome, StepResult::Trap, "word-width compare matches low 32");
}
// ---------- Phase 2 fixes: mcrfs ----------
// ---------- Phase 2h: FPU / FPSCR ----------
#[test]
fn fmsub_inf_minus_inf_sets_vxisi() {
    // PPCBUG-203 regression: for fmsub the product a*c is +∞ and the
    // subtracted operand b is +∞, so the add step sees +∞ + (-∞) and must
    // raise VXISI. Before the fix the add step had no VXISI check.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = f64::INFINITY; // a
    ctx.fpr[3] = 1.0; // c
    ctx.fpr[2] = f64::INFINITY; // b
    // fmsub f4, f1, f3, f2 — A-form, primary 63, XO=28:
    // (63<<26)|(rd<<21)|(ra<<16)|(rb<<11)|(rc<<6)|(28<<1)
    let insn = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (28 << 1);
    write_instr(&mut mem, 0, insn);
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::VXISI, 0, "fmsub ∞-∞ must set VXISI");
}
#[test]
fn fnmadd_nan_input_preserves_nan_sign() {
    // PPCBUG-205 regression: the ISA forbids negating a NaN result. When
    // a*c+b is a NaN, fnmadd must hand back that NaN as-is, not -NaN.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let input_nan = f64::NAN;
    ctx.fpr[1] = input_nan;
    ctx.fpr[2] = 1.0;
    ctx.fpr[3] = 2.0;
    // fnmadd f4, f1, f3, f2 — primary 63, XO=31.
    let insn = (63u32 << 26) | (4 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (31 << 1);
    write_instr(&mut mem, 0, insn);
    step(&mut ctx, &mut mem);
    // The output has to be a NaN whose sign bit equals the input NaN's.
    let result = ctx.fpr[4];
    assert!(result.is_nan(), "result must be NaN");
    assert_eq!(
        result.is_sign_negative(),
        input_nan.is_sign_negative(),
        "fnmadd must preserve NaN sign (no negation on NaN)"
    );
}
#[test]
fn fadd_inf_minus_inf_sets_vxisi() {
    // +∞ + (-∞) is an invalid operation: fadd must raise VXISI together
    // with the FX sticky bit and the VX summary bit.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = f64::INFINITY;
    ctx.fpr[2] = f64::NEG_INFINITY;
    // fadd f3, f1, f2 — primary 63, XO=21.
    write_instr(&mut mem, 0, (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1));
    step(&mut ctx, &mut mem);
    let status = ctx.fpscr;
    // VXISI: PPC bit 8 → mask 1<<23.
    assert_ne!(status & fpscr::VXISI, 0);
    // FX (set on any new exception): mask 1<<31.
    assert_ne!(status & fpscr::FX, 0);
    // VX summary: mask 1<<29.
    assert_ne!(status & fpscr::VX, 0);
}
#[test]
fn fdiv_zero_over_zero_sets_vxzdz() {
    // 0.0 / 0.0 must flag VXZDZ in FPSCR.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = 0.0;
    ctx.fpr[2] = 0.0;
    // fdiv f3, f1, f2 — primary 63, sub-opcode 18.
    write_instr(&mut mem, 0, (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (18 << 1));
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::VXZDZ, 0);
}
#[test]
fn fdiv_finite_over_zero_sets_zx() {
    // A finite non-zero dividend over 0.0 must flag ZX in FPSCR.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = 1.0;
    ctx.fpr[2] = 0.0;
    // fdiv f3, f1, f2 — primary 63, sub-opcode 18.
    write_instr(&mut mem, 0, (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (18 << 1));
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::ZX, 0);
}
#[test]
fn fadd_sets_fprf_from_result() {
    // 2.5 + 3.5 = +6.0, so FPRF must classify the result as a positive
    // normal number (POS_NORMAL = 0b0_0100).
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = 2.5;
    ctx.fpr[2] = 3.5;
    // fadd f3, f1, f2 — primary 63, XO=21.
    write_instr(&mut mem, 0, (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1));
    step(&mut ctx, &mut mem);
    let masked = ctx.fpscr & fpscr::FPRF_MASK;
    let class_bits = (masked >> 12) as u8;
    assert_eq!(class_bits, fpscr::fprf::POS_NORMAL);
}
#[test]
fn frsp_honours_fpscr_rn_toward_zero() {
    // The input has to be a double that round-to-nearest and
    // round-toward-zero narrow to *different* singles; otherwise the test
    // also passes when FPSCR.RN is ignored. The previous input
    // (1.0 + one double ULP) narrows to 1.0 under both modes.
    //
    // v = 1 + 2^-24 + 2^-52 lies just above the midpoint between the
    // adjacent singles 1.0 and 1 + 2^-23: nearest rounds it up, truncation
    // rounds it down to exactly 1.0.
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let v = f64::from_bits(0x3FF0_0000_1000_0001);
    // Premise check: the host's round-to-nearest narrowing goes *up*.
    assert_eq!(
        (v as f32).to_bits(),
        0x3F80_0001,
        "test input must round up under round-to-nearest"
    );
    ctx.fpr[1] = v;
    ctx.fpscr = 0x1; // RN = 01 → toward zero
    // frsp f3, f1 (opcode 63, subop 12)
    let raw = (63u32 << 26) | (3 << 21) | (1 << 11) | (12 << 1);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Rounded toward zero the result must be exactly 1.0; a
    // nearest-rounding frsp would produce 1 + 2^-23 instead.
    assert_eq!(ctx.fpr[3], 1.0_f64);
}
#[test]
fn fcmpu_sets_so_on_nan_and_fprf_unordered() {
    // Comparing a NaN against 1.0 is unordered: the target CR field gets
    // only SO, and FPRF reports the unordered code 0b0_0001.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = f64::NAN;
    ctx.fpr[2] = 1.0;
    // fcmpu crfD=4, f1, f2: (63<<26) | (crfd<<23) | (ra<<16) | (rb<<11) | (0<<1)
    write_instr(&mut mem, 0, (63u32 << 26) | (4 << 23) | (1 << 16) | (2 << 11));
    step(&mut ctx, &mut mem);
    assert!(ctx.cr[4].so, "unordered → SO set");
    assert!(!(ctx.cr[4].lt || ctx.cr[4].gt || ctx.cr[4].eq));
    let masked = ctx.fpscr & fpscr::FPRF_MASK;
    let class_bits = (masked >> 12) as u8;
    assert_eq!(class_bits, 0b0_0001);
}
#[test]
fn fcmpo_on_qnan_sets_vxvc() {
    // An ordered compare with a quiet NaN operand (Rust's f64::NAN) must
    // flag VXVC in FPSCR.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = f64::NAN;
    ctx.fpr[2] = 1.0;
    // fcmpo crfD=4, f1, f2 — primary 63, sub-opcode 32.
    write_instr(&mut mem, 0, (63u32 << 26) | (4 << 23) | (1 << 16) | (2 << 11) | (32 << 1));
    step(&mut ctx, &mut mem);
    assert_ne!(ctx.fpscr & fpscr::VXVC, 0);
}
// ---------- Phase 2i: VMX NaN propagation ----------
#[test]
fn vmaxfp_propagates_nan() {
    // Lane 1 of vA holds a NaN; vmaxfp must carry it through instead of
    // picking the vB value, while the other lanes take the ordinary max.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let lhs = [1.0f32, f32::NAN, 3.0, 4.0];
    let rhs = [5.0f32, 6.0, 7.0, 8.0];
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array(lhs);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array(rhs);
    // vmaxfp vD=4, vA=2, vB=3 — primary 4, XO=1034.
    write_instr(&mut mem, 0, (4u32 << 26) | (4 << 21) | (2 << 16) | (3 << 11) | 1034);
    step(&mut ctx, &mut mem);
    let lanes = ctx.vr[4].as_f32x4();
    // Lane 1 has to be NaN rather than 6.0 from the vB side.
    assert!(lanes[1].is_nan());
    // Remaining lanes: plain maximum.
    assert_eq!(lanes[0], 5.0);
    assert_eq!(lanes[2], 7.0);
    assert_eq!(lanes[3], 8.0);
}
#[test]
fn vminfp_propagates_nan() {
    // Lane 2 of vB holds a NaN; vminfp must carry it into the result.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let lhs = [1.0f32, 2.0, 3.0, 4.0];
    let rhs = [5.0f32, 6.0, f32::NAN, 8.0];
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array(lhs);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array(rhs);
    // vminfp vD=4, vA=2, vB=3 — primary 4, XO=1098.
    write_instr(&mut mem, 0, (4u32 << 26) | (4 << 21) | (2 << 16) | (3 << 11) | 1098);
    step(&mut ctx, &mut mem);
    let lanes = ctx.vr[4].as_f32x4();
    assert!(lanes[2].is_nan());
}
// ---------- Phase 2j: VMX denorm flush ----------
#[test]
fn vmaddfp_flushes_denormal_inputs() {
    // vA is filled with the smallest positive f32 denormal, vB with zeros
    // and vC with ones. Once the denormal is flushed to zero the result of
    // denorm*1 + 0 is 0 in every lane.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let tiny = f32::from_bits(1);
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([tiny; 4]);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([0.0f32; 4]);
    ctx.vr[4] = xenia_types::Vec128::from_f32x4_array([1.0f32; 4]);
    // vmaddfp vD=5, vA=2, vB=3, vC=4 — A-form, primary 4, XO=46, vC in the
    // rc field: (4<<26) | (5<<21) | (2<<16) | (3<<11) | (4<<6) | 46
    let insn = (4u32 << 26) | (5 << 21) | (2 << 16) | (3 << 11) | (4 << 6) | 46;
    write_instr(&mut mem, 0, insn);
    step(&mut ctx, &mut mem);
    let lanes = ctx.vr[5].as_f32x4();
    assert_eq!(lanes, [0.0f32; 4]);
}
/// `vmaddfp128 vD, vA, vB` (VMX128, primary op 5, key2 = 0b001101) uses vD
/// as the accumulator: `vD <- (vA × vD) + vB`. Per canary
/// `ppc_emit_altivec.cc:786-810`, *all three* inputs are flushed
/// unconditionally ahead of the fused multiply-add.
#[test]
fn vmaddfp128_flushes_denormal_inputs() {
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let tiny = f32::from_bits(1);
    // VA=v1, VD=v2 and VB=v3 are all filled with denormals.
    for reg in 1..=3 {
        ctx.vr[reg] = xenia_types::Vec128::from_f32x4_array([tiny; 4]);
    }
    // vmaddfp128 vD=v2, vA=v1, vB=v3: op6=5, vd_lo=2, va_lo=1, vb_lo=3,
    // key2=0b001101. With every input flushed: 0*0+0 = 0.
    let insn: u32 = (5u32 << 26) | (2 << 21) | (1 << 16) | (3 << 11) | (3 << 6) | (1 << 4);
    write_instr(&mut mem, 0, insn);
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]);
}
// ---- PPCBUG-424+425: vmaddfp128/vmaddcfp128 operand swap ----
// ISA for both: (VD) <- (VA × VD) + VB. Previous code computed VA×VB+VD and VD×VB+VA.
// Test uses distinct VA, VB, VD registers so the swap is visible.
// Encoding: op6=5, key2=0b001101 (vmaddfp128) / 0b010001 (vmaddcfp128).
// VA=v1=[2.0], VB=v2=[10.0], VD=v3=[3.0] → expected 2.0×3.0+10.0 = 16.0.
// Buggy vmaddfp128: 2.0×10.0+3.0 = 23.0. Buggy vmaddcfp128: 3.0×10.0+2.0 = 32.0.
#[test]
fn vmaddfp128_operand_order_va_times_vd_plus_vb() {
    // Distinct values per operand make a swapped operand order visible:
    // VA=v1=2.0, VB=v2=10.0, VD=v3=3.0 (v3 doubles as the destination).
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let splat = |x: f32| xenia_types::Vec128::from_f32x4_array([x; 4]);
    ctx.vr[1] = splat(2.0f32);
    ctx.vr[2] = splat(10.0f32);
    ctx.vr[3] = splat(3.0f32);
    // vmaddfp128 vD=v3, vA=v1, vB=v2 — op5, key2=0b001101 (bits22-25=3, bit27=1).
    let insn: u32 = (5u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (3 << 6) | (1 << 4);
    write_instr(&mut mem, 0, insn);
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.vr[3].as_f32x4(), [16.0f32; 4], "VA×VD+VB = 2*3+10 = 16");
}
#[test]
fn vmaddcfp128_operand_order_va_times_vd_plus_vb() {
    // Same operand layout as the vmaddfp128 ordering test: VA=v1=2.0,
    // VB=v2=10.0, VD=v3=3.0, expecting 2*3+10 = 16 in every lane.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let splat = |x: f32| xenia_types::Vec128::from_f32x4_array([x; 4]);
    ctx.vr[1] = splat(2.0f32);
    ctx.vr[2] = splat(10.0f32);
    ctx.vr[3] = splat(3.0f32);
    // vmaddcfp128 vD=v3, vA=v1, vB=v2 — op5, key2=0b010001 (bits22-25=4, bit27=1).
    let insn: u32 = (5u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (4 << 6) | (1 << 4);
    write_instr(&mut mem, 0, insn);
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.vr[3].as_f32x4(), [16.0f32; 4], "VA×VD+VB = 2*3+10 = 16");
}
/// VMX128 `vnmsubfp128 vD, vA, vB` (key2 = 0b010101). Canary
/// `ppc_emit_altivec.cc:1133-1160` flushes all three inputs in the
/// helper. Semantics: `vD <- -((vA * vB) - vD) = vD - vA*vB`.
///
/// The encoded instruction uses vA=v0 (zero), vB=v3 (1.0) and vD=v2
/// (denormal). Earlier comments described vA as v2, but the instruction
/// word has always carried va_lo=0. That matters: with vA=v2 the
/// unflushed result `denorm - denorm*1.0` is already 0 and the test could
/// not detect a missing flush, whereas with vA=0 the unflushed result is
/// `denorm - 0*1.0 = denorm`.
#[test]
fn vnmsubfp128_flushes_denormal_inputs() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let denorm = f32::from_bits(1);
    // vA is stated explicitly so the test does not rely on the register
    // file's initial contents.
    ctx.vr[0] = xenia_types::Vec128::from_f32x4_array([0.0f32; 4]);
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm; 4]);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([1.0f32; 4]);
    // vnmsubfp128 vD=v2, vA=v0, vB=v3: op6=5, vd_lo=2, va_lo=0, vb_lo=3,
    // key2 = 0b010101 (21) encoded via bits 22-25 = 0101 and bit 27 = 1.
    // Same word as the former literal 0x1440_1950.
    let raw: u32 = (5u32 << 26) | (2 << 21) | (0 << 16) | (3 << 11) | (5 << 6) | (1 << 4);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Without flush: denorm - 0*1.0 = denorm (non-zero). With the inputs
    // flushed, vD reads as 0 and the result is 0 in every lane.
    assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]);
}
/// VMX128 `vmsum4fp128 vD, vA, vB` computes the 4-lane dot product
/// and broadcasts the result. Canary
/// `ppc_emit_altivec.cc:1077-1084` flushes the *output* denormal
/// (not the inputs). A dot product that sums to a subnormal must
/// read back as 0.
///
/// The instruction word previously used here (0x1440_19D0) carried
/// va_lo=0, i.e. vA=v0 rather than the intended v2. This test never
/// writes v0, so if that register starts out zeroed the dot product was
/// 0 regardless of flushing and the assertion could not fail. The
/// encoding now selects vA=v2, so the unflushed dot product really is a
/// subnormal.
#[test]
fn vmsum4fp128_flushes_denormal_output() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    let denorm = f32::from_bits(1);
    // Dot product = denorm * 1.0 + 0 + 0 + 0 = denorm.
    ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([denorm, 0.0, 0.0, 0.0]);
    ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([1.0f32, 0.0, 0.0, 0.0]);
    // vmsum4fp128 vD=v2, vA=v2, vB=v3: op6=5, vd_lo=2, va_lo=2, vb_lo=3,
    // key2 = 0b011101 (29) encoded via bits 22-25 = 0111 and bit 27 = 1.
    // Resulting word: 0x1442_19D0.
    let raw: u32 = (5u32 << 26) | (2 << 21) | (2 << 16) | (3 << 11) | (7 << 6) | (1 << 4);
    write_instr(&mut mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    // Subnormal output must flush to 0 and broadcast across all lanes.
    assert_eq!(ctx.vr[2].as_f32x4(), [0.0f32; 4]);
}
// ---------- Phase 2k: lve*x / stve*x element masking ----------
#[test]
fn lvebx_loads_byte_into_ea_slot() {
    // The byte at EA=0x1003 must land in vector slot EA & 0xF = 3, with
    // every other byte of the destination register reading as zero.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    mem.write_u8(0x1003, 0xAB);
    ctx.gpr[4] = 0x1003;
    ctx.gpr[5] = 0;
    // lvebx v1, r4, r5: (31<<26) | (1<<21) | (4<<16) | (5<<11) | (7<<1)
    write_instr(&mut mem, 0, (31u32 << 26) | (1 << 21) | (4 << 16) | (5 << 11) | (7 << 1));
    step(&mut ctx, &mut mem);
    let loaded = ctx.vr[1].as_bytes();
    assert_eq!(loaded[3], 0xAB);
    for slot in (0..16).filter(|&s| s != 3) {
        assert_eq!(loaded[slot], 0, "byte {} should be zero", slot);
    }
}
#[test]
fn stvewx_stores_only_word_slot() {
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Fill the 16 bytes at 0x1000 with a 0x55 sentinel so that bytes the
    // store must not touch can be checked afterwards.
    for addr in 0x1000..0x1010 {
        mem.write_u8(addr, 0x55);
    }
    // Source vector (big-endian view): word0 = 0xDEADBEEF, words 1..3 = 0.
    let mut payload = [0u8; 16];
    payload[..4].copy_from_slice(&[0xDE, 0xAD, 0xBE, 0xEF]);
    ctx.vr[1] = xenia_types::Vec128::from_bytes(payload);
    // EA = 0x1000 selects slot 0, so word0 is stored at 0x1000.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    // stvewx v1, r4, r5: (31<<26) | (1<<21) | (4<<16) | (5<<11) | (199<<1)
    write_instr(&mut mem, 0, (31u32 << 26) | (1 << 21) | (4 << 16) | (5 << 11) | (199 << 1));
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u32(0x1000), 0xDEAD_BEEF);
    // The 12 bytes after the stored word must still hold the sentinel.
    for offset in 4..16usize {
        let addr = 0x1000 + offset;
        assert_eq!(mem.data[addr].get(), 0x55, "byte {} was overwritten", addr);
    }
}
// ---------- Phase 2l: reservation cache-line granule ----------
#[test]
fn stwcx_succeeds_within_same_cache_line() {
    // A reservation taken at 0x1004 must still satisfy a stwcx. aimed at
    // 0x1008, because both addresses fall in the same reservation line.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    mem.write_u32(0x1004, 0xAAAA_AAAA);
    ctx.gpr[4] = 0x1004;
    ctx.gpr[5] = 0;
    ctx.pc = 0;
    // lwarx r3, r4, r5: (31<<26)|(3<<21)|(4<<16)|(5<<11)|(20<<1)
    let load_reserve = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, load_reserve);
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation);
    // Retarget the conditional store 4 bytes further on, inside the line.
    ctx.gpr[4] = 0x1008;
    ctx.gpr[6] = 0xBBBB_BBBB;
    // stwcx. r6, r4, r5 — primary 31, XO=150, Rc=1.
    let store_cond = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 4, store_cond);
    step(&mut ctx, &mut mem);
    // Same line → the store succeeds: CR0.EQ set and memory updated.
    assert!(ctx.cr[0].eq);
    assert_eq!(mem.read_u32(0x1008), 0xBBBB_BBBB);
}
#[test]
fn stwcx_fails_across_cache_lines() {
    // A reservation taken at 0x1000 must not satisfy a stwcx. aimed at
    // 0x1080, which lies in a different reservation line.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.pc = 0;
    // lwarx r3, r4, r5 — primary 31, XO=20.
    let load_reserve = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    write_instr(&mut mem, 0, load_reserve);
    step(&mut ctx, &mut mem);
    // Retarget the conditional store at the other line.
    ctx.gpr[4] = 0x1080;
    ctx.gpr[6] = 0xCCCC_CCCC;
    // stwcx. r6, r4, r5 — primary 31, XO=150, Rc=1.
    let store_cond = (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 4, store_cond);
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "should fail across cache line");
    assert_eq!(mem.read_u32(0x1080), 0, "memory not written on failure");
}
// ---------- PPCBUG-107/140: invalidate_for_write via plain stw ----------
/// PPCBUG-107/140: an ordinary `stw` that hits the reserved line has to
/// drop the reservation, so the `stwcx.` that follows reports failure
/// (CR0.EQ=0) and leaves memory alone.
#[test]
fn lwarx_then_plain_stw_invalidates_reservation() {
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    // r4 = target address, r5 = index (0), r6 = value for the plain store,
    // r7 = value the conditional store would write.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xBBBB_BBBB;
    ctx.gpr[7] = 0xCCCC_CCCC;
    // Program, one instruction per word starting at address 0:
    //   lwarx  r3, r4, r5   (primary 31, XO 20)
    //   stw    r6, 0(r4)    (primary 36, D-form)
    //   stwcx. r7, r4, r5   (primary 31, XO 150, Rc=1)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let stw_plain = (36u32 << 26) | (6 << 21) | (4 << 16) | 0;
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 0, lwarx);
    write_instr(&mut mem, 4, stw_plain);
    write_instr(&mut mem, 8, stwcx);
    ctx.pc = 0;
    // Step 1: lwarx reserves the line containing 0x1000.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    // Step 2: the plain stw lands and is expected to invalidate the reservation.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u32(0x1000), 0xBBBB_BBBB, "plain stw must land");
    // Step 3: stwcx. must now fail and must not touch memory.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stw");
    assert_eq!(mem.read_u32(0x1000), 0xBBBB_BBBB, "stwcx. must not overwrite on failure");
}
/// Happy-path guard: when nothing is stored between `lwarx` and `stwcx.`,
/// the conditional store must still succeed (CR0.EQ=1) and write memory,
/// so the invalidation fix did not break the normal sequence.
#[test]
fn lwarx_then_stwcx_succeeds_without_intervening_store() {
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[7] = 0xDEAD_BEEF;
    // Program: lwarx r3, r4, r5 followed directly by stwcx. r7, r4, r5.
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 0, lwarx);
    write_instr(&mut mem, 4, stwcx);
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    step(&mut ctx, &mut mem);
    assert!(ctx.cr[0].eq, "stwcx. must succeed when reservation is intact");
    assert_eq!(mem.read_u32(0x1000), 0xDEAD_BEEF, "stwcx. must write on success");
}
// ---------- PPCBUG-130: invalidate_for_write via plain stb ----------
/// PPCBUG-130: an ordinary `stb` that hits the reserved line has to drop
/// the reservation, so the `stwcx.` that follows reports failure
/// (CR0.EQ=0) and leaves memory alone.
#[test]
fn lwarx_then_plain_stb_invalidates_reservation() {
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    // r4 = target address, r5 = index (0), r6 = byte for the plain store,
    // r7 = value the conditional store would write.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xAB;
    ctx.gpr[7] = 0xCCCC_CCCC;
    // Program, one instruction per word starting at address 0:
    //   lwarx  r3, r4, r5   (primary 31, XO 20)
    //   stb    r6, 0(r4)    (primary 38, D-form)
    //   stwcx. r7, r4, r5   (primary 31, XO 150, Rc=1)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let stb_plain = (38u32 << 26) | (6 << 21) | (4 << 16) | 0;
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 0, lwarx);
    write_instr(&mut mem, 4, stb_plain);
    write_instr(&mut mem, 8, stwcx);
    ctx.pc = 0;
    // Step 1: lwarx reserves the line containing 0x1000.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    // Step 2: the plain stb lands and is expected to invalidate the reservation.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u8(0x1000), 0xAB, "plain stb must land");
    // Step 3: stwcx. must now fail and must not touch memory.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stb");
    assert_eq!(mem.read_u8(0x1000), 0xAB, "stwcx. must not overwrite on failure");
}
// ---------- PPCBUG-150: invalidate_for_write via plain std ----------
/// PPCBUG-150: an ordinary `std` that hits the reserved line has to drop
/// the reservation, so the `stwcx.` that follows reports failure
/// (CR0.EQ=0) and leaves memory alone.
#[test]
fn lwarx_then_plain_std_invalidates_reservation() {
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    // r4 = target address, r5 = index (0), r6 = doubleword for the plain
    // store, r7 = value the conditional store would write.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[6] = 0xDEADBEEF_CAFEBABEu64;
    ctx.gpr[7] = 0xCCCC_CCCC;
    // Program, one instruction per word starting at address 0:
    //   lwarx  r3, r4, r5   (primary 31, XO 20)
    //   std    r6, 0(r4)    (primary 62, DS-form, XO=0b00)
    //   stwcx. r7, r4, r5   (primary 31, XO 150, Rc=1)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let std_plain = (62u32 << 26) | (6 << 21) | (4 << 16) | 0;
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 0, lwarx);
    write_instr(&mut mem, 4, std_plain);
    write_instr(&mut mem, 8, stwcx);
    ctx.pc = 0;
    // Step 1: lwarx reserves the line containing 0x1000.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    // Step 2: the plain std lands and is expected to invalidate the reservation.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "plain std must land");
    // Step 3: stwcx. must now fail and must not touch memory.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain std");
    assert_eq!(mem.read_u64(0x1000), 0xDEADBEEF_CAFEBABEu64, "stwcx. must not overwrite on failure");
}
// ---------- PPCBUG-160: stmw multi-line invalidation ----------
/// PPCBUG-160: `stmw r28, 0x78(r8)` with r8=0x1000 stores four words at
/// 0x1078, 0x107C, 0x1080 and 0x1084, straddling the cache-line boundary
/// at 0x1080 (RESERVATION_MASK=0x7F, i.e. 128-byte lines).
///
/// The reservation is held on the *second* line (0x1080, addressed via
/// r4) while the store begins in the first line (0x1000-0x107F). The
/// reservation must be invalidated all the same, which exercises the
/// multi-line loop in the stmw arm.
#[test]
fn lwarx_then_plain_stmw_spans_two_lines_and_invalidates() {
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    // r4 = reservation address (second line), r5 = index for lwarx/stwcx.,
    // r8 = stmw base, r7 = stwcx. value, r28-r31 = the words stmw writes.
    ctx.gpr[4] = 0x1080;
    ctx.gpr[5] = 0;
    ctx.gpr[7] = 0xCCCC_CCCC;
    ctx.gpr[8] = 0x1000;
    ctx.gpr[28] = 0xAAAA_0001;
    ctx.gpr[29] = 0xBBBB_0002;
    ctx.gpr[30] = 0xCCCC_0003;
    ctx.gpr[31] = 0xDDDD_0004;
    // Program, one instruction per word starting at address 0:
    //   lwarx  r3, r4, r5    — reserves the line containing 0x1080
    //   stmw   r28, 0x78(r8) — primary 47, rs=28, ra=8, d=0x0078
    //   stwcx. r7, r4, r5    — expected to fail
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let stmw = (47u32 << 26) | (28 << 21) | (8 << 16) | 0x0078;
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 0, lwarx);
    write_instr(&mut mem, 4, stmw);
    write_instr(&mut mem, 8, stwcx);
    ctx.pc = 0;
    // Step 1: lwarx takes the reservation on 0x1080's line.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    // Step 2: stmw writes across both lines; all four words must land.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u32(0x1078), 0xAAAA_0001, "stmw word 0 must land");
    assert_eq!(mem.read_u32(0x107C), 0xBBBB_0002, "stmw word 1 must land");
    assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stmw word 2 (line 2) must land");
    assert_eq!(mem.read_u32(0x1084), 0xDDDD_0004, "stmw word 3 must land");
    // Step 3: the reservation on line 2 is gone, so stwcx. must fail.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail: stmw invalidated the reservation on line 2");
    assert_eq!(mem.read_u32(0x1080), 0xCCCC_0003, "stwcx. must not overwrite on failure");
}
// ---------- PPCBUG-167: invalidate_for_write via plain stfd ----------
/// PPCBUG-167: an ordinary `stfd` that hits the reserved line has to drop
/// the reservation, so the `stwcx.` that follows reports failure
/// (CR0.EQ=0). The test also checks that the stored double reads back
/// with its bit pattern in big-endian order.
#[test]
fn lwarx_then_plain_stfd_invalidates_reservation() {
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    ctx.reservation_table = Some(table.clone());
    ctx.hw_id = 0;
    // r4 = target address, r5 = index (0), r7 = value the conditional
    // store would write; f5 carries a recognisable bit pattern.
    ctx.gpr[4] = 0x1000;
    ctx.gpr[5] = 0;
    ctx.gpr[7] = 0xCCCC_CCCC;
    ctx.fpr[5] = f64::from_bits(0xCAFEBABE_DEADBEEFu64);
    // Program, one instruction per word starting at address 0:
    //   lwarx  r3, r4, r5   (primary 31, XO 20)
    //   stfd   f5, 0(r4)    (primary 54, D-form)
    //   stwcx. r7, r4, r5   (primary 31, XO 150, Rc=1)
    let lwarx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1);
    let stfd_plain = (54u32 << 26) | (5 << 21) | (4 << 16) | 0;
    let stwcx = (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1;
    write_instr(&mut mem, 0, lwarx);
    write_instr(&mut mem, 4, stfd_plain);
    write_instr(&mut mem, 8, stwcx);
    ctx.pc = 0;
    // Step 1: lwarx reserves the line containing 0x1000.
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    // Step 2: the plain stfd lands and is expected to invalidate the
    // reservation. Per the original note, write_f64 delegates to write_u64,
    // which writes big-endian; the read-back verifies that layout.
    step(&mut ctx, &mut mem);
    let stored = mem.read_u64(0x1000);
    assert_eq!(
        stored,
        0xCAFEBABE_DEADBEEFu64,
        "stfd must store FPR bit pattern in big-endian order"
    );
    // Step 3: stwcx. must now fail and must not touch memory.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by plain stfd");
    assert_eq!(mem.read_u64(0x1000), 0xCAFEBABE_DEADBEEFu64, "stwcx. must not overwrite on failure");
}
// ---------- Phase 2m: SPR DEC + TBL/TBU write ----------
#[test]
fn mfspr_dec_returns_dec_field() {
    // Reading SPR 22 (DEC) must return the context's `dec` field.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.dec = 0x1234_5678;
    // The instruction stores the SPR number with its two 5-bit halves
    // swapped (the decoder undoes the swap); for 22 the field is 0x2C0,
    // placed in bits 11..20.
    let spr = 22u32;
    let spr_field = ((spr & 0x1F) << 5) | ((spr >> 5) & 0x1F);
    // mfspr r3, DEC — primary 31, XO=339.
    write_instr(&mut mem, 0, (31u32 << 26) | (3 << 21) | (spr_field << 11) | (339 << 1));
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.gpr[3], 0x1234_5678);
}
#[test]
fn mtspr_tbl_write_updates_low_half() {
    // Writing TBL (SPR 284) must replace only the low 32 bits of the
    // timebase and leave the upper half untouched.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.timebase = 0xAAAA_BBBB_CCCC_DDDD;
    ctx.gpr[3] = 0x1111_2222;
    // SPR number encoded with its 5-bit halves swapped.
    let spr = 284u32;
    let spr_field = ((spr & 0x1F) << 5) | ((spr >> 5) & 0x1F);
    // mtspr TBL_WRITE, r3 — primary 31, XO=467.
    write_instr(&mut mem, 0, (31u32 << 26) | (3 << 21) | (spr_field << 11) | (467 << 1));
    step(&mut ctx, &mut mem);
    // step() post-increments the timebase by 1, hence value+1 in the low half.
    let low_half = ctx.timebase & 0xFFFF_FFFF;
    let high_half = ctx.timebase >> 32;
    assert_eq!(low_half, 0x1111_2222u64 + 1);
    assert_eq!(high_half, 0xAAAA_BBBB);
}
// PPCBUG-053: bcx CTR zero-test must use 32-bit comparison. When prior
// 64-bit pollution (e.g. via negx → mtctr) leaves CTR upper 32 bits
// non-zero, the 64-bit `ctx.ctr != 0` would loop forever even when the
// 32-bit counter has decremented to zero.
#[test]
fn bcx_bdnz_uses_32bit_ctr_compare() {
    // CTR starts with 1 in both halves. After the decrement the low word
    // is 0 while the high word is still 1; a 32-bit zero test must treat
    // the counter as exhausted and fall through.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.ctr = 0x0000_0001_0000_0001;
    // bdnz +8: primary 16, BO=16 (decrement, branch if CTR!=0, ignore CR),
    // BI=0, BD/4=2.
    write_instr(&mut mem, 0, (16u32 << 26) | (16 << 21) | (0 << 16) | (2 << 2));
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.ctr, 0x0000_0001_0000_0000);
    assert_eq!(ctx.pc, 4);
}
#[test]
fn bclrx_uses_32bit_ctr_compare() {
    // Same CTR setup as the bdnz case, but through bdnzlr: with the low
    // word at 0 after the decrement the branch to LR must not be taken.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.lr = 0x100;
    ctx.ctr = 0x0000_0001_0000_0001;
    // bdnzlr: primary 19, BO=16 (decrement, branch if CTR!=0), BI=0, XO=16.
    write_instr(&mut mem, 0, (19u32 << 26) | (16 << 21) | (0 << 16) | (16 << 1));
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.ctr, 0x0000_0001_0000_0000);
    assert_eq!(ctx.pc, 4);
}
// PPCBUG-054: mtspr CTR must truncate the source GPR to 32 bits, matching
// canary's `f.Truncate(ctr, INT32_TYPE)`. Prevents upstream 64-bit GPR
// pollution from poisoning the 32-bit CTR counter independently of the
// bcx zero-test fix.
#[test]
fn mtspr_ctr_truncates_to_32_bits() {
    // Only the low word of the source GPR may reach CTR.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 0xFFFF_FFFF_8000_0001;
    // SPR number 9 (CTR) encoded with its 5-bit halves swapped.
    let spr = 9u32;
    let spr_field = ((spr & 0x1F) << 5) | ((spr >> 5) & 0x1F);
    // mtspr CTR, r3 — primary 31, XO=467.
    write_instr(&mut mem, 0, (31u32 << 26) | (3 << 21) | (spr_field << 11) | (467 << 1));
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.ctr, 0x8000_0001);
}
// ───────────────────────────────────────────────────────────────────────
// P8 — test gap closure (PPCBUG-055/067/070/081-085/089)
// ───────────────────────────────────────────────────────────────────────
// PPCBUG-055: branch test gaps. Cover blr, bdnz forward+backward, bcl LK.
#[test]
fn blr_branches_to_lr_aligned() {
    // `blr` is `bclr 20, 0` (XO=16). The two low bits of LR are ignored
    // when forming the branch target.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    ctx.lr = 0x82001003;
    write_instr(&mut mem, 0x100, (19u32 << 26) | (20 << 21) | (0 << 16) | (16 << 1));
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.pc, 0x82001000, "blr aligns LR target to 4 bytes");
}
#[test]
fn bctr_branches_to_ctr_aligned() {
    // CTR = 0x82002007 must yield a branch target of 0x82002004, i.e. the
    // two low bits are dropped.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0x200;
    ctx.ctr = 0x82002007;
    // bcctr 20, 0 — primary 19, XO=528.
    write_instr(&mut mem, 0x200, (19u32 << 26) | (20 << 21) | (0 << 16) | (528 << 1));
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.pc, 0x82002004);
}
#[test]
fn bcl_lk_writes_lr_even_when_not_taken() {
    // Per the ISA, a `bcl` whose condition fails still updates LR.
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    // CR0.LT is clear, so the LT-conditioned branch below is not taken.
    ctx.cr[0] = crate::context::CrField { gt: true, lt: false, eq: false, so: false };
    // bc 12, 0, +8 with LK=1 — branch if CR0.LT=1.
    write_instr(&mut mem, 0x100, (16u32 << 26) | (12 << 21) | (0 << 16) | (2 << 2) | 1);
    step(&mut ctx, &mut mem);
    assert_eq!(ctx.pc, 0x104, "not taken — pc advances");
    assert_eq!(ctx.lr, 0x104, "lk=1 writes LR even on not-taken");
}
// PPCBUG-070: CR logical test gaps.
#[test]
fn cror_combines_cr_bits() {
    // Sources: CR bit 4 (cr1.lt = true) and CR bit 8 (cr2.lt = false).
    // Destination: CR bit 0 (cr0.lt).
    let mut mem = TestMem::new();
    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.cr[1] = crate::context::CrField { lt: true, gt: false, eq: false, so: false };
    ctx.cr[2] = crate::context::CrField { lt: false, gt: false, eq: false, so: false };
    // cror crbD=0, crbA=4, crbB=8: (19<<26)|(0<<21)|(4<<16)|(8<<11)|(449<<1)
    write_instr(&mut mem, 0, (19u32 << 26) | (0 << 21) | (4 << 16) | (8 << 11) | (449 << 1));
    step(&mut ctx, &mut mem);
    assert!(ctx.cr[0].lt, "cror 0,4,8 → cr0.lt = cr1.lt | cr2.lt = true");
}
#[test]
fn crand_combines_cr_bits() {
    use crate::context::CrField;

    // crand crbD=0, crbA=4, crbB=8 (primary opcode 19, XO=257):
    // CR bit 0 (cr0.lt) receives CR bit 4 (cr1.lt) AND CR bit 8 (cr2.lt).
    let (crb_d, crb_a, crb_b) = (0u32, 4u32, 8u32);
    let insn = (19u32 << 26) | (crb_d << 21) | (crb_a << 16) | (crb_b << 11) | (257 << 1);

    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);
    ctx.pc = 0;
    // Inputs: cr1.lt = true, cr2.lt = false.
    ctx.cr[2] = CrField { lt: false, gt: false, eq: false, so: false };
    ctx.cr[1] = CrField { lt: true, gt: false, eq: false, so: false };
    step(&mut ctx, &mut mem);

    assert!(!ctx.cr[0].lt, "crand 0,4,8 → cr0.lt = cr1.lt & cr2.lt = false");
}
#[test]
fn crxor_self_self_clears_bit() {
    // `crclr crbD` is the simplified mnemonic for `crxor crbD, crbD, crbD`.
    // Here crbD = 0 (cr0.lt); primary opcode 19, XO=193.
    let insn: u32 = (19 << 26) | (193 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Start with the target bit set so that clearing it is observable.
    ctx.cr[0] = crate::context::CrField { so: false, eq: false, gt: false, lt: true };
    step(&mut ctx, &mut mem);

    assert!(!ctx.cr[0].lt, "crxor self self → 0");
}
// PPCBUG-067: trap+sc test gaps.
#[test]
fn sc_returns_systemcall_and_advances_pc() {
    // `sc`: primary opcode 17 with bit 1 of the instruction word set.
    let sc_word = (17u32 << 26) | (1 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0x100, sc_word);

    let mut ctx = PpcContext::new();
    ctx.pc = 0x100;
    let outcome = step(&mut ctx, &mut mem);

    assert_eq!(outcome, StepResult::SystemCall);
    assert_eq!(ctx.pc, 0x104, "sc leaves pc at NIA (return address)");
}
#[test]
fn tw_to_zero_never_traps() {
    // `tw 0, r3, r4` (primary opcode 31, XO=4). With TO=0 every condition
    // mask bit is clear, so the instruction cannot trap — even though the
    // two operands below are equal.
    let insn = (31u32 << 26) | (3 << 16) | (4 << 11) | (4 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[4] = 5;
    ctx.gpr[3] = 5;
    let outcome = step(&mut ctx, &mut mem);

    assert_eq!(outcome, StepResult::Continue);
    assert_eq!(ctx.pc, 4);
}
// PPCBUG-081-085: SPR/MSR/TB/FPSCR/VSCR move test gaps.
#[test]
fn mfcr_assembles_8_fields_into_u32() {
    use crate::context::CrField;

    // `mfcr r3`: primary opcode 31, XO=19.
    let insn = (31u32 << 26) | (3 << 21) | (19 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // CR7 = 0b0001 (SO only) and CR0 = 0b1010 (LT and EQ); all other
    // fields keep their initial value.
    ctx.cr[7] = CrField { lt: false, gt: false, eq: false, so: true };
    ctx.cr[0] = CrField { lt: true, gt: false, eq: true, so: false };
    step(&mut ctx, &mut mem);

    // CR0 is the most significant nibble (0xA → 0xA000_0000) and CR7 the
    // least significant one (0x1 → 0x0000_0001).
    assert_eq!(ctx.gpr[3], 0xA000_0001);
}
#[test]
fn mtfsb1_sets_fpscr_bit() {
    // `mtfsb1 0` (primary opcode 63, XO=38) sets one FPSCR bit. Bit 0,
    // counted from the MSB, is FX — exposed here as `fpscr::FX`.
    let insn: u32 = (63 << 26) | (38 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpscr = 0;
    step(&mut ctx, &mut mem);

    assert_ne!(ctx.fpscr & fpscr::FX, 0, "mtfsb1 0 sets FPSCR.FX");
}
#[test]
fn mtfsb0_clears_fpscr_bit() {
    // `mtfsb0 0` (primary opcode 63, XO=70) clears FPSCR bit 0, i.e. FX.
    let insn: u32 = (63 << 26) | (70 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Start with FX set so that the clear is observable.
    ctx.fpscr = fpscr::FX;
    step(&mut ctx, &mut mem);

    assert_eq!(ctx.fpscr & fpscr::FX, 0, "mtfsb0 0 clears FPSCR.FX");
}
// PPCBUG-089: cache + sync test gaps. dcbz/dcbf/sync are functional;
// this adds a smoke test for sync to lock in the lwsync L-field
// disambiguation that landed in P3 (PPCBUG-641) at the disasm layer.
#[test]
fn sync_advances_pc_no_state_change() {
    // `sync` with L=0: primary opcode 31, XO=598.
    let insn: u32 = (31 << 26) | (598 << 1);

    let mut ctx = PpcContext::new();
    // Capture the state that must survive the instruction untouched.
    let fpscr_before = ctx.fpscr;
    let xer_before = ctx.xer();

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0x100, insn);
    ctx.pc = 0x100;
    let outcome = step(&mut ctx, &mut mem);

    assert_eq!(outcome, StepResult::Continue);
    assert_eq!(ctx.pc, 0x104);
    assert_eq!(ctx.xer(), xer_before);
    assert_eq!(ctx.fpscr, fpscr_before);
}
// ───────────────────────────────────────────────────────────────────────
// P8 batch 2 — load/store test gaps
// (PPCBUG-091/100/109-111/118/127/129/132/146-147/153/163/171)
// ───────────────────────────────────────────────────────────────────────
// PPCBUG-091 lbz: smoke + zero-extension.
#[test]
fn lbz_zero_extends_byte() {
    // `lbz r4, 0(r3)`: primary opcode 34, displacement 0.
    let insn = (34u32 << 26) | (4 << 21) | (3 << 16);

    let mem = TestMem::new();
    // 0xFF has its top bit set, so a sign-extending load would not yield 0xFF.
    mem.write_u8(0x100, 0xFF);
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[4], 0xFF);
}
// PPCBUG-109/110 lwbrx: byte-reversed load.
#[test]
fn lwbrx_byte_swaps_word() {
    // `lwbrx r5, r3, r4`: primary opcode 31, XO=534. The effective
    // address is r3 + r4 = 0 + 0x100.
    let insn = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (534 << 1);

    let mem = TestMem::new();
    mem.write_u32(0x100, 0xDEADBEEF); // stored big-endian
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[4] = 0x100;
    ctx.gpr[3] = 0;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[5], 0xEFBEADDE, "lwbrx loads as little-endian");
}
// PPCBUG-111 lwarx: smoke (just establishes the reservation).
#[test]
fn lwarx_loads_word_and_sets_reservation() {
    // `lwarx r5, r3, r4`: primary opcode 31, XO=20, EA = r3 + r4 = 0x100.
    // NOTE(review): only the loaded value is asserted below; the
    // reservation itself is not inspected by this smoke test.
    let insn = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (20 << 1);

    let mem = TestMem::new();
    mem.write_u32(0x100, 0x1234_5678);
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[4] = 0x100;
    ctx.gpr[3] = 0;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[5], 0x1234_5678);
}
// PPCBUG-118 ld: doubleword load.
#[test]
fn ld_loads_doubleword_be() {
    // `ld r4, 0(r3)`: primary opcode 58 with DS=0 and XO=0.
    let insn = (58u32 << 26) | (4 << 21) | (3 << 16);

    let mem = TestMem::new();
    // The doubleword is written as two words: high half at the lower address.
    mem.write_u32(0x104, 0x5566_7788);
    mem.write_u32(0x100, 0x1122_3344);
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[4], 0x1122_3344_5566_7788);
}
// PPCBUG-127 lmw + lswi.
#[test]
fn lmw_loads_consecutive_words() {
    // `lmw r30, 0(r3)`: primary opcode 46. Starting at r30 this fills
    // r30 and r31 from two consecutive words.
    let insn = (46u32 << 26) | (30 << 21) | (3 << 16);

    let mem = TestMem::new();
    mem.write_u32(0x104, 0x2222_2222);
    mem.write_u32(0x100, 0x1111_1111);
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[30], 0x1111_1111);
    assert_eq!(ctx.gpr[31], 0x2222_2222);
}
#[test]
fn lswi_loads_byte_packed_words() {
    // `lswi r5, r3, 4`: primary opcode 31, XO=597, NB=4. Four bytes fill
    // exactly one register, so r5 becomes 0xAABBCCDD.
    let nb = 4u32;
    let insn = (31u32 << 26) | (5 << 21) | (3 << 16) | (nb << 11) | (597 << 1);

    let mem = TestMem::new();
    mem.write_u32(0x100, 0xAABB_CCDD);
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[5], 0xAABB_CCDD);
}
// PPCBUG-127 lswx (now unblocked by P6 XER TBC fix).
#[test]
fn lswx_uses_xer_tbc_for_byte_count() {
    // `lswx r5, r4, r3`: primary opcode 31, XO=533. The byte count comes
    // from XER's TBC field; before the P6 fix TBC always read as 0, which
    // turned the instruction into a no-op.
    let insn = (31u32 << 26) | (5 << 21) | (4 << 16) | (3 << 11) | (533 << 1);

    let mem = TestMem::new();
    mem.write_u32(0x100, 0x1234_5678);
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.xer_tbc = 4;
    // EA = r4 + r3 = 0 + 0x100.
    ctx.gpr[4] = 0;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    assert_eq!(ctx.gpr[5], 0x1234_5678, "lswx with TBC=4 loads 4 bytes");
}
// PPCBUG-129 lfs: single-precision FP load, widened to double.
#[test]
fn lfs_loads_single_widened_to_double() {
    // `lfs f4, 0(r3)`: primary opcode 48. The single-precision value in
    // memory must show up in the FPR as the equivalent double.
    let insn = (48u32 << 26) | (4 << 21) | (3 << 16);

    let mem = TestMem::new();
    let single_bits = 1.5_f32.to_bits();
    mem.write_u32(0x100, single_bits);
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    assert_eq!(ctx.fpr[4], 1.5_f64);
}
// PPCBUG-132 stb/sth: smoke.
#[test]
fn stb_writes_byte() {
    // `stb r4, 0(r3)`: primary opcode 38.
    let insn = (38u32 << 26) | (4 << 21) | (3 << 16);

    let mem = TestMem::new();
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[4] = 0xAB; // value to store
    ctx.gpr[3] = 0x100; // destination address
    step(&mut ctx, &mem);

    assert_eq!(mem.read_u8(0x100), 0xAB);
}
#[test]
fn sth_writes_halfword_be() {
    // `sth r4, 0(r3)`: primary opcode 44.
    let insn = (44u32 << 26) | (4 << 21) | (3 << 16);

    let mem = TestMem::new();
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[4] = 0x1234; // value to store
    ctx.gpr[3] = 0x100; // destination address
    step(&mut ctx, &mem);

    assert_eq!(mem.read_u16(0x100), 0x1234);
}
// PPCBUG-146 stw, PPCBUG-147 stwcx.
#[test]
fn stw_writes_word_be() {
    // `stw r4, 0(r3)`: primary opcode 36.
    let insn = (36u32 << 26) | (4 << 21) | (3 << 16);

    let mem = TestMem::new();
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[4] = 0xDEAD_BEEF; // value to store
    ctx.gpr[3] = 0x100; // destination address
    step(&mut ctx, &mem);

    assert_eq!(mem.read_u32(0x100), 0xDEAD_BEEF);
}
// PPCBUG-153 std: doubleword store.
#[test]
fn std_writes_doubleword_be() {
    // `std r4, 0(r3)`: primary opcode 62.
    let insn = (62u32 << 26) | (4 << 21) | (3 << 16);

    let mem = TestMem::new();
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[4] = 0x1122_3344_5566_7788;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    // Read back as two words: the high half sits at the lower address.
    assert_eq!(mem.read_u32(0x100), 0x1122_3344);
    assert_eq!(mem.read_u32(0x104), 0x5566_7788);
}
// PPCBUG-163 stmw + stswx.
#[test]
fn stmw_stores_consecutive_words() {
    // `stmw r30, 0(r3)`: primary opcode 47. Starting at r30 this stores
    // r30 and r31 to two consecutive words.
    let insn = (47u32 << 26) | (30 << 21) | (3 << 16);

    let mem = TestMem::new();
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[31] = 0xBBBB_BBBB;
    ctx.gpr[30] = 0xAAAA_AAAA;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    assert_eq!(mem.read_u32(0x100), 0xAAAA_AAAA);
    assert_eq!(mem.read_u32(0x104), 0xBBBB_BBBB);
}
#[test]
fn stswx_uses_xer_tbc_for_byte_count() {
    // PPCBUG-163: `stswx r5, r4, r3` (primary opcode 31, XO=661) takes its
    // byte count from XER's TBC field and works now that P6 fixed TBC.
    let insn = (31u32 << 26) | (5 << 21) | (4 << 16) | (3 << 11) | (661 << 1);

    let mem = TestMem::new();
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.xer_tbc = 4;
    ctx.gpr[5] = 0xCAFE_BABE; // source data
    // EA = r4 + r3 = 0 + 0x100.
    ctx.gpr[4] = 0;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    assert_eq!(mem.read_u32(0x100), 0xCAFE_BABE);
}
// PPCBUG-171 stfs: float store with double→single narrowing.
#[test]
fn stfs_writes_single_be() {
    // `stfs f4, 0(r3)`: primary opcode 52. The double in the FPR must be
    // stored as its single-precision encoding.
    let insn = (52u32 << 26) | (4 << 21) | (3 << 16);

    let mem = TestMem::new();
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[4] = 1.5_f64;
    ctx.gpr[3] = 0x100;
    step(&mut ctx, &mem);

    let expected_bits = 1.5_f32.to_bits();
    assert_eq!(mem.read_u32(0x100), expected_bits);
}
// ───────────────────────────────────────────────────────────────────────
// P8 batch 3 — FPU + VMX float test gaps
// (PPCBUG-187/208/228/438/439/440)
// ───────────────────────────────────────────────────────────────────────
// PPCBUG-187 single-precision FPU smokes.
#[test]
fn fadds_single_arithmetic() {
    // `fadds f3, f1, f2`: primary opcode 59, XO=21.
    let insn = (59u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[2] = 2.5;
    ctx.fpr[1] = 1.5;
    step(&mut ctx, &mut mem);

    assert_eq!(ctx.fpr[3], 4.0);
}
#[test]
fn fmuls_single_multiply() {
    // `fmuls f3, f1, f2`: primary opcode 59, XO=25. The second multiplicand
    // is encoded in the frC slot (shifted by 6); the frB slot (shifted by
    // 11) stays zero:
    //   (59<<26) | (frD<<21) | (frA<<16) | (0<<11) | (frC<<6) | (25<<1)
    let (frd, fra, frc) = (3u32, 1u32, 2u32);
    let insn = (59u32 << 26) | (frd << 21) | (fra << 16) | (frc << 6) | (25 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[2] = 3.0;
    ctx.fpr[1] = 2.0;
    step(&mut ctx, &mut mem);

    assert_eq!(ctx.fpr[3], 6.0);
}
// PPCBUG-208 double-precision FPU smokes.
#[test]
fn fmul_double_multiply() {
    // `fmul f3, f1, f2`: primary opcode 63, XO=25, with f2 in the frC slot
    // (shifted by 6).
    let (frd, fra, frc) = (3u32, 1u32, 2u32);
    let insn = (63u32 << 26) | (frd << 21) | (fra << 16) | (frc << 6) | (25 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[2] = 0.25;
    ctx.fpr[1] = 4.0;
    step(&mut ctx, &mut mem);

    assert_eq!(ctx.fpr[3], 1.0);
}
#[test]
fn fdiv_zero_over_finite() {
    // `fdiv f3, f1, f2`: primary opcode 63, XO=18. A zero dividend over a
    // finite non-zero divisor gives zero and must not raise ZX.
    let insn = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (18 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[2] = 5.0; // divisor
    ctx.fpr[1] = 0.0; // dividend
    step(&mut ctx, &mem);

    assert_eq!(ctx.fpr[3], 0.0);
    assert_eq!(ctx.fpscr & fpscr::ZX, 0, "0/finite is not divide-by-zero");
}
#[test]
fn fneg_flips_sign_bit() {
    // `fneg f3, f1`: primary opcode 63, XO=40; the source is in the frB slot.
    let insn = (63u32 << 26) | (3 << 21) | (1 << 11) | (40 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = 1.0;
    step(&mut ctx, &mut mem);

    assert_eq!(ctx.fpr[3], -1.0);
}
#[test]
fn fabs_clears_sign_bit() {
    // `fabs f3, f1`: primary opcode 63, XO=264; the source is in the frB slot.
    let insn = (63u32 << 26) | (3 << 21) | (1 << 11) | (264 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpr[1] = -3.5;
    step(&mut ctx, &mut mem);

    assert_eq!(ctx.fpr[3], 3.5);
}
#[test]
fn fmr_copies_register() {
    // `fmr f3, f5`: primary opcode 63, XO=72; the source is in the frB slot.
    let insn = (63u32 << 26) | (3 << 21) | (5 << 11) | (72 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // A negative source value, so the sign bit takes part in the comparison.
    ctx.fpr[5] = 1.5_f64.copysign(-1.0);
    step(&mut ctx, &mut mem);

    // Compare raw bit patterns rather than float values.
    assert_eq!(ctx.fpr[3].to_bits(), ctx.fpr[5].to_bits());
}
// PPCBUG-228 fpu convert / fcmp smokes.
#[test]
fn fcmpu_lt_sets_cr_lt() {
    // `fcmpu cr3, f1, f2`: primary opcode 63, XO=0, destination field BF=3
    // (shifted by 23).
    let bf = 3u32;
    let insn = (63u32 << 26) | (bf << 23) | (1 << 16) | (2 << 11);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // 1.0 < 2.0, so only LT should be reported.
    ctx.fpr[2] = 2.0;
    ctx.fpr[1] = 1.0;
    step(&mut ctx, &mut mem);

    assert!(ctx.cr[3].lt);
    assert!(!ctx.cr[3].gt);
}
#[test]
fn fcfid_converts_int64_to_double() {
    // `fcfid f3, f1`: primary opcode 63, XO=846. The source FPR holds an
    // integer bit pattern, not a floating-point value.
    let insn = (63u32 << 26) | (3 << 21) | (1 << 11) | (846 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let integer_bits: u64 = 123;
    ctx.fpr[1] = f64::from_bits(integer_bits);
    step(&mut ctx, &mut mem);

    assert_eq!(ctx.fpr[3], 123.0);
}
// PPCBUG-438 VMX float compares. VC-form: 10-bit XO at PPC bits 22-31 (host bits 9-0), i.e. starting at host bit 0.
#[test]
fn vcmpeqfp_sets_lanes_to_all_ones_on_eq() {
    use xenia_types::Vec128;

    // `vcmpeqfp v3, v1, v2`: canary base 0x100000c6 → primary opcode 4,
    // XO=198 in the low bits of the word.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 198;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[2] = Vec128::from_f32x4_array([1.0, 0.0, 3.0, 0.0]);
    ctx.vr[1] = Vec128::from_f32x4_array([1.0, 2.0, 3.0, 4.0]);
    step(&mut ctx, &mut mem);

    let lanes = ctx.vr[3].as_u32x4();
    // Lanes 0 and 2 compare equal (all ones); lanes 1 and 3 differ (zero).
    let expected: [u32; 4] = [0xFFFF_FFFF, 0, 0xFFFF_FFFF, 0];
    for (lane, want) in expected.iter().enumerate() {
        assert_eq!(lanes[lane], *want);
    }
}
// PPCBUG-439 VMX rounding. VX-form XO at bit 0.
#[test]
fn vrfip_rounds_toward_pos_inf() {
    // `vrfip v3, v1`: canary base 0x1000028a → primary opcode 4, XO=650;
    // the source is in the vB slot.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 11) | 650;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1.4, 1.5, -1.4, -1.5]);
    step(&mut ctx, &mut mem);

    // Every lane is rounded up to the next integer.
    let rounded = ctx.vr[3].as_f32x4();
    assert_eq!(rounded[0], 2.0);
    assert_eq!(rounded[1], 2.0);
    assert_eq!(rounded[2], -1.0);
    assert_eq!(rounded[3], -1.0);
}
#[test]
fn vrfim_rounds_toward_neg_inf() {
    // `vrfim v3, v1`: canary base 0x100002ca → primary opcode 4, XO=714;
    // the source is in the vB slot.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 11) | 714;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1.4, 1.5, -1.4, -1.5]);
    step(&mut ctx, &mut mem);

    // Every lane is rounded down to the previous integer.
    let rounded = ctx.vr[3].as_f32x4();
    assert_eq!(rounded[0], 1.0);
    assert_eq!(rounded[1], 1.0);
    assert_eq!(rounded[2], -2.0);
    assert_eq!(rounded[3], -2.0);
}
#[test]
fn vrfiz_truncates_toward_zero() {
    // `vrfiz v3, v1`: canary base 0x1000024a → primary opcode 4, XO=586;
    // the source is in the vB slot.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 11) | 586;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1.7, 2.5, -1.7, -2.5]);
    step(&mut ctx, &mut mem);

    // The fractional part is dropped regardless of sign.
    let truncated = ctx.vr[3].as_f32x4();
    assert_eq!(truncated[0], 1.0);
    assert_eq!(truncated[1], 2.0);
    assert_eq!(truncated[2], -1.0);
    assert_eq!(truncated[3], -2.0);
}
// PPCBUG-440 VMX convert.
#[test]
fn vctsxs_saturates_max_to_int_max() {
    // `vctsxs v3, v1, 0`: canary base 0x100003ca → primary opcode 4,
    // XO=970, UIMM=0; the source is in the vB slot.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 11) | 970;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Two out-of-range lanes followed by two in-range lanes.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1e10, -1e10, 1.5, -1.5]);
    step(&mut ctx, &mut mem);

    let converted = crate::vmx::as_i32x4(ctx.vr[3]);
    assert_eq!(converted[0], i32::MAX, "1e10 saturates to INT_MAX");
    assert_eq!(converted[1], i32::MIN, "-1e10 saturates to INT_MIN");
    assert_eq!(converted[2], 1);
    assert_eq!(converted[3], -1);
}
// ───────────────────────────────────────────────────────────────────────
// P8 batch 4 — VMX integer + permute/pack + multiply-sum + load/store
// (PPCBUG-240/243/277-279/316-325/370-378/490-494/517-519)
// ───────────────────────────────────────────────────────────────────────
// PPCBUG-240 VMX integer add/sub.
#[test]
fn vaddubm_lane_wise_byte_add() {
    use xenia_types::Vec128;

    // `vaddubm v3, v1, v2`: canary base 0x10000000 → primary opcode 4, XO=0.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[2] = Vec128::from_bytes([0x20; 16]);
    ctx.vr[1] = Vec128::from_bytes([0x10; 16]);
    step(&mut ctx, &mut mem);

    // Spot-check the first and last byte lanes: 0x10 + 0x20 = 0x30.
    let sum = ctx.vr[3].as_bytes();
    assert_eq!(sum[0], 0x30);
    assert_eq!(sum[15], 0x30);
}
#[test]
fn vsubuwm_lane_wise_word_sub() {
    use xenia_types::Vec128;

    // `vsubuwm v3, v1, v2`: canary base 0x10000480 → primary opcode 4, XO=1152.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 1152;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[2] = Vec128::from_u32x4(40, 30, 20, 10);
    ctx.vr[1] = Vec128::from_u32x4(100, 200, 300, 400);
    step(&mut ctx, &mut mem);

    // Each word lane holds v1[i] - v2[i].
    let diff = ctx.vr[3].as_u32x4();
    let expected: [u32; 4] = [60, 170, 280, 390];
    for (lane, want) in expected.iter().enumerate() {
        assert_eq!(diff[lane], *want);
    }
}
// PPCBUG-277 VMX integer compare.
#[test]
fn vcmpequb_lane_wise_byte_compare() {
    use xenia_types::Vec128;

    // `vcmpequb v3, v1, v2`: canary base 0x10000006 → primary opcode 4, XO=6.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 6;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Both operands are identical, so every byte lane compares equal.
    ctx.vr[2] = Vec128::from_bytes([0xAA; 16]);
    ctx.vr[1] = Vec128::from_bytes([0xAA; 16]);
    step(&mut ctx, &mut mem);

    let mask = ctx.vr[3].as_bytes();
    assert_eq!(mask[0], 0xFF);
    assert_eq!(mask[15], 0xFF);
}
// PPCBUG-278 VMX min/max.
#[test]
fn vmaxsw_lane_wise_signed_max() {
    use crate::vmx::{as_i32x4, from_i32x4};

    // `vmaxsw v3, v1, v2`: canary base 0x10000182 → primary opcode 4, XO=386.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 386;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[2] = from_i32x4([20, 5, -100, 1000]);
    ctx.vr[1] = from_i32x4([10, -5, 100, -1000]);
    step(&mut ctx, &mut mem);

    // The signed maximum of each lane pair.
    let maxima = as_i32x4(ctx.vr[3]);
    let expected: [i32; 4] = [20, 5, 100, 1000];
    for (lane, want) in expected.iter().enumerate() {
        assert_eq!(maxima[lane], *want);
    }
}
// PPCBUG-316 VMX shift/rotate.
#[test]
fn vsl_left_shift_via_low3_bits_of_lane15() {
    use xenia_types::Vec128;

    // `vsl v3, v1, v2`: canary base 0x100001c4 → primary opcode 4, XO=452.
    // The whole 128-bit value is shifted left by (vB byte 15 & 7) bits.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 452;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Shift count of 4 in byte 15; every other byte is zero.
    let mut shift_bytes = [0u8; 16];
    shift_bytes[15] = 4;
    ctx.vr[2] = Vec128::from_bytes(shift_bytes);
    ctx.vr[1] = Vec128::from_u32x4(0x1234_5678, 0, 0, 0);
    step(&mut ctx, &mut mem);

    let shifted = ctx.vr[3].as_u32x4();
    assert_eq!(shifted[0], 0x2345_6780, "shift left by 4 bits");
}
#[test]
fn vsraw_arithmetic_right_shift_per_lane() {
    // `vsraw v3, v1, v2`: canary base 0x10000384 → primary opcode 4, XO=900.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 900;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Every lane is shifted right by 2.
    ctx.vr[2] = xenia_types::Vec128::from_u32x4(2, 2, 2, 2);
    ctx.vr[1] = crate::vmx::from_i32x4([-16, 16, -1, 0x4000_0000]);
    step(&mut ctx, &mut mem);

    let shifted = crate::vmx::as_i32x4(ctx.vr[3]);
    assert_eq!(shifted[0], -4);
    assert_eq!(shifted[1], 4);
    // An arithmetic shift replicates the sign bit, so -1 stays -1.
    assert_eq!(shifted[2], -1);
    assert_eq!(shifted[3], 0x1000_0000);
}
// PPCBUG-321 VMX logical.
#[test]
fn vand_lane_wise_and() {
    use xenia_types::Vec128;

    // `vand v3, v1, v2`: canary base 0x10000404 → primary opcode 4, XO=1028.
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 1028;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[2] = Vec128::from_u32x4(0xAAAA_AAAA, 0x5555_5555, 0xFFFF_FFFF, 0xFFFF_FFFF);
    ctx.vr[1] = Vec128::from_u32x4(0xFFFF_FFFF, 0xAAAA_AAAA, 0x5555_5555, 0);
    step(&mut ctx, &mut mem);

    // The bitwise AND of each word-lane pair.
    let anded = ctx.vr[3].as_u32x4();
    let expected: [u32; 4] = [0xAAAA_AAAA, 0, 0x5555_5555, 0];
    for (lane, want) in expected.iter().enumerate() {
        assert_eq!(anded[lane], *want);
    }
}
// PPCBUG-370 VMX permute/pack.
#[test]
fn vsldoi_byte_concat_shift() {
    use xenia_types::Vec128;

    // `vsldoi v3, v1, v2, 4`: canary base 0x1000002c → primary opcode 4,
    // XO=44. The byte shift count SHB is PPC bits 22-25, i.e. shifted
    // left by 6 in the host word.
    let shb = 4u32;
    let insn = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (shb << 6) | 44;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    let va_bytes = [
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
    ];
    let vb_bytes = [
        0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11,
        0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99,
    ];
    ctx.vr[1] = Vec128::from_bytes(va_bytes);
    ctx.vr[2] = Vec128::from_bytes(vb_bytes);
    step(&mut ctx, &mut mem);

    // Shift by 4 bytes: result = vA[4..16] followed by vB[0..4].
    let out = ctx.vr[3].as_bytes();
    assert_eq!(out[0], 0x05);
    assert_eq!(out[11], 0x10);
    assert_eq!(out[12], 0xAA);
    assert_eq!(out[15], 0xDD);
}
// PPCBUG-490 VMX multiply-add (vmaddfp; vmsum* covered indirectly by P5).
#[test]
fn vmaddfp_lane_fma() {
    use xenia_types::Vec128;

    // Per-lane multiply-add: vD[i] = vA[i] * vC[i] + vB[i].
    // `vmaddfp v4, v1, v2, v3` → vD=4, vA=1, vC=2, vB=3. Primary opcode 4,
    // XO=46, encoded as
    //   (4<<26) | (vD<<21) | (vA<<16) | (vB<<11) | (vC<<6) | 46
    let (vd, va, vb, vc) = (4u32, 1u32, 3u32, 2u32);
    let insn = (4u32 << 26) | (vd << 21) | (va << 16) | (vb << 11) | (vc << 6) | 46;

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.vr[3] = Vec128::from_f32x4_array([10.0, 20.0, 30.0, 40.0]); // addend (vB)
    ctx.vr[2] = Vec128::from_f32x4_array([1.0, 1.0, 1.0, 1.0]); // multiplier (vC)
    ctx.vr[1] = Vec128::from_f32x4_array([2.0, 3.0, 4.0, 5.0]); // multiplicand (vA)
    step(&mut ctx, &mut mem);

    let fma = ctx.vr[4].as_f32x4();
    assert_eq!(fma[0], 12.0); // 2*1 + 10
    assert_eq!(fma[1], 23.0); // 3*1 + 20
    assert_eq!(fma[2], 34.0); // 4*1 + 30
    assert_eq!(fma[3], 45.0); // 5*1 + 40
}
// PPCBUG-517 VMX load/store.
#[test]
fn lvx_loads_aligned_quadword() {
    // `lvx v5, r3, r4`: primary opcode 31, XO=103, EA = r3 + r4 = 0x100.
    let insn = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (103 << 1);

    let mem = TestMem::new();
    // Fill the quadword with 16 distinct bytes: 0xA0, 0xA1, …, 0xAF.
    for offset in 0..16 {
        mem.write_u8(0x100 + offset, (0xA0 + offset) as u8);
    }
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.gpr[4] = 0x100;
    ctx.gpr[3] = 0;
    step(&mut ctx, &mem);

    // Spot-check the first and last byte lanes.
    let loaded = ctx.vr[5].as_bytes();
    assert_eq!(loaded[0], 0xA0);
    assert_eq!(loaded[15], 0xAF);
}
#[test]
fn stvx_stores_aligned_quadword() {
    // `stvx v5, r3, r4`: primary opcode 31, XO=231, EA = r3 + r4 = 0x100.
    let insn = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (231 << 1);

    let mem = TestMem::new();
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // Source vector bytes: 0xC0, 0xC1, …, 0xCF.
    let mut pattern = [0u8; 16];
    for (idx, byte) in pattern.iter_mut().enumerate() {
        *byte = (0xC0 + idx) as u8;
    }
    ctx.vr[5] = xenia_types::Vec128::from_bytes(pattern);
    ctx.gpr[4] = 0x100;
    ctx.gpr[3] = 0;
    step(&mut ctx, &mem);

    // Every byte must land in memory in lane order.
    for offset in 0..16 {
        assert_eq!(mem.read_u8(0x100 + offset), (0xC0 + offset) as u8);
    }
}
#[test]
fn lvebx_byte_lane_load() {
    // `lvebx v5, r3, r4`: primary opcode 31, XO=7, EA = r3 + r4 = 0x107.
    let insn = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (7 << 1);

    let mem = TestMem::new();
    mem.write_u8(0x107, 0x42);
    write_instr(&mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    // The EA points at byte offset 7 within its 16-byte quadword.
    ctx.gpr[4] = 0x107;
    ctx.gpr[3] = 0;
    step(&mut ctx, &mem);

    let lanes = ctx.vr[5].as_bytes();
    assert_eq!(lanes[7], 0x42, "byte loaded at lane (EA & 0xF)");
}
// ---------- Block-cache parity tests ----------
//
// These confirm that running a program through the basic-block
// cache (crate::block_cache::BlockCache + step_block) produces a
// bit-identical PpcContext to running it through step_cached
// (per-instruction). If this ever fails the block cache is not
// safe to engage in production.
/// Encodes `addi rd, ra, simm` (primary opcode 14). The signed immediate
/// is placed, as its 16-bit two's-complement pattern, in the low halfword.
fn enc_addi_t(rd: u32, ra: u32, simm: i16) -> u32 {
    const ADDI_OPCODE: u32 = 14;
    let imm_field = u32::from(simm as u16);
    (ADDI_OPCODE << 26) | (rd << 21) | (ra << 16) | imm_field
}
/// Encodes `lwz rd, d(ra)` (primary opcode 32). The signed displacement
/// is placed, as its 16-bit two's-complement pattern, in the low halfword.
fn enc_lwz_t(rd: u32, ra: u32, d: i16) -> u32 {
    const LWZ_OPCODE: u32 = 32;
    let disp_field = u32::from(d as u16);
    (LWZ_OPCODE << 26) | (rd << 21) | (ra << 16) | disp_field
}
/// Encodes `stw rs, d(ra)` (primary opcode 36). The signed displacement
/// is placed, as its 16-bit two's-complement pattern, in the low halfword.
fn enc_stw_t(rs: u32, ra: u32, d: i16) -> u32 {
    const STW_OPCODE: u32 = 36;
    let disp_field = u32::from(d as u16);
    (STW_OPCODE << 26) | (rs << 21) | (ra << 16) | disp_field
}
/// Encodes a relative `b` (primary opcode 18, AA=0, LK=0) whose target is
/// `li_words` instruction words away from the branch itself. The signed
/// word offset is truncated to 24 bits and stored starting at bit 2 of
/// the instruction word.
fn enc_b_t(li_words: i32) -> u32 {
    const B_OPCODE: u32 = 18 << 26;
    // Shifting first and masking afterwards keeps the same 24 offset bits
    // (word bits 2..=25) as masking to 24 bits and then shifting.
    const LI_FIELD_MASK: u32 = 0x03FF_FFFC;
    B_OPCODE | (((li_words as u32) << 2) & LI_FIELD_MASK)
}
/// The slice of `PpcContext` that the block-cache parity tests require to
/// be identical between the per-instruction and block dispatch paths.
/// Comparing the whole struct is impractical (vector regs, fp regs, large
/// arrays); the GPR file plus pc, lr, ctr, the packed CR and the cycle
/// counter cover everything the test programs below touch.
#[derive(Debug, PartialEq, Eq)]
struct CtxSnap {
    gpr: [u64; 32],
    pc: u32,
    lr: u64,
    ctr: u64,
    cycle_count: u64,
    cr_packed: u32,
}
impl CtxSnap {
    /// Copies the compared fields out of `ctx`; the condition register is
    /// taken in the packed form returned by `ctx.cr()`.
    fn from(ctx: &PpcContext) -> Self {
        CtxSnap {
            cr_packed: ctx.cr(),
            cycle_count: ctx.cycle_count,
            ctr: ctx.ctr,
            lr: ctx.lr,
            pc: ctx.pc,
            gpr: ctx.gpr,
        }
    }
}
/// Reference path for the parity tests: loads `prog` at address 0 as
/// consecutive 4-byte words, seeds the GPRs listed in `init_gpr`, and
/// executes exactly `iters` instructions one at a time through
/// `step_cached`. Every step must return `StepResult::Continue`.
fn run_per_instruction(prog: &[u32], iters: u32, init_gpr: &[(usize, u64)]) -> CtxSnap {
    let mut mem = TestMem::new();
    for (slot, word) in prog.iter().copied().enumerate() {
        write_instr(&mut mem, (slot as u32) * 4, word);
    }

    let mut ctx = PpcContext::new();
    init_gpr.iter().for_each(|&(reg, value)| ctx.gpr[reg] = value);
    ctx.pc = 0;

    let mut cache = crate::decoder::DecodeCache::new();
    let mut remaining = iters;
    while remaining > 0 {
        // One instruction per call. The test memory reports a constant
        // page_version (the default trait impl returns 1), so decode-cache
        // entries never go stale.
        let r = step_cached(&mut ctx, &mut mem, &mut cache, 1);
        assert!(matches!(r, StepResult::Continue));
        remaining -= 1;
    }
    CtxSnap::from(&ctx)
}
/// Block-dispatch counterpart of `run_per_instruction`: loads `prog` at
/// address 0, seeds the GPRs listed in `init_gpr`, and executes whole
/// decoded blocks through `step_block` until at least `iters` cycles have
/// been retired.
///
/// Blocks are never split, so the run can overshoot `iters` when `iters`
/// does not fall on a block boundary. Callers comparing against
/// `run_per_instruction` must pick an `iters` that does.
///
/// # Panics
///
/// Panics if a block returns anything other than `StepResult::Continue`,
/// or if a block retires zero cycles (the loop could otherwise spin
/// forever without reaching `iters`).
fn run_block(prog: &[u32], iters: u32, init_gpr: &[(usize, u64)]) -> CtxSnap {
    let mut ctx = PpcContext::new();
    for &(i, v) in init_gpr {
        ctx.gpr[i] = v;
    }
    let mut mem = TestMem::new();
    for (i, &raw) in prog.iter().enumerate() {
        write_instr(&mut mem, (i as u32) * 4, raw);
    }
    let mut bc = crate::block_cache::BlockCache::new();
    ctx.pc = 0;
    let mut total_steps = 0u32;
    // Iterate by *blocks* until we've covered at least `iters`
    // instructions. The block path runs N instructions per call
    // where N is the block length; progress is measured through the
    // change in `cycle_count` so the comparison stays on a
    // per-instruction footing.
    // NOTE(review): this assumes `cycle_count` advances once per executed
    // instruction — confirm against `step_block`.
    while total_steps < iters {
        // Borrow bc only long enough to obtain the block; the reference
        // is laundered through a raw pointer so that no borrow is held
        // across the `step_block` call below.
        let block_ptr: *const crate::block_cache::DecodedBlock = {
            let b: &crate::block_cache::DecodedBlock = bc.lookup_or_build(ctx.pc, &mem);
            b
        };
        // SAFETY: the BlockCache::lookup_or_build contract is that
        // the returned reference stays valid until the next
        // lookup_or_build on the same cache. We don't call
        // lookup_or_build inside step_block and we drop the raw
        // pointer at the end of the iteration, so no aliasing.
        let block: &crate::block_cache::DecodedBlock = unsafe { &*block_ptr };
        let n_before = ctx.cycle_count;
        let r = step_block(&mut ctx, &mut mem, block);
        assert!(matches!(r, StepResult::Continue));
        let stepped = (ctx.cycle_count - n_before) as u32;
        // Without forward progress `total_steps` would never reach
        // `iters`; fail loudly instead of hanging the test run.
        assert!(
            stepped > 0,
            "step_block retired no cycles at pc={:#x}; run_block would never terminate",
            ctx.pc
        );
        total_steps += stepped;
    }
    CtxSnap::from(&ctx)
}
#[test]
fn block_dispatch_matches_per_instruction_alu_loop() {
    // Four-instruction loop: three increments of r3 (by 1, 2 and 3) and a
    // branch of -3 words (-12 bytes) back to the first instruction.
    let program = [
        enc_addi_t(3, 3, 1),
        enc_addi_t(3, 3, 2),
        enc_addi_t(3, 3, 3),
        enc_b_t(-3),
    ];
    // r3 starts at zero.
    let seed = [(3usize, 0u64)];
    let iterations = 100;

    let per_instruction = run_per_instruction(&program, iterations, &seed);
    let per_block = run_block(&program, iterations, &seed);
    assert_eq!(per_instruction, per_block);
}
#[test]
fn block_dispatch_matches_per_instruction_loadstore_loop() {
    // Initial state: r3 = 1, r4 = 0x800 (data pointer).
    // loop:
    //   stw  r3, 0(r4)
    //   lwz  r5, 0(r4)
    //   addi r3, r5, 1
    //   b    -12
    let program = [
        enc_stw_t(3, 4, 0),
        enc_lwz_t(5, 4, 0),
        enc_addi_t(3, 5, 1),
        enc_b_t(-3),
    ];
    let seed = [(3usize, 1u64), (4usize, 0x800u64)];
    let iterations = 200;

    let per_instruction = run_per_instruction(&program, iterations, &seed);
    let per_block = run_block(&program, iterations, &seed);
    assert_eq!(per_instruction, per_block);
}
#[test]
fn mcrfs_moves_fpscr_nibble_and_clears_exception_bits() {
    // FPSCR bit n (counted from the MSB) is stored at host bit (31 - n):
    //   FX = bit 0 → host bit 31, OX = bit 3 → host bit 28.
    const FX_BIT: u32 = 1 << 31;
    const OX_BIT: u32 = 1 << 28;
    // mcrfs crfD=2, crfS=0:
    //   (63 << 26) | (crfD << 23) | (crfS << 18) | (64 << 1)
    let insn: u32 = (63 << 26) | (2 << 23) | (64 << 1);

    let mut mem = TestMem::new();
    write_instr(&mut mem, 0, insn);

    let mut ctx = PpcContext::new();
    ctx.pc = 0;
    ctx.fpscr = FX_BIT | OX_BIT;
    step(&mut ctx, &mut mem);

    // FPSCR field 0 is FX, FEX, VX, OX. With FX=1, FEX=0, VX=0, OX=1 the
    // nibble copied into cr2 is 0b1001 (FX lands in lt, OX in so).
    assert_eq!(ctx.cr[2].as_u8(), 0b1001);
    // FX and OX are clearable, so the move must also have reset them in FPSCR.
    assert_eq!(ctx.fpscr & FX_BIT, 0, "FX cleared");
    assert_eq!(ctx.fpscr & OX_BIT, 0, "OX cleared");
}
/// Regression test for `subfze` (`RT ← !RA + CA`, with no -1 term): the
/// carry-out may only be set when `RA == 0 && CA == 1`. An earlier
/// predicate (`!ra != 0 || ca != 0`) had been copy-pasted from `subfme`
/// and reported CA=1 in nearly every case.
#[test]
fn test_subfze_carry_only_when_ra_zero_and_ca_one() {
    // subfze r3, r4: opcode 31, XO=200 (bits 22-30), OE=0, Rc=0.
    // Encoding: (31<<26) | (rd<<21) | (ra<<16) | (200<<1)
    let insn = (31u32 << 26) | (3 << 21) | (4 << 16) | (200 << 1);

    // Places the instruction at address 0 of a fresh memory, executes it
    // once on the prepared context and hands the context back.
    let exec = |mut ctx: PpcContext| {
        let mem = TestMem::new();
        write_instr(&mem, 0, insn);
        ctx.pc = 0;
        step(&mut ctx, &mem);
        ctx
    };

    // Case 1: ra=0, ca=1 → CA=1 (the only carry case).
    let mut ctx = PpcContext::new();
    ctx.gpr[4] = 0;
    ctx.xer_ca = 1;
    let ctx = exec(ctx);
    assert_eq!(ctx.xer_ca, 1, "ra=0, ca=1 should produce CA=1");
    assert_eq!(ctx.gpr[3], 0, "result = !0 + 1 = 0 (wraps)");

    // Case 2: ra=0, ca=0 → CA=0 (the old predicate reported CA=1).
    let mut ctx = PpcContext::new();
    ctx.gpr[4] = 0;
    ctx.xer_ca = 0;
    let ctx = exec(ctx);
    assert_eq!(ctx.xer_ca, 0, "ra=0, ca=0 should produce CA=0");
    // PPCBUG-018: 32-bit ABI. The result is u32::MAX with the upper 32 bits zero.
    assert_eq!(ctx.gpr[3], 0xFFFF_FFFFu64, "result = !0u32 + 0 = u32::MAX");

    // Case 3: ra=1, ca=0 → CA=0 (the old predicate reported CA=1).
    let mut ctx = PpcContext::new();
    ctx.gpr[4] = 1;
    ctx.xer_ca = 0;
    let ctx = exec(ctx);
    assert_eq!(ctx.xer_ca, 0, "ra=1, ca=0 should produce CA=0");
    // PPCBUG-018: 32-bit ABI. The result is u32::MAX - 1 with the upper 32 bits zero.
    assert_eq!(ctx.gpr[3], 0xFFFF_FFFEu64, "result = !1u32 + 0 = u32::MAX - 1");

    // Case 4: ra=u32::MAX, ca=1 → CA=0; result = !u32::MAX + 1 = 1.
    let mut ctx = PpcContext::new();
    ctx.gpr[4] = 0xFFFF_FFFFu64;
    ctx.xer_ca = 1;
    let ctx = exec(ctx);
    assert_eq!(ctx.xer_ca, 0, "ra=u32::MAX, ca=1 should produce CA=0");
    assert_eq!(ctx.gpr[3], 1, "result = !u32::MAX + 1 = 1");
}
/// Regression test for `cmp` with L=1: LT/GT must come from a real signed
/// 64-bit comparison, not from the sign of a subtraction that may
/// overflow. The old code used
/// `update_cr_signed(bf, ra.wrapping_sub(rb))`, which mis-signed boundary
/// operands such as `ra=i64::MIN, rb=1`.
#[test]
fn test_cmp_signed_at_i64_boundaries() {
    // cmp BF=0, L=1, r3, r4: (31<<26) | (1<<21) | (ra<<16) | (rb<<11)
    // (XO=0; the Rc field is reserved on cmp and left at 0).
    let insn = (31u32 << 26) | (1 << 21) | (3 << 16) | (4 << 11);

    // Runs the compare once with r3 = `ra` and r4 = `rb` on fresh state
    // and returns the resulting context for inspection.
    let compare = |ra: u64, rb: u64| {
        let mem = TestMem::new();
        write_instr(&mem, 0, insn);
        let mut ctx = PpcContext::new();
        ctx.pc = 0;
        ctx.gpr[3] = ra;
        ctx.gpr[4] = rb;
        step(&mut ctx, &mem);
        ctx
    };

    // i64::MIN < 1 → LT must be set.
    let ctx = compare(i64::MIN as u64, 1);
    assert!(ctx.cr[0].lt, "i64::MIN < 1 must be LT");
    assert!(!ctx.cr[0].gt);
    assert!(!ctx.cr[0].eq);

    // i64::MAX > -1 → GT must be set (the symmetric overflow corner).
    let ctx = compare(i64::MAX as u64, (-1i64) as u64);
    assert!(!ctx.cr[0].lt);
    assert!(ctx.cr[0].gt, "i64::MAX > -1 must be GT");
    assert!(!ctx.cr[0].eq);

    // Equal operands at the extreme still compare EQ.
    let ctx = compare(i64::MIN as u64, i64::MIN as u64);
    assert!(!ctx.cr[0].lt);
    assert!(!ctx.cr[0].gt);
    assert!(ctx.cr[0].eq, "i64::MIN == i64::MIN must be EQ");
}
// ---------- PPCBUG-511/513: invalidate_for_write via VMX stores ----------
/// PPCBUG-511: an ordinary `stvx` that lands on a reserved line has to drop
/// the reservation, so the `stwcx.` issued afterwards fails (CR0.EQ=0).
#[test]
fn lwarx_then_plain_stvx_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    let mut ctx = PpcContext::new();
    ctx.hw_id = 0;
    ctx.reservation_table = Some(table.clone());
    let mut mem = TestMem::new();

    // Register file: r4 is both the reservation and the store address,
    // r5 is the (zero) index for lwarx/stwcx., r7 is the value stwcx. would store.
    ctx.gpr[7] = 0xCCCC_CCCC;
    ctx.gpr[5] = 0;
    ctx.gpr[4] = 0x1000;
    // v0 carries a recognizable fill so the vector store can be observed in memory.
    ctx.vr[0] = xenia_types::Vec128::from_bytes([0xAA; 16]);

    // Three-instruction program, one word per slot starting at address 0.
    let program = [
        // lwarx r3, r4, r5 (opcode 31, XO 20)
        (0, (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1)),
        // stvx v0, r0, r4 (opcode 31, XO 231): rA=0 means base 0, so
        // EA = 0 + r4 = 0x1000, which is already 16-byte aligned.
        (4, (31u32 << 26) | (0 << 21) | (0 << 16) | (4 << 11) | (231 << 1)),
        // stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
        (8, (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1),
    ];
    for &(addr, word) in program.iter() {
        write_instr(&mut mem, addr, word);
    }

    // Step 1 — lwarx takes the reservation on 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Step 2 — stvx has to go through invalidate_for_write and still store its data.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u8(0x1000), 0xAA, "stvx must write the VR bytes");

    // Step 3 — with the reservation gone, stwcx. reports failure and stores nothing.
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvx");
    assert_eq!(mem.read_u8(0x1000), 0xAA, "stwcx. must not overwrite on failure");
}
/// PPCBUG-513: an ordinary `stvlx` that lands on a reserved line has to drop
/// the reservation, so the `stwcx.` issued afterwards fails (CR0.EQ=0).
/// With EA=0x1003 the store is expected to cover the 13 bytes 0x1003..=0x100F.
#[test]
fn lwarx_then_plain_stvlx_invalidates_reservation() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    let mut ctx = PpcContext::new();
    ctx.hw_id = 0;
    ctx.reservation_table = Some(table.clone());
    let mut mem = TestMem::new();

    // The reservation is taken at 0x1000; the stvlx target 0x1003 shares its cache line.
    ctx.gpr[7] = 0xCCCC_CCCC; // value stwcx. would store
    ctx.gpr[6] = 0x1003; // stvlx EA: rb=6 with ra=0 → ea = 0 + 0x1003
    ctx.gpr[5] = 0; // index register for lwarx/stwcx.
    ctx.gpr[4] = 0x1000; // lwarx/stwcx. reservation address
    // v0 carries a recognizable fill so the vector store can be observed in memory.
    ctx.vr[0] = xenia_types::Vec128::from_bytes([0xBB; 16]);

    // Three-instruction program, one word per slot starting at address 0.
    let program = [
        // lwarx r3, r4, r5 (opcode 31, XO 20)
        (0, (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1)),
        // stvlx v0, r0, r6 (opcode 31, XO 647): rA=0 means base 0, so EA = r6 = 0x1003.
        (4, (31u32 << 26) | (0 << 21) | (0 << 16) | (6 << 11) | (647 << 1)),
        // stwcx. r7, r4, r5 (opcode 31, XO 150, Rc=1)
        (8, (31u32 << 26) | (7 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1),
    ];
    for &(addr, word) in program.iter() {
        write_instr(&mut mem, addr, word);
    }

    // Step 1 — lwarx takes the reservation on 0x1000's cache line.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");

    // Step 2 — stvlx has to go through invalidate_for_write; both ends of the
    // stored span (EA itself and the last byte of the 16-byte line) must hold 0xBB.
    step(&mut ctx, &mut mem);
    assert_eq!(mem.read_u8(0x1003), 0xBB, "stvlx must write VR bytes starting at EA");
    assert_eq!(mem.read_u8(0x100F), 0xBB, "stvlx must write up to (ea & !0xF)+15");

    // Step 3 — with the reservation gone, stwcx. reports failure (CR0.EQ=0).
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail after reservation was invalidated by stvlx");
}
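// NOTE: the `lvebx` regression test (`test_lvebx_preserves_other_lanes`, which
// checks that `lvebx` preserves the prior contents of the destination VR for
// lanes other than the loaded byte) sits after the PPCBUG-151 tests below.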
// ---------- PPCBUG-151: cross-width reservation pairs must fail ----------
/// PPCBUG-151: a reservation taken by `lwarx` (width=4) cannot satisfy a
/// `stdcx.` (which needs width=8). The store-conditional has to fail with
/// CR0.EQ=0 and leave memory untouched.
#[test]
fn lwarx_then_stdcx_cross_width_fails() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    let mut ctx = PpcContext::new();
    ctx.hw_id = 0;
    ctx.reservation_table = Some(table.clone());
    let mut mem = TestMem::new();

    // r4 = target address, r5 = index (zero), r6 = value the store would write.
    ctx.gpr[6] = 0xDEAD_BEEF_CAFE_BABEu64;
    ctx.gpr[5] = 0;
    ctx.gpr[4] = 0x1000;

    // Two-instruction program starting at address 0.
    let program = [
        // lwarx r3, r4, r5 (opcode 31, XO 20, Rc=0)
        (0, (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (20 << 1)),
        // stdcx. r6, r4, r5 (opcode 31, XO 214, Rc=1)
        (4, (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (214 << 1) | 1),
    ];
    for &(addr, word) in program.iter() {
        write_instr(&mut mem, addr, word);
    }

    // lwarx has to record a word-sized reservation.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "lwarx must set has_reservation");
    assert_eq!(ctx.reservation_width, 4, "lwarx must set reservation_width=4");

    // stdcx. needs width 8 but finds 4, so it must fail ...
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stdcx. must fail when reservation was set by lwarx (cross-width)");
    // ... leave the doubleword at 0x1000 at its initial zero ...
    assert_eq!(mem.read_u64(0x1000), 0, "stdcx. must not write on cross-width failure");
    // ... and reset the recorded width on the way out.
    assert_eq!(ctx.reservation_width, 0, "stdcx. must clear reservation_width on exit");
}
/// PPCBUG-151: a reservation taken by `ldarx` (width=8) cannot satisfy a
/// `stwcx.` (which needs width=4). The store-conditional has to fail with
/// CR0.EQ=0 and leave memory untouched.
#[test]
fn ldarx_then_stwcx_cross_width_fails() {
    let table = std::sync::Arc::new(crate::ReservationTable::new());
    table.enable();
    let mut ctx = PpcContext::new();
    ctx.hw_id = 0;
    ctx.reservation_table = Some(table.clone());
    let mut mem = TestMem::new();

    // r4 = target address, r5 = index (zero), r6 = value the store would write.
    ctx.gpr[6] = 0xCCCC_CCCCu64;
    ctx.gpr[5] = 0;
    ctx.gpr[4] = 0x1000;

    // Two-instruction program starting at address 0.
    let program = [
        // ldarx r3, r4, r5 (opcode 31, XO 84, Rc=0)
        (0, (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (84 << 1)),
        // stwcx. r6, r4, r5 (opcode 31, XO 150, Rc=1)
        (4, (31u32 << 26) | (6 << 21) | (4 << 16) | (5 << 11) | (150 << 1) | 1),
    ];
    for &(addr, word) in program.iter() {
        write_instr(&mut mem, addr, word);
    }

    // ldarx has to record a doubleword-sized reservation.
    ctx.pc = 0;
    step(&mut ctx, &mut mem);
    assert!(ctx.has_reservation, "ldarx must set has_reservation");
    assert_eq!(ctx.reservation_width, 8, "ldarx must set reservation_width=8");

    // stwcx. needs width 4 but finds 8, so it must fail ...
    step(&mut ctx, &mut mem);
    assert!(!ctx.cr[0].eq, "stwcx. must fail when reservation was set by ldarx (cross-width)");
    // ... leave the word at 0x1000 at its initial zero ...
    assert_eq!(mem.read_u32(0x1000), 0, "stwcx. must not write on cross-width failure");
    // ... and reset the recorded width on the way out.
    assert_eq!(ctx.reservation_width, 0, "stwcx. must clear reservation_width on exit");
}
/// Regression: `lvebx` must preserve the prior contents of the destination
/// VR for lanes other than the loaded byte. Previously the handler started
/// from a zeroed buffer.
#[test]
fn test_lvebx_preserves_other_lanes() {
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();

    // Seed v3 with the ascending bytes 0x10..=0x1F so every lane is distinguishable.
    let mut seed = [0u8; 16];
    for (lane, byte) in seed.iter_mut().enumerate() {
        *byte = 0x10 + lane as u8;
    }
    ctx.vr[3] = xenia_types::Vec128::from_bytes(seed);

    // The byte to load sits at 0x1004; EA & 0xF == 4 selects lane 4.
    mem.write_u8(0x1004, 0xAB);
    // r4 = 0x1000 and r5 = 4 give EA = 0x1004.
    ctx.gpr[5] = 4;
    ctx.gpr[4] = 0x1000;

    // lvebx v3, r4, r5 — opcode 31, XO=7:
    // (31<<26) | (3<<21) | (4<<16) | (5<<11) | (7<<1)
    let lvebx = (31u32 << 26) | (3 << 21) | (4 << 16) | (5 << 11) | (7 << 1);
    write_instr(&mem, 0, lvebx);
    ctx.pc = 0;
    step(&mut ctx, &mem);

    // Only lane 4 may change; it must now hold the loaded byte.
    let mut want = seed;
    want[4] = 0xAB;
    assert_eq!(ctx.vr[3].as_bytes(), want);
}
// ===== PPCBUG-046 / PPCBUG-561: rldicl / clrldi mb_md fix =====
/// Builds the 32-bit encoding of `rldicl` (MD-form, primary opcode 30, XO=0),
/// with field positions given as host (LSB-0) bit numbers.
///
/// * `rs` — source register, placed at bits 25-21.
/// * `ra` — destination register, placed at bits 20-16.
/// * `sh` — 6-bit shift amount: low five bits at 15-11, top bit at bit 1.
/// * `mb` — 6-bit mask-begin: low five bits at 10-6, top bit at bit 5.
/// * `rc` — record bit (only the lowest bit is used), placed at bit 0.
fn encode_rldicl(rs: u32, ra: u32, sh: u32, mb: u32, rc: u32) -> u32 {
    let primary = 30u32 << 26;
    let registers = (rs << 21) | (ra << 16);
    // Both 6-bit immediates are split: five low bits in one field, the sixth elsewhere.
    let shift_low = (sh & 0x1F) << 11;
    let shift_high = ((sh >> 5) & 1) << 1;
    let mask_low = (mb & 0x1F) << 6;
    let mask_high = ((mb >> 5) & 1) << 5;
    let record = rc & 1;
    primary | registers | shift_low | mask_low | mask_high | shift_high | record
}
#[test]
fn clrldi_zero_extends_low_32_bits() {
    // clrldi r3, r4, 32 is the simplified mnemonic for rldicl r3, r4, 0, 32.
    // With the PPCBUG-046 fix, mb=32 must yield the mask 0x00000000_FFFFFFFF
    // (bits 32 through 63). Had mb=32 been decoded as mb=0, the mask would be
    // all ones and r3 would simply receive r4 unchanged (0xDEAD_BEEF_CAFE_BABE).
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    let clrldi = encode_rldicl(4, 3, 0, 32, 0); // sh=0, mb=32
    write_instr(&mem, 0x100, clrldi);
    ctx.gpr[4] = 0xDEAD_BEEF_CAFE_BABE_u64;
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    assert_eq!(ctx.gpr[3], 0x0000_0000_CAFE_BABE, "clrldi must zero-extend low 32 bits");
}
#[test]
fn rldicl_mb32_leaves_low_32_clean() {
    // rldicl r6, r5, 0, 32 on a source whose upper half is all ones: the
    // upper 32 bits of the result must be cleared and the lower 32 kept as-is.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    let rldicl = encode_rldicl(5, 6, 0, 32, 0);
    write_instr(&mem, 0x100, rldicl);
    ctx.gpr[5] = 0xFFFF_FFFF_1234_5678_u64;
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    assert_eq!(ctx.gpr[6], 0x0000_0000_1234_5678_u64);
}
// ===== PPCBUG-275/276/562: vc_rc_bit fix for VC-form vcmpequb =====
/// Encodes `vcmpequb.` (the record form) as a VC-form word: primary opcode 4
/// (VMX), vD in PPC bits 6-10, vA in 11-15, vB in 16-20, Rc at PPC bit 21
/// (host bit 10) and XO=6.
/// Layout: (4<<26)|(vD<<21)|(vA<<16)|(vB<<11)|(1<<10)|6
fn encode_vcmpequb_dot(vd: u32, va: u32, vb: u32) -> u32 {
    const PRIMARY: u32 = 4 << 26;
    const RC: u32 = 1 << 10;
    const XO: u32 = 6;
    let operands = (vd << 21) | (va << 16) | (vb << 11);
    PRIMARY | operands | RC | XO
}
/// Encodes `vcmpequb` without the record bit: primary opcode 4, vD at host
/// bits 25-21, vA at 20-16, vB at 15-11, Rc (host bit 10) left clear, XO=6.
fn encode_vcmpequb(vd: u32, va: u32, vb: u32) -> u32 {
    const PRIMARY: u32 = 4 << 26;
    const XO: u32 = 6;
    let operands = (vd << 21) | (va << 16) | (vb << 11);
    PRIMARY | operands | XO
}
#[test]
fn vcmpequb_dot_all_true_sets_cr6_lt() {
    // Identical operands make every byte compare equal, so each result lane
    // is 0xFF; the record form must then report CR6.LT=1 (all true) and CR6.EQ=0.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[2] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    let instr = encode_vcmpequb_dot(0, 1, 2);
    write_instr(&mem, 0x100, instr);
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    assert!(ctx.cr[6].lt, "all-true: CR6.LT must be 1");
    assert!(!ctx.cr[6].eq, "all-true: CR6.EQ must be 0");
}
#[test]
fn vcmpequb_no_dot_does_not_update_cr6() {
    // The non-record form must leave CR6 exactly as it found it.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // Identical operands: an erroneous CR6 update would report all-true
    // (EQ=0) and so disturb the EQ=1 seeded below.
    ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    ctx.vr[2] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    // Pre-load CR6 with LT and EQ both set.
    ctx.cr[6] = crate::context::CrField { lt: true, gt: false, eq: true, so: false };
    let instr = encode_vcmpequb(0, 1, 2);
    write_instr(&mem, 0x100, instr);
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    // Both seeded flags must survive the instruction.
    let cr6 = &ctx.cr[6];
    assert!(cr6.lt && cr6.eq, "CR6 must be unchanged without dot");
}
#[test]
fn vcmpequb_dot_all_false_sets_cr6_eq() {
    // Operands that differ in every byte make each result lane 0x00; the
    // record form must then report CR6.LT=0 and CR6.EQ=1 (all false).
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    ctx.vr[2] = xenia_types::Vec128::from_bytes([0xBBu8; 16]);
    ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAAu8; 16]);
    let instr = encode_vcmpequb_dot(0, 1, 2);
    write_instr(&mem, 0x100, instr);
    ctx.pc = 0x100;
    step(&mut ctx, &mem);
    assert!(!ctx.cr[6].lt, "all-false: CR6.LT must be 0");
    assert!(ctx.cr[6].eq, "all-false: CR6.EQ must be 1");
}
// ---- PPCBUG-363 + PPCBUG-369: vpkd3d128 post-pack permutation ----
//
// vpkd3d128 VD, VB, type, pack, shift: the low 2 bits of the IMM field
// select how the packed scalar/vector is merged back into the previous VD.
// pack=0 → identity (store out directly); pack=1 → 32-bit merge by shift;
// pack=2,3 → 64-bit merge by shift.
// Canary source: ppc_emit_altivec.cc:2126-2188.
//
// For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant), so
// vd128 is always in range [96, 127] for vd_lo in [0, 31].
/// Builds the 32-bit encoding of `vpkd3d128` (op6=6, FormatVX128_4 layout as
/// used by canary), with field positions given as host (LSB-0) bit numbers.
///
/// * `vd` — 7-bit destination register number; its low five bits go to host
///   bits 25-21 (PPC 6-10) and the next two bits to host bits 3-2 (PPC 28-29).
/// * `vb_lo` — low five bits of the source register, host bits 15-11 (PPC 16-20).
/// * `imm` — 5-bit IMM field, host bits 20-16 (PPC 11-15).
/// * `z` — 2-bit field at host bits 7-6 (PPC 24-25).
///
/// Every operand is masked to its field width, so an out-of-range argument
/// cannot spill into a neighbouring field or the opcode bits.
fn encode_vpkd3d128(vd: u32, vb_lo: u32, imm: u32, z: u32) -> u32 {
    let vd_lo = vd & 0x1F;
    let vd_hi = (vd >> 5) & 0x3;
    (6u32 << 26)
        | (vd_lo << 21)
        | (vd_hi << 2)
        | ((imm & 0x1F) << 16)
        | ((vb_lo & 0x1F) << 11)
        // key2 discriminant: PPC bits 21 and 22 set, 23 and 26 clear, 27 set.
        | (1 << 10) // PPC bit 21 (key2)
        | (1 << 9) // PPC bit 22 (key2)
        | ((z & 0x3) << 6) // z at PPC 24-25
        | (1 << 4) // PPC bit 27 (key2)
}
#[test]
fn vpkd3d128_pack0_legacy_unchanged() {
    // pack=0 is the identity mode: the destination receives the packed value
    // as-is, with nothing blended in from its previous contents.
    // type=0 (D3dColor) with pack=0 gives IMM=0; z is irrelevant here and left 0.
    // The destination is v96 (vd_lo=0 plus the two high register bits).
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // Old v96 contents: sentinels that must not survive into the result.
    ctx.vr[96] =
        xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD);
    // Source v1: R=1.0, G=B=A=0, which D3dColor packs to the word
    // (0<<24)|(255<<16)|(0<<8)|0 = 0x00FF0000.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 0, 0));
    ctx.pc = 0;
    step(&mut ctx, &mem);

    // out = [0, 0, 0, 0x00FF_0000], and pack=0 stores out directly.
    let lanes = ctx.vr[96].as_u32x4();
    let expected = [
        (0x0000_0000, "pack=0: lane 0 must be out[0]=0"),
        (0x0000_0000, "pack=0: lane 1 must be out[1]=0"),
        (0x0000_0000, "pack=0: lane 2 must be out[2]=0"),
        (0x00FF_0000, "pack=0: lane 3 must be packed D3dColor"),
    ];
    for (i, &(want, msg)) in expected.iter().enumerate() {
        assert_eq!(lanes[i], want, "{}", msg);
    }
}
#[test]
fn vpkd3d128_pack1_shift0_d3d_vertex_pack() {
    // pack=1 with shift=0 (VPACK_32): the packed word out[3] lands in lane 3
    // while lanes 0..2 keep the destination's previous contents.
    // MakePermuteMask(0,0, 0,1, 0,2, 1,3) → [prev[0], prev[1], prev[2], out[3]]
    // IMM = (type=0 D3dColor << 2) | pack=1 = 1; z=0.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // Previous destination contents, one distinct word per lane.
    ctx.vr[96] =
        xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    // Source packs to out[3]=0x00FF_0000.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 0));
    ctx.pc = 0;
    step(&mut ctx, &mem);

    let lanes = ctx.vr[96].as_u32x4();
    let expected = [
        (0x1111_1111, "pack=1 shift=0: lane 0 from prev"),
        (0x2222_2222, "pack=1 shift=0: lane 1 from prev"),
        (0x3333_3333, "pack=1 shift=0: lane 2 from prev"),
        (0x00FF_0000, "pack=1 shift=0: lane 3 from out[3]"),
    ];
    for (i, &(want, msg)) in expected.iter().enumerate() {
        assert_eq!(lanes[i], want, "{}", msg);
    }
}
#[test]
fn vpkd3d128_pack1_shift3_puts_out3_at_lane0() {
    // pack=1 with shift=3 (VPACK_32): the packed word out[3] lands in lane 0
    // while lanes 1..3 keep the destination's previous contents.
    // MakePermuteMask(1,3, 0,1, 0,2, 0,3) → [out[3], prev[1], prev[2], prev[3]]
    // IMM = 1; z=3.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // Previous destination contents, one distinct word per lane.
    ctx.vr[96] =
        xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    // Source packs to out[3]=0x00FF_0000.
    ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0);
    write_instr(&mem, 0, encode_vpkd3d128(96, 1, 1, 3));
    ctx.pc = 0;
    step(&mut ctx, &mem);

    let lanes = ctx.vr[96].as_u32x4();
    let expected = [
        (0x00FF_0000, "pack=1 shift=3: lane 0 from out[3]"),
        (0x2222_2222, "pack=1 shift=3: lane 1 from prev"),
        (0x3333_3333, "pack=1 shift=3: lane 2 from prev"),
        (0x4444_4444, "pack=1 shift=3: lane 3 from prev"),
    ];
    for (i, &(want, msg)) in expected.iter().enumerate() {
        assert_eq!(lanes[i], want, "{}", msg);
    }
}
// ---- PPCBUG-510: stvewx128 should write one word (4 bytes), not 16 ----
/// Builds the 32-bit encoding of `stvewx128`, a VMX128 load/store under op6=4,
/// with field positions given as host (LSB-0) bit numbers.
///
/// * `vs_lo` — low five bits of the source vector register, host bits 25-21.
/// * `ra` — base register, host bits 20-16.
/// * `rb` — index register, host bits 15-11.
///
/// The fixed bits follow the key1 pattern 0b00110000011 the original note
/// attributes to `decode_op4` (PPC bits 21-27 = 0b0011000, bits 30-31 = 0b11).
/// No other bit is set, so this helper never encodes the high VS128 bits.
/// NOTE(review): the earlier comment here placed VS128[5] at host bit 3 and
/// VS128[6] at host bit 1 — confirm the real positions against the decoder.
fn encode_stvewx128(vs_lo: u32, ra: u32, rb: u32) -> u32 {
    const OP6: u32 = 4 << 26;
    const KEY_PPC_21_27: u32 = 0b0011000 << 4; // host bits 10-4
    const KEY_PPC_30_31: u32 = 0b11; // host bits 1-0
    let operands = (vs_lo << 21) | (ra << 16) | (rb << 11);
    OP6 | operands | KEY_PPC_21_27 | KEY_PPC_30_31
}
#[test]
fn stvewx128_writes_one_word_at_word_aligned_ea() {
    // PPCBUG-510: the old handler stored all 16 bytes at `ea & !0xF`,
    // clobbering the 12 bytes around the target word. The fix word-aligns the
    // EA, picks the lane from `(ea & 0xF) >> 2`, and writes just those 4 bytes.
    let mut ctx = PpcContext::new();
    let mem = TestMem::new();
    // encode_stvewx128 populates only the low five VS128 bits (host bits 25-21)
    // plus the fixed key bits, so vs_lo=3 is expected to select v3 — the
    // register seeded below.
    let raw = encode_stvewx128(3, 1, 2);
    ctx.vr[3] =
        xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
    ctx.gpr[1] = 0x1000; // base
    ctx.gpr[2] = 0x008; // offset → EA = 0x1008 (already word-aligned), slot = (0x8 & 0xF) >> 2 = 2
    write_instr(&mem, 0, raw);
    ctx.pc = 0;
    step(&mut ctx, &mem);
    assert_eq!(ctx.pc, 4, "PC must advance");
    // Slot 2 selects lane 2 = 0x3333_3333.
    assert_eq!(mem.read_u32(0x1008), 0x3333_3333, "only lane 2 word at ea");
    // The other three words of the 16-byte line must stay at their zero-init
    // value; the old full-vector store would have put lanes 0, 1 and 3 there.
    assert_eq!(mem.read_u32(0x1000), 0x0000_0000, "word at 0x1000 must be untouched");
    assert_eq!(mem.read_u32(0x1004), 0x0000_0000, "word at 0x1004 must be untouched");
    assert_eq!(mem.read_u32(0x100C), 0x0000_0000, "word at 0x100C must be untouched");
}
}