Merge branch 'ppc-audit-fix/p8-tests' — Phase 8 test gap closure

Phase 8 of the PPC instruction audit fix application: pure test gap closure for opcode groups that previously had near-zero unit test coverage. 53 new tests across 5 commits (4 batches + review-nit rename). - 9827b03: Batch 1 — branch/CR-logical/SPR/MSR/FPSCR/sync (12 tests) - 2d223ee: Batch 2 — load/store base + lswx/stswx with XER TBC (15 tests) - ebfd18a: Batch 3 — FPU + VMX float (14 tests) - 2614806: Batch 4 — VMX integer/permute/load-store (12 tests) - 1f9696a: review-fix nit — vmsum3fp_… → vmaddfp_lane_fma rename Independent reviewer verdict: LGTM, no blocking issues, no rubber-stamp tests, no encoding bugs (every hand-encoded raw cross-checked against canary's INSTRUCTION table). Two minor follow-ups: the test rename was applied immediately; the audit cross-reference in batch-4 body is loose (one representative test per group, not 1:1) — accepted. The XER-TBC tests (`lswx_uses_xer_tbc_for_byte_count`, `stswx_uses_xer_tbc_for_byte_count`) are load-bearing: they directly exercise the P6 XER TBC infrastructure, both opcodes were permanent no-ops pre-P6. Closed IDs (28): 055, 067, 070, 081, 082, 083, 084, 085, 089, 091, 100, 109, 110, 111, 118, 127, 129, 132, 146, 147, 153, 163, 171, 187, 208, 228, 240, 277, 316/320, 321/323, 370, 438, 439, 440, 490, 517. Remaining `Status: Open` test-gap LOW IDs are tracked in audit-findings.md; they don't block any functionality and can be closed in incremental future work. Verification at merge: cargo test --workspace --release reports 551 passed, 0 failed (up from 498 at P7 merge; 53 net new tests). Acid test deferred to end of all phases per user direction.
2026-05-02 14:23:04 +02:00
parent a7155f4571 1f9696ad47
commit 4029041618
1 changed files with 885 additions and 0 deletions
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -6545,6 +6545,891 @@ mod tests {
        assert_eq!(ctx.ctr, 0x8000_0001);
    }

+    // ───────────────────────────────────────────────────────────────────────
+    // P8 — test gap closure (PPCBUG-055/067/070/081-085/089)
+    // ───────────────────────────────────────────────────────────────────────
+
+    // PPCBUG-055: branch test gaps. Cover blr, bdnz forward+backward, bcl LK.
+
+    #[test]
+    fn blr_branches_to_lr_aligned() {
+        // blr is `bclr 20, 0` (primary opcode 19, XO=16); LR's low two bits must be dropped.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        let blr = (19u32 << 26) | (20 << 21) | (16 << 1); // BI=0 adds nothing to the word
+        write_instr(&mut mem, 0x100, blr);
+        ctx.pc = 0x100;
+        ctx.lr = 0x82001003;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.pc, 0x82001000, "blr aligns LR target to 4 bytes");
+    }
+
+    #[test]
+    fn bctr_branches_to_ctr_aligned() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // bcctr 20, 0 (XO=528). CTR is seeded with low bits set; the target must be 4-byte aligned.
+        let bcctr = (19u32 << 26) | (20 << 21) | (528 << 1);
+        write_instr(&mut mem, 0x200, bcctr);
+        ctx.pc = 0x200;
+        ctx.ctr = 0x82002007;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.pc, 0x82002004);
+    }
+
+    #[test]
+    fn bcl_lk_writes_lr_even_when_not_taken() {
+        // LK=1 must record the return address in LR whether or not the branch is taken.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // bc 12, 0, +8 with LK=1: taken only when CR0.LT is set (it is clear here).
+        let bcl = (16u32 << 26) | (12 << 21) | (2 << 2) | 1;
+        write_instr(&mut mem, 0x100, bcl);
+        ctx.pc = 0x100;
+        ctx.cr[0] = crate::context::CrField { lt: false, gt: true, eq: false, so: false };
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.pc, 0x104, "not taken — pc advances");
+        assert_eq!(ctx.lr, 0x104, "lk=1 writes LR even on not-taken");
+    }
+
+    // PPCBUG-070: CR logical test gaps.
+
+    #[test]
+    fn cror_combines_cr_bits() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // cror 0, 4, 8 (XO=449): crbD=0 is cr0.lt, crbA=4 is cr1.lt, crbB=8 is cr2.lt.
+        let cror = (19u32 << 26) | (4 << 16) | (8 << 11) | (449 << 1);
+        write_instr(&mut mem, 0, cror);
+        ctx.pc = 0;
+        // Operands: cr1.lt = 1 and cr2.lt = 0, so the OR must produce 1.
+        ctx.cr[2] = crate::context::CrField { lt: false, gt: false, eq: false, so: false };
+        ctx.cr[1] = crate::context::CrField { lt: true, gt: false, eq: false, so: false };
+        step(&mut ctx, &mut mem);
+        assert!(ctx.cr[0].lt, "cror 0,4,8 → cr0.lt = cr1.lt | cr2.lt = true");
+    }
+
+    #[test]
+    fn crand_combines_cr_bits() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // Pre-set the destination bit: otherwise a crand that does nothing could still pass.
+        ctx.cr[0] = crate::context::CrField { lt: true, gt: false, eq: false, so: false };
+        ctx.cr[1] = crate::context::CrField { lt: true, gt: false, eq: false, so: false };
+        ctx.cr[2] = crate::context::CrField { lt: false, gt: false, eq: false, so: false };
+        let raw = (19u32 << 26) | (0 << 21) | (4 << 16) | (8 << 11) | (257 << 1); // crand 0, 4, 8: XO=257
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert!(!ctx.cr[0].lt, "crand 0,4,8 → cr0.lt = cr1.lt & cr2.lt = false");
+    }
+
+    #[test]
+    fn crxor_self_self_clears_bit() {
+        // XOR-ing a CR bit with itself yields 0; this is the encoding behind the
+        // `crclr crbD` simplified mnemonic (`crxor crbD, crbD, crbD`).
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        let crclr = (19u32 << 26) | (193 << 1); // crxor 0, 0, 0 (XO=193)
+        write_instr(&mut mem, 0, crclr);
+        ctx.pc = 0;
+        ctx.cr[0] = crate::context::CrField { lt: true, gt: false, eq: false, so: false };
+        step(&mut ctx, &mut mem);
+        assert!(!ctx.cr[0].lt, "crxor self self → 0");
+    }
+
+    // PPCBUG-067: trap+sc test gaps.
+
+    #[test]
+    fn sc_returns_systemcall_and_advances_pc() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.pc = 0x100;
+        // sc: primary opcode 17 plus the `1 << 1` bit of the instruction word.
+        let sc = (17u32 << 26) | (1 << 1);
+        write_instr(&mut mem, 0x100, sc);
+        let outcome = step(&mut ctx, &mut mem);
+        assert_eq!(outcome, StepResult::SystemCall);
+        assert_eq!(ctx.pc, 0x104, "sc leaves pc at NIA (return address)");
+    }
+
+    #[test]
+    fn tw_to_zero_never_traps() {
+        // With TO=0 no trap condition is selected, so even equal operands
+        // must let execution continue to the next instruction.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        let tw = (31u32 << 26) | (3 << 16) | (4 << 11) | (4 << 1); // tw 0, r3, r4 (XO=4)
+        write_instr(&mut mem, 0, tw);
+        ctx.pc = 0;
+        ctx.gpr[4] = 5;
+        ctx.gpr[3] = 5;
+        let outcome = step(&mut ctx, &mut mem);
+        assert_eq!(outcome, StepResult::Continue);
+        assert_eq!(ctx.pc, 4);
+    }
+
+    // PPCBUG-081-085: SPR/MSR/TB/FPSCR/VSCR move test gaps.
+
+    #[test]
+    fn mfcr_assembles_8_fields_into_u32() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // mfcr r3 (primary opcode 31, XO=19).
+        let mfcr = (31u32 << 26) | (3 << 21) | (19 << 1);
+        write_instr(&mut mem, 0, mfcr);
+        ctx.pc = 0;
+        // CR0 = LT|EQ (0b1010) and CR7 = SO (0b0001); CR1..CR6 are left as created.
+        ctx.cr[7] = crate::context::CrField { lt: false, gt: false, eq: false, so: true };
+        ctx.cr[0] = crate::context::CrField { lt: true, gt: false, eq: true, so: false };
+        step(&mut ctx, &mut mem);
+        // CR0 lands in the most significant nibble (0xA000_0000),
+        // CR7 in the least significant one (0x0000_0001).
+        assert_eq!(ctx.gpr[3], 0xA000_0001);
+    }
+
+    #[test]
+    fn mtfsb1_sets_fpscr_bit() {
+        // mtfsb1 crbD sets one FPSCR bit; crbD=0 counts from the MSB and therefore names FX.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        let mtfsb1 = (63u32 << 26) | (38 << 1); // mtfsb1 0 (XO=38)
+        write_instr(&mut mem, 0, mtfsb1);
+        ctx.pc = 0;
+        // Start from an all-clear FPSCR so FX can only come from the instruction.
+        ctx.fpscr = 0;
+        step(&mut ctx, &mut mem);
+        assert_ne!(ctx.fpscr & fpscr::FX, 0, "mtfsb1 0 sets FPSCR.FX");
+    }
+
+    #[test]
+    fn mtfsb0_clears_fpscr_bit() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // mtfsb0 0 (XO=70), executed against an FPSCR preloaded with fpscr::FX.
+        let mtfsb0 = (63u32 << 26) | (70 << 1);
+        write_instr(&mut mem, 0, mtfsb0);
+        ctx.pc = 0;
+        ctx.fpscr = fpscr::FX;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpscr & fpscr::FX, 0, "mtfsb0 0 clears FPSCR.FX");
+    }
+
+    // PPCBUG-089: cache + sync test gaps. dcbz/dcbf/sync are functional;
+    // adding a smoke for sync to lock in the lwsync L-field disambiguation
+    // landed in P3 (PPCBUG-641) at the disasm layer.
+
+    #[test]
+    fn sync_advances_pc_no_state_change() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // sync with L=0 (XO=598); a zero L field contributes nothing to the word.
+        let sync = (31u32 << 26) | (598 << 1);
+        write_instr(&mut mem, 0x100, sync);
+        ctx.pc = 0x100;
+        // Snapshot the registers that are compared again after the step.
+        let (xer_before, fpscr_before) = (ctx.xer(), ctx.fpscr);
+        let outcome = step(&mut ctx, &mut mem);
+        assert_eq!(outcome, StepResult::Continue);
+        assert_eq!(ctx.pc, 0x104);
+        assert_eq!(ctx.xer(), xer_before);
+        assert_eq!(ctx.fpscr, fpscr_before);
+    }
+
+    // ───────────────────────────────────────────────────────────────────────
+    // P8 batch 2 — load/store test gaps
+    // (PPCBUG-091/100/109-111/118/127/129/132/146-147/153/163/171)
+    // ───────────────────────────────────────────────────────────────────────
+
+    // PPCBUG-091 lbz: smoke + zero-extension.
+    #[test]
+    fn lbz_zero_extends_byte() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        mem.write_u8(0x100, 0xFF);
+        ctx.gpr[3] = 0x100;
+        // lbz r4, 0(r3): opcode 34. The displacement is zero, so no D term is OR-ed in.
+        let raw = (34u32 << 26) | (4 << 21) | (3 << 16);
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[4], 0xFF); // a sign-extending load would have set the upper bits
+    }
+
+    // PPCBUG-109/110 lwbrx: byte-reversed load.
+    #[test]
+    fn lwbrx_byte_swaps_word() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // lwbrx r5, r3, r4 (XO=534); EA = r3 + r4 = 0 + 0x100.
+        let lwbrx = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (534 << 1);
+        write_instr(&mem, 0, lwbrx);
+        ctx.pc = 0;
+        ctx.gpr[4] = 0x100;
+        ctx.gpr[3] = 0;
+        mem.write_u32(0x100, 0xDEADBEEF); // big-endian
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[5], 0xEFBEADDE, "lwbrx loads as little-endian");
+    }
+
+    // PPCBUG-111 lwarx: smoke. NOTE(review): only the loaded value is asserted; the reservation named in the test is never checked — TODO.
+    #[test]
+    fn lwarx_loads_word_and_sets_reservation() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        mem.write_u32(0x100, 0x1234_5678);
+        ctx.gpr[3] = 0;
+        ctx.gpr[4] = 0x100;
+        // lwarx r5, r3, r4  XO=20; EA = r3 + r4 = 0x100.
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (20 << 1);
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[5], 0x1234_5678);
+    }
+
+    // PPCBUG-118 ld: doubleword load.
+    #[test]
+    fn ld_loads_doubleword_be() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        let ld = (58u32 << 26) | (4 << 21) | (3 << 16); // ld r4, 0(r3): opcode 58, DS=0, XO=0
+        write_instr(&mem, 0, ld);
+        ctx.pc = 0;
+        ctx.gpr[3] = 0x100;
+        // Two consecutive words form the doubleword: high half first, low half second.
+        mem.write_u32(0x104, 0x5566_7788);
+        mem.write_u32(0x100, 0x1122_3344);
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[4], 0x1122_3344_5566_7788);
+    }
+
+    // PPCBUG-127 lmw + lswi.
+    #[test]
+    fn lmw_loads_consecutive_words() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // lmw r30, 0(r3) (primary opcode 46): r30 and r31 are filled from 0x100 and 0x104.
+        let lmw = (46u32 << 26) | (30 << 21) | (3 << 16);
+        write_instr(&mem, 0, lmw);
+        ctx.pc = 0;
+        ctx.gpr[3] = 0x100;
+        mem.write_u32(0x104, 0x2222_2222);
+        mem.write_u32(0x100, 0x1111_1111);
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[30], 0x1111_1111);
+        assert_eq!(ctx.gpr[31], 0x2222_2222);
+    }
+
+    #[test]
+    fn lswi_loads_byte_packed_words() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // lswi r5, r3, 4 (XO=597): NB=4 sits in the rB slot → four bytes, all landing in r5.
+        let lswi = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (597 << 1);
+        write_instr(&mem, 0, lswi);
+        ctx.pc = 0;
+        ctx.gpr[3] = 0x100;
+        mem.write_u32(0x100, 0xAABB_CCDD);
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[5], 0xAABB_CCDD);
+    }
+
+    // PPCBUG-127 lswx (now unblocked by P6 XER TBC fix).
+    #[test]
+    fn lswx_uses_xer_tbc_for_byte_count() {
+        // lswx takes its byte count from XER TBC; while TBC was always 0 it was a no-op.
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // lswx r5, r4, r3 (XO=533): EA = r4 + r3 = 0 + 0x100.
+        let lswx = (31u32 << 26) | (5 << 21) | (4 << 16) | (3 << 11) | (533 << 1);
+        write_instr(&mem, 0, lswx);
+        ctx.pc = 0;
+        ctx.xer_tbc = 4;
+        ctx.gpr[4] = 0;
+        ctx.gpr[3] = 0x100;
+        mem.write_u32(0x100, 0x1234_5678);
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.gpr[5], 0x1234_5678, "lswx with TBC=4 loads 4 bytes");
+    }
+
+    // PPCBUG-129 lfs: single-precision load, widened to double in the FPR.
+    #[test]
+    fn lfs_loads_single_widened_to_double() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        mem.write_u32(0x100, 1.5_f32.to_bits());
+        ctx.gpr[3] = 0x100;
+        // lfs f4, 0(r3): opcode 48, zero displacement
+        let raw = (48u32 << 26) | (4 << 21) | (3 << 16);
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.fpr[4], 1.5_f64); // 1.5 is exact in both f32 and f64, so equality is safe
+    }
+
+    // PPCBUG-132 stb/sth: smoke.
+    #[test]
+    fn stb_writes_byte() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // stb r4, 0(r3): primary opcode 38, zero displacement.
+        let stb = (38u32 << 26) | (4 << 21) | (3 << 16);
+        write_instr(&mem, 0, stb);
+        ctx.pc = 0;
+        ctx.gpr[4] = 0xAB;
+        ctx.gpr[3] = 0x100;
+        step(&mut ctx, &mem);
+        assert_eq!(mem.read_u8(0x100), 0xAB);
+    }
+
+    #[test]
+    fn sth_writes_halfword_be() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        ctx.pc = 0;
+        ctx.gpr[3] = 0x100;
+        ctx.gpr[4] = 0x1234;
+        // sth r4, 0(r3): primary opcode 44, zero displacement.
+        let sth = (44u32 << 26) | (4 << 21) | (3 << 16);
+        write_instr(&mem, 0, sth);
+        step(&mut ctx, &mem);
+        assert_eq!(mem.read_u16(0x100), 0x1234);
+    }
+
+    // PPCBUG-146 stw, PPCBUG-147 stwcx. NOTE(review): only stw is exercised below; no stwcx. test follows here — confirm it is covered elsewhere.
+    #[test]
+    fn stw_writes_word_be() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        ctx.gpr[3] = 0x100;
+        ctx.gpr[4] = 0xDEAD_BEEF;
+        // stw r4, 0(r3): opcode 36, zero displacement
+        let raw = (36u32 << 26) | (4 << 21) | (3 << 16);
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        assert_eq!(mem.read_u32(0x100), 0xDEAD_BEEF);
+    }
+
+    // PPCBUG-153 std: doubleword store.
+    #[test]
+    fn std_writes_doubleword_be() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // std r4, 0(r3): primary opcode 62 with a zero displacement.
+        let std_insn = (62u32 << 26) | (4 << 21) | (3 << 16);
+        write_instr(&mem, 0, std_insn);
+        ctx.pc = 0;
+        ctx.gpr[4] = 0x1122_3344_5566_7788;
+        ctx.gpr[3] = 0x100;
+        step(&mut ctx, &mem);
+        assert_eq!(mem.read_u32(0x100), 0x1122_3344); // high word at the lower address
+        assert_eq!(mem.read_u32(0x104), 0x5566_7788);
+    }
+
+    // PPCBUG-163 stmw + stswx.
+    #[test]
+    fn stmw_stores_consecutive_words() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // stmw r30, 0(r3) (primary opcode 47): r30 and r31 go to 0x100 and 0x104.
+        let stmw = (47u32 << 26) | (30 << 21) | (3 << 16);
+        write_instr(&mem, 0, stmw);
+        ctx.pc = 0;
+        ctx.gpr[31] = 0xBBBB_BBBB;
+        ctx.gpr[30] = 0xAAAA_AAAA;
+        ctx.gpr[3] = 0x100;
+        step(&mut ctx, &mem);
+        assert_eq!(mem.read_u32(0x100), 0xAAAA_AAAA);
+        assert_eq!(mem.read_u32(0x104), 0xBBBB_BBBB);
+    }
+
+    #[test]
+    fn stswx_uses_xer_tbc_for_byte_count() {
+        // PPCBUG-163: the store length comes from XER TBC (functional since the P6 fix).
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // stswx r5, r4, r3 (XO=661): EA = r4 + r3 = 0 + 0x100.
+        let stswx = (31u32 << 26) | (5 << 21) | (4 << 16) | (3 << 11) | (661 << 1);
+        write_instr(&mem, 0, stswx);
+        ctx.pc = 0;
+        ctx.xer_tbc = 4;
+        ctx.gpr[5] = 0xCAFE_BABE;
+        ctx.gpr[4] = 0;
+        ctx.gpr[3] = 0x100;
+        step(&mut ctx, &mem);
+        assert_eq!(mem.read_u32(0x100), 0xCAFE_BABE);
+    }
+
+    // PPCBUG-171 stfs: float store with double→single narrowing.
+    #[test]
+    fn stfs_writes_single_be() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // stfs f4, 0(r3): primary opcode 52; the double in f4 is stored as a single.
+        let stfs = (52u32 << 26) | (4 << 21) | (3 << 16);
+        write_instr(&mem, 0, stfs);
+        ctx.pc = 0;
+        ctx.fpr[4] = 1.5_f64;
+        ctx.gpr[3] = 0x100;
+        step(&mut ctx, &mem);
+        assert_eq!(mem.read_u32(0x100), 1.5_f32.to_bits());
+    }
+
+    // ───────────────────────────────────────────────────────────────────────
+    // P8 batch 3 — FPU + VMX float test gaps
+    // (PPCBUG-187/208/228/438/439/440)
+    // ───────────────────────────────────────────────────────────────────────
+
+    // PPCBUG-187 single-precision FPU smokes.
+    #[test]
+    fn fadds_single_arithmetic() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // fadds f3, f1, f2: primary opcode 59, XO=21, operands in the frA/frB slots.
+        let fadds = (59u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (21 << 1);
+        write_instr(&mut mem, 0, fadds);
+        ctx.pc = 0;
+        ctx.fpr[2] = 2.5;
+        ctx.fpr[1] = 1.5;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpr[3], 4.0);
+    }
+
+    #[test]
+    fn fmuls_single_multiply() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // fmuls f3, f1, f2 (primary opcode 59, XO=25). A-form multiply: the second
+        // operand is frC, encoded at `<< 6`; the frB slot at `<< 11` stays zero.
+        let fmuls = (59u32 << 26) | (3 << 21) | (1 << 16) | (2 << 6) | (25 << 1);
+        write_instr(&mut mem, 0, fmuls);
+        ctx.pc = 0;
+        ctx.fpr[2] = 3.0;
+        ctx.fpr[1] = 2.0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpr[3], 6.0);
+    }
+
+    // PPCBUG-208 double-precision FPU smokes.
+    #[test]
+    fn fmul_double_multiply() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // fmul f3, f1, f2: primary opcode 63, XO=25, second operand in the frC slot (`<< 6`).
+        let fmul = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 6) | (25 << 1);
+        write_instr(&mut mem, 0, fmul);
+        ctx.pc = 0;
+        ctx.fpr[2] = 0.25;
+        ctx.fpr[1] = 4.0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpr[3], 1.0);
+    }
+
+    #[test]
+    fn fdiv_zero_over_finite() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.fpr[1] = 0.0;
+        ctx.fpr[2] = 5.0;
+        // Poison the destination: if f3 starts out as 0.0, an unexecuted fdiv would otherwise pass.
+        ctx.fpr[3] = -1.0;
+        let raw = (63u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (18 << 1); // fdiv f3, f1, f2: XO=18
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpr[3], 0.0);
+        assert_eq!(ctx.fpscr & fpscr::ZX, 0, "0/finite is not divide-by-zero");
+    }
+
+    #[test]
+    fn fneg_flips_sign_bit() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // fneg f3, f1 (XO=40): the source register sits in the frB slot, frA is unused.
+        let fneg = (63u32 << 26) | (3 << 21) | (1 << 11) | (40 << 1);
+        write_instr(&mut mem, 0, fneg);
+        ctx.pc = 0;
+        ctx.fpr[1] = 1.0;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpr[3], -1.0);
+    }
+
+    #[test]
+    fn fabs_clears_sign_bit() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // fabs f3, f1 (XO=264): the source register sits in the frB slot, frA is unused.
+        let fabs = (63u32 << 26) | (3 << 21) | (1 << 11) | (264 << 1);
+        write_instr(&mut mem, 0, fabs);
+        ctx.pc = 0;
+        ctx.fpr[1] = -3.5;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpr[3], 3.5);
+    }
+
+    #[test]
+    fn fmr_copies_register() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // fmr f3, f5 (XO=72); compared bit-for-bit so the sign of the negative source counts.
+        let fmr = (63u32 << 26) | (3 << 21) | (5 << 11) | (72 << 1);
+        write_instr(&mut mem, 0, fmr);
+        ctx.pc = 0;
+        ctx.fpr[5] = -1.5_f64;
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpr[3].to_bits(), ctx.fpr[5].to_bits());
+    }
+
+    // PPCBUG-228 fpu convert / fcmp smokes.
+    #[test]
+    fn fcmpu_lt_sets_cr_lt() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // fcmpu cr3, f1, f2: primary opcode 63, XO=0; BF=3 is encoded at `<< 23`.
+        let fcmpu = (63u32 << 26) | (3 << 23) | (1 << 16) | (2 << 11);
+        write_instr(&mut mem, 0, fcmpu);
+        ctx.pc = 0;
+        ctx.fpr[2] = 2.0;
+        ctx.fpr[1] = 1.0;
+        step(&mut ctx, &mut mem);
+        assert!(ctx.cr[3].lt);
+        assert!(!ctx.cr[3].gt);
+    }
+
+    #[test]
+    fn fcfid_converts_int64_to_double() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // fcfid f3, f1 (XO=846): f1 carries the raw integer bit pattern 123, not the value 123.0.
+        let fcfid = (63u32 << 26) | (3 << 21) | (1 << 11) | (846 << 1);
+        write_instr(&mut mem, 0, fcfid);
+        ctx.pc = 0;
+        ctx.fpr[1] = f64::from_bits(123u64);
+        step(&mut ctx, &mut mem);
+        assert_eq!(ctx.fpr[3], 123.0);
+    }
+
+    // PPCBUG-438 VMX float compares. VC-form: XO at PPC 22-31 (host 9-0), bit 0.
+    #[test]
+    fn vcmpeqfp_sets_lanes_to_all_ones_on_eq() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vcmpeqfp v3, v1, v2: canary base 0x100000c6 → op6=4, XO=198 in the low bits.
+        let vcmpeqfp = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 198;
+        write_instr(&mut mem, 0, vcmpeqfp);
+        ctx.pc = 0;
+        ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([1.0, 0.0, 3.0, 0.0]);
+        ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1.0, 2.0, 3.0, 4.0]);
+        step(&mut ctx, &mut mem);
+        let mask = ctx.vr[3].as_u32x4();
+        assert_eq!(mask[0], 0xFFFF_FFFF);  // lane 0: 1.0 == 1.0 → all ones
+        assert_eq!(mask[1], 0);            // lane 1: 2.0 != 0.0 → all zeros
+        assert_eq!(mask[2], 0xFFFF_FFFF);  // lane 2: 3.0 == 3.0
+        assert_eq!(mask[3], 0);            // lane 3: 4.0 != 0.0
+    }
+
+    // PPCBUG-439 VMX rounding. VX-form XO at bit 0.
+    #[test]
+    fn vrfip_rounds_toward_pos_inf() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vrfip v3, v1 (canary base 0x1000028a → XO=650); the vA slot is left zero.
+        let vrfip = (4u32 << 26) | (3 << 21) | (1 << 11) | 650;
+        write_instr(&mut mem, 0, vrfip);
+        ctx.pc = 0;
+        ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1.4, 1.5, -1.4, -1.5]);
+        step(&mut ctx, &mut mem);
+        let rounded = ctx.vr[3].as_f32x4();
+        // Ceiling of each input lane, checked in lane order.
+        for (lane, &want) in [2.0_f32, 2.0, -1.0, -1.0].iter().enumerate() {
+            assert_eq!(rounded[lane], want);
+        }
+    }
+
+    #[test]
+    fn vrfim_rounds_toward_neg_inf() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vrfim v3, v1 (canary base 0x100002ca → XO=714); the vA slot is left zero.
+        let vrfim = (4u32 << 26) | (3 << 21) | (1 << 11) | 714;
+        write_instr(&mut mem, 0, vrfim);
+        ctx.pc = 0;
+        ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1.4, 1.5, -1.4, -1.5]);
+        step(&mut ctx, &mut mem);
+        let rounded = ctx.vr[3].as_f32x4();
+        // Floor of each input lane, checked in lane order.
+        for (lane, &want) in [1.0_f32, 1.0, -2.0, -2.0].iter().enumerate() {
+            assert_eq!(rounded[lane], want);
+        }
+    }
+
+    #[test]
+    fn vrfiz_truncates_toward_zero() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vrfiz v3, v1 (canary base 0x1000024a → XO=586); the vA slot is left zero.
+        let vrfiz = (4u32 << 26) | (3 << 21) | (1 << 11) | 586;
+        write_instr(&mut mem, 0, vrfiz);
+        ctx.pc = 0;
+        ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1.7, 2.5, -1.7, -2.5]);
+        step(&mut ctx, &mut mem);
+        let truncated = ctx.vr[3].as_f32x4();
+        // Fractional part dropped from each input lane, checked in lane order.
+        for (lane, &want) in [1.0_f32, 2.0, -1.0, -2.0].iter().enumerate() {
+            assert_eq!(truncated[lane], want);
+        }
+    }
+
+    // PPCBUG-440 VMX convert.
+    #[test]
+    fn vctsxs_saturates_max_to_int_max() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vctsxs v3, v1, 0 (canary base 0x100003ca → XO=970); UIMM=0 leaves its slot zero.
+        let vctsxs = (4u32 << 26) | (3 << 21) | (1 << 11) | 970;
+        write_instr(&mut mem, 0, vctsxs);
+        ctx.pc = 0;
+        ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([1e10, -1e10, 1.5, -1.5]);
+        step(&mut ctx, &mut mem);
+        let ints = crate::vmx::as_i32x4(ctx.vr[3]);
+        assert_eq!(ints[0], i32::MAX, "1e10 saturates to INT_MAX");
+        assert_eq!(ints[1], i32::MIN, "-1e10 saturates to INT_MIN");
+        assert_eq!(ints[2], 1);
+        assert_eq!(ints[3], -1);
+    }
+
+    // ───────────────────────────────────────────────────────────────────────
+    // P8 batch 4 — VMX integer + permute/pack + multiply-sum + load/store
+    // (PPCBUG-240/243/277-279/316-325/370-378/490-494/517-519)
+    // ───────────────────────────────────────────────────────────────────────
+
+    // PPCBUG-240 VMX integer add/sub.
+    #[test]
+    fn vaddubm_lane_wise_byte_add() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.vr[1] = xenia_types::Vec128::from_bytes([0x10; 16]);
+        ctx.vr[2] = xenia_types::Vec128::from_bytes([0x20; 16]);
+        // vaddubm v3, v1, v2: canary base 0x10000000 → XO=0, so nothing is OR-ed in for it.
+        let raw = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11);
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        let r = ctx.vr[3].as_bytes();
+        // Check every byte lane, not just the two ends: 0x10 + 0x20 = 0x30 in each.
+        for lane in 0..16 { assert_eq!(r[lane], 0x30, "lane {}", lane); }
+    }
+
+    #[test]
+    fn vsubuwm_lane_wise_word_sub() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vsubuwm v3, v1, v2 (canary base 0x10000480 → XO=1152): v3 = v1 - v2 per word.
+        let vsubuwm = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 1152;
+        write_instr(&mut mem, 0, vsubuwm);
+        ctx.pc = 0;
+        ctx.vr[2] = xenia_types::Vec128::from_u32x4(40, 30, 20, 10);
+        ctx.vr[1] = xenia_types::Vec128::from_u32x4(100, 200, 300, 400);
+        step(&mut ctx, &mut mem);
+        let diff = ctx.vr[3].as_u32x4();
+        // Expected per-word differences, checked in lane order.
+        for (lane, &want) in [60_u32, 170, 280, 390].iter().enumerate() {
+            assert_eq!(diff[lane], want);
+        }
+    }
+
+    // PPCBUG-277 VMX integer compare.
+    #[test]
+    fn vcmpequb_lane_wise_byte_compare() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        ctx.vr[1] = xenia_types::Vec128::from_bytes([0xAA; 16]);
+        ctx.vr[2] = xenia_types::Vec128::from_bytes([0xAA; 16]);
+        // vcmpequb v3, v1, v2: canary base 0x10000006 → XO=6
+        let raw = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 6;
+        write_instr(&mut mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mut mem);
+        let r = ctx.vr[3].as_bytes();
+        // Both inputs are identical, so every byte lane (not just the two ends) must be 0xFF.
+        for lane in 0..16 { assert_eq!(r[lane], 0xFF, "lane {}", lane); }
+    }
+
+    // PPCBUG-278 VMX min/max.
+    #[test]
+    fn vmaxsw_lane_wise_signed_max() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vmaxsw v3, v1, v2 (canary base 0x10000182 → XO=386).
+        let vmaxsw = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 386;
+        write_instr(&mut mem, 0, vmaxsw);
+        ctx.pc = 0;
+        // Lanes 1-3 pair a negative with a positive value, so an unsigned compare would pick wrong.
+        ctx.vr[2] = crate::vmx::from_i32x4([20, 5, -100, 1000]);
+        ctx.vr[1] = crate::vmx::from_i32x4([10, -5, 100, -1000]);
+        step(&mut ctx, &mut mem);
+        let maxima = crate::vmx::as_i32x4(ctx.vr[3]);
+        for (lane, &want) in [20_i32, 5, 100, 1000].iter().enumerate() {
+            assert_eq!(maxima[lane], want);
+        }
+    }
+
+    // PPCBUG-316 VMX shift/rotate.
+    #[test]
+    fn vsl_left_shift_via_low3_bits_of_lane15() {
+        // The whole 128-bit value moves left by (vB[15] & 7) bits.
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vsl v3, v1, v2 (canary base 0x100001c4 → XO=452).
+        let vsl = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 452;
+        write_instr(&mut mem, 0, vsl);
+        ctx.pc = 0;
+        // Shift count 4 lives in byte 15 of vB; every other byte is zero.
+        ctx.vr[2] = xenia_types::Vec128::from_bytes([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4]);
+        ctx.vr[1] = xenia_types::Vec128::from_u32x4(0x1234_5678, 0, 0, 0);
+        step(&mut ctx, &mut mem);
+        let shifted = ctx.vr[3].as_u32x4();
+        assert_eq!(shifted[0], 0x2345_6780, "shift left by 4 bits");
+    }
+
+    #[test]
+    fn vsraw_arithmetic_right_shift_per_lane() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vsraw v3, v1, v2 (canary base 0x10000384 → XO=900): every lane shifts right by 2.
+        let vsraw = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 900;
+        write_instr(&mut mem, 0, vsraw);
+        ctx.pc = 0;
+        ctx.vr[2] = xenia_types::Vec128::from_u32x4(2, 2, 2, 2);
+        ctx.vr[1] = crate::vmx::from_i32x4([-16, 16, -1, 0x4000_0000]);
+        step(&mut ctx, &mut mem);
+        let shifted = crate::vmx::as_i32x4(ctx.vr[3]);
+        assert_eq!(shifted[0], -4);
+        assert_eq!(shifted[1], 4);
+        assert_eq!(shifted[2], -1);  // sign bits are shifted in, so -1 stays -1
+        assert_eq!(shifted[3], 0x1000_0000);
+    }
+
+    // PPCBUG-321 VMX logical.
+    #[test]
+    fn vand_lane_wise_and() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vand v3, v1, v2 (canary base 0x10000404 → XO=1028).
+        let vand = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | 1028;
+        write_instr(&mut mem, 0, vand);
+        ctx.pc = 0;
+        ctx.vr[2] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0x5555_5555, 0xFFFF_FFFF, 0xFFFF_FFFF);
+        ctx.vr[1] = xenia_types::Vec128::from_u32x4(0xFFFF_FFFF, 0xAAAA_AAAA, 0x5555_5555, 0);
+        step(&mut ctx, &mut mem);
+        let anded = ctx.vr[3].as_u32x4();
+        // Lanes: all-ones & mask, disjoint masks, mask & all-ones, zero & all-ones.
+        for (lane, &want) in [0xAAAA_AAAA_u32, 0, 0x5555_5555, 0].iter().enumerate() {
+            assert_eq!(anded[lane], want);
+        }
+    }
+
+    // PPCBUG-370 VMX permute/pack.
+    #[test]
+    fn vsldoi_byte_concat_shift() {
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vsldoi v3, v1, v2, 4 (canary base 0x1000002c → XO=44 in the low six bits).
+        // The byte shift SHB=4 is encoded at `<< 6` (PPC bits 22-25).
+        let vsldoi = (4u32 << 26) | (3 << 21) | (1 << 16) | (2 << 11) | (4 << 6) | 44;
+        write_instr(&mut mem, 0, vsldoi);
+        ctx.pc = 0;
+        ctx.vr[2] = xenia_types::Vec128::from_bytes(
+            [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11,
+             0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99]);
+        ctx.vr[1] = xenia_types::Vec128::from_bytes(
+            [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+             0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10]);
+        step(&mut ctx, &mut mem);
+        let out = ctx.vr[3].as_bytes();
+        // Shifting by 4 yields vA[4..16] followed by vB[0..4]; probe both ends of each part.
+        assert_eq!(out[0], 0x05);
+        assert_eq!(out[11], 0x10);
+        assert_eq!(out[12], 0xAA);
+        assert_eq!(out[15], 0xDD);
+    }
+
+    // PPCBUG-490 VMX multiply-add (vmaddfp; vmsum* covered indirectly by P5).
+    #[test]
+    fn vmaddfp_lane_fma() {
+        // Each lane computes vD[i] = vA[i] * vC[i] + vB[i].
+        let mut ctx = PpcContext::new();
+        let mut mem = TestMem::new();
+        // vmaddfp v4, v1, v2, v3 (opcode 4, XO=46). Operand order in assembly is vD, vA, vC, vB:
+        // vD=4 at `<< 21`, vA=1 at `<< 16`, vB=3 at `<< 11`, vC=2 at `<< 6`.
+        let vmaddfp = (4u32 << 26) | (4 << 21) | (1 << 16) | (3 << 11) | (2 << 6) | 46;
+        write_instr(&mut mem, 0, vmaddfp);
+        ctx.pc = 0;
+        ctx.vr[3] = xenia_types::Vec128::from_f32x4_array([10.0, 20.0, 30.0, 40.0]);
+        ctx.vr[2] = xenia_types::Vec128::from_f32x4_array([1.0, 1.0, 1.0, 1.0]);
+        ctx.vr[1] = xenia_types::Vec128::from_f32x4_array([2.0, 3.0, 4.0, 5.0]);
+        step(&mut ctx, &mut mem);
+        let fma = ctx.vr[4].as_f32x4();
+        assert_eq!(fma[0], 12.0);  // 2*1 + 10
+        assert_eq!(fma[1], 23.0);  // 3*1 + 20
+        assert_eq!(fma[2], 34.0);  // 4*1 + 30
+        assert_eq!(fma[3], 45.0);  // 5*1 + 40
+    }
+
+    // PPCBUG-517 VMX load/store.
+    #[test]
+    fn lvx_loads_aligned_quadword() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // Write 16 distinct bytes (0xA0..=0xAF) so any lane mix-up is detectable.
+        for i in 0..16 { mem.write_u8(0x100 + i, (0xA0 + i) as u8); }
+        ctx.gpr[3] = 0;
+        ctx.gpr[4] = 0x100;
+        // lvx v5, r3, r4: opcode 31, XO=103; EA = r3 + r4 = 0x100.
+        let raw = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (103 << 1);
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        let r = ctx.vr[5].as_bytes();
+        // Verify all 16 lanes rather than only the first and last byte.
+        for lane in 0..16 { assert_eq!(r[lane], 0xA0 + lane as u8, "lane {}", lane); }
+    }
+
+    #[test]
+    fn stvx_stores_aligned_quadword() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // stvx v5, r3, r4 (primary opcode 31, XO=231); EA = r3 + r4 = 0x100.
+        let stvx = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (231 << 1);
+        write_instr(&mem, 0, stvx);
+        ctx.pc = 0;
+        ctx.gpr[4] = 0x100;
+        ctx.gpr[3] = 0;
+        let mut pattern = [0u8; 16]; // filled with the ascending bytes 0xC0..=0xCF
+        for (i, byte) in pattern.iter_mut().enumerate() { *byte = 0xC0 + i as u8; }
+        ctx.vr[5] = xenia_types::Vec128::from_bytes(pattern);
+        step(&mut ctx, &mem);
+        for i in 0..16 {
+            assert_eq!(mem.read_u8(0x100 + i), (0xC0 + i) as u8);
+        }
+    }
+
+    #[test]
+    fn lvebx_byte_lane_load() {
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // lvebx v5, r3, r4 (primary opcode 31, XO=7).
+        let lvebx = (31u32 << 26) | (5 << 21) | (3 << 16) | (4 << 11) | (7 << 1);
+        write_instr(&mem, 0, lvebx);
+        ctx.pc = 0;
+        ctx.gpr[4] = 0x107;  // EA = r3 + r4; 0x107 is byte 7 of its quadword
+        ctx.gpr[3] = 0;
+        mem.write_u8(0x107, 0x42);
+        step(&mut ctx, &mem);
+        let lanes = ctx.vr[5].as_bytes();
+        assert_eq!(lanes[7], 0x42, "byte loaded at lane (EA & 0xF)");
+    }
+
    // ---------- Block-cache parity tests ----------
    //
    // These confirm that running a program through the basic-block