diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index fee3a07..3bb3d56 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -3991,14 +3991,12 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - // position) and masked to only 3 bits. Canary extracts from the // VX128_3/4 `IMM` field at PPC bits 16-22 (MSB) and does // `type = IMM >> 2` to pick up the 5-bit type selector — the low - // 2 bits (`pack`) select output-slot layout for `vpkd3d128` and - // are ignored by `vupkd3d128`. Extracting the low 2 bits as - // `pack` (unused here — we hand back the codec output in its - // canonical lane position, the subsequent permute instruction - // handles placement) for completeness. + // 2 bits (`pack`) select output-slot layout for `vpkd3d128`. PpcOpcode::vpkd3d128 => { use crate::vmx::D3dPackType; let uimm = crate::decoder::extract_vx128_uimm5(instr.raw); + let pack = (uimm & 3) as usize; + let shift = instr.vx128_4_z() as usize; let ty = D3dPackType::from_immediate(uimm >> 2); let src = ctx.vr[instr.vb128()]; let out = match ty { @@ -4020,7 +4018,36 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - src } }; - ctx.vr[instr.vd128()] = out; + // Post-pack permutation: merge packed `out` into previous `vd` + // per canary ppc_emit_altivec.cc:2126-2188 MakePermuteMask tables. 
+ // MakePermuteMask(r0,l0, r1,l1, r2,l2, r3,l3): result[i] = if ri==0 { prev[li] } else { out[li] } + let result = if pack == 0 { + out + } else { + // PERM[pack - 1][shift][dst_lane] = (source_reg, lane): 0=prev vd, 1=packed out + const PERM: [[[(u8, u8); 4]; 4]; 3] = [ + // pack=1 (VPACK_32): places out[3] at lane (3-shift) + [[(0,0),(0,1),(0,2),(1,3)], [(0,0),(0,1),(1,3),(0,3)], + [(0,0),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]], + // pack=2 (64-bit): places out[2..3] at lanes (2-shift)..(3-shift); for shift=3 only out[3] lands (lane 0), out[2] is dropped + [[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)], + [(1,2),(1,3),(0,2),(0,3)], [(1,3),(0,1),(0,2),(0,3)]], + // pack=3 (64-bit): same as pack=2 except shift=3 selects out[2] at lane 3 + [[(0,0),(0,1),(1,2),(1,3)], [(0,0),(1,2),(1,3),(0,3)], + [(1,2),(1,3),(0,2),(0,3)], [(0,0),(0,1),(0,2),(1,2)]], + ]; + let prev = ctx.vr[instr.vd128()]; + let pw = prev.as_u32x4(); + let ow = out.as_u32x4(); + let sel = PERM[pack - 1][shift]; + xenia_types::Vec128::from_u32x4_array([ + if sel[0].0 == 0 { pw[sel[0].1 as usize] } else { ow[sel[0].1 as usize] }, + if sel[1].0 == 0 { pw[sel[1].1 as usize] } else { ow[sel[1].1 as usize] }, + if sel[2].0 == 0 { pw[sel[2].1 as usize] } else { ow[sel[2].1 as usize] }, + if sel[3].0 == 0 { pw[sel[3].1 as usize] } else { ow[sel[3].1 as usize] }, + ]) + }; + ctx.vr[instr.vd128()] = result; ctx.pc += 4; } PpcOpcode::vupkd3d128 => { @@ -6356,4 +6383,84 @@ mod tests { assert!(!ctx.cr[6].lt, "all-false: CR6.LT must be 0"); assert!(ctx.cr[6].eq, "all-false: CR6.EQ must be 1"); } + + // ---- PPCBUG-363 + PPCBUG-369: vpkd3d128 post-pack permutation ---- + // + // vpkd3d128 VD, VB, type, pack, shift: the low 2 bits of the IMM field + // select how the packed scalar/vector is merged back into the previous VD. + // pack=0 → identity (store out directly); pack=1 → 32-bit merge by shift; + // pack=2,3 → 64-bit merge by shift. + // Canary source: ppc_emit_altivec.cc:2126-2188. 
+ // + // For vpkd3d128, PPC bits 21-22 are always 1 (key2 discriminant), so + // vd128 is always in range [96, 127] for vd_lo in [0, 31]. NOTE(review): this hunk only exercises pack=0 and pack=1; the pack=2/3 merge paths have no test here. + + fn encode_vpkd3d128(vd_lo: u32, vb_lo: u32, imm: u32, z: u32) -> u32 { + // op6=6 (all VMX128 compute ops); VD[4:0] at host 25-21; IMM at host 20-16; + // VB[4:0] at host 15-11; host bits 10,9 = 1,1 (PPC bits 21,22, key2=0b110); + // z (2-bit) at host 7-6 (PPC bits 24-25); host bit 4 = 1 (PPC bit 27, key2 low=0b01). + // decode_op6 key2 = (bits21-23<<4)|(bits26-27) = (0b110<<4)|0b01 = 0b1100001. Arguments are OR-ed in unmasked, so callers must keep vd_lo, vb_lo, imm < 32 and z < 4. + (6u32 << 26) | (vd_lo << 21) | (imm << 16) | (vb_lo << 11) + | (1 << 10) | (1 << 9) | (z << 6) | (1 << 4) + } + + #[test] + fn vpkd3d128_pack0_legacy_unchanged() { + // pack=0 → identity: result = out (packed value), no blend with prev vd. + // type=0 (D3dColor), pack=0 → IMM=0; z=0 (don't care for pack=0). + // vd=96 (vd_lo=0 | bits21=1,22=1→+96). + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + // vb=1: R=1.0, G=0, B=0, A=0 → D3dColor packs to word (0<<24)|(255<<16)|(0<<8)|0 = 0x00FF0000 + ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); + // prev vd=96: sentinel values that should NOT appear in result + ctx.vr[96] = xenia_types::Vec128::from_u32x4(0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD); + write_instr(&mem, 0, encode_vpkd3d128(0, 1, 0, 0)); + ctx.pc = 0; + step(&mut ctx, &mem); + let r = ctx.vr[96].as_u32x4(); + // out = [0, 0, 0, 0x00FF_0000]; pack=0 → result = out + assert_eq!(r[0], 0x0000_0000, "pack=0: lane 0 must be out[0]=0"); + assert_eq!(r[1], 0x0000_0000, "pack=0: lane 1 must be out[1]=0"); + assert_eq!(r[2], 0x0000_0000, "pack=0: lane 2 must be out[2]=0"); + assert_eq!(r[3], 0x00FF_0000, "pack=0: lane 3 must be packed D3dColor"); + } + + #[test] + fn vpkd3d128_pack1_shift0_d3d_vertex_pack() { + // pack=1, shift=0 (VPACK_32): out[3] placed at lane 3; prev[0..2] preserved. 
+ // MakePermuteMask(0,0, 0,1, 0,2, 1,3) → [prev[0], prev[1], prev[2], out[3]] + // IMM = (type=0 D3dColor << 2) | pack=1 = 1; z=0. + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000 + ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444); + write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 0)); + ctx.pc = 0; + step(&mut ctx, &mem); + let r = ctx.vr[96].as_u32x4(); + assert_eq!(r[0], 0x1111_1111, "pack=1 shift=0: lane 0 from prev"); + assert_eq!(r[1], 0x2222_2222, "pack=1 shift=0: lane 1 from prev"); + assert_eq!(r[2], 0x3333_3333, "pack=1 shift=0: lane 2 from prev"); + assert_eq!(r[3], 0x00FF_0000, "pack=1 shift=0: lane 3 from out[3]"); + } + + #[test] + fn vpkd3d128_pack1_shift3_puts_out3_at_lane0() { + // pack=1, shift=3 (VPACK_32): out[3] placed at lane 0; prev[1..3] preserved. + // MakePermuteMask(1,3, 0,1, 0,2, 0,3) → [out[3], prev[1], prev[2], prev[3]] + // IMM = 1; z=3. + let mut ctx = PpcContext::new(); + let mem = TestMem::new(); + ctx.vr[1] = xenia_types::Vec128::from_f32x4(1.0, 0.0, 0.0, 0.0); // out[3]=0x00FF_0000 + ctx.vr[96] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444); + write_instr(&mem, 0, encode_vpkd3d128(0, 1, 1, 3)); + ctx.pc = 0; + step(&mut ctx, &mem); + let r = ctx.vr[96].as_u32x4(); + assert_eq!(r[0], 0x00FF_0000, "pack=1 shift=3: lane 0 from out[3]"); + assert_eq!(r[1], 0x2222_2222, "pack=1 shift=3: lane 1 from prev"); + assert_eq!(r[2], 0x3333_3333, "pack=1 shift=3: lane 2 from prev"); + assert_eq!(r[3], 0x4444_4444, "pack=1 shift=3: lane 3 from prev"); + } }