xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)

First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).

Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.

Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:29:38 +02:00
parent 5f0d6487ea
commit 79eb52c378
24 changed files with 10984 additions and 18 deletions

232
crates/xenia-gpu/src/pm4.rs Normal file
View File

@@ -0,0 +1,232 @@
//! PM4 packet format — header decoding + Type-3 opcode set.
//!
//! Xenos PM4 packet layout mirrors `xenia-canary/src/xenia/gpu/packet_disassembler.cc`:
//!
//! - **Type 0** (`packet >> 30 == 0`): register-write run.
//! `base_index = packet & 0x7FFF`,
//! `count = ((packet >> 16) & 0x3FFF) + 1`. Total dwords = `1 + count`.
//! With `(packet >> 15) & 1 == 1`, all writes target the same register.
//! - **Type 1** (`packet >> 30 == 1`): two-register write.
//! `reg_index_1 = packet & 0x7FF`, `reg_index_2 = (packet >> 11) & 0x7FF`.
//! Total dwords = 3.
//! - **Type 2** (`packet >> 30 == 2`): NOP — a single skipped dword.
//! - **Type 3** (`packet >> 30 == 3`): command.
//! `opcode = (packet >> 8) & 0x7F`,
//! `count = ((packet >> 16) & 0x3FFF) + 1`,
//! `predicated = packet & 1`.
//! Total dwords = `1 + count`.
/// Signature cookie that canary writes alongside `PM4_XE_SWAP` so tooling can
/// recognize swap packets (canary's `kSwapSignature`).
///
/// The value packs the ASCII codes `'X'` (0x58), `'E'` (0x45), `'N'` (0x4E),
/// `'X'` (0x58) starting from the least-significant byte, i.e.
/// `'X' | 'E' << 8 | 'N' << 16 | 'X' << 24`. Read from the most-significant
/// byte down, the bytes therefore spell `XNEX`, not `XENX`.
///
/// NOTE(review): earlier wording described this as "big-endian"; confirm the
/// intended byte order against canary's definition of `kSwapSignature`.
pub const SWAP_SIGNATURE: u32 = 0x584E_4558;
// ── Named Type-3 opcodes (from xenia-canary/src/xenia/gpu/xenos.h:1617-1679) ──
//
// Each constant is the value of the 7-bit opcode field that `decode` extracts
// from a Type-3 header with `(header >> 8) & 0x7F`, so every value here is
// below 0x80. Every constant also has a matching arm in `type3_opcode_name`;
// add one there whenever an opcode is added here.
//
// The list is not sorted by numeric value.
pub const PM4_ME_INIT: u8 = 0x48;
pub const PM4_NOP: u8 = 0x10;
pub const PM4_INDIRECT_BUFFER: u8 = 0x3F;
pub const PM4_INDIRECT_BUFFER_PFD: u8 = 0x37;
pub const PM4_WAIT_FOR_IDLE: u8 = 0x26;
pub const PM4_WAIT_REG_MEM: u8 = 0x3C;
pub const PM4_REG_RMW: u8 = 0x21;
pub const PM4_REG_TO_MEM: u8 = 0x3E;
pub const PM4_MEM_WRITE: u8 = 0x3D;
pub const PM4_COND_WRITE: u8 = 0x45;
pub const PM4_EVENT_WRITE: u8 = 0x46;
pub const PM4_EVENT_WRITE_SHD: u8 = 0x58;
pub const PM4_EVENT_WRITE_EXT: u8 = 0x5A;
pub const PM4_EVENT_WRITE_ZPD: u8 = 0x5B;
pub const PM4_DRAW_INDX: u8 = 0x22;
pub const PM4_DRAW_INDX_2: u8 = 0x36;
pub const PM4_VIZ_QUERY: u8 = 0x23;
pub const PM4_SET_CONSTANT: u8 = 0x2D;
pub const PM4_SET_CONSTANT2: u8 = 0x55;
pub const PM4_SET_SHADER_CONSTANTS: u8 = 0x56;
pub const PM4_LOAD_ALU_CONSTANT: u8 = 0x2F;
pub const PM4_IM_LOAD: u8 = 0x27;
pub const PM4_IM_LOAD_IMMEDIATE: u8 = 0x2B;
pub const PM4_LOAD_CONSTANT_CONTEXT: u8 = 0x2E;
pub const PM4_INVALIDATE_STATE: u8 = 0x3B;
pub const PM4_INTERRUPT: u8 = 0x54;
pub const PM4_SET_SHADER_BASES: u8 = 0x4A;
pub const PM4_SET_BIN_MASK_LO: u8 = 0x60;
pub const PM4_SET_BIN_MASK_HI: u8 = 0x61;
pub const PM4_SET_BIN_SELECT_LO: u8 = 0x62;
pub const PM4_SET_BIN_SELECT_HI: u8 = 0x63;
pub const PM4_SET_BIN_MASK: u8 = 0x50;
pub const PM4_SET_BIN_SELECT: u8 = 0x51;
pub const PM4_CONTEXT_UPDATE: u8 = 0x5E;
/// Xenia-specific: `VdSwap` writes this to trigger a present.
pub const PM4_XE_SWAP: u8 = 0x64;
/// Map a Type-3 opcode to its mnemonic (the constant name without the `PM4_`
/// prefix), for use in tracing spans.
///
/// Returns `"UNKNOWN"` for any value that is not one of the named `PM4_*`
/// opcode constants in this module.
pub fn type3_opcode_name(op: u8) -> &'static str {
    // Arms are listed in ascending opcode value (0x10 ..= 0x64).
    match op {
        PM4_NOP => "NOP",
        PM4_REG_RMW => "REG_RMW",
        PM4_DRAW_INDX => "DRAW_INDX",
        PM4_VIZ_QUERY => "VIZ_QUERY",
        PM4_WAIT_FOR_IDLE => "WAIT_FOR_IDLE",
        PM4_IM_LOAD => "IM_LOAD",
        PM4_IM_LOAD_IMMEDIATE => "IM_LOAD_IMMEDIATE",
        PM4_SET_CONSTANT => "SET_CONSTANT",
        PM4_LOAD_CONSTANT_CONTEXT => "LOAD_CONSTANT_CONTEXT",
        PM4_LOAD_ALU_CONSTANT => "LOAD_ALU_CONSTANT",
        PM4_DRAW_INDX_2 => "DRAW_INDX_2",
        PM4_INDIRECT_BUFFER_PFD => "INDIRECT_BUFFER_PFD",
        PM4_INVALIDATE_STATE => "INVALIDATE_STATE",
        PM4_WAIT_REG_MEM => "WAIT_REG_MEM",
        PM4_MEM_WRITE => "MEM_WRITE",
        PM4_REG_TO_MEM => "REG_TO_MEM",
        PM4_INDIRECT_BUFFER => "INDIRECT_BUFFER",
        PM4_COND_WRITE => "COND_WRITE",
        PM4_EVENT_WRITE => "EVENT_WRITE",
        PM4_ME_INIT => "ME_INIT",
        PM4_SET_SHADER_BASES => "SET_SHADER_BASES",
        PM4_SET_BIN_MASK => "SET_BIN_MASK",
        PM4_SET_BIN_SELECT => "SET_BIN_SELECT",
        PM4_INTERRUPT => "INTERRUPT",
        PM4_SET_CONSTANT2 => "SET_CONSTANT2",
        PM4_SET_SHADER_CONSTANTS => "SET_SHADER_CONSTANTS",
        PM4_EVENT_WRITE_SHD => "EVENT_WRITE_SHD",
        PM4_EVENT_WRITE_EXT => "EVENT_WRITE_EXT",
        PM4_EVENT_WRITE_ZPD => "EVENT_WRITE_ZPD",
        PM4_CONTEXT_UPDATE => "CONTEXT_UPDATE",
        PM4_SET_BIN_MASK_LO => "SET_BIN_MASK_LO",
        PM4_SET_BIN_MASK_HI => "SET_BIN_MASK_HI",
        PM4_SET_BIN_SELECT_LO => "SET_BIN_SELECT_LO",
        PM4_SET_BIN_SELECT_HI => "SET_BIN_SELECT_HI",
        PM4_XE_SWAP => "XE_SWAP",
        _ => "UNKNOWN",
    }
}
/// Result of decoding one PM4 header dword.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PacketHeader {
    /// Which packet type the header introduces, with its decoded fields.
    pub kind: PacketKind,
    /// Length of the whole packet in dwords, counting the header dword itself.
    pub total_dwords: u32,
}

/// The four PM4 packet types, selected by header bits 31:30.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PacketKind {
    /// Type-0: a run of register writes.
    Type0 {
        /// First register index (the register offset / 4); header bits 14:0.
        base_index: u32,
        /// Number of data dwords after the header (header bits 29:16, plus one).
        count: u32,
        /// Header bit 15: when set, all `count` data dwords write to the same
        /// register.
        write_one: bool,
    },
    /// Type-1: a write to two registers.
    Type1 {
        /// Header bits 10:0.
        reg_index_1: u32,
        /// Header bits 21:11.
        reg_index_2: u32,
    },
    /// Type-2: NOP, a single dword that is skipped.
    Type2,
    /// Type-3: a command packet.
    Type3 {
        /// Header bits 14:8; compare against the `PM4_*` opcode constants.
        opcode: u8,
        /// Number of data dwords after the header (header bits 29:16, plus one).
        count: u32,
        /// Header bit 0.
        predicated: bool,
    },
}

/// Classify a PM4 header dword and extract its fields.
///
/// The packet type is taken from bits 31:30 of `header`. The returned
/// [`PacketHeader::total_dwords`] includes the header dword, so it is the
/// distance to the next packet header. This function never panics: every
/// `u32` decodes to one of the four packet types.
pub fn decode(header: u32) -> PacketHeader {
    // Type-0 and Type-3 share the same length encoding: bits 29:16 store the
    // number of data dwords minus one.
    fn data_dwords(word: u32) -> u32 {
        ((word >> 16) & 0x3FFF) + 1
    }

    let (kind, total_dwords) = match header >> 30 {
        0 => {
            let count = data_dwords(header);
            let kind = PacketKind::Type0 {
                base_index: header & 0x7FFF,
                count,
                write_one: header & (1 << 15) != 0,
            };
            (kind, count + 1)
        }
        1 => {
            let kind = PacketKind::Type1 {
                reg_index_1: header & 0x7FF,
                reg_index_2: (header >> 11) & 0x7FF,
            };
            (kind, 3)
        }
        2 => (PacketKind::Type2, 1),
        // A `u32` shifted right by 30 is at most 3, so the only value left
        // for this arm is 3.
        _ => {
            let count = data_dwords(header);
            let kind = PacketKind::Type3 {
                opcode: ((header >> 8) & 0x7F) as u8,
                count,
                predicated: header & 1 == 1,
            };
            (kind, count + 1)
        }
    };

    PacketHeader { kind, total_dwords }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Header bits 31:30 = `10` select Type 2, which occupies one dword.
    #[test]
    fn type2_is_one_dword() {
        let decoded = decode(0x8000_0000);
        assert_eq!(decoded.total_dwords, 1);
        assert_eq!(decoded.kind, PacketKind::Type2);
    }

    /// The Type-0 count field stores `count - 1`: a field value of 5 means six
    /// data dwords and seven dwords in total.
    #[test]
    fn type0_count_is_inclusive() {
        let raw = (5 << 16) | 0x100; // count field = 5, base_index = 0x100, write_one = 0
        let decoded = decode(raw);
        assert_eq!(
            decoded.kind,
            PacketKind::Type0 {
                base_index: 0x100,
                count: 6,
                write_one: false,
            }
        );
        assert_eq!(decoded.total_dwords, 7);
    }

    /// Decodes the header canary's VdSwap emits:
    /// `MakePacketType3(PM4_XE_SWAP, 4)` → `(3<<30) | ((4-1)<<16) | (0x64<<8)`.
    #[test]
    fn type3_swap_packet() {
        let type_bits = 3u32 << 30;
        let count_bits = (4u32 - 1) << 16;
        let opcode_bits = (PM4_XE_SWAP as u32) << 8;
        let decoded = decode(type_bits | count_bits | opcode_bits);
        assert_eq!(
            decoded.kind,
            PacketKind::Type3 {
                opcode: PM4_XE_SWAP,
                count: 4,
                predicated: false,
            }
        );
        assert_eq!(decoded.total_dwords, 5);
    }

    /// Spot-checks a few named opcodes and the fallback for an unnamed value.
    #[test]
    fn opcode_names_are_present_for_common_ops() {
        let expectations: &[(u8, &str)] = &[
            (PM4_NOP, "NOP"),
            (PM4_DRAW_INDX, "DRAW_INDX"),
            (PM4_XE_SWAP, "XE_SWAP"),
            (PM4_WAIT_REG_MEM, "WAIT_REG_MEM"),
            (0xFE, "UNKNOWN"),
        ];
        for &(opcode, expected) in expectations {
            assert_eq!(type3_opcode_name(opcode), expected);
        }
    }
}