xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)
First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).
Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.
Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
232
crates/xenia-gpu/src/pm4.rs
Normal file
232
crates/xenia-gpu/src/pm4.rs
Normal file
@@ -0,0 +1,232 @@
|
||||
//! PM4 packet format — header decoding + Type-3 opcode set.
//!
//! Xenos PM4 packet layout mirrors `xenia-canary/src/xenia/gpu/packet_disassembler.cc`:
//!
//! - **Type 0** (`packet >> 30 == 0`): register-write run.
//!   `count = ((packet >> 16) & 0x3FFF) + 1`. Total dwords = `1 + count`.
//!   With `(packet >> 15) & 1 == 1`, all writes target the same register.
//! - **Type 1** (`packet >> 30 == 1`): two-register write. Total dwords = 3.
//! - **Type 2** (`packet >> 30 == 2`): NOP — a single skipped dword.
//! - **Type 3** (`packet >> 30 == 3`): command.
//!   `opcode = (packet >> 8) & 0x7F`,
//!   `count = ((packet >> 16) & 0x3FFF) + 1`.
//!   Total dwords = `1 + count`.

/// The cookie canary writes alongside `PM4_XE_SWAP` so tooling can recognize
/// swap packets (`kSwapSignature`).
///
/// The bytes spell `'X','E','N','X'` when read from the least-significant
/// byte upward (i.e. `SWAP_SIGNATURE.to_le_bytes()`); read most-significant
/// byte first, the same value spells `'X','N','E','X'`.
// NOTE(review): the earlier comment described this as big-endian `XENX`, which
// does not match the literal below. The value is left untouched — confirm the
// intended byte order against canary's `kSwapSignature`.
pub const SWAP_SIGNATURE: u32 = 0x584E_4558;

// ── Named Type-3 opcodes (from xenia-canary/src/xenia/gpu/xenos.h:1617-1679) ──
//
// Each value is the 7-bit opcode carried in bits 14:8 of a Type-3 header.

pub const PM4_ME_INIT: u8 = 0x48;
pub const PM4_NOP: u8 = 0x10;
pub const PM4_INDIRECT_BUFFER: u8 = 0x3F;
pub const PM4_INDIRECT_BUFFER_PFD: u8 = 0x37;
pub const PM4_WAIT_FOR_IDLE: u8 = 0x26;
pub const PM4_WAIT_REG_MEM: u8 = 0x3C;
pub const PM4_REG_RMW: u8 = 0x21;
pub const PM4_REG_TO_MEM: u8 = 0x3E;
pub const PM4_MEM_WRITE: u8 = 0x3D;
pub const PM4_COND_WRITE: u8 = 0x45;
pub const PM4_EVENT_WRITE: u8 = 0x46;
pub const PM4_EVENT_WRITE_SHD: u8 = 0x58;
pub const PM4_EVENT_WRITE_EXT: u8 = 0x5A;
pub const PM4_EVENT_WRITE_ZPD: u8 = 0x5B;
pub const PM4_DRAW_INDX: u8 = 0x22;
pub const PM4_DRAW_INDX_2: u8 = 0x36;
pub const PM4_VIZ_QUERY: u8 = 0x23;
pub const PM4_SET_CONSTANT: u8 = 0x2D;
pub const PM4_SET_CONSTANT2: u8 = 0x55;
pub const PM4_SET_SHADER_CONSTANTS: u8 = 0x56;
pub const PM4_LOAD_ALU_CONSTANT: u8 = 0x2F;
pub const PM4_IM_LOAD: u8 = 0x27;
pub const PM4_IM_LOAD_IMMEDIATE: u8 = 0x2B;
pub const PM4_LOAD_CONSTANT_CONTEXT: u8 = 0x2E;
pub const PM4_INVALIDATE_STATE: u8 = 0x3B;
pub const PM4_INTERRUPT: u8 = 0x54;
pub const PM4_SET_SHADER_BASES: u8 = 0x4A;
pub const PM4_SET_BIN_MASK_LO: u8 = 0x60;
pub const PM4_SET_BIN_MASK_HI: u8 = 0x61;
pub const PM4_SET_BIN_SELECT_LO: u8 = 0x62;
pub const PM4_SET_BIN_SELECT_HI: u8 = 0x63;
pub const PM4_SET_BIN_MASK: u8 = 0x50;
pub const PM4_SET_BIN_SELECT: u8 = 0x51;
pub const PM4_CONTEXT_UPDATE: u8 = 0x5E;
/// Xenia-specific: `VdSwap` writes this to trigger a present.
pub const PM4_XE_SWAP: u8 = 0x64;

/// Human-readable name for a Type-3 opcode. Used for tracing spans.
///
/// The returned string is the constant's name without the `PM4_` prefix.
/// Any opcode that has no named constant in this module yields `"UNKNOWN"`.
#[must_use]
pub fn type3_opcode_name(op: u8) -> &'static str {
    match op {
        PM4_ME_INIT => "ME_INIT",
        PM4_NOP => "NOP",
        PM4_INDIRECT_BUFFER => "INDIRECT_BUFFER",
        PM4_INDIRECT_BUFFER_PFD => "INDIRECT_BUFFER_PFD",
        PM4_WAIT_FOR_IDLE => "WAIT_FOR_IDLE",
        PM4_WAIT_REG_MEM => "WAIT_REG_MEM",
        PM4_REG_RMW => "REG_RMW",
        PM4_REG_TO_MEM => "REG_TO_MEM",
        PM4_MEM_WRITE => "MEM_WRITE",
        PM4_COND_WRITE => "COND_WRITE",
        PM4_EVENT_WRITE => "EVENT_WRITE",
        PM4_EVENT_WRITE_SHD => "EVENT_WRITE_SHD",
        PM4_EVENT_WRITE_EXT => "EVENT_WRITE_EXT",
        PM4_EVENT_WRITE_ZPD => "EVENT_WRITE_ZPD",
        PM4_DRAW_INDX => "DRAW_INDX",
        PM4_DRAW_INDX_2 => "DRAW_INDX_2",
        PM4_VIZ_QUERY => "VIZ_QUERY",
        PM4_SET_CONSTANT => "SET_CONSTANT",
        PM4_SET_CONSTANT2 => "SET_CONSTANT2",
        PM4_SET_SHADER_CONSTANTS => "SET_SHADER_CONSTANTS",
        PM4_LOAD_ALU_CONSTANT => "LOAD_ALU_CONSTANT",
        PM4_LOAD_CONSTANT_CONTEXT => "LOAD_CONSTANT_CONTEXT",
        PM4_IM_LOAD => "IM_LOAD",
        PM4_IM_LOAD_IMMEDIATE => "IM_LOAD_IMMEDIATE",
        PM4_INVALIDATE_STATE => "INVALIDATE_STATE",
        PM4_INTERRUPT => "INTERRUPT",
        PM4_SET_SHADER_BASES => "SET_SHADER_BASES",
        PM4_SET_BIN_MASK_LO => "SET_BIN_MASK_LO",
        PM4_SET_BIN_MASK_HI => "SET_BIN_MASK_HI",
        PM4_SET_BIN_SELECT_LO => "SET_BIN_SELECT_LO",
        PM4_SET_BIN_SELECT_HI => "SET_BIN_SELECT_HI",
        PM4_SET_BIN_MASK => "SET_BIN_MASK",
        PM4_SET_BIN_SELECT => "SET_BIN_SELECT",
        PM4_CONTEXT_UPDATE => "CONTEXT_UPDATE",
        PM4_XE_SWAP => "XE_SWAP",
        // Anything without a named constant above.
        _ => "UNKNOWN",
    }
}

/// Decoded single PM4 packet header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PacketHeader {
    /// Packet type together with the fields extracted from the header dword.
    pub kind: PacketKind,
    /// Total size of the packet (including header) in dwords.
    pub total_dwords: u32,
}

/// Classification of a PM4 packet.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PacketKind {
    /// Type-0 register-write run. `base_index` is the first register index
    /// (the register offset / 4). `write_one` is true if all `count` data
    /// dwords write to the same register.
    Type0 {
        /// Low 15 bits of the header.
        base_index: u32,
        /// Number of data dwords after the header (header field + 1).
        count: u32,
        /// Bit 15 of the header.
        write_one: bool,
    },
    /// Type-1 two-register write.
    Type1 {
        /// Header bits 10:0.
        reg_index_1: u32,
        /// Header bits 21:11.
        reg_index_2: u32,
    },
    /// Type-2 NOP (a single skipped dword).
    Type2,
    /// Type-3 command.
    Type3 {
        /// Header bits 14:8 (7-bit opcode).
        opcode: u8,
        /// Number of data dwords after the header (header field + 1).
        count: u32,
        /// Bit 0 of the header.
        predicated: bool,
    },
}

/// Decode a single PM4 packet header.
///
/// The packet type is taken from the top two bits of `header`. The function
/// is total: every `u32` maps to exactly one [`PacketKind`], and it never
/// panics.
///
/// Returns the classified header together with `total_dwords`, the packet
/// length including the header dword itself.
#[must_use]
pub fn decode(header: u32) -> PacketHeader {
    // Types 0 and 3 share the same 14-bit length field in bits 29:16; it is
    // stored biased by one, so the real data-dword count is `field + 1`.
    let count = ((header >> 16) & 0x3FFF) + 1;

    match header >> 30 {
        0 => PacketHeader {
            kind: PacketKind::Type0 {
                base_index: header & 0x7FFF,
                count,
                write_one: (header >> 15) & 1 != 0,
            },
            total_dwords: 1 + count,
        },
        1 => PacketHeader {
            kind: PacketKind::Type1 {
                reg_index_1: header & 0x7FF,
                reg_index_2: (header >> 11) & 0x7FF,
            },
            // Header plus the two register values.
            total_dwords: 3,
        },
        2 => PacketHeader {
            kind: PacketKind::Type2,
            total_dwords: 1,
        },
        // A `u32` shifted right by 30 is always in 0..=3, so the only value
        // left here is 3. Using the wildcard arm avoids a panic path.
        _ => PacketHeader {
            kind: PacketKind::Type3 {
                // Masked to 7 bits, so the narrowing cast cannot truncate.
                opcode: ((header >> 8) & 0x7F) as u8,
                count,
                predicated: (header & 1) != 0,
            },
            total_dwords: 1 + count,
        },
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for header decoding and the opcode-name table.

    use super::*;

    #[test]
    fn type2_is_one_dword() {
        // 0x80000000 == type 2 header (bits 31:30 = 10)
        let hdr = decode(0x8000_0000);
        assert_eq!(hdr.kind, PacketKind::Type2);
        assert_eq!(hdr.total_dwords, 1);
    }

    #[test]
    fn type0_count_is_inclusive() {
        // count field (bits 29:16) = 5 → 6 data dwords. base_index = 0x100.
        // write_one = 0.
        let hdr = decode((5 << 16) | 0x100);
        assert_eq!(
            hdr.kind,
            PacketKind::Type0 {
                base_index: 0x100,
                count: 6,
                write_one: false,
            }
        );
        assert_eq!(hdr.total_dwords, 7);
    }

    #[test]
    fn type0_write_one_flag() {
        // Bit 15 set, count field = 0 → one data dword aimed at one register.
        let hdr = decode((1 << 15) | 0x48);
        assert_eq!(
            hdr.kind,
            PacketKind::Type0 {
                base_index: 0x48,
                count: 1,
                write_one: true,
            }
        );
        assert_eq!(hdr.total_dwords, 2);
    }

    #[test]
    fn type1_splits_register_indices() {
        // Two 11-bit register indices packed at bits 10:0 and 21:11.
        let hdr = decode((1u32 << 30) | (0x123 << 11) | 0x456);
        assert_eq!(
            hdr.kind,
            PacketKind::Type1 {
                reg_index_1: 0x456,
                reg_index_2: 0x123,
            }
        );
        assert_eq!(hdr.total_dwords, 3);
    }

    #[test]
    fn type3_swap_packet() {
        // Build the exact header canary's VdSwap emits:
        // MakePacketType3(PM4_XE_SWAP, 4) → ((3<<30) | ((4-1)<<16) | (0x64<<8))
        let hdr_word = (3u32 << 30) | ((4u32 - 1) << 16) | ((PM4_XE_SWAP as u32) << 8);
        let hdr = decode(hdr_word);
        assert_eq!(
            hdr.kind,
            PacketKind::Type3 {
                opcode: PM4_XE_SWAP,
                count: 4,
                predicated: false,
            }
        );
        assert_eq!(hdr.total_dwords, 5);
    }

    #[test]
    fn type3_predicate_bit_and_max_count() {
        // Bit 0 of the header is the predicate flag.
        let base = (3u32 << 30) | ((PM4_NOP as u32) << 8);
        assert_eq!(
            decode(base | 1).kind,
            PacketKind::Type3 {
                opcode: PM4_NOP,
                count: 1,
                predicated: true,
            }
        );

        // All bits set: widest opcode and the largest possible length field.
        let hdr = decode(u32::MAX);
        assert_eq!(
            hdr.kind,
            PacketKind::Type3 {
                opcode: 0x7F,
                count: 0x4000,
                predicated: true,
            }
        );
        assert_eq!(hdr.total_dwords, 0x4001);
    }

    #[test]
    fn opcode_names_are_present_for_common_ops() {
        assert_eq!(type3_opcode_name(PM4_NOP), "NOP");
        assert_eq!(type3_opcode_name(PM4_DRAW_INDX), "DRAW_INDX");
        assert_eq!(type3_opcode_name(PM4_XE_SWAP), "XE_SWAP");
        assert_eq!(type3_opcode_name(PM4_WAIT_REG_MEM), "WAIT_REG_MEM");
        assert_eq!(type3_opcode_name(0xFE), "UNKNOWN");
    }
}
Reference in New Issue
Block a user