xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)
First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).
Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.
Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
206
crates/xenia-gpu/src/ucode/alu.rs
Normal file
206
crates/xenia-gpu/src/ucode/alu.rs
Normal file
@@ -0,0 +1,206 @@
|
||||
//! Xenos ALU (vector + scalar) instruction decoder.
//!
//! An ALU instruction is 96 bits = 3 dwords. The three dwords encode:
//! - word0: operand modifier flags + destination info
//! - word1: source register / swizzle fields
//! - word2: opcode + write mask + export target
//!
//! See canary's `ucode.h:900-1400` for the full field map. This decoder
//! captures the minimal shape the uber-shader needs; flags we don't interpret
//! yet are retained as raw bits in `raw` for downstream inspection.
//!
//! NOTE(review): `decode_alu` currently reads the source registers and
//! predication bits from word0, reads everything else from word2, and does
//! not interpret word1 at all. That does not line up with the summary above;
//! confirm which of the two is correct against `ucode.h`.
|
||||
|
||||
/// Decoded ALU instruction.
///
/// Produced by `decode_alu` from a 3-dword instruction triple. The bit
/// positions quoted on each field are the ones `decode_alu` extracts today;
/// whether they agree with canary's `ucode.h` layout should be confirmed
/// there rather than assumed from this comment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AluInstruction {
    /// Vector ALU opcode: 6 bits, word2 bits 0-5. Compare against `vop::*`.
    pub vector_opcode: u8,
    /// Scalar ALU opcode: 6 bits, word2 bits 6-11. Compare against `sop::*`.
    pub scalar_opcode: u8,
    /// Destination register index for the vector result: 7 bits, word2
    /// bits 16-22.
    pub vector_dest: u8,
    /// Destination register index for the scalar result: 7 bits, word2
    /// bits 24-30.
    pub scalar_dest: u8,
    /// 4-bit write mask for the vector result (x/y/z/w): word2 bits 12-15.
    pub vector_write_mask: u8,
    /// 4-bit write mask for the scalar result: word2 bits 8-11.
    ///
    /// NOTE(review): this range overlaps the scalar opcode field (word2
    /// bits 6-11), so any opcode with bits 2..=5 set leaks into the mask.
    /// Confirm the intended position against `ucode.h`.
    pub scalar_write_mask: u8,
    /// Set when the instruction should write to the export bank (position,
    /// interpolators, color, etc.) instead of the general register file.
    /// Taken from word2 bit 23.
    pub vector_dest_is_export: bool,
    /// Selects `ps` (previous scalar result) as the scalar operand when set.
    /// Taken from word0 bit 26.
    pub scalar_src_is_ps: bool,
    /// First source register index (at most 3 sources for vector ops):
    /// word0 bits 0-7.
    pub src_a: u8,
    /// Second source register index: word0 bits 8-15.
    pub src_b: u8,
    /// Third source register index: word0 bits 16-23.
    pub src_c: u8,
    /// Set when the instruction is predicated; a predicated instruction is
    /// skipped if the predicate doesn't match `predicate_condition`.
    /// Taken from word0 bit 27.
    pub predicated: bool,
    /// Predicate value the instruction requires in order to execute.
    /// Taken from word0 bit 28.
    pub predicate_condition: bool,
    /// Raw dwords, preserved verbatim so the translator / interpreter can
    /// reach into fields we haven't parsed explicitly yet.
    pub raw: [u32; 3],
}
|
||||
|
||||
/// Decode a 3-dword ALU triple.
|
||||
pub fn decode_alu(words: [u32; 3]) -> AluInstruction {
|
||||
let w0 = words[0];
|
||||
let _w1 = words[1];
|
||||
let w2 = words[2];
|
||||
AluInstruction {
|
||||
vector_opcode: (w2 & 0x3F) as u8,
|
||||
scalar_opcode: ((w2 >> 6) & 0x3F) as u8,
|
||||
vector_dest: ((w2 >> 16) & 0x7F) as u8,
|
||||
scalar_dest: ((w2 >> 24) & 0x7F) as u8,
|
||||
vector_write_mask: ((w2 >> 12) & 0xF) as u8,
|
||||
scalar_write_mask: ((w2 >> 8) & 0xF) as u8,
|
||||
vector_dest_is_export: ((w2 >> 23) & 1) != 0,
|
||||
scalar_src_is_ps: ((w0 >> 26) & 1) != 0,
|
||||
src_a: (w0 & 0xFF) as u8,
|
||||
src_b: ((w0 >> 8) & 0xFF) as u8,
|
||||
src_c: ((w0 >> 16) & 0xFF) as u8,
|
||||
predicated: ((w0 >> 27) & 1) != 0,
|
||||
predicate_condition: ((w0 >> 28) & 1) != 0,
|
||||
raw: words,
|
||||
}
|
||||
}
|
||||
|
||||
/// Vector ALU opcodes we reference by name. Values match canary's
/// `AluVectorOpcode` enum in `ucode.h:1354`.
///
/// The constants are numbered consecutively from 0 (`ADD`) through 29
/// (`MAX_A`) and are meant to be compared against
/// `AluInstruction::vector_opcode`.
pub mod vop {
    pub const ADD: u8 = 0;
    pub const MUL: u8 = 1;
    pub const MAX: u8 = 2;
    pub const MIN: u8 = 3;
    pub const SEQ: u8 = 4;
    pub const SGT: u8 = 5;
    pub const SGE: u8 = 6;
    pub const SNE: u8 = 7;
    pub const FRC: u8 = 8;
    pub const TRUNC: u8 = 9;
    pub const FLOOR: u8 = 10;
    pub const MAD: u8 = 11;
    pub const CND_EQ: u8 = 12;
    pub const CND_GE: u8 = 13;
    pub const CND_GT: u8 = 14;
    pub const DOT4: u8 = 15;
    pub const DOT3: u8 = 16;
    pub const DOT2_ADD: u8 = 17;
    pub const CUBE: u8 = 18;
    pub const MAX4: u8 = 19;
    pub const SETP_EQ_PUSH: u8 = 20;
    pub const SETP_NE_PUSH: u8 = 21;
    pub const SETP_GT_PUSH: u8 = 22;
    pub const SETP_GE_PUSH: u8 = 23;
    pub const KILL_EQ: u8 = 24;
    pub const KILL_GT: u8 = 25;
    pub const KILL_GE: u8 = 26;
    pub const KILL_NE: u8 = 27;
    pub const DST: u8 = 28;
    pub const MAX_A: u8 = 29;
}
|
||||
|
||||
/// Scalar ALU opcodes. Values match canary's `AluScalarOpcode` enum in
/// `ucode.h:1001`.
///
/// The constants run from 0 (`ADDS`) through 50 (`RETAIN_PREV`) and are meant
/// to be compared against `AluInstruction::scalar_opcode`. Numbering is
/// consecutive except that no constant is defined for value 41.
pub mod sop {
    pub const ADDS: u8 = 0;
    pub const ADDS_PREV: u8 = 1;
    pub const MULS: u8 = 2;
    pub const MULS_PREV: u8 = 3;
    pub const MULS_PREV2: u8 = 4;
    pub const MAXS: u8 = 5;
    pub const MINS: u8 = 6;
    pub const SEQS: u8 = 7;
    pub const SGTS: u8 = 8;
    pub const SGES: u8 = 9;
    pub const SNES: u8 = 10;
    pub const FRCS: u8 = 11;
    pub const TRUNCS: u8 = 12;
    pub const FLOORS: u8 = 13;
    pub const EXP: u8 = 14;
    pub const LOGC: u8 = 15;
    pub const LOG: u8 = 16;
    pub const RCPC: u8 = 17;
    pub const RCPF: u8 = 18;
    pub const RCP: u8 = 19;
    pub const RSQC: u8 = 20;
    pub const RSQF: u8 = 21;
    pub const RSQ: u8 = 22;
    pub const MAXAS: u8 = 23;
    pub const MAXASF: u8 = 24;
    pub const SUBS: u8 = 25;
    pub const SUBS_PREV: u8 = 26;
    pub const SETP_EQ: u8 = 27;
    pub const SETP_NE: u8 = 28;
    pub const SETP_GT: u8 = 29;
    pub const SETP_GE: u8 = 30;
    pub const SETP_INV: u8 = 31;
    pub const SETP_POP: u8 = 32;
    pub const SETP_CLR: u8 = 33;
    pub const SETP_RSTR: u8 = 34;
    pub const KILLS_EQ: u8 = 35;
    pub const KILLS_GT: u8 = 36;
    pub const KILLS_GE: u8 = 37;
    pub const KILLS_NE: u8 = 38;
    pub const KILLS_ONE: u8 = 39;
    pub const SQRT: u8 = 40;
    // Value 41 intentionally has no named constant in this table.
    pub const MULSC0: u8 = 42;
    pub const MULSC1: u8 = 43;
    pub const ADDSC0: u8 = 44;
    pub const ADDSC1: u8 = 45;
    pub const SUBSC0: u8 = 46;
    pub const SUBSC1: u8 = 47;
    pub const SIN: u8 = 48;
    pub const COS: u8 = 49;
    pub const RETAIN_PREV: u8 = 50;
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Regression: our table previously drifted from canary's values (e.g.
    /// `MAXS=6` when canary says 5, shifting everything through SQRT). Pin
    /// the most-often-used scalar + vector opcodes here.
    #[test]
    fn opcodes_match_canary_values() {
        // Scalar.
        assert_eq!(sop::MAXS, 5);
        assert_eq!(sop::MINS, 6);
        assert_eq!(sop::SEQS, 7);
        assert_eq!(sop::EXP, 14);
        assert_eq!(sop::LOG, 16);
        assert_eq!(sop::RCP, 19);
        assert_eq!(sop::RSQ, 22);
        assert_eq!(sop::SUBS, 25);
        assert_eq!(sop::SETP_EQ, 27);
        assert_eq!(sop::KILLS_EQ, 35);
        assert_eq!(sop::SQRT, 40);
        assert_eq!(sop::SIN, 48);
        assert_eq!(sop::RETAIN_PREV, 50);
        // Vector.
        assert_eq!(vop::SNE, 7);
        assert_eq!(vop::CND_EQ, 12);
        assert_eq!(vop::MAX4, 19);
        assert_eq!(vop::KILL_EQ, 24);
        assert_eq!(vop::DST, 28);
    }

    /// Round-trips a hand-assembled word2 through `decode_alu` and checks the
    /// opcode, destination and vector-write-mask fields.
    #[test]
    fn decode_extracts_opcodes_and_dests() {
        // Build a minimal ALU word:
        //   vector_opcode = ADD (0), scalar_opcode = RCP (19),
        //   vector_dest = 3, scalar_dest = 7, vector_write_mask = 0xF
        let w2 = (vop::ADD as u32)
            | ((sop::RCP as u32) << 6)
            | (0xF << 12) // vector_write_mask
            | (3u32 << 16) // vector_dest
            | (7u32 << 24); // scalar_dest
        let alu = decode_alu([0, 0, w2]);
        assert_eq!(alu.vector_opcode, vop::ADD);
        assert_eq!(alu.scalar_opcode, sop::RCP);
        assert_eq!(alu.vector_dest, 3);
        assert_eq!(alu.scalar_dest, 7);
        assert_eq!(alu.vector_write_mask, 0xF);
        // Bit 23 was left clear, so the result targets the register file.
        assert!(!alu.vector_dest_is_export);
        // The input dwords must come back untouched.
        assert_eq!(alu.raw, [0, 0, w2]);
    }
}
|
||||
Reference in New Issue
Block a user