xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)
First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).
Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.
Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
173
crates/xenia-gpu/src/ucode/control_flow.rs
Normal file
173
crates/xenia-gpu/src/ucode/control_flow.rs
Normal file
@@ -0,0 +1,173 @@
|
||||
//! Xenos control-flow clause decoder.
|
||||
//!
|
||||
//! A shader's CF block is a sequence of 48-bit clauses, packed two per row
//! of three dwords (2 x 48 bits = 96 bits). Each clause encodes an opcode and
//! type-specific fields (exec addr/count, loop start/end, branch target, etc.).
|
||||
//!
|
||||
//! Spec at `xenia-canary/src/xenia/gpu/ucode.h:87-256`. We cover the subset
|
||||
//! the uber-shader needs: `Exec*`, `Loop*`, `Alloc`, `Jmp`, `Call/Ret`,
|
||||
//! `Exit`. Unknown opcodes are classified as `Unknown { opcode }` so the
|
||||
//! translator can log + degrade.
|
||||
|
||||
/// Parsed representation of one CF clause.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum ControlFlowInstruction {
|
||||
/// `kExec` / `kExecEnd` — execute a range of ALU/fetch instructions.
|
||||
Exec {
|
||||
/// Instruction-block dword index where this clause's instructions start,
|
||||
/// expressed in **triple units** (each inst = 3 dwords).
|
||||
address: u32,
|
||||
/// Number of triples to execute.
|
||||
count: u32,
|
||||
/// The ALU-vs-fetch sequence bitmap (2 bits per instruction).
|
||||
sequence: u32,
|
||||
/// True when this clause ends the shader.
|
||||
is_end: bool,
|
||||
/// True if predicated; skip when predicate != predicate_condition.
|
||||
predicated: bool,
|
||||
predicate_condition: bool,
|
||||
},
|
||||
/// `kLoopStart` — begin a `aL` loop referencing a loop constant.
|
||||
LoopStart { address: u32, loop_id: u32 },
|
||||
/// `kLoopEnd` — close the loop; `address` points at the matching start.
|
||||
LoopEnd { address: u32, loop_id: u32 },
|
||||
/// `kCondJmp` — conditional jump to another CF index.
|
||||
CondJmp {
|
||||
target: u32,
|
||||
predicated: bool,
|
||||
predicate_condition: bool,
|
||||
},
|
||||
/// `kCondCall` — call into another CF subroutine.
|
||||
CondCall { target: u32 },
|
||||
/// `kReturn` — return from subroutine.
|
||||
Return,
|
||||
/// `kAlloc` — pre-allocate export registers (position, interpolators, colors).
|
||||
Alloc { size: u32, kind: AllocKind },
|
||||
/// Exit the shader (terminal).
|
||||
Exit,
|
||||
/// Unknown / unhandled opcode.
|
||||
Unknown { opcode: u8 },
|
||||
}
|
||||
|
||||
/// What a `kAlloc` clause reserves export space for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AllocKind {
    /// Selected by field value 0.
    Position,
    /// Selected by field value 1.
    Interpolators,
    /// Selected by field value 2.
    Colors,
    /// Selected by field value 3.
    Memexport,
    /// Any other field value (4 through 7).
    Other,
}

impl AllocKind {
    /// Maps the raw allocation-type field onto an [`AllocKind`].
    ///
    /// Only the low three bits of `b` are considered; values 0 through 3
    /// name a specific kind and everything else becomes [`AllocKind::Other`].
    fn from_bits(b: u32) -> Self {
        // Indexed by the masked field value; indices past the table are `Other`.
        const KNOWN: [AllocKind; 4] = [
            AllocKind::Position,
            AllocKind::Interpolators,
            AllocKind::Colors,
            AllocKind::Memexport,
        ];
        let index = (b & 0x7) as usize;
        KNOWN.get(index).copied().unwrap_or(AllocKind::Other)
    }
}
|
||||
|
||||
/// Decode one row (three consecutive CF dwords) into two CF clauses.
|
||||
///
|
||||
/// Word layout per canary (`ucode.h:218-256`):
|
||||
/// - word0 + lo16(word1) → CF_A's 48-bit payload
|
||||
/// - hi16(word1) + word2 → CF_B's 48-bit payload
|
||||
///
|
||||
/// The opcode lives in the top 4 bits of the 48-bit payload (= bits 44..47).
|
||||
pub fn decode_cf_pair(word0: u32, word1: u32, word2: u32) -> (ControlFlowInstruction, ControlFlowInstruction) {
|
||||
// Build each 48-bit value as u64; LE within the clause.
|
||||
let a = (word0 as u64) | ((word1 as u64 & 0xFFFF) << 32);
|
||||
let b = ((word1 as u64 >> 16) & 0xFFFF) | ((word2 as u64) << 16);
|
||||
(decode_single(a), decode_single(b))
|
||||
}
|
||||
|
||||
fn decode_single(payload: u64) -> ControlFlowInstruction {
|
||||
// Top 4 bits of the 48-bit payload.
|
||||
let opcode = ((payload >> 44) & 0xF) as u8;
|
||||
// Predicate bit + condition live at the 28..30 range for exec/jmp. Rough
|
||||
// extraction — good enough for the interpreter, which logs unknowns.
|
||||
let predicated = ((payload >> 28) & 1) != 0;
|
||||
let predicate_condition = ((payload >> 29) & 1) != 0;
|
||||
|
||||
match opcode {
|
||||
0 => ControlFlowInstruction::Exec {
|
||||
address: (payload & 0xFFF) as u32,
|
||||
count: ((payload >> 12) & 0x7) as u32,
|
||||
sequence: ((payload >> 16) & 0xFFF) as u32,
|
||||
is_end: false,
|
||||
predicated,
|
||||
predicate_condition,
|
||||
},
|
||||
1 => ControlFlowInstruction::Exit,
|
||||
2 => ControlFlowInstruction::Exec {
|
||||
address: (payload & 0xFFF) as u32,
|
||||
count: ((payload >> 12) & 0x7) as u32,
|
||||
sequence: ((payload >> 16) & 0xFFF) as u32,
|
||||
is_end: true,
|
||||
predicated,
|
||||
predicate_condition,
|
||||
},
|
||||
6 => ControlFlowInstruction::LoopStart {
|
||||
address: (payload & 0x3FF) as u32,
|
||||
loop_id: ((payload >> 16) & 0x1F) as u32,
|
||||
},
|
||||
7 => ControlFlowInstruction::LoopEnd {
|
||||
address: (payload & 0x3FF) as u32,
|
||||
loop_id: ((payload >> 16) & 0x1F) as u32,
|
||||
},
|
||||
8 => ControlFlowInstruction::CondCall {
|
||||
target: (payload & 0x3FF) as u32,
|
||||
},
|
||||
9 => ControlFlowInstruction::Return,
|
||||
10 => ControlFlowInstruction::CondJmp {
|
||||
target: (payload & 0x3FF) as u32,
|
||||
predicated,
|
||||
predicate_condition,
|
||||
},
|
||||
12 => ControlFlowInstruction::Alloc {
|
||||
size: (payload & 0x7) as u32,
|
||||
kind: AllocKind::from_bits(((payload >> 4) & 0x7) as u32),
|
||||
},
|
||||
other => ControlFlowInstruction::Unknown { opcode: other },
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes `payload` as clause A of a row whose remaining bits are zero.
    ///
    /// Clause A occupies all of the first dword plus the low 16 bits of the
    /// second, so the payload is split at bit 32.
    fn decode_as_clause_a(payload: u64) -> ControlFlowInstruction {
        let word0 = (payload & 0xFFFF_FFFF) as u32;
        let word1_low_half = ((payload >> 32) & 0xFFFF) as u32;
        decode_cf_pair(word0, word1_low_half, 0).0
    }

    #[test]
    fn opcode_exit_decodes() {
        // Opcode 1 (Exit) placed in bits 44..47 of clause A.
        let payload: u64 = 1u64 << 44;
        assert_eq!(decode_as_clause_a(payload), ControlFlowInstruction::Exit);
    }

    #[test]
    fn opcode_exec_end_carries_address_count() {
        // Opcode 2 (ExecEnd) with address = 4, count = 2, sequence = 0.
        let payload: u64 = (2u64 << 44) | (2u64 << 12) | 4;
        match decode_as_clause_a(payload) {
            ControlFlowInstruction::Exec {
                address,
                count,
                is_end,
                ..
            } => {
                assert_eq!(address, 4);
                assert_eq!(count, 2);
                assert!(is_end);
            }
            other => panic!("expected Exec, got {other:?}"),
        }
    }
}
|
||||
Reference in New Issue
Block a user