xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)

First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).

Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.

Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:29:38 +02:00
parent 5f0d6487ea
commit 79eb52c378
24 changed files with 10984 additions and 18 deletions

View File

@@ -0,0 +1,173 @@
//! Xenos control-flow clause decoder.
//!
//! A shader's CF block is a sequence of 48-bit clauses, packed two to a
//! three-dword row. Each clause encodes an opcode and type-specific fields
//! (exec addr/count, loop start/end, branch target, etc.).
//!
//! Spec at `xenia-canary/src/xenia/gpu/ucode.h:87-256`. We cover the subset
//! the uber-shader needs: `Exec*`, `Loop*`, `Alloc`, `Jmp`, `Call/Ret`,
//! `Exit`. Unknown opcodes are classified as `Unknown { opcode }` so the
//! translator can log + degrade.
/// One decoded control-flow (CF) clause.
///
/// Values are produced by `decode_cf_pair`. Every numeric field holds the
/// masked bit field taken straight from the 48-bit clause; no further range
/// validation is applied at decode time.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ControlFlowInstruction {
/// `kExec` / `kExecEnd` — execute a range of ALU/fetch instructions.
Exec {
/// Where this clause's instructions start inside the instruction block,
/// counted in **triples** (one instruction = 3 dwords), not in raw dwords.
address: u32,
/// Number of triples to execute.
count: u32,
/// The ALU-vs-fetch sequence bitmap (2 bits per instruction).
sequence: u32,
/// True when this clause ends the shader (the `kExecEnd` form).
is_end: bool,
/// True if predicated; skip when predicate != predicate_condition.
predicated: bool,
/// Predicate value the clause is compared against; see `predicated`.
predicate_condition: bool,
},
/// `kLoopStart` — begin an `aL` loop referencing a loop constant.
///
/// NOTE(review): `loop_id` presumably selects the loop constant and
/// `address` the CF index to continue at — confirm against the translator.
LoopStart { address: u32, loop_id: u32 },
/// `kLoopEnd` — close the loop; `address` points at the matching start.
LoopEnd { address: u32, loop_id: u32 },
/// `kCondJmp` — conditional jump to another CF index.
///
/// `predicated` / `predicate_condition` are extracted from the same bit
/// positions as for `Exec`.
CondJmp {
target: u32,
predicated: bool,
predicate_condition: bool,
},
/// `kCondCall` — call into another CF subroutine.
CondCall { target: u32 },
/// `kReturn` — return from subroutine.
Return,
/// `kAlloc` — pre-allocate export registers (position, interpolators, colors).
///
/// `size` is the raw size field; `kind` says which export class is reserved.
Alloc { size: u32, kind: AllocKind },
/// Exit the shader (terminal). Carries no operands.
Exit,
/// Unknown / unhandled opcode. `opcode` is the raw 4-bit opcode value so
/// the consumer can log it.
Unknown { opcode: u8 },
}
/// Class of export storage requested by a `kAlloc` clause.
///
/// The decoder turns a 3-bit field into one of these variants through
/// [`AllocKind::from_bits`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AllocKind {
    /// Field value 0.
    Position,
    /// Field value 1.
    Interpolators,
    /// Field value 2.
    Colors,
    /// Field value 3.
    Memexport,
    /// Every remaining field value (4 through 7).
    Other,
}

impl AllocKind {
    /// Classifies the low three bits of `b`; any higher bits are ignored.
    fn from_bits(b: u32) -> Self {
        // Variants listed in field-value order; anything past the end of the
        // table falls back to `Other`.
        const KNOWN: [AllocKind; 4] = [
            AllocKind::Position,
            AllocKind::Interpolators,
            AllocKind::Colors,
            AllocKind::Memexport,
        ];
        let field = (b & 0x7) as usize;
        KNOWN.get(field).copied().unwrap_or(AllocKind::Other)
    }
}
/// Splits one CF row (three consecutive dwords) into its two 48-bit clauses
/// and decodes both, returning them as `(A, B)`.
///
/// Bit packing, least-significant bits first (canary `ucode.h:218-256`):
/// - clause A: all of `word0`, then the low 16 bits of `word1`
/// - clause B: the high 16 bits of `word1`, then all of `word2`
///
/// Within each 48-bit payload the opcode occupies the top 4 bits
/// (bits 44..=47).
pub fn decode_cf_pair(
    word0: u32,
    word1: u32,
    word2: u32,
) -> (ControlFlowInstruction, ControlFlowInstruction) {
    // The middle dword is shared: its halves belong to different clauses.
    let shared_lo = u64::from(word1 & 0xFFFF);
    let shared_hi = u64::from(word1 >> 16);

    let first = (shared_lo << 32) | u64::from(word0);
    let second = (u64::from(word2) << 16) | shared_hi;

    (decode_single(first), decode_single(second))
}
/// Decodes a single CF clause held in the low 48 bits of `payload`.
///
/// The opcode is read from bits 44..=47. Opcodes without a dedicated arm are
/// returned as `Unknown` instead of being rejected.
fn decode_single(payload: u64) -> ControlFlowInstruction {
    // NOTE(review): the opcode numbers matched below (0 = Exec, 1 = Exit,
    // 6/7 = loop start/end, ...) should be cross-checked against the
    // `ControlFlowOpcode` enum in the referenced `ucode.h`; they may not line
    // up one-to-one — TODO confirm.

    // `width` bits of the payload starting at bit `shift`.
    let bits = |shift: u32, width: u32| ((payload >> shift) & ((1u64 << width) - 1)) as u32;
    // A single payload bit as a boolean.
    let flag = |bit: u32| (payload >> bit) & 1 != 0;

    let opcode = bits(44, 4) as u8;

    // Predicate enable + condition are taken from bits 28 and 29 for the
    // exec/jmp forms. This is a rough extraction that the interpreter
    // tolerates, since unknown clauses are logged.
    let predicated = flag(28);
    let predicate_condition = flag(29);

    match opcode {
        // Plain exec (0) and exec-then-end (2) share one field layout.
        0 | 2 => ControlFlowInstruction::Exec {
            address: bits(0, 12),
            count: bits(12, 3),
            sequence: bits(16, 12),
            is_end: opcode == 2,
            predicated,
            predicate_condition,
        },
        1 => ControlFlowInstruction::Exit,
        // Loop start (6) and loop end (7) also share a layout.
        6 | 7 => {
            let address = bits(0, 10);
            let loop_id = bits(16, 5);
            if opcode == 6 {
                ControlFlowInstruction::LoopStart { address, loop_id }
            } else {
                ControlFlowInstruction::LoopEnd { address, loop_id }
            }
        }
        8 => ControlFlowInstruction::CondCall {
            target: bits(0, 10),
        },
        9 => ControlFlowInstruction::Return,
        10 => ControlFlowInstruction::CondJmp {
            target: bits(0, 10),
            predicated,
            predicate_condition,
        },
        12 => ControlFlowInstruction::Alloc {
            size: bits(0, 3),
            kind: AllocKind::from_bits(bits(4, 3)),
        },
        other => ControlFlowInstruction::Unknown { opcode: other },
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Splits a 48-bit clause-A payload into `(word0, low half of word1)`.
    fn split_clause_a(payload: u64) -> (u32, u32) {
        (
            (payload & 0xFFFF_FFFF) as u32,
            ((payload >> 32) & 0xFFFF) as u32,
        )
    }

    /// Splits a 48-bit clause-B payload into `(high half of word1, word2)`.
    /// The first element is already shifted into the upper 16 bits so it can
    /// be OR-ed with clause A's contribution to `word1`.
    fn split_clause_b(payload: u64) -> (u32, u32) {
        (
            ((payload & 0xFFFF) as u32) << 16,
            ((payload >> 16) & 0xFFFF_FFFF) as u32,
        )
    }

    #[test]
    fn opcode_exit_decodes() {
        // opcode 1 (Exit) in bits 44..47 of A's 48-bit payload.
        let (word0, word1) = split_clause_a(1u64 << 44);
        let cf = decode_cf_pair(word0, word1, 0).0;
        assert_eq!(cf, ControlFlowInstruction::Exit);
    }

    #[test]
    fn opcode_exec_end_carries_address_count() {
        // opcode 2 (ExecEnd), address=4, count=2, sequence=0.
        let (word0, word1) = split_clause_a((2u64 << 44) | (2u64 << 12) | 4);
        let cf = decode_cf_pair(word0, word1, 0).0;
        match cf {
            ControlFlowInstruction::Exec {
                address,
                count,
                is_end,
                ..
            } => {
                assert_eq!(address, 4);
                assert_eq!(count, 2);
                assert!(is_end);
            }
            other => panic!("expected Exec, got {other:?}"),
        }
    }

    #[test]
    fn second_clause_decodes_from_upper_words() {
        // A = Exit, B = ExecEnd(address=4, count=2); the two share word1.
        let (word0, word1_lo) = split_clause_a(1u64 << 44);
        let (word1_hi, word2) = split_clause_b((2u64 << 44) | (2u64 << 12) | 4);
        let (a, b) = decode_cf_pair(word0, word1_lo | word1_hi, word2);
        assert_eq!(a, ControlFlowInstruction::Exit);
        assert_eq!(
            b,
            ControlFlowInstruction::Exec {
                address: 4,
                count: 2,
                sequence: 0,
                is_end: true,
                predicated: false,
                predicate_condition: false,
            }
        );
    }

    #[test]
    fn unhandled_opcode_is_reported_as_unknown() {
        // Opcode 15 has no dedicated arm in the decoder.
        let (word0, word1) = split_clause_a(15u64 << 44);
        let cf = decode_cf_pair(word0, word1, 0).0;
        assert_eq!(cf, ControlFlowInstruction::Unknown { opcode: 15 });
    }
}