xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)

First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).

Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.

Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:29:38 +02:00
parent 5f0d6487ea
commit 79eb52c378
24 changed files with 10984 additions and 18 deletions

View File

@@ -0,0 +1,124 @@
//! The "Xenos constants" block the WGSL interpreter consumes per draw.
//!
//! Mirrors the Xenos register-file regions that carry the per-draw constant
//! values shaders reference at runtime:
//!
//! | Region | Base | Count | Size |
//! |--------|------|-------|------|
//! | ALU | 0x4000 | 512 × vec4<f32> | 8 KB |
//! | Fetch | 0x4800 | 256 × u32 | 1 KB |
//! | Bool | 0x4900 | 8 × u32 | 32 B |
//! | Loop | 0x4908 | 32 × u32 | 128 B |
//!
//! Total: 9376 bytes (~9.2 KB), well under the 64 KiB uniform-buffer binding
//! size that wgpu's default limits guarantee. The `XenosConstantsBlock` is
//! declared `#[repr(C)]` and implements bytemuck `Pod`, so it can be passed to
//! `bytemuck::bytes_of()` and uploaded directly into a wgpu uniform buffer.
//! The matching WGSL `struct XenosConstants` lives in
//! `shaders/xenos_interp.wgsl`.
use bytemuck::{Pod, Zeroable};
use crate::register_file::RegisterFile;
/// Number of ALU constants in the block; each one is a vec4 of `f32`
/// (four consecutive registers).
pub const ALU_CONSTANT_COUNT: usize = 512;
/// Number of `u32` fetch-constant words in the block.
pub const FETCH_CONSTANT_COUNT: usize = 256;
/// Number of `u32` boolean-constant words in the block.
pub const BOOL_CONSTANT_COUNT: usize = 8;
/// Number of `u32` loop-constant words in the block.
pub const LOOP_CONSTANT_COUNT: usize = 32;
/// Register index (as passed to `RegisterFile::read`) of the first ALU
/// constant word. ALU constant `i` occupies the four registers starting at
/// `CONST_BASE_ALU + 4 * i`.
pub const CONST_BASE_ALU: u32 = 0x4000;
/// Register index of the first fetch-constant word.
pub const CONST_BASE_FETCH: u32 = 0x4800;
/// Register index of the first boolean-constant word.
pub const CONST_BASE_BOOL: u32 = 0x4900;
/// Register index of the first loop-constant word.
pub const CONST_BASE_LOOP: u32 = 0x4908;
/// Per-draw constants block uploaded once to the uniform buffer at
/// `@group(0) @binding(1)`.
///
/// `#[repr(C)]` plus the field order below define the exact bytes that are
/// uploaded, so fields must not be reordered or resized without updating the
/// matching WGSL struct in `shaders/xenos_interp.wgsl`.
///
/// NOTE(review): WGSL uniform-buffer arrays normally require a 16-byte
/// element stride, so the three `u32` arrays are presumably declared as
/// packed `vec4<u32>` arrays on the shader side — confirm against the WGSL.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct XenosConstantsBlock {
/// ALU constants, one `[f32; 4]` vec4 per constant; entry `i` comes from the
/// four registers starting at `CONST_BASE_ALU + 4 * i`.
pub alu: [[f32; 4]; ALU_CONSTANT_COUNT],
/// Fetch-constant words, copied verbatim from `CONST_BASE_FETCH` onward.
pub fetch: [u32; FETCH_CONSTANT_COUNT],
/// Boolean-constant words, copied verbatim from `CONST_BASE_BOOL` onward.
pub bool_consts: [u32; BOOL_CONSTANT_COUNT],
/// Loop-constant words, copied verbatim from `CONST_BASE_LOOP` onward.
pub loop_consts: [u32; LOOP_CONSTANT_COUNT],
}
// SAFETY: `#[repr(C)]` fixes the field order, the type is `Copy`, and every
// field is an array of `f32` or `u32` — 4-byte-aligned primitives for which
// every bit pattern (including all zeroes) is valid. Each field's size
// (8192, 1024, 32 and 128 bytes) is a multiple of that 4-byte alignment, so
// the struct has no padding between fields or at its end, which is what
// `Zeroable` and `Pod` require. The impls are written by hand rather than
// derived, so this argument must be re-checked whenever a field changes; the
// `xenos_constants_block_size_is_stable` test pins the padding-free size.
unsafe impl Zeroable for XenosConstantsBlock {}
unsafe impl Pod for XenosConstantsBlock {}
impl Default for XenosConstantsBlock {
    /// Builds a block in which every ALU lane is `0.0` and every fetch,
    /// bool and loop word is `0`.
    ///
    /// Written by hand because the standard library only implements
    /// `Default` for arrays of up to 32 elements, so it cannot be derived
    /// for arrays of these sizes.
    fn default() -> Self {
        let alu = [[0.0_f32; 4]; ALU_CONSTANT_COUNT];
        let fetch = [0_u32; FETCH_CONSTANT_COUNT];
        let bool_consts = [0_u32; BOOL_CONSTANT_COUNT];
        let loop_consts = [0_u32; LOOP_CONSTANT_COUNT];
        Self {
            alu,
            fetch,
            bool_consts,
            loop_consts,
        }
    }
}
impl XenosConstantsBlock {
    /// Size of the block in bytes, used by tests and for sizing the wgpu
    /// buffer.
    pub const SIZE: usize = std::mem::size_of::<Self>();

    /// Copies the constant regions of a Xenos `RegisterFile` into the dense
    /// layout the WGSL interpreter expects.
    ///
    /// Each ALU constant is assembled from four consecutive registers whose
    /// raw bits are reinterpreted as `f32`; fetch, bool and loop constants
    /// are copied as plain `u32` words. Registers are read in ascending
    /// order within each region, regions in the order ALU, fetch, bool, loop.
    pub fn snapshot(rf: &RegisterFile) -> Self {
        let mut block = Self::default();

        // One vec4 per ALU constant: base register advances by 4 each time.
        let alu_bases = (CONST_BASE_ALU..).step_by(4);
        for (vec4, base) in block.alu.iter_mut().zip(alu_bases) {
            *vec4 = [0_u32, 1, 2, 3].map(|lane| f32::from_bits(rf.read(base + lane)));
        }

        Self::copy_words(rf, CONST_BASE_FETCH, &mut block.fetch);
        Self::copy_words(rf, CONST_BASE_BOOL, &mut block.bool_consts);
        Self::copy_words(rf, CONST_BASE_LOOP, &mut block.loop_consts);

        block
    }

    /// Fills `dst` with the consecutive registers starting at `first_reg`.
    fn copy_words(rf: &RegisterFile, first_reg: u32, dst: &mut [u32]) {
        for (word, reg) in dst.iter_mut().zip(first_reg..) {
            *word = rf.read(reg);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Layout guard: 512 vec4s (8192 B) + 256 fetch words (1024 B) +
    /// 8 bool words (32 B) + 32 loop words (128 B) = 9376 bytes. A different
    /// value means a constant count changed or the compiler inserted
    /// padding, either of which breaks agreement with the WGSL struct in
    /// `xenos_interp.wgsl`.
    #[test]
    fn xenos_constants_block_size_is_stable() {
        const EXPECTED_BYTES: usize = 9376;
        assert_eq!(XenosConstantsBlock::SIZE, EXPECTED_BYTES);
    }

    /// Values written into each constant region of a `RegisterFile` must
    /// show up at the corresponding slot of the snapshot.
    #[test]
    fn snapshot_roundtrip_from_register_file() {
        let alu0 = [1.0_f32, 2.0, 3.0, 4.0];
        let mut regs = RegisterFile::new();

        // alu[0] lives in the first four ALU registers, one lane each.
        for (reg, lane) in (CONST_BASE_ALU..).zip(alu0.iter()) {
            regs.write(reg, lane.to_bits());
        }
        regs.write(CONST_BASE_FETCH + 5, 0xDEAD_BEEF);
        regs.write(CONST_BASE_BOOL, 0x1234);
        regs.write(CONST_BASE_LOOP + 3, 0x5678);

        let block = XenosConstantsBlock::snapshot(&regs);

        assert_eq!(block.alu[0], alu0);
        assert_eq!(block.fetch[5], 0xDEAD_BEEF);
        assert_eq!(block.bool_consts[0], 0x1234);
        assert_eq!(block.loop_consts[3], 0x5678);
    }
}