First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).
Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.
Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
125 lines
4.6 KiB
Rust
125 lines
4.6 KiB
Rust
//! The "Xenos constants" block the WGSL interpreter consumes per draw.
|
||
//!
|
||
//! Mirrors the Xenos register-file regions that carry the per-draw constant
|
||
//! values shaders reference at runtime:
|
||
//!
|
||
//! | Region | Base | Count | Size |
|
||
//! |--------|------|-------|------|
|
||
//! | ALU | 0x4000 | 512 × vec4<f32> | 8 KB |
|
||
//! | Fetch | 0x4800 | 256 × u32 | 1 KB |
|
||
//! | Bool | 0x4900 | 8 × u32 | 32 B |
|
||
//! | Loop | 0x4908 | 32 × u32 | 128 B |
|
||
//!
|
||
//! Total: 9376 bytes (~9.2 KiB), well under wgpu's default 64 KiB
//! `max_uniform_buffer_binding_size` limit. `XenosConstantsBlock` is declared
//! `#[repr(C)]` and implements bytemuck's `Pod`, so it can be passed through
//! `bytemuck::bytes_of()` directly into a wgpu uniform buffer. The matching
//! WGSL `struct XenosConstants` lives in `shaders/xenos_interp.wgsl`.
|
||
|
||
use bytemuck::{Pod, Zeroable};
|
||
|
||
use crate::register_file::RegisterFile;
|
||
|
||
/// Number of ALU constants; each one is a `vec4<f32>` (four registers).
pub const ALU_CONSTANT_COUNT: usize = 512;
/// Number of fetch-constant registers (one `u32` each).
pub const FETCH_CONSTANT_COUNT: usize = 256;
/// Number of bool-constant registers (one `u32` each).
pub const BOOL_CONSTANT_COUNT: usize = 8;
/// Number of loop-constant registers (one `u32` each).
pub const LOOP_CONSTANT_COUNT: usize = 32;

/// Register index of the first ALU constant component.
pub const CONST_BASE_ALU: u32 = 0x4000;
/// Register index of the first fetch constant.
pub const CONST_BASE_FETCH: u32 = 0x4800;
/// Register index of the first bool constant.
pub const CONST_BASE_BOOL: u32 = 0x4900;
/// Register index of the first loop constant.
pub const CONST_BASE_LOOP: u32 = 0x4908;

// Compile-time guard: every region is read as `count` registers starting at
// its base (four per ALU constant), so a region must end at or before the
// start of the next one. Editing a count or a base so that two regions
// overlap now fails the build instead of silently aliasing registers.
const _: () = {
    assert!(CONST_BASE_ALU + 4 * ALU_CONSTANT_COUNT as u32 <= CONST_BASE_FETCH);
    assert!(CONST_BASE_FETCH + FETCH_CONSTANT_COUNT as u32 <= CONST_BASE_BOOL);
    assert!(CONST_BASE_BOOL + BOOL_CONSTANT_COUNT as u32 <= CONST_BASE_LOOP);
};
|
||
|
||
/// Per-draw constants block uploaded once to the uniform buffer at
|
||
/// `@group(0) @binding(1)`.
|
||
#[repr(C)]
|
||
#[derive(Clone, Copy)]
|
||
pub struct XenosConstantsBlock {
|
||
pub alu: [[f32; 4]; ALU_CONSTANT_COUNT],
|
||
pub fetch: [u32; FETCH_CONSTANT_COUNT],
|
||
pub bool_consts: [u32; BOOL_CONSTANT_COUNT],
|
||
pub loop_consts: [u32; LOOP_CONSTANT_COUNT],
|
||
}
|
||
|
||
// SAFETY: all fields are Pod arrays of Pod primitives; `#[repr(C)]` fixes
|
||
// the layout. `bytemuck` derives `Pod` only when alignment + padding line
|
||
// up, so manual `unsafe impl` is the right tool here.
|
||
unsafe impl Zeroable for XenosConstantsBlock {}
|
||
unsafe impl Pod for XenosConstantsBlock {}
|
||
|
||
impl Default for XenosConstantsBlock {
|
||
fn default() -> Self {
|
||
Self {
|
||
alu: [[0.0; 4]; ALU_CONSTANT_COUNT],
|
||
fetch: [0; FETCH_CONSTANT_COUNT],
|
||
bool_consts: [0; BOOL_CONSTANT_COUNT],
|
||
loop_consts: [0; LOOP_CONSTANT_COUNT],
|
||
}
|
||
}
|
||
}
|
||
|
||
impl XenosConstantsBlock {
|
||
/// Size in bytes — exposed for tests + wgpu buffer sizing.
|
||
pub const SIZE: usize = std::mem::size_of::<Self>();
|
||
|
||
/// Snapshot the constants from a Xenos `RegisterFile` into a dense,
|
||
/// host-friendly layout the WGSL interpreter expects. ALU constants
|
||
/// (vec4 each) are 4 consecutive registers; fetch constants are u32.
|
||
pub fn snapshot(rf: &RegisterFile) -> Self {
|
||
let mut out = Self::default();
|
||
for i in 0..ALU_CONSTANT_COUNT {
|
||
let base = CONST_BASE_ALU + (i as u32) * 4;
|
||
out.alu[i] = [
|
||
f32::from_bits(rf.read(base)),
|
||
f32::from_bits(rf.read(base + 1)),
|
||
f32::from_bits(rf.read(base + 2)),
|
||
f32::from_bits(rf.read(base + 3)),
|
||
];
|
||
}
|
||
for i in 0..FETCH_CONSTANT_COUNT {
|
||
out.fetch[i] = rf.read(CONST_BASE_FETCH + i as u32);
|
||
}
|
||
for i in 0..BOOL_CONSTANT_COUNT {
|
||
out.bool_consts[i] = rf.read(CONST_BASE_BOOL + i as u32);
|
||
}
|
||
for i in 0..LOOP_CONSTANT_COUNT {
|
||
out.loop_consts[i] = rf.read(CONST_BASE_LOOP + i as u32);
|
||
}
|
||
out
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Layout guard: 512·16 + 256·4 + 8·4 + 32·4 = 8192 + 1024 + 32 + 128 =
    /// 9376 bytes. A different value means a constant count changed or the
    /// compiler inserted padding; both matter because the WGSL struct in
    /// `xenos_interp.wgsl` depends on this layout.
    #[test]
    fn xenos_constants_block_size_is_stable() {
        assert_eq!(XenosConstantsBlock::SIZE, 9376);
    }

    /// Values written to each register region come back in the matching
    /// field and slot of the snapshot.
    #[test]
    fn snapshot_roundtrip_from_register_file() {
        let mut rf = RegisterFile::new();

        // Seed alu[0] with the recognisable pattern (1.0, 2.0, 3.0, 4.0),
        // one lane per consecutive register.
        for (lane, value) in [1.0f32, 2.0, 3.0, 4.0].iter().enumerate() {
            rf.write(CONST_BASE_ALU + lane as u32, value.to_bits());
        }
        rf.write(CONST_BASE_FETCH + 5, 0xDEAD_BEEF);
        rf.write(CONST_BASE_BOOL, 0x1234);
        rf.write(CONST_BASE_LOOP + 3, 0x5678);

        let snap = XenosConstantsBlock::snapshot(&rf);

        assert_eq!(snap.alu[0], [1.0, 2.0, 3.0, 4.0]);
        assert_eq!(snap.fetch[5], 0xDEAD_BEEF);
        assert_eq!(snap.bool_consts[0], 0x1234);
        assert_eq!(snap.loop_consts[3], 0x5678);
    }
}
|