Files
xenia-rs/crates/xenia-gpu/src/xenos_constants.rs
MechaCat02 79eb52c378 xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)
First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).

Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.

Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:29:38 +02:00

125 lines
4.6 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! The "Xenos constants" block the WGSL interpreter consumes per draw.
//!
//! Mirrors the Xenos register-file regions that carry the per-draw constant
//! values shaders reference at runtime:
//!
//! | Region | Base | Count | Size |
//! |--------|------|-------|------|
//! | ALU | 0x4000 | 512 × vec4<f32> | 8 KB |
//! | Fetch | 0x4800 | 256 × u32 | 1 KB |
//! | Bool | 0x4900 | 8 × u32 | 32 B |
//! | Loop | 0x4908 | 32 × u32 | 128 B |
//!
//! Total: 9376 bytes (~9.2 KiB), well under wgpu's default
//! `max_uniform_buffer_binding_size` limit of 64 KiB (and under the 16 KiB
//! downlevel default). The `XenosConstantsBlock` is declared `#[repr(C)]` and
//! implements bytemuck `Pod`, so it can be passed to `bytemuck::bytes_of()`
//! and written directly into a wgpu uniform buffer. The matching WGSL
//! `struct XenosConstants` lives in `shaders/xenos_interp.wgsl`.
use bytemuck::{Pod, Zeroable};
use crate::register_file::RegisterFile;
// --- ALU (float) constants -------------------------------------------------

/// Number of `vec4<f32>` ALU constants captured per draw.
pub const ALU_CONSTANT_COUNT: usize = 512;
/// Register index of the first ALU constant component; each constant
/// occupies four consecutive registers starting here.
pub const CONST_BASE_ALU: u32 = 0x4000;

// --- Fetch constants -------------------------------------------------------

/// Number of `u32` fetch-constant words captured per draw.
pub const FETCH_CONSTANT_COUNT: usize = 256;
/// Register index of the first fetch-constant word.
pub const CONST_BASE_FETCH: u32 = 0x4800;

// --- Bool constants --------------------------------------------------------

/// Number of `u32` bool-constant words captured per draw.
pub const BOOL_CONSTANT_COUNT: usize = 8;
/// Register index of the first bool-constant word.
pub const CONST_BASE_BOOL: u32 = 0x4900;

// --- Loop constants --------------------------------------------------------

/// Number of `u32` loop-constant words captured per draw.
pub const LOOP_CONSTANT_COUNT: usize = 32;
/// Register index of the first loop-constant word.
pub const CONST_BASE_LOOP: u32 = 0x4908;
/// Per-draw constants block uploaded once to the uniform buffer at
/// `@group(0) @binding(1)`.
///
/// The struct is `#[repr(C)]`, so the fields appear in memory in declaration
/// order: ALU constants first, then fetch, bool and loop words. Every field
/// is an array of 4-byte scalars.
///
/// `Debug` and `PartialEq` are derived so snapshots can be logged and
/// compared (e.g. in tests or to detect an unchanged block between draws).
///
/// NOTE(review): WGSL's uniform address space requires array element strides
/// that are a multiple of 16 bytes, so the shader-side declaration of the
/// `u32` arrays presumably packs them as `vec4<u32>` — confirm against
/// `shaders/xenos_interp.wgsl`.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct XenosConstantsBlock {
    /// ALU constants, one `[x, y, z, w]` float vector per entry.
    pub alu: [[f32; 4]; ALU_CONSTANT_COUNT],
    /// Raw fetch-constant words.
    pub fetch: [u32; FETCH_CONSTANT_COUNT],
    /// Raw bool-constant words.
    pub bool_consts: [u32; BOOL_CONSTANT_COUNT],
    /// Raw loop-constant words.
    pub loop_consts: [u32; LOOP_CONSTANT_COUNT],
}
// SAFETY: all fields are Pod arrays of Pod primitives; `#[repr(C)]` fixes
// the layout. `bytemuck` derives `Pod` only when alignment + padding line
// up, so manual `unsafe impl` is the right tool here.
unsafe impl Zeroable for XenosConstantsBlock {}
unsafe impl Pod for XenosConstantsBlock {}
impl Default for XenosConstantsBlock {
    /// Returns a block in which every ALU, fetch, bool and loop constant is
    /// zero.
    fn default() -> Self {
        // A single zeroed vec4, replicated across the whole ALU bank.
        const ZERO_VEC4: [f32; 4] = [0.0, 0.0, 0.0, 0.0];

        let alu = [ZERO_VEC4; ALU_CONSTANT_COUNT];
        let fetch = [0u32; FETCH_CONSTANT_COUNT];
        let bool_consts = [0u32; BOOL_CONSTANT_COUNT];
        let loop_consts = [0u32; LOOP_CONSTANT_COUNT];

        Self {
            alu,
            fetch,
            bool_consts,
            loop_consts,
        }
    }
}
impl XenosConstantsBlock {
    /// Size of the block in bytes; used by tests and for sizing the wgpu
    /// buffer.
    pub const SIZE: usize = std::mem::size_of::<Self>();

    /// Copies the shader constants out of a Xenos `RegisterFile` into a
    /// densely packed block.
    ///
    /// Each ALU constant is assembled from four consecutive registers
    /// starting at `CONST_BASE_ALU`, reinterpreting the raw register bits as
    /// `f32`. Fetch, bool and loop constants are copied word-for-word from
    /// their respective base registers.
    pub fn snapshot(rf: &RegisterFile) -> Self {
        let mut block = Self::default();

        // ALU: entry `n` lives in registers `CONST_BASE_ALU + 4n ..= +4n+3`.
        // The array is the left-hand side of each zip, so the open-ended
        // register range is never advanced past the last element.
        for (vec4, first_reg) in block.alu.iter_mut().zip((CONST_BASE_ALU..).step_by(4)) {
            for (lane, reg) in vec4.iter_mut().zip(first_reg..) {
                *lane = f32::from_bits(rf.read(reg));
            }
        }

        // The remaining banks are plain one-register-per-word copies.
        for (word, reg) in block.fetch.iter_mut().zip(CONST_BASE_FETCH..) {
            *word = rf.read(reg);
        }
        for (word, reg) in block.bool_consts.iter_mut().zip(CONST_BASE_BOOL..) {
            *word = rf.read(reg);
        }
        for (word, reg) in block.loop_consts.iter_mut().zip(CONST_BASE_LOOP..) {
            *word = rf.read(reg);
        }

        block
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Guards the byte layout shared with `xenos_interp.wgsl`.
    ///
    /// Expected size: 512 vec4s (16 B each) + 256 + 8 + 32 words (4 B each)
    /// = 8192 + 1024 + 32 + 128 = 9376 bytes. A different value means a
    /// constant count changed or the compiler inserted padding.
    #[test]
    fn xenos_constants_block_size_is_stable() {
        assert_eq!(XenosConstantsBlock::SIZE, 9376);
    }

    /// Values written to the register file must show up in the matching
    /// slot of the snapshot.
    #[test]
    fn snapshot_roundtrip_from_register_file() {
        let mut regs = RegisterFile::new();

        // Recognisable pattern for alu[0], written one lane per register.
        let alu0 = [1.0_f32, 2.0, 3.0, 4.0];
        for (lane, reg) in alu0.iter().zip(CONST_BASE_ALU..) {
            regs.write(reg, lane.to_bits());
        }
        regs.write(CONST_BASE_FETCH + 5, 0xDEAD_BEEF);
        regs.write(CONST_BASE_BOOL, 0x1234);
        regs.write(CONST_BASE_LOOP + 3, 0x5678);

        let block = XenosConstantsBlock::snapshot(&regs);

        assert_eq!(block.alu[0], alu0);
        assert_eq!(block.fetch[5], 0xDEAD_BEEF);
        assert_eq!(block.bool_consts[0], 0x1234);
        assert_eq!(block.loop_consts[3], 0x5678);
    }
}