Files
xenia-rs/crates/xenia-gpu/src/tiled_address.rs
MechaCat02 79eb52c378 xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)
First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).

Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.

Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:29:38 +02:00

179 lines
6.8 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Xenos tiled-texture address formula (2D, Tiled2D layout).
//!
//! Port of `xenia-canary/src/xenia/gpu/texture_address.h:190-208` Tiled2D /
//! `TiledCombine` helpers. The Xbox 360 GPU stores textures in a 32×32-block
//! macro-tile pattern with bank+pipe interleave for its internal DRAM
//! banks; this formula inverts that so we can read pixels out in linear
//! order, given the tiled source buffer.
//!
//! We use this in two places during P4:
//! - `vd_swap` frontbuffer detile (1280×720 k_8_8_8_8 usually).
//! - Any place we need to read tiled guest memory into a host-linear
//! buffer for CPU-side conversion before upload.
/// Tile size constants from canary.
///
/// log2 of the macro-tile width: `1 << 5` = 32 px. Used both to split an
/// `x` coordinate into its macro-tile column and to round pitches up to a
/// whole number of macro tiles.
pub const MACRO_TILE_WIDTH_LOG2: u32 = 5; // 32 px
/// log2 of the macro-tile height for the 2D layout: `1 << 5` = 32 px. Used
/// to split a `y` coordinate into its macro-tile row.
pub const MACRO_TILE_HEIGHT_2D_LOG2: u32 = 5; // 32 px
/// Canary's `TiledCombine` helper: scatters the bits of the outer/inner
/// byte offset around the `y_lsb`, `pipe` and `bank` interleave fields to
/// form the final tiled byte address.
///
/// Resulting bit layout (low → high):
///
/// | bits   | source                          |
/// |--------|---------------------------------|
/// | 0..=3  | `outer_inner_bytes` bits 0..=3  |
/// | 4      | `y_lsb`                         |
/// | 5      | `outer_inner_bytes` bit 4       |
/// | 6..    | `pipe` (shifted left by 6)      |
/// | 8..=10 | `outer_inner_bytes` bits 5..=7  |
/// | 11..   | `bank` (shifted left by 11)     |
/// | 12..   | `outer_inner_bytes` bits 8 and up |
///
/// `bank`, `pipe` and `y_lsb` are not masked here; the caller is expected
/// to pass values that already fit their fields.
#[inline]
fn tiled_combine(outer_inner_bytes: u32, bank: u32, pipe: u32, y_lsb: u32) -> u32 {
    // Slice the linear offset into the four pieces that get relocated.
    let low_nibble = outer_inner_bytes & 0b1111;
    let bit_4 = (outer_inner_bytes >> 4) & 0b1;
    let bits_5_to_7 = (outer_inner_bytes >> 5) & 0b111;
    let upper = outer_inner_bytes >> 8;

    // Reassemble, listed in ascending order of destination bit position.
    low_nibble
        | (y_lsb << 4)
        | (bit_4 << 5)
        | (pipe << 6)
        | (bits_5_to_7 << 8)
        | (bank << 11)
        | (upper << 12)
}
/// Byte offset of block `(x, y)` inside a 2D-tiled surface.
///
/// * `x`, `y` — block coordinates within the surface.
/// * `pitch_aligned` — surface pitch in blocks; it is divided by the
///   macro-tile width here, so it should already be a multiple of it
///   (see [`align_pitch_to_macro_tile`]).
/// * `bytes_per_block_log2` — log2 of the block size in bytes (2 for a
///   4-byte block such as RGBA8888).
///
/// The result is a `u32` rather than canary's signed `int`, since callers
/// stay in unsigned arithmetic.
///
/// This mirrors the formula in canary's `texture_address.h:190-208`. The
/// bit interleave is not a linear function of `(x, y)`, so do not
/// "simplify" it without re-reading the reference.
pub fn tiled_2d_offset(x: u32, y: u32, pitch_aligned: u32, bytes_per_block_log2: u32) -> u32 {
    // Interleave fields taken directly from coordinate bits.
    let y_lsb = y & 1;
    let bank = (y >> 4) & 0b1;
    let pipe = ((x >> 3) & 0b11) ^ (((y >> 3) & 0b1) << 1);

    // Outer part: row-major index of the macro tile that holds (x, y),
    // scaled by 64 to leave room for the 6-bit inner index below.
    let tiles_per_row = pitch_aligned >> MACRO_TILE_WIDTH_LOG2;
    let tile_row = y >> MACRO_TILE_HEIGHT_2D_LOG2;
    let tile_col = x >> MACRO_TILE_WIDTH_LOG2;
    let outer = (tile_row * tiles_per_row + tile_col) << 6;

    // Inner part: y bits 1..=3 over x bits 0..=2. Bit 0 of y is skipped
    // because it is carried separately as `y_lsb`.
    let inner = (((y >> 1) & 0b111) << 3) | (x & 0b111);

    // Convert the combined block index to bytes, then apply the interleave.
    let linear_bytes = (outer | inner) << bytes_per_block_log2;
    tiled_combine(linear_bytes, bank, pipe, y_lsb)
}
/// Round `pitch_pixels` up to the nearest multiple of the macro-tile width
/// (32 px). Xenos requires tile-aligned pitches; non-aligned values pad.
#[inline]
pub fn align_pitch_to_macro_tile(pitch_pixels: u32) -> u32 {
let mask = (1u32 << MACRO_TILE_WIDTH_LOG2) - 1;
(pitch_pixels + mask) & !mask
}
/// Detile a 2D tiled surface into a linear destination buffer.
///
/// For every block `(x, y)` with `x < width` and `y < height`, `bpp` bytes
/// are copied from `src` at the offset given by [`tiled_2d_offset`] to
/// `dst` at the row-major position `(y * width + x) * bpp`.
///
/// * `src` — the tiled source bytes.
/// * `dst` — the linear destination; must hold at least
///   `width * height * bpp` bytes. Bytes beyond that are left untouched.
/// * `width`, `height` — size of the region to detile, in blocks.
/// * `pitch_pixels` — pitch of the tiled surface; rounded up to the
///   macro-tile width internally via [`align_pitch_to_macro_tile`].
///   NOTE(review): nothing here checks `pitch_pixels >= width` — confirm
///   callers guarantee it.
/// * `bpp` — bytes per block (4 for a 32-bit format such as RGBA8888, 2 for
///   a 16-bit format). Must be a non-zero power of two, because the tiled
///   address formula works on `log2(bpp)`.
///
/// # Errors
///
/// Returns `Err(())` (defensive — callers can downgrade silently) when:
/// * `bpp` is zero or not a power of two,
/// * `width * height * bpp` overflows `usize` or exceeds `dst.len()`
///   (checked up front, before anything is written),
/// * a computed source offset falls outside `src`. In that case `dst` may
///   already contain the blocks copied before the failing one.
///
/// An empty region (`width == 0` or `height == 0`) is always `Ok(())`.
pub fn detile_2d(
    src: &[u8],
    dst: &mut [u8],
    width: u32,
    height: u32,
    pitch_pixels: u32,
    bpp: u32,
) -> Result<(), ()> {
    // Nothing to copy; also keeps the chunk sizes below non-zero.
    if width == 0 || height == 0 {
        return Ok(());
    }
    // `trailing_zeros` only equals log2 for exact powers of two; anything
    // else would address the source with the wrong block size.
    if !bpp.is_power_of_two() {
        return Err(());
    }
    let bpp_log2 = bpp.trailing_zeros();
    let bpp_u = bpp as usize;
    let pitch_aligned = align_pitch_to_macro_tile(pitch_pixels);

    // Size the destination in `usize` with checked arithmetic so large
    // surfaces cannot wrap a `u32` product.
    let dst_pitch_bytes = (width as usize).checked_mul(bpp_u).ok_or(())?;
    let required = dst_pitch_bytes.checked_mul(height as usize).ok_or(())?;
    if dst.len() < required {
        return Err(());
    }

    // Walk the destination row by row and block by block; only the source
    // side needs a per-block bounds check.
    for (y, dst_row) in (0..height).zip(dst.chunks_exact_mut(dst_pitch_bytes)) {
        for (x, dst_block) in (0..width).zip(dst_row.chunks_exact_mut(bpp_u)) {
            let src_off = tiled_2d_offset(x, y, pitch_aligned, bpp_log2) as usize;
            let src_block = src_off
                .checked_add(bpp_u)
                .and_then(|src_end| src.get(src_off..src_end))
                .ok_or(())?;
            dst_block.copy_from_slice(src_block);
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Block (0, 0) maps to byte offset 0.
    #[test]
    fn origin_is_zero() {
        let offset = tiled_2d_offset(0, 0, 64, 2);
        assert_eq!(offset, 0);
    }

    /// Round-trip: fill a tiled buffer through `tiled_2d_offset`, detile
    /// it, and expect every block at its row-major linear position.
    #[test]
    fn roundtrip_small_pattern() {
        const W: u32 = 32;
        const H: u32 = 16;
        const BPP: u32 = 4;
        let pitch = align_pitch_to_macro_tile(W);

        // Recognisable 4-byte pattern for the block at (x, y).
        let texel = |x: u32, y: u32| [x as u8, y as u8, (x ^ y) as u8, 0xFF];
        // Every (x, y) in the surface, row by row.
        let coords = || (0..H).flat_map(|y| (0..W).map(move |x| (x, y)));

        // Size the tiled buffer to cover the largest offset the formula
        // produces for this surface.
        let tiled_len = coords()
            .map(|(x, y)| tiled_2d_offset(x, y, pitch, 2) as usize + 4)
            .max()
            .unwrap();
        let mut tiled = vec![0u8; tiled_len];
        for (x, y) in coords() {
            let off = tiled_2d_offset(x, y, pitch, 2) as usize;
            tiled[off..off + 4].copy_from_slice(&texel(x, y));
        }

        let mut linear = vec![0u8; (W * H * BPP) as usize];
        detile_2d(&tiled, &mut linear, W, H, pitch, BPP).expect("detile ok");

        // Each block must have landed at its linear offset.
        for (x, y) in coords() {
            let lin = ((y * W + x) * BPP) as usize;
            assert_eq!(&linear[lin..lin + 4], texel(x, y).as_slice());
        }
    }

    /// Every block in a 32×16 region must get its own offset (no
    /// aliasing between neighbouring coordinates).
    #[test]
    fn neighbouring_pixels_have_distinct_offsets() {
        let mut offsets = HashSet::new();
        for y in 0..16 {
            for x in 0..32 {
                let newly_inserted = offsets.insert(tiled_2d_offset(x, y, 32, 2));
                assert!(newly_inserted);
            }
        }
    }

    /// Pitch alignment rounds up to a multiple of 32: 1280 stays 1280,
    /// 1281 becomes 1312, 31 becomes 32.
    #[test]
    fn align_pitch_rounds_up_to_32() {
        for (input, expected) in [(1280, 1280), (1281, 1312), (31, 32)] {
            assert_eq!(align_pitch_to_macro_tile(input), expected);
        }
    }
}