Files
xenia-rs/crates/xenia-gpu/src/texture_cache.rs
MechaCat02 79eb52c378 xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)
First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).

Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.

Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:29:38 +02:00

971 lines
33 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Texture cache — P5.
//!
//! Two-layer design mirroring canary's `TextureCache`:
//!
//! * **CPU layer** (this module): owns decoded, linear, host-endian texel
//! byte buffers keyed by [`TextureKey`]. `ensure_cached` consults the
//! guest memory's page-version counter to decide whether the cached
//! bytes are still fresh and re-decodes on miss or staleness.
//! * **GPU layer** (xenia-ui `texture_cache_host`): owns the
//! `wgpu::Texture` + `TextureView` for each cached key; pulls decoded
//! bytes from this CPU layer on upload.
//!
//! Canary references: `texture_cache.h/.cc`, `texture_info.cc`, and
//! `texture_info_formats.inl` for the format table.
use std::collections::HashMap;
use crate::tiled_address;
/// Xenos texture formats — `xenos::TextureFormat` at `xenos.h:489-579`.
/// Each named variant carries the raw enum number the guest writes into
/// `xe_gpu_texture_fetch_t.format`; raw values without a named variant
/// travel in [`TextureFormat::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum TextureFormat {
    K1Reverse = 0,
    K1 = 1,
    K8 = 2,
    K1555 = 3,
    K565 = 4,
    K6_5_5 = 5,
    K8888 = 6,
    K1010102 = 7,
    K8_8 = 10,
    K4_4_4_4 = 15,
    K10_11_11 = 16,
    K11_11_10 = 17,
    Dxt1 = 18,
    Dxt2_3 = 19,
    Dxt4_5 = 20,
    K24_8 = 22,
    K24_8Float = 23,
    K16 = 24,
    K16_16 = 25,
    K16_16_16_16 = 26,
    K16Float = 30,
    K16_16Float = 31,
    K16_16_16_16Float = 32,
    K32 = 33,
    K32_32 = 34,
    K32_32_32_32 = 35,
    K32Float = 36,
    K32_32Float = 37,
    K32_32_32_32Float = 38,
    Unknown(u8),
}

impl TextureFormat {
    /// Map a raw format number onto its variant. Only the low six bits
    /// are looked at; anything without a named variant comes back as
    /// `Unknown` holding the masked value.
    pub fn from_raw(v: u8) -> Self {
        let code = v & 0x3F;
        match code {
            0 => Self::K1Reverse,
            1 => Self::K1,
            2 => Self::K8,
            3 => Self::K1555,
            4 => Self::K565,
            5 => Self::K6_5_5,
            6 => Self::K8888,
            7 => Self::K1010102,
            10 => Self::K8_8,
            15 => Self::K4_4_4_4,
            16 => Self::K10_11_11,
            17 => Self::K11_11_10,
            18 => Self::Dxt1,
            19 => Self::Dxt2_3,
            20 => Self::Dxt4_5,
            22 => Self::K24_8,
            23 => Self::K24_8Float,
            24 => Self::K16,
            25 => Self::K16_16,
            26 => Self::K16_16_16_16,
            30 => Self::K16Float,
            31 => Self::K16_16Float,
            32 => Self::K16_16_16_16Float,
            33 => Self::K32,
            34 => Self::K32_32,
            35 => Self::K32_32_32_32,
            36 => Self::K32Float,
            37 => Self::K32_32Float,
            38 => Self::K32_32_32_32Float,
            _ => Self::Unknown(code),
        }
    }

    /// Block footprint (width × height in texels) and storage size of one
    /// block. Uncompressed formats use 1×1 blocks; the DXT family uses
    /// 4×4 compressed blocks.
    pub fn block_info(self) -> BlockInfo {
        // Footprint: only the DXT formats are block-compressed.
        let edge: u8 = match self {
            Self::Dxt1 | Self::Dxt2_3 | Self::Dxt4_5 => 4,
            _ => 1,
        };
        // Storage size of one block, grouped by byte count.
        let bytes: u8 = match self {
            // The 1-bit formats are rounded up to a whole byte.
            Self::K1Reverse | Self::K1 | Self::K8 => 1,
            Self::K1555
            | Self::K565
            | Self::K6_5_5
            | Self::K4_4_4_4
            | Self::K16
            | Self::K16Float
            | Self::K8_8 => 2,
            Self::K8888
            | Self::K1010102
            | Self::K10_11_11
            | Self::K11_11_10
            | Self::K24_8
            | Self::K24_8Float
            | Self::K16_16
            | Self::K16_16Float
            | Self::K32
            | Self::K32Float => 4,
            // Safe-ish fallback for formats we do not know.
            Self::Unknown(_) => 4,
            Self::K16_16_16_16
            | Self::K16_16_16_16Float
            | Self::K32_32
            | Self::K32_32Float
            | Self::Dxt1 => 8,
            Self::K32_32_32_32
            | Self::K32_32_32_32Float
            | Self::Dxt2_3
            | Self::Dxt4_5 => 16,
        };
        BlockInfo::new(edge, edge, bytes)
    }

    /// Whether the host texture cache can take this format. `K565` is
    /// included even though it is CPU-expanded to `Rgba8Unorm` on decode;
    /// `Dxt2_3` and `Dxt4_5` correspond to BC2 and BC3.
    pub fn is_host_supported(self) -> bool {
        match self {
            Self::K8888 | Self::K565 | Self::Dxt1 | Self::Dxt2_3 | Self::Dxt4_5 => true,
            _ => false,
        }
    }
}
/// Texel footprint and storage size of one block of a texture format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct BlockInfo {
    /// Block width in texels.
    pub block_w: u8,
    /// Block height in texels.
    pub block_h: u8,
    /// Storage size of one block in bytes.
    pub bytes_per_block: u8,
}

impl BlockInfo {
    /// Bundle the three block parameters.
    pub const fn new(block_w: u8, block_h: u8, bytes_per_block: u8) -> Self {
        Self {
            block_w,
            block_h,
            bytes_per_block,
        }
    }

    /// log2 of `bytes_per_block` for the sizes 1, 2, 4, 8 and 16.
    /// Every other size yields 0.
    pub fn log2_bpb(self) -> u32 {
        let bpb = self.bytes_per_block;
        if bpb.is_power_of_two() && bpb <= 16 {
            bpb.trailing_zeros()
        } else {
            0
        }
    }
}
/// Xenos `Endian` enum from `xenos.h:198-204`. 2-bit field in fetch dword 1.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Endian {
    None = 0,
    Swap8In16 = 1,
    Swap8In32 = 2,
    Swap16In32 = 3,
}

impl Endian {
    /// Decode the 2-bit endian field; bits above the low two are ignored.
    pub fn from_raw(v: u8) -> Self {
        const BY_CODE: [Endian; 4] = [
            Endian::None,
            Endian::Swap8In16,
            Endian::Swap8In32,
            Endian::Swap16In32,
        ];
        BY_CODE[usize::from(v & 0x3)]
    }

    /// Apply this endian mode's swap to one 32-bit unit. Matches canary's
    /// `shaders/endian.xesli:25-55` semantics.
    pub fn swap32(self, v: u32) -> u32 {
        match self {
            Self::None => v,
            // Swap the two bytes inside each 16-bit half.
            Self::Swap8In16 => {
                let [b3, b2, b1, b0] = v.to_be_bytes();
                u32::from_be_bytes([b2, b3, b0, b1])
            }
            // Full byte reversal.
            Self::Swap8In32 => u32::from_be_bytes(v.to_le_bytes()),
            // Exchange the two 16-bit halves.
            Self::Swap16In32 => (v << 16) | (v >> 16),
        }
    }
}
/// Texture dimensionality (`xenos::DataDimension`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Dimension {
    D1 = 0,
    D2 = 1,
    D3Stacked = 2,
    Cube = 3,
}

impl Dimension {
    /// Decode the 2-bit dimension field; bits above the low two are ignored.
    pub fn from_raw(v: u8) -> Self {
        match v & 0b11 {
            0 => Self::D1,
            1 => Self::D2,
            2 => Self::D3Stacked,
            _ => Self::Cube,
        }
    }
}
/// Identity of a cached texture. Matches canary's `TextureCache::TextureKey`
/// at the semantic level — we exclude mip/border state for P5 since neither
/// is populated yet.
///
/// Used as the `HashMap` key of `TextureCache`, so every field takes part
/// in the derived `Eq`/`Hash`: two fetch constants share a cache entry only
/// when all fields agree.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TextureKey {
/// Guest physical base (byte address — already shifted left by 12 from
/// the fetch-constant `base_address` field).
pub base_address: u32,
/// Width in texels (`decode_fetch_constant` stores the field value + 1).
pub width: u16,
/// Height in texels (field value + 1; 1 for 1D textures).
pub height: u16,
/// Depth / stack-depth field value + 1 (1 for 1D textures).
pub depth_or_slices: u16,
/// Texel format decoded from fetch dword 1.
pub format: TextureFormat,
/// Endian swap mode decoded from fetch dword 1.
pub endian: Endian,
/// Dimensionality decoded from fetch dword 5.
pub dimension: Dimension,
/// Tiled flag (bit 31 of fetch dword 0).
pub tiled: bool,
/// Row pitch in texels, already aligned to 32. Canary stores pitch/32
/// in the fetch constant; we keep the raw texel count to avoid
/// callers remembering to shift.
pub pitch_texels: u16,
}
/// Decode a 6-dword texture fetch constant (layout at `xenos.h:1229-1329`).
///
/// Returns `None` when:
/// * the constant is obviously unset (dwords 0 and 1 are both zero),
/// * the `type` field (low 2 bits of dword 0) is not 2, the texture marker,
/// * a decoded extent does not fit the `u16` fields of [`TextureKey`].
///   Only the 24-bit 1D width can exceed that range; it used to be
///   truncated silently (or overflow on `+ 1`), yielding a key for the
///   wrong texture.
///
/// `pitch_texels` in the result is the fetch-constant pitch (stored as
/// pitch/32) converted back to texels and raised to at least `width`.
pub fn decode_fetch_constant(dwords: [u32; 6]) -> Option<TextureKey> {
    let [d0, d1, d2, _, _, d5] = dwords;

    // An all-zero constant would also fail the type check below; the
    // explicit test keeps the "unset" case obvious.
    if d0 == 0 && d1 == 0 {
        return None;
    }
    // type: 0 = vertex fetch constant, 2 = texture.
    if d0 & 0x3 != 2 {
        return None;
    }

    let pitch_5 = (d0 >> 22) & 0x1FF; // pitch/32 in texels, 9 bits
    let tiled = (d0 >> 31) & 1 != 0;
    let format = TextureFormat::from_raw((d1 & 0x3F) as u8);
    let endian = Endian::from_raw(((d1 >> 6) & 0x3) as u8);
    let base_address = (d1 >> 12) << 12; // 4 KiB page number back to a byte address
    let dim = Dimension::from_raw(((d5 >> 9) & 0x3) as u8);

    // Every size field stores "extent - 1"; field widths depend on dimension.
    // NOTE(review): canary appears to read cube sizes through the 2D layout
    // rather than the 3D one used here — confirm against xenos.h.
    let (width_m1, height_m1, depth_m1) = match dim {
        Dimension::D1 => (d2 & 0x00FF_FFFF, 0, 0),
        Dimension::D2 => (d2 & 0x1FFF, (d2 >> 13) & 0x1FFF, (d2 >> 26) & 0x3F),
        Dimension::D3Stacked | Dimension::Cube => {
            (d2 & 0x7FF, (d2 >> 11) & 0x7FF, (d2 >> 22) & 0x3FF)
        }
    };

    // Fields are at most 24 bits wide, so `+ 1` cannot overflow u32; the
    // range check is what protects the narrowing to u16.
    let extent = |minus_one: u32| -> Option<u16> {
        let texels = minus_one + 1;
        if texels <= u32::from(u16::MAX) {
            Some(texels as u16)
        } else {
            None
        }
    };
    let width = extent(width_m1)?;
    let height = extent(height_m1)?;
    let depth = extent(depth_m1)?;

    // pitch_5 <= 511, so the product tops out at 16352 and fits u16.
    let pitch_texels = ((pitch_5 as u16) * 32).max(width);

    Some(TextureKey {
        base_address,
        width,
        height,
        depth_or_slices: depth,
        format,
        endian,
        dimension: dim,
        tiled,
        pitch_texels,
    })
}
/// Decoded, linear, host-endian texture bytes ready for wgpu upload.
#[derive(Debug, Clone)]
pub struct CachedTexture {
/// Key this entry was decoded for.
pub key: TextureKey,
/// The `current_version` value that was passed to
/// `TextureCache::ensure_cached` when `bytes` was decoded; a later call
/// with a larger version triggers a re-decode.
pub version_when_uploaded: u64,
/// Tightly packed. Layout depends on `key.format`:
/// - `K8888` → `width*height*4` bytes in Rgba8Unorm order.
/// - `K565` → `width*height*4` bytes, each texel expanded to Rgba8Unorm
/// with A = 0xFF.
/// - `Dxt1` → `ceil(w/4)*ceil(h/4)*8` bytes of raw BC1 blocks, after
/// block-level detile + dword-endian swap.
/// - `Dxt2_3` / `Dxt4_5` → same as `Dxt1` but with 16-byte blocks.
pub bytes: Vec<u8>,
}
impl CachedTexture {
/// Size of the decoded payload in bytes (`bytes.len()`).
pub fn byte_size(&self) -> usize {
self.bytes.len()
}
}
/// Errors that can happen during decode. The `ensure_cached` caller maps
/// these to `gpu.texture.reject{reason}` metrics so the HUD surfaces when
/// a texture fell back.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeError {
/// `ensure_cached` has no decoder for the key's format.
UnsupportedFormat,
/// The guest read came back shorter than the texture span, or
/// detiling reported an error.
OutOfBounds,
/// The key's width or height is zero.
ZeroSize,
}
/// Read up to `len` bytes from guest memory starting at `addr` into a
/// freshly-allocated buffer.
///
/// The read never wraps around the 32-bit address space: if
/// `addr + len` would pass `0xFFFF_FFFF`, the result is truncated at the
/// last valid address. Callers detect the short read by comparing the
/// returned length against `len`.
///
/// (The previous loop read one byte from the wrapped address before
/// stopping, so a request whose final byte wrapped came back full-length
/// and slipped past the callers' short-read checks.)
///
/// The `MemoryAccess` trait exposes per-byte reads only, so this still
/// makes one `read_u8` call per byte; only the output allocation is
/// done up front.
pub fn read_guest_bytes(
    mem: &dyn xenia_memory::MemoryAccess,
    addr: u32,
    len: usize,
) -> Vec<u8> {
    // Number of addressable bytes from `addr` up to and including
    // 0xFFFF_FFFF. Computed in u64 because it is 2^32 when `addr` is 0.
    let to_top = u64::from(u32::MAX - addr) + 1;
    let readable = if (len as u64) > to_top {
        // `to_top < len <= usize::MAX`, so the narrowing is lossless.
        to_top as usize
    } else {
        len
    };
    // `offset < readable <= 2^32 - addr`, so neither the cast nor the
    // addition can overflow.
    (0..readable)
        .map(|offset| mem.read_u8(addr + offset as u32))
        .collect()
}
/// Apply `endian`'s swap to every complete 32-bit dword of `buf`, in
/// place. A trailing partial dword (when `buf.len()` is not a multiple
/// of 4) is left untouched; `Endian::None` is a no-op.
pub fn apply_endian_32(buf: &mut [u8], endian: Endian) {
    if let Endian::None = endian {
        return;
    }
    for dword in buf.chunks_exact_mut(4) {
        let value = u32::from_le_bytes([dword[0], dword[1], dword[2], dword[3]]);
        dword.copy_from_slice(&endian.swap32(value).to_le_bytes());
    }
}
/// Decode a k_8_8_8_8 texture out of guest memory into tightly packed
/// `Rgba8Unorm` bytes (`width * height * 4`).
///
/// Steps: read `aligned_pitch * height * 4` bytes at `key.base_address`,
/// apply the declared endian swap per dword, detile via the Xenos
/// Tiled2D formula (or copy row-by-row for linear textures), then swap
/// B↔R in every texel.
///
/// # Errors
/// * `ZeroSize` — width or height is 0.
/// * `OutOfBounds` — the guest read came back short, detiling failed, a
///   size product overflows `u32`, or (linear path) `width` exceeds the
///   aligned pitch. The last two used to panic instead.
pub fn decode_k8888_tiled(
    key: &TextureKey,
    mem: &dyn xenia_memory::MemoryAccess,
) -> Result<Vec<u8>, DecodeError> {
    const BYTES_PER_TEXEL: u32 = 4;

    if key.width == 0 || key.height == 0 {
        return Err(DecodeError::ZeroSize);
    }
    let w = u32::from(key.width);
    let h = u32::from(key.height);
    let pitch_aligned = tiled_address::align_pitch_to_macro_tile(u32::from(key.pitch_texels));

    // cols * rows * 4 with overflow reported instead of wrapping/panicking.
    let byte_len = |cols: u32, rows: u32| -> Result<usize, DecodeError> {
        cols.checked_mul(rows)
            .and_then(|texels| texels.checked_mul(BYTES_PER_TEXEL))
            .map(|bytes| bytes as usize)
            .ok_or(DecodeError::OutOfBounds)
    };

    let total_bytes = byte_len(pitch_aligned, h)?;
    let mut raw = read_guest_bytes(mem, key.base_address, total_bytes);
    if raw.len() < total_bytes {
        return Err(DecodeError::OutOfBounds);
    }
    apply_endian_32(&mut raw, key.endian);

    let mut linear = vec![0u8; byte_len(w, h)?];
    if key.tiled {
        if tiled_address::detile_2d(&raw, &mut linear, w, h, pitch_aligned, BYTES_PER_TEXEL)
            .is_err()
        {
            return Err(DecodeError::OutOfBounds);
        }
    } else {
        // A row wider than the source stride would index past `raw`.
        if w > pitch_aligned {
            return Err(DecodeError::OutOfBounds);
        }
        // Non-tiled: copy the first `w` texels of every pitch-sized row.
        let row_bytes = w as usize * BYTES_PER_TEXEL as usize;
        let src_stride = pitch_aligned as usize * BYTES_PER_TEXEL as usize;
        for (dst_row, src_row) in linear
            .chunks_exact_mut(row_bytes)
            .zip(raw.chunks_exact(src_stride))
        {
            dst_row.copy_from_slice(&src_row[..row_bytes]);
        }
    }

    // Xenos stores `k_8_8_8_8` in ARGB byte order (high byte = A). After
    // the guests' typical Swap8In32 the per-dword byte order becomes BGRA
    // in little-endian host bytes. Swap B↔R so we hand Rgba8Unorm to wgpu.
    for px in linear.chunks_exact_mut(4) {
        px.swap(0, 2);
    }
    Ok(linear)
}
/// Decode a DXT-compressed texture to raw block bytes (no format
/// conversion — wgpu understands `Bc{1,2,3}RgbaUnorm` natively so the
/// GPU does the actual decompression on upload).
///
/// Xenos stores DXT blocks in 4×4 block-tiled order using the Tiled2D
/// formula, with stride counted in blocks. `bytes_per_block` is 8 for
/// BC1 (DXT1), 16 for BC2 (DXT2_3) and BC3 (DXT4_5).
///
/// The result holds `ceil(w/4) * ceil(h/4)` blocks, tightly packed.
///
/// # Errors
/// * `ZeroSize` — width or height is 0.
/// * `OutOfBounds` — the guest read came back short, detiling failed, a
///   size product overflows `u32`, or (linear path) the row is wider
///   than the aligned block pitch. The last two used to panic instead.
pub fn decode_dxt_tiled(
    key: &TextureKey,
    mem: &dyn xenia_memory::MemoryAccess,
    bytes_per_block: u32,
) -> Result<Vec<u8>, DecodeError> {
    const BLOCK_EDGE: u32 = 4;

    if key.width == 0 || key.height == 0 {
        return Err(DecodeError::ZeroSize);
    }

    // cols * rows * bytes_per_block with overflow reported as an error.
    let byte_len = |cols: u32, rows: u32| -> Result<usize, DecodeError> {
        cols.checked_mul(rows)
            .and_then(|blocks| blocks.checked_mul(bytes_per_block))
            .map(|bytes| bytes as usize)
            .ok_or(DecodeError::OutOfBounds)
    };

    let w_blocks = u32::from(key.width).div_ceil(BLOCK_EDGE);
    let h_blocks = u32::from(key.height).div_ceil(BLOCK_EDGE);
    let pitch_blocks = tiled_address::align_pitch_to_macro_tile(
        u32::from(key.pitch_texels).div_ceil(BLOCK_EDGE),
    );

    let total_bytes = byte_len(pitch_blocks, h_blocks)?;
    let mut raw = read_guest_bytes(mem, key.base_address, total_bytes);
    if raw.len() < total_bytes {
        return Err(DecodeError::OutOfBounds);
    }

    // DXT blocks are stored as 4×u16 + 4×u8-indices (BC1) or similar
    // u16/u32-width fields for BC2/BC3; the Xbox 360's big-endian word
    // order requires an endian swap at the u16/u32 level regardless of
    // which BC-family format.
    apply_endian_32(&mut raw, key.endian);

    let mut out = vec![0u8; byte_len(w_blocks, h_blocks)?];
    if key.tiled {
        let detiled = tiled_address::detile_2d(
            &raw,
            &mut out,
            w_blocks,
            h_blocks,
            pitch_blocks,
            bytes_per_block,
        );
        if detiled.is_err() {
            return Err(DecodeError::OutOfBounds);
        }
    } else {
        // A row wider than the source stride would index past `raw`.
        if w_blocks > pitch_blocks {
            return Err(DecodeError::OutOfBounds);
        }
        // Linear: copy the first `w_blocks` blocks of every pitch-sized row.
        let block_bytes = bytes_per_block as usize;
        let row_bytes = w_blocks as usize * block_bytes;
        let src_stride = pitch_blocks as usize * block_bytes;
        for row in 0..h_blocks as usize {
            let src = row * src_stride;
            let dst = row * row_bytes;
            out[dst..dst + row_bytes].copy_from_slice(&raw[src..src + row_bytes]);
        }
    }
    Ok(out)
}
/// BC1 / DXT1 decode: forwards to [`decode_dxt_tiled`] with 8-byte blocks.
pub fn decode_dxt1_tiled(
    key: &TextureKey,
    mem: &dyn xenia_memory::MemoryAccess,
) -> Result<Vec<u8>, DecodeError> {
    const BC1_BLOCK_BYTES: u32 = 8;
    decode_dxt_tiled(key, mem, BC1_BLOCK_BYTES)
}
/// BC2 / DXT2_3 decode: forwards to [`decode_dxt_tiled`] with 16-byte blocks.
pub fn decode_dxt23_tiled(
    key: &TextureKey,
    mem: &dyn xenia_memory::MemoryAccess,
) -> Result<Vec<u8>, DecodeError> {
    const BC2_BLOCK_BYTES: u32 = 16;
    decode_dxt_tiled(key, mem, BC2_BLOCK_BYTES)
}
/// BC3 / DXT4_5 decode: forwards to [`decode_dxt_tiled`] with 16-byte blocks.
pub fn decode_dxt45_tiled(
    key: &TextureKey,
    mem: &dyn xenia_memory::MemoryAccess,
) -> Result<Vec<u8>, DecodeError> {
    const BC3_BLOCK_BYTES: u32 = 16;
    decode_dxt_tiled(key, mem, BC3_BLOCK_BYTES)
}
/// **k_5_6_5** — 16-bit R:5 G:6 B:5 per texel (Xbox stores R in the high
/// 5 bits of the 16-bit word). We unpack each texel into 4 bytes of
/// `Rgba8Unorm` (A = 0xFF). wgpu doesn't ship `R5G6B5Unorm` as a
/// sampled texture format on every backend, so CPU-side conversion is
/// the safe path even if it's 2× the texture memory.
///
/// Tiling: Tiled2D at the **texel** level (block = 1 texel = 2 bytes),
/// then we expand each 2-byte u16 into the 4-byte Rgba8 in the linear
/// output buffer.
///
/// # Errors
/// * `ZeroSize` — width or height is 0.
/// * `OutOfBounds` — the guest read came back short, detiling failed, a
///   size product overflows `u32`, or (linear path) `width` exceeds the
///   aligned pitch. The last two used to panic instead.
pub fn decode_k565_tiled(
    key: &TextureKey,
    mem: &dyn xenia_memory::MemoryAccess,
) -> Result<Vec<u8>, DecodeError> {
    const SRC_BYTES_PER_TEXEL: u32 = 2;
    const DST_BYTES_PER_TEXEL: u32 = 4;

    if key.width == 0 || key.height == 0 {
        return Err(DecodeError::ZeroSize);
    }
    let w = u32::from(key.width);
    let h = u32::from(key.height);
    // Pitch/block counts — block = 1 texel here, 2 bytes.
    let pitch_aligned = tiled_address::align_pitch_to_macro_tile(u32::from(key.pitch_texels));

    // cols * rows * unit with overflow reported as an error.
    let byte_len = |cols: u32, rows: u32, unit: u32| -> Result<usize, DecodeError> {
        cols.checked_mul(rows)
            .and_then(|texels| texels.checked_mul(unit))
            .map(|bytes| bytes as usize)
            .ok_or(DecodeError::OutOfBounds)
    };

    let total_bytes = byte_len(pitch_aligned, h, SRC_BYTES_PER_TEXEL)?;
    let mut raw = read_guest_bytes(mem, key.base_address, total_bytes);
    if raw.len() < total_bytes {
        return Err(DecodeError::OutOfBounds);
    }
    // 16-bit word order is endian-swap-sensitive.
    apply_endian_32(&mut raw, key.endian);

    // Step 1: detile (bytes_per_block=2, tile in blocks=texels).
    let mut linear_u16 = vec![0u8; byte_len(w, h, SRC_BYTES_PER_TEXEL)?];
    if key.tiled {
        if tiled_address::detile_2d(&raw, &mut linear_u16, w, h, pitch_aligned, SRC_BYTES_PER_TEXEL)
            .is_err()
        {
            return Err(DecodeError::OutOfBounds);
        }
    } else {
        // A row wider than the source stride would index past `raw`.
        if w > pitch_aligned {
            return Err(DecodeError::OutOfBounds);
        }
        let row_bytes = w as usize * SRC_BYTES_PER_TEXEL as usize;
        let src_stride = pitch_aligned as usize * SRC_BYTES_PER_TEXEL as usize;
        for (dst_row, src_row) in linear_u16
            .chunks_exact_mut(row_bytes)
            .zip(raw.chunks_exact(src_stride))
        {
            dst_row.copy_from_slice(&src_row[..row_bytes]);
        }
    }

    // Step 2: expand each 16-bit RGB565 to Rgba8Unorm. Each texel is read
    // as a little-endian u16 from the post-swap bytes.
    let mut rgba = Vec::with_capacity(byte_len(w, h, DST_BYTES_PER_TEXEL)?);
    for texel in linear_u16.chunks_exact(2) {
        let word = u16::from_le_bytes([texel[0], texel[1]]);
        // 5 bits R (bits 11-15), 6 bits G (5-10), 5 bits B (0-4).
        let r5 = ((word >> 11) & 0x1F) as u8;
        let g6 = ((word >> 5) & 0x3F) as u8;
        let b5 = (word & 0x1F) as u8;
        // Expand to full-range u8 by replicating the high bits into the
        // low ones (0b11111 → 0xFF, the standard 565→888 convention).
        rgba.extend_from_slice(&[
            (r5 << 3) | (r5 >> 2),
            (g6 << 2) | (g6 >> 4),
            (b5 << 3) | (b5 >> 2),
            0xFF,
        ]);
    }
    Ok(rgba)
}
/// Version-aware CPU-side texture cache. Entries are keyed by
/// [`TextureKey`] and remember the guest-memory version they were decoded
/// at (`version_when_uploaded`). `ensure_cached` compares that watermark
/// with the version the caller supplies and re-decodes when the caller's
/// version is newer.
pub struct TextureCache {
    entries: HashMap<TextureKey, CachedTexture>,
    /// Monotonic counter of decodes performed — HUD surface.
    pub decodes_total: u64,
    /// Count of lookups that found an entry but had to treat it as stale.
    pub restale_total: u64,
}

impl Default for TextureCache {
    fn default() -> Self {
        Self {
            entries: HashMap::new(),
            decodes_total: 0,
            restale_total: 0,
        }
    }
}

impl TextureCache {
    /// Empty cache with both counters at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Number of cached textures.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// True when nothing is cached.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Look up an entry without any freshness check or decode.
    pub fn get(&self, key: &TextureKey) -> Option<&CachedTexture> {
        self.entries.get(key)
    }

    /// Return a cached (or freshly-decoded) texture. The caller supplies
    /// the current guest-memory page version covering the texture span
    /// (see `max_page_version_for`).
    ///
    /// An existing entry is reused when its watermark is at least
    /// `current_version`; otherwise it counts as stale and is decoded
    /// again. Formats without a decoder yield `UnsupportedFormat`; decoder
    /// errors are passed through and leave any existing entry in place.
    pub fn ensure_cached(
        &mut self,
        key: TextureKey,
        current_version: u64,
        mem: &dyn xenia_memory::MemoryAccess,
    ) -> Result<&CachedTexture, DecodeError> {
        // Fast path: copy the watermark out first so the returned borrow
        // is only created on the path that actually returns it.
        match self.entries.get(&key).map(|hit| hit.version_when_uploaded) {
            Some(cached_at) if cached_at >= current_version => {
                return Ok(&self.entries[&key]);
            }
            Some(_) => self.restale_total += 1,
            None => {}
        }

        let bytes = match key.format {
            TextureFormat::K8888 => decode_k8888_tiled(&key, mem)?,
            TextureFormat::K565 => decode_k565_tiled(&key, mem)?,
            TextureFormat::Dxt1 => decode_dxt1_tiled(&key, mem)?,
            TextureFormat::Dxt2_3 => decode_dxt23_tiled(&key, mem)?,
            TextureFormat::Dxt4_5 => decode_dxt45_tiled(&key, mem)?,
            _ => return Err(DecodeError::UnsupportedFormat),
        };
        self.decodes_total += 1;

        let fresh = CachedTexture {
            key,
            version_when_uploaded: current_version,
            bytes,
        };
        let slot = match self.entries.entry(key) {
            std::collections::hash_map::Entry::Occupied(mut occupied) => {
                occupied.insert(fresh);
                occupied.into_mut()
            }
            std::collections::hash_map::Entry::Vacant(vacant) => vacant.insert(fresh),
        };
        Ok(&*slot)
    }

    /// Total decoded bytes currently held across all entries.
    pub fn byte_budget(&self) -> usize {
        self.entries
            .values()
            .fold(0, |total, entry| total + entry.byte_size())
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::cell::Cell;
/// Flat byte array standing in for guest memory. Reads outside the
/// array return 0 and writes outside it are dropped; multi-byte accesses
/// are big-endian and built from the single-byte ones.
struct FakeMem(Box<[Cell<u8>]>);

impl FakeMem {
    fn from_vec(v: Vec<u8>) -> Self {
        let cells: Vec<Cell<u8>> = v.into_iter().map(Cell::new).collect();
        FakeMem(cells.into_boxed_slice())
    }
}

impl xenia_memory::MemoryAccess for FakeMem {
    fn read_u8(&self, a: u32) -> u8 {
        self.0.get(a as usize).map_or(0, Cell::get)
    }

    fn read_u16(&self, a: u32) -> u16 {
        u16::from_be_bytes(std::array::from_fn(|i| self.read_u8(a + i as u32)))
    }

    fn read_u32(&self, a: u32) -> u32 {
        u32::from_be_bytes(std::array::from_fn(|i| self.read_u8(a + i as u32)))
    }

    fn read_u64(&self, a: u32) -> u64 {
        u64::from_be_bytes(std::array::from_fn(|i| self.read_u8(a + i as u32)))
    }

    fn write_u8(&self, a: u32, v: u8) {
        if let Some(cell) = self.0.get(a as usize) {
            cell.set(v);
        }
    }

    fn write_u16(&self, a: u32, v: u16) {
        for (i, byte) in v.to_be_bytes().iter().enumerate() {
            self.write_u8(a + i as u32, *byte);
        }
    }

    fn write_u32(&self, a: u32, v: u32) {
        for (i, byte) in v.to_be_bytes().iter().enumerate() {
            self.write_u8(a + i as u32, *byte);
        }
    }

    fn write_u64(&self, a: u32, v: u64) {
        for (i, byte) in v.to_be_bytes().iter().enumerate() {
            self.write_u8(a + i as u32, *byte);
        }
    }

    // No host pointers are ever handed out.
    fn translate(&self, _: u32) -> Option<*const u8> {
        None
    }

    fn translate_mut(&self, _: u32) -> Option<*mut u8> {
        None
    }
}
#[test]
fn format_block_info_matches_canary_expectations() {
    // (format, expected block footprint and byte size)
    let expectations = [
        (TextureFormat::K8888, BlockInfo::new(1, 1, 4)),
        (TextureFormat::Dxt1, BlockInfo::new(4, 4, 8)),
        (TextureFormat::Dxt4_5, BlockInfo::new(4, 4, 16)),
    ];
    for (format, expected) in expectations.iter() {
        assert_eq!(format.block_info(), *expected);
    }
}
#[test]
fn endian_swap_variants() {
    const INPUT: u32 = 0x11223344;
    // (mode, expected result of swapping INPUT)
    let cases = [
        (Endian::None, 0x11223344u32),
        (Endian::Swap8In16, 0x22114433),
        (Endian::Swap8In32, 0x44332211),
        (Endian::Swap16In32, 0x33441122),
    ];
    for (mode, expected) in cases.iter() {
        assert_eq!(mode.swap32(INPUT), *expected);
    }
}
#[test]
fn decode_fetch_constant_rejects_empty() {
    // An all-zero constant is treated as unset.
    assert_eq!(decode_fetch_constant([0u32; 6]), None);
}
#[test]
fn decode_fetch_constant_parses_2d_k8888() {
    // Build a synthetic k_8_8_8_8 2D texture fetch constant:
    // dword0: pitch_5=40 (1280/32), tiled=1, type=2
    // dword1: format=6 (K8888), endian=2 (Swap8In32), base page 0xAB
    // dword2: width-1=1279, height-1=719, stack depth field 0
    // dword5: dimension=1 (2D)
    let d0 = 0x8000_0000 | (40u32 << 22) | 2;
    let d1 = (0xAB000u32 >> 12 << 12) | (2u32 << 6) | 6u32;
    let d2 = 1279u32 | ((719u32) << 13);
    let d5 = 1u32 << 9;
    let k = decode_fetch_constant([d0, d1, d2, 0, 0, d5]).expect("parsed");
    assert_eq!(k.format, TextureFormat::K8888);
    assert_eq!(k.endian, Endian::Swap8In32);
    // The base address is part of the setup, so check it survives the
    // page-number round trip (previously never asserted).
    assert_eq!(k.base_address, 0xAB000);
    assert_eq!(k.width, 1280);
    assert_eq!(k.height, 720);
    // Stack depth field 0 decodes to a depth of 1.
    assert_eq!(k.depth_or_slices, 1);
    assert_eq!(k.dimension, Dimension::D2);
    assert!(k.tiled);
    assert_eq!(k.pitch_texels, 1280);
}
#[test]
fn decode_k8888_roundtrip_linear() {
    // 4×4 linear image stored with a 32-texel pitch. The texel at (x, y)
    // is the big-endian dword A=0xFF, R=x, G=y, B=y*4+x, i.e. the memory
    // bytes [0xFF, x, y, y*4+x]. Swap8In32 reverses those bytes and the
    // B↔R swizzle then yields [x, y, y*4+x, 0xFF].
    const W: u32 = 4;
    const H: u32 = 4;
    const PITCH: u32 = 32;

    let mut guest = vec![0u8; (PITCH * H * 4) as usize];
    for (row, line) in guest.chunks_exact_mut((PITCH * 4) as usize).enumerate() {
        for (col, texel) in line.chunks_exact_mut(4).take(W as usize).enumerate() {
            let (x, y) = (col as u8, row as u8);
            texel.copy_from_slice(&[0xFF, x, y, y * 4 + x]);
        }
    }
    let mem = FakeMem::from_vec(guest);

    let key = TextureKey {
        base_address: 0,
        width: W as u16,
        height: H as u16,
        depth_or_slices: 1,
        format: TextureFormat::K8888,
        endian: Endian::Swap8In32,
        dimension: Dimension::D2,
        tiled: false,
        pitch_texels: PITCH as u16,
    };
    let decoded = decode_k8888_tiled(&key, &mem).expect("decode");

    assert_eq!(decoded.len(), 16 * 4);
    // First texel (0, 0).
    assert_eq!(&decoded[..4], &[0, 0, 0, 0xFF]);
    // Last texel (3, 3).
    let last = (3 * 4 + 3) * 4;
    assert_eq!(&decoded[last..last + 4], &[3, 3, 15, 0xFF]);
}
// ── First-Pixels M5 format tests ──────────────────────────────
/// BC2 (DXT2_3) roundtrip: 16-byte blocks, 4×4 image = 1 block.
/// The source block is the byte ramp 0, 1, …, 15; with `Endian::None`
/// and a linear layout the decoder must hand back exactly those bytes.
#[test]
fn decode_dxt23_small_roundtrip() {
    // 4×4 texture = 1 BC2 block (16 bytes). Only the first block matters;
    // the buffer is padded to 128 bytes, and `FakeMem` returns 0 for any
    // read beyond it, so the decoder's pitch-sized read always succeeds.
    let ramp: Vec<u8> = (0u8..16).collect();
    let mut bytes = vec![0u8; 128];
    bytes[..16].copy_from_slice(&ramp);
    let mem = FakeMem::from_vec(bytes);
    let key = TextureKey {
        base_address: 0,
        width: 4,
        height: 4,
        depth_or_slices: 1,
        format: TextureFormat::Dxt2_3,
        endian: Endian::None, // no swap — output must equal input
        dimension: Dimension::D2,
        tiled: false,
        pitch_texels: 32,
    };
    let out = decode_dxt23_tiled(&key, &mem).expect("decode");
    assert_eq!(out.len(), 16); // 1 block × 16 bytes
    assert_eq!(out, ramp);
}
/// BC3 (DXT4_5) goes through the same generic `decode_dxt_tiled` as BC2;
/// this checks the output is sized for 16-byte blocks so a wrong block
/// size cannot slip in unnoticed.
#[test]
fn decode_dxt45_uses_16byte_blocks() {
    let key = TextureKey {
        base_address: 0,
        width: 8,
        height: 4, // 2×1 blocks
        depth_or_slices: 1,
        format: TextureFormat::Dxt4_5,
        endian: Endian::None,
        dimension: Dimension::D2,
        tiled: false,
        pitch_texels: 32,
    };
    let guest = FakeMem::from_vec(vec![0xAAu8; 256]);
    let blocks = decode_dxt45_tiled(&key, &guest).expect("decode");
    // Two blocks of 16 bytes each.
    assert_eq!(blocks.len(), 2 * 16);
}
/// k_5_6_5 expansion: an all-ones texel (0xFFFF) must become opaque
/// white, and a pure-red texel (R=31, G=0, B=0 → 0xF800) must become
/// R=255 G=0 B=0 A=255 via the high-bit-replicate convention.
#[test]
fn decode_k565_texel_expansion() {
    // 2×1 linear image with a 32-texel pitch → 32 × 1 × 2 = 64 source
    // bytes. With `Endian::None` the decoder reads each texel as a
    // little-endian u16, so the words are laid out low byte first.
    const WHITE: u16 = 0xFFFF;
    const RED: u16 = 0xF800;
    let mut source = vec![0u8; 64];
    source[0..2].copy_from_slice(&WHITE.to_le_bytes());
    source[2..4].copy_from_slice(&RED.to_le_bytes());
    let mem = FakeMem::from_vec(source);

    let key = TextureKey {
        base_address: 0,
        width: 2,
        height: 1,
        depth_or_slices: 1,
        format: TextureFormat::K565,
        endian: Endian::None,
        dimension: Dimension::D2,
        tiled: false,
        pitch_texels: 32,
    };
    let rgba = decode_k565_tiled(&key, &mem).expect("decode");

    assert_eq!(rgba.len(), 2 * 4);
    let (first, second) = rgba.split_at(4);
    // Texel 0: white.
    assert_eq!(first, &[0xFF, 0xFF, 0xFF, 0xFF]);
    // Texel 1: pure red via 5-bit-expand (0b11111 → 0xFF).
    assert_eq!(second, &[0xFF, 0x00, 0x00, 0xFF]);
}
#[test]
fn is_host_supported_covers_m5_formats() {
    let supported = [
        TextureFormat::K8888,
        TextureFormat::K565,
        TextureFormat::Dxt1,
        TextureFormat::Dxt2_3,
        TextureFormat::Dxt4_5,
    ];
    for format in supported.iter() {
        assert!(format.is_host_supported());
    }
    // Unsupported formats should still report false.
    let unsupported = [TextureFormat::K16, TextureFormat::K32Float];
    for format in unsupported.iter() {
        assert!(!format.is_host_supported());
    }
}
#[test]
fn texture_cache_caches_and_reuses() {
    let key = TextureKey {
        base_address: 0,
        width: 4,
        height: 4,
        depth_or_slices: 1,
        format: TextureFormat::K8888,
        endian: Endian::None,
        dimension: Dimension::D2,
        tiled: false,
        pitch_texels: 32,
    };
    let guest = FakeMem::from_vec(vec![0u8; 8 * 1024]);
    let mut cache = TextureCache::new();

    // First request decodes.
    cache.ensure_cached(key, 0, &guest).unwrap();
    assert_eq!(cache.decodes_total, 1);

    // Same version again: served from the cache, no new decode.
    cache.ensure_cached(key, 0, &guest).unwrap();
    assert_eq!(cache.decodes_total, 1);

    // Newer version: the entry is stale and gets decoded again.
    cache.ensure_cached(key, 1, &guest).unwrap();
    assert_eq!(cache.decodes_total, 2);
    assert_eq!(cache.restale_total, 1);
}
/// End-to-end P5 test: a 6-dword fetch constant → decoded `TextureKey`
/// → `ensure_cached` on fresh/version-bumped memory → stale re-decode.
/// Mirrors what `vd_swap` does per frame.
#[test]
fn e2e_fetch_const_to_cache_with_versioning() {
    // 4×4 k_8_8_8_8 2D tiled texture at base 0, pitch=32. (The base field
    // holds a 4 KiB page number; the old `0x100 >> 12 << 12` expression
    // was 0 as well, despite the comment claiming base 0x100.)
    let d0 = 0x8000_0000u32 | (1u32 << 22) | 2; // pitch_5=1, tiled, type=2
    let d1 = 6u32; // base page 0, endian=none, format=K8888
    let d2 = 3u32 | (3u32 << 13); // width-1=3, height-1=3
    let d5 = 1u32 << 9; // 2D
    let key = decode_fetch_constant([d0, d1, d2, 0, 0, d5]).expect("decoded");
    assert_eq!(key.format, TextureFormat::K8888);
    assert_eq!(key.width, 4);
    assert_eq!(key.base_address, 0);

    // `FakeMem` mutates through `Cell`, so the binding need not be `mut`.
    let mem = FakeMem::from_vec(vec![0xAAu8; 4 * 1024]);
    let mut cache = TextureCache::new();

    // v0 decode.
    let first = cache
        .ensure_cached(key, 0, &mem)
        .expect("initial decode")
        .clone();
    // Same version → cache hit.
    cache.ensure_cached(key, 0, &mem).expect("hit");
    assert_eq!(cache.decodes_total, 1);

    // Simulate the guest writing to the texture's pages: version bumps.
    mem.0[..16].iter().for_each(|cell| cell.set(0xFF));
    cache.ensure_cached(key, 1, &mem).expect("re-decode");
    assert_eq!(cache.decodes_total, 2);
    assert_eq!(cache.restale_total, 1);

    // Bytes differ from v0 (proof the re-decode happened).
    let second = cache.get(&key).unwrap();
    assert_ne!(first.bytes, second.bytes);
}
#[test]
fn texture_cache_rejects_unsupported_format() {
    // K16 has no decoder in `ensure_cached`.
    let key = TextureKey {
        base_address: 0,
        width: 4,
        height: 4,
        depth_or_slices: 1,
        format: TextureFormat::K16,
        endian: Endian::None,
        dimension: Dimension::D2,
        tiled: false,
        pitch_texels: 32,
    };
    let guest = FakeMem::from_vec(vec![0u8; 1024]);
    let mut cache = TextureCache::new();

    let outcome = cache.ensure_cached(key, 0, &guest);
    assert!(matches!(outcome, Err(DecodeError::UnsupportedFormat)));
}
}