//! Texture cache — P5. //! //! Two-layer design mirroring canary's `TextureCache`: //! //! * **CPU layer** (this module): owns decoded, linear, host-endian texel //! byte buffers keyed by [`TextureKey`]. `ensure_cached` consults the //! guest memory's page-version counter to decide whether the cached //! bytes are still fresh and re-decodes on miss or staleness. //! * **GPU layer** (xenia-ui `texture_cache_host`): owns the //! `wgpu::Texture` + `TextureView` for each cached key; pulls decoded //! bytes from this CPU layer on upload. //! //! Canary references: `texture_cache.h/.cc`, `texture_info.cc`, and //! `texture_info_formats.inl` for the format table. use std::collections::HashMap; use crate::tiled_address; /// Xenos texture formats — `xenos::TextureFormat` at `xenos.h:489-579`. /// Values are the raw enum numbers the guest writes into /// `xe_gpu_texture_fetch_t.format`. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[repr(u8)] pub enum TextureFormat { K1Reverse = 0, K1 = 1, K8 = 2, K1555 = 3, K565 = 4, K6_5_5 = 5, K8888 = 6, K1010102 = 7, K8_8 = 10, K4_4_4_4 = 15, K10_11_11 = 16, K11_11_10 = 17, Dxt1 = 18, Dxt2_3 = 19, Dxt4_5 = 20, K24_8 = 22, K24_8Float = 23, K16 = 24, K16_16 = 25, K16_16_16_16 = 26, K16Float = 30, K16_16Float = 31, K16_16_16_16Float = 32, K32 = 33, K32_32 = 34, K32_32_32_32 = 35, K32Float = 36, K32_32Float = 37, K32_32_32_32Float = 38, Unknown(u8), } impl TextureFormat { pub fn from_raw(v: u8) -> Self { use TextureFormat::*; match v & 0x3F { 0 => K1Reverse, 1 => K1, 2 => K8, 3 => K1555, 4 => K565, 5 => K6_5_5, 6 => K8888, 7 => K1010102, 10 => K8_8, 15 => K4_4_4_4, 16 => K10_11_11, 17 => K11_11_10, 18 => Dxt1, 19 => Dxt2_3, 20 => Dxt4_5, 22 => K24_8, 23 => K24_8Float, 24 => K16, 25 => K16_16, 26 => K16_16_16_16, 30 => K16Float, 31 => K16_16Float, 32 => K16_16_16_16Float, 33 => K32, 34 => K32_32, 35 => K32_32_32_32, 36 => K32Float, 37 => K32_32Float, 38 => K32_32_32_32Float, other => Unknown(other), } } /// Block width/height in texels + bytes-per-block. For uncompressed /// formats block_w = block_h = 1. For DXT formats block_w = block_h = /// 4 (one 4×4 compressed block). pub fn block_info(self) -> BlockInfo { use TextureFormat::*; match self { K1Reverse | K1 => BlockInfo::new(1, 1, 1), // round up to 1 byte K8 => BlockInfo::new(1, 1, 1), K1555 | K565 | K6_5_5 | K4_4_4_4 | K16 | K16Float | K8_8 => BlockInfo::new(1, 1, 2), K8888 | K1010102 | K10_11_11 | K11_11_10 | K24_8 | K24_8Float | K16_16 | K16_16Float | K32 | K32Float => BlockInfo::new(1, 1, 4), K16_16_16_16 | K16_16_16_16Float | K32_32 | K32_32Float => BlockInfo::new(1, 1, 8), K32_32_32_32 | K32_32_32_32Float => BlockInfo::new(1, 1, 16), Dxt1 => BlockInfo::new(4, 4, 8), Dxt2_3 | Dxt4_5 => BlockInfo::new(4, 4, 16), Unknown(_) => BlockInfo::new(1, 1, 4), // safe-ish fallback } } /// True iff this format lands on a wgpu texture format we can /// natively bind — no CPU-side conversion per frame required. M5 /// adds `k_5_6_5` (CPU-expanded to `Rgba8Unorm` on decode; still /// counts as supported for the host-cache wiring), `k_DXT2_3` /// (BC2), and `k_DXT4_5` (BC3). pub fn is_host_supported(self) -> bool { matches!( self, TextureFormat::K8888 | TextureFormat::K565 | TextureFormat::Dxt1 | TextureFormat::Dxt2_3 | TextureFormat::Dxt4_5 ) } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct BlockInfo { pub block_w: u8, pub block_h: u8, pub bytes_per_block: u8, } impl BlockInfo { pub const fn new(block_w: u8, block_h: u8, bytes_per_block: u8) -> Self { Self { block_w, block_h, bytes_per_block, } } pub fn log2_bpb(self) -> u32 { match self.bytes_per_block { 1 => 0, 2 => 1, 4 => 2, 8 => 3, 16 => 4, _ => 0, } } } /// Xenos `Endian` enum from `xenos.h:198-204`. 2-bit field in fetch dword 1. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Endian { None = 0, Swap8In16 = 1, Swap8In32 = 2, Swap16In32 = 3, } impl Endian { pub fn from_raw(v: u8) -> Self { match v & 0x3 { 1 => Endian::Swap8In16, 2 => Endian::Swap8In32, 3 => Endian::Swap16In32, _ => Endian::None, } } /// Apply this endian's byte swap to one 32-bit unit. Matches canary's /// `shaders/endian.xesli:25-55` semantics; the WGSL translator pulls /// the same mask-shift pattern. pub fn swap32(self, v: u32) -> u32 { match self { Endian::None => v, Endian::Swap8In16 => ((v & 0x00FF_00FF) << 8) | ((v & 0xFF00_FF00) >> 8), Endian::Swap8In32 => v.swap_bytes(), Endian::Swap16In32 => v.rotate_right(16), } } } /// Texture dimensionality (`xenos::DataDimension`). #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Dimension { D1 = 0, D2 = 1, D3Stacked = 2, Cube = 3, } impl Dimension { pub fn from_raw(v: u8) -> Self { match v & 0x3 { 1 => Dimension::D2, 2 => Dimension::D3Stacked, 3 => Dimension::Cube, _ => Dimension::D1, } } } /// Identity of a cached texture. Matches canary's `TextureCache::TextureKey` /// at the semantic level — we exclude mip/border state for P5 since neither /// is populated yet. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct TextureKey { /// Guest physical base (byte address — already shifted left by 12 from /// the fetch-constant `base_address` field). pub base_address: u32, pub width: u16, pub height: u16, pub depth_or_slices: u16, pub format: TextureFormat, pub endian: Endian, pub dimension: Dimension, pub tiled: bool, /// Row pitch in texels, already aligned to 32. Canary stores pitch/32 /// in the fetch constant; we keep the raw texel count to avoid /// callers remembering to shift. pub pitch_texels: u16, } /// Decode a 6-dword texture fetch constant (layout at `xenos.h:1229-1329`). /// Returns `None` if the constant is obviously unset (all zeros) or if /// `type` is not the texture-constant marker. pub fn decode_fetch_constant(dwords: [u32; 6]) -> Option { let d0 = dwords[0]; let d1 = dwords[1]; let d2 = dwords[2]; let d5 = dwords[5]; // type: low 2 bits of dword 0 should be 2 (texture) per canary — // 0 = vertex, 2 = texture. An all-zero constant reads as type 0 so // `None` filters it out here. let ty = d0 & 0x3; if d0 == 0 && d1 == 0 { return None; } // Not a texture constant (e.g. 0 = vertex fetch constant reused). if ty != 2 { return None; } let pitch_5 = (d0 >> 22) & 0x1FF; // pitch/32 in texels let tiled = ((d0 >> 31) & 1) != 0; let format = TextureFormat::from_raw((d1 & 0x3F) as u8); let endian = Endian::from_raw(((d1 >> 6) & 0x3) as u8); let base_address = (d1 >> 12) << 12; // base >> 12, re-shifted. let dim = Dimension::from_raw(((d5 >> 9) & 0x3) as u8); // Size decode depends on dimension. let (width, height, depth) = match dim { Dimension::D1 => ((d2 & 0x00FF_FFFF) as u16 + 1, 1u16, 1u16), Dimension::D2 => ( (d2 & 0x1FFF) as u16 + 1, ((d2 >> 13) & 0x1FFF) as u16 + 1, ((d2 >> 26) & 0x3F) as u16 + 1, ), Dimension::D3Stacked | Dimension::Cube => ( (d2 & 0x7FF) as u16 + 1, ((d2 >> 11) & 0x7FF) as u16 + 1, ((d2 >> 22) & 0x3FF) as u16 + 1, ), }; Some(TextureKey { base_address, width, height, depth_or_slices: depth, format, endian, dimension: dim, tiled, pitch_texels: ((pitch_5 as u16) * 32).max(width), }) } /// Decoded, linear, host-endian texture bytes ready for wgpu upload. #[derive(Debug, Clone)] pub struct CachedTexture { pub key: TextureKey, pub version_when_uploaded: u64, /// Tightly packed. Layout depends on `key.format`: /// - `K8888` → `width*height*4` bytes in Rgba8Unorm order. /// - `Dxt1` → `ceil(w/4)*ceil(h/4)*8` bytes of raw BC1 blocks, after /// block-level detile + dword-endian swap. pub bytes: Vec, } impl CachedTexture { pub fn byte_size(&self) -> usize { self.bytes.len() } } /// Errors that can happen during decode. The `ensure_cached` caller maps /// these to `gpu.texture.reject{reason}` metrics so the HUD surfaces when /// a texture fell back. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DecodeError { UnsupportedFormat, OutOfBounds, ZeroSize, } /// Read `len` bytes from guest memory starting at `addr`. Returns `None` /// if the span would exceed the memory's reported end; otherwise returns /// a freshly-allocated buffer with the bytes. /// /// The `MemoryAccess` trait exposes per-byte reads only; we batch them in /// a single pass to avoid the per-byte virtual dispatch overhead for large /// textures (1 MiB frontbuffer = 1M dispatch calls). pub fn read_guest_bytes( mem: &dyn xenia_memory::MemoryAccess, addr: u32, len: usize, ) -> Vec { let mut out = Vec::with_capacity(len); for i in 0..len { let a = addr.wrapping_add(i as u32); out.push(mem.read_u8(a)); if a < addr { // 32-bit overflow; unmap the tail. break; } } out } /// Byte-swap the 32-bit dwords of `buf` in place according to `endian`. /// `buf.len()` should be a multiple of 4; tail bytes are left untouched. pub fn apply_endian_32(buf: &mut [u8], endian: Endian) { if matches!(endian, Endian::None) { return; } let mut i = 0; while i + 4 <= buf.len() { let v = u32::from_le_bytes([buf[i], buf[i + 1], buf[i + 2], buf[i + 3]]); let swapped = endian.swap32(v); buf[i..i + 4].copy_from_slice(&swapped.to_le_bytes()); i += 4; } } /// Decode a k_8_8_8_8 texture out of guest memory into `Rgba8Unorm` bytes. /// Applies Xenos→host channel swizzle (Xbox 360 stores BGRA in memory → /// we emit RGBA for wgpu) and the declared endian swap, then detiles via /// the Xenos Tiled2D formula. pub fn decode_k8888_tiled( key: &TextureKey, mem: &dyn xenia_memory::MemoryAccess, ) -> Result, DecodeError> { if key.width == 0 || key.height == 0 { return Err(DecodeError::ZeroSize); } let w = key.width as u32; let h = key.height as u32; let pitch_aligned = tiled_address::align_pitch_to_macro_tile(key.pitch_texels as u32); let total_bytes = (pitch_aligned * h * 4) as usize; let mut raw = read_guest_bytes(mem, key.base_address, total_bytes); if raw.len() < total_bytes { return Err(DecodeError::OutOfBounds); } apply_endian_32(&mut raw, key.endian); let mut linear = vec![0u8; (w * h * 4) as usize]; if key.tiled { if tiled_address::detile_2d(&raw, &mut linear, w, h, pitch_aligned, 4).is_err() { return Err(DecodeError::OutOfBounds); } } else { // Non-tiled copy row-by-row honoring pitch. for y in 0..h as usize { let src = y * (pitch_aligned as usize) * 4; let dst = y * (w as usize) * 4; linear[dst..dst + (w as usize) * 4] .copy_from_slice(&raw[src..src + (w as usize) * 4]); } } // Xenos stores `k_8_8_8_8` in ARGB byte order (high nibble = A). After // endian.Swap8In32 guests' typical per-dword byte order becomes BGRA // in little-endian host bytes. Swap B↔R so we hand Rgba8Unorm to wgpu. for px in linear.chunks_exact_mut(4) { px.swap(0, 2); } Ok(linear) } /// Decode a DXT-compressed texture to raw block bytes (no format /// conversion — wgpu understands `Bc{1,2,3}RgbaUnorm` natively so the /// GPU does the actual decompression on upload). /// /// Xenos stores DXT blocks in 4×4 block-tiled order using the Tiled2D /// formula, with stride counted in blocks. `bytes_per_block` is 8 for /// BC1 (DXT1), 16 for BC2 (DXT2_3) and BC3 (DXT4_5). pub fn decode_dxt_tiled( key: &TextureKey, mem: &dyn xenia_memory::MemoryAccess, bytes_per_block: u32, ) -> Result, DecodeError> { if key.width == 0 || key.height == 0 { return Err(DecodeError::ZeroSize); } let block_w = 4u32; let block_h = 4u32; let w_blocks = (key.width as u32).div_ceil(block_w); let h_blocks = (key.height as u32).div_ceil(block_h); let pitch_blocks = tiled_address::align_pitch_to_macro_tile( (key.pitch_texels as u32).div_ceil(block_w), ); let total_bytes = (pitch_blocks * h_blocks * bytes_per_block) as usize; let mut raw = read_guest_bytes(mem, key.base_address, total_bytes); if raw.len() < total_bytes { return Err(DecodeError::OutOfBounds); } // DXT blocks are stored as 4×u16 + 4×u8-indices (BC1) or similar // u16/u32-width fields for BC2/BC3; the Xbox 360's big-endian word // order requires an endian swap at the u16/u32 level regardless of // which BC-family format. apply_endian_32(&mut raw, key.endian); let mut out = vec![0u8; (w_blocks * h_blocks * bytes_per_block) as usize]; if key.tiled { if tiled_address::detile_2d( &raw, &mut out, w_blocks, h_blocks, pitch_blocks, bytes_per_block, ) .is_err() { return Err(DecodeError::OutOfBounds); } } else { for y in 0..h_blocks as usize { let src = y * (pitch_blocks as usize) * (bytes_per_block as usize); let dst = y * (w_blocks as usize) * (bytes_per_block as usize); out[dst..dst + (w_blocks as usize) * (bytes_per_block as usize)] .copy_from_slice(&raw[src..src + (w_blocks as usize) * (bytes_per_block as usize)]); } } Ok(out) } /// BC1 / DXT1 — 8-byte blocks. pub fn decode_dxt1_tiled( key: &TextureKey, mem: &dyn xenia_memory::MemoryAccess, ) -> Result, DecodeError> { decode_dxt_tiled(key, mem, 8) } /// BC2 / DXT2_3 — 16-byte blocks. pub fn decode_dxt23_tiled( key: &TextureKey, mem: &dyn xenia_memory::MemoryAccess, ) -> Result, DecodeError> { decode_dxt_tiled(key, mem, 16) } /// BC3 / DXT4_5 — 16-byte blocks. pub fn decode_dxt45_tiled( key: &TextureKey, mem: &dyn xenia_memory::MemoryAccess, ) -> Result, DecodeError> { decode_dxt_tiled(key, mem, 16) } /// **k_5_6_5** — 16-bit R:5 G:6 B:5 per texel (Xbox stores R in the high /// 5 bits of the 16-bit word). We unpack each texel into 4 bytes of /// `Rgba8Unorm` (A = 0xFF). wgpu doesn't ship `R5G6B5Unorm` as a /// sampled texture format on every backend, so CPU-side conversion is /// the safe path even if it's 2× the texture memory. /// /// Tiling: Tiled2D at the **texel** level (block = 1 texel = 2 bytes), /// then we expand each 2-byte u16 into the 4-byte Rgba8 in the linear /// output buffer. pub fn decode_k565_tiled( key: &TextureKey, mem: &dyn xenia_memory::MemoryAccess, ) -> Result, DecodeError> { if key.width == 0 || key.height == 0 { return Err(DecodeError::ZeroSize); } let w = key.width as u32; let h = key.height as u32; // Pitch/block counts — block = 1 texel here, 2 bytes. let pitch_aligned = tiled_address::align_pitch_to_macro_tile(key.pitch_texels as u32); let total_bytes = (pitch_aligned * h * 2) as usize; let mut raw = read_guest_bytes(mem, key.base_address, total_bytes); if raw.len() < total_bytes { return Err(DecodeError::OutOfBounds); } // 16-bit word order is endian-swap-sensitive. apply_endian_32(&mut raw, key.endian); // Step 1: detile (bytes_per_block=2, tile in blocks=texels). let mut linear_u16 = vec![0u8; (w * h * 2) as usize]; if key.tiled { if tiled_address::detile_2d(&raw, &mut linear_u16, w, h, pitch_aligned, 2).is_err() { return Err(DecodeError::OutOfBounds); } } else { for y in 0..h as usize { let src = y * (pitch_aligned as usize) * 2; let dst = y * (w as usize) * 2; linear_u16[dst..dst + (w as usize) * 2] .copy_from_slice(&raw[src..src + (w as usize) * 2]); } } // Step 2: expand each 16-bit RGB565 to Rgba8Unorm. The in-memory u16 // is little-endian after `apply_endian_32` has normalized the word // order (we keep host-native byte ordering post-swap). let mut rgba = vec![0u8; (w * h * 4) as usize]; for y in 0..h as usize { for x in 0..w as usize { let off = (y * w as usize + x) * 2; let lo = linear_u16[off]; let hi = linear_u16[off + 1]; let word = u16::from_le_bytes([lo, hi]); // 5 bits R (bits 11-15), 6 bits G (5-10), 5 bits B (0-4). // Expand to full-range u8: replicate high bits into low // (so 0b11111 → 0xFF, matching the standard 565→888 convention). let r5 = ((word >> 11) & 0x1F) as u8; let g6 = ((word >> 5) & 0x3F) as u8; let b5 = (word & 0x1F) as u8; let r = (r5 << 3) | (r5 >> 2); let g = (g6 << 2) | (g6 >> 4); let b = (b5 << 3) | (b5 >> 2); let o = (y * w as usize + x) * 4; rgba[o] = r; rgba[o + 1] = g; rgba[o + 2] = b; rgba[o + 3] = 0xFF; } } Ok(rgba) } /// Version-aware CPU-side texture cache. Entries are keyed on /// `TextureKey.hash` and carry a `version_when_uploaded` watermark against /// the guest memory's page-version counter. `ensure_cached` queries /// `GuestMemory::max_page_version` over the texture's byte span; if the /// span has been written since cache time, the entry is re-decoded. pub struct TextureCache { entries: HashMap, /// Monotonic counter of decodes performed — HUD surface. pub decodes_total: u64, /// Count of stale-miss re-decodes. pub restale_total: u64, } impl Default for TextureCache { fn default() -> Self { Self::new() } } impl TextureCache { pub fn new() -> Self { Self { entries: HashMap::new(), decodes_total: 0, restale_total: 0, } } pub fn len(&self) -> usize { self.entries.len() } pub fn is_empty(&self) -> bool { self.entries.is_empty() } pub fn get(&self, key: &TextureKey) -> Option<&CachedTexture> { self.entries.get(key) } /// Return a cached (or freshly-decoded) texture. The caller supplies /// the current guest-memory page version covering the texture span; /// see [`max_page_version_for`]. pub fn ensure_cached( &mut self, key: TextureKey, current_version: u64, mem: &dyn xenia_memory::MemoryAccess, ) -> Result<&CachedTexture, DecodeError> { // Fast path: fresh entry exists. if let Some(e) = self.entries.get(&key) { if e.version_when_uploaded >= current_version { return Ok(self.entries.get(&key).unwrap()); } self.restale_total += 1; } let bytes = match key.format { TextureFormat::K8888 => decode_k8888_tiled(&key, mem)?, TextureFormat::K565 => decode_k565_tiled(&key, mem)?, TextureFormat::Dxt1 => decode_dxt1_tiled(&key, mem)?, TextureFormat::Dxt2_3 => decode_dxt23_tiled(&key, mem)?, TextureFormat::Dxt4_5 => decode_dxt45_tiled(&key, mem)?, _ => return Err(DecodeError::UnsupportedFormat), }; self.decodes_total += 1; let entry = CachedTexture { key, version_when_uploaded: current_version, bytes, }; self.entries.insert(key, entry); Ok(self.entries.get(&key).unwrap()) } pub fn byte_budget(&self) -> usize { self.entries.values().map(|e| e.byte_size()).sum() } } #[cfg(test)] mod tests { use super::*; use std::cell::Cell; struct FakeMem(Box<[Cell]>); impl FakeMem { fn from_vec(v: Vec) -> Self { FakeMem(v.into_iter().map(Cell::new).collect()) } } impl xenia_memory::MemoryAccess for FakeMem { fn read_u8(&self, a: u32) -> u8 { self.0.get(a as usize).map(|c| c.get()).unwrap_or(0) } fn read_u16(&self, a: u32) -> u16 { u16::from_be_bytes([self.read_u8(a), self.read_u8(a + 1)]) } fn read_u32(&self, a: u32) -> u32 { u32::from_be_bytes([ self.read_u8(a), self.read_u8(a + 1), self.read_u8(a + 2), self.read_u8(a + 3), ]) } fn read_u64(&self, a: u32) -> u64 { u64::from_be_bytes([ self.read_u8(a), self.read_u8(a + 1), self.read_u8(a + 2), self.read_u8(a + 3), self.read_u8(a + 4), self.read_u8(a + 5), self.read_u8(a + 6), self.read_u8(a + 7), ]) } fn write_u8(&self, a: u32, v: u8) { if let Some(slot) = self.0.get(a as usize) { slot.set(v); } } fn write_u16(&self, a: u32, v: u16) { let b = v.to_be_bytes(); self.write_u8(a, b[0]); self.write_u8(a + 1, b[1]); } fn write_u32(&self, a: u32, v: u32) { let b = v.to_be_bytes(); for i in 0..4 { self.write_u8(a + i as u32, b[i]); } } fn write_u64(&self, a: u32, v: u64) { let b = v.to_be_bytes(); for i in 0..8 { self.write_u8(a + i as u32, b[i]); } } fn translate(&self, _: u32) -> Option<*const u8> { None } fn translate_mut(&self, _: u32) -> Option<*mut u8> { None } } #[test] fn format_block_info_matches_canary_expectations() { assert_eq!( TextureFormat::K8888.block_info(), BlockInfo::new(1, 1, 4) ); assert_eq!(TextureFormat::Dxt1.block_info(), BlockInfo::new(4, 4, 8)); assert_eq!( TextureFormat::Dxt4_5.block_info(), BlockInfo::new(4, 4, 16) ); } #[test] fn endian_swap_variants() { assert_eq!(Endian::None.swap32(0x11223344), 0x11223344); assert_eq!(Endian::Swap8In16.swap32(0x11223344), 0x22114433); assert_eq!(Endian::Swap8In32.swap32(0x11223344), 0x44332211); assert_eq!(Endian::Swap16In32.swap32(0x11223344), 0x33441122); } #[test] fn decode_fetch_constant_rejects_empty() { let z = [0u32; 6]; assert!(decode_fetch_constant(z).is_none()); } #[test] fn decode_fetch_constant_parses_2d_k8888() { // Build a synthetic k_8_8_8_8 2D texture fetch constant: // dword0: pitch_5=40 (1280/32), tiled=1, type=2 // dword1: format=6 (K8888), endian=2 (Swap8In32), base=0xAB000>>12 // dword2: width-1=1279, height-1=719 // dword5: dimension=1 (2D) let d0 = 0x8000_0000 | (40u32 << 22) | 2; let d1 = (0xAB000u32 >> 12 << 12) | (2u32 << 6) | 6u32; let d2 = 1279u32 | ((719u32) << 13); let d5 = 1u32 << 9; let k = decode_fetch_constant([d0, d1, d2, 0, 0, d5]).expect("parsed"); assert_eq!(k.format, TextureFormat::K8888); assert_eq!(k.endian, Endian::Swap8In32); assert_eq!(k.width, 1280); assert_eq!(k.height, 720); assert_eq!(k.dimension, Dimension::D2); assert!(k.tiled); assert_eq!(k.pitch_texels, 1280); } #[test] fn decode_k8888_roundtrip_linear() { // Build a 4×4 non-tiled image with pitch=32 (one macro-tile row). // Each pixel at (x, y) stores ARGB = (0xFF, x, y, y*4+x) as a // big-endian dword. After Swap8In32 + B↔R swizzle, out[off..] must // be (x, y, y*4+x, 0xFF) in RGBA order. let w = 4u32; let h = 4u32; let pitch = 32u32; let mut bytes = vec![0u8; (pitch * h * 4) as usize]; for y in 0..h { for x in 0..w { let off = ((y * pitch + x) * 4) as usize; let argb = (0xFFu32 << 24) | ((x as u32) << 16) | ((y as u32) << 8) | ((y * 4 + x) as u32); bytes[off..off + 4].copy_from_slice(&argb.to_be_bytes()); } } let mem = FakeMem::from_vec(bytes); let key = TextureKey { base_address: 0, width: 4, height: 4, depth_or_slices: 1, format: TextureFormat::K8888, endian: Endian::Swap8In32, dimension: Dimension::D2, tiled: false, pitch_texels: pitch as u16, }; let out = decode_k8888_tiled(&key, &mem).expect("decode"); assert_eq!(out.len(), 16 * 4); assert_eq!(&out[0..4], &[0, 0, 0, 0xFF]); let off = ((3 * 4 + 3) * 4) as usize; assert_eq!(&out[off..off + 4], &[3, 3, 15, 0xFF]); } // ── First-Pixels M5 format tests ────────────────────────────── /// BC2 (DXT2_3) roundtrip: 16-byte blocks, 4×4 image = 1 block. /// Synthetic source of 0xDEADBEEF... bytes; assert the decoder /// returns the same bytes (passthrough after endian swap). #[test] fn decode_dxt23_small_roundtrip() { // 4×4 texture = 1 BC2 block (16 bytes). With pitch_texels=32 // (macro-tile-aligned) the block pitch is 8 (=32/4), and we // allocate 8*1*16 = 128 bytes of source. let mut bytes = vec![0u8; 128]; for (i, b) in bytes.iter_mut().enumerate().take(16) { *b = i as u8; } let mem = FakeMem::from_vec(bytes); let key = TextureKey { base_address: 0, width: 4, height: 4, depth_or_slices: 1, format: TextureFormat::Dxt2_3, endian: Endian::None, // no swap — we can eyeball passthrough dimension: Dimension::D2, tiled: false, pitch_texels: 32, }; let out = decode_dxt23_tiled(&key, &mem).expect("decode"); assert_eq!(out.len(), 16); // 1 block × 16 bytes for i in 0..16 { assert_eq!(out[i], i as u8); } } /// BC3 (DXT4_5) uses the same 16-byte block infra as BC2; a /// parallel test prevents a regression that sneaks up via the /// generic `decode_dxt_tiled`. #[test] fn decode_dxt45_uses_16byte_blocks() { let mem = FakeMem::from_vec(vec![0xAAu8; 256]); let key = TextureKey { base_address: 0, width: 8, height: 4, // 2×1 blocks depth_or_slices: 1, format: TextureFormat::Dxt4_5, endian: Endian::None, dimension: Dimension::D2, tiled: false, pitch_texels: 32, }; let out = decode_dxt45_tiled(&key, &mem).expect("decode"); assert_eq!(out.len(), 2 * 16); } /// k_5_6_5: a single white texel (all bits set, 0xFFFF) should /// expand to RGBA8 white (0xFF, 0xFF, 0xFF, 0xFF). A single pure-red /// texel (R=31, G=0, B=0 → word 0xF800) should expand to R=255 G=0 /// B=0 via the high-bit-replicate convention. #[test] fn decode_k565_texel_expansion() { // Memory layout for a 2×1 non-tiled k_5_6_5 image (pitch=32 texels // → 32 × 1 × 2 = 64 bytes). We store texel[0] = 0xFFFF (white), // texel[1] = 0xF800 (pure red). let mut bytes = vec![0u8; 64]; // 0xFFFF bytes[0] = 0xFF; bytes[1] = 0xFF; // 0xF800 (big-endian memory): high byte 0xF8, low 0x00. // But after apply_endian_32(Endian::None) we use little-endian // word decoding — so memory must carry the bytes in LE order. bytes[2] = 0x00; bytes[3] = 0xF8; let mem = FakeMem::from_vec(bytes); let key = TextureKey { base_address: 0, width: 2, height: 1, depth_or_slices: 1, format: TextureFormat::K565, endian: Endian::None, dimension: Dimension::D2, tiled: false, pitch_texels: 32, }; let out = decode_k565_tiled(&key, &mem).expect("decode"); assert_eq!(out.len(), 2 * 4); // Texel 0: white. assert_eq!(&out[0..4], &[0xFF, 0xFF, 0xFF, 0xFF]); // Texel 1: pure red via 5-bit-expand (0b11111 → 0xFF). assert_eq!(&out[4..8], &[0xFF, 0x00, 0x00, 0xFF]); } #[test] fn is_host_supported_covers_m5_formats() { assert!(TextureFormat::K8888.is_host_supported()); assert!(TextureFormat::K565.is_host_supported()); assert!(TextureFormat::Dxt1.is_host_supported()); assert!(TextureFormat::Dxt2_3.is_host_supported()); assert!(TextureFormat::Dxt4_5.is_host_supported()); // Unsupported formats should still report false. assert!(!TextureFormat::K16.is_host_supported()); assert!(!TextureFormat::K32Float.is_host_supported()); } #[test] fn texture_cache_caches_and_reuses() { let mut cache = TextureCache::new(); let mem = FakeMem::from_vec(vec![0u8; 8 * 1024]); let key = TextureKey { base_address: 0, width: 4, height: 4, depth_or_slices: 1, format: TextureFormat::K8888, endian: Endian::None, dimension: Dimension::D2, tiled: false, pitch_texels: 32, }; cache.ensure_cached(key, 0, &mem).unwrap(); assert_eq!(cache.decodes_total, 1); // Same version: should hit cache. cache.ensure_cached(key, 0, &mem).unwrap(); assert_eq!(cache.decodes_total, 1); // Higher version: stale → re-decode. cache.ensure_cached(key, 1, &mem).unwrap(); assert_eq!(cache.decodes_total, 2); assert_eq!(cache.restale_total, 1); } /// End-to-end P5 test: a 6-dword fetch constant → decoded `TextureKey` /// → `ensure_cached` on fresh/version-bumped memory → stale re-decode. /// Mirrors what `vd_swap` does per frame. #[test] fn e2e_fetch_const_to_cache_with_versioning() { // 4×4 k_8_8_8_8 2D tiled texture at base 0x100, pitch=32 aligned. let d0 = 0x8000_0000u32 | (1u32 << 22) | 2; // pitch_5=1, tiled, type=2 let d1 = (0x100u32 >> 12 << 12) | (0u32 << 6) | 6; // K8888, endian=none let d2 = 3u32 | (3u32 << 13); // width-1=3, height-1=3 let d5 = 1u32 << 9; // 2D let key = decode_fetch_constant([d0, d1, d2, 0, 0, d5]).expect("decoded"); assert_eq!(key.format, TextureFormat::K8888); assert_eq!(key.width, 4); let mut mem = FakeMem::from_vec(vec![0xAAu8; 4 * 1024]); let mut cache = TextureCache::new(); // v0 decode. let first = cache .ensure_cached(key, 0, &mem) .expect("initial decode") .clone(); // Same version → cache hit. cache.ensure_cached(key, 0, &mem).expect("hit"); assert_eq!(cache.decodes_total, 1); // Simulate the guest writing to the texture's pages: version bumps. for b in &mem.0[..16] { b.set(0xFF); } cache.ensure_cached(key, 1, &mem).expect("re-decode"); assert_eq!(cache.decodes_total, 2); assert_eq!(cache.restale_total, 1); // Bytes differ from v0 (proof the re-decode happened). let second = cache.get(&key).unwrap(); assert_ne!(first.bytes, second.bytes); } #[test] fn texture_cache_rejects_unsupported_format() { let mut cache = TextureCache::new(); let mem = FakeMem::from_vec(vec![0u8; 1024]); let key = TextureKey { base_address: 0, width: 4, height: 4, depth_or_slices: 1, format: TextureFormat::K16, endian: Endian::None, dimension: Dimension::D2, tiled: false, pitch_texels: 32, }; assert!(matches!( cache.ensure_cached(key, 0, &mem), Err(DecodeError::UnsupportedFormat) )); } }