//! Xenos tiled-texture address formula (2D, Tiled2D layout).
//!
//! Port of `xenia-canary/src/xenia/gpu/texture_address.h:190-208` Tiled2D /
//! `TiledCombine` helpers. The Xbox 360 GPU stores textures in a 32×32-block
//! macro-tile pattern with bank+pipe interleave for its internal DRAM
//! banks; this formula inverts that so we can read pixels out in linear
//! order, given the tiled source buffer.
//!
//! We use this in two places during P4:
//! - `vd_swap` frontbuffer detile (1280×720 k_8_8_8_8 usually).
//! - Any place we need to read tiled guest memory into a host-linear
//!   buffer for CPU-side conversion before upload.

/// Log2 of the macro-tile width (2^5 = 32 px), taken from canary.
pub const MACRO_TILE_WIDTH_LOG2: u32 = 5;
/// Log2 of the 2D macro-tile height (2^5 = 32 px), taken from canary.
pub const MACRO_TILE_HEIGHT_2D_LOG2: u32 = 5;

/// Canary's `TiledCombine` helper — reassembles the DRAM address from the
/// outer-tile byte offset plus the bank/pipe/y-LSB interleave bits.
///
/// Resulting bit layout (LSB first):
/// - bits 0..=3: `outer_inner_bytes` bits 0..=3
/// - bit 4: `y_lsb`
/// - bit 5: `outer_inner_bytes` bit 4
/// - bits 6..=7: `pipe`
/// - bits 8..=10: `outer_inner_bytes` bits 5..=7
/// - bit 11: `bank`
/// - bits 12..: `outer_inner_bytes` bits 8..
#[inline]
fn tiled_combine(outer_inner_bytes: u32, bank: u32, pipe: u32, y_lsb: u32) -> u32 {
    (y_lsb << 4)
        | (pipe << 6)
        | (bank << 11)
        | (outer_inner_bytes & 0b1111)
        | (((outer_inner_bytes >> 4) & 0b1) << 5)
        | (((outer_inner_bytes >> 5) & 0b111) << 8)
        | ((outer_inner_bytes >> 8) << 12)
}

/// 2D tiled offset in bytes from (x, y) into a tiled surface with
/// `pitch_aligned` pixel pitch (rounded to the macro-tile width) and
/// `bytes_per_block_log2` bytes-per-element (e.g. 2 for RGBA8888 → 4-byte
/// blocks → log2 = 2). Always non-negative for in-bounds inputs; returns
/// `u32` rather than canary's signed `int` since our callers stay in
/// unsigned arithmetic.
///
/// This is the canonical formula — do not simplify without re-reading
/// `texture_address.h:190-208`; the bit-interleave cannot be expressed
/// as a linear function.
pub fn tiled_2d_offset(x: u32, y: u32, pitch_aligned: u32, bytes_per_block_log2: u32) -> u32 {
    let macro_tile_cols = pitch_aligned >> MACRO_TILE_WIDTH_LOG2;
    // Outer: which 32×32 macro tile we're in.
    let outer_blocks = (((y >> MACRO_TILE_HEIGHT_2D_LOG2) * macro_tile_cols)
        + (x >> MACRO_TILE_WIDTH_LOG2))
        << 6;
    // Inner: where we are within the 32×32 macro tile (Y dropped by 1 bit
    // because that bit becomes the `y_lsb` interleave bit below).
    let inner_blocks = (((y >> 1) & 0b111) << 3) | (x & 0b111);
    let outer_inner_bytes = (outer_blocks | inner_blocks) << bytes_per_block_log2;
    let bank = (y >> 4) & 0b1;
    let pipe = ((x >> 3) & 0b11) ^ (((y >> 3) & 0b1) << 1);
    let y_lsb = y & 1;
    tiled_combine(outer_inner_bytes, bank, pipe, y_lsb)
}

/// Round `pitch_pixels` up to the nearest multiple of the macro-tile width
/// (32 px). Xenos requires tile-aligned pitches; non-aligned values pad.
///
/// The intermediate `pitch_pixels + 31` is a plain `u32` addition, so
/// inputs within 31 of `u32::MAX` overflow it.
#[inline]
pub fn align_pitch_to_macro_tile(pitch_pixels: u32) -> u32 {
    let mask = (1u32 << MACRO_TILE_WIDTH_LOG2) - 1;
    (pitch_pixels + mask) & !mask
}

/// Detile a 2D tiled surface into a linear destination buffer.
///
/// For every `(x, y)` in `width × height`, one `bpp`-byte block is copied
/// from `src` at the offset given by [`tiled_2d_offset`] into `dst` at the
/// linear position `(y * width + x) * bpp`. `pitch_pixels` is rounded up
/// with [`align_pitch_to_macro_tile`] before being used as the tiled pitch.
///
/// `bpp` is the bytes-per-block (4 for RGBA8888, 2 for a 16-bit block,
/// etc.) and must be a power of two, because the address formula works on
/// its log2. `dst` must be at least `width * height * bpp` bytes long;
/// bytes past that length are left untouched.
///
/// # Errors
///
/// Returns `Err(())` (defensive — callers can downgrade silently) when:
/// - `bpp` is zero or not a power of two;
/// - `width * height * bpp` does not fit in `usize`;
/// - `dst` is shorter than `width * height * bpp` bytes (nothing is
///   written in this case);
/// - `src` does not contain the block at some offset the formula produces
///   (blocks visited before the failing one have already been written to
///   `dst`).
pub fn detile_2d(
    src: &[u8],
    dst: &mut [u8],
    width: u32,
    height: u32,
    pitch_pixels: u32,
    bpp: u32,
) -> Result<(), ()> {
    // `trailing_zeros` only equals log2 for powers of two; anything else
    // (including 0) would address the source with the wrong block size.
    if !bpp.is_power_of_two() {
        return Err(());
    }
    let bpp_log2 = bpp.trailing_zeros();
    let block_len = bpp as usize;
    let pitch_aligned = align_pitch_to_macro_tile(pitch_pixels);

    // Size the destination in `usize` with checked arithmetic so large
    // surfaces cannot wrap a `u32` product.
    let row_len = (width as usize).checked_mul(block_len).ok_or(())?;
    let required = row_len.checked_mul(height as usize).ok_or(())?;
    if required == 0 {
        // Zero-area surface: nothing to copy (and `chunks_exact_mut`
        // rejects a zero chunk size).
        return Ok(());
    }
    // One upfront length check replaces the per-block destination check.
    let dst = dst.get_mut(..required).ok_or(())?;

    for (y, row) in (0..height).zip(dst.chunks_exact_mut(row_len)) {
        for (x, block) in (0..width).zip(row.chunks_exact_mut(block_len)) {
            let src_off = tiled_2d_offset(x, y, pitch_aligned, bpp_log2) as usize;
            // Tiled offsets are not monotonic in (x, y), so the source
            // must be bounds-checked per block.
            let src_block = src_off
                .checked_add(block_len)
                .and_then(|end| src.get(src_off..end))
                .ok_or(())?;
            block.copy_from_slice(src_block);
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The (0, 0) pixel is always at byte offset 0 regardless of pitch.
    #[test]
    fn origin_is_zero() {
        assert_eq!(tiled_2d_offset(0, 0, 64, 2), 0);
    }

    /// Round-trip: detiling a tiled buffer that was filled using the same
    /// formula produces the identity linear image.
    #[test]
    fn roundtrip_small_pattern() {
        let w = 32u32;
        let h = 16u32;
        let bpp = 4u32;
        let pitch = align_pitch_to_macro_tile(w);

        // Allocate a tiled buffer large enough for the largest offset.
        let max_off = (0..h)
            .flat_map(|y| (0..w).map(move |x| tiled_2d_offset(x, y, pitch, 2) as usize + 4))
            .max()
            .unwrap();
        let mut tiled = vec![0u8; max_off];

        // Write a recognisable 4-byte pattern = (x, y, x^y, 0xFF) into
        // each logical (x, y) position in the tiled buffer.
        for y in 0..h {
            for x in 0..w {
                let off = tiled_2d_offset(x, y, pitch, 2) as usize;
                tiled[off..off + 4].copy_from_slice(&[x as u8, y as u8, (x ^ y) as u8, 0xFF]);
            }
        }

        let mut linear = vec![0u8; (w * h * bpp) as usize];
        detile_2d(&tiled, &mut linear, w, h, pitch, bpp).expect("detile ok");

        // Verify every logical pixel landed at the right linear offset.
        for y in 0..h {
            for x in 0..w {
                let lin = ((y * w + x) * bpp) as usize;
                assert_eq!(
                    &linear[lin..lin + 4],
                    &[x as u8, y as u8, (x ^ y) as u8, 0xFF]
                );
            }
        }
    }

    /// Within a single macro-tile row, stepping `x` by 1 changes the low
    /// 3 bits of `x` which feed the `inner_blocks` field — different
    /// offsets are expected (no aliasing).
    #[test]
    fn neighbouring_pixels_have_distinct_offsets() {
        let mut seen = std::collections::HashSet::new();
        for y in 0..16 {
            for x in 0..32 {
                assert!(seen.insert(tiled_2d_offset(x, y, 32, 2)));
            }
        }
    }

    /// Pitch alignment is a power-of-two rounding: 1280 stays 1280, 1281
    /// rounds to 1312.
    #[test]
    fn align_pitch_rounds_up_to_32() {
        assert_eq!(align_pitch_to_macro_tile(1280), 1280);
        assert_eq!(align_pitch_to_macro_tile(1281), 1312);
        assert_eq!(align_pitch_to_macro_tile(31), 32);
    }

    /// A block size that is zero or not a power of two has no valid log2
    /// and must be rejected instead of silently mis-addressing the source.
    #[test]
    fn rejects_invalid_bpp() {
        let src = vec![0u8; 4096];
        let mut dst = vec![0u8; 4096];
        assert_eq!(detile_2d(&src, &mut dst, 4, 4, 32, 0), Err(()));
        assert_eq!(detile_2d(&src, &mut dst, 4, 4, 32, 3), Err(()));
    }

    /// A destination that is too small is rejected before anything is
    /// written to it.
    #[test]
    fn short_destination_is_left_untouched() {
        let src = vec![0u8; 4096];
        let mut dst = vec![0xAAu8; 4 * 4 * 4 - 1];
        assert_eq!(detile_2d(&src, &mut dst, 4, 4, 32, 4), Err(()));
        assert!(dst.iter().all(|&b| b == 0xAA));
    }
}