//! CPU-side shadow of the Xenos GPU's 10 MiB EDRAM.
//!
//! The real console has 10 MiB of embedded DRAM organised as 2048 tiles,
//! each 80 × 16 samples at 32 bits per sample (`xenos.h:223-285`,
//! `kEdramTileCount = 2048`). 64-bpp formats pack two adjacent EDRAM tiles
//! per color value.
//!
//! xenia-rs does not currently render through a real EDRAM (host draws go
//! straight to wgpu attachments), but the resolve path still needs a
//! concrete byte source. We keep a linear 10 MiB `Vec` here so:
//!
//! * clear-resolves can paint `RB_COLOR_CLEAR` / `RB_DEPTH_CLEAR` into the
//!   source tiles, which the resolve loop then copies into guest memory
//!   (this is the Sylpheed-first-pixels path);
//! * future host→EDRAM readback code has a place to deposit pixels without
//!   touching the resolve API.
//!
//! Byte layout inside one tile: row-major, `80 * 16 * bpp` bytes. At 32bpp,
//! offset `= y * 80 * 4 + x * 4` from the tile base. Samples are stored as
//! little-endian u32 bytes; any Xenon big-endian vs little-endian shuffling
//! happens at the resolve write boundary, not inside EDRAM.
//!
//! Indexing wraps mod 2048 (`XE_GPU_REGISTER` `RB_COLOR_INFO.color_base` is
//! 11-bit). Canary relies on this wraparound for tall surfaces that
//! exceed the 10 MiB region.
//!
//! All coordinate and tile-index arithmetic in this module wraps explicitly
//! (mod 2^32, which is exact for the mod-2048 tile index), so register
//! garbage coming from the guest can never trigger an integer-overflow panic
//! in debug builds; debug and release behave identically.

/// Number of tiles in EDRAM. `xenos::kEdramTileCount`.
pub const EDRAM_TILE_COUNT: u32 = 2048;
/// Samples per tile along X. `xenos::kEdramTileWidthSamples`.
pub const EDRAM_TILE_WIDTH_SAMPLES: u32 = 80;
/// Samples per tile along Y. `xenos::kEdramTileHeightSamples`.
pub const EDRAM_TILE_HEIGHT_SAMPLES: u32 = 16;
/// Bytes per tile at 32bpp: 80 × 16 × 4 = 5120.
pub const EDRAM_TILE_BYTES_32BPP: u32 = EDRAM_TILE_WIDTH_SAMPLES * EDRAM_TILE_HEIGHT_SAMPLES * 4;
/// Bytes per tile at 64bpp: 80 × 16 × 8 = 10_240 (two adjacent 32bpp tiles).
pub const EDRAM_TILE_BYTES_64BPP: u32 = EDRAM_TILE_BYTES_32BPP * 2;
/// Total EDRAM size in bytes: 2048 × 5120 = 10_485_760 (exactly 10 MiB).
pub const EDRAM_SIZE_BYTES: usize = (EDRAM_TILE_COUNT * EDRAM_TILE_BYTES_32BPP) as usize;

/// 10 MiB shadow of the console's EDRAM. Owned by `GpuSystem` and lives for
/// the lifetime of the GPU; no per-frame allocation.
pub struct ShadowEdram {
    bytes: Vec<u8>,
}

impl Default for ShadowEdram {
    fn default() -> Self {
        Self::new()
    }
}

impl ShadowEdram {
    /// Allocate a zero-filled shadow of exactly [`EDRAM_SIZE_BYTES`] bytes.
    pub fn new() -> Self {
        Self {
            bytes: vec![0u8; EDRAM_SIZE_BYTES],
        }
    }

    /// Raw byte offset of a tile within the shadow buffer, wrapped mod 2048.
    #[inline]
    fn tile_byte_offset(tile_index: u32) -> usize {
        ((tile_index % EDRAM_TILE_COUNT) * EDRAM_TILE_BYTES_32BPP) as usize
    }

    /// The whole shadow buffer as a flat byte slice (tile 0 first).
    pub fn as_bytes(&self) -> &[u8] {
        &self.bytes
    }

    /// Borrow the 5120 bytes of one 32bpp tile. `tile_index` wraps mod 2048.
    pub fn tile(&self, tile_index: u32) -> &[u8] {
        let off = Self::tile_byte_offset(tile_index);
        &self.bytes[off..off + EDRAM_TILE_BYTES_32BPP as usize]
    }

    /// Mutably borrow the 5120 bytes of one 32bpp tile. `tile_index` wraps
    /// mod 2048.
    pub fn tile_mut(&mut self, tile_index: u32) -> &mut [u8] {
        let off = Self::tile_byte_offset(tile_index);
        &mut self.bytes[off..off + EDRAM_TILE_BYTES_32BPP as usize]
    }

    /// Sample-space byte offset within the shadow buffer for one 32bpp
    /// sample at `(x, y)` in a surface whose EDRAM origin is `base_tiles`
    /// and whose row pitch is `pitch_tiles` 32bpp tiles.
    ///
    /// Tile layout: a surface of pitch `P` tiles is laid out as a row of
    /// `P` tiles followed by the next 16-sample-tall row, etc. Sample
    /// `(x, y)` lives in tile `(y/16)*P + (x/80)`, at row `y % 16` and
    /// column `x % 80` within that tile.
    ///
    /// Returns `None` only for the degenerate `pitch_tiles == 0` surface.
    #[inline]
    fn sample_offset_32bpp(base_tiles: u16, pitch_tiles: u32, x: u32, y: u32) -> Option<usize> {
        if pitch_tiles == 0 {
            return None;
        }
        let tile_row = y / EDRAM_TILE_HEIGHT_SAMPLES;
        let tile_col = x / EDRAM_TILE_WIDTH_SAMPLES;
        let within_y = y % EDRAM_TILE_HEIGHT_SAMPLES;
        let within_x = x % EDRAM_TILE_WIDTH_SAMPLES;
        // Wrapping is deliberate: 2^32 is a multiple of 2048, so reducing
        // mod 2^32 first never changes the final mod-2048 tile index, and a
        // huge pitch can no longer overflow-panic in debug builds.
        let tile_index = u32::from(base_tiles)
            .wrapping_add(tile_row.wrapping_mul(pitch_tiles))
            .wrapping_add(tile_col);
        // At most (15 * 80 + 79) * 4 = 5116, so this cannot overflow.
        let within_tile = (within_y * EDRAM_TILE_WIDTH_SAMPLES + within_x) * 4;
        Some(Self::tile_byte_offset(tile_index) + within_tile as usize)
    }

    /// Derive the doubled `(base, pitch)` pair the 64bpp helpers address
    /// with, from the 32bpp-equivalent register values.
    #[inline]
    fn doubled_for_64bpp(base_tiles: u16, pitch_tiles_32bpp: u32) -> (u16, u32) {
        // Wrapping the base mod 65536 preserves the mod-2048 tile index.
        (
            base_tiles.wrapping_mul(2),
            pitch_tiles_32bpp.saturating_mul(2),
        )
    }

    /// Fill a `(w × h)`-sample rectangle at `(x, y)` with a constant 32bpp
    /// pattern. Coordinates are in *sample space* (already scaled through
    /// `sample_count_log2_x/y` for MSAA). Wraps mod 2048 tiles via
    /// `tile_byte_offset`. A zero `w`, `h` or `pitch_tiles` is a no-op.
    ///
    /// The pattern is written as little-endian bytes — the endian swap in
    /// [`crate::resolve::apply_endian_128`] converts to the byte order
    /// expected by the destination.
    #[allow(clippy::too_many_arguments)]
    pub fn fill_rect_32bpp(
        &mut self,
        base_tiles: u16,
        pitch_tiles: u32,
        x: u32,
        y: u32,
        w: u32,
        h: u32,
        pattern: u32,
    ) {
        // Zero pitch makes every sample write a no-op; skip the loop entirely.
        if w == 0 || h == 0 || pitch_tiles == 0 {
            return;
        }
        for dy in 0..h {
            let sy = y.wrapping_add(dy);
            for dx in 0..w {
                self.write_sample_32bpp(base_tiles, pitch_tiles, x.wrapping_add(dx), sy, pattern);
            }
        }
    }

    /// Read one 32bpp sample at `(x, y)` in sample coordinates. Returns 0
    /// if the surface pitch is zero (degenerate; caller should skip the
    /// resolve).
    pub fn read_sample_32bpp(&self, base_tiles: u16, pitch_tiles: u32, x: u32, y: u32) -> u32 {
        Self::sample_offset_32bpp(base_tiles, pitch_tiles, x, y)
            .and_then(|off| self.bytes.get(off..off + 4))
            .map_or(0, |b| u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
    }

    /// Write one 32bpp sample at `(x, y)` in sample coordinates. Mirror of
    /// [`Self::read_sample_32bpp`]; a zero pitch makes this a no-op. Used by
    /// the wgpu→ShadowEdram readback retile path and unit tests.
    pub fn write_sample_32bpp(
        &mut self,
        base_tiles: u16,
        pitch_tiles: u32,
        x: u32,
        y: u32,
        sample: u32,
    ) {
        let slot = Self::sample_offset_32bpp(base_tiles, pitch_tiles, x, y)
            .and_then(|off| self.bytes.get_mut(off..off + 4));
        if let Some(slot) = slot {
            slot.copy_from_slice(&sample.to_le_bytes());
        }
    }

    /// Bulk write a `(w × h)`-sample rectangle at `(x, y)` from a row-major
    /// linear `samples` buffer. The buffer length must be at least `w * h`
    /// (checked by a `debug_assert!`); in release builds a short buffer
    /// writes as many samples as it holds and stops. Extra entries are
    /// ignored. Order: `samples[dy * w + dx]` lands at (x + dx, y + dy).
    /// This is the format the wgpu→ShadowEdram readback path uses after
    /// stripping wgpu's 256-byte row alignment.
    #[allow(clippy::too_many_arguments)]
    pub fn write_rect_32bpp(
        &mut self,
        base_tiles: u16,
        pitch_tiles: u32,
        x: u32,
        y: u32,
        w: u32,
        h: u32,
        samples: &[u32],
    ) {
        if w == 0 || h == 0 {
            return;
        }
        let needed = (w as usize).saturating_mul(h as usize);
        debug_assert!(samples.len() >= needed, "write_rect_32bpp: samples too short");
        // `dy < h` and `dx < w`, so the casts back to u32 are lossless.
        for (dy, row) in samples.chunks(w as usize).take(h as usize).enumerate() {
            let sy = y.wrapping_add(dy as u32);
            for (dx, &sample) in row.iter().enumerate() {
                let sx = x.wrapping_add(dx as u32);
                self.write_sample_32bpp(base_tiles, pitch_tiles, sx, sy, sample);
            }
        }
    }

    // --- 64bpp helpers ----------------------------------------------------
    //
    // 64bpp formats (`k_16_16_16_16`, `k_16_16_16_16_FLOAT`, `k_32_32_FLOAT`)
    // occupy two adjacent EDRAM tiles per logical tile, doubling the row
    // pitch in tiles. Per Canary `xenos.h:321-325 IsColorRenderTargetFormat64bpp`
    // and `draw_util.cc:1260-1262` (`pitch_tiles = surface_pitch_tiles << is_64bpp`).
    //
    // Convention: callers pass the *32bpp-equivalent* `base_tiles` and
    // `pitch_tiles_32bpp` (i.e. the `RB_COLOR_INFO.color_base` and
    // `surface_pitch_tiles` decoded from registers). The 64bpp helpers
    // multiply both by 2 internally so the lo/hi pair lands in adjacent
    // samples. `lo` is the lower-addressed 32bpp word; `hi` is the upper.
    //
    // NOTE(review): the Canary line cited above only shows the *pitch* being
    // shifted; confirm that doubling the base as well matches the reference.

    /// Read one 64bpp sample as `(lo, hi)` u32 pair. Doubled-tile addressing
    /// per Canary's `is_64bpp` convention. Returns `(0, 0)` for zero pitch.
    pub fn read_sample_64bpp(
        &self,
        base_tiles: u16,
        pitch_tiles_32bpp: u32,
        x: u32,
        y: u32,
    ) -> (u32, u32) {
        let (base64, pitch64) = Self::doubled_for_64bpp(base_tiles, pitch_tiles_32bpp);
        let x_lo = x.saturating_mul(2);
        let x_hi = x_lo.wrapping_add(1);
        (
            self.read_sample_32bpp(base64, pitch64, x_lo, y),
            self.read_sample_32bpp(base64, pitch64, x_hi, y),
        )
    }

    /// Write one 64bpp sample as `(lo, hi)` u32 pair. Mirror of
    /// [`Self::read_sample_64bpp`].
    pub fn write_sample_64bpp(
        &mut self,
        base_tiles: u16,
        pitch_tiles_32bpp: u32,
        x: u32,
        y: u32,
        lo: u32,
        hi: u32,
    ) {
        let (base64, pitch64) = Self::doubled_for_64bpp(base_tiles, pitch_tiles_32bpp);
        let x_lo = x.saturating_mul(2);
        let x_hi = x_lo.wrapping_add(1);
        self.write_sample_32bpp(base64, pitch64, x_lo, y, lo);
        self.write_sample_32bpp(base64, pitch64, x_hi, y, hi);
    }

    /// Bulk write a 64bpp rectangle from a row-major `(lo, hi)` linear
    /// buffer: `samples[dy * w + dx]` lands at (x + dx, y + dy). A buffer
    /// shorter than `w * h` writes as many samples as it holds and stops;
    /// extra entries are ignored.
    #[allow(clippy::too_many_arguments)]
    pub fn write_rect_64bpp(
        &mut self,
        base_tiles: u16,
        pitch_tiles_32bpp: u32,
        x: u32,
        y: u32,
        w: u32,
        h: u32,
        samples: &[(u32, u32)],
    ) {
        if w == 0 || h == 0 {
            return;
        }
        // `dy < h` and `dx < w`, so the casts back to u32 are lossless.
        for (dy, row) in samples.chunks(w as usize).take(h as usize).enumerate() {
            let sy = y.wrapping_add(dy as u32);
            for (dx, &(lo, hi)) in row.iter().enumerate() {
                let sx = x.wrapping_add(dx as u32);
                self.write_sample_64bpp(base_tiles, pitch_tiles_32bpp, sx, sy, lo, hi);
            }
        }
    }

    /// Fill a `(w × h)`-sample rectangle with a constant 64bpp pattern.
    /// `lo` lands at the low-addressed 32bpp word, `hi` at the high one
    /// — i.e. for clears, callers pass `(lo = RB_COLOR_CLEAR_LO,
    /// hi = RB_COLOR_CLEAR)` per Canary `draw_util.cc:1302-1303`.
    /// A zero `w`, `h` or `pitch_tiles_32bpp` is a no-op.
    #[allow(clippy::too_many_arguments)]
    pub fn fill_rect_64bpp(
        &mut self,
        base_tiles: u16,
        pitch_tiles_32bpp: u32,
        x: u32,
        y: u32,
        w: u32,
        h: u32,
        lo: u32,
        hi: u32,
    ) {
        // Zero pitch makes every sample write a no-op; skip the loop entirely.
        if w == 0 || h == 0 || pitch_tiles_32bpp == 0 {
            return;
        }
        for dy in 0..h {
            let sy = y.wrapping_add(dy);
            for dx in 0..w {
                let sx = x.wrapping_add(dx);
                self.write_sample_64bpp(base_tiles, pitch_tiles_32bpp, sx, sy, lo, hi);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn shadow_edram_is_exactly_10_mib() {
        assert_eq!(EDRAM_SIZE_BYTES, 10 * 1024 * 1024);
        let e = ShadowEdram::new();
        assert_eq!(e.as_bytes().len(), 10 * 1024 * 1024);
    }

    #[test]
    fn fill_rect_writes_the_whole_first_tile() {
        let mut e = ShadowEdram::new();
        e.fill_rect_32bpp(0, 1, 0, 0, 80, 16, 0x11223344);
        // Every 4-byte sample in tile 0 should be 0x11223344 (LE).
        let expected = 0x11223344u32.to_le_bytes();
        let tile = e.tile(0);
        for chunk in tile.chunks_exact(4) {
            assert_eq!(chunk, expected);
        }
    }

    #[test]
    fn fill_rect_respects_pitch_and_base() {
        let mut e = ShadowEdram::new();
        // Surface: pitch=2 tiles, base=5. A 160x16 fill should land in
        // tiles 5 and 6 — and leave tile 4 / tile 7 / tile 0 untouched.
        e.fill_rect_32bpp(5, 2, 0, 0, 160, 16, 0xAABBCCDD);
        let expected = 0xAABBCCDDu32.to_le_bytes();
        for chunk in e.tile(5).chunks_exact(4) {
            assert_eq!(chunk, expected);
        }
        for chunk in e.tile(6).chunks_exact(4) {
            assert_eq!(chunk, expected);
        }
        assert!(e.tile(4).iter().all(|&b| b == 0));
        assert!(e.tile(7).iter().all(|&b| b == 0));
        assert!(e.tile(0).iter().all(|&b| b == 0));
    }

    #[test]
    fn fill_rect_wraps_mod_2048() {
        let mut e = ShadowEdram::new();
        // base=2047, pitch=2: first tile is 2047, second wraps to 0.
        e.fill_rect_32bpp(2047, 2, 0, 0, 160, 16, 0xDEAD_BEEF);
        let expected = 0xDEAD_BEEFu32.to_le_bytes();
        for chunk in e.tile(2047).chunks_exact(4) {
            assert_eq!(chunk, expected);
        }
        for chunk in e.tile(0).chunks_exact(4) {
            assert_eq!(chunk, expected);
        }
    }

    #[test]
    fn read_sample_roundtrips_fill_rect() {
        let mut e = ShadowEdram::new();
        e.fill_rect_32bpp(3, 1, 0, 0, 80, 16, 0xCAFE_F00D);
        // Sample the two opposite corners of the filled tile.
        assert_eq!(e.read_sample_32bpp(3, 1, 0, 0), 0xCAFE_F00D);
        assert_eq!(e.read_sample_32bpp(3, 1, 79, 15), 0xCAFE_F00D);
        // Untouched neighbouring tile.
        assert_eq!(e.read_sample_32bpp(4, 1, 0, 0), 0);
    }

    #[test]
    fn zero_pitch_is_a_noop_read() {
        let e = ShadowEdram::new();
        assert_eq!(e.read_sample_32bpp(0, 0, 10, 10), 0);
    }

    /// `write_sample_32bpp` round-trips through `read_sample_32bpp`.
    #[test]
    fn write_sample_32bpp_round_trips() {
        let mut e = ShadowEdram::new();
        for x in 0..80u32 {
            for y in 0..16u32 {
                e.write_sample_32bpp(0, 1, x, y, 0xABCD_0000 | (y << 8) | x);
            }
        }
        for x in 0..80u32 {
            for y in 0..16u32 {
                assert_eq!(
                    e.read_sample_32bpp(0, 1, x, y),
                    0xABCD_0000 | (y << 8) | x,
                    "round-trip mismatch at ({x},{y})"
                );
            }
        }
    }

    /// `write_rect_32bpp` writes row-major samples into the right
    /// sample-offsets, including across tile boundaries.
    #[test]
    fn write_rect_32bpp_crosses_tile_boundary() {
        let mut e = ShadowEdram::new();
        // Surface pitch = 2 tiles → x in [0, 160), y in [0, 16). A 100x4
        // rect at (40, 4) crosses x=80 (tile boundary).
        let w = 100u32;
        let h = 4u32;
        let mut samples = Vec::with_capacity((w * h) as usize);
        for dy in 0..h {
            for dx in 0..w {
                samples.push(0x10000 | (dy << 8) | dx);
            }
        }
        e.write_rect_32bpp(0, 2, 40, 4, w, h, &samples);
        // Spot-check: (40, 4) lands in tile 0; (139, 7) in tile 1.
        assert_eq!(e.read_sample_32bpp(0, 2, 40, 4), 0x1_0000);
        assert_eq!(
            e.read_sample_32bpp(0, 2, 139, 7),
            0x10000 | (3 << 8) | 99
        );
    }

    /// `read_sample_64bpp` round-trips through `write_sample_64bpp` —
    /// doubled-pitch addressing keeps lo/hi adjacent in EDRAM bytes.
    #[test]
    fn write_read_sample_64bpp_roundtrips() {
        let mut e = ShadowEdram::new();
        // 32bpp pitch=1, base=0 → 64bpp pitch=2, base=0. Each logical 64bpp
        // sample occupies two 32bpp samples, so a full 80x16 logical region
        // would span a pair of tiles; a 16x4 region keeps the test small.
        for x in 0..16u32 {
            for y in 0..4u32 {
                e.write_sample_64bpp(0, 1, x, y, 0xAAAA_0000 | x, 0xBBBB_0000 | y);
            }
        }
        for x in 0..16u32 {
            for y in 0..4u32 {
                let (lo, hi) = e.read_sample_64bpp(0, 1, x, y);
                assert_eq!(lo, 0xAAAA_0000 | x);
                assert_eq!(hi, 0xBBBB_0000 | y);
            }
        }
    }

    /// `fill_rect_64bpp` writes both the lo and hi clear words across
    /// a 64bpp surface — matches the `RB_COLOR_CLEAR_LO`/`RB_COLOR_CLEAR`
    /// convention.
    #[test]
    fn fill_rect_64bpp_writes_both_words() {
        let mut e = ShadowEdram::new();
        // 16x4 logical 64bpp samples; pitch=1 32bpp tile → 2 64bpp tiles.
        e.fill_rect_64bpp(0, 1, 0, 0, 16, 4, 0xCAFE_F00D, 0xDEAD_BEEF);
        for x in 0..16u32 {
            for y in 0..4u32 {
                let (lo, hi) = e.read_sample_64bpp(0, 1, x, y);
                assert_eq!(lo, 0xCAFE_F00D);
                assert_eq!(hi, 0xDEAD_BEEF);
            }
        }
    }

    /// 64bpp helpers must respect the doubled tile pitch — adjacent logical
    /// 64bpp samples must land at adjacent 32bpp samples in EDRAM.
    #[test]
    fn sixty_four_bpp_uses_doubled_pitch() {
        let mut e = ShadowEdram::new();
        e.write_sample_64bpp(0, 1, 5, 0, 0x1111_1111, 0x2222_2222);
        // The lo word must sit at 32bpp x=10 (5 << 1), hi at x=11.
        // Doubled pitch -> base=0, pitch=2 32bpp.
        assert_eq!(e.read_sample_32bpp(0, 2, 10, 0), 0x1111_1111);
        assert_eq!(e.read_sample_32bpp(0, 2, 11, 0), 0x2222_2222);
    }

    /// `write_rect_*` with empty dimensions is a no-op.
    #[test]
    fn write_rect_empty_is_noop() {
        let mut e = ShadowEdram::new();
        e.write_rect_32bpp(0, 1, 0, 0, 0, 5, &[1, 2, 3]);
        e.write_rect_32bpp(0, 1, 0, 0, 5, 0, &[1, 2, 3]);
        e.fill_rect_64bpp(0, 1, 0, 0, 0, 5, 1, 2);
        e.fill_rect_64bpp(0, 1, 0, 0, 5, 0, 1, 2);
        // Nothing should have been written.
        assert!(e.as_bytes().iter().all(|&b| b == 0));
    }

    /// Overflowing coordinates or tile indices wrap instead of panicking,
    /// identically in debug and release builds.
    #[test]
    fn coordinate_overflow_wraps_without_panicking() {
        let mut e = ShadowEdram::new();
        // tile_row (2) * pitch (u32::MAX) wraps to 0xFFFF_FFFE → tile 2046.
        e.write_sample_32bpp(0, u32::MAX, 0, 32, 0x1234_5678);
        assert_eq!(&e.tile(2046)[..4], &0x1234_5678u32.to_le_bytes()[..]);
        // x + dx wraps from u32::MAX back to 0.
        e.fill_rect_32bpp(0, 1, u32::MAX, 0, 2, 1, 0x0BAD_CAFE);
        assert_eq!(e.read_sample_32bpp(0, 1, u32::MAX, 0), 0x0BAD_CAFE);
        assert_eq!(e.read_sample_32bpp(0, 1, 0, 0), 0x0BAD_CAFE);
    }
}