//! EDRAM→guest-memory resolve byte copy. //! //! Fires from [`crate::gpu_system::GpuSystem::handle_event_initiator`] on //! `TILE_FLUSH` (event 15). Reads samples out of the shadow EDRAM at the //! source tile range, applies the `Endian128` byte swap, and writes tiled //! u32 samples into guest memory via a 32bpp bitwise-equivalent fast path //! (Canary `IsColorResolveFormatBitwiseEquivalent` — `xenos.h:614-639`). //! //! Ground truth: `xenia-canary/src/xenia/gpu/draw_util.cc:1102-1370` and //! `xenos.h:1077-1114` (`GpuSwapInline`), `1039-1052` (`CopySampleSelect`). //! //! ## Endian ordering //! //! [`xenia_memory::access::MemoryAccess::write_u32`] stores big-endian //! bytes (it calls `val.to_be_bytes()`). The Xenon CPU sees memory as big- //! endian u32s, so `write_u32(addr, 0x11223344)` lands `[0x11, 0x22, 0x33, //! 0x44]` in memory — which is the `kNone` (no swap) byte order from the //! host's view of the sample. //! //! The resolve has an `Endian128` mode controlled by //! `RB_COPY_DEST_INFO.copy_dest_endian`: games typically set `k8in32` so //! that later texture fetches see little-endian bytes. We therefore //! pre-swap the sample *before* `write_u32` so the big-endian store yields //! the desired byte order in memory. use crate::draw_state::{ResolveInfo, ResolveSource}; use crate::edram::ShadowEdram; use crate::render_target_cache::MsaaSamples; use crate::tiled_address::{align_pitch_to_macro_tile, tiled_2d_offset}; use xenia_memory::access::MemoryAccess; /// Stats returned from one resolve copy. Aggregated by the caller into /// `GpuStats` counters so the HUD can surface them. #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub struct ResolveCopyStats { /// Number of 32bpp samples actually written to guest memory. pub samples_written: u32, /// Was the format path supported? `false` means we skipped. pub supported: bool, } /// `xenos::CopyCommand::kNull` = 3 — resolve emits no copy (clear-only). 
pub const COPY_COMMAND_NULL: u8 = 3; /// Sanitized sample selector (`xenos::CopySampleSelect`, `xenos.h:1039`). /// We keep the *raw* enum value in `ResolveInfo` and pass a sanitized one /// here so callers can match on the effective mode rather than re-applying /// the MSAA/depth sanitation rules from Canary `draw_util.cc:839-876`. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CopySampleSelect { K0 = 0, K1 = 1, K2 = 2, K3 = 3, K01 = 4, K23 = 5, K0123 = 6, } impl CopySampleSelect { pub fn from_raw(raw: u8) -> Self { match raw & 0x7 { 1 => Self::K1, 2 => Self::K2, 3 => Self::K3, 4 => Self::K01, 5 => Self::K23, 6 | 7 => Self::K0123, _ => Self::K0, } } /// Single-sample picks return `Some(index 0..=3)`; averaging picks /// return `None` (caller must synthesize via per-sample reads). pub fn single_sample_index(self) -> Option { match self { Self::K0 => Some(0), Self::K1 => Some(1), Self::K2 => Some(2), Self::K3 => Some(3), _ => None, } } /// `IsSingleCopySampleSelected` from `xenos.h:1049`. pub fn is_single_sample(self) -> bool { self.single_sample_index().is_some() } } /// `SanitizeCopySampleSelect` (Canary `draw_util.cc:839-876`). MSAA /// modes + depth limit which sample selectors are valid; invalid ones /// are silently remapped. Returning the sanitized enum lets the resolve /// loop assume a single-sample pick for 1x MSAA, etc. pub fn sanitize_sample_select( raw: u8, msaa: MsaaSamples, is_depth: bool, ) -> CopySampleSelect { let select = CopySampleSelect::from_raw(raw); match msaa { MsaaSamples::X1 => { // Only sample 0 exists. Averaging modes → k0; >k0123 clamp. match select { CopySampleSelect::K0 => CopySampleSelect::K0, _ => CopySampleSelect::K0, } } MsaaSamples::X2 => { // Samples 0 and 1 exist (stacked vertically). k2 → k0, k3 → k1; // k23 → k01. Depth cannot average. 
match select { CopySampleSelect::K0 => CopySampleSelect::K0, CopySampleSelect::K1 => CopySampleSelect::K1, CopySampleSelect::K2 => CopySampleSelect::K0, CopySampleSelect::K3 => CopySampleSelect::K1, CopySampleSelect::K01 | CopySampleSelect::K23 | CopySampleSelect::K0123 => { if is_depth { CopySampleSelect::K0 } else { CopySampleSelect::K01 } } } } MsaaSamples::X4 => { // All single-samples valid. Depth cannot average → pick // representative single sample (k01→k0, k23→k2, k0123→k0). if is_depth { match select { CopySampleSelect::K01 => CopySampleSelect::K0, CopySampleSelect::K23 => CopySampleSelect::K2, CopySampleSelect::K0123 => CopySampleSelect::K0, other => other, } } else { select } } } } /// Sample-index to in-pixel (dx, dy) offset for the current MSAA mode. /// Matches the standard Xbox 360 MSAA sample layout (Canary /// `texture_util::GetMsaaSampleLocation` / the shader constants). For 1x, /// always `(0, 0)`. /// /// * 2x MSAA: sample 0 = top line, sample 1 = bottom line. /// * 4x MSAA: 2×2 grid `{(0,0),(1,0),(0,1),(1,1)}`. #[inline] fn sample_offset_in_pixel(sample_idx: u8, msaa: MsaaSamples) -> (u32, u32) { match msaa { MsaaSamples::X1 => (0, 0), MsaaSamples::X2 => (0, (sample_idx & 1) as u32), MsaaSamples::X4 => ((sample_idx & 1) as u32, ((sample_idx >> 1) & 1) as u32), } } /// Apply the `Endian128` byte swap to one 32-bit sample. Matches the cases /// inside `GpuSwapInline` plus the 64/128-bit variants from /// `xenos::Endian128`. The 64/128 modes cannot be expressed in a single u32 /// so they fall through to `k8in32` and log at the call site. #[inline] pub fn apply_endian_128(value: u32, endian: u8) -> u32 { match endian { 0 => value, // k8in16: swap bytes within each 16-bit word. 1 => ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8), // k8in32: full byte reversal. 2 => value.swap_bytes(), // k16in32: swap 16-bit halves. 3 => value.rotate_left(16), // k8in64 / k8in128: require cross-dword context. 
Approximate with // k8in32 (byte-reverse each dword) so the bytes land in a sensible // order; the caller logs the approximation. 4 | 5 => value.swap_bytes(), _ => value, } } /// `xenos::ColorFormat` values we use as destination formats for 32bpp /// resolves. Canary `xenos.h:582-609`. mod color_format { pub const K_8_8_8_8: u8 = 6; pub const K_2_10_10_10: u8 = 7; pub const K_8_8_8_8_A: u8 = 14; pub const K_16_16_FLOAT: u8 = 31; pub const K_32_FLOAT: u8 = 36; pub const K_8_8_8_8_AS_16_16_16_16: u8 = 50; pub const K_2_10_10_10_AS_16_16_16_16: u8 = 54; // ── 64bpp dest formats (Canary `xenos.h:582-609`) ────────────────── /// `k_16_16_16_16` (4 channels × 16 bits, signed/unsigned variants /// resolve identically — same bit layout). pub const K_16_16_16_16: u8 = 26; /// `k_16_16_16_16_FLOAT` (4 channels × half-float). pub const K_16_16_16_16_FLOAT: u8 = 32; /// `k_32_32_FLOAT` (R32 + G32, 64bpp). `xenos::TextureFormat = 37`. pub const K_32_32_FLOAT: u8 = 37; /// Depth textures (Canary `xenos::TextureFormat`). pub const K_24_8: u8 = 22; pub const K_24_8_FLOAT: u8 = 23; } /// 32-bit bitwise-equivalence check covering 32bpp color and depth resolves. /// Color side mirrors `xenos::IsColorResolveFormatBitwiseEquivalent` /// (`xenos.h:614-639`). Depth side maps `DepthRenderTargetFormat` to /// its textural form (`kD24S8 → k_24_8`, `kD24FS8 → k_24_8_FLOAT`). pub fn is_32bpp_bitwise_equivalent( source: ResolveSource, source_is_64bpp: bool, source_format: u8, dest_format: u8, ) -> bool { if source_is_64bpp { return false; } match source { ResolveSource::Color(_) => { use color_format as cf; match source_format { // k_8_8_8_8 (0) and k_8_8_8_8_GAMMA (1). Gamma decode is // applied by the sampler at texture-fetch time (TextureSign:: // kGamma); the bits are identical, so the copy path is the // same. 0 | 1 => matches!( dest_format, cf::K_8_8_8_8 | cf::K_8_8_8_8_A | cf::K_8_8_8_8_AS_16_16_16_16 ), // k_2_10_10_10 (2) and k_2_10_10_10_AS_10_10_10_10 (10). 
2 | 10 => matches!( dest_format, cf::K_2_10_10_10 | cf::K_2_10_10_10_AS_16_16_16_16 ), // k_16_16_FLOAT (6). 6 => dest_format == cf::K_16_16_FLOAT, // k_32_FLOAT (14). 14 => dest_format == cf::K_32_FLOAT, _ => false, } } ResolveSource::Depth => match source_format { // kD24S8 (0) → k_24_8 (22). 0 => dest_format == color_format::K_24_8, // kD24FS8 (1) → k_24_8_FLOAT (23). 1 => dest_format == color_format::K_24_8_FLOAT, _ => false, }, } } /// 64-bit bitwise-equivalence check (Canary `xenos.h:614-639` 64bpp arms). /// Used when `info.source_is_64bpp == true`. Only color resolves go here — /// depth is always 32bpp. pub fn is_64bpp_bitwise_equivalent(source_format: u8, dest_format: u8) -> bool { use color_format as cf; match source_format { // k_16_16_16_16 (5) — signed and unsigned variants resolve to the // same bits because the resolve is a raw byte copy. 5 => dest_format == cf::K_16_16_16_16, // k_16_16_16_16_FLOAT (7). 7 => dest_format == cf::K_16_16_16_16_FLOAT, // k_32_32_FLOAT (15). 15 => dest_format == cf::K_32_32_FLOAT, _ => false, } } /// Run one resolve copy. Returns the number of samples successfully /// written and whether the dest format was supported; the caller updates /// `GpuStats::resolves_copied_total` / `resolves_skipped_total` accordingly. 
pub fn copy_to_memory( info: &ResolveInfo, edram: &ShadowEdram, mem: &dyn MemoryAccess, ) -> ResolveCopyStats { // --- No-op paths (not a failure) --- if info.coords.width == 0 || info.coords.height == 0 { return ResolveCopyStats { samples_written: 0, supported: true, }; } if info.copy_command == COPY_COMMAND_NULL { return ResolveCopyStats { samples_written: 0, supported: true, }; } // --- Supported-shape gates --- if info.copy_dest_array { tracing::warn!( src = info.copy_src_select, fmt = info.dest_format, "gpu: resolve skipped — copy_dest_array (3D/stacked) not implemented" ); return ResolveCopyStats::default(); } if info.dest_exp_bias != 0 { tracing::warn!( bias = info.dest_exp_bias, "gpu: resolve skipped — dest_exp_bias != 0 not implemented" ); return ResolveCopyStats::default(); } let supported = if info.source_is_64bpp { // 64bpp color resolve. Depth is always 32bpp so this only fires // for `ResolveSource::Color(_)`. matches!(info.source, ResolveSource::Color(_)) && is_64bpp_bitwise_equivalent(info.source_format, info.dest_format) } else { is_32bpp_bitwise_equivalent( info.source, info.source_is_64bpp, info.source_format, info.dest_format, ) }; if !supported { tracing::warn!( source = ?info.source, source_format = info.source_format, source_is_64bpp = info.source_is_64bpp, dest_format = info.dest_format, "gpu: resolve skipped — not a bitwise-equivalent pair" ); return ResolveCopyStats::default(); } if info.dest_endian >= 4 { tracing::warn!( endian = info.dest_endian, "gpu: resolve endian k8in64/k8in128 approximated as k8in32" ); } // Destination pitch must be aligned to 32 texels per // `kStoragePitchHeightAlignmentBlocks`. `align_pitch_to_macro_tile` // rounds to 32 (it's `MACRO_TILE_WIDTH_LOG2 = 5`). let pitch_aligned = align_pitch_to_macro_tile(info.dest_pitch_pixels); if pitch_aligned == 0 { return ResolveCopyStats { samples_written: 0, supported: true, }; } // bpp_log2: 2 for 32bpp, 3 for 64bpp. 
Drives the `tiled_2d_offset` // stride calculation per Canary `texture_address.h:120-180`. let bpp_log2: u32 = if info.source_is_64bpp { 3 } else { 2 }; let is_depth = matches!(info.source, ResolveSource::Depth); let sanitized = sanitize_sample_select(info.copy_sample_select, info.msaa, is_depth); // For averaging modes we'd previously fall back to sample 0 + warn. // 3A wires real averaging via `read_pixel_averaged`; single-sample // picks still take the fast path. let single_sample_idx = sanitized.single_sample_index(); let mut samples_written: u32 = 0; for dy in 0..info.coords.height { let pixel_y = info.coords.y0 + dy; for dx in 0..info.coords.width { let pixel_x = info.coords.x0 + dx; // Destination coordinates are 0-based against `dest_base` — the // base already points at the top-left of the copy rectangle. let dst_off = tiled_2d_offset(dx, dy, pitch_aligned, bpp_log2); let dst_addr = info.dest_base.wrapping_add(dst_off); if info.source_is_64bpp { let (lo, hi) = match single_sample_idx { Some(idx) => { let (sx, sy) = sample_xy(pixel_x, pixel_y, idx, info.msaa, &info.coords); edram.read_sample_64bpp( info.source_base_tiles, info.surface_pitch_tiles, sx, sy, ) } None => read_pixel_averaged_64bpp(edram, info, sanitized, pixel_x, pixel_y), }; let lo_swapped = apply_endian_128(lo, info.dest_endian); let hi_swapped = apply_endian_128(hi, info.dest_endian); mem.write_u32(dst_addr, lo_swapped); mem.write_u32(dst_addr.wrapping_add(4), hi_swapped); samples_written += 1; } else { let sample = match single_sample_idx { Some(idx) => { let (sx, sy) = sample_xy(pixel_x, pixel_y, idx, info.msaa, &info.coords); edram.read_sample_32bpp( info.source_base_tiles, info.surface_pitch_tiles, sx, sy, ) } None => read_pixel_averaged_32bpp( edram, info, sanitized, pixel_x, pixel_y, ), }; let swapped = apply_endian_128(sample, info.dest_endian); mem.write_u32(dst_addr, swapped); samples_written += 1; } } } ResolveCopyStats { samples_written, supported: true, } } /// Compute the EDRAM 
sample-space (x, y) for `(pixel_x, pixel_y)` and a /// single MSAA sample index. #[inline] fn sample_xy( pixel_x: u32, pixel_y: u32, sample_idx: u8, msaa: MsaaSamples, coords: &crate::draw_state::ResolveCoordinates, ) -> (u32, u32) { let (sample_dx, sample_dy) = sample_offset_in_pixel(sample_idx, msaa); let sx = (pixel_x << coords.sample_count_log2_x) + sample_dx; let sy = (pixel_y << coords.sample_count_log2_y) + sample_dy; (sx, sy) } /// Sample indices selected by an averaging `CopySampleSelect`. /// `K01 → [0, 1]`, `K23 → [2, 3]`, `K0123 → [0, 1, 2, 3]`. Single-sample /// picks should never reach this helper (caller checks `single_sample_index`). fn averaging_sample_set(select: CopySampleSelect) -> &'static [u8] { match select { CopySampleSelect::K01 => &[0, 1], CopySampleSelect::K23 => &[2, 3], CopySampleSelect::K0123 => &[0, 1, 2, 3], // Single-sample picks: caller must never invoke this — fall back // to sample 0 just to keep the function total. _ => &[0], } } /// Average N samples of a 32bpp pixel format. Each sample is read, decoded /// by `source_format`, averaged in the appropriate numeric space, then /// re-encoded back into the same 32bpp word. Mirrors Canary's resolve /// shader paths in `resolve.xesli:595-629` (per-format averaging) — we /// implement them on the CPU because the resolve runs on the host. fn read_pixel_averaged_32bpp( edram: &ShadowEdram, info: &ResolveInfo, select: CopySampleSelect, pixel_x: u32, pixel_y: u32, ) -> u32 { let indices = averaging_sample_set(select); let n = indices.len() as u32; if n == 0 { return 0; } // Pull every selected sample. 
let mut raw = [0u32; 4]; for (i, &idx) in indices.iter().enumerate() { let (sx, sy) = sample_xy(pixel_x, pixel_y, idx, info.msaa, &info.coords); raw[i] = edram.read_sample_32bpp(info.source_base_tiles, info.surface_pitch_tiles, sx, sy); } let raw_slice = &raw[..indices.len()]; average_samples_32bpp(raw_slice, info.source_format, info.source) } /// Average N samples of a 64bpp pixel format, returning `(lo, hi)`. fn read_pixel_averaged_64bpp( edram: &ShadowEdram, info: &ResolveInfo, select: CopySampleSelect, pixel_x: u32, pixel_y: u32, ) -> (u32, u32) { let indices = averaging_sample_set(select); let n = indices.len(); if n == 0 { return (0, 0); } let mut raw = [(0u32, 0u32); 4]; for (i, &idx) in indices.iter().enumerate() { let (sx, sy) = sample_xy(pixel_x, pixel_y, idx, info.msaa, &info.coords); raw[i] = edram.read_sample_64bpp(info.source_base_tiles, info.surface_pitch_tiles, sx, sy); } let raw_slice = &raw[..n]; average_samples_64bpp(raw_slice, info.source_format) } /// Per-format averaging for 32bpp color/depth resolves. fn average_samples_32bpp(samples: &[u32], source_format: u8, source: ResolveSource) -> u32 { let n = samples.len() as u32; debug_assert!(n > 0); match source { ResolveSource::Color(_) => match source_format { // k_8_8_8_8 / k_8_8_8_8_GAMMA (0/1): per-channel rounded // unsigned-int mean. Matches Canary's `resolve.xesli` per-component // average for u8 — gamma is a sampler-time post-decode, the // bits are identical for resolve purposes. 0 | 1 => average_8_8_8_8(samples, n), // k_2_10_10_10 / k_2_10_10_10_AS_10_10_10_10: per-field rounded // unsigned-int mean. Field widths 2/10/10/10 from low to high. 2 | 10 => average_2_10_10_10(samples, n), // k_16_16_FLOAT (6): two half-floats packed in one u32. 6 => average_2_half_floats(samples, n), // k_32_FLOAT (14): one f32 per sample. 
14 => average_1_f32(samples, n), // For any unsupported format, fall back to first sample — // upstream gating already filtered to bitwise-equivalent pairs // so this branch should be unreachable in practice. _ => samples[0], }, // Depth resolves never carry MSAA averaging (sanitize collapses to // single-sample); reaching this branch is a degenerate caller. ResolveSource::Depth => samples[0], } } /// Per-format averaging for 64bpp color resolves. Returns `(lo, hi)`. fn average_samples_64bpp(samples: &[(u32, u32)], source_format: u8) -> (u32, u32) { let n = samples.len() as u32; debug_assert!(n > 0); match source_format { // k_16_16_16_16 (5): four 16-bit channels across (lo, hi). Per- // channel rounded unsigned-int mean. Signed/unsigned variants // resolve identically because the resolve is a raw byte copy — // averaging signed values as unsigned still gives the correct // bits because two's-complement addition of `n` values divided // by `n` lands on the same bit pattern after truncation. 5 => average_4_u16(samples, n), // k_16_16_16_16_FLOAT (7): four half-floats. 7 => average_4_half_floats(samples, n), // k_32_32_FLOAT (15): two f32 (R32 = lo, G32 = hi). 15 => average_2_f32(samples, n), _ => samples[0], } } #[inline] fn average_8_8_8_8(samples: &[u32], n: u32) -> u32 { // Per-byte rounded unsigned mean. let mut sums = [0u32; 4]; for &s in samples { sums[0] += s & 0xFF; sums[1] += (s >> 8) & 0xFF; sums[2] += (s >> 16) & 0xFF; sums[3] += (s >> 24) & 0xFF; } let half = n / 2; let avg = |sum: u32| ((sum + half) / n) & 0xFF; avg(sums[0]) | (avg(sums[1]) << 8) | (avg(sums[2]) << 16) | (avg(sums[3]) << 24) } #[inline] fn average_2_10_10_10(samples: &[u32], n: u32) -> u32 { // Field widths 2/10/10/10 (low to high). 
let mut sum_a = 0u32; // 2 bits let mut sum_b = 0u32; // 10 bits let mut sum_g = 0u32; // 10 bits let mut sum_r = 0u32; // 10 bits for &s in samples { sum_a += s & 0x3; sum_b += (s >> 2) & 0x3FF; sum_g += (s >> 12) & 0x3FF; sum_r += (s >> 22) & 0x3FF; } let half = n / 2; let avg = |sum: u32, width: u32| ((sum + half) / n) & ((1u32 << width) - 1); avg(sum_a, 2) | (avg(sum_b, 10) << 2) | (avg(sum_g, 10) << 12) | (avg(sum_r, 10) << 22) } #[inline] fn half_to_f32(half: u16) -> f32 { let sign = ((half >> 15) & 0x1) as u32; let exp = ((half >> 10) & 0x1F) as i32; let mant = (half & 0x3FF) as u32; if exp == 0 { if mant == 0 { return f32::from_bits(sign << 31); } // Subnormal half → normalized f32. let mut e = -14; let mut m = mant; while (m & 0x400) == 0 { m <<= 1; e -= 1; } m &= 0x3FF; let f_exp = (e + 127) as u32; return f32::from_bits((sign << 31) | (f_exp << 23) | (m << 13)); } if exp == 31 { let f_exp = 0xFFu32; let f_mant = mant << 13; return f32::from_bits((sign << 31) | (f_exp << 23) | f_mant); } let f_exp = (exp - 15 + 127) as u32; f32::from_bits((sign << 31) | (f_exp << 23) | (mant << 13)) } #[inline] fn f32_to_half(f: f32) -> u16 { let bits = f.to_bits(); let sign = ((bits >> 31) & 0x1) as u16; let exp = ((bits >> 23) & 0xFF) as i32; let mant = bits & 0x7FFFFF; if exp == 0xFF { // Inf or NaN. let h_mant = if mant != 0 { 0x200 } else { 0 }; return (sign << 15) | (0x1F << 10) | h_mant; } if exp == 0 { return sign << 15; } let e = exp - 127 + 15; if e >= 31 { return (sign << 15) | (0x1F << 10); } if e <= 0 { // Subnormal half. Round-to-nearest-even is overkill; truncate // toward zero — averaging 4 floats then converting once is the // dominant precision path anyway. 
if e < -10 { return sign << 15; } let m = (mant | 0x800000) >> ((1 - e) as u32 + 13); return (sign << 15) | (m as u16); } let h_mant = (mant >> 13) as u16; (sign << 15) | ((e as u16) << 10) | h_mant } #[inline] fn average_2_half_floats(samples: &[u32], n: u32) -> u32 { // Each u32 = (lo: half, hi: half). Average as f32, re-encode. let mut sum_lo = 0.0f32; let mut sum_hi = 0.0f32; for &s in samples { sum_lo += half_to_f32((s & 0xFFFF) as u16); sum_hi += half_to_f32(((s >> 16) & 0xFFFF) as u16); } let inv = 1.0f32 / n as f32; let lo = f32_to_half(sum_lo * inv) as u32; let hi = f32_to_half(sum_hi * inv) as u32; lo | (hi << 16) } #[inline] fn average_1_f32(samples: &[u32], n: u32) -> u32 { let mut sum = 0.0f32; for &s in samples { sum += f32::from_bits(s); } (sum / n as f32).to_bits() } #[inline] fn average_4_u16(samples: &[(u32, u32)], n: u32) -> (u32, u32) { // (lo, hi) carry 4 × 16-bit channels. lo = (R, G), hi = (B, A) or similar // packing — averaging is per-16-bit-field regardless of channel mapping. 
let extract = |w: u32, shift: u32| (w >> shift) & 0xFFFF; let mut sums = [0u32; 4]; for &(lo, hi) in samples { sums[0] += extract(lo, 0); sums[1] += extract(lo, 16); sums[2] += extract(hi, 0); sums[3] += extract(hi, 16); } let half = n / 2; let avg = |sum: u32| ((sum + half) / n) & 0xFFFF; let lo = avg(sums[0]) | (avg(sums[1]) << 16); let hi = avg(sums[2]) | (avg(sums[3]) << 16); (lo, hi) } #[inline] fn average_4_half_floats(samples: &[(u32, u32)], n: u32) -> (u32, u32) { let mut sums = [0.0f32; 4]; for &(lo, hi) in samples { sums[0] += half_to_f32((lo & 0xFFFF) as u16); sums[1] += half_to_f32(((lo >> 16) & 0xFFFF) as u16); sums[2] += half_to_f32((hi & 0xFFFF) as u16); sums[3] += half_to_f32(((hi >> 16) & 0xFFFF) as u16); } let inv = 1.0f32 / n as f32; let h0 = f32_to_half(sums[0] * inv) as u32; let h1 = f32_to_half(sums[1] * inv) as u32; let h2 = f32_to_half(sums[2] * inv) as u32; let h3 = f32_to_half(sums[3] * inv) as u32; (h0 | (h1 << 16), h2 | (h3 << 16)) } #[inline] fn average_2_f32(samples: &[(u32, u32)], n: u32) -> (u32, u32) { let mut sum_lo = 0.0f32; let mut sum_hi = 0.0f32; for &(lo, hi) in samples { sum_lo += f32::from_bits(lo); sum_hi += f32::from_bits(hi); } let inv = 1.0f32 / n as f32; ((sum_lo * inv).to_bits(), (sum_hi * inv).to_bits()) } #[cfg(test)] mod tests { use super::*; use crate::draw_state::{ResolveCoordinates, ResolveInfo}; use crate::edram::ShadowEdram; use crate::render_target_cache::MsaaSamples; use crate::tiled_address::{align_pitch_to_macro_tile, tiled_2d_offset}; use xenia_memory::GuestMemory; /// Build a minimally-populated [`ResolveInfo`] for tests. 
// Defaults: an 8:8:8:8 colour resolve from render target 0, no MSAA, no
// endian swap, with the copy rectangle covering `pitch` × `height` pixels
// from the origin. Tests override individual fields afterwards.
fn minimal_info(dest_base: u32, pitch: u32, height: u32) -> ResolveInfo {
    ResolveInfo {
        copy_src_select: 0,
        copy_sample_select: 0,
        color_clear_enable: false,
        depth_clear_enable: false,
        copy_command: 0,
        dest_base,
        dest_pitch_pixels: pitch,
        dest_height_pixels: height,
        dest_format: color_format::K_8_8_8_8,
        dest_endian: 0,
        dest_exp_bias: 0,
        source: ResolveSource::Color(0),
        coords: ResolveCoordinates {
            x0: 0,
            y0: 0,
            width: pitch,
            height,
            sample_count_log2_x: 0,
            sample_count_log2_y: 0,
        },
        source_format: 0,
        source_base_tiles: 0,
        surface_pitch_tiles: pitch.div_ceil(80),
        msaa: MsaaSamples::X1,
        source_is_64bpp: false,
        color_clear_value: 0,
        color_clear_value_lo: 0,
        depth_clear_value: 0,
        copy_dest_array: false,
    }
}

/// Guest memory with 1 MiB mapped read/write at `0x4000_0000`, the
/// destination base every test in this module resolves into.
fn fresh_mem() -> GuestMemory {
    use xenia_memory::page_table::MemoryProtect;
    let mut mem = GuestMemory::new().expect("guest memory");
    mem.alloc(
        0x4000_0000,
        0x0010_0000,
        MemoryProtect::READ | MemoryProtect::WRITE,
    )
    .expect("alloc");
    mem
}

/// Mode 0 (`kNone`) leaves the sample untouched.
#[test]
fn endian_k_none_is_identity() {
    assert_eq!(apply_endian_128(0x11223344, 0), 0x11223344);
}

/// Mode 1 (`k8in16`) swaps the two bytes inside each 16-bit half.
#[test]
fn endian_k8in16_swaps_byte_pairs() {
    assert_eq!(apply_endian_128(0x11223344, 1), 0x22114433);
}

/// Mode 2 (`k8in32`) reverses all four bytes.
#[test]
fn endian_k8in32_is_full_byte_reverse() {
    assert_eq!(apply_endian_128(0x11223344, 2), 0x44332211);
}

/// Mode 3 (`k16in32`) swaps the two 16-bit halves.
#[test]
fn endian_k16in32_swaps_halves() {
    assert_eq!(apply_endian_128(0x11223344, 3), 0x33441122);
}

/// A k8in32 resolve leaves little-endian bytes at every tiled offset.
#[test]
fn color_clear_resolve_writes_le_bytes_with_k8in32() {
    // Clear-resolve a 32x8 rectangle of k_8_8_8_8 samples to pattern
    // 0x11223344 with endian k8in32. Memory should contain LE bytes
    // [0x44, 0x33, 0x22, 0x11] at every tiled sample offset.
    let mut mem = fresh_mem();
    let mut edram = ShadowEdram::new();
    edram.fill_rect_32bpp(0, 1, 0, 0, 32, 8, 0x11223344);
    let mut info = minimal_info(0x4000_0000, 32, 8);
    info.dest_endian = 2; // k8in32
    info.source_base_tiles = 0;
    info.surface_pitch_tiles = 1;
    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(stats.supported);
    assert_eq!(stats.samples_written, 32 * 8);
    // Recompute each destination address the same way the copy does.
    let pitch_aligned = align_pitch_to_macro_tile(32);
    for y in 0..8u32 {
        for x in 0..32u32 {
            let off = tiled_2d_offset(x, y, pitch_aligned, 2);
            let addr = 0x4000_0000u32.wrapping_add(off);
            let bytes = [
                mem.read_u8(addr),
                mem.read_u8(addr.wrapping_add(1)),
                mem.read_u8(addr.wrapping_add(2)),
                mem.read_u8(addr.wrapping_add(3)),
            ];
            assert_eq!(
                bytes,
                [0x44, 0x33, 0x22, 0x11],
                "mismatch at ({x}, {y})"
            );
        }
    }
}

/// With no endian swap the sample's bytes land in big-endian order.
#[test]
fn k_none_endian_keeps_big_endian_bytes() {
    let mut mem = fresh_mem();
    let mut edram = ShadowEdram::new();
    edram.fill_rect_32bpp(0, 1, 0, 0, 16, 8, 0xAABBCCDD);
    let mut info = minimal_info(0x4000_0000, 16, 8);
    info.dest_endian = 0;
    info.source_base_tiles = 0;
    info.surface_pitch_tiles = 1;
    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(stats.supported);
    // Only the first pixel is inspected.
    let pitch_aligned = align_pitch_to_macro_tile(16);
    let off = tiled_2d_offset(0, 0, pitch_aligned, 2);
    let addr = 0x4000_0000u32.wrapping_add(off);
    assert_eq!(
        [
            mem.read_u8(addr),
            mem.read_u8(addr.wrapping_add(1)),
            mem.read_u8(addr.wrapping_add(2)),
            mem.read_u8(addr.wrapping_add(3)),
        ],
        [0xAA, 0xBB, 0xCC, 0xDD]
    );
}

/// A zero-sized rectangle is a supported no-op: nothing is written, so the
/// destination page's version must not change.
#[test]
fn empty_rect_is_noop_and_no_page_version_bump() {
    let mut mem = fresh_mem();
    let edram = ShadowEdram::new();
    let before = mem.page_version(0x4000_0000);
    let mut info = minimal_info(0x4000_0000, 0, 0);
    info.coords.width = 0;
    info.coords.height = 0;
    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(stats.supported);
    assert_eq!(stats.samples_written, 0);
    assert_eq!(mem.page_version(0x4000_0000), before);
}

/// A source/dest pair that is not bitwise-equivalent is skipped without
/// writing anything, and reported as unsupported.
#[test]
fn unsupported_dest_format_is_graceful() {
    let mut mem = fresh_mem();
    let edram = ShadowEdram::new();
    let mut info = minimal_info(0x4000_0000, 16, 16);
    // k_16_16_16_16 is 64bpp — not bitwise-equivalent to any 32bpp dest.
    info.dest_format = 26;
    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(!stats.supported);
    assert_eq!(stats.samples_written, 0);
}

/// A successful resolve must raise the destination page's version so that
/// cached textures over that memory can be invalidated.
#[test]
fn resolve_bumps_page_version_for_texture_cache_invalidation() {
    let mut mem = fresh_mem();
    let mut edram = ShadowEdram::new();
    edram.fill_rect_32bpp(0, 1, 0, 0, 16, 8, 0xDEADBEEF);
    let before = mem.page_version(0x4000_0000);
    let mut info = minimal_info(0x4000_0000, 16, 8);
    info.source_base_tiles = 0;
    info.surface_pitch_tiles = 1;
    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(stats.supported);
    assert!(mem.page_version(0x4000_0000) > before);
}

/// k_2_10_10_10 source ↔ k_2_10_10_10 dest is bitwise-equivalent per
/// Canary `xenos.h:624-627`. Same path, just different format bytes.
#[test]
fn k_2_10_10_10_is_bitwise_equivalent() {
    assert!(is_32bpp_bitwise_equivalent(
        ResolveSource::Color(0),
        false,
        /* source */ 2,
        /* dest */ 7,
    ));
    assert!(is_32bpp_bitwise_equivalent(
        ResolveSource::Color(0),
        false,
        /* source k_2_10_10_10_AS_10_10_10_10 */ 10,
        /* dest k_2_10_10_10_AS_16_16_16_16 */ 54,
    ));
}

/// k_8_8_8_8_GAMMA source resolves identically to k_8_8_8_8 (gamma is
/// applied at sample time, not on store).
#[test]
fn k_8_8_8_8_gamma_source_is_bitwise_equivalent() {
    assert!(is_32bpp_bitwise_equivalent(
        ResolveSource::Color(0),
        false,
        /* source k_8_8_8_8_GAMMA */ 1,
        /* dest k_8_8_8_8 */ 6,
    ));
}

/// Depth resolve: kD24S8 → k_24_8, kD24FS8 → k_24_8_FLOAT.
#[test]
fn depth_resolve_format_equivalence() {
    assert!(is_32bpp_bitwise_equivalent(
        ResolveSource::Depth,
        false,
        /* kD24S8 */ 0,
        /* k_24_8 */ 22,
    ));
    assert!(is_32bpp_bitwise_equivalent(
        ResolveSource::Depth,
        false,
        /* kD24FS8 */ 1,
        /* k_24_8_FLOAT */ 23,
    ));
    // Mismatched depth → texture format = not equivalent.
    assert!(!is_32bpp_bitwise_equivalent(
        ResolveSource::Depth,
        false,
        0,
        23,
    ));
}

/// 64bpp source is never equivalent to a 32bpp dest, even when the
/// source/dest format numbers might look compatible.
#[test]
fn sixty_four_bpp_source_is_never_equivalent() {
    assert!(!is_32bpp_bitwise_equivalent(
        ResolveSource::Color(0),
        true,
        5, // k_16_16_16_16
        6,
    ));
}

/// 64bpp bitwise-equivalent pairs per Canary `xenos.h:614-639`.
#[test]
fn sixty_four_bpp_equivalence_pairs() {
    // k_16_16_16_16 (5) → k_16_16_16_16 (26)
    assert!(is_64bpp_bitwise_equivalent(5, 26));
    // k_16_16_16_16_FLOAT (7) → k_16_16_16_16_FLOAT (32)
    assert!(is_64bpp_bitwise_equivalent(7, 32));
    // k_32_32_FLOAT (15) → k_32_32_FLOAT (37)
    assert!(is_64bpp_bitwise_equivalent(15, 37));
    // Cross-format must reject.
    assert!(!is_64bpp_bitwise_equivalent(5, 32));
    assert!(!is_64bpp_bitwise_equivalent(0, 26));
}

/// End-to-end 64bpp resolve: paint a `k_16_16_16_16` pattern into EDRAM
/// and confirm `copy_to_memory` lands two u32s per pixel into guest mem.
#[test]
fn sixty_four_bpp_resolve_writes_two_words_per_pixel() {
    let mut mem = fresh_mem();
    let mut edram = ShadowEdram::new();
    // 16x4 logical 64bpp samples; pitch = 1 32bpp tile.
    edram.fill_rect_64bpp(0, 1, 0, 0, 16, 4, 0xAABB_CCDD, 0x1122_3344);
    let mut info = minimal_info(0x4000_0000, 16, 4);
    info.source = ResolveSource::Color(0);
    info.source_format = 5; // k_16_16_16_16
    info.dest_format = color_format::K_16_16_16_16;
    info.source_is_64bpp = true;
    info.dest_endian = 0; // kNone
    info.source_base_tiles = 0;
    info.surface_pitch_tiles = 1;
    info.coords.width = 16;
    info.coords.height = 4;
    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(stats.supported);
    // One count per pixel, even though each pixel is two u32 stores.
    assert_eq!(stats.samples_written, 16 * 4);
    // First pixel: lo word at dst_off, hi word at dst_off + 4. With
    // bpp_log2=3, pitch_aligned=32 (rounded from 16), tiled offset
    // for (0,0) is 0.
    let pitch_aligned = align_pitch_to_macro_tile(16);
    let off = tiled_2d_offset(0, 0, pitch_aligned, 3);
    let addr = 0x4000_0000u32.wrapping_add(off);
    // BE store of 0xAABBCCDD = bytes [0xAA, 0xBB, 0xCC, 0xDD]
    assert_eq!(mem.read_u8(addr), 0xAA);
    assert_eq!(mem.read_u8(addr.wrapping_add(1)), 0xBB);
    assert_eq!(mem.read_u8(addr.wrapping_add(2)), 0xCC);
    assert_eq!(mem.read_u8(addr.wrapping_add(3)), 0xDD);
    // Spot-check the first and last byte of the hi word (0x11223344).
    assert_eq!(mem.read_u8(addr.wrapping_add(4)), 0x11);
    assert_eq!(mem.read_u8(addr.wrapping_add(7)), 0x44);
}

/// MSAA averaging — `k_8_8_8_8` per-channel rounded mean of 4 samples.
/// Build a 4x MSAA RT where the 4 samples per pixel hold (0, 64, 128,
/// 192) in the red channel and check the resolve produces the rounded
/// mean (96).
#[test]
fn msaa_4x_averaging_k_8_8_8_8() {
    let mut mem = fresh_mem();
    let mut edram = ShadowEdram::new();
    // 4x MSAA: each pixel occupies a 2×2 sample grid.
    // Pixel (0,0) sample positions (0..4) at sample-coords:
    //   s0: (0, 0)
    //   s1: (1, 0)
    //   s2: (0, 1)
    //   s3: (1, 1)
    // Stuff R=[0, 64, 128, 192], G=B=A=0.
    edram.write_sample_32bpp(0, 1, 0, 0, 0x00_00_00_00); // R=0
    edram.write_sample_32bpp(0, 1, 1, 0, 0x00_00_00_40); // R=64
    edram.write_sample_32bpp(0, 1, 0, 1, 0x00_00_00_80); // R=128
    edram.write_sample_32bpp(0, 1, 1, 1, 0x00_00_00_C0); // R=192
    let mut info = minimal_info(0x4000_0000, 1, 1);
    info.source = ResolveSource::Color(0);
    info.source_format = 0; // k_8_8_8_8
    info.dest_format = color_format::K_8_8_8_8;
    info.copy_sample_select = 6; // K0123
    info.msaa = MsaaSamples::X4;
    info.coords.sample_count_log2_x = 1;
    info.coords.sample_count_log2_y = 1;
    info.coords.width = 1;
    info.coords.height = 1;
    info.dest_endian = 0;
    info.source_base_tiles = 0;
    info.surface_pitch_tiles = 1;
    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(stats.supported);
    assert_eq!(stats.samples_written, 1);
    // R = (0+64+128+192 + 2)/4 = 96 = 0x60. Big-endian store.
    let addr = 0x4000_0000u32;
    // R lives in the low 8 bits of the sample word, so the averaged word
    // is 0x0000_0060. With no endian swap the big-endian store puts that
    // low byte last: [0x00, 0x00, 0x00, 0x60].
    let bytes = [
        mem.read_u8(addr),
        mem.read_u8(addr.wrapping_add(1)),
        mem.read_u8(addr.wrapping_add(2)),
        mem.read_u8(addr.wrapping_add(3)),
    ];
    assert_eq!(bytes, [0x00, 0x00, 0x00, 0x60], "averaged R should be 0x60");
}

/// MSAA averaging — `k_32_FLOAT` averages 4 f32 samples linearly.
#[test]
fn msaa_4x_averaging_k_32_float() {
    let mut mem = fresh_mem();
    let mut edram = ShadowEdram::new();
    // Store each float as its raw bit pattern.
    let f = |v: f32| v.to_bits();
    edram.write_sample_32bpp(0, 1, 0, 0, f(1.0));
    edram.write_sample_32bpp(0, 1, 1, 0, f(2.0));
    edram.write_sample_32bpp(0, 1, 0, 1, f(3.0));
    edram.write_sample_32bpp(0, 1, 1, 1, f(4.0));
    let mut info = minimal_info(0x4000_0000, 1, 1);
    info.source = ResolveSource::Color(0);
    info.source_format = 14; // k_32_FLOAT
    info.dest_format = color_format::K_32_FLOAT;
    info.copy_sample_select = 6; // K0123
    info.msaa = MsaaSamples::X4;
    info.coords.sample_count_log2_x = 1;
    info.coords.sample_count_log2_y = 1;
    info.coords.width = 1;
    info.coords.height = 1;
    info.dest_endian = 2; // k8in32 — game-typical for float sampling
    info.source_base_tiles = 0;
    info.surface_pitch_tiles = 1;
    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(stats.supported);
    // (1+2+3+4)/4 = 2.5
    let expected = 2.5f32.to_bits();
    // Read the four destination bytes in address order.
    let bytes = [
        mem.read_u8(0x4000_0000),
        mem.read_u8(0x4000_0001),
        mem.read_u8(0x4000_0002),
        mem.read_u8(0x4000_0003),
    ];
    // k8in32 byte-reverses the averaged word and the store then writes it
    // big-endian, so memory holds the value's little-endian byte sequence,
    // i.e. `expected.to_le_bytes()`.
    assert_eq!(bytes, expected.to_le_bytes());
}

/// MSAA averaging — `k_2_10_10_10` per-field rounded mean.
#[test] fn msaa_2x_averaging_k_2_10_10_10() { // 2x MSAA samples are stacked vertically (s0 at y=0, s1 at y=1). let mut mem = fresh_mem(); let mut edram = ShadowEdram::new(); // Field widths 2/10/10/10. Pack two values per field (a/b/g/r). let pack = |a: u32, b: u32, g: u32, r: u32| { (a & 0x3) | ((b & 0x3FF) << 2) | ((g & 0x3FF) << 12) | ((r & 0x3FF) << 22) }; edram.write_sample_32bpp(0, 1, 0, 0, pack(0, 100, 200, 300)); edram.write_sample_32bpp(0, 1, 0, 1, pack(2, 200, 300, 400)); let mut info = minimal_info(0x4000_0000, 1, 1); info.source = ResolveSource::Color(0); info.source_format = 2; // k_2_10_10_10 info.dest_format = color_format::K_2_10_10_10; info.copy_sample_select = 4; // K01 info.msaa = MsaaSamples::X2; info.coords.sample_count_log2_x = 0; info.coords.sample_count_log2_y = 1; info.coords.width = 1; info.coords.height = 1; info.dest_endian = 0; info.source_base_tiles = 0; info.surface_pitch_tiles = 1; let stats = copy_to_memory(&info, &edram, &mut mem); assert!(stats.supported); // Expected per-field: a=(0+2+1)/2=1, b=(100+200+1)/2=150, g=(200+300+1)/2=250, r=(300+400+1)/2=350 let expected = pack(1, 150, 250, 350); // Read back as BE u32 (big-endian byte ordering). let bytes = [ mem.read_u8(0x4000_0000), mem.read_u8(0x4000_0001), mem.read_u8(0x4000_0002), mem.read_u8(0x4000_0003), ]; assert_eq!(bytes, expected.to_be_bytes()); } /// End-to-end depth resolve: set up a depth RT at tile base 8, paint /// it via clear value, and verify the copy emerges in guest memory /// with the right bytes. #[test] fn depth_clear_resolve_end_to_end() { let mut mem = fresh_mem(); let mut edram = ShadowEdram::new(); // Paint the depth tiles directly with a known pattern. 
edram.fill_rect_32bpp(8, 1, 0, 0, 16, 8, 0x3FFF_FF00); let mut info = minimal_info(0x4000_0000, 16, 8); info.source = ResolveSource::Depth; info.source_format = 0; // kD24S8 info.dest_format = color_format::K_24_8; info.dest_endian = 2; // k8in32 info.source_base_tiles = 8; info.surface_pitch_tiles = 1; let stats = copy_to_memory(&info, &edram, &mut mem); assert!(stats.supported); assert_eq!(stats.samples_written, 16 * 8); // First pixel should be the endian-swapped pattern: BE-store of // 0x3FFF_FF00.swap_bytes() = 0x00FF_FF3F → bytes [0x00, 0xFF, 0xFF, 0x3F]. let pitch_aligned = align_pitch_to_macro_tile(16); let off = tiled_2d_offset(0, 0, pitch_aligned, 2); let addr = 0x4000_0000u32.wrapping_add(off); assert_eq!( [ mem.read_u8(addr), mem.read_u8(addr.wrapping_add(1)), mem.read_u8(addr.wrapping_add(2)), mem.read_u8(addr.wrapping_add(3)), ], [0x00, 0xFF, 0xFF, 0x3F] ); } /// `sanitize_sample_select` for 1x MSAA collapses every select to K0. #[test] fn sanitize_1x_msaa_collapses_to_k0() { for raw in 0..=7u8 { let s = sanitize_sample_select(raw, MsaaSamples::X1, false); assert_eq!(s, CopySampleSelect::K0, "raw={raw}"); } } /// 2x MSAA: k2→k0, k3→k1, k23→k01; depth averages sanitize to k0. #[test] fn sanitize_2x_msaa_obeys_canary_rules() { assert_eq!( sanitize_sample_select(2, MsaaSamples::X2, false), CopySampleSelect::K0 ); assert_eq!( sanitize_sample_select(3, MsaaSamples::X2, false), CopySampleSelect::K1 ); assert_eq!( sanitize_sample_select(5, MsaaSamples::X2, false), CopySampleSelect::K01 ); // Depth — no averaging. assert_eq!( sanitize_sample_select(4, MsaaSamples::X2, true), CopySampleSelect::K0 ); assert_eq!( sanitize_sample_select(6, MsaaSamples::X2, true), CopySampleSelect::K0 ); } /// 4x MSAA: single-samples untouched for color; depth averages /// collapse to a representative single sample (k0123 → k0). 
#[test] fn sanitize_4x_msaa_depth_collapses_averages() { assert_eq!( sanitize_sample_select(6, MsaaSamples::X4, true), CopySampleSelect::K0 ); assert_eq!( sanitize_sample_select(5, MsaaSamples::X4, true), CopySampleSelect::K2 ); assert_eq!( sanitize_sample_select(4, MsaaSamples::X4, true), CopySampleSelect::K0 ); // Color keeps averages. assert_eq!( sanitize_sample_select(6, MsaaSamples::X4, false), CopySampleSelect::K0123 ); } /// Sample offsets follow the standard Xbox 360 MSAA layout. #[test] fn sample_offset_layout() { // 1x assert_eq!(sample_offset_in_pixel(0, MsaaSamples::X1), (0, 0)); // 2x assert_eq!(sample_offset_in_pixel(0, MsaaSamples::X2), (0, 0)); assert_eq!(sample_offset_in_pixel(1, MsaaSamples::X2), (0, 1)); // 4x assert_eq!(sample_offset_in_pixel(0, MsaaSamples::X4), (0, 0)); assert_eq!(sample_offset_in_pixel(1, MsaaSamples::X4), (1, 0)); assert_eq!(sample_offset_in_pixel(2, MsaaSamples::X4), (0, 1)); assert_eq!(sample_offset_in_pixel(3, MsaaSamples::X4), (1, 1)); } }