First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).
Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.
Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1261 lines
46 KiB
Rust
1261 lines
46 KiB
Rust
//! EDRAM→guest-memory resolve byte copy.
|
||
//!
|
||
//! Fires from [`crate::gpu_system::GpuSystem::handle_event_initiator`] on
//! `TILE_FLUSH` (event 15). Reads samples out of the shadow EDRAM at the
//! source tile range, applies the `Endian128` byte swap, and writes tiled
//! samples into guest memory. Only bitwise-equivalent source/dest format
//! pairs are copied (Canary `IsColorResolveFormatBitwiseEquivalent` —
//! `xenos.h:614-639`): 32bpp pairs as one u32 per pixel, 64bpp pairs as two
//! consecutive u32 words per pixel.
|
||
//!
|
||
//! Ground truth: `xenia-canary/src/xenia/gpu/draw_util.cc:1102-1370` and
|
||
//! `xenos.h:1077-1114` (`GpuSwapInline`), `1039-1052` (`CopySampleSelect`).
|
||
//!
|
||
//! ## Endian ordering
|
||
//!
|
||
//! [`xenia_memory::access::MemoryAccess::write_u32`] stores big-endian
|
||
//! bytes (it calls `val.to_be_bytes()`). The Xenon CPU sees memory as big-
|
||
//! endian u32s, so `write_u32(addr, 0x11223344)` lands `[0x11, 0x22, 0x33,
|
||
//! 0x44]` in memory — which is the `kNone` (no swap) byte order from the
|
||
//! host's view of the sample.
|
||
//!
|
||
//! The resolve has an `Endian128` mode controlled by
|
||
//! `RB_COPY_DEST_INFO.copy_dest_endian`: games typically set `k8in32` so
|
||
//! that later texture fetches see little-endian bytes. We therefore
|
||
//! pre-swap the sample *before* `write_u32` so the big-endian store yields
|
||
//! the desired byte order in memory.
|
||
|
||
use crate::draw_state::{ResolveInfo, ResolveSource};
|
||
use crate::edram::ShadowEdram;
|
||
use crate::render_target_cache::MsaaSamples;
|
||
use crate::tiled_address::{align_pitch_to_macro_tile, tiled_2d_offset};
|
||
|
||
use xenia_memory::access::MemoryAccess;
|
||
|
||
/// Outcome of a single resolve copy. The caller folds these values into the
/// `GpuStats` counters so the HUD can display them.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct ResolveCopyStats {
    /// How many samples were actually stored to guest memory.
    pub samples_written: u32,
    /// `true` when the format path was handled; `false` means the copy was
    /// skipped.
    pub supported: bool,
}
|
||
|
||
/// Raw value of `xenos::CopyCommand::kNull` (3): the resolve performs no
/// copy-out (clear-only).
pub const COPY_COMMAND_NULL: u8 = 3;
|
||
|
||
/// Effective sample selector (`xenos::CopySampleSelect`, `xenos.h:1039`).
///
/// `ResolveInfo` keeps the *raw* register value; this enum is what callers
/// match on after sanitation, so they do not have to re-apply the MSAA/depth
/// rules from Canary `draw_util.cc:839-876` themselves.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CopySampleSelect {
    K0 = 0,
    K1 = 1,
    K2 = 2,
    K3 = 3,
    K01 = 4,
    K23 = 5,
    K0123 = 6,
}

impl CopySampleSelect {
    /// Decode the low three bits of `raw`. Values 6 and 7 both map to
    /// `K0123`; higher bits are ignored.
    pub fn from_raw(raw: u8) -> Self {
        // Indexed by the masked 3-bit field, so every index 0..=7 is covered.
        const BY_FIELD: [CopySampleSelect; 8] = [
            CopySampleSelect::K0,
            CopySampleSelect::K1,
            CopySampleSelect::K2,
            CopySampleSelect::K3,
            CopySampleSelect::K01,
            CopySampleSelect::K23,
            CopySampleSelect::K0123,
            CopySampleSelect::K0123,
        ];
        BY_FIELD[usize::from(raw & 0x7)]
    }

    /// `Some(0..=3)` for the single-sample selectors; `None` for the
    /// averaging selectors, which the caller must build from per-sample reads.
    pub fn single_sample_index(self) -> Option<u8> {
        match self {
            Self::K01 | Self::K23 | Self::K0123 => None,
            // K0..K3 carry discriminants 0..3, which are the sample indices.
            single => Some(single as u8),
        }
    }

    /// `IsSingleCopySampleSelected` from `xenos.h:1049`.
    pub fn is_single_sample(self) -> bool {
        matches!(self, Self::K0 | Self::K1 | Self::K2 | Self::K3)
    }
}
|
||
|
||
/// `SanitizeCopySampleSelect` (Canary `draw_util.cc:839-876`). MSAA
|
||
/// modes + depth limit which sample selectors are valid; invalid ones
|
||
/// are silently remapped. Returning the sanitized enum lets the resolve
|
||
/// loop assume a single-sample pick for 1x MSAA, etc.
|
||
pub fn sanitize_sample_select(
|
||
raw: u8,
|
||
msaa: MsaaSamples,
|
||
is_depth: bool,
|
||
) -> CopySampleSelect {
|
||
let select = CopySampleSelect::from_raw(raw);
|
||
match msaa {
|
||
MsaaSamples::X1 => {
|
||
// Only sample 0 exists. Averaging modes → k0; >k0123 clamp.
|
||
match select {
|
||
CopySampleSelect::K0 => CopySampleSelect::K0,
|
||
_ => CopySampleSelect::K0,
|
||
}
|
||
}
|
||
MsaaSamples::X2 => {
|
||
// Samples 0 and 1 exist (stacked vertically). k2 → k0, k3 → k1;
|
||
// k23 → k01. Depth cannot average.
|
||
match select {
|
||
CopySampleSelect::K0 => CopySampleSelect::K0,
|
||
CopySampleSelect::K1 => CopySampleSelect::K1,
|
||
CopySampleSelect::K2 => CopySampleSelect::K0,
|
||
CopySampleSelect::K3 => CopySampleSelect::K1,
|
||
CopySampleSelect::K01 | CopySampleSelect::K23 | CopySampleSelect::K0123 => {
|
||
if is_depth {
|
||
CopySampleSelect::K0
|
||
} else {
|
||
CopySampleSelect::K01
|
||
}
|
||
}
|
||
}
|
||
}
|
||
MsaaSamples::X4 => {
|
||
// All single-samples valid. Depth cannot average → pick
|
||
// representative single sample (k01→k0, k23→k2, k0123→k0).
|
||
if is_depth {
|
||
match select {
|
||
CopySampleSelect::K01 => CopySampleSelect::K0,
|
||
CopySampleSelect::K23 => CopySampleSelect::K2,
|
||
CopySampleSelect::K0123 => CopySampleSelect::K0,
|
||
other => other,
|
||
}
|
||
} else {
|
||
select
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Sample-index to in-pixel (dx, dy) offset for the current MSAA mode.
|
||
/// Matches the standard Xbox 360 MSAA sample layout (Canary
|
||
/// `texture_util::GetMsaaSampleLocation` / the shader constants). For 1x,
|
||
/// always `(0, 0)`.
|
||
///
|
||
/// * 2x MSAA: sample 0 = top line, sample 1 = bottom line.
|
||
/// * 4x MSAA: 2×2 grid `{(0,0),(1,0),(0,1),(1,1)}`.
|
||
#[inline]
|
||
fn sample_offset_in_pixel(sample_idx: u8, msaa: MsaaSamples) -> (u32, u32) {
|
||
match msaa {
|
||
MsaaSamples::X1 => (0, 0),
|
||
MsaaSamples::X2 => (0, (sample_idx & 1) as u32),
|
||
MsaaSamples::X4 => ((sample_idx & 1) as u32, ((sample_idx >> 1) & 1) as u32),
|
||
}
|
||
}
|
||
|
||
/// Apply an `Endian128` byte swap to a single 32-bit word.
///
/// `endian` values:
/// * `0` — no swap.
/// * `1` (k8in16) — swap the two bytes inside each 16-bit half.
/// * `2` (k8in32) — reverse all four bytes.
/// * `3` (k16in32) — swap the two 16-bit halves.
/// * `4` / `5` (k8in64 / k8in128) — these need neighbouring dwords, which a
///   single `u32` cannot express, so they are approximated as k8in32; the
///   call site logs the approximation.
/// * anything else — returned unchanged.
#[inline]
pub fn apply_endian_128(value: u32, endian: u8) -> u32 {
    match endian {
        1 => ((value >> 8) & 0x00FF_00FF) | ((value << 8) & 0xFF00_FF00),
        // k8in32, plus the k8in64 / k8in128 approximation.
        2 | 4 | 5 => u32::from_be_bytes(value.to_le_bytes()),
        3 => (value << 16) | (value >> 16),
        _ => value,
    }
}
|
||
|
||
/// Destination format codes accepted by the resolve copy, as written to the
/// destination-format field of a resolve (Canary `xenos.h:582-609`).
mod color_format {
    // ── 32bpp color destinations ───────────────────────────────────────
    pub const K_8_8_8_8: u8 = 6;
    pub const K_2_10_10_10: u8 = 7;
    pub const K_8_8_8_8_A: u8 = 14;
    pub const K_16_16_FLOAT: u8 = 31;
    pub const K_32_FLOAT: u8 = 36;
    pub const K_8_8_8_8_AS_16_16_16_16: u8 = 50;
    pub const K_2_10_10_10_AS_16_16_16_16: u8 = 54;

    // ── 64bpp color destinations ───────────────────────────────────────
    /// `k_16_16_16_16`: four 16-bit channels.
    pub const K_16_16_16_16: u8 = 26;
    /// `k_16_16_16_16_FLOAT`: four half-float channels.
    pub const K_16_16_16_16_FLOAT: u8 = 32;
    /// `k_32_32_FLOAT`: R32 + G32 (`xenos::TextureFormat = 37`).
    pub const K_32_32_FLOAT: u8 = 37;

    // ── Depth destinations (Canary `xenos::TextureFormat`) ─────────────
    pub const K_24_8: u8 = 22;
    pub const K_24_8_FLOAT: u8 = 23;
}
|
||
|
||
/// 32-bit bitwise-equivalence check covering 32bpp color and depth resolves.
|
||
/// Color side mirrors `xenos::IsColorResolveFormatBitwiseEquivalent`
|
||
/// (`xenos.h:614-639`). Depth side maps `DepthRenderTargetFormat` to
|
||
/// its textural form (`kD24S8 → k_24_8`, `kD24FS8 → k_24_8_FLOAT`).
|
||
pub fn is_32bpp_bitwise_equivalent(
|
||
source: ResolveSource,
|
||
source_is_64bpp: bool,
|
||
source_format: u8,
|
||
dest_format: u8,
|
||
) -> bool {
|
||
if source_is_64bpp {
|
||
return false;
|
||
}
|
||
match source {
|
||
ResolveSource::Color(_) => {
|
||
use color_format as cf;
|
||
match source_format {
|
||
// k_8_8_8_8 (0) and k_8_8_8_8_GAMMA (1). Gamma decode is
|
||
// applied by the sampler at texture-fetch time (TextureSign::
|
||
// kGamma); the bits are identical, so the copy path is the
|
||
// same.
|
||
0 | 1 => matches!(
|
||
dest_format,
|
||
cf::K_8_8_8_8 | cf::K_8_8_8_8_A | cf::K_8_8_8_8_AS_16_16_16_16
|
||
),
|
||
// k_2_10_10_10 (2) and k_2_10_10_10_AS_10_10_10_10 (10).
|
||
2 | 10 => matches!(
|
||
dest_format,
|
||
cf::K_2_10_10_10 | cf::K_2_10_10_10_AS_16_16_16_16
|
||
),
|
||
// k_16_16_FLOAT (6).
|
||
6 => dest_format == cf::K_16_16_FLOAT,
|
||
// k_32_FLOAT (14).
|
||
14 => dest_format == cf::K_32_FLOAT,
|
||
_ => false,
|
||
}
|
||
}
|
||
ResolveSource::Depth => match source_format {
|
||
// kD24S8 (0) → k_24_8 (22).
|
||
0 => dest_format == color_format::K_24_8,
|
||
// kD24FS8 (1) → k_24_8_FLOAT (23).
|
||
1 => dest_format == color_format::K_24_8_FLOAT,
|
||
_ => false,
|
||
},
|
||
}
|
||
}
|
||
|
||
/// 64-bit bitwise-equivalence check (Canary `xenos.h:614-639` 64bpp arms).
|
||
/// Used when `info.source_is_64bpp == true`. Only color resolves go here —
|
||
/// depth is always 32bpp.
|
||
pub fn is_64bpp_bitwise_equivalent(source_format: u8, dest_format: u8) -> bool {
|
||
use color_format as cf;
|
||
match source_format {
|
||
// k_16_16_16_16 (5) — signed and unsigned variants resolve to the
|
||
// same bits because the resolve is a raw byte copy.
|
||
5 => dest_format == cf::K_16_16_16_16,
|
||
// k_16_16_16_16_FLOAT (7).
|
||
7 => dest_format == cf::K_16_16_16_16_FLOAT,
|
||
// k_32_32_FLOAT (15).
|
||
15 => dest_format == cf::K_32_32_FLOAT,
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// Run one resolve copy. Returns the number of samples successfully
|
||
/// written and whether the dest format was supported; the caller updates
|
||
/// `GpuStats::resolves_copied_total` / `resolves_skipped_total` accordingly.
|
||
pub fn copy_to_memory(
|
||
info: &ResolveInfo,
|
||
edram: &ShadowEdram,
|
||
mem: &dyn MemoryAccess,
|
||
) -> ResolveCopyStats {
|
||
// --- No-op paths (not a failure) ---
|
||
if info.coords.width == 0 || info.coords.height == 0 {
|
||
return ResolveCopyStats {
|
||
samples_written: 0,
|
||
supported: true,
|
||
};
|
||
}
|
||
if info.copy_command == COPY_COMMAND_NULL {
|
||
return ResolveCopyStats {
|
||
samples_written: 0,
|
||
supported: true,
|
||
};
|
||
}
|
||
|
||
// --- Supported-shape gates ---
|
||
if info.copy_dest_array {
|
||
tracing::warn!(
|
||
src = info.copy_src_select,
|
||
fmt = info.dest_format,
|
||
"gpu: resolve skipped — copy_dest_array (3D/stacked) not implemented"
|
||
);
|
||
return ResolveCopyStats::default();
|
||
}
|
||
if info.dest_exp_bias != 0 {
|
||
tracing::warn!(
|
||
bias = info.dest_exp_bias,
|
||
"gpu: resolve skipped — dest_exp_bias != 0 not implemented"
|
||
);
|
||
return ResolveCopyStats::default();
|
||
}
|
||
let supported = if info.source_is_64bpp {
|
||
// 64bpp color resolve. Depth is always 32bpp so this only fires
|
||
// for `ResolveSource::Color(_)`.
|
||
matches!(info.source, ResolveSource::Color(_))
|
||
&& is_64bpp_bitwise_equivalent(info.source_format, info.dest_format)
|
||
} else {
|
||
is_32bpp_bitwise_equivalent(
|
||
info.source,
|
||
info.source_is_64bpp,
|
||
info.source_format,
|
||
info.dest_format,
|
||
)
|
||
};
|
||
if !supported {
|
||
tracing::warn!(
|
||
source = ?info.source,
|
||
source_format = info.source_format,
|
||
source_is_64bpp = info.source_is_64bpp,
|
||
dest_format = info.dest_format,
|
||
"gpu: resolve skipped — not a bitwise-equivalent pair"
|
||
);
|
||
return ResolveCopyStats::default();
|
||
}
|
||
|
||
if info.dest_endian >= 4 {
|
||
tracing::warn!(
|
||
endian = info.dest_endian,
|
||
"gpu: resolve endian k8in64/k8in128 approximated as k8in32"
|
||
);
|
||
}
|
||
|
||
// Destination pitch must be aligned to 32 texels per
|
||
// `kStoragePitchHeightAlignmentBlocks`. `align_pitch_to_macro_tile`
|
||
// rounds to 32 (it's `MACRO_TILE_WIDTH_LOG2 = 5`).
|
||
let pitch_aligned = align_pitch_to_macro_tile(info.dest_pitch_pixels);
|
||
if pitch_aligned == 0 {
|
||
return ResolveCopyStats {
|
||
samples_written: 0,
|
||
supported: true,
|
||
};
|
||
}
|
||
// bpp_log2: 2 for 32bpp, 3 for 64bpp. Drives the `tiled_2d_offset`
|
||
// stride calculation per Canary `texture_address.h:120-180`.
|
||
let bpp_log2: u32 = if info.source_is_64bpp { 3 } else { 2 };
|
||
|
||
let is_depth = matches!(info.source, ResolveSource::Depth);
|
||
let sanitized = sanitize_sample_select(info.copy_sample_select, info.msaa, is_depth);
|
||
// For averaging modes we'd previously fall back to sample 0 + warn.
|
||
// 3A wires real averaging via `read_pixel_averaged`; single-sample
|
||
// picks still take the fast path.
|
||
let single_sample_idx = sanitized.single_sample_index();
|
||
|
||
let mut samples_written: u32 = 0;
|
||
for dy in 0..info.coords.height {
|
||
let pixel_y = info.coords.y0 + dy;
|
||
for dx in 0..info.coords.width {
|
||
let pixel_x = info.coords.x0 + dx;
|
||
// Destination coordinates are 0-based against `dest_base` — the
|
||
// base already points at the top-left of the copy rectangle.
|
||
let dst_off = tiled_2d_offset(dx, dy, pitch_aligned, bpp_log2);
|
||
let dst_addr = info.dest_base.wrapping_add(dst_off);
|
||
|
||
if info.source_is_64bpp {
|
||
let (lo, hi) = match single_sample_idx {
|
||
Some(idx) => {
|
||
let (sx, sy) = sample_xy(pixel_x, pixel_y, idx, info.msaa, &info.coords);
|
||
edram.read_sample_64bpp(
|
||
info.source_base_tiles,
|
||
info.surface_pitch_tiles,
|
||
sx,
|
||
sy,
|
||
)
|
||
}
|
||
None => read_pixel_averaged_64bpp(edram, info, sanitized, pixel_x, pixel_y),
|
||
};
|
||
let lo_swapped = apply_endian_128(lo, info.dest_endian);
|
||
let hi_swapped = apply_endian_128(hi, info.dest_endian);
|
||
mem.write_u32(dst_addr, lo_swapped);
|
||
mem.write_u32(dst_addr.wrapping_add(4), hi_swapped);
|
||
samples_written += 1;
|
||
} else {
|
||
let sample = match single_sample_idx {
|
||
Some(idx) => {
|
||
let (sx, sy) = sample_xy(pixel_x, pixel_y, idx, info.msaa, &info.coords);
|
||
edram.read_sample_32bpp(
|
||
info.source_base_tiles,
|
||
info.surface_pitch_tiles,
|
||
sx,
|
||
sy,
|
||
)
|
||
}
|
||
None => read_pixel_averaged_32bpp(
|
||
edram,
|
||
info,
|
||
sanitized,
|
||
pixel_x,
|
||
pixel_y,
|
||
),
|
||
};
|
||
let swapped = apply_endian_128(sample, info.dest_endian);
|
||
mem.write_u32(dst_addr, swapped);
|
||
samples_written += 1;
|
||
}
|
||
}
|
||
}
|
||
|
||
ResolveCopyStats {
|
||
samples_written,
|
||
supported: true,
|
||
}
|
||
}
|
||
|
||
/// Compute the EDRAM sample-space (x, y) for `(pixel_x, pixel_y)` and a
|
||
/// single MSAA sample index.
|
||
#[inline]
|
||
fn sample_xy(
|
||
pixel_x: u32,
|
||
pixel_y: u32,
|
||
sample_idx: u8,
|
||
msaa: MsaaSamples,
|
||
coords: &crate::draw_state::ResolveCoordinates,
|
||
) -> (u32, u32) {
|
||
let (sample_dx, sample_dy) = sample_offset_in_pixel(sample_idx, msaa);
|
||
let sx = (pixel_x << coords.sample_count_log2_x) + sample_dx;
|
||
let sy = (pixel_y << coords.sample_count_log2_y) + sample_dy;
|
||
(sx, sy)
|
||
}
|
||
|
||
/// Sample indices selected by an averaging `CopySampleSelect`.
|
||
/// `K01 → [0, 1]`, `K23 → [2, 3]`, `K0123 → [0, 1, 2, 3]`. Single-sample
|
||
/// picks should never reach this helper (caller checks `single_sample_index`).
|
||
fn averaging_sample_set(select: CopySampleSelect) -> &'static [u8] {
|
||
match select {
|
||
CopySampleSelect::K01 => &[0, 1],
|
||
CopySampleSelect::K23 => &[2, 3],
|
||
CopySampleSelect::K0123 => &[0, 1, 2, 3],
|
||
// Single-sample picks: caller must never invoke this — fall back
|
||
// to sample 0 just to keep the function total.
|
||
_ => &[0],
|
||
}
|
||
}
|
||
|
||
/// Average N samples of a 32bpp pixel format. Each sample is read, decoded
|
||
/// by `source_format`, averaged in the appropriate numeric space, then
|
||
/// re-encoded back into the same 32bpp word. Mirrors Canary's resolve
|
||
/// shader paths in `resolve.xesli:595-629` (per-format averaging) — we
|
||
/// implement them on the CPU because the resolve runs on the host.
|
||
fn read_pixel_averaged_32bpp(
|
||
edram: &ShadowEdram,
|
||
info: &ResolveInfo,
|
||
select: CopySampleSelect,
|
||
pixel_x: u32,
|
||
pixel_y: u32,
|
||
) -> u32 {
|
||
let indices = averaging_sample_set(select);
|
||
let n = indices.len() as u32;
|
||
if n == 0 {
|
||
return 0;
|
||
}
|
||
// Pull every selected sample.
|
||
let mut raw = [0u32; 4];
|
||
for (i, &idx) in indices.iter().enumerate() {
|
||
let (sx, sy) = sample_xy(pixel_x, pixel_y, idx, info.msaa, &info.coords);
|
||
raw[i] = edram.read_sample_32bpp(info.source_base_tiles, info.surface_pitch_tiles, sx, sy);
|
||
}
|
||
let raw_slice = &raw[..indices.len()];
|
||
average_samples_32bpp(raw_slice, info.source_format, info.source)
|
||
}
|
||
|
||
/// Average N samples of a 64bpp pixel format, returning `(lo, hi)`.
|
||
fn read_pixel_averaged_64bpp(
|
||
edram: &ShadowEdram,
|
||
info: &ResolveInfo,
|
||
select: CopySampleSelect,
|
||
pixel_x: u32,
|
||
pixel_y: u32,
|
||
) -> (u32, u32) {
|
||
let indices = averaging_sample_set(select);
|
||
let n = indices.len();
|
||
if n == 0 {
|
||
return (0, 0);
|
||
}
|
||
let mut raw = [(0u32, 0u32); 4];
|
||
for (i, &idx) in indices.iter().enumerate() {
|
||
let (sx, sy) = sample_xy(pixel_x, pixel_y, idx, info.msaa, &info.coords);
|
||
raw[i] = edram.read_sample_64bpp(info.source_base_tiles, info.surface_pitch_tiles, sx, sy);
|
||
}
|
||
let raw_slice = &raw[..n];
|
||
average_samples_64bpp(raw_slice, info.source_format)
|
||
}
|
||
|
||
/// Per-format averaging for 32bpp color/depth resolves.
|
||
fn average_samples_32bpp(samples: &[u32], source_format: u8, source: ResolveSource) -> u32 {
|
||
let n = samples.len() as u32;
|
||
debug_assert!(n > 0);
|
||
match source {
|
||
ResolveSource::Color(_) => match source_format {
|
||
// k_8_8_8_8 / k_8_8_8_8_GAMMA (0/1): per-channel rounded
|
||
// unsigned-int mean. Matches Canary's `resolve.xesli` per-component
|
||
// average for u8 — gamma is a sampler-time post-decode, the
|
||
// bits are identical for resolve purposes.
|
||
0 | 1 => average_8_8_8_8(samples, n),
|
||
// k_2_10_10_10 / k_2_10_10_10_AS_10_10_10_10: per-field rounded
|
||
// unsigned-int mean. Field widths 2/10/10/10 from low to high.
|
||
2 | 10 => average_2_10_10_10(samples, n),
|
||
// k_16_16_FLOAT (6): two half-floats packed in one u32.
|
||
6 => average_2_half_floats(samples, n),
|
||
// k_32_FLOAT (14): one f32 per sample.
|
||
14 => average_1_f32(samples, n),
|
||
// For any unsupported format, fall back to first sample —
|
||
// upstream gating already filtered to bitwise-equivalent pairs
|
||
// so this branch should be unreachable in practice.
|
||
_ => samples[0],
|
||
},
|
||
// Depth resolves never carry MSAA averaging (sanitize collapses to
|
||
// single-sample); reaching this branch is a degenerate caller.
|
||
ResolveSource::Depth => samples[0],
|
||
}
|
||
}
|
||
|
||
/// Per-format averaging for 64bpp color resolves. Returns `(lo, hi)`.
|
||
fn average_samples_64bpp(samples: &[(u32, u32)], source_format: u8) -> (u32, u32) {
|
||
let n = samples.len() as u32;
|
||
debug_assert!(n > 0);
|
||
match source_format {
|
||
// k_16_16_16_16 (5): four 16-bit channels across (lo, hi). Per-
|
||
// channel rounded unsigned-int mean. Signed/unsigned variants
|
||
// resolve identically because the resolve is a raw byte copy —
|
||
// averaging signed values as unsigned still gives the correct
|
||
// bits because two's-complement addition of `n` values divided
|
||
// by `n` lands on the same bit pattern after truncation.
|
||
5 => average_4_u16(samples, n),
|
||
// k_16_16_16_16_FLOAT (7): four half-floats.
|
||
7 => average_4_half_floats(samples, n),
|
||
// k_32_32_FLOAT (15): two f32 (R32 = lo, G32 = hi).
|
||
15 => average_2_f32(samples, n),
|
||
_ => samples[0],
|
||
}
|
||
}
|
||
|
||
/// Rounded unsigned mean of each of the four 8-bit channels packed in the
/// samples. `n` is the divisor (the caller passes the sample count); half of
/// `n` is added before dividing so the result rounds to nearest.
#[inline]
fn average_8_8_8_8(samples: &[u32], n: u32) -> u32 {
    let bias = n / 2;
    [0u32, 8, 16, 24].iter().fold(0u32, |packed, &shift| {
        let total: u32 = samples.iter().map(|&s| (s >> shift) & 0xFF).sum();
        packed | ((((total + bias) / n) & 0xFF) << shift)
    })
}
|
||
|
||
/// Rounded unsigned mean of each field of 2:10:10:10-packed samples. Field
/// widths are 2/10/10/10 bits from the low bit upward; `n` is the divisor
/// (the caller passes the sample count).
#[inline]
fn average_2_10_10_10(samples: &[u32], n: u32) -> u32 {
    // (bit offset, bit width) of each packed field, low to high.
    const FIELDS: [(u32, u32); 4] = [(0, 2), (2, 10), (12, 10), (22, 10)];

    let bias = n / 2;
    FIELDS.iter().fold(0u32, |packed, &(shift, width)| {
        let mask = (1u32 << width) - 1;
        let total: u32 = samples.iter().map(|&s| (s >> shift) & mask).sum();
        packed | ((((total + bias) / n) & mask) << shift)
    })
}
|
||
|
||
/// Widen an IEEE-style 16-bit half float (1 sign, 5 exponent, 10 mantissa
/// bits) to `f32`. Zeros keep their sign, subnormals are normalized, and an
/// all-ones exponent maps to the f32 all-ones exponent (Inf/NaN) with the
/// mantissa shifted into place.
#[inline]
fn half_to_f32(half: u16) -> f32 {
    let sign_bit = u32::from(half >> 15) << 31;
    let exp_field = i32::from((half >> 10) & 0x1F);
    let frac = u32::from(half & 0x3FF);

    let bits = match (exp_field, frac) {
        // Signed zero.
        (0, 0) => sign_bit,
        // Subnormal half: shift the mantissa left until its leading 1 reaches
        // bit 10 (the implicit-one position), lowering the exponent by the
        // same number of steps.
        (0, _) => {
            let steps = frac.leading_zeros() - 21;
            let normalized = (frac << steps) & 0x3FF;
            let unbiased = -14 - steps as i32;
            sign_bit | (((unbiased + 127) as u32) << 23) | (normalized << 13)
        }
        // Inf / NaN.
        (31, _) => sign_bit | (0xFFu32 << 23) | (frac << 13),
        // Normal number: rebias the exponent from 15 to 127.
        _ => sign_bit | (((exp_field - 15 + 127) as u32) << 23) | (frac << 13),
    };
    f32::from_bits(bits)
}
|
||
|
||
/// Narrow an `f32` to a 16-bit half float (1 sign, 5 exponent, 10 mantissa
/// bits), truncating excess mantissa bits toward zero.
///
/// * Inf stays Inf; any NaN becomes a NaN with mantissa `0x200`.
/// * f32 zeros and f32 subnormals become signed zero.
/// * Magnitudes too large for a half become signed infinity.
/// * Magnitudes below the half normal range become a half subnormal, or
///   signed zero once they are too small for that.
#[inline]
fn f32_to_half(f: f32) -> u16 {
    const HALF_INF: u16 = 0x1F << 10;

    let bits = f.to_bits();
    let sign = ((bits >> 31) as u16) << 15;
    let biased = ((bits >> 23) & 0xFF) as i32;
    let frac = bits & 0x007F_FFFF;

    match biased {
        // Inf or NaN.
        0xFF => sign | HALF_INF | if frac != 0 { 0x200 } else { 0 },
        // Zero or f32 subnormal.
        0 => sign,
        _ => {
            // Exponent rebias: f32 uses 127, half uses 15.
            let half_exp = biased - 112;
            if half_exp >= 31 {
                sign | HALF_INF
            } else if half_exp > 0 {
                sign | ((half_exp as u16) << 10) | ((frac >> 13) as u16)
            } else if half_exp < -10 {
                sign
            } else {
                // Half subnormal: restore the implicit leading 1, then shift
                // it down into the 10-bit mantissa (truncating).
                let shift = (14 - half_exp) as u32;
                sign | (((frac | 0x0080_0000) >> shift) as u16)
            }
        }
    }
}
|
||
|
||
#[inline]
|
||
fn average_2_half_floats(samples: &[u32], n: u32) -> u32 {
|
||
// Each u32 = (lo: half, hi: half). Average as f32, re-encode.
|
||
let mut sum_lo = 0.0f32;
|
||
let mut sum_hi = 0.0f32;
|
||
for &s in samples {
|
||
sum_lo += half_to_f32((s & 0xFFFF) as u16);
|
||
sum_hi += half_to_f32(((s >> 16) & 0xFFFF) as u16);
|
||
}
|
||
let inv = 1.0f32 / n as f32;
|
||
let lo = f32_to_half(sum_lo * inv) as u32;
|
||
let hi = f32_to_half(sum_hi * inv) as u32;
|
||
lo | (hi << 16)
|
||
}
|
||
|
||
/// Average samples that are each one `f32` bit pattern: sum them as floats,
/// divide by `n`, and return the resulting bit pattern.
#[inline]
fn average_1_f32(samples: &[u32], n: u32) -> u32 {
    let total = samples
        .iter()
        .fold(0.0f32, |acc, &bits| acc + f32::from_bits(bits));
    (total / n as f32).to_bits()
}
|
||
|
||
/// Average samples that each carry four 16-bit lanes across `(lo, hi)`
/// (lanes: `lo[15:0]`, `lo[31:16]`, `hi[15:0]`, `hi[31:16]`).
///
/// Each lane is treated as a two's-complement *signed* value. In this file
/// the helper is only used for render-target format 5 (`k_16_16_16_16`),
/// which Xenia documents as signed fixed-point. Averaging the raw bits as
/// unsigned would wrap across the sign boundary: the mean of `0xFFFF` (-1)
/// and `0x0001` (+1) would come out as `0x8000` (-32768) instead of 0.
///
/// `n` is the divisor (the caller passes the sample count). The mean is
/// rounded to nearest with ties toward +infinity, so results for non-negative
/// lanes are identical to a rounded unsigned mean.
#[inline]
fn average_4_u16(samples: &[(u32, u32)], n: u32) -> (u32, u32) {
    // Sign-extend one 16-bit lane of `word` into a wide accumulator type.
    let lane = |word: u32, shift: u32| i64::from((word >> shift) as u16 as i16);

    let mut sums = [0i64; 4];
    for &(lo, hi) in samples {
        sums[0] += lane(lo, 0);
        sums[1] += lane(lo, 16);
        sums[2] += lane(hi, 0);
        sums[3] += lane(hi, 16);
    }

    let count = i64::from(n);
    let half = count / 2;
    // Floor division keeps the rounding direction the same for negative and
    // positive sums; the result is re-packed as a 16-bit two's-complement lane.
    let mean = |sum: i64| u32::from((sum + half).div_euclid(count) as i16 as u16);

    let lo = mean(sums[0]) | (mean(sums[1]) << 16);
    let hi = mean(sums[2]) | (mean(sums[3]) << 16);
    (lo, hi)
}
|
||
|
||
#[inline]
|
||
fn average_4_half_floats(samples: &[(u32, u32)], n: u32) -> (u32, u32) {
|
||
let mut sums = [0.0f32; 4];
|
||
for &(lo, hi) in samples {
|
||
sums[0] += half_to_f32((lo & 0xFFFF) as u16);
|
||
sums[1] += half_to_f32(((lo >> 16) & 0xFFFF) as u16);
|
||
sums[2] += half_to_f32((hi & 0xFFFF) as u16);
|
||
sums[3] += half_to_f32(((hi >> 16) & 0xFFFF) as u16);
|
||
}
|
||
let inv = 1.0f32 / n as f32;
|
||
let h0 = f32_to_half(sums[0] * inv) as u32;
|
||
let h1 = f32_to_half(sums[1] * inv) as u32;
|
||
let h2 = f32_to_half(sums[2] * inv) as u32;
|
||
let h3 = f32_to_half(sums[3] * inv) as u32;
|
||
(h0 | (h1 << 16), h2 | (h3 << 16))
|
||
}
|
||
|
||
/// Average samples that each carry two `f32` bit patterns (`lo`, `hi`). Each
/// lane is summed as a float, scaled by `1 / n`, and returned as bits.
#[inline]
fn average_2_f32(samples: &[(u32, u32)], n: u32) -> (u32, u32) {
    let (total_lo, total_hi) = samples
        .iter()
        .fold((0.0f32, 0.0f32), |(acc_lo, acc_hi), &(lo, hi)| {
            (acc_lo + f32::from_bits(lo), acc_hi + f32::from_bits(hi))
        });

    let scale = 1.0f32 / n as f32;
    ((total_lo * scale).to_bits(), (total_hi * scale).to_bits())
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
use crate::draw_state::{ResolveCoordinates, ResolveInfo};
|
||
use crate::edram::ShadowEdram;
|
||
use crate::render_target_cache::MsaaSamples;
|
||
use crate::tiled_address::{align_pitch_to_macro_tile, tiled_2d_offset};
|
||
use xenia_memory::GuestMemory;
|
||
|
||
/// Build a minimally-populated [`ResolveInfo`] for tests.
|
||
fn minimal_info(dest_base: u32, pitch: u32, height: u32) -> ResolveInfo {
|
||
ResolveInfo {
|
||
copy_src_select: 0,
|
||
copy_sample_select: 0,
|
||
color_clear_enable: false,
|
||
depth_clear_enable: false,
|
||
copy_command: 0,
|
||
dest_base,
|
||
dest_pitch_pixels: pitch,
|
||
dest_height_pixels: height,
|
||
dest_format: color_format::K_8_8_8_8,
|
||
dest_endian: 0,
|
||
dest_exp_bias: 0,
|
||
source: ResolveSource::Color(0),
|
||
coords: ResolveCoordinates {
|
||
x0: 0,
|
||
y0: 0,
|
||
width: pitch,
|
||
height,
|
||
sample_count_log2_x: 0,
|
||
sample_count_log2_y: 0,
|
||
},
|
||
source_format: 0,
|
||
source_base_tiles: 0,
|
||
surface_pitch_tiles: pitch.div_ceil(80),
|
||
msaa: MsaaSamples::X1,
|
||
source_is_64bpp: false,
|
||
color_clear_value: 0,
|
||
color_clear_value_lo: 0,
|
||
depth_clear_value: 0,
|
||
copy_dest_array: false,
|
||
}
|
||
}
|
||
|
||
fn fresh_mem() -> GuestMemory {
|
||
use xenia_memory::page_table::MemoryProtect;
|
||
let mut mem = GuestMemory::new().expect("guest memory");
|
||
mem.alloc(
|
||
0x4000_0000,
|
||
0x0010_0000,
|
||
MemoryProtect::READ | MemoryProtect::WRITE,
|
||
)
|
||
.expect("alloc");
|
||
mem
|
||
}
|
||
|
||
#[test]
|
||
fn endian_k_none_is_identity() {
|
||
assert_eq!(apply_endian_128(0x11223344, 0), 0x11223344);
|
||
}
|
||
|
||
#[test]
|
||
fn endian_k8in16_swaps_byte_pairs() {
|
||
assert_eq!(apply_endian_128(0x11223344, 1), 0x22114433);
|
||
}
|
||
|
||
#[test]
|
||
fn endian_k8in32_is_full_byte_reverse() {
|
||
assert_eq!(apply_endian_128(0x11223344, 2), 0x44332211);
|
||
}
|
||
|
||
#[test]
|
||
fn endian_k16in32_swaps_halves() {
|
||
assert_eq!(apply_endian_128(0x11223344, 3), 0x33441122);
|
||
}
|
||
|
||
#[test]
|
||
fn color_clear_resolve_writes_le_bytes_with_k8in32() {
|
||
// Clear-resolve a 32x8 rectangle of k_8_8_8_8 samples to pattern
|
||
// 0x11223344 with endian k8in32. Memory should contain LE bytes
|
||
// [0x44, 0x33, 0x22, 0x11] at every tiled sample offset.
|
||
let mut mem = fresh_mem();
|
||
let mut edram = ShadowEdram::new();
|
||
edram.fill_rect_32bpp(0, 1, 0, 0, 32, 8, 0x11223344);
|
||
|
||
let mut info = minimal_info(0x4000_0000, 32, 8);
|
||
info.dest_endian = 2; // k8in32
|
||
info.source_base_tiles = 0;
|
||
info.surface_pitch_tiles = 1;
|
||
|
||
let stats = copy_to_memory(&info, &edram, &mut mem);
|
||
assert!(stats.supported);
|
||
assert_eq!(stats.samples_written, 32 * 8);
|
||
|
||
let pitch_aligned = align_pitch_to_macro_tile(32);
|
||
for y in 0..8u32 {
|
||
for x in 0..32u32 {
|
||
let off = tiled_2d_offset(x, y, pitch_aligned, 2);
|
||
let addr = 0x4000_0000u32.wrapping_add(off);
|
||
let bytes = [
|
||
mem.read_u8(addr),
|
||
mem.read_u8(addr.wrapping_add(1)),
|
||
mem.read_u8(addr.wrapping_add(2)),
|
||
mem.read_u8(addr.wrapping_add(3)),
|
||
];
|
||
assert_eq!(
|
||
bytes,
|
||
[0x44, 0x33, 0x22, 0x11],
|
||
"mismatch at ({x}, {y})"
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn k_none_endian_keeps_big_endian_bytes() {
|
||
let mut mem = fresh_mem();
|
||
let mut edram = ShadowEdram::new();
|
||
edram.fill_rect_32bpp(0, 1, 0, 0, 16, 8, 0xAABBCCDD);
|
||
|
||
let mut info = minimal_info(0x4000_0000, 16, 8);
|
||
info.dest_endian = 0;
|
||
info.source_base_tiles = 0;
|
||
info.surface_pitch_tiles = 1;
|
||
|
||
let stats = copy_to_memory(&info, &edram, &mut mem);
|
||
assert!(stats.supported);
|
||
|
||
let pitch_aligned = align_pitch_to_macro_tile(16);
|
||
let off = tiled_2d_offset(0, 0, pitch_aligned, 2);
|
||
let addr = 0x4000_0000u32.wrapping_add(off);
|
||
assert_eq!(
|
||
[
|
||
mem.read_u8(addr),
|
||
mem.read_u8(addr.wrapping_add(1)),
|
||
mem.read_u8(addr.wrapping_add(2)),
|
||
mem.read_u8(addr.wrapping_add(3)),
|
||
],
|
||
[0xAA, 0xBB, 0xCC, 0xDD]
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn empty_rect_is_noop_and_no_page_version_bump() {
|
||
let mut mem = fresh_mem();
|
||
let edram = ShadowEdram::new();
|
||
let before = mem.page_version(0x4000_0000);
|
||
|
||
let mut info = minimal_info(0x4000_0000, 0, 0);
|
||
info.coords.width = 0;
|
||
info.coords.height = 0;
|
||
let stats = copy_to_memory(&info, &edram, &mut mem);
|
||
assert!(stats.supported);
|
||
assert_eq!(stats.samples_written, 0);
|
||
assert_eq!(mem.page_version(0x4000_0000), before);
|
||
}
|
||
|
||
#[test]
|
||
fn unsupported_dest_format_is_graceful() {
|
||
let mut mem = fresh_mem();
|
||
let edram = ShadowEdram::new();
|
||
let mut info = minimal_info(0x4000_0000, 16, 16);
|
||
// k_16_16_16_16 is 64bpp — not bitwise-equivalent to any 32bpp dest.
|
||
info.dest_format = 26;
|
||
let stats = copy_to_memory(&info, &edram, &mut mem);
|
||
assert!(!stats.supported);
|
||
assert_eq!(stats.samples_written, 0);
|
||
}
|
||
|
||
/// A successful resolve must advance the destination page's version, which
/// (per the test name) is what texture-cache invalidation keys on.
#[test]
fn resolve_bumps_page_version_for_texture_cache_invalidation() {
    let mut guest = fresh_mem();
    let mut shadow = ShadowEdram::new();
    shadow.fill_rect_32bpp(0, 1, 0, 0, 16, 8, 0xDEADBEEF);

    // Snapshot the version before the copy touches guest memory.
    let version_before = guest.page_version(0x4000_0000);

    let mut resolve = minimal_info(0x4000_0000, 16, 8);
    resolve.surface_pitch_tiles = 1;
    resolve.source_base_tiles = 0;

    let outcome = copy_to_memory(&resolve, &shadow, &mut guest);
    assert!(outcome.supported);

    let version_after = guest.page_version(0x4000_0000);
    assert!(version_after > version_before);
}
|
||
|
||
/// The 2:10:10:10 colour formats resolve bit-for-bit into their texture
/// counterparts (Canary `xenos.h:624-627`). Same copy path as 8:8:8:8,
/// only the format numbers differ.
#[test]
fn k_2_10_10_10_is_bitwise_equivalent() {
    // (render-target format, texture format)
    let equivalent_pairs = [
        (2, 7),   // k_2_10_10_10 -> k_2_10_10_10
        (10, 54), // k_2_10_10_10_AS_10_10_10_10 -> k_2_10_10_10_AS_16_16_16_16
    ];

    for (source_format, dest_format) in equivalent_pairs {
        assert!(
            is_32bpp_bitwise_equivalent(
                ResolveSource::Color(0),
                false,
                source_format,
                dest_format,
            ),
            "source={source_format} dest={dest_format}"
        );
    }
}
|
||
|
||
/// A k_8_8_8_8_GAMMA render target resolves into a plain k_8_8_8_8 texture
/// bit-for-bit (gamma is applied at sample time, not on store).
#[test]
fn k_8_8_8_8_gamma_source_is_bitwise_equivalent() {
    let gamma_source = 1; // k_8_8_8_8_GAMMA
    let plain_dest = 6; // k_8_8_8_8

    let equivalent =
        is_32bpp_bitwise_equivalent(ResolveSource::Color(0), false, gamma_source, plain_dest);
    assert!(equivalent);
}
|
||
|
||
/// Depth resolve pairs: kD24S8 → k_24_8 and kD24FS8 → k_24_8_FLOAT are
/// equivalent; crossing the integer and float variants is not.
#[test]
fn depth_resolve_format_equivalence() {
    // (depth format, texture format, expected equivalence)
    let cases = [
        (0, 22, true),  // kD24S8 -> k_24_8
        (1, 23, true),  // kD24FS8 -> k_24_8_FLOAT
        (0, 23, false), // kD24S8 -> k_24_8_FLOAT: mismatched
    ];

    for (depth_format, texture_format, expected) in cases {
        let actual = is_32bpp_bitwise_equivalent(
            ResolveSource::Depth,
            false,
            depth_format,
            texture_format,
        );
        assert_eq!(
            actual, expected,
            "depth={depth_format} texture={texture_format}"
        );
    }
}
|
||
|
||
/// A source flagged as 64bpp never matches on the 32bpp equivalence check,
/// whatever the source/dest format numbers are.
#[test]
fn sixty_four_bpp_source_is_never_equivalent() {
    let wide_source = 5; // k_16_16_16_16
    let narrow_dest = 6;

    let equivalent =
        is_32bpp_bitwise_equivalent(ResolveSource::Color(0), true, wide_source, narrow_dest);
    assert!(!equivalent);
}
|
||
|
||
/// 64bpp bitwise-equivalent pairs per Canary `xenos.h:614-639`, plus two
/// pairings that must be rejected.
#[test]
fn sixty_four_bpp_equivalence_pairs() {
    // (render-target format, texture format) pairs that must match.
    let accepted = [
        (5, 26),  // k_16_16_16_16 -> k_16_16_16_16
        (7, 32),  // k_16_16_16_16_FLOAT -> k_16_16_16_16_FLOAT
        (15, 37), // k_32_32_FLOAT -> k_32_32_FLOAT
    ];
    for (source_format, dest_format) in accepted {
        assert!(
            is_64bpp_bitwise_equivalent(source_format, dest_format),
            "expected {source_format} -> {dest_format} to be equivalent"
        );
    }

    // Cross-format pairings must be rejected.
    let rejected = [(5, 32), (0, 26)];
    for (source_format, dest_format) in rejected {
        assert!(
            !is_64bpp_bitwise_equivalent(source_format, dest_format),
            "expected {source_format} -> {dest_format} to be rejected"
        );
    }
}
|
||
|
||
/// End-to-end 64bpp resolve: paint a `k_16_16_16_16` pattern into EDRAM
/// and confirm `copy_to_memory` lands two u32s per pixel into guest mem.
///
/// Every one of the first pixel's eight bytes is checked, so a defect that
/// only disturbs the middle bytes of the second word cannot slip through.
#[test]
fn sixty_four_bpp_resolve_writes_two_words_per_pixel() {
    let mut mem = fresh_mem();
    let mut edram = ShadowEdram::new();
    // 16x4 logical 64bpp samples; pitch = 1 32bpp tile.
    edram.fill_rect_64bpp(0, 1, 0, 0, 16, 4, 0xAABB_CCDD, 0x1122_3344);

    let mut info = minimal_info(0x4000_0000, 16, 4);
    info.source = ResolveSource::Color(0);
    info.source_format = 5; // k_16_16_16_16
    info.dest_format = color_format::K_16_16_16_16;
    info.source_is_64bpp = true;
    info.dest_endian = 0; // kNone
    info.source_base_tiles = 0;
    info.surface_pitch_tiles = 1;
    info.coords.width = 16;
    info.coords.height = 4;

    let stats = copy_to_memory(&info, &edram, &mut mem);
    assert!(stats.supported);
    assert_eq!(stats.samples_written, 16 * 4);

    // First pixel: lo word at dst_off, hi word at dst_off + 4. The tiled
    // offset is computed with bpp_log2 = 3 (8 bytes per pixel).
    let pitch_aligned = align_pitch_to_macro_tile(16);
    let off = tiled_2d_offset(0, 0, pitch_aligned, 3);
    let addr = 0x4000_0000u32.wrapping_add(off);

    // With no endian swap, each word is stored big-endian:
    // 0xAABBCCDD then 0x11223344.
    let expected: [u8; 8] = [0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22, 0x33, 0x44];
    for (delta, want) in (0u32..).zip(expected) {
        let got = mem.read_u8(addr.wrapping_add(delta));
        assert_eq!(got, want, "byte {delta} of the first 64bpp pixel");
    }
}
|
||
|
||
/// MSAA averaging — `k_8_8_8_8` per-channel rounded mean of 4 samples.
///
/// The four samples of pixel (0, 0) carry 0, 64, 128 and 192 in the red
/// channel; the resolve must produce the rounded mean, 96.
#[test]
fn msaa_4x_averaging_k_8_8_8_8() {
    let mut guest = fresh_mem();
    let mut shadow = ShadowEdram::new();

    // 4x MSAA: each pixel occupies a 2x2 sample grid. Entries are
    // (sample x, sample y, sample word); G = B = A = 0 throughout.
    let samples = [
        (0, 0, 0x0000_0000), // s0: R=0
        (1, 0, 0x0000_0040), // s1: R=64
        (0, 1, 0x0000_0080), // s2: R=128
        (1, 1, 0x0000_00C0), // s3: R=192
    ];
    for (sample_x, sample_y, word) in samples {
        shadow.write_sample_32bpp(0, 1, sample_x, sample_y, word);
    }

    let mut resolve = minimal_info(0x4000_0000, 1, 1);
    resolve.source = ResolveSource::Color(0);
    resolve.source_format = 0; // k_8_8_8_8
    resolve.dest_format = color_format::K_8_8_8_8;
    resolve.msaa = MsaaSamples::X4;
    resolve.copy_sample_select = 6; // K0123
    resolve.coords.sample_count_log2_x = 1;
    resolve.coords.sample_count_log2_y = 1;
    resolve.coords.width = 1;
    resolve.coords.height = 1;
    resolve.dest_endian = 0;
    resolve.source_base_tiles = 0;
    resolve.surface_pitch_tiles = 1;

    let outcome = copy_to_memory(&resolve, &shadow, &mut guest);
    assert!(outcome.supported);
    assert_eq!(outcome.samples_written, 1);

    // R = (0 + 64 + 128 + 192 + 2) / 4 = 96 = 0x60. R sits in the low byte
    // of the sample word (0x0000_0060), so the big-endian store places it
    // in the last of the four bytes.
    let base = 0x4000_0000u32;
    let bytes = [0u32, 1, 2, 3].map(|delta| guest.read_u8(base.wrapping_add(delta)));
    assert_eq!(bytes, [0x00, 0x00, 0x00, 0x60], "averaged R should be 0x60");
}
|
||
|
||
/// MSAA averaging — `k_32_FLOAT` averages 4 f32 samples linearly.
#[test]
fn msaa_4x_averaging_k_32_float() {
    let mut guest = fresh_mem();
    let mut shadow = ShadowEdram::new();

    // (sample x, sample y, value) for the 2x2 sample grid of pixel (0, 0).
    let samples = [(0, 0, 1.0f32), (1, 0, 2.0), (0, 1, 3.0), (1, 1, 4.0)];
    for (sample_x, sample_y, value) in samples {
        shadow.write_sample_32bpp(0, 1, sample_x, sample_y, value.to_bits());
    }

    let mut resolve = minimal_info(0x4000_0000, 1, 1);
    resolve.source = ResolveSource::Color(0);
    resolve.source_format = 14; // k_32_FLOAT
    resolve.dest_format = color_format::K_32_FLOAT;
    resolve.msaa = MsaaSamples::X4;
    resolve.copy_sample_select = 6; // K0123
    resolve.coords.sample_count_log2_x = 1;
    resolve.coords.sample_count_log2_y = 1;
    resolve.coords.width = 1;
    resolve.coords.height = 1;
    resolve.dest_endian = 2; // k8in32 — game-typical for float sampling
    resolve.source_base_tiles = 0;
    resolve.surface_pitch_tiles = 1;

    let outcome = copy_to_memory(&resolve, &shadow, &mut guest);
    assert!(outcome.supported);

    // (1 + 2 + 3 + 4) / 4 = 2.5
    let expected = 2.5f32.to_bits();

    // k8in32 reverses the four bytes of the word before the big-endian
    // store, so guest memory ends up holding the little-endian byte
    // sequence of the averaged value.
    let base = 0x4000_0000u32;
    let bytes = [0u32, 1, 2, 3].map(|delta| guest.read_u8(base + delta));
    assert_eq!(bytes, expected.to_le_bytes());
}
|
||
|
||
/// MSAA averaging — `k_2_10_10_10` per-field rounded mean.
#[test]
fn msaa_2x_averaging_k_2_10_10_10() {
    /// Packs a 2-bit field at bit 0 followed by three 10-bit fields at
    /// bits 2, 12 and 22, masking each input to its field width.
    fn pack(a: u32, b: u32, g: u32, r: u32) -> u32 {
        let low = (a & 0x3) | ((b & 0x3FF) << 2);
        let high = ((g & 0x3FF) << 12) | ((r & 0x3FF) << 22);
        low | high
    }

    let mut guest = fresh_mem();
    let mut shadow = ShadowEdram::new();

    // 2x MSAA samples are stacked vertically (s0 at y=0, s1 at y=1).
    shadow.write_sample_32bpp(0, 1, 0, 0, pack(0, 100, 200, 300));
    shadow.write_sample_32bpp(0, 1, 0, 1, pack(2, 200, 300, 400));

    let mut resolve = minimal_info(0x4000_0000, 1, 1);
    resolve.source = ResolveSource::Color(0);
    resolve.source_format = 2; // k_2_10_10_10
    resolve.dest_format = color_format::K_2_10_10_10;
    resolve.msaa = MsaaSamples::X2;
    resolve.copy_sample_select = 4; // K01
    resolve.coords.sample_count_log2_x = 0;
    resolve.coords.sample_count_log2_y = 1;
    resolve.coords.width = 1;
    resolve.coords.height = 1;
    resolve.dest_endian = 0;
    resolve.source_base_tiles = 0;
    resolve.surface_pitch_tiles = 1;

    let outcome = copy_to_memory(&resolve, &shadow, &mut guest);
    assert!(outcome.supported);

    // Rounded per-field means:
    //   a = (0 + 2 + 1) / 2     = 1
    //   b = (100 + 200 + 1) / 2 = 150
    //   g = (200 + 300 + 1) / 2 = 250
    //   r = (300 + 400 + 1) / 2 = 350
    let expected = pack(1, 150, 250, 350);

    // No endian swap was requested, so the word is stored big-endian.
    let base = 0x4000_0000u32;
    let bytes = [0u32, 1, 2, 3].map(|delta| guest.read_u8(base + delta));
    assert_eq!(bytes, expected.to_be_bytes());
}
|
||
|
||
/// End-to-end depth resolve: fill a depth surface at tile base 8 with a
/// known pattern and verify the copy emerges in guest memory with the
/// right (endian-swapped) bytes.
#[test]
fn depth_clear_resolve_end_to_end() {
    let mut guest = fresh_mem();
    let mut shadow = ShadowEdram::new();

    // Paint the depth tiles directly with a known pattern.
    shadow.fill_rect_32bpp(8, 1, 0, 0, 16, 8, 0x3FFF_FF00);

    let mut resolve = minimal_info(0x4000_0000, 16, 8);
    resolve.source = ResolveSource::Depth;
    resolve.source_format = 0; // kD24S8
    resolve.dest_format = color_format::K_24_8;
    resolve.dest_endian = 2; // k8in32
    resolve.surface_pitch_tiles = 1;
    resolve.source_base_tiles = 8;

    let outcome = copy_to_memory(&resolve, &shadow, &mut guest);
    assert!(outcome.supported);
    assert_eq!(outcome.samples_written, 16 * 8);

    // First pixel should be the endian-swapped pattern: a big-endian store
    // of 0x3FFF_FF00.swap_bytes() = 0x00FF_FF3F.
    let first_pixel =
        0x4000_0000u32.wrapping_add(tiled_2d_offset(0, 0, align_pitch_to_macro_tile(16), 2));
    let stored = [0u32, 1, 2, 3].map(|delta| guest.read_u8(first_pixel.wrapping_add(delta)));
    assert_eq!(stored, [0x00, 0xFF, 0xFF, 0x3F]);
}
|
||
|
||
/// With 1x MSAA, `sanitize_sample_select` maps every raw select value
/// (0 through 7) to K0.
#[test]
fn sanitize_1x_msaa_collapses_to_k0() {
    (0..=7u8).for_each(|raw| {
        let sanitized = sanitize_sample_select(raw, MsaaSamples::X1, false);
        assert_eq!(sanitized, CopySampleSelect::K0, "raw={raw}");
    });
}
|
||
|
||
/// 2x MSAA: k2→k0, k3→k1, k23→k01 for colour; depth averages sanitize
/// to k0.
#[test]
fn sanitize_2x_msaa_obeys_canary_rules() {
    // (raw select, depth flag, expected sanitized select)
    let cases = [
        (2, false, CopySampleSelect::K0),
        (3, false, CopySampleSelect::K1),
        (5, false, CopySampleSelect::K01),
        // Depth — no averaging.
        (4, true, CopySampleSelect::K0),
        (6, true, CopySampleSelect::K0),
    ];

    for (raw, depth, expected) in cases {
        let sanitized = sanitize_sample_select(raw, MsaaSamples::X2, depth);
        assert_eq!(sanitized, expected, "raw={raw} depth={depth}");
    }
}
|
||
|
||
/// 4x MSAA: depth averages collapse to a representative single sample
/// (k0123 → k0, k23 → k2, k01 → k0), while colour keeps its average.
#[test]
fn sanitize_4x_msaa_depth_collapses_averages() {
    // (raw select, depth flag, expected sanitized select)
    let cases = [
        (6, true, CopySampleSelect::K0),
        (5, true, CopySampleSelect::K2),
        (4, true, CopySampleSelect::K0),
        // Color keeps averages.
        (6, false, CopySampleSelect::K0123),
    ];

    for (raw, depth, expected) in cases {
        let sanitized = sanitize_sample_select(raw, MsaaSamples::X4, depth);
        assert_eq!(sanitized, expected, "raw={raw} depth={depth}");
    }
}
|
||
|
||
/// Sample offsets follow the standard Xbox 360 MSAA layout: one sample at
/// the origin for 1x, two stacked vertically for 2x, and a row-major 2x2
/// grid for 4x.
#[test]
fn sample_offset_layout() {
    // 1x
    assert_eq!(sample_offset_in_pixel(0, MsaaSamples::X1), (0, 0));

    // 2x
    for (sample, offset) in [(0, (0, 0)), (1, (0, 1))] {
        assert_eq!(sample_offset_in_pixel(sample, MsaaSamples::X2), offset);
    }

    // 4x
    for (sample, offset) in [(0, (0, 0)), (1, (1, 0)), (2, (0, 1)), (3, (1, 1))] {
        assert_eq!(sample_offset_in_pixel(sample, MsaaSamples::X4), offset);
    }
}
|
||
}
|