First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).
Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.
Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1114 lines
43 KiB
Rust
1114 lines
43 KiB
Rust
//! Extract draw state from the Xenos register file at `PM4_DRAW_INDX` time.
|
||
//!
|
||
//! This is the "what are we drawing?" snapshot: primitive type, vertex count,
|
||
//! index buffer (if any), viewport, scissor, blend, depth state, and enough
|
||
//! handles for a future translator / uber-shader to pull fetch constants +
|
||
//! shader blobs. Ground truth: `xenia-canary/src/xenia/gpu/draw_util.h` and
|
||
//! the PM4 handler at `pm4_command_processor_implement.h:1128-1151`.
|
||
//!
|
||
//! We only extract what the P3 uber-shader actually consumes; the rest is
|
||
//! reserved for later phases.
|
||
|
||
use crate::register_file::RegisterFile;
|
||
|
||
/// Xenos primitive topology, mirroring the `PrimitiveType` enum in `xenos.h`.
///
/// Encodings that have no named variant here are carried through unchanged
/// in [`PrimitiveType::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrimitiveType {
    None,
    PointList,
    LineList,
    LineStrip,
    TriangleList,
    TriangleFan,
    TriangleStrip,
    RectangleList,
    QuadList,
    /// Any other 6-bit encoding, stored verbatim.
    Unknown(u8),
}

impl PrimitiveType {
    /// Decodes a primitive type from the low six bits of `b`.
    ///
    /// Bits above `[5:0]` are ignored, so a whole `VGT_DRAW_INITIATOR` dword
    /// can be passed in directly.
    pub fn from_bits(b: u32) -> Self {
        // The mask guarantees the value fits in a `u8`.
        let code = (b & 0x3F) as u8;
        match code {
            0 => Self::None,
            1 => Self::PointList,
            2 => Self::LineList,
            3 => Self::LineStrip,
            4 => Self::TriangleList,
            5 => Self::TriangleFan,
            6 => Self::TriangleStrip,
            8 => Self::RectangleList,
            13 => Self::QuadList,
            _ => Self::Unknown(code),
        }
    }
}
|
||
|
||
/// Origin of a draw's indices, selected by `VGT_DRAW_INITIATOR.source_select`
/// (0 = DMA, 1 = immediate / in-packet, 2 = auto-index).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexSource {
    /// Indices live in a guest buffer described by `VGT_DMA_BASE` /
    /// `VGT_DMA_SIZE`.
    Dma {
        base_address: u32,
        size_dwords: u32,
        index_size: IndexSize,
    },
    /// Indices are embedded in the command stream right after the
    /// `DRAW_INDX_2` packet header.
    Immediate { index_size: IndexSize },
    /// There is no index buffer; the host synthesizes
    /// `0..vertex_count - 1`.
    AutoIndex,
}

/// Width of a single index element.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexSize {
    /// Indices are 16 bits wide.
    Sixteen,
    /// Indices are 32 bits wide.
    ThirtyTwo,
}
|
||
|
||
/// Snapshot of one draw call's state, sampled from the register file.
|
||
#[derive(Debug, Clone, Copy)]
|
||
pub struct DrawState {
|
||
pub primitive: PrimitiveType,
|
||
pub vertex_count: u32,
|
||
pub index_source: IndexSource,
|
||
pub viewport: Viewport,
|
||
pub scissor: Scissor,
|
||
/// RB_COLOR_INFO for each of the 4 possible color render targets; `None`
|
||
/// where the target is not bound.
|
||
pub color_info: [Option<ColorTargetInfo>; 4],
|
||
pub depth_info: Option<DepthTargetInfo>,
|
||
pub rb_modecontrol: u32,
|
||
pub rb_colorcontrol: u32,
|
||
pub rb_depthcontrol: u32,
|
||
/// P4: per-color-target blend state. Index matches `color_info`.
|
||
pub rb_blendcontrol: [u32; 4],
|
||
/// P4: stencil state.
|
||
pub rb_stencilrefmask: u32,
|
||
pub rb_stencilrefmask_bf: u32,
|
||
/// P4: pixel offset applied at rasterization.
|
||
pub pa_sc_window_offset: u32,
|
||
/// P4: resolve destination registers (`RB_COPY_*`). These are set by
|
||
/// the guest just before triggering a TILE_FLUSH event and describe
|
||
/// where an EDRAM→texture copy should land.
|
||
pub rb_copy_control: u32,
|
||
pub rb_copy_dest_base: u32,
|
||
pub rb_copy_dest_pitch: u32,
|
||
pub rb_copy_dest_info: u32,
|
||
/// Key of the VS blob that was active at draw time (from
|
||
/// `GpuSystem::active_vs_key`). `None` = no VS loaded yet; the draw is
|
||
/// meaningless and will be rejected by the dispatcher.
|
||
pub vs_blob_key: Option<u32>,
|
||
/// Key of the PS blob that was active at draw time.
|
||
pub ps_blob_key: Option<u32>,
|
||
}
|
||
|
||
/// Viewport transform: one scale and one offset per axis, each taken from
/// the matching `PA_CL_VPORT_*` register reinterpreted as `f32`.
#[derive(Debug, Clone, Copy, Default)]
pub struct Viewport {
    pub scale_x: f32,
    pub scale_y: f32,
    pub scale_z: f32,
    pub offset_x: f32,
    pub offset_y: f32,
    pub offset_z: f32,
}
|
||
|
||
/// Scissor rectangle given by its top-left (`tl_*`) and bottom-right
/// (`br_*`) corners, as unpacked from `PA_SC_WINDOW_SCISSOR_TL` / `_BR`.
#[derive(Debug, Clone, Copy, Default)]
pub struct Scissor {
    pub tl_x: u16,
    pub tl_y: u16,
    pub br_x: u16,
    pub br_y: u16,
}
|
||
|
||
/// Decoded subset of one `RB_COLOR_INFO` register.
#[derive(Debug, Clone, Copy)]
pub struct ColorTargetInfo {
    /// Where this color target starts in EDRAM, in tiles
    /// (`RB_COLOR_INFO.base_tiles`).
    pub base_tiles: u16,
    /// Render-target color format (`RB_COLOR_INFO.color_format`).
    pub format: u8,
}
|
||
|
||
/// Decoded subset of `RB_DEPTH_INFO`.
#[derive(Debug, Clone, Copy)]
pub struct DepthTargetInfo {
    /// Where the depth/stencil surface starts in EDRAM, in tiles.
    pub base_tiles: u16,
    /// Depth format: 0 = D24S8, 1 = D24FS8 (see `xenos.h:404-408`).
    pub format: u8,
}
|
||
|
||
/// Which render target a resolve reads from.
///
/// Encoded in `RB_COPY_CONTROL.copy_src_select` (bits [2:0]): values 0..=3
/// select color target 0..3 and 4 selects the depth target
/// (Canary `registers.h:853`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolveSource {
    /// One of the four color render targets, by index.
    Color(u8),
    /// The depth render target.
    Depth,
}
|
||
|
||
/// Rectangle covered by a resolve, in destination-resolution pixels and
/// aligned to 8 pixels (Canary's `kResolveAlignmentPixels = 8`).
///
/// MSAA scaling is deliberately not folded into the rectangle: the two
/// `sample_count_log2_*` fields tell the resolve how many samples to step
/// per destination pixel on each axis.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ResolveCoordinates {
    pub x0: u32,
    pub y0: u32,
    pub width: u32,
    pub height: u32,
    /// 1 only for 4x MSAA, where samples are laid out twice as wide as
    /// pixels; otherwise 0.
    pub sample_count_log2_x: u32,
    /// 1 for 2x MSAA and above, where samples are laid out twice as tall as
    /// pixels; otherwise 0.
    pub sample_count_log2_y: u32,
}
|
||
|
||
/// Decoded resolve state — describes how a `TILE_FLUSH` event should copy
|
||
/// EDRAM bytes to a guest-memory tiled texture. Canary equivalent:
|
||
/// `draw_util::ResolveInfo` at `draw_util.h:627`. Bit-field layout in
|
||
/// `RB_COPY_CONTROL / RB_COPY_DEST_INFO / RB_COPY_DEST_PITCH` comes from
|
||
/// `registers.h:853-897`.
|
||
#[derive(Debug, Clone, Copy)]
|
||
pub struct ResolveInfo {
|
||
/// Which source RT (0..=3=color, 4=depth). Raw register bits.
|
||
pub copy_src_select: u8,
|
||
/// Sample selector for MSAA sources. See `xenos::CopySampleSelect`.
|
||
pub copy_sample_select: u8,
|
||
/// Enable clear of the source render target after the copy.
|
||
pub color_clear_enable: bool,
|
||
pub depth_clear_enable: bool,
|
||
/// 0 = raw tile copy (same format), 1 = convert to `copy_dest_format`.
|
||
/// 2 = constantOne, 3 = null (no copy).
|
||
pub copy_command: u8,
|
||
/// Guest-memory destination address, already masked to the 29-bit
|
||
/// Xenon physical range (`& 0x1FFF_FFFF`).
|
||
pub dest_base: u32,
|
||
/// Destination pitch in pixels (0..=16383). Byte pitch = pitch * bpp
|
||
/// after the caller pitch-aligns to `kStoragePitchHeightAlignmentBlocks
|
||
/// = 32`.
|
||
pub dest_pitch_pixels: u32,
|
||
pub dest_height_pixels: u32,
|
||
/// Destination format (`xenos::ColorFormat`, 6 bits).
|
||
pub dest_format: u8,
|
||
/// Byte-swap mode applied before the write (`xenos::Endian128`, 0..=5).
|
||
pub dest_endian: u8,
|
||
/// Signed [-32, 31] exponent bias applied during conversion.
|
||
pub dest_exp_bias: i8,
|
||
/// Decoded resolve source (color0..3 or depth).
|
||
pub source: ResolveSource,
|
||
/// 8-pixel-aligned resolve rectangle.
|
||
pub coords: ResolveCoordinates,
|
||
/// Source format: `ColorRenderTargetFormat` when color,
|
||
/// `DepthRenderTargetFormat` when depth.
|
||
pub source_format: u8,
|
||
/// EDRAM tile origin of the source RT (from `RB_COLOR_INFO.color_base`
|
||
/// or `RB_DEPTH_INFO.depth_base`, 11-bit mod 2048).
|
||
pub source_base_tiles: u16,
|
||
/// `GetSurfacePitchTiles(surface_pitch, msaa, is_64bpp)` — how many
|
||
/// 80-sample-wide tiles make up one EDRAM row.
|
||
pub surface_pitch_tiles: u32,
|
||
/// MSAA mode from `RB_SURFACE_INFO`.
|
||
pub msaa: crate::render_target_cache::MsaaSamples,
|
||
/// True iff the source color format is 64bpp (doubles EDRAM pitch/base).
|
||
pub source_is_64bpp: bool,
|
||
/// `RB_COLOR_CLEAR` — constant written into EDRAM when
|
||
/// `color_clear_enable` is set.
|
||
pub color_clear_value: u32,
|
||
/// `RB_COLOR_CLEAR_LO` — second 32-bit lane for 64bpp clear.
|
||
pub color_clear_value_lo: u32,
|
||
/// `RB_DEPTH_CLEAR` — constant written into EDRAM depth tiles on
|
||
/// `depth_clear_enable`.
|
||
pub depth_clear_value: u32,
|
||
/// `RB_COPY_DEST_INFO.copy_dest_array` — 2D (false) vs 3D/stacked (true).
|
||
pub copy_dest_array: bool,
|
||
}
|
||
|
||
/// `GetSurfacePitchTiles(pitch_pixels, msaa, is_64bpp)` — ported from
|
||
/// `xenos.h:465-476`. Returns the number of 80-sample-wide EDRAM tiles
|
||
/// that make up one row of a surface with `pitch_pixels`-pixel pitch.
|
||
///
|
||
/// At 4x MSAA samples span twice the pixel width, so the sample pitch
|
||
/// doubles. 64bpp formats pack two EDRAM tiles per color value, so the
|
||
/// effective tile pitch doubles again.
|
||
#[inline]
|
||
pub fn surface_pitch_tiles(
|
||
pitch_pixels: u32,
|
||
msaa: crate::render_target_cache::MsaaSamples,
|
||
is_64bpp: bool,
|
||
) -> u32 {
|
||
use crate::render_target_cache::MsaaSamples;
|
||
const EDRAM_TILE_WIDTH_SAMPLES: u32 = 80;
|
||
let pitch_samples = pitch_pixels << u32::from(msaa == MsaaSamples::X4);
|
||
let pitch_tiles = pitch_samples.div_ceil(EDRAM_TILE_WIDTH_SAMPLES);
|
||
pitch_tiles << u32::from(is_64bpp)
|
||
}
|
||
|
||
/// Reports whether a Canary `ColorRenderTargetFormat` value is a 64bpp
/// format.
///
/// Exactly three encodings qualify: 5 (`k_16_16_16_16`),
/// 7 (`k_16_16_16_16_FLOAT`) and 15 (`k_32_32_FLOAT`); see
/// `xenos.h:297-317` and the enum's `IsColorRenderTargetFormat64bpp` helper.
#[inline]
pub fn color_render_target_format_is_64bpp(fmt: u8) -> bool {
    const FORMATS_64BPP: [u8; 3] = [5, 7, 15];
    FORMATS_64BPP.contains(&fmt)
}
|
||
|
||
/// Pixel granularity that resolve rectangles are aligned to; Canary's
/// `kResolveAlignmentPixels` (around `draw_util.cc:925`).
pub const RESOLVE_ALIGNMENT_PIXELS: u32 = 8;
|
||
|
||
/// Clamp a raw resolve rectangle to the `PA_SC_WINDOW_SCISSOR_*` registers
|
||
/// and align to the 8-pixel grid. Caller passes `i32` because the VF0
|
||
/// derivation can produce negative bounding-box values; this helper clamps
|
||
/// them to the non-negative window defined by the scissor.
|
||
///
|
||
/// Returns `(x0, y0, width, height)` in pixels, all non-negative, all
|
||
/// 8-pixel-aligned, `width`/`height` already `>= 0`. Width/height of 0
|
||
/// signals "empty resolve; skip".
|
||
pub fn resolve_rect_apply_scissor_and_align_8(
|
||
rf: &RegisterFile,
|
||
x0_in: i32,
|
||
y0_in: i32,
|
||
x1_in: i32,
|
||
y1_in: i32,
|
||
) -> (u32, u32, u32, u32) {
|
||
let tl = rf.read(reg::PA_SC_WINDOW_SCISSOR_TL);
|
||
let br = rf.read(reg::PA_SC_WINDOW_SCISSOR_BR);
|
||
let tl_x = (tl & 0x3FFF) as i32;
|
||
let tl_y = ((tl >> 16) & 0x3FFF) as i32;
|
||
let br_x = (br & 0x3FFF) as i32;
|
||
let br_y = ((br >> 16) & 0x3FFF) as i32;
|
||
|
||
// Clamp only when the scissor is a non-degenerate window; otherwise
|
||
// leave the input rect alone (Canary's `kResolveAlignmentPixels` will
|
||
// still 8-align it below).
|
||
let (mut x0, mut y0, mut x1, mut y1) = (x0_in, y0_in, x1_in, y1_in);
|
||
if br_x > tl_x && br_y > tl_y {
|
||
let clamp = |v: i32, lo: i32, hi: i32| v.max(lo).min(hi);
|
||
x0 = clamp(x0, tl_x, br_x);
|
||
y0 = clamp(y0, tl_y, br_y);
|
||
x1 = clamp(x1, tl_x, br_x);
|
||
y1 = clamp(y1, tl_y, br_y);
|
||
}
|
||
if x1 < x0 {
|
||
x1 = x0;
|
||
}
|
||
if y1 < y0 {
|
||
y1 = y0;
|
||
}
|
||
|
||
// 8-pixel align. Floor top-left; ceil bottom-right.
|
||
let align_mask = (RESOLVE_ALIGNMENT_PIXELS as i32) - 1;
|
||
x0 &= !align_mask;
|
||
y0 &= !align_mask;
|
||
x1 = (x1 + align_mask) & !align_mask;
|
||
y1 = (y1 + align_mask) & !align_mask;
|
||
|
||
let x0u = x0.max(0) as u32;
|
||
let y0u = y0.max(0) as u32;
|
||
let x1u = x1.max(0) as u32;
|
||
let y1u = y1.max(0) as u32;
|
||
(
|
||
x0u,
|
||
y0u,
|
||
x1u.saturating_sub(x0u),
|
||
y1u.saturating_sub(y0u),
|
||
)
|
||
}
|
||
|
||
/// Parse vertex fetch constant 0 (Canary `xe_gpu_vertex_fetch_t`,
|
||
/// `xenos.h:1158-1172`) and derive the resolve bounding-box in pixel units.
|
||
/// Returns `None` when the fetch isn't the 6-float vertex buffer the
|
||
/// resolve shader expects (type != kVertex or size != 6).
|
||
///
|
||
/// This mirrors `draw_util.cc:950-1014` minus window-offset and half-pixel
|
||
/// nudging — the pitfalls there are (a) handling endian via `GpuSwap` and
|
||
/// (b) Fixed16p8 top-left rounding `(v + 127) >> 8`. Both are replicated.
|
||
///
|
||
/// The returned rect is in *pixel* coordinates, *pre-scissor-clamp* and
|
||
/// *pre-alignment*. Caller feeds it through
|
||
/// [`resolve_rect_apply_scissor_and_align_8`].
|
||
pub fn vertex_fetch_0_rect(
|
||
rf: &RegisterFile,
|
||
mem: &dyn xenia_memory::access::MemoryAccess,
|
||
) -> Option<(i32, i32, i32, i32)> {
|
||
const CONST_BASE_FETCH: u32 = 0x4800;
|
||
let dword_0 = rf.read(CONST_BASE_FETCH);
|
||
let dword_1 = rf.read(CONST_BASE_FETCH + 1);
|
||
|
||
// type:2 at bits [1:0]; kVertex = 3 per xenos.h:1147-1152.
|
||
let fetch_type = dword_0 & 0x3;
|
||
if fetch_type != 3 {
|
||
return None;
|
||
}
|
||
// size:24 at bits [25:2] of dword_1 — in dwords; expect 6 (3 × vec2).
|
||
let size = (dword_1 >> 2) & 0x00FF_FFFF;
|
||
if size != 6 {
|
||
return None;
|
||
}
|
||
// address:30 at bits [31:2] of dword_0 — in dwords.
|
||
let address_bytes = dword_0 & 0xFFFF_FFFC;
|
||
// endian:2 at bits [1:0] of dword_1 — xenos::Endian (kNone/k8in16/k8in32/k16in32).
|
||
let fetch_endian = (dword_1 & 0x3) as u8;
|
||
|
||
// Read 6 floats from guest memory. `mem.read_u32` stores BE bytes as a
|
||
// u32 value; to mirror Canary's "raw LE bytes → u32 → GpuSwap" we have
|
||
// to re-interpret the memory as LE (flipping what `read_u32` did).
|
||
let floats: [f32; 6] = std::array::from_fn(|i| {
|
||
let be_u32 = mem.read_u32(address_bytes.wrapping_add(i as u32 * 4));
|
||
// `be_u32` was composed from bytes `[b0,b1,b2,b3]` as
|
||
// `(b0<<24)|...|b3`. Canary reads those same bytes in host-LE,
|
||
// producing `(b3<<24)|...|b0`. That's `be_u32.swap_bytes()`.
|
||
let canary_le = be_u32.swap_bytes();
|
||
let swapped = gpu_swap_u32(canary_le, fetch_endian);
|
||
f32::from_bits(swapped)
|
||
});
|
||
|
||
// PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel), 1 = kOpenGL (no offset).
|
||
// Register index 0x2083 per register_table.inc (PA_SU_VTX_CNTL).
|
||
const PA_SU_VTX_CNTL: u32 = 0x2083;
|
||
let half_pixel_offset = if rf.read(PA_SU_VTX_CNTL) & 1 == 0 {
|
||
0.5f32
|
||
} else {
|
||
0.0f32
|
||
};
|
||
|
||
// Convert each to Fixed16p8 (multiply by 256, round).
|
||
let fixed: [i32; 6] = std::array::from_fn(|i| {
|
||
((floats[i] + half_pixel_offset) * 256.0).round() as i32
|
||
});
|
||
|
||
let x0 = fixed[0].min(fixed[2]).min(fixed[4]);
|
||
let y0 = fixed[1].min(fixed[3]).min(fixed[5]);
|
||
let x1 = fixed[0].max(fixed[2]).max(fixed[4]);
|
||
let y1 = fixed[1].max(fixed[3]).max(fixed[5]);
|
||
|
||
// Top-left rounding: `(v + 127) >> 8` for both corners.
|
||
let round = |v: i32| (v + 127) >> 8;
|
||
Some((round(x0), round(y0), round(x1), round(y1)))
|
||
}
|
||
|
||
/// `u32` flavour of Canary's `GpuSwapInline` (`xenos.h:1077-1114`), used by
/// the vertex-fetch path to apply the same byte-order transform that
/// Canary's `GpuSwap<float>` applies to vertex data.
///
/// Only the low two bits of `endian` are considered:
/// * 0 (kNone): value returned unchanged;
/// * 1 (k8in16): the two bytes of each 16-bit half are exchanged;
/// * 2 (k8in32): all four bytes are reversed;
/// * 3 (k16in32): the two 16-bit halves are exchanged.
#[inline]
fn gpu_swap_u32(value: u32, endian: u8) -> u32 {
    // Most-significant byte first.
    let [b0, b1, b2, b3] = value.to_be_bytes();
    let reordered = match endian & 0x3 {
        0 => [b0, b1, b2, b3],
        1 => [b1, b0, b3, b2],
        2 => [b3, b2, b1, b0],
        _ => [b2, b3, b0, b1],
    };
    u32::from_be_bytes(reordered)
}
|
||
|
||
impl ResolveInfo {
|
||
/// Legacy entrypoint used when the caller already has a `DrawState`. It
|
||
/// fills only the narrow register bits that live in `DrawState` — the
|
||
/// wider coordinate / EDRAM fields require the full register file.
|
||
///
|
||
/// Kept for tests that construct resolve decoders from captured draw
|
||
/// states. `from_register_file` is the canonical path.
|
||
pub fn from_draw_state(ds: &DrawState) -> Self {
|
||
use crate::render_target_cache::MsaaSamples;
|
||
let c = ds.rb_copy_control;
|
||
let p = ds.rb_copy_dest_pitch;
|
||
let i = ds.rb_copy_dest_info;
|
||
// Sign-extend the 6-bit exp_bias from `copy_dest_info[21:16]`.
|
||
let exp_raw = (i >> 16) & 0x3F;
|
||
let exp_sign = ((exp_raw & 0x20) != 0) as i8;
|
||
let exp_bias = (exp_raw as i8) - (exp_sign * 64);
|
||
let src_sel = (c & 0x7) as u8;
|
||
let source = if src_sel >= 4 {
|
||
ResolveSource::Depth
|
||
} else {
|
||
ResolveSource::Color(src_sel)
|
||
};
|
||
Self {
|
||
copy_src_select: src_sel,
|
||
copy_sample_select: ((c >> 4) & 0x7) as u8,
|
||
color_clear_enable: ((c >> 8) & 1) != 0,
|
||
depth_clear_enable: ((c >> 9) & 1) != 0,
|
||
copy_command: ((c >> 20) & 0x3) as u8,
|
||
dest_base: ds.rb_copy_dest_base & 0x1FFF_FFFF,
|
||
dest_pitch_pixels: p & 0x3FFF,
|
||
dest_height_pixels: (p >> 16) & 0x3FFF,
|
||
dest_format: ((i >> 7) & 0x3F) as u8,
|
||
dest_endian: (i & 0x7) as u8,
|
||
dest_exp_bias: exp_bias,
|
||
source,
|
||
coords: ResolveCoordinates::default(),
|
||
source_format: 0,
|
||
source_base_tiles: 0,
|
||
surface_pitch_tiles: 0,
|
||
msaa: MsaaSamples::X1,
|
||
source_is_64bpp: false,
|
||
color_clear_value: 0,
|
||
color_clear_value_lo: 0,
|
||
depth_clear_value: 0,
|
||
copy_dest_array: ((i >> 3) & 1) != 0,
|
||
}
|
||
}
|
||
|
||
/// Canonical resolve decoder — reads live register values and derives the
|
||
/// full rectangle / EDRAM layout. Mirrors canary `draw_util.cc:926-1318`
|
||
/// `GetResolveInfo` with the following simplifications (all scoped in
|
||
/// the landing plan and will be expanded as needs arise):
|
||
///
|
||
/// * The rectangle is derived from the scissor window and
|
||
/// `RB_COPY_DEST_PITCH` rather than fetched from vertex fetch 0.
|
||
/// Sylpheed's splash uses a clear-resolve — there's no draw ahead
|
||
/// of it — so vertex-fetch-derived geometry is not available.
|
||
/// * `copy_sample_select` is kept as-is; sample averaging for 2x/4x
|
||
/// MSAA is not yet applied on the read side.
|
||
/// * `PA_SC_WINDOW_OFFSET` is not applied — not needed for Sylpheed
|
||
/// and canary only applies it when `PA_SU_SC_MODE_CNTL.vtx_window
|
||
/// _offset_enable` is set, which requires a live draw.
|
||
pub fn from_register_file(rf: &RegisterFile) -> Self {
|
||
use crate::render_target_cache::MsaaSamples;
|
||
let c = rf.read(reg::RB_COPY_CONTROL);
|
||
let i = rf.read(reg::RB_COPY_DEST_INFO);
|
||
let p = rf.read(reg::RB_COPY_DEST_PITCH);
|
||
let dest_base_raw = rf.read(reg::RB_COPY_DEST_BASE);
|
||
|
||
// Sign-extend 6-bit exp_bias from copy_dest_info[21:16].
|
||
let exp_raw = (i >> 16) & 0x3F;
|
||
let exp_sign = ((exp_raw & 0x20) != 0) as i8;
|
||
let exp_bias = (exp_raw as i8) - (exp_sign * 64);
|
||
|
||
let src_sel = (c & 0x7) as u8;
|
||
let source = if src_sel >= 4 {
|
||
ResolveSource::Depth
|
||
} else {
|
||
ResolveSource::Color(src_sel & 0x3)
|
||
};
|
||
|
||
let rb_surface_info = rf.read(reg::RB_SURFACE_INFO);
|
||
let surface_pitch_pixels = rb_surface_info & 0x3FFF;
|
||
let msaa = MsaaSamples::from_raw((rb_surface_info >> 16) & 0x3);
|
||
|
||
// Source format + base tiles depend on which RT we're reading.
|
||
let (source_format, source_base_tiles, source_is_64bpp) = match source {
|
||
ResolveSource::Color(idx) => {
|
||
let rb = match idx {
|
||
0 => rf.read(reg::RB_COLOR_INFO_0),
|
||
1 => rf.read(reg::RB_COLOR_INFO_1),
|
||
2 => rf.read(reg::RB_COLOR_INFO_2),
|
||
_ => rf.read(reg::RB_COLOR_INFO_3),
|
||
};
|
||
let fmt = ((rb >> 16) & 0xF) as u8;
|
||
let base = (rb & 0xFFF) as u16;
|
||
(fmt, base, color_render_target_format_is_64bpp(fmt))
|
||
}
|
||
ResolveSource::Depth => {
|
||
let rb = rf.read(reg::RB_DEPTH_INFO);
|
||
let fmt = ((rb >> 16) & 0x1) as u8;
|
||
let base = (rb & 0xFFF) as u16;
|
||
(fmt, base, false)
|
||
}
|
||
};
|
||
|
||
let pitch_tiles = surface_pitch_tiles(surface_pitch_pixels, msaa, source_is_64bpp);
|
||
|
||
// --- Rectangle derivation ---
|
||
// Default extent is (0, 0, dest_pitch, dest_height); subject to
|
||
// scissor clamp + 8-pixel alignment.
|
||
let dest_pitch = p & 0x3FFF;
|
||
let dest_height = (p >> 16) & 0x3FFF;
|
||
let coords_no_msaa = resolve_rect_apply_scissor_and_align_8(
|
||
rf,
|
||
0,
|
||
0,
|
||
dest_pitch as i32,
|
||
dest_height as i32,
|
||
);
|
||
let coords = ResolveCoordinates {
|
||
x0: coords_no_msaa.0,
|
||
y0: coords_no_msaa.1,
|
||
width: coords_no_msaa.2,
|
||
height: coords_no_msaa.3,
|
||
sample_count_log2_x: u32::from(msaa == MsaaSamples::X4),
|
||
sample_count_log2_y: u32::from(msaa != MsaaSamples::X1),
|
||
};
|
||
|
||
Self {
|
||
copy_src_select: src_sel,
|
||
copy_sample_select: ((c >> 4) & 0x7) as u8,
|
||
color_clear_enable: ((c >> 8) & 1) != 0,
|
||
depth_clear_enable: ((c >> 9) & 1) != 0,
|
||
copy_command: ((c >> 20) & 0x3) as u8,
|
||
dest_base: dest_base_raw & 0x1FFF_FFFF,
|
||
dest_pitch_pixels: dest_pitch,
|
||
dest_height_pixels: dest_height,
|
||
dest_format: ((i >> 7) & 0x3F) as u8,
|
||
dest_endian: (i & 0x7) as u8,
|
||
dest_exp_bias: exp_bias,
|
||
source,
|
||
coords,
|
||
source_format,
|
||
source_base_tiles,
|
||
surface_pitch_tiles: pitch_tiles,
|
||
msaa,
|
||
source_is_64bpp,
|
||
color_clear_value: rf.read(reg::RB_COLOR_CLEAR),
|
||
color_clear_value_lo: rf.read(reg::RB_COLOR_CLEAR_LO),
|
||
depth_clear_value: rf.read(reg::RB_DEPTH_CLEAR),
|
||
copy_dest_array: ((i >> 3) & 1) != 0,
|
||
}
|
||
}
|
||
|
||
/// Memory-aware variant: if vertex fetch 0 contains the D3D9-hack
|
||
/// "resolve rectangle" vertices (3 vec2 floats, Canary `draw_util.cc
|
||
/// :950-1014`), use its bounding box as the resolve extent. Falls back
|
||
/// to the scissor + `RB_COPY_DEST_PITCH/HEIGHT` rect when VF0 isn't a
|
||
/// 6-dword vertex buffer.
|
||
///
|
||
/// Used from the live TILE_FLUSH path; tests can stick with
|
||
/// `from_register_file` when they don't want to program VF0.
|
||
pub fn from_register_file_and_memory(
|
||
rf: &RegisterFile,
|
||
mem: &dyn xenia_memory::access::MemoryAccess,
|
||
) -> Self {
|
||
let mut info = Self::from_register_file(rf);
|
||
if let Some((x0, y0, x1, y1)) = vertex_fetch_0_rect(rf, mem) {
|
||
let (rx0, ry0, rw, rh) =
|
||
resolve_rect_apply_scissor_and_align_8(rf, x0, y0, x1, y1);
|
||
// Only override when the VF0 rect is non-empty — an empty VF0
|
||
// means the game hasn't set one up yet and we should keep the
|
||
// scissor+dest default.
|
||
if rw > 0 && rh > 0 {
|
||
info.coords.x0 = rx0;
|
||
info.coords.y0 = ry0;
|
||
info.coords.width = rw;
|
||
info.coords.height = rh;
|
||
}
|
||
}
|
||
info
|
||
}
|
||
}
|
||
|
||
/// Register indices, following `xenia-canary/src/xenia/gpu/register_table.inc`
/// / `registers.h`. Only the registers the extractor reads are named here.
///
/// These are indices into the hardware register space: guest PM4 packets
/// address registers by these numbers, so each value has to match the Xenos
/// register map exactly.
pub mod reg {
    // Vertex grouper / draw initiation.
    pub const VGT_DMA_BASE: u32 = 0x21FA;
    pub const VGT_DMA_SIZE: u32 = 0x21FB;
    pub const VGT_DRAW_INITIATOR: u32 = 0x21FC;

    // Viewport transform (float registers).
    pub const PA_CL_VPORT_XSCALE: u32 = 0x210F;
    pub const PA_CL_VPORT_XOFFSET: u32 = 0x2110;
    pub const PA_CL_VPORT_YSCALE: u32 = 0x2111;
    pub const PA_CL_VPORT_YOFFSET: u32 = 0x2112;
    pub const PA_CL_VPORT_ZSCALE: u32 = 0x2113;
    pub const PA_CL_VPORT_ZOFFSET: u32 = 0x2114;

    // Window scissor. 0x200E/0x200F are the *screen* scissor registers
    // (`PA_SC_SCREEN_SCISSOR_*`); the window scissor directly follows
    // `PA_SC_WINDOW_OFFSET`.
    pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x2081;
    pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x2082;

    // Render backend: surface, color and depth targets. The hardware names
    // for the color slots are RB_COLOR_INFO, RB_COLOR1_INFO,
    // RB_COLOR2_INFO and RB_COLOR3_INFO.
    pub const RB_MODECONTROL: u32 = 0x2208;
    pub const RB_SURFACE_INFO: u32 = 0x2000;
    pub const RB_COLOR_INFO_0: u32 = 0x2001;
    pub const RB_COLOR_INFO_1: u32 = 0x2003;
    pub const RB_COLOR_INFO_2: u32 = 0x2004;
    pub const RB_COLOR_INFO_3: u32 = 0x2005;
    pub const RB_DEPTH_INFO: u32 = 0x2002;
    pub const RB_COLORCONTROL: u32 = 0x2202;
    pub const RB_DEPTHCONTROL: u32 = 0x2200;

    // P4 additions: per-RT blend + stencil + window offset + resolve dst.
    pub const RB_BLENDCONTROL_0: u32 = 0x2201;
    pub const RB_BLENDCONTROL_1: u32 = 0x2209;
    pub const RB_BLENDCONTROL_2: u32 = 0x220A;
    pub const RB_BLENDCONTROL_3: u32 = 0x220B;
    pub const RB_STENCILREFMASK: u32 = 0x210D;
    pub const RB_STENCILREFMASK_BF: u32 = 0x210C;
    pub const PA_SC_WINDOW_OFFSET: u32 = 0x2080;
    pub const RB_COPY_CONTROL: u32 = 0x2318;
    pub const RB_COPY_DEST_BASE: u32 = 0x2319;
    pub const RB_COPY_DEST_PITCH: u32 = 0x231A;
    pub const RB_COPY_DEST_INFO: u32 = 0x231B;
    pub const RB_DEPTH_CLEAR: u32 = 0x231D;
    pub const RB_COLOR_CLEAR: u32 = 0x231E;
    pub const RB_COLOR_CLEAR_LO: u32 = 0x231F;
}
|
||
|
||
/// Build a [`DrawState`] from a `VGT_DRAW_INITIATOR` value + the current
|
||
/// register file. `extra_dma_base`/`extra_dma_size` can override the
|
||
/// DMA fields if the caller has them from the PM4 packet payload (canary
|
||
/// passes them inline with `DRAW_INDX`).
|
||
pub fn extract(
|
||
register_file: &RegisterFile,
|
||
vgt_draw_initiator: u32,
|
||
dma_base: Option<u32>,
|
||
dma_size: Option<u32>,
|
||
) -> DrawState {
|
||
// `VGT_DRAW_INITIATOR` bit layout (per canary):
|
||
// [5:0] prim_type
|
||
// [7:6] source_select (0=DMA, 1=immediate, 2=auto)
|
||
// [8] index_size (0=16-bit, 1=32-bit)
|
||
// [31:16] num_indices
|
||
let prim_bits = vgt_draw_initiator & 0x3F;
|
||
let source_select = (vgt_draw_initiator >> 6) & 0x3;
|
||
let index_size_bit = (vgt_draw_initiator >> 8) & 0x1;
|
||
let num_indices = (vgt_draw_initiator >> 16) & 0xFFFF;
|
||
let index_size = if index_size_bit == 0 {
|
||
IndexSize::Sixteen
|
||
} else {
|
||
IndexSize::ThirtyTwo
|
||
};
|
||
|
||
let index_source = match source_select {
|
||
0 => IndexSource::Dma {
|
||
base_address: dma_base.unwrap_or_else(|| register_file.read(reg::VGT_DMA_BASE)),
|
||
size_dwords: dma_size.unwrap_or_else(|| register_file.read(reg::VGT_DMA_SIZE)),
|
||
index_size,
|
||
},
|
||
1 => IndexSource::Immediate { index_size },
|
||
_ => IndexSource::AutoIndex,
|
||
};
|
||
|
||
let f = |r: u32| f32::from_bits(register_file.read(r));
|
||
let viewport = Viewport {
|
||
scale_x: f(reg::PA_CL_VPORT_XSCALE),
|
||
scale_y: f(reg::PA_CL_VPORT_YSCALE),
|
||
scale_z: f(reg::PA_CL_VPORT_ZSCALE),
|
||
offset_x: f(reg::PA_CL_VPORT_XOFFSET),
|
||
offset_y: f(reg::PA_CL_VPORT_YOFFSET),
|
||
offset_z: f(reg::PA_CL_VPORT_ZOFFSET),
|
||
};
|
||
|
||
let tl = register_file.read(reg::PA_SC_WINDOW_SCISSOR_TL);
|
||
let br = register_file.read(reg::PA_SC_WINDOW_SCISSOR_BR);
|
||
let scissor = Scissor {
|
||
tl_x: (tl & 0x7FFF) as u16,
|
||
tl_y: ((tl >> 16) & 0x7FFF) as u16,
|
||
br_x: (br & 0x7FFF) as u16,
|
||
br_y: ((br >> 16) & 0x7FFF) as u16,
|
||
};
|
||
|
||
let rb_modecontrol = register_file.read(reg::RB_MODECONTROL);
|
||
let color_mask = rb_modecontrol & 0xF;
|
||
let ci = |reg: u32, present: bool| {
|
||
if !present {
|
||
return None;
|
||
}
|
||
let raw = register_file.read(reg);
|
||
Some(ColorTargetInfo {
|
||
base_tiles: (raw & 0xFFF) as u16,
|
||
format: ((raw >> 16) & 0xF) as u8,
|
||
})
|
||
};
|
||
let color_info = [
|
||
ci(reg::RB_COLOR_INFO_0, (color_mask & 0x1) != 0),
|
||
ci(reg::RB_COLOR_INFO_1, (color_mask & 0x2) != 0),
|
||
ci(reg::RB_COLOR_INFO_2, (color_mask & 0x4) != 0),
|
||
ci(reg::RB_COLOR_INFO_3, (color_mask & 0x8) != 0),
|
||
];
|
||
let depth_raw = register_file.read(reg::RB_DEPTH_INFO);
|
||
// Depth-surface "present" = the RB_MODECONTROL depth-enable bit at bit 4.
|
||
let depth_present = (rb_modecontrol & 0x10) != 0;
|
||
let depth_info = if depth_present {
|
||
Some(DepthTargetInfo {
|
||
base_tiles: (depth_raw & 0xFFF) as u16,
|
||
format: ((depth_raw >> 16) & 0x1) as u8,
|
||
})
|
||
} else {
|
||
None
|
||
};
|
||
|
||
DrawState {
|
||
primitive: PrimitiveType::from_bits(prim_bits),
|
||
vertex_count: num_indices,
|
||
index_source,
|
||
viewport,
|
||
scissor,
|
||
color_info,
|
||
depth_info,
|
||
rb_modecontrol,
|
||
rb_colorcontrol: register_file.read(reg::RB_COLORCONTROL),
|
||
rb_depthcontrol: register_file.read(reg::RB_DEPTHCONTROL),
|
||
rb_blendcontrol: [
|
||
register_file.read(reg::RB_BLENDCONTROL_0),
|
||
register_file.read(reg::RB_BLENDCONTROL_1),
|
||
register_file.read(reg::RB_BLENDCONTROL_2),
|
||
register_file.read(reg::RB_BLENDCONTROL_3),
|
||
],
|
||
rb_stencilrefmask: register_file.read(reg::RB_STENCILREFMASK),
|
||
rb_stencilrefmask_bf: register_file.read(reg::RB_STENCILREFMASK_BF),
|
||
pa_sc_window_offset: register_file.read(reg::PA_SC_WINDOW_OFFSET),
|
||
rb_copy_control: register_file.read(reg::RB_COPY_CONTROL),
|
||
rb_copy_dest_base: register_file.read(reg::RB_COPY_DEST_BASE),
|
||
rb_copy_dest_pitch: register_file.read(reg::RB_COPY_DEST_PITCH),
|
||
rb_copy_dest_info: register_file.read(reg::RB_COPY_DEST_INFO),
|
||
// P3b M1: the kernel-side caller is expected to populate these
|
||
// via `DrawState { ..extract(...), vs_blob_key, ps_blob_key }` so
|
||
// the pure-register extraction stays decoupled from `GpuSystem`
|
||
// state. Default to None so a bare `extract()` stays valid for
|
||
// unit tests.
|
||
vs_blob_key: None,
|
||
ps_blob_key: None,
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
fn rf() -> RegisterFile {
|
||
RegisterFile::new()
|
||
}
|
||
|
||
#[test]
|
||
fn extract_basic_triangle_list_no_rt() {
|
||
let rf = rf();
|
||
// prim_type=4 (TriangleList), source=2 (auto), num_indices=6
|
||
let vgt = (6u32 << 16) | (2 << 6) | 4;
|
||
let ds = extract(&rf, vgt, None, None);
|
||
assert_eq!(ds.primitive, PrimitiveType::TriangleList);
|
||
assert_eq!(ds.vertex_count, 6);
|
||
assert!(matches!(ds.index_source, IndexSource::AutoIndex));
|
||
assert!(ds.color_info.iter().all(|c| c.is_none()));
|
||
assert!(ds.depth_info.is_none());
|
||
}
|
||
|
||
#[test]
|
||
fn extract_dma_indices_uses_override() {
|
||
let rf = rf();
|
||
let vgt = (3u32 << 16) | (0 << 6) | 4; // prim=TriList, source=DMA
|
||
let ds = extract(&rf, vgt, Some(0xDEAD_0000), Some(6));
|
||
match ds.index_source {
|
||
IndexSource::Dma {
|
||
base_address,
|
||
size_dwords,
|
||
index_size,
|
||
} => {
|
||
assert_eq!(base_address, 0xDEAD_0000);
|
||
assert_eq!(size_dwords, 6);
|
||
assert_eq!(index_size, IndexSize::Sixteen);
|
||
}
|
||
other => panic!("expected Dma, got {other:?}"),
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn color_and_depth_enabled_bits_are_honored() {
|
||
let mut rf = rf();
|
||
// rb_modecontrol: color0 + depth enabled (bit0 + bit4)
|
||
rf.write(reg::RB_MODECONTROL, 0x11);
|
||
rf.write(reg::RB_COLOR_INFO_0, (2 << 16) | 0x64); // format=2, tile=0x64
|
||
rf.write(reg::RB_DEPTH_INFO, (1 << 16) | 0x32);
|
||
let ds = extract(&rf, 4, None, None);
|
||
let c = ds.color_info[0].unwrap();
|
||
assert_eq!(c.format, 2);
|
||
assert_eq!(c.base_tiles, 0x64);
|
||
let d = ds.depth_info.unwrap();
|
||
assert_eq!(d.format, 1);
|
||
assert_eq!(d.base_tiles, 0x32);
|
||
}
|
||
|
||
/// `RB_COPY_DEST_BASE` carries a raw 32-bit value, whereas a Xenon physical
/// address is only 29 bits wide (`& 0x1FFF_FFFF`).
/// `ResolveInfo::from_register_file` has to apply that mask so later writes
/// cannot land outside the addressable range.
#[test]
fn resolve_info_masks_dest_base_to_physical() {
    let mut regs = rf();
    regs.write(reg::RB_COPY_DEST_BASE, 0xDEAD_BEEF);

    let resolved = ResolveInfo::from_register_file(&regs);

    // 0xDEAD_BEEF with the top three bits cleared.
    assert_eq!(resolved.dest_base, 0x1EAD_BEEF);
}
|
||
|
||
/// The resolve rect is scissor ∩ (0, 0, dest_pitch, dest_height), then
/// aligned to 8 pixels per Canary `kResolveAlignmentPixels`. The scissor here
/// is strictly inside the destination, so this checks that it genuinely
/// tightens the rect rather than merely degenerating it.
#[test]
fn resolve_info_derives_8px_aligned_rect_from_scissor_and_dest_pitch() {
    // All three registers in this test are written as (y << 16) | x.
    let pack_yx = |y: u32, x: u32| (y << 16) | x;

    let mut regs = rf();
    // Dest pitch/height 1280×720; scissor (5, 5) -> (1000, 717).
    regs.write(reg::RB_COPY_DEST_PITCH, pack_yx(720, 1280));
    regs.write(reg::PA_SC_WINDOW_SCISSOR_TL, pack_yx(5, 5));
    regs.write(reg::PA_SC_WINDOW_SCISSOR_BR, pack_yx(717, 1000));

    let resolved = ResolveInfo::from_register_file(&regs);
    let rect = &resolved.coords;

    // Origin: 5 floors to 0 on both axes (5 & !7 == 0).
    assert_eq!(rect.x0, 0);
    assert_eq!(rect.y0, 0);
    // Right edge: min(1280, 1000) = 1000, already a multiple of 8.
    assert_eq!(rect.width, 1000);
    // Bottom edge: min(720, 717) = 717, which rounds up to 720.
    assert_eq!(rect.height, 720);
}
|
||
|
||
/// A non-degenerate scissor that reaches beyond `dest_pitch/height` is
/// clamped to the destination extent.
#[test]
fn resolve_info_scissor_cannot_widen_past_dest() {
    let dest_16x16 = (16u32 << 16) | 16u32;
    let scissor_br_1000 = (1000u32 << 16) | 1000u32;

    let mut regs = rf();
    regs.write(reg::RB_COPY_DEST_PITCH, dest_16x16);
    regs.write(reg::PA_SC_WINDOW_SCISSOR_BR, scissor_br_1000);

    let resolved = ResolveInfo::from_register_file(&regs);

    assert_eq!(resolved.coords.width, 16);
    assert_eq!(resolved.coords.height, 16);
}
|
||
|
||
/// Source decoding: a `copy_src_select` of 4 or more means depth; anything
/// lower selects `Color(idx)`.
#[test]
fn resolve_info_decodes_source_select() {
    let mut regs = rf();

    // src_select = 2 (color2)
    regs.write(reg::RB_COPY_CONTROL, 2);
    let from_color = ResolveInfo::from_register_file(&regs);
    assert_eq!(from_color.source, ResolveSource::Color(2));
    assert_eq!(from_color.copy_src_select, 2);

    // src_select = 4 -> depth
    regs.write(reg::RB_COPY_CONTROL, 4);
    let from_depth = ResolveInfo::from_register_file(&regs);
    assert_eq!(from_depth.source, ResolveSource::Depth);
}
|
||
|
||
/// Field layout of `copy_dest_info`: endian in bits 2:0, array in bit 3,
/// format in bits 12:7, exp_bias in bits 21:16 (signed 6-bit).
#[test]
fn resolve_info_decodes_copy_dest_info_fields() {
    let endian_bits = 2u32; // k8in32
    let array_bit = 1u32 << 3;
    let format_bits = 6u32 << 7; // k_8_8_8_8
    let exp_bias_bits = 0x3Fu32 << 16; // -1 as a signed 6-bit value

    let mut regs = rf();
    regs.write(
        reg::RB_COPY_DEST_INFO,
        endian_bits | array_bit | format_bits | exp_bias_bits,
    );

    let resolved = ResolveInfo::from_register_file(&regs);

    assert_eq!(resolved.dest_endian, 2);
    assert_eq!(resolved.dest_format, 6);
    assert_eq!(resolved.dest_exp_bias, -1);
    assert!(resolved.copy_dest_array);
}
|
||
|
||
/// Both positive and negative exp_bias values survive the 6-bit
/// sign-extension.
#[test]
fn resolve_info_exp_bias_sign_extends() {
    let mut regs = rf();
    // Program the raw 6-bit field at bit 16 and read back the decoded bias.
    let mut decoded_bias = |raw_field: u32| {
        regs.write(reg::RB_COPY_DEST_INFO, raw_field << 16);
        ResolveInfo::from_register_file(&regs).dest_exp_bias
    };

    assert_eq!(decoded_bias(1), 1); // exp_bias = +1
    assert_eq!(decoded_bias(0x20), -32); // exp_bias = -32
    assert_eq!(decoded_bias(0x1F), 31); // exp_bias = +31
}
|
||
|
||
/// `surface_pitch_tiles` is derived from `RB_SURFACE_INFO`: surface_pitch in
/// bits 13:0 and msaa_samples in bits 17:16. A 1280 px pitch is an exact
/// multiple of 80, giving 16 tiles at 1x MSAA / 32bpp; 4x MSAA doubles the
/// sample pitch.
#[test]
fn resolve_info_computes_surface_pitch_tiles() {
    let mut regs = rf();
    regs.write(reg::RB_COPY_CONTROL, 0); // color0
    regs.write(reg::RB_COLOR_INFO_0, 0u32 << 16); // k_8_8_8_8 -> 32bpp

    // msaa=1x, pitch=1280
    regs.write(reg::RB_SURFACE_INFO, 1280);
    let at_1x = ResolveInfo::from_register_file(&regs);
    assert_eq!(at_1x.surface_pitch_tiles, 16);
    assert!(!at_1x.source_is_64bpp);

    // 4x MSAA widens the sample pitch by 2x.
    regs.write(reg::RB_SURFACE_INFO, 1280 | (2u32 << 16));
    let at_4x = ResolveInfo::from_register_file(&regs);
    assert_eq!(at_4x.surface_pitch_tiles, 32);

    // Non-aligned pitch rounds up.
    regs.write(reg::RB_SURFACE_INFO, 1281);
    let unaligned = ResolveInfo::from_register_file(&regs);
    assert_eq!(unaligned.surface_pitch_tiles, 17);
}
|
||
|
||
/// `color_render_target_format_is_64bpp` must agree with the 64bpp entries
/// of `xenos::ColorRenderTargetFormat`: k_16_16_16_16 (5),
/// k_16_16_16_16_FLOAT (7) and k_32_32_FLOAT (15). Their neighbours are
/// probed as negatives.
#[test]
fn color_format_64bpp_table_is_correct() {
    for wide in [5, 7, 15] {
        assert!(color_render_target_format_is_64bpp(wide));
    }
    for narrow in [0, 4, 6, 14] {
        assert!(!color_render_target_format_is_64bpp(narrow));
    }
}
|
||
|
||
/// Exact arithmetic of the `surface_pitch_tiles` helper, including the 64bpp
/// doubling. Reference: `xenos.h:465-476`.
#[test]
fn surface_pitch_tiles_matches_canary_helper() {
    use crate::render_target_cache::MsaaSamples;

    // (pitch in pixels, MSAA mode, is 64bpp, expected tiles)
    let cases = [
        // 80 px, 1x, 32bpp -> 1 tile exactly.
        (80, MsaaSamples::X1, false, 1),
        // 81 px, 1x, 32bpp -> 2 tiles (round up).
        (81, MsaaSamples::X1, false, 2),
        // 80 px, 1x, 64bpp -> 2 tiles (64bpp doubles).
        (80, MsaaSamples::X1, true, 2),
        // 80 px, 2x, 32bpp -> 1 tile (2x MSAA doesn't widen X).
        (80, MsaaSamples::X2, false, 1),
        // 80 px, 4x, 32bpp -> 2 tiles (4x MSAA widens X 2x).
        (80, MsaaSamples::X4, false, 2),
        // 80 px, 4x, 64bpp -> 4 tiles.
        (80, MsaaSamples::X4, true, 4),
    ];

    for (pitch_px, msaa, is_64bpp, expected_tiles) in cases {
        assert_eq!(surface_pitch_tiles(pitch_px, msaa, is_64bpp), expected_tiles);
    }
}
|
||
|
||
/// For a colour source, the `RB_COLOR_INFO_<idx>` register that is read is
/// chosen by `copy_src_select`. This checks that colour target 3 is the one
/// addressed.
#[test]
fn resolve_info_color_source_selects_correct_color_info() {
    // k_16_16_16_16, base=0x123
    let color3_info = (5u32 << 16) | 0x123;

    let mut regs = rf();
    regs.write(reg::RB_COPY_CONTROL, 3); // color3
    regs.write(reg::RB_COLOR_INFO_3, color3_info);

    let resolved = ResolveInfo::from_register_file(&regs);

    assert_eq!(resolved.source, ResolveSource::Color(3));
    assert_eq!(resolved.source_format, 5);
    assert_eq!(resolved.source_base_tiles, 0x123);
    assert!(resolved.source_is_64bpp);
}
|
||
|
||
/// For a depth source, format and tile base come from `RB_DEPTH_INFO`,
/// whose format field is a single bit.
#[test]
fn resolve_info_depth_source_reads_depth_info() {
    // kD24FS8, base=0x55
    let depth_info = (1u32 << 16) | 0x55;

    let mut regs = rf();
    regs.write(reg::RB_COPY_CONTROL, 4); // depth
    regs.write(reg::RB_DEPTH_INFO, depth_info);

    let resolved = ResolveInfo::from_register_file(&regs);

    assert_eq!(resolved.source, ResolveSource::Depth);
    assert_eq!(resolved.source_format, 1);
    assert_eq!(resolved.source_base_tiles, 0x55);
    // depth always 32bpp
    assert!(!resolved.source_is_64bpp);
}
|
||
|
||
// ---- Vertex fetch 0 rectangle tests -------------------------------
|
||
|
||
/// Helper: seed a triangle covering the rectangle `(x0, y0) → (x1, y1)`
|
||
/// into guest memory at `vb_addr` and program VF0 to read 6 dwords
|
||
/// from it with endian = k8in32 (the standard D3D-vertex-buffer case).
|
||
fn seed_vertex_fetch_0(
|
||
rf: &mut RegisterFile,
|
||
mem: &xenia_memory::GuestMemory,
|
||
vb_addr: u32,
|
||
x0: f32,
|
||
y0: f32,
|
||
x1: f32,
|
||
y1: f32,
|
||
) {
|
||
use xenia_memory::MemoryAccess;
|
||
// Three (x, y) float pairs covering the rect — exactly the D3D9
|
||
// resolve triangle layout Canary expects.
|
||
// (x0, y0), (x1, y0), (x0, y1)
|
||
let floats = [x0, y0, x1, y0, x0, y1];
|
||
for (i, f) in floats.iter().enumerate() {
|
||
// Write float as BE (PPC `stfs` semantics). `mem.write_u32`
|
||
// already stores BE bytes; pass the raw u32 bit pattern.
|
||
mem.write_u32(
|
||
vb_addr + i as u32 * 4,
|
||
f.to_bits(),
|
||
);
|
||
}
|
||
|
||
// VF0 dword 0: address (bits 31:2, in dwords) + type (bits 1:0 = 3).
|
||
let addr_dwords = vb_addr / 4;
|
||
let dword_0 = (addr_dwords << 2) | 3;
|
||
// VF0 dword 1: size (bits 25:2 = 6) + endian (bits 1:0 = 2 = k8in32).
|
||
let dword_1 = (6u32 << 2) | 2;
|
||
rf.write(0x4800, dword_0);
|
||
rf.write(0x4801, dword_1);
|
||
}
|
||
|
||
/// Test helper: creates guest memory and allocates a readable + writable
/// region via `alloc(0x5000_0000, 0x1_0000, ..)` — presumably (base, size);
/// the VF0 tests place their vertex buffer at 0x5000_0000.
///
/// Panics if either the memory construction or the allocation fails.
fn fresh_mem_for_vf0() -> xenia_memory::GuestMemory {
    use xenia_memory::page_table::MemoryProtect;

    let read_write = MemoryProtect::READ | MemoryProtect::WRITE;
    let mut guest = xenia_memory::GuestMemory::new().expect("guest memory");
    guest
        .alloc(0x5000_0000, 0x1_0000, read_write)
        .expect("alloc");
    guest
}
|
||
|
||
/// With no VF0 registers written, `vertex_fetch_0_rect` yields `None`.
#[test]
fn vf0_rect_returns_none_when_no_vertex_buffer() {
    let guest = fresh_mem_for_vf0();
    let regs = rf();

    let rect = vertex_fetch_0_rect(&regs, &guest);

    assert!(rect.is_none());
}
|
||
|
||
/// A VF0 constant whose size field is 4 instead of 6 dwords is rejected:
/// `vertex_fetch_0_rect` yields `None`.
#[test]
fn vf0_rect_returns_none_for_wrong_size() {
    // type=3 (kVertex), size=4 (wrong — should be 6), endian=2.
    let fetch_dword_0 = 0x5000_0000u32 | 3;
    let fetch_dword_1 = (4u32 << 2) | 2;

    let guest = fresh_mem_for_vf0();
    let mut regs = rf();
    regs.write(0x4800, fetch_dword_0);
    regs.write(0x4801, fetch_dword_1);

    assert!(vertex_fetch_0_rect(&regs, &guest).is_none());
}
|
||
|
||
/// `vertex_fetch_0_rect` recovers the bounding rectangle of the three
/// vertices that `seed_vertex_fetch_0` writes to guest memory.
#[test]
fn vf0_rect_derives_rectangle_from_three_vertices() {
    let mut rf = rf();
    // `seed_vertex_fetch_0` takes the memory by shared reference, so the
    // binding does not need to be mutable.
    let mem = fresh_mem_for_vf0();
    // D3D9 pixel center: +0.5 half-pixel offset applied before Fixed16p8.
    // Leave PA_SU_VTX_CNTL at 0 (kD3DZero).
    // Seeded vertices are (0, 0), (100, 0), (0, 50), spanning the rect
    // (0, 0) → (100, 50).
    seed_vertex_fetch_0(&mut rf, &mem, 0x5000_0000, 0.0, 0.0, 100.0, 50.0);

    let (x0, y0, x1, y1) = vertex_fetch_0_rect(&rf, &mem).expect("VF0 present");
    // (0 + 0.5) * 256 = 128. (128 + 127) >> 8 = 0. So x0/y0 = 0.
    // (100 + 0.5) * 256 = 25728. (25728 + 127) >> 8 = 100.
    // (50 + 0.5) * 256 = 12928. (12928 + 127) >> 8 = 50.
    assert_eq!(x0, 0);
    assert_eq!(y0, 0);
    assert_eq!(x1, 100);
    assert_eq!(y1, 50);
}
|
||
|
||
/// When VF0 describes a valid resolve triangle,
/// `ResolveInfo::from_register_file_and_memory` takes the rect from it
/// instead of from the destination pitch/height.
#[test]
fn from_register_file_and_memory_prefers_vf0_rect() {
    let mut rf = rf();
    // `seed_vertex_fetch_0` takes the memory by shared reference, so the
    // binding does not need to be mutable.
    let mem = fresh_mem_for_vf0();
    // Without VF0: dest_pitch/height defaults produce (0, 0, 1280, 720).
    rf.write(reg::RB_COPY_DEST_PITCH, (720u32 << 16) | 1280u32);
    // With VF0 pointing at a 256×128 triangle, override to that.
    seed_vertex_fetch_0(&mut rf, &mem, 0x5000_0000, 0.0, 0.0, 256.0, 128.0);

    let info = ResolveInfo::from_register_file_and_memory(&rf, &mem);
    assert_eq!(info.coords.x0, 0);
    assert_eq!(info.coords.y0, 0);
    assert_eq!(info.coords.width, 256);
    assert_eq!(info.coords.height, 128);
}
|
||
|
||
/// With VF0 left unprogrammed, the rect falls back to the scissor+dest
/// default — here the full 1280×720 destination.
#[test]
fn from_register_file_and_memory_falls_back_without_vf0() {
    let guest = fresh_mem_for_vf0();
    let mut regs = rf();
    let dest_1280x720 = (720u32 << 16) | 1280u32;
    regs.write(reg::RB_COPY_DEST_PITCH, dest_1280x720);

    let resolved = ResolveInfo::from_register_file_and_memory(&regs, &guest);

    assert_eq!(resolved.coords.width, 1280);
    assert_eq!(resolved.coords.height, 720);
}
|
||
|
||
/// When no scissor registers are written,
/// `resolve_rect_apply_scissor_and_align_8` only performs the 8-pixel
/// alignment.
#[test]
fn scissor_helper_8_aligns_with_no_scissor() {
    let regs = rf();

    let (left, top, width, height) =
        resolve_rect_apply_scissor_and_align_8(&regs, 5, 5, 1001, 17);

    assert_eq!(left, 0);
    assert_eq!(top, 0);
    // 1001 ceil-to-8 = 1008; 17 ceil-to-8 = 24.
    assert_eq!(width, 1008);
    assert_eq!(height, 24);
}
|
||
|
||
/// A bounding box with negative corners (VF0 can produce these) is clamped
/// to the scissor's top-left and never goes below zero.
#[test]
fn scissor_helper_clamps_negative_to_zero() {
    let mut regs = rf();
    // Small scissor at (0,0)..(128, 64).
    let scissor_br = (64u32 << 16) | 128u32;
    regs.write(reg::PA_SC_WINDOW_SCISSOR_BR, scissor_br);

    let (left, top, width, height) =
        resolve_rect_apply_scissor_and_align_8(&regs, -50, -50, 80, 32);

    assert_eq!(left, 0);
    assert_eq!(top, 0);
    // x1 clamped from 80 -> 80, ceil8 -> 80. y1 32 -> 32.
    assert_eq!(width, 80);
    assert_eq!(height, 32);
}
|
||
}
|