Files
xenia-rs/crates/xenia-gpu/src/draw_state.rs
MechaCat02 8723d6826b fix(gpu): GPUBUG-103/104/105 — fix 8 draw-state register addresses + index_size bit
Eight of the register-index constants in draw_state.rs::reg pointed at
completely unrelated registers because the canonical canary table
(register_table.inc) was misread when the module was first authored.
Re-validated each value against canary's lines 1232-1336.

| Register                  | Pre-fix | Canary | Was-actually  |
| ------------------------- | ------- | ------ | ------------- |
| VGT_DRAW_INITIATOR        | 0x2281  | 0x21FC | (junk)        |
| VGT_DMA_BASE              | 0x2282  | 0x21FA | (junk)        |
| VGT_DMA_SIZE              | 0x2283  | 0x21FB | (junk)        |
| PA_SC_WINDOW_SCISSOR_TL   | 0x200E  | 0x2081 | SCREEN_SCIS_TL|
| PA_SC_WINDOW_SCISSOR_BR   | 0x200F  | 0x2082 | SCREEN_SCIS_BR|
| RB_COLOR_INFO_1           | 0x2010  | 0x2003 | COHER_DEST_BASE_10|
| RB_COLOR_INFO_2           | 0x2011  | 0x2004 | COHER_DEST_BASE_11|
| RB_COLOR_INFO_3           | 0x2012  | 0x2005 | COHER_DEST_BASE_12|
| PA_SU_VTX_CNTL            | 0x2083  | 0x2302 | PA_SC_CLIPRECT_RULE|

Also corrected the `index_size` bit position in VGT_DRAW_INITIATOR
extraction: was bit 8 (which is `major_mode[0]`), should be bit 11 per
canary `registers.h:324` (`xenos::IndexFormat index_size : 1; // +11`).
The block comment in `extract()` was also wrong about the
intermediate field layout and has been refreshed.

Verification at -n 100M lockstep:
  swaps:                2 → 2     (unchanged)
  draws:                0 → 0     (still gated — see below)
  packets:              ~61M (within noise)
Tests: 149 (no count change; existing draw_state tests cover the
new constants implicitly via behavioral round-trip).

The audit predicted Phases C+D+E together would unlock `draws > 0`,
but the runtime plateau is multi-causal per the audit's own analysis
(`project_xenia_rs_audit_2026_05_02.md`). The likely remaining
blockers in -n 100M:
  * 4 parked-waiter worker threads (handles 0x1004, 0x100c, 0x15e4,
    0x42450b5c) — Phase F's XAM/spinlock fixes target this.
  * shader_blobs_live=0 after 100M — the game hasn't issued IM_LOAD
    yet because workers haven't loaded shader resources.
The register fixes here are still load-bearing for any draw that
DOES happen (every register read at 0x2281 was junk before this
commit) — landing them now is correct even if draws=0 persists until
Phase F unparks the resource-loader threads.

Closes GPUBUG-103, GPUBUG-104, GPUBUG-105 (P0).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 14:22:04 +02:00

1130 lines
44 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Extract draw state from the Xenos register file at `PM4_DRAW_INDX` time.
//!
//! This is the "what are we drawing?" snapshot: primitive type, vertex count,
//! index buffer (if any), viewport, scissor, blend, depth state, and enough
//! handles for a future translator / uber-shader to pull fetch constants +
//! shader blobs. Ground truth: `xenia-canary/src/xenia/gpu/draw_util.h` and
//! the PM4 handler at `pm4_command_processor_implement.h:1128-1151`.
//!
//! We only extract what the P3 uber-shader actually consumes; the rest is
//! reserved for later phases.
use crate::register_file::RegisterFile;
/// Xenos primitive topology (the `PrimitiveType` enum from `xenos.h`).
///
/// Encodings this crate does not name explicitly are preserved in
/// [`PrimitiveType::Unknown`] so callers can still log or reject them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrimitiveType {
    None,
    PointList,
    LineList,
    LineStrip,
    TriangleList,
    TriangleFan,
    TriangleStrip,
    RectangleList,
    QuadList,
    Unknown(u8),
}

impl PrimitiveType {
    /// Decode the low six bits of `b` into a primitive type; higher bits are
    /// ignored. Unnamed encodings come back as `Unknown(code)`.
    pub fn from_bits(b: u32) -> Self {
        // The field is six bits wide, so narrowing to `u8` never loses data.
        let code = (b & 0x3F) as u8;
        match code {
            0 => Self::None,
            1 => Self::PointList,
            2 => Self::LineList,
            3 => Self::LineStrip,
            4 => Self::TriangleList,
            5 => Self::TriangleFan,
            6 => Self::TriangleStrip,
            8 => Self::RectangleList,
            13 => Self::QuadList,
            _ => Self::Unknown(code),
        }
    }
}
/// How the draw was issued per `VGT_DRAW_INITIATOR.source_select`:
/// 0=DMA, 1=Immediate (in-packet indices), 2=AutoIndex.
///
/// [`extract`] decodes the selector from initiator bits [7:6]; any value
/// other than 0 or 1 is mapped to [`IndexSource::AutoIndex`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexSource {
/// Index buffer fetched from `VGT_DMA_BASE` / `VGT_DMA_SIZE`.
Dma {
/// Index-buffer base: the caller-supplied override when present,
/// otherwise the raw `VGT_DMA_BASE` register value.
base_address: u32,
/// Index-buffer size: the caller-supplied override when present,
/// otherwise the raw `VGT_DMA_SIZE` register value.
/// NOTE(review): the unit is presumably dwords per the field name — confirm.
size_dwords: u32,
/// Width of each index element, decoded from the initiator.
index_size: IndexSize,
},
/// Indices follow the `DRAW_INDX_2` packet header inline.
Immediate { index_size: IndexSize },
/// No index buffer; generate `0..vertex_count - 1` on the host.
AutoIndex,
}
/// Width of one index element. [`extract`] derives it from
/// `VGT_DRAW_INITIATOR` bit 11: 0 selects `Sixteen`, 1 selects `ThirtyTwo`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexSize {
/// 16-bit indices.
Sixteen,
/// 32-bit indices.
ThirtyTwo,
}
/// Snapshot of one draw call's state, sampled from the register file.
///
/// Produced by [`extract`]. Fields named `rb_*` / `pa_*` hold the raw 32-bit
/// register values with no decoding applied.
#[derive(Debug, Clone, Copy)]
pub struct DrawState {
/// Primitive topology decoded from `VGT_DRAW_INITIATOR` bits [5:0].
pub primitive: PrimitiveType,
/// `VGT_DRAW_INITIATOR.num_indices` (bits [31:16]).
pub vertex_count: u32,
/// Where the indices for this draw come from.
pub index_source: IndexSource,
/// Viewport scale/offset read from the `PA_CL_VPORT_*` registers.
pub viewport: Viewport,
/// Window scissor read from `PA_SC_WINDOW_SCISSOR_TL/BR`.
pub scissor: Scissor,
/// RB_COLOR_INFO for each of the 4 possible color render targets; `None`
/// where the target is not bound.
pub color_info: [Option<ColorTargetInfo>; 4],
/// Depth target info; `None` when `extract` finds `RB_MODECONTROL` bit 4
/// clear.
pub depth_info: Option<DepthTargetInfo>,
/// Raw `RB_MODECONTROL`.
pub rb_modecontrol: u32,
/// Raw `RB_COLORCONTROL`.
pub rb_colorcontrol: u32,
/// Raw `RB_DEPTHCONTROL`.
pub rb_depthcontrol: u32,
/// P4: per-color-target blend state. Index matches `color_info`.
pub rb_blendcontrol: [u32; 4],
/// P4: stencil state.
pub rb_stencilrefmask: u32,
/// P4: stencil state from `RB_STENCILREFMASK_BF`.
pub rb_stencilrefmask_bf: u32,
/// P4: pixel offset applied at rasterization.
pub pa_sc_window_offset: u32,
/// P4: resolve destination registers (`RB_COPY_*`). These are set by
/// the guest just before triggering a TILE_FLUSH event and describe
/// where an EDRAM→texture copy should land.
pub rb_copy_control: u32,
pub rb_copy_dest_base: u32,
pub rb_copy_dest_pitch: u32,
pub rb_copy_dest_info: u32,
/// Key of the VS blob that was active at draw time (from
/// `GpuSystem::active_vs_key`). `None` = no VS loaded yet; the draw is
/// meaningless and will be rejected by the dispatcher.
/// `extract` itself always leaves this `None`; the caller fills it in.
pub vs_blob_key: Option<u32>,
/// Key of the PS blob that was active at draw time.
/// `extract` itself always leaves this `None`; the caller fills it in.
pub ps_blob_key: Option<u32>,
}
/// Viewport transform as programmed in the `PA_CL_VPORT_*` registers.
///
/// [`extract`] reinterprets each raw 32-bit register value as an `f32` via
/// `f32::from_bits`; no validation or conversion is applied.
#[derive(Debug, Clone, Copy, Default)]
pub struct Viewport {
/// `PA_CL_VPORT_XSCALE`.
pub scale_x: f32,
/// `PA_CL_VPORT_YSCALE`.
pub scale_y: f32,
/// `PA_CL_VPORT_ZSCALE`.
pub scale_z: f32,
/// `PA_CL_VPORT_XOFFSET`.
pub offset_x: f32,
/// `PA_CL_VPORT_YOFFSET`.
pub offset_y: f32,
/// `PA_CL_VPORT_ZOFFSET`.
pub offset_z: f32,
}
/// Window scissor rectangle decoded from `PA_SC_WINDOW_SCISSOR_TL` /
/// `PA_SC_WINDOW_SCISSOR_BR`. In each register the X coordinate sits in the
/// low halfword and the Y coordinate starts at bit 16.
#[derive(Debug, Clone, Copy, Default)]
pub struct Scissor {
/// Top-left X.
pub tl_x: u16,
/// Top-left Y.
pub tl_y: u16,
/// Bottom-right X.
pub br_x: u16,
/// Bottom-right Y.
pub br_y: u16,
}
/// Decoded subset of one `RB_COLOR_INFO` register, as produced by
/// [`extract`] for each enabled color render target.
#[derive(Debug, Clone, Copy)]
pub struct ColorTargetInfo {
/// EDRAM tile base for this color target (`RB_COLOR_INFO.base_tiles`).
/// Taken from register bits [11:0].
pub base_tiles: u16,
/// Color format (`RB_COLOR_INFO.color_format`).
/// Taken from register bits [19:16].
pub format: u8,
}
/// Decoded subset of `RB_DEPTH_INFO`, as produced by [`extract`] when the
/// depth target is considered present.
#[derive(Debug, Clone, Copy)]
pub struct DepthTargetInfo {
/// EDRAM tile base for depth/stencil.
/// Taken from register bits [11:0].
pub base_tiles: u16,
/// 0=D24S8, 1=D24FS8 (per `xenos.h:404-408`).
/// Taken from register bit 16.
pub format: u8,
}
/// Resolve source: either one of four color render targets or the depth RT.
/// Packed into `RB_COPY_CONTROL.copy_src_select` (bits [2:0]): 0..=3 pick
/// color0..3, 4 picks depth. Canary `registers.h:853`.
///
/// The decoders in this file treat every selector value `>= 4` as depth.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolveSource {
/// Color render target with the given index (0..=3).
Color(u8),
/// The depth/stencil render target.
Depth,
}
/// Resolve rectangle in pixel coordinates at the destination resolution,
/// 8-pixel aligned per Canary's `kResolveAlignmentPixels = 8`. MSAA scaling
/// is kept separate — `sample_count_log2_x/y` tell the resolve how many
/// samples to step per destination pixel.
///
/// `Default` yields an all-zero (empty) rectangle.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ResolveCoordinates {
/// Left edge in pixels.
pub x0: u32,
/// Top edge in pixels.
pub y0: u32,
/// Width in pixels; 0 means the rectangle is empty.
pub width: u32,
/// Height in pixels; 0 means the rectangle is empty.
pub height: u32,
/// 1 iff 4x MSAA (samples laid out 2x wider than pixels).
pub sample_count_log2_x: u32,
/// 1 iff 2x+ MSAA (samples laid out 2x taller than pixels).
pub sample_count_log2_y: u32,
}
/// Decoded resolve state — describes how a `TILE_FLUSH` event should copy
/// EDRAM bytes to a guest-memory tiled texture. Canary equivalent:
/// `draw_util::ResolveInfo` at `draw_util.h:627`. Bit-field layout in
/// `RB_COPY_CONTROL / RB_COPY_DEST_INFO / RB_COPY_DEST_PITCH` comes from
/// `registers.h:853-897`.
///
/// `from_register_file` populates every field; `from_draw_state` fills only
/// the `RB_COPY_*`-derived fields and leaves the rest at zero / defaults.
#[derive(Debug, Clone, Copy)]
pub struct ResolveInfo {
/// Which source RT (0..=3=color, 4=depth). Raw register bits.
pub copy_src_select: u8,
/// Sample selector for MSAA sources. See `xenos::CopySampleSelect`.
pub copy_sample_select: u8,
/// Enable clear of the source render target after the copy.
/// Decoded from `RB_COPY_CONTROL` bit 8.
pub color_clear_enable: bool,
/// Depth counterpart of `color_clear_enable`, decoded from
/// `RB_COPY_CONTROL` bit 9.
pub depth_clear_enable: bool,
/// 0 = raw tile copy (same format), 1 = convert to `copy_dest_format`.
/// 2 = constantOne, 3 = null (no copy).
pub copy_command: u8,
/// Guest-memory destination address, already masked to the 29-bit
/// Xenon physical range (`& 0x1FFF_FFFF`).
pub dest_base: u32,
/// Destination pitch in pixels (0..=16383). Byte pitch = pitch * bpp
/// after the caller pitch-aligns to `kStoragePitchHeightAlignmentBlocks
/// = 32`.
pub dest_pitch_pixels: u32,
/// Destination height in pixels, decoded from `RB_COPY_DEST_PITCH`
/// bits [29:16] (0..=16383).
pub dest_height_pixels: u32,
/// Destination format (`xenos::ColorFormat`, 6 bits).
pub dest_format: u8,
/// Byte-swap mode applied before the write (`xenos::Endian128`, 0..=5).
pub dest_endian: u8,
/// Signed [-32, 31] exponent bias applied during conversion.
pub dest_exp_bias: i8,
/// Decoded resolve source (color0..3 or depth).
pub source: ResolveSource,
/// 8-pixel-aligned resolve rectangle.
pub coords: ResolveCoordinates,
/// Source format: `ColorRenderTargetFormat` when color,
/// `DepthRenderTargetFormat` when depth.
pub source_format: u8,
/// EDRAM tile origin of the source RT (from `RB_COLOR_INFO.color_base`
/// or `RB_DEPTH_INFO.depth_base`). The decoders in this file keep the
/// low 12 bits of the register (`& 0xFFF`).
pub source_base_tiles: u16,
/// `GetSurfacePitchTiles(surface_pitch, msaa, is_64bpp)` — how many
/// 80-sample-wide tiles make up one EDRAM row.
pub surface_pitch_tiles: u32,
/// MSAA mode from `RB_SURFACE_INFO`.
pub msaa: crate::render_target_cache::MsaaSamples,
/// True iff the source color format is 64bpp (doubles EDRAM pitch/base).
/// Always false for a depth source.
pub source_is_64bpp: bool,
/// `RB_COLOR_CLEAR` — constant written into EDRAM when
/// `color_clear_enable` is set.
pub color_clear_value: u32,
/// `RB_COLOR_CLEAR_LO` — second 32-bit lane for 64bpp clear.
pub color_clear_value_lo: u32,
/// `RB_DEPTH_CLEAR` — constant written into EDRAM depth tiles on
/// `depth_clear_enable`.
pub depth_clear_value: u32,
/// `RB_COPY_DEST_INFO.copy_dest_array` — 2D (false) vs 3D/stacked (true).
pub copy_dest_array: bool,
}
/// Port of Canary's `GetSurfacePitchTiles(pitch_pixels, msaa, is_64bpp)`
/// (`xenos.h:465-476`): the number of 80-sample-wide EDRAM tiles needed for
/// one row of a surface whose pitch is `pitch_pixels`.
///
/// Two independent doublings apply:
/// * 4x MSAA lays samples out twice as wide as pixels, so the pitch in
///   samples doubles before it is rounded up to whole tiles;
/// * 64bpp formats occupy two EDRAM tiles per value, so the tile count
///   doubles after rounding.
#[inline]
pub fn surface_pitch_tiles(
    pitch_pixels: u32,
    msaa: crate::render_target_cache::MsaaSamples,
    is_64bpp: bool,
) -> u32 {
    use crate::render_target_cache::MsaaSamples;

    const EDRAM_TILE_WIDTH_SAMPLES: u32 = 80;

    let sample_shift: u32 = if msaa == MsaaSamples::X4 { 1 } else { 0 };
    let bpp_shift: u32 = if is_64bpp { 1 } else { 0 };

    let tiles_at_32bpp = (pitch_pixels << sample_shift).div_ceil(EDRAM_TILE_WIDTH_SAMPLES);
    tiles_at_32bpp << bpp_shift
}
/// Whether a Canary `ColorRenderTargetFormat` value is a 64bpp format.
///
/// Exactly three encodings qualify: 5 (`k_16_16_16_16`),
/// 7 (`k_16_16_16_16_FLOAT`) and 15 (`k_32_32_FLOAT`) — see
/// `xenos.h:297-317` and the enum's `IsColorRenderTargetFormat64bpp` helper.
/// Every other value, including ones outside the 4-bit field, returns `false`.
#[inline]
pub fn color_render_target_format_is_64bpp(fmt: u8) -> bool {
    const FORMATS_64BPP: [u8; 3] = [5, 7, 15];
    FORMATS_64BPP.contains(&fmt)
}
/// `kResolveAlignmentPixels` from Canary (`draw_util.cc:925` area): the pixel
/// grid that resolve rectangles are snapped to. It must stay a power of two
/// because [`resolve_rect_apply_scissor_and_align_8`] derives a bit mask
/// from it (`value - 1`).
pub const RESOLVE_ALIGNMENT_PIXELS: u32 = 8;
/// Confine a raw resolve rectangle to the `PA_SC_WINDOW_SCISSOR_*` window and
/// snap it to the 8-pixel resolve grid.
///
/// Inputs are `i32` because the VF0-derived bounding box may be negative.
/// Processing order:
/// 1. If the scissor is a non-degenerate window (`br > tl` on both axes),
///    each corner is clamped into it; a degenerate scissor leaves the input
///    untouched.
/// 2. An inverted rectangle is collapsed so that `x1 >= x0` and `y1 >= y0`.
/// 3. The top-left corner is floored and the bottom-right corner is ceiled
///    to a multiple of [`RESOLVE_ALIGNMENT_PIXELS`].
/// 4. Negative coordinates are clamped to zero.
///
/// Returns `(x0, y0, width, height)` in pixels. A zero width or height means
/// "empty resolve; skip".
pub fn resolve_rect_apply_scissor_and_align_8(
    rf: &RegisterFile,
    x0_in: i32,
    y0_in: i32,
    x1_in: i32,
    y1_in: i32,
) -> (u32, u32, u32, u32) {
    // Each scissor register packs X in bits [13:0] and Y in bits [29:16].
    let unpack = |raw: u32| ((raw & 0x3FFF) as i32, ((raw >> 16) & 0x3FFF) as i32);
    let (left, top) = unpack(rf.read(reg::PA_SC_WINDOW_SCISSOR_TL));
    let (right, bottom) = unpack(rf.read(reg::PA_SC_WINDOW_SCISSOR_BR));

    // A degenerate scissor is ignored rather than collapsing the rectangle.
    let scissor_usable = right > left && bottom > top;
    let confine = |v: i32, lo: i32, hi: i32| {
        if scissor_usable {
            v.max(lo).min(hi)
        } else {
            v
        }
    };

    let x_min = confine(x0_in, left, right);
    let y_min = confine(y0_in, top, bottom);
    // Never let the far corner sit before the near corner.
    let x_max = confine(x1_in, left, right).max(x_min);
    let y_max = confine(y1_in, top, bottom).max(y_min);

    // Grid snap: floor the near corner, ceil the far corner.
    let grid_mask = (RESOLVE_ALIGNMENT_PIXELS as i32) - 1;
    let snap_down = |v: i32| v & !grid_mask;
    let snap_up = |v: i32| (v + grid_mask) & !grid_mask;
    let non_negative = |v: i32| v.max(0) as u32;

    let x_start = non_negative(snap_down(x_min));
    let y_start = non_negative(snap_down(y_min));
    let x_end = non_negative(snap_up(x_max));
    let y_end = non_negative(snap_up(y_max));

    (
        x_start,
        y_start,
        x_end.saturating_sub(x_start),
        y_end.saturating_sub(y_start),
    )
}
/// Parse vertex fetch constant 0 (Canary `xe_gpu_vertex_fetch_t`,
/// `xenos.h:1158-1172`) and derive the resolve bounding-box in pixel units.
/// Returns `None` when the fetch isn't the 6-float vertex buffer the
/// resolve shader expects (type != kVertex or size != 6).
///
/// This mirrors `draw_util.cc:950-1014` minus the window offset, which is
/// not applied here. The pitfalls replicated from there are (a) handling
/// endian via `GpuSwap`, (b) the `PA_SU_VTX_CNTL.pix_center` half-pixel
/// offset, and (c) Fixed16p8 top-left rounding `(v + 127) >> 8`.
///
/// The vertex floats come from guest memory and are not trusted: a NaN
/// converts to 0 and an out-of-range value saturates in the float → `i32`
/// cast, so the rounding step uses a saturating add to stay overflow-free
/// (a plain `+ 127` would panic in debug builds and wrap in release builds
/// for a saturated coordinate).
///
/// The returned rect is `(x0, y0, x1, y1)` in *pixel* coordinates,
/// *pre-scissor-clamp* and *pre-alignment*. Caller feeds it through
/// [`resolve_rect_apply_scissor_and_align_8`].
pub fn vertex_fetch_0_rect(
    rf: &RegisterFile,
    mem: &dyn xenia_memory::access::MemoryAccess,
) -> Option<(i32, i32, i32, i32)> {
    const CONST_BASE_FETCH: u32 = 0x4800;
    let dword_0 = rf.read(CONST_BASE_FETCH);
    let dword_1 = rf.read(CONST_BASE_FETCH + 1);

    // type:2 at bits [1:0]; kVertex = 3 per xenos.h:1147-1152.
    let fetch_type = dword_0 & 0x3;
    if fetch_type != 3 {
        return None;
    }

    // size:24 at bits [25:2] of dword_1 — in dwords; expect 6 (3 × vec2).
    let size = (dword_1 >> 2) & 0x00FF_FFFF;
    if size != 6 {
        return None;
    }

    // address:30 at bits [31:2] of dword_0 — in dwords. Clearing the two
    // type bits is the same as `(address_dwords << 2)`, i.e. a byte address.
    let address_bytes = dword_0 & 0xFFFF_FFFC;

    // endian:2 at bits [1:0] of dword_1 — xenos::Endian (kNone/k8in16/k8in32/k16in32).
    let fetch_endian = (dword_1 & 0x3) as u8;

    // Read 6 floats from guest memory. `mem.read_u32` stores BE bytes as a
    // u32 value; to mirror Canary's "raw LE bytes → u32 → GpuSwap" we have
    // to re-interpret the memory as LE (flipping what `read_u32` did).
    let floats: [f32; 6] = std::array::from_fn(|i| {
        let be_u32 = mem.read_u32(address_bytes.wrapping_add(i as u32 * 4));
        // `be_u32` was composed from bytes `[b0,b1,b2,b3]` as
        // `(b0<<24)|...|b3`. Canary reads those same bytes in host-LE,
        // producing `(b3<<24)|...|b0`. That's `be_u32.swap_bytes()`.
        let canary_le = be_u32.swap_bytes();
        let swapped = gpu_swap_u32(canary_le, fetch_endian);
        f32::from_bits(swapped)
    });

    // PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel),
    // 1 = kOpenGL (no offset). Pre-fix the constant at this site read
    // 0x2083 (PA_SC_CLIPRECT_RULE), giving non-deterministic half-pixel
    // offsets that broke 3D camera matrices. Canary `register_table.inc`
    // line 1336 says PA_SU_VTX_CNTL is 0x2302. GPUBUG-105.
    const PA_SU_VTX_CNTL: u32 = 0x2302;
    let half_pixel_offset = if rf.read(PA_SU_VTX_CNTL) & 1 == 0 {
        0.5f32
    } else {
        0.0f32
    };

    // Convert each to Fixed16p8 (multiply by 256, round). The `as i32` cast
    // saturates at the i32 limits and maps NaN to 0.
    let fixed: [i32; 6] = std::array::from_fn(|i| {
        ((floats[i] + half_pixel_offset) * 256.0).round() as i32
    });

    // Bounding box over the three (x, y) vertices.
    let x0 = fixed[0].min(fixed[2]).min(fixed[4]);
    let y0 = fixed[1].min(fixed[3]).min(fixed[5]);
    let x1 = fixed[0].max(fixed[2]).max(fixed[4]);
    let y1 = fixed[1].max(fixed[3]).max(fixed[5]);

    // Top-left rounding: `(v + 127) >> 8` for both corners. Saturating so a
    // coordinate pinned at `i32::MAX` by the cast above cannot overflow.
    let round = |v: i32| v.saturating_add(127) >> 8;
    Some((round(x0), round(y0), round(x1), round(y1)))
}
/// `u32` flavour of Canary's `GpuSwapInline` (`xenos.h:1077-1114`), used by
/// the vertex-fetch path to apply the same byte-order transform Canary's
/// `GpuSwap<float>` applies to vertex data.
///
/// Only the low two bits of `endian` are significant:
/// * 0 (kNone)   — value returned unchanged;
/// * 1 (k8in16)  — bytes swapped inside each 16-bit half;
/// * 2 (k8in32)  — all four bytes reversed;
/// * 3 (k16in32) — the two 16-bit halves exchanged.
#[inline]
fn gpu_swap_u32(value: u32, endian: u8) -> u32 {
    let [b3, b2, b1, b0] = value.to_be_bytes();
    match endian & 0x3 {
        0 => value,
        1 => u32::from_be_bytes([b2, b3, b0, b1]),
        2 => u32::from_be_bytes([b0, b1, b2, b3]),
        _ => u32::from_be_bytes([b1, b0, b3, b2]),
    }
}
impl ResolveInfo {
/// Legacy decoder for callers that only hold a captured [`DrawState`].
///
/// Only the fields backed by the `RB_COPY_*` values stored in `DrawState`
/// are decoded. Everything that needs the full register file — resolve
/// rectangle, source format/base, surface pitch, MSAA mode, clear values —
/// is left at zero / default (`MsaaSamples::X1`, `source_is_64bpp = false`).
///
/// Kept for tests that build resolve decoders from captured draw states;
/// `from_register_file` is the canonical path.
pub fn from_draw_state(ds: &DrawState) -> Self {
    use crate::render_target_cache::MsaaSamples;

    let control = ds.rb_copy_control;
    let pitch = ds.rb_copy_dest_pitch;
    let info = ds.rb_copy_dest_info;

    // `copy_dest_info[21:16]` is a 6-bit two's-complement exponent bias.
    let bias_bits = (info >> 16) & 0x3F;
    let dest_exp_bias = if bias_bits & 0x20 == 0 {
        bias_bits as i8
    } else {
        (bias_bits as i8) - 64
    };

    // Selector values 0..=3 name a color target; anything higher is depth.
    let copy_src_select = (control & 0x7) as u8;
    let source = match copy_src_select {
        0..=3 => ResolveSource::Color(copy_src_select),
        _ => ResolveSource::Depth,
    };

    let flag = |word: u32, bit: u32| (word >> bit) & 1 != 0;

    Self {
        copy_src_select,
        copy_sample_select: ((control >> 4) & 0x7) as u8,
        color_clear_enable: flag(control, 8),
        depth_clear_enable: flag(control, 9),
        copy_command: ((control >> 20) & 0x3) as u8,
        dest_base: ds.rb_copy_dest_base & 0x1FFF_FFFF,
        dest_pitch_pixels: pitch & 0x3FFF,
        dest_height_pixels: (pitch >> 16) & 0x3FFF,
        dest_format: ((info >> 7) & 0x3F) as u8,
        dest_endian: (info & 0x7) as u8,
        dest_exp_bias,
        source,
        // Not derivable from a `DrawState`; left at neutral defaults.
        coords: ResolveCoordinates::default(),
        source_format: 0,
        source_base_tiles: 0,
        surface_pitch_tiles: 0,
        msaa: MsaaSamples::X1,
        source_is_64bpp: false,
        color_clear_value: 0,
        color_clear_value_lo: 0,
        depth_clear_value: 0,
        copy_dest_array: flag(info, 3),
    }
}
/// Canonical resolve decoder: reads the live register values and derives
/// the full rectangle and EDRAM layout. Modeled on canary's
/// `GetResolveInfo` (`draw_util.cc:926-1318`) with these simplifications
/// (all scoped in the landing plan, to be expanded as needs arise):
///
/// * The rectangle comes from the scissor window and
///   `RB_COPY_DEST_PITCH`, not from vertex fetch 0. Sylpheed's splash
///   uses a clear-resolve with no preceding draw, so vertex-fetch-derived
///   geometry is not available.
/// * `copy_sample_select` is passed through untouched; sample averaging
///   for 2x/4x MSAA is not yet applied on the read side.
/// * `PA_SC_WINDOW_OFFSET` is not applied — not needed for Sylpheed, and
///   canary only applies it when `PA_SU_SC_MODE_CNTL.vtx_window_offset_
///   enable` is set, which requires a live draw.
pub fn from_register_file(rf: &RegisterFile) -> Self {
    use crate::render_target_cache::MsaaSamples;

    let control = rf.read(reg::RB_COPY_CONTROL);
    let dest_info = rf.read(reg::RB_COPY_DEST_INFO);
    let dest_pitch_reg = rf.read(reg::RB_COPY_DEST_PITCH);
    let dest_base_reg = rf.read(reg::RB_COPY_DEST_BASE);

    // `copy_dest_info[21:16]` is a 6-bit two's-complement exponent bias.
    let bias_bits = (dest_info >> 16) & 0x3F;
    let dest_exp_bias = if bias_bits & 0x20 == 0 {
        bias_bits as i8
    } else {
        (bias_bits as i8) - 64
    };

    // Selector values 0..=3 name a color target; anything higher is depth.
    let copy_src_select = (control & 0x7) as u8;
    let source = match copy_src_select {
        0..=3 => ResolveSource::Color(copy_src_select),
        _ => ResolveSource::Depth,
    };

    let surface_info = rf.read(reg::RB_SURFACE_INFO);
    let surface_pitch_pixels = surface_info & 0x3FFF;
    let msaa = MsaaSamples::from_raw((surface_info >> 16) & 0x3);

    // Format and EDRAM base come from whichever target is being resolved.
    let (source_format, source_base_tiles, source_is_64bpp) = match source {
        ResolveSource::Color(target) => {
            const COLOR_INFO_REGS: [u32; 4] = [
                reg::RB_COLOR_INFO_0,
                reg::RB_COLOR_INFO_1,
                reg::RB_COLOR_INFO_2,
                reg::RB_COLOR_INFO_3,
            ];
            let raw = rf.read(COLOR_INFO_REGS[usize::from(target.min(3))]);
            let format = ((raw >> 16) & 0xF) as u8;
            (
                format,
                (raw & 0xFFF) as u16,
                color_render_target_format_is_64bpp(format),
            )
        }
        ResolveSource::Depth => {
            let raw = rf.read(reg::RB_DEPTH_INFO);
            (((raw >> 16) & 0x1) as u8, (raw & 0xFFF) as u16, false)
        }
    };
    let pitch_tiles = surface_pitch_tiles(surface_pitch_pixels, msaa, source_is_64bpp);

    // Rectangle: start from (0, 0, dest_pitch, dest_height), then apply the
    // scissor clamp and the 8-pixel alignment.
    let dest_pitch_pixels = dest_pitch_reg & 0x3FFF;
    let dest_height_pixels = (dest_pitch_reg >> 16) & 0x3FFF;
    let (x0, y0, width, height) = resolve_rect_apply_scissor_and_align_8(
        rf,
        0,
        0,
        dest_pitch_pixels as i32,
        dest_height_pixels as i32,
    );
    let coords = ResolveCoordinates {
        x0,
        y0,
        width,
        height,
        sample_count_log2_x: u32::from(msaa == MsaaSamples::X4),
        sample_count_log2_y: u32::from(msaa != MsaaSamples::X1),
    };

    let color_clear_value = rf.read(reg::RB_COLOR_CLEAR);
    let color_clear_value_lo = rf.read(reg::RB_COLOR_CLEAR_LO);
    let depth_clear_value = rf.read(reg::RB_DEPTH_CLEAR);

    let flag = |word: u32, bit: u32| (word >> bit) & 1 != 0;

    Self {
        copy_src_select,
        copy_sample_select: ((control >> 4) & 0x7) as u8,
        color_clear_enable: flag(control, 8),
        depth_clear_enable: flag(control, 9),
        copy_command: ((control >> 20) & 0x3) as u8,
        dest_base: dest_base_reg & 0x1FFF_FFFF,
        dest_pitch_pixels,
        dest_height_pixels,
        dest_format: ((dest_info >> 7) & 0x3F) as u8,
        dest_endian: (dest_info & 0x7) as u8,
        dest_exp_bias,
        source,
        coords,
        source_format,
        source_base_tiles,
        surface_pitch_tiles: pitch_tiles,
        msaa,
        source_is_64bpp,
        color_clear_value,
        color_clear_value_lo,
        depth_clear_value,
        copy_dest_array: flag(dest_info, 3),
    }
}
/// Memory-aware decoder: starts from `from_register_file` and, when vertex
/// fetch 0 holds the D3D9-hack "resolve rectangle" vertices (3 vec2 floats,
/// Canary `draw_util.cc:950-1014`), replaces the resolve extent with the
/// scissored, 8-pixel-aligned bounding box of those vertices.
///
/// The scissor + `RB_COPY_DEST_PITCH/HEIGHT` rectangle is kept when VF0 is
/// not a 6-dword vertex buffer, or when its rectangle ends up empty.
///
/// Used from the live TILE_FLUSH path; tests can stick with
/// `from_register_file` when they don't want to program VF0.
pub fn from_register_file_and_memory(
    rf: &RegisterFile,
    mem: &dyn xenia_memory::access::MemoryAccess,
) -> Self {
    let mut info = Self::from_register_file(rf);

    let Some((vx0, vy0, vx1, vy1)) = vertex_fetch_0_rect(rf, mem) else {
        return info;
    };

    let (x0, y0, width, height) =
        resolve_rect_apply_scissor_and_align_8(rf, vx0, vy0, vx1, vy1);

    // An empty VF0 rectangle means the game has not set one up yet, so the
    // scissor + dest default stays in place.
    if width == 0 || height == 0 {
        return info;
    }

    info.coords.x0 = x0;
    info.coords.y0 = y0;
    info.coords.width = width;
    info.coords.height = height;
    info
}
}
/// Register indices taken from
/// `xenia-canary/src/xenia/gpu/register_table.inc`, limited to the ones the
/// extractor reads. Constants are grouped by address range.
///
/// GPUBUG-103/104/105: 8 of these used to point at completely different
/// registers. Each one has been re-validated against canary's
/// `register_table.inc`; the stale values are kept in the trailing comments
/// for traceability.
pub mod reg {
    // --- 0x2000..=0x2005: surface + per-target info -----------------------
    pub const RB_SURFACE_INFO: u32 = 0x2000;
    pub const RB_COLOR_INFO_0: u32 = 0x2001;
    pub const RB_DEPTH_INFO: u32 = 0x2002;
    // GPUBUG-105: targets 1..3 (canary: RB_COLOR1/2/3_INFO) used to index
    // COHER_DEST_BASE_* instead.
    pub const RB_COLOR_INFO_1: u32 = 0x2003; // was 0x2010 (= COHER_DEST_BASE_10).
    pub const RB_COLOR_INFO_2: u32 = 0x2004; // was 0x2011.
    pub const RB_COLOR_INFO_3: u32 = 0x2005; // was 0x2012.

    // --- 0x2080..=0x2082: window offset + window scissor ------------------
    pub const PA_SC_WINDOW_OFFSET: u32 = 0x2080;
    // GPUBUG-104: these used to read PA_SC_SCREEN_SCISSOR_{TL,BR} (the
    // global screen scissor) rather than the per-window scissor.
    pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x2081; // was 0x200E (= SCREEN_SCISSOR_TL).
    pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x2082; // was 0x200F (= SCREEN_SCISSOR_BR).

    // --- 0x210C..=0x2114: stencil ref/mask + viewport transform -----------
    pub const RB_STENCILREFMASK_BF: u32 = 0x210C;
    pub const RB_STENCILREFMASK: u32 = 0x210D;
    pub const PA_CL_VPORT_XSCALE: u32 = 0x210F;
    pub const PA_CL_VPORT_XOFFSET: u32 = 0x2110;
    pub const PA_CL_VPORT_YSCALE: u32 = 0x2111;
    pub const PA_CL_VPORT_YOFFSET: u32 = 0x2112;
    pub const PA_CL_VPORT_ZSCALE: u32 = 0x2113;
    pub const PA_CL_VPORT_ZOFFSET: u32 = 0x2114;

    // --- 0x21FA..=0x21FC: VGT (vertex/geometry transform), GPUBUG-103 -----
    pub const VGT_DMA_BASE: u32 = 0x21FA; // was 0x2282 (junk).
    pub const VGT_DMA_SIZE: u32 = 0x21FB; // was 0x2283 (junk).
    pub const VGT_DRAW_INITIATOR: u32 = 0x21FC; // was 0x2281 (junk).

    // --- 0x2200..=0x220B: depth / blend / color / mode control ------------
    pub const RB_DEPTHCONTROL: u32 = 0x2200;
    pub const RB_BLENDCONTROL_0: u32 = 0x2201;
    pub const RB_COLORCONTROL: u32 = 0x2202;
    pub const RB_MODECONTROL: u32 = 0x2208;
    pub const RB_BLENDCONTROL_1: u32 = 0x2209;
    pub const RB_BLENDCONTROL_2: u32 = 0x220A;
    pub const RB_BLENDCONTROL_3: u32 = 0x220B;

    // --- 0x2318..=0x231F: resolve (copy) destination + clear values -------
    pub const RB_COPY_CONTROL: u32 = 0x2318;
    pub const RB_COPY_DEST_BASE: u32 = 0x2319;
    pub const RB_COPY_DEST_PITCH: u32 = 0x231A;
    pub const RB_COPY_DEST_INFO: u32 = 0x231B;
    pub const RB_DEPTH_CLEAR: u32 = 0x231D;
    pub const RB_COLOR_CLEAR: u32 = 0x231E;
    pub const RB_COLOR_CLEAR_LO: u32 = 0x231F;
}
/// Build a [`DrawState`] from a `VGT_DRAW_INITIATOR` value + the current
/// register file. `extra_dma_base`/`extra_dma_size` can override the
/// DMA fields if the caller has them from the PM4 packet payload (canary
/// passes them inline with `DRAW_INDX`).
pub fn extract(
register_file: &RegisterFile,
vgt_draw_initiator: u32,
dma_base: Option<u32>,
dma_size: Option<u32>,
) -> DrawState {
// `VGT_DRAW_INITIATOR` bit layout (per canary `registers.h:315-327`):
// [5:0] prim_type (PrimitiveType)
// [7:6] source_select (0=DMA, 1=immediate, 2=auto)
// [9:8] major_mode
// [10] _pad
// [11] index_size (0=16-bit, 1=32-bit) ← GPUBUG-103
// [12] not_eop
// [15:13] _pad
// [31:16] num_indices
let prim_bits = vgt_draw_initiator & 0x3F;
let source_select = (vgt_draw_initiator >> 6) & 0x3;
let index_size_bit = (vgt_draw_initiator >> 11) & 0x1;
let num_indices = (vgt_draw_initiator >> 16) & 0xFFFF;
let index_size = if index_size_bit == 0 {
IndexSize::Sixteen
} else {
IndexSize::ThirtyTwo
};
let index_source = match source_select {
0 => IndexSource::Dma {
base_address: dma_base.unwrap_or_else(|| register_file.read(reg::VGT_DMA_BASE)),
size_dwords: dma_size.unwrap_or_else(|| register_file.read(reg::VGT_DMA_SIZE)),
index_size,
},
1 => IndexSource::Immediate { index_size },
_ => IndexSource::AutoIndex,
};
let f = |r: u32| f32::from_bits(register_file.read(r));
let viewport = Viewport {
scale_x: f(reg::PA_CL_VPORT_XSCALE),
scale_y: f(reg::PA_CL_VPORT_YSCALE),
scale_z: f(reg::PA_CL_VPORT_ZSCALE),
offset_x: f(reg::PA_CL_VPORT_XOFFSET),
offset_y: f(reg::PA_CL_VPORT_YOFFSET),
offset_z: f(reg::PA_CL_VPORT_ZOFFSET),
};
let tl = register_file.read(reg::PA_SC_WINDOW_SCISSOR_TL);
let br = register_file.read(reg::PA_SC_WINDOW_SCISSOR_BR);
let scissor = Scissor {
tl_x: (tl & 0x7FFF) as u16,
tl_y: ((tl >> 16) & 0x7FFF) as u16,
br_x: (br & 0x7FFF) as u16,
br_y: ((br >> 16) & 0x7FFF) as u16,
};
let rb_modecontrol = register_file.read(reg::RB_MODECONTROL);
let color_mask = rb_modecontrol & 0xF;
let ci = |reg: u32, present: bool| {
if !present {
return None;
}
let raw = register_file.read(reg);
Some(ColorTargetInfo {
base_tiles: (raw & 0xFFF) as u16,
format: ((raw >> 16) & 0xF) as u8,
})
};
let color_info = [
ci(reg::RB_COLOR_INFO_0, (color_mask & 0x1) != 0),
ci(reg::RB_COLOR_INFO_1, (color_mask & 0x2) != 0),
ci(reg::RB_COLOR_INFO_2, (color_mask & 0x4) != 0),
ci(reg::RB_COLOR_INFO_3, (color_mask & 0x8) != 0),
];
let depth_raw = register_file.read(reg::RB_DEPTH_INFO);
// Depth-surface "present" = the RB_MODECONTROL depth-enable bit at bit 4.
let depth_present = (rb_modecontrol & 0x10) != 0;
let depth_info = if depth_present {
Some(DepthTargetInfo {
base_tiles: (depth_raw & 0xFFF) as u16,
format: ((depth_raw >> 16) & 0x1) as u8,
})
} else {
None
};
DrawState {
primitive: PrimitiveType::from_bits(prim_bits),
vertex_count: num_indices,
index_source,
viewport,
scissor,
color_info,
depth_info,
rb_modecontrol,
rb_colorcontrol: register_file.read(reg::RB_COLORCONTROL),
rb_depthcontrol: register_file.read(reg::RB_DEPTHCONTROL),
rb_blendcontrol: [
register_file.read(reg::RB_BLENDCONTROL_0),
register_file.read(reg::RB_BLENDCONTROL_1),
register_file.read(reg::RB_BLENDCONTROL_2),
register_file.read(reg::RB_BLENDCONTROL_3),
],
rb_stencilrefmask: register_file.read(reg::RB_STENCILREFMASK),
rb_stencilrefmask_bf: register_file.read(reg::RB_STENCILREFMASK_BF),
pa_sc_window_offset: register_file.read(reg::PA_SC_WINDOW_OFFSET),
rb_copy_control: register_file.read(reg::RB_COPY_CONTROL),
rb_copy_dest_base: register_file.read(reg::RB_COPY_DEST_BASE),
rb_copy_dest_pitch: register_file.read(reg::RB_COPY_DEST_PITCH),
rb_copy_dest_info: register_file.read(reg::RB_COPY_DEST_INFO),
// P3b M1: the kernel-side caller is expected to populate these
// via `DrawState { ..extract(...), vs_blob_key, ps_blob_key }` so
// the pure-register extraction stays decoupled from `GpuSystem`
// state. Default to None so a bare `extract()` stays valid for
// unit tests.
vs_blob_key: None,
ps_blob_key: None,
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Fresh register file for a test, exactly as `RegisterFile::new()` builds it.
/// NOTE(review): the tests below rely on unwritten registers reading as
/// zero — presumably what `new()` guarantees; confirm.
fn rf() -> RegisterFile {
RegisterFile::new()
}
/// An auto-indexed triangle list on an untouched register file decodes the
/// primitive and count and reports no bound color or depth targets.
#[test]
fn extract_basic_triangle_list_no_rt() {
    let regs = rf();
    // Initiator fields: prim_type = 4 (TriangleList), source_select = 2
    // (auto-index), num_indices = 6.
    let initiator = 4 | (2 << 6) | (6u32 << 16);

    let state = extract(&regs, initiator, None, None);

    assert_eq!(state.primitive, PrimitiveType::TriangleList);
    assert_eq!(state.vertex_count, 6);
    assert!(matches!(state.index_source, IndexSource::AutoIndex));
    assert!(state.color_info.iter().all(Option::is_none));
    assert!(state.depth_info.is_none());
}
/// DMA draws take the caller-supplied base/size overrides, and the index
/// element width is decoded from initiator bit 11 only (GPUBUG-103
/// regression guard: bit 8 belongs to `major_mode` and must not affect it).
#[test]
fn extract_dma_indices_uses_override() {
    let rf = rf();
    // prim_type = 4 (TriangleList), source_select = 0 (DMA), num_indices = 3.
    let vgt = (3u32 << 16) | 4;

    let ds = extract(&rf, vgt, Some(0xDEAD_0000), Some(6));
    match ds.index_source {
        IndexSource::Dma {
            base_address,
            size_dwords,
            index_size,
        } => {
            assert_eq!(base_address, 0xDEAD_0000);
            assert_eq!(size_dwords, 6);
            assert_eq!(index_size, IndexSize::Sixteen);
        }
        other => panic!("expected Dma, got {other:?}"),
    }

    // Bit 11 set selects 32-bit indices.
    let ds = extract(&rf, vgt | (1 << 11), Some(0xDEAD_0000), Some(6));
    match ds.index_source {
        IndexSource::Dma { index_size, .. } => {
            assert_eq!(index_size, IndexSize::ThirtyTwo);
        }
        other => panic!("expected Dma, got {other:?}"),
    }

    // Bit 8 (`major_mode[0]`, the pre-fix position) leaves the size 16-bit.
    let ds = extract(&rf, vgt | (1 << 8), Some(0xDEAD_0000), Some(6));
    match ds.index_source {
        IndexSource::Dma { index_size, .. } => {
            assert_eq!(index_size, IndexSize::Sixteen);
        }
        other => panic!("expected Dma, got {other:?}"),
    }
}
/// With color target 0 and depth enabled in `RB_MODECONTROL`, the format and
/// tile base of both targets are decoded from their info registers.
#[test]
fn color_and_depth_enabled_bits_are_honored() {
    let mut regs = rf();
    // Enable color target 0 (bit 0) and depth (bit 4).
    regs.write(reg::RB_MODECONTROL, 0x11);
    // Color 0: format = 2, tile base = 0x64.
    regs.write(reg::RB_COLOR_INFO_0, (2 << 16) | 0x64);
    // Depth: format = 1, tile base = 0x32.
    regs.write(reg::RB_DEPTH_INFO, (1 << 16) | 0x32);

    let state = extract(&regs, 4, None, None);

    let color0 = state.color_info[0].unwrap();
    assert_eq!(color0.format, 2);
    assert_eq!(color0.base_tiles, 0x64);

    let depth = state.depth_info.unwrap();
    assert_eq!(depth.format, 1);
    assert_eq!(depth.base_tiles, 0x32);
}
/// `RB_COPY_DEST_BASE` holds a raw 32-bit value, whereas a Xenon physical
/// address is only 29 bits wide (`& 0x1FFF_FFFF`).
/// `ResolveInfo::from_register_file` has to strip the upper bits so later
/// writes cannot land outside the physical range.
#[test]
fn resolve_info_masks_dest_base_to_physical() {
    let mut regs = rf();
    regs.write(reg::RB_COPY_DEST_BASE, 0xDEAD_BEEF);

    let decoded = ResolveInfo::from_register_file(&regs);

    assert_eq!(decoded.dest_base, 0x1EAD_BEEF);
}
/// The resolve rectangle is scissor ∩ (0, 0, dest_pitch, dest_height),
/// snapped to the 8-pixel grid (Canary `kResolveAlignmentPixels`). The
/// scissor here genuinely tightens the rectangle instead of merely being
/// degenerate.
#[test]
fn resolve_info_derives_8px_aligned_rect_from_scissor_and_dest_pitch() {
    let mut regs = rf();
    // Destination is 1280×720; scissor spans (5, 5) to (1000, 717).
    regs.write(reg::RB_COPY_DEST_PITCH, (720u32 << 16) | 1280u32);
    regs.write(reg::PA_SC_WINDOW_SCISSOR_TL, (5u32 << 16) | 5u32);
    regs.write(reg::PA_SC_WINDOW_SCISSOR_BR, (717u32 << 16) | 1000u32);

    let rect = ResolveInfo::from_register_file(&regs).coords;

    // Near corner: 5 floors to 0 on both axes.
    // Far corner: x = min(1280, 1000) = 1000, already on the grid;
    //             y = min(720, 717) = 717, which ceils to 720.
    assert_eq!(rect.x0, 0);
    assert_eq!(rect.y0, 0);
    assert_eq!(rect.width, 1000);
    assert_eq!(rect.height, 720);
}
/// A non-degenerate scissor that extends beyond `dest_pitch/height` is
/// clamped to the destination extent.
#[test]
fn resolve_info_scissor_cannot_widen_past_dest() {
    // 16×16 destination, scissor bottom-right far outside it at (1000, 1000).
    let dest_extent = (16u32 << 16) | 16u32;
    let scissor_br = (1000u32 << 16) | 1000u32;

    let mut regs = rf();
    regs.write(reg::RB_COPY_DEST_PITCH, dest_extent);
    regs.write(reg::PA_SC_WINDOW_SCISSOR_BR, scissor_br);

    let resolved = ResolveInfo::from_register_file(&regs);
    assert_eq!(resolved.coords.width, 16);
    assert_eq!(resolved.coords.height, 16);
}
/// Source decoding: a `copy_src_select` of 4 or more means depth; any
/// smaller value selects `Color(idx)`.
#[test]
fn resolve_info_decodes_source_select() {
    let mut regs = rf();

    // src_select = 2 (color2)
    regs.write(reg::RB_COPY_CONTROL, 2);
    let color_case = ResolveInfo::from_register_file(&regs);
    assert_eq!(color_case.source, ResolveSource::Color(2));
    assert_eq!(color_case.copy_src_select, 2);

    // src_select = 4 -> depth
    regs.write(reg::RB_COPY_CONTROL, 4);
    let depth_case = ResolveInfo::from_register_file(&regs);
    assert_eq!(depth_case.source, ResolveSource::Depth);
}
/// Field layout of `copy_dest_info`: endian in bits 2:0, array in bit 3,
/// format in bits 12:7, exp_bias in bits 21:16 (signed 6-bit).
#[test]
fn resolve_info_decodes_copy_dest_info_fields() {
    let endian_bits = 2u32; // endian=2 (k8in32)
    let array_bit = 1u32 << 3; // array=1
    let format_bits = 6u32 << 7; // format=6 (k_8_8_8_8)
    let exp_bias_bits = 0x3Fu32 << 16; // exp_bias=-1 (0x3F)

    let mut regs = rf();
    regs.write(
        reg::RB_COPY_DEST_INFO,
        endian_bits | array_bit | format_bits | exp_bias_bits,
    );

    let decoded = ResolveInfo::from_register_file(&regs);
    assert_eq!(decoded.dest_endian, 2);
    assert_eq!(decoded.dest_format, 6);
    assert_eq!(decoded.dest_exp_bias, -1);
    assert!(decoded.copy_dest_array);
}
/// The 6-bit exp_bias field is sign-extended, so both positive and
/// negative values round-trip.
#[test]
fn resolve_info_exp_bias_sign_extends() {
    let mut regs = rf();

    // (raw 6-bit field value, expected decoded bias)
    let cases = [
        (0x01u32, 1), // exp_bias = +1
        (0x20, -32),  // exp_bias = -32
        (0x1F, 31),   // exp_bias = +31
    ];
    for (raw_field, expected_bias) in cases {
        regs.write(reg::RB_COPY_DEST_INFO, raw_field << 16);
        assert_eq!(
            ResolveInfo::from_register_file(&regs).dest_exp_bias,
            expected_bias
        );
    }
}
/// `surface_pitch_tiles` is derived from `RB_SURFACE_INFO`: surface_pitch
/// (bits 13:0) and msaa_samples (bits 17:16). 1280 px divides by 80
/// exactly → 16 tiles at 1x MSAA / 32bpp; 4x MSAA doubles the sample pitch.
#[test]
fn resolve_info_computes_surface_pitch_tiles() {
    let msaa_4x_bits = 2u32 << 16;

    let mut regs = rf();
    regs.write(reg::RB_COPY_CONTROL, 0); // color0
    regs.write(reg::RB_COLOR_INFO_0, 0u32 << 16); // k_8_8_8_8 -> 32bpp

    // msaa=1x, pitch=1280
    regs.write(reg::RB_SURFACE_INFO, 1280);
    let baseline = ResolveInfo::from_register_file(&regs);
    assert_eq!(baseline.surface_pitch_tiles, 16);
    assert!(!baseline.source_is_64bpp);

    // 4x MSAA widens the sample pitch by 2x.
    regs.write(reg::RB_SURFACE_INFO, 1280 | msaa_4x_bits);
    let with_msaa = ResolveInfo::from_register_file(&regs);
    assert_eq!(with_msaa.surface_pitch_tiles, 32);

    // Non-aligned pitch rounds up.
    regs.write(reg::RB_SURFACE_INFO, 1281);
    let unaligned = ResolveInfo::from_register_file(&regs);
    assert_eq!(unaligned.surface_pitch_tiles, 17);
}
/// `color_render_target_format_is_64bpp` must agree with the 64bpp enum
/// values in `xenos::ColorRenderTargetFormat`: k_16_16_16_16 (5),
/// k_16_16_16_16_FLOAT (7), k_32_32_FLOAT (15).
#[test]
fn color_format_64bpp_table_is_correct() {
    // (format value, expected 64bpp classification)
    let expectations = [
        (0, false),
        (4, false),
        (5, true),
        (6, false),
        (7, true),
        (14, false),
        (15, true),
    ];
    for (format, is_64bpp) in expectations {
        assert_eq!(color_render_target_format_is_64bpp(format), is_64bpp);
    }
}
/// Exact arithmetic of the `surface_pitch_tiles` helper, including the
/// 64bpp doubling. `xenos.h:465-476`.
#[test]
fn surface_pitch_tiles_matches_canary_helper() {
    use crate::render_target_cache::MsaaSamples;

    // (pitch in pixels, MSAA mode, 64bpp?, expected tiles)
    let cases = [
        // 80 px, 1x, 32bpp -> 1 tile exactly.
        (80, MsaaSamples::X1, false, 1),
        // 81 px, 1x, 32bpp -> 2 tiles (round up).
        (81, MsaaSamples::X1, false, 2),
        // 80 px, 1x, 64bpp -> 2 tiles (64bpp doubles).
        (80, MsaaSamples::X1, true, 2),
        // 80 px, 2x, 32bpp -> 1 tile (2x MSAA doesn't widen X).
        (80, MsaaSamples::X2, false, 1),
        // 80 px, 4x, 32bpp -> 2 tiles (4x MSAA widens X 2x).
        (80, MsaaSamples::X4, false, 2),
        // 80 px, 4x, 64bpp -> 4 tiles.
        (80, MsaaSamples::X4, true, 4),
    ];
    for (pitch_px, msaa, is_64bpp, expected_tiles) in cases {
        assert_eq!(surface_pitch_tiles(pitch_px, msaa, is_64bpp), expected_tiles);
    }
}
/// For a color source, `copy_src_select` picks which `RB_COLOR_INFO_<idx>`
/// register is read. This checks that index-3 color targets are addressed.
#[test]
fn resolve_info_color_source_selects_correct_color_info() {
    // k_16_16_16_16, base=0x123
    let color3_word = (5u32 << 16) | 0x123;

    let mut regs = rf();
    regs.write(reg::RB_COPY_CONTROL, 3); // color3
    regs.write(reg::RB_COLOR_INFO_3, color3_word);

    let resolved = ResolveInfo::from_register_file(&regs);
    assert_eq!(resolved.source, ResolveSource::Color(3));
    assert_eq!(resolved.source_format, 5);
    assert_eq!(resolved.source_base_tiles, 0x123);
    assert!(resolved.source_is_64bpp);
}
/// For a depth source, the format (1 bit) and tile base come from
/// `RB_DEPTH_INFO`.
#[test]
fn resolve_info_depth_source_reads_depth_info() {
    // kD24FS8, base=0x55
    let depth_word = (1u32 << 16) | 0x55;

    let mut regs = rf();
    regs.write(reg::RB_COPY_CONTROL, 4); // depth
    regs.write(reg::RB_DEPTH_INFO, depth_word);

    let resolved = ResolveInfo::from_register_file(&regs);
    assert_eq!(resolved.source, ResolveSource::Depth);
    assert_eq!(resolved.source_format, 1);
    assert_eq!(resolved.source_base_tiles, 0x55);
    // depth always 32bpp
    assert!(!resolved.source_is_64bpp);
}
// ---- Vertex fetch 0 rectangle tests -------------------------------
/// Helper: writes a triangle covering the rectangle `(x0, y0) → (x1, y1)`
/// into guest memory at `vb_addr`, then programs VF0 (registers `0x4800`
/// and `0x4801`) to read 6 dwords from it with endian = k8in32 (the
/// standard D3D-vertex-buffer case).
fn seed_vertex_fetch_0(
    rf: &mut RegisterFile,
    mem: &xenia_memory::GuestMemory,
    vb_addr: u32,
    x0: f32,
    y0: f32,
    x1: f32,
    y1: f32,
) {
    use xenia_memory::MemoryAccess;

    // Vertices (x0, y0), (x1, y0), (x0, y1) as three (x, y) float pairs —
    // exactly the D3D9 resolve triangle layout Canary expects.
    let components = [x0, y0, x1, y0, x0, y1];
    for (slot, value) in (0u32..).zip(components) {
        // Floats are written as BE (PPC `stfs` semantics). `mem.write_u32`
        // already stores BE bytes, so the raw u32 bit pattern is passed.
        mem.write_u32(vb_addr + slot * 4, value.to_bits());
    }

    // VF0 dword 0: address (bits 31:2, in dwords) + type (bits 1:0 = 3).
    let fetch_dword_0 = ((vb_addr / 4) << 2) | 3;
    // VF0 dword 1: size (bits 25:2 = 6) + endian (bits 1:0 = 2 = k8in32).
    let fetch_dword_1 = (6u32 << 2) | 2;

    let vf0_reg = 0x4800;
    rf.write(vf0_reg, fetch_dword_0);
    rf.write(vf0_reg + 1, fetch_dword_1);
}
/// Creates a guest memory instance with a read/write allocation of
/// `0x1_0000` bytes at `0x5000_0000`, the address the VF0 tests place
/// their vertex buffer at.
fn fresh_mem_for_vf0() -> xenia_memory::GuestMemory {
    use xenia_memory::page_table::MemoryProtect;

    let read_write = MemoryProtect::READ | MemoryProtect::WRITE;
    let mut guest = xenia_memory::GuestMemory::new().expect("guest memory");
    guest
        .alloc(0x5000_0000, 0x1_0000, read_write)
        .expect("alloc");
    guest
}
/// With VF0 left unprogrammed in a fresh register file,
/// `vertex_fetch_0_rect` yields no rectangle.
#[test]
fn vf0_rect_returns_none_when_no_vertex_buffer() {
    let mem = fresh_mem_for_vf0();
    let regs = rf();
    let rect = vertex_fetch_0_rect(&regs, &mem);
    assert!(rect.is_none());
}
/// A VF0 constant whose size field is not 6 is rejected by
/// `vertex_fetch_0_rect`.
#[test]
fn vf0_rect_returns_none_for_wrong_size() {
    // type=3 (kVertex)
    let fetch_dword_0 = 0x5000_0000u32 | 3;
    // size=4 (wrong — should be 6), endian=2.
    let fetch_dword_1 = (4u32 << 2) | 2;

    let mem = fresh_mem_for_vf0();
    let mut regs = rf();
    regs.write(0x4800, fetch_dword_0);
    regs.write(0x4801, fetch_dword_1);

    let rect = vertex_fetch_0_rect(&regs, &mem);
    assert!(rect.is_none());
}
/// Seeds VF0 with the three-vertex triangle for the rectangle
/// (0, 0) → (100, 50) and checks that `vertex_fetch_0_rect` returns the
/// rectangle's corners.
#[test]
fn vf0_rect_derives_rectangle_from_three_vertices() {
    let mut rf = rf();
    // `seed_vertex_fetch_0` takes guest memory by shared reference, so the
    // binding does not need to be mutable.
    let mem = fresh_mem_for_vf0();
    // D3D9 pixel center: +0.5 half-pixel offset applied before Fixed16p8.
    // Leave PA_SU_VTX_CNTL at 0 (kD3DZero).
    // Vertices written: (0, 0), (100, 0), (0, 50), i.e. the rectangle
    // (0, 0) → (100, 50).
    seed_vertex_fetch_0(&mut rf, &mem, 0x5000_0000, 0.0, 0.0, 100.0, 50.0);
    let (x0, y0, x1, y1) = vertex_fetch_0_rect(&rf, &mem).expect("VF0 present");
    // (0 + 0.5) * 256 = 128. (128 + 127) >> 8 = 0. So x0/y0 = 0.
    // (100 + 0.5) * 256 = 25728. (25728 + 127) >> 8 = 100.
    // (50 + 0.5) * 256 = 12928. (12928 + 127) >> 8 = 50.
    assert_eq!(x0, 0);
    assert_eq!(y0, 0);
    assert_eq!(x1, 100);
    assert_eq!(y1, 50);
}
/// When VF0 describes a resolve triangle,
/// `ResolveInfo::from_register_file_and_memory` uses that rectangle
/// instead of the scissor+dest default.
#[test]
fn from_register_file_and_memory_prefers_vf0_rect() {
    let mut rf = rf();
    // `seed_vertex_fetch_0` takes guest memory by shared reference, so the
    // binding does not need to be mutable.
    let mem = fresh_mem_for_vf0();
    // Without VF0: dest_pitch/height defaults produce (0, 0, 1280, 720).
    rf.write(reg::RB_COPY_DEST_PITCH, (720u32 << 16) | 1280u32);
    // With VF0 pointing at a 256×128 triangle, override to that.
    seed_vertex_fetch_0(&mut rf, &mem, 0x5000_0000, 0.0, 0.0, 256.0, 128.0);
    let info = ResolveInfo::from_register_file_and_memory(&rf, &mem);
    assert_eq!(info.coords.x0, 0);
    assert_eq!(info.coords.y0, 0);
    assert_eq!(info.coords.width, 256);
    assert_eq!(info.coords.height, 128);
}
/// Without a VF0 vertex buffer, the rect falls back to the scissor+dest
/// default.
#[test]
fn from_register_file_and_memory_falls_back_without_vf0() {
    // Same 1280×720 destination extent the other resolve tests program.
    let dest_extent = (720u32 << 16) | 1280u32;

    let mem = fresh_mem_for_vf0();
    let mut regs = rf();
    regs.write(reg::RB_COPY_DEST_PITCH, dest_extent);

    let resolved = ResolveInfo::from_register_file_and_memory(&regs, &mem);
    assert_eq!(resolved.coords.width, 1280);
    assert_eq!(resolved.coords.height, 720);
}
/// With no scissor programmed, `resolve_rect_apply_scissor_and_align_8`
/// only performs the 8-pixel alignment.
#[test]
fn scissor_helper_8_aligns_with_no_scissor() {
    let regs = rf();
    let aligned = resolve_rect_apply_scissor_and_align_8(&regs, 5, 5, 1001, 17);
    // Origin (5, 5) aligns down to (0, 0).
    // 1001 ceil-to-8 = 1008; 17 ceil-to-8 = 24.
    assert_eq!(aligned, (0, 0, 1008, 24));
}
/// A negative bounding box (VF0 can produce these) is clamped to the
/// scissor top-left and never goes below zero.
#[test]
fn scissor_helper_clamps_negative_to_zero() {
    // Small scissor at (0,0)..(128, 64).
    let scissor_br = (64u32 << 16) | 128u32;

    let mut regs = rf();
    regs.write(reg::PA_SC_WINDOW_SCISSOR_BR, scissor_br);

    let clamped = resolve_rect_apply_scissor_and_align_8(&regs, -50, -50, 80, 32);
    // x1 clamped from 80 -> 80, ceil8 -> 80. y1 32 -> 32.
    assert_eq!(clamped, (0, 0, 80, 32));
}
}