fix(gpu): GPUBUG-103/104/105 — fix 8 draw-state register addresses + index_size bit

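Eight of the register-index constants in draw_state.rs::reg pointed at
completely unrelated registers because the canonical canary table
(register_table.inc) was misread when the module was first authored.
A ninth constant with the same problem, the function-local
PA_SU_VTX_CNTL (defined outside the `reg` module), is fixed here too,
which is why the table below has nine rows. Each value was re-validated
against lines 1232-1336 of canary's register_table.inc.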

| Register                | Pre-fix | Canary | Pre-fix address was actually |
| ----------------------- | ------- | ------ | ---------------------------- |
| VGT_DRAW_INITIATOR      | 0x2281  | 0x21FC | (junk)                       |
| VGT_DMA_BASE            | 0x2282  | 0x21FA | (junk)                       |
| VGT_DMA_SIZE            | 0x2283  | 0x21FB | (junk)                       |
| PA_SC_WINDOW_SCISSOR_TL | 0x200E  | 0x2081 | PA_SC_SCREEN_SCISSOR_TL      |
| PA_SC_WINDOW_SCISSOR_BR | 0x200F  | 0x2082 | PA_SC_SCREEN_SCISSOR_BR      |
| RB_COLOR_INFO_1         | 0x2010  | 0x2003 | COHER_DEST_BASE_10           |
| RB_COLOR_INFO_2         | 0x2011  | 0x2004 | COHER_DEST_BASE_11           |
| RB_COLOR_INFO_3         | 0x2012  | 0x2005 | COHER_DEST_BASE_12           |
| PA_SU_VTX_CNTL          | 0x2083  | 0x2302 | PA_SC_CLIPRECT_RULE          |

Also corrected the `index_size` bit position in VGT_DRAW_INITIATOR
extraction: was bit 8 (which is `major_mode[0]`), should be bit 11 per
canary `registers.h:324` (`xenos::IndexFormat index_size : 1; // +11`).
The block comment in `extract()` was also wrong about the
intermediate field layout and has been refreshed.

Verification at -n 100M lockstep:
  swaps:                2 → 2     (unchanged)
  draws:                0 → 0     (still gated — see below)
  packets:              ~61M (within noise)
Tests: 149 (no count change; existing draw_state tests cover the
new constants implicitly via behavioral round-trip).

The audit predicted Phases C+D+E together would unlock `draws > 0`,
but the runtime plateau is multi-causal per the audit's own analysis
(`project_xenia_rs_audit_2026_05_02.md`). The likely remaining
blockers in -n 100M:
  * 4 parked-waiter worker threads (handles 0x1004, 0x100c, 0x15e4,
    0x42450b5c) — Phase F's XAM/spinlock fixes target this.
  * shader_blobs_live=0 after 100M — the game hasn't issued IM_LOAD
    yet because workers haven't loaded shader resources.
The register fixes here are still load-bearing for any draw that
DOES happen (every register read at 0x2281 was junk before this
commit) — landing them now is correct even if draws=0 persists until
Phase F unparks the resource-loader threads.

Closes GPUBUG-103, GPUBUG-104, GPUBUG-105 (P0).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-03 14:22:04 +02:00
parent a07784349d
commit 8723d6826b

View File

@@ -362,9 +362,12 @@ pub fn vertex_fetch_0_rect(
f32::from_bits(swapped) f32::from_bits(swapped)
}); });
// PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel), 1 = kOpenGL (no offset). // PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel),
// Register index 0x2083 per register_table.inc (PA_SU_VTX_CNTL). // 1 = kOpenGL (no offset). Pre-fix the constant at this site read
const PA_SU_VTX_CNTL: u32 = 0x2083; // 0x2083 (PA_SC_CLIPRECT_RULE), giving non-deterministic half-pixel
// offsets that broke 3D camera matrices. Canary `register_table.inc`
// line 1336 says PA_SU_VTX_CNTL is 0x2302. GPUBUG-105.
const PA_SU_VTX_CNTL: u32 = 0x2302;
let half_pixel_offset = if rf.read(PA_SU_VTX_CNTL) & 1 == 0 { let half_pixel_offset = if rf.read(PA_SU_VTX_CNTL) & 1 == 0 {
0.5f32 0.5f32
} else { } else {
@@ -588,26 +591,35 @@ impl ResolveInfo {
} }
} }
/// Register indices from `xenia-canary/src/xenia/gpu/registers.h`. Only what /// Register indices from `xenia-canary/src/xenia/gpu/register_table.inc`.
/// the extractor reads is named here. /// Only what the extractor reads is named here.
///
/// GPUBUG-103/104/105: 8 of these were previously off-by-many — pointing at
/// completely different registers. Each has been re-validated against
/// canary's `register_table.inc`.
pub mod reg { pub mod reg {
pub const VGT_DRAW_INITIATOR: u32 = 0x2281; // VGT (vertex grouper / tessellator) — GPUBUG-103.
pub const VGT_DMA_BASE: u32 = 0x2282; pub const VGT_DRAW_INITIATOR: u32 = 0x21FC; // was 0x2281 (junk).
pub const VGT_DMA_SIZE: u32 = 0x2283; pub const VGT_DMA_BASE: u32 = 0x21FA; // was 0x2282 (junk).
pub const VGT_DMA_SIZE: u32 = 0x21FB; // was 0x2283 (junk).
pub const PA_CL_VPORT_XSCALE: u32 = 0x210F; pub const PA_CL_VPORT_XSCALE: u32 = 0x210F;
pub const PA_CL_VPORT_XOFFSET: u32 = 0x2110; pub const PA_CL_VPORT_XOFFSET: u32 = 0x2110;
pub const PA_CL_VPORT_YSCALE: u32 = 0x2111; pub const PA_CL_VPORT_YSCALE: u32 = 0x2111;
pub const PA_CL_VPORT_YOFFSET: u32 = 0x2112; pub const PA_CL_VPORT_YOFFSET: u32 = 0x2112;
pub const PA_CL_VPORT_ZSCALE: u32 = 0x2113; pub const PA_CL_VPORT_ZSCALE: u32 = 0x2113;
pub const PA_CL_VPORT_ZOFFSET: u32 = 0x2114; pub const PA_CL_VPORT_ZOFFSET: u32 = 0x2114;
pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x200E; // GPUBUG-104: pre-fix these read PA_SC_SCREEN_SCISSOR_{TL,BR} (the
pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x200F; // global screen scissor) instead of the per-window scissor.
pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x2081; // was 0x200E (= SCREEN_SCISSOR_TL).
pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x2082; // was 0x200F (= SCREEN_SCISSOR_BR).
pub const RB_MODECONTROL: u32 = 0x2208; pub const RB_MODECONTROL: u32 = 0x2208;
pub const RB_SURFACE_INFO: u32 = 0x2000; pub const RB_SURFACE_INFO: u32 = 0x2000;
pub const RB_COLOR_INFO_0: u32 = 0x2001; pub const RB_COLOR_INFO_0: u32 = 0x2001;
pub const RB_COLOR_INFO_1: u32 = 0x2010; // GPUBUG-105: per-RT color info; pre-fix indexed COHER_DEST_BASE_*
pub const RB_COLOR_INFO_2: u32 = 0x2011; // instead. Canary names them RB_COLOR1/2/3_INFO.
pub const RB_COLOR_INFO_3: u32 = 0x2012; pub const RB_COLOR_INFO_1: u32 = 0x2003; // was 0x2010 (= COHER_DEST_BASE_10).
pub const RB_COLOR_INFO_2: u32 = 0x2004; // was 0x2011.
pub const RB_COLOR_INFO_3: u32 = 0x2005; // was 0x2012.
pub const RB_DEPTH_INFO: u32 = 0x2002; pub const RB_DEPTH_INFO: u32 = 0x2002;
pub const RB_COLORCONTROL: u32 = 0x2202; pub const RB_COLORCONTROL: u32 = 0x2202;
pub const RB_DEPTHCONTROL: u32 = 0x2200; pub const RB_DEPTHCONTROL: u32 = 0x2200;
@@ -638,14 +650,18 @@ pub fn extract(
dma_base: Option<u32>, dma_base: Option<u32>,
dma_size: Option<u32>, dma_size: Option<u32>,
) -> DrawState { ) -> DrawState {
// `VGT_DRAW_INITIATOR` bit layout (per canary): // `VGT_DRAW_INITIATOR` bit layout (per canary `registers.h:315-327`):
// [5:0] prim_type // [5:0] prim_type (PrimitiveType)
// [7:6] source_select (0=DMA, 1=immediate, 2=auto) // [7:6] source_select (0=DMA, 1=immediate, 2=auto)
// [8] index_size (0=16-bit, 1=32-bit) // [9:8] major_mode
// [10] _pad
// [11] index_size (0=16-bit, 1=32-bit) ← GPUBUG-103
// [12] not_eop
// [15:13] _pad
// [31:16] num_indices // [31:16] num_indices
let prim_bits = vgt_draw_initiator & 0x3F; let prim_bits = vgt_draw_initiator & 0x3F;
let source_select = (vgt_draw_initiator >> 6) & 0x3; let source_select = (vgt_draw_initiator >> 6) & 0x3;
let index_size_bit = (vgt_draw_initiator >> 8) & 0x1; let index_size_bit = (vgt_draw_initiator >> 11) & 0x1;
let num_indices = (vgt_draw_initiator >> 16) & 0xFFFF; let num_indices = (vgt_draw_initiator >> 16) & 0xFFFF;
let index_size = if index_size_bit == 0 { let index_size = if index_size_bit == 0 {
IndexSize::Sixteen IndexSize::Sixteen