fix(gpu): GPUBUG-103/104/105 — fix 8 draw-state register addresses + index_size bit
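Eight of the register-index constants in draw_state.rs::reg pointed at
completely unrelated registers because the canonical canary table
(register_table.inc) was misread when the module was first authored.
A ninth constant, the function-local PA_SU_VTX_CNTL in
vertex_fetch_0_rect(), had the same problem and is fixed here as well.
Re-validated each value against canary's register_table.inc,
lines 1232-1336.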
| Register                | Pre-fix | Canary | Was actually            |
| ----------------------- | ------- | ------ | ----------------------- |
| VGT_DRAW_INITIATOR      | 0x2281  | 0x21FC | (junk)                  |
| VGT_DMA_BASE            | 0x2282  | 0x21FA | (junk)                  |
| VGT_DMA_SIZE            | 0x2283  | 0x21FB | (junk)                  |
| PA_SC_WINDOW_SCISSOR_TL | 0x200E  | 0x2081 | PA_SC_SCREEN_SCISSOR_TL |
| PA_SC_WINDOW_SCISSOR_BR | 0x200F  | 0x2082 | PA_SC_SCREEN_SCISSOR_BR |
| RB_COLOR_INFO_1         | 0x2010  | 0x2003 | COHER_DEST_BASE_10      |
| RB_COLOR_INFO_2         | 0x2011  | 0x2004 | COHER_DEST_BASE_11      |
| RB_COLOR_INFO_3         | 0x2012  | 0x2005 | COHER_DEST_BASE_12      |
| PA_SU_VTX_CNTL          | 0x2083  | 0x2302 | PA_SC_CLIPRECT_RULE     |
Also corrected the `index_size` bit position in VGT_DRAW_INITIATOR
extraction: was bit 8 (which is `major_mode[0]`), should be bit 11 per
canary `registers.h:324` (`xenos::IndexFormat index_size : 1; // +11`).
The block comment in `extract()` was also wrong about the
intermediate field layout and has been refreshed.
Verification at -n 100M lockstep:
swaps: 2 → 2 (unchanged)
draws: 0 → 0 (still gated — see below)
packets: ~61M (within noise)
Tests: 149 (no count change; existing draw_state tests cover the
new constants implicitly via behavioral round-trip).
The audit predicted that Phases C+D+E together would unlock `draws > 0`,
but the runtime plateau is multi-causal per the audit's own analysis
(`project_xenia_rs_audit_2026_05_02.md`). The likely remaining
blockers in the -n 100M run are:
* 4 parked-waiter worker threads (handles 0x1004, 0x100c, 0x15e4,
0x42450b5c) — Phase F's XAM/spinlock fixes target this.
* shader_blobs_live=0 after 100M — the game hasn't issued IM_LOAD
yet because workers haven't loaded shader resources.
The register fixes here are still load-bearing for any draw that
DOES happen (before this commit every VGT_DRAW_INITIATOR read went to
0x2281 and returned an unrelated register's contents) — landing them
now is correct even if draws=0 persists until Phase F unparks the
resource-loader threads.
Closes GPUBUG-103, GPUBUG-104, GPUBUG-105 (P0).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -362,9 +362,12 @@ pub fn vertex_fetch_0_rect(
|
||||
f32::from_bits(swapped)
|
||||
});
|
||||
|
||||
// PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel), 1 = kOpenGL (no offset).
|
||||
// Register index 0x2083 per register_table.inc (PA_SU_VTX_CNTL).
|
||||
const PA_SU_VTX_CNTL: u32 = 0x2083;
|
||||
// PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel),
|
||||
// 1 = kOpenGL (no offset). Pre-fix the constant at this site read
|
||||
// 0x2083 (PA_SC_CLIPRECT_RULE), giving non-deterministic half-pixel
|
||||
// offsets that broke 3D camera matrices. Canary `register_table.inc`
|
||||
// line 1336 says PA_SU_VTX_CNTL is 0x2302. GPUBUG-105.
|
||||
const PA_SU_VTX_CNTL: u32 = 0x2302;
|
||||
let half_pixel_offset = if rf.read(PA_SU_VTX_CNTL) & 1 == 0 {
|
||||
0.5f32
|
||||
} else {
|
||||
@@ -588,26 +591,35 @@ impl ResolveInfo {
|
||||
}
|
||||
}
|
||||
|
||||
/// Register indices from `xenia-canary/src/xenia/gpu/registers.h`. Only what
|
||||
/// the extractor reads is named here.
|
||||
/// Register indices from `xenia-canary/src/xenia/gpu/register_table.inc`.
|
||||
/// Only what the extractor reads is named here.
|
||||
///
|
||||
/// GPUBUG-103/104/105: 8 of these were previously off-by-many — pointing at
|
||||
/// completely different registers. Each has been re-validated against
|
||||
/// canary's `register_table.inc`.
|
||||
pub mod reg {
|
||||
pub const VGT_DRAW_INITIATOR: u32 = 0x2281;
|
||||
pub const VGT_DMA_BASE: u32 = 0x2282;
|
||||
pub const VGT_DMA_SIZE: u32 = 0x2283;
|
||||
// VGT (vertex/geometry transform) — GPUBUG-103.
|
||||
pub const VGT_DRAW_INITIATOR: u32 = 0x21FC; // was 0x2281 (junk).
|
||||
pub const VGT_DMA_BASE: u32 = 0x21FA; // was 0x2282 (junk).
|
||||
pub const VGT_DMA_SIZE: u32 = 0x21FB; // was 0x2283 (junk).
|
||||
pub const PA_CL_VPORT_XSCALE: u32 = 0x210F;
|
||||
pub const PA_CL_VPORT_XOFFSET: u32 = 0x2110;
|
||||
pub const PA_CL_VPORT_YSCALE: u32 = 0x2111;
|
||||
pub const PA_CL_VPORT_YOFFSET: u32 = 0x2112;
|
||||
pub const PA_CL_VPORT_ZSCALE: u32 = 0x2113;
|
||||
pub const PA_CL_VPORT_ZOFFSET: u32 = 0x2114;
|
||||
pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x200E;
|
||||
pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x200F;
|
||||
// GPUBUG-104: pre-fix these read PA_SC_SCREEN_SCISSOR_{TL,BR} (the
|
||||
// global screen scissor) instead of the per-window scissor.
|
||||
pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x2081; // was 0x200E (= SCREEN_SCISSOR_TL).
|
||||
pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x2082; // was 0x200F (= SCREEN_SCISSOR_BR).
|
||||
pub const RB_MODECONTROL: u32 = 0x2208;
|
||||
pub const RB_SURFACE_INFO: u32 = 0x2000;
|
||||
pub const RB_COLOR_INFO_0: u32 = 0x2001;
|
||||
pub const RB_COLOR_INFO_1: u32 = 0x2010;
|
||||
pub const RB_COLOR_INFO_2: u32 = 0x2011;
|
||||
pub const RB_COLOR_INFO_3: u32 = 0x2012;
|
||||
// GPUBUG-105: per-RT color info; pre-fix indexed COHER_DEST_BASE_*
|
||||
// instead. Canary names them RB_COLOR1/2/3_INFO.
|
||||
pub const RB_COLOR_INFO_1: u32 = 0x2003; // was 0x2010 (= COHER_DEST_BASE_10).
|
||||
pub const RB_COLOR_INFO_2: u32 = 0x2004; // was 0x2011.
|
||||
pub const RB_COLOR_INFO_3: u32 = 0x2005; // was 0x2012.
|
||||
pub const RB_DEPTH_INFO: u32 = 0x2002;
|
||||
pub const RB_COLORCONTROL: u32 = 0x2202;
|
||||
pub const RB_DEPTHCONTROL: u32 = 0x2200;
|
||||
@@ -638,14 +650,18 @@ pub fn extract(
|
||||
dma_base: Option<u32>,
|
||||
dma_size: Option<u32>,
|
||||
) -> DrawState {
|
||||
// `VGT_DRAW_INITIATOR` bit layout (per canary):
|
||||
// [5:0] prim_type
|
||||
// `VGT_DRAW_INITIATOR` bit layout (per canary `registers.h:315-327`):
|
||||
// [5:0] prim_type (PrimitiveType)
|
||||
// [7:6] source_select (0=DMA, 1=immediate, 2=auto)
|
||||
// [8] index_size (0=16-bit, 1=32-bit)
|
||||
// [9:8] major_mode
|
||||
// [10] _pad
|
||||
// [11] index_size (0=16-bit, 1=32-bit) ← GPUBUG-103
|
||||
// [12] not_eop
|
||||
// [15:13] _pad
|
||||
// [31:16] num_indices
|
||||
let prim_bits = vgt_draw_initiator & 0x3F;
|
||||
let source_select = (vgt_draw_initiator >> 6) & 0x3;
|
||||
let index_size_bit = (vgt_draw_initiator >> 8) & 0x1;
|
||||
let index_size_bit = (vgt_draw_initiator >> 11) & 0x1;
|
||||
let num_indices = (vgt_draw_initiator >> 16) & 0xFFFF;
|
||||
let index_size = if index_size_bit == 0 {
|
||||
IndexSize::Sixteen
|
||||
|
||||
Reference in New Issue
Block a user