From 8723d6826baef7696a601f19dd962c56e1b8a8ab Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sun, 3 May 2026 14:22:04 +0200 Subject: [PATCH] =?UTF-8?q?fix(gpu):=20GPUBUG-103/104/105=20=E2=80=94=20fi?= =?UTF-8?q?x=208=20draw-state=20register=20addresses=20+=20index=5Fsize=20?= =?UTF-8?q?bit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eight of the register-index constants in draw_state.rs::reg, plus the function-local PA_SU_VTX_CNTL constant in vertex_fetch_0_rect, pointed at completely unrelated registers because the canonical canary table (register_table.inc) was misread when the module was first authored. Re-validated each value against lines 1232-1336 of that file. | Register | Pre-fix | Canary | Was-actually | | ------------------------- | ------- | ------ | ------------- | | VGT_DRAW_INITIATOR | 0x2281 | 0x21FC | (junk) | | VGT_DMA_BASE | 0x2282 | 0x21FA | (junk) | | VGT_DMA_SIZE | 0x2283 | 0x21FB | (junk) | | PA_SC_WINDOW_SCISSOR_TL | 0x200E | 0x2081 | SCREEN_SCIS_TL| | PA_SC_WINDOW_SCISSOR_BR | 0x200F | 0x2082 | SCREEN_SCIS_BR| | RB_COLOR_INFO_1 | 0x2010 | 0x2003 | COHER_DEST_BASE_10| | RB_COLOR_INFO_2 | 0x2011 | 0x2004 | COHER_DEST_BASE_11| | RB_COLOR_INFO_3 | 0x2012 | 0x2005 | COHER_DEST_BASE_12| | PA_SU_VTX_CNTL | 0x2083 | 0x2302 | PA_SC_CLIPRECT_RULE| Also corrected the `index_size` bit position in VGT_DRAW_INITIATOR extraction: was bit 8 (which is `major_mode[0]`), should be bit 11 per canary `registers.h:324` (`xenos::IndexFormat index_size : 1; // +11`). The block comment in `extract()` was also wrong about the intermediate field layout and has been refreshed. Verification at -n 100M lockstep: swaps: 2 → 2 (unchanged) draws: 0 → 0 (still gated — see below) packets: ~61M (within noise) Tests: 149 (no count change; existing draw_state tests cover the new constants implicitly via behavioral round-trip). The audit predicted Phases C+D+E together would unlock `draws > 0`, but the runtime plateau is multi-causal per the audit's own analysis (`project_xenia_rs_audit_2026_05_02.md`). 
The likely remaining blockers in -n 100M: * 4 parked-waiter worker threads (handles 0x1004, 0x100c, 0x15e4, 0x42450b5c) — Phase F's XAM/spinlock fixes target this. * shader_blobs_live=0 after 100M — the game hasn't issued IM_LOAD yet because workers haven't loaded shader resources. The register fixes here are still load-bearing for any draw that DOES happen (every register read at 0x2281 was junk before this commit) — landing them now is correct even if draws=0 persists until Phase F unparks the resource-loader threads. Closes GPUBUG-103, GPUBUG-104, GPUBUG-105 (P0). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-gpu/src/draw_state.rs | 50 ++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/crates/xenia-gpu/src/draw_state.rs b/crates/xenia-gpu/src/draw_state.rs index 0b078a8..c1887f7 100644 --- a/crates/xenia-gpu/src/draw_state.rs +++ b/crates/xenia-gpu/src/draw_state.rs @@ -362,9 +362,12 @@ pub fn vertex_fetch_0_rect( f32::from_bits(swapped) }); - // PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel), 1 = kOpenGL (no offset). - // Register index 0x2083 per register_table.inc (PA_SU_VTX_CNTL). - const PA_SU_VTX_CNTL: u32 = 0x2083; + // PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel), + // 1 = kOpenGL (no offset). Pre-fix the constant at this site read + // 0x2083 (PA_SC_CLIPRECT_RULE), giving non-deterministic half-pixel + // offsets that broke 3D camera matrices. Canary `register_table.inc` + // line 1336 says PA_SU_VTX_CNTL is 0x2302. GPUBUG-105. + const PA_SU_VTX_CNTL: u32 = 0x2302; let half_pixel_offset = if rf.read(PA_SU_VTX_CNTL) & 1 == 0 { 0.5f32 } else { @@ -588,26 +591,35 @@ impl ResolveInfo { } } -/// Register indices from `xenia-canary/src/xenia/gpu/registers.h`. Only what -/// the extractor reads is named here. +/// Register indices from `xenia-canary/src/xenia/gpu/register_table.inc`. +/// Only what the extractor reads is named here. 
+/// +/// GPUBUG-103/104/105: 8 of these were previously off-by-many — pointing at +/// completely different registers. Each has been re-validated against +/// canary's `register_table.inc`. pub mod reg { - pub const VGT_DRAW_INITIATOR: u32 = 0x2281; - pub const VGT_DMA_BASE: u32 = 0x2282; - pub const VGT_DMA_SIZE: u32 = 0x2283; + // VGT (vertex/geometry transform) — GPUBUG-103. + pub const VGT_DRAW_INITIATOR: u32 = 0x21FC; // was 0x2281 (junk). + pub const VGT_DMA_BASE: u32 = 0x21FA; // was 0x2282 (junk). + pub const VGT_DMA_SIZE: u32 = 0x21FB; // was 0x2283 (junk). pub const PA_CL_VPORT_XSCALE: u32 = 0x210F; pub const PA_CL_VPORT_XOFFSET: u32 = 0x2110; pub const PA_CL_VPORT_YSCALE: u32 = 0x2111; pub const PA_CL_VPORT_YOFFSET: u32 = 0x2112; pub const PA_CL_VPORT_ZSCALE: u32 = 0x2113; pub const PA_CL_VPORT_ZOFFSET: u32 = 0x2114; - pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x200E; - pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x200F; + // GPUBUG-104: pre-fix these read PA_SC_SCREEN_SCISSOR_{TL,BR} (the + // global screen scissor) instead of the per-window scissor. + pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x2081; // was 0x200E (= SCREEN_SCISSOR_TL). + pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x2082; // was 0x200F (= SCREEN_SCISSOR_BR). pub const RB_MODECONTROL: u32 = 0x2208; pub const RB_SURFACE_INFO: u32 = 0x2000; pub const RB_COLOR_INFO_0: u32 = 0x2001; - pub const RB_COLOR_INFO_1: u32 = 0x2010; - pub const RB_COLOR_INFO_2: u32 = 0x2011; - pub const RB_COLOR_INFO_3: u32 = 0x2012; + // GPUBUG-105: per-RT color info; pre-fix indexed COHER_DEST_BASE_* + // instead. Canary names them RB_COLOR1/2/3_INFO. + pub const RB_COLOR_INFO_1: u32 = 0x2003; // was 0x2010 (= COHER_DEST_BASE_10). + pub const RB_COLOR_INFO_2: u32 = 0x2004; // was 0x2011. + pub const RB_COLOR_INFO_3: u32 = 0x2005; // was 0x2012. 
pub const RB_DEPTH_INFO: u32 = 0x2002; pub const RB_COLORCONTROL: u32 = 0x2202; pub const RB_DEPTHCONTROL: u32 = 0x2200; @@ -638,14 +650,18 @@ pub fn extract( dma_base: Option, dma_size: Option, ) -> DrawState { - // `VGT_DRAW_INITIATOR` bit layout (per canary): - // [5:0] prim_type + // `VGT_DRAW_INITIATOR` bit layout (per canary `registers.h:315-327`): + // [5:0] prim_type (PrimitiveType) // [7:6] source_select (0=DMA, 1=immediate, 2=auto) - // [8] index_size (0=16-bit, 1=32-bit) + // [9:8] major_mode + // [10] _pad + // [11] index_size (0=16-bit, 1=32-bit) ← GPUBUG-103 + // [12] not_eop + // [15:13] _pad // [31:16] num_indices let prim_bits = vgt_draw_initiator & 0x3F; let source_select = (vgt_draw_initiator >> 6) & 0x3; - let index_size_bit = (vgt_draw_initiator >> 8) & 0x1; + let index_size_bit = (vgt_draw_initiator >> 11) & 0x1; let num_indices = (vgt_draw_initiator >> 16) & 0xFFFF; let index_size = if index_size_bit == 0 { IndexSize::Sixteen