The publisher splash (title idx0) rendered FLAT in ours while canary samples a texture: ours never decoded the logo's textured pixel shader (E59B2B3D, a `tfetch2D` sprite) even though our guest IM_LOADs the exact same microcode canary does (verified byte-identical against the Wine oracle). The shader was misparsed as flat.

Three coupled bugs in the ucode decoder, all off vs canary `gpu/ucode.h`:

1. The CF opcode table was off-by-one (`control_flow.rs`): it mapped opcode 0→Exec and 1→Exit, but Xenos has 0=kNop, 1=kExec, 2=kExecEnd, 3..6/13..14 the cond-exec variants, 7/8 loop, 9/10 call/return, 11 condjmp, 12 alloc, 15 mark-vs-fetch-done. So a real `kExec` clause was read as a terminal `Exit`, truncating the CF block and dropping every instruction (incl. the `tfetch`) after it. Added Nop/MarkVsFetchDone variants; parse now ends on an END-bit exec clause.
2. The exec/loop `address` is an absolute instruction-triple index from shader dword 0, but it indexed our post-CF `instructions` slice directly (`ucode/mod.rs`). Rebase addresses by the CF triple count so `address*3` lands on the right instruction.
3. Fetch instruction bitfields were wrong (`ucode/fetch.rs`): `const_index` was read from bit 5 (actually `src_reg`) instead of bit 20, and texture `dimension` from dword1 instead of dword2 bit14. The logo's `tfetch ..,tf0` was read as `tf1`, whose empty fetch-constant failed to decode → no texture.

Also, the `sequence` fetch/ALU bit is bit[0] of each pair, not bit[1] (`shader_metrics.rs`, `translator.rs`, `xenos_interp.wgsl`).

Result (--gpu-inline, deterministic 2x): the active PS's `tfetch_slots` now resolves slot 0, the tf0 fetch-constant decodes (fmt K8888), and `gpu.texture.decode` fires (137x at -n 50M; texture_cache_entries 0→1, the only golden field that changed — all draw/swap counts unchanged). The same fixes correct the WGSL uber-shader's fetch/CF walk for the threaded/--ui path.

Added a regression test that parses the real E59B2B3D microcode and asserts a tfetch slot is found.

Golden re-baselined (texture_cache_entries 0→1).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
133 lines
4.7 KiB
Rust
133 lines
4.7 KiB
Rust
//! Xenos fetch (vertex + texture) instruction decoder.
//!
//! Like ALU instructions, fetches are 96 bits (3 dwords). The opcode lives
//! in the low 5 bits of word0. We split them into `VertexFetch` and
//! `TextureFetch` structurally because their operand layouts differ.
//!
//! Reference: `xenia-canary/src/xenia/gpu/ucode.h:690-877`.
|
|
|
|
/// Decoded fetch instruction.
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
pub enum FetchInstruction {
|
|
Vertex(VertexFetch),
|
|
Texture(TextureFetch),
|
|
/// Unknown / minor variants we don't model yet.
|
|
Unknown { opcode: u8, raw: [u32; 3] },
|
|
}
|
|
|
|
/// Operands of a decoded vertex fetch instruction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VertexFetch {
    /// Vertex fetch constant: the 5-bit `const_index` field taken from
    /// dword 0 bits 20..=24, so the value is always in `0..=31`.
    ///
    /// NOTE(review): canary's `ucode.h` also has a 2-bit `const_index_sel`
    /// sub-slot; it is not folded in here, so a caller that needs the full
    /// `0..=95` vertex fetch-constant slot must derive it from `raw` — confirm
    /// against the call sites.
    pub fetch_const: u8,
    /// Source register index (vertex index in r#), 6 bits.
    pub src_register: u8,
    /// Destination register for the fetched value, 6 bits.
    pub dest_register: u8,
    /// 4-bit destination write mask.
    pub dest_write_mask: u8,
    /// The three instruction dwords exactly as they were passed to the decoder.
    pub raw: [u32; 3],
}
|
|
|
|
/// Operands of a decoded texture fetch instruction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TextureFetch {
    /// Texture fetch constant index (0..=31).
    pub fetch_const: u8,
    /// Source register index holding the texture coordinates, 6 bits.
    pub src_register: u8,
    /// Destination register for the sampled value, 6 bits.
    pub dest_register: u8,
    /// 4-bit destination write mask.
    pub dest_write_mask: u8,
    /// Dimension: 0=1D, 1=2D, 2=3D/stacked, 3=cube.
    pub dimension: u8,
    /// The three instruction dwords exactly as they were passed to the decoder.
    pub raw: [u32; 3],
}
|
|
|
|
/// Fetch opcodes (low 5 bits of word0). From `ucode.h` (`FetchOpcode`).
///
/// The values are written in decimal to match the reference enum. They were
/// previously transcribed as hex literals (`0x16` for 16, and so on), which
/// shifted every opcode after `TEXTURE_FETCH` and swapped gradients/weights.
pub mod op {
    /// Fetch vertex data through a vertex fetch constant.
    pub const VERTEX_FETCH: u8 = 0;
    /// Sample a texture through a texture fetch constant.
    pub const TEXTURE_FETCH: u8 = 1;
    /// Fraction of border colour blended in at the given coordinates.
    pub const GET_TEXTURE_BORDER_COLOR_FRAC: u8 = 16;
    /// LOD the sampler computed for the given coordinates.
    pub const GET_TEXTURE_COMPUTED_LOD: u8 = 17;
    /// Screen-space gradients of the source coordinates.
    pub const GET_TEXTURE_GRADIENTS: u8 = 18;
    /// Weights used by a bilinear fetch.
    pub const GET_TEXTURE_WEIGHTS: u8 = 19;
    /// Set the explicit LOD used by following texture fetches.
    pub const SET_TEXTURE_LOD: u8 = 24;
    /// Set the explicit horizontal gradients used by following fetches.
    pub const SET_TEXTURE_GRADIENTS_HORZ: u8 = 25;
    /// Set the explicit vertical gradients used by following fetches.
    pub const SET_TEXTURE_GRADIENTS_VERT: u8 = 26;
}
|
|
|
|
pub fn decode_fetch(words: [u32; 3]) -> FetchInstruction {
|
|
// Fetch dword0 bitfields (Xenos `ucode.h:740-749` vfetch / `844-845`
|
|
// tfetch): opcode_value:5, src_reg:6, src_reg_am:1, dst_reg:6,
|
|
// dst_reg_am:1, (fetch_valid_only|must_be_one):1, const_index:5 @ bit20,
|
|
// ... The prior decoder read `const_index` from bit 5 (which is actually
|
|
// `src_reg`), so every fetch reported the wrong fetch-constant slot — the
|
|
// logo `tfetch2D ..., tf0` was read as `tf1`, and slot 1's empty constant
|
|
// failed to decode → no texture. The texture-fetch `dimension` lives in
|
|
// dword2 bits 14..15, not dword1.
|
|
let w0 = words[0];
|
|
let w1 = words[1];
|
|
let w2 = words[2];
|
|
let opcode = (w0 & 0x1F) as u8;
|
|
match opcode {
|
|
op::VERTEX_FETCH => FetchInstruction::Vertex(VertexFetch {
|
|
fetch_const: ((w0 >> 20) & 0x1F) as u8,
|
|
src_register: ((w0 >> 5) & 0x3F) as u8,
|
|
dest_register: ((w0 >> 12) & 0x3F) as u8,
|
|
dest_write_mask: (w1 & 0xF) as u8,
|
|
raw: words,
|
|
}),
|
|
op::TEXTURE_FETCH => FetchInstruction::Texture(TextureFetch {
|
|
fetch_const: ((w0 >> 20) & 0x1F) as u8,
|
|
src_register: ((w0 >> 5) & 0x3F) as u8,
|
|
dest_register: ((w0 >> 12) & 0x3F) as u8,
|
|
dest_write_mask: (w1 & 0xF) as u8,
|
|
dimension: ((w2 >> 14) & 0x3) as u8,
|
|
raw: words,
|
|
}),
|
|
_ => FetchInstruction::Unknown { opcode, raw: words },
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Packs the dword-0 fields shared by vertex and texture fetches:
    /// opcode at bit 0, `src_reg` at bit 5, `dst_reg` at bit 12 and
    /// `const_index` at bit 20.
    fn pack_dword0(opcode: u8, src: u32, dest: u32, fetch_const: u32) -> u32 {
        u32::from(opcode) | (src << 5) | (dest << 12) | (fetch_const << 20)
    }

    #[test]
    fn decode_vertex_fetch() {
        // fetch_const=5, src=2, dest=7.
        let w0 = pack_dword0(op::VERTEX_FETCH, 2, 7, 5);
        match decode_fetch([w0, 0, 0]) {
            FetchInstruction::Vertex(vf) => {
                assert_eq!(vf.fetch_const, 5);
                assert_eq!(vf.src_register, 2);
                assert_eq!(vf.dest_register, 7);
                assert_eq!(vf.raw, [w0, 0, 0]);
            }
            other => panic!("expected Vertex, got {other:?}"),
        }
    }

    #[test]
    fn decode_texture_fetch() {
        // fetch_const=3, src=1, dest=4; dimension lives in dword2 bits 14..15.
        let w0 = pack_dword0(op::TEXTURE_FETCH, 1, 4, 3);
        let w2 = 2u32 << 14;
        match decode_fetch([w0, 0, w2]) {
            FetchInstruction::Texture(tf) => {
                assert_eq!(tf.fetch_const, 3);
                assert_eq!(tf.src_register, 1);
                assert_eq!(tf.dest_register, 4);
                assert_eq!(tf.dimension, 2);
                assert_eq!(tf.raw, [w0, 0, w2]);
            }
            other => panic!("expected Texture, got {other:?}"),
        }
    }

    #[test]
    fn unknown_opcode_is_classified() {
        // 0x16 is neither VERTEX_FETCH nor TEXTURE_FETCH, so it must fall
        // through to `Unknown` with the opcode and raw dwords preserved.
        assert_eq!(
            decode_fetch([0x16, 0, 0]),
            FetchInstruction::Unknown {
                opcode: 0x16,
                raw: [0x16, 0, 0],
            }
        );
    }
}
|