//! Xenos (ATI R500-family) shader microcode decoder.
//!
//! Ground truth: `xenia-canary/src/xenia/gpu/ucode.h`. We parse only what a
//! shader *interpreter* (P3 uber-shader) needs: control-flow clauses, ALU
//! instructions (vector + scalar pipes), and fetch instructions (vertex +
//! texture). The uber-shader consumes this IR directly; when a WGSL-emitting
//! translator comes online in P7, it reuses the same parser.
//!
//! ## Binary layout
//!
//! A compiled shader has two sections back-to-back:
//!
//! 1. **Control-flow block** — `cf_count` 48-bit clauses, stored in pairs.
//!    Canary packs two clauses (96 bits) into three 32-bit words:
//!    ```text
//!    word0        word1        word2
//!    [---- CF_A (48) ---][---- CF_B (48) ---]
//!    ```
//!    Word 0 is the low 32 bits of CF_A; word 1's low 16 bits finish CF_A
//!    and its high 16 bits start CF_B; word 2 holds CF_B's remaining 32 bits.
//!
//! 2. **Instruction block** — variable-size array of 96-bit ALU / fetch
//!    instructions. Each control-flow clause of kind `Exec*` references a
//!    contiguous range of these by `(address, count)`, both measured in
//!    instructions; multiply by 3 to get a dword offset/length.
//!
//! Guest memory holds the microcode as big-endian dwords, but the `raw`
//! `&[u32]` slice we parse is already host-endian-corrected by the PM4
//! executor that cached the shader blob, so no byte swapping happens here.
//! See `ucode.h:218-256` for the exec clause bit layout and `:700-877` for
//! the fetch/ALU mix.

pub mod alu;
pub mod control_flow;
pub mod fetch;

use self::alu::AluInstruction;
use self::control_flow::{AllocKind, ControlFlowInstruction, decode_cf_pair};
use self::fetch::FetchInstruction;

/// CF-clause kind codes encoded into the WGSL-facing packed shader. Kept
/// in sync with `shaders/xenos_interp.wgsl`'s `CF_KIND_*` constants.
pub mod cf_kind { pub const EXEC: u32 = 0; pub const EXEC_END: u32 = 1; pub const ALLOC: u32 = 2; pub const EXIT: u32 = 3; pub const LOOP_START: u32 = 4; pub const LOOP_END: u32 = 5; pub const COND_JMP: u32 = 6; pub const COND_CALL: u32 = 7; pub const RETURN: u32 = 8; pub const UNKNOWN: u32 = 15; } /// Alloc-kind codes, packed into the aux dword of an `Alloc` clause. pub mod cf_alloc_kind { pub const POSITION: u32 = 0; pub const INTERPOLATORS: u32 = 1; pub const COLORS: u32 = 2; pub const MEMEXPORT: u32 = 3; pub const OTHER: u32 = 4; } /// Pack a [`ParsedShader`] into the dense dword layout the WGSL runtime /// interpreter expects: /// /// ```text /// [0] cf_count /// [1 .. 1 + cf_count*3] CF table: (kind, primary, aux) triples per clause /// [1 + cf_count*3 ..] raw 3-dword instruction stream (ALU/fetch) /// ``` /// /// The CF table lets WGSL walk clauses without reconstructing bit-packed /// layouts on the GPU. Semantics per `kind`: /// /// | kind | primary | aux | /// |-------------|----------------------------|------------------------------| /// | EXEC/EXEC_END | address (in triples) | (sequence<<8) \| count | /// | ALLOC | alloc_kind (see cf_alloc_kind) | size | /// | EXIT | 0 | 0 | /// | LOOP_START | address | loop_id | /// | LOOP_END | address | loop_id | /// | COND_JMP | target | predicate flags | /// | COND_CALL | target | 0 | /// | RETURN | 0 | 0 | /// | UNKNOWN | opcode | 0 | pub fn pack_for_wgsl(parsed: &ParsedShader) -> Vec { let cf_count = parsed.cf.len() as u32; let mut out = Vec::with_capacity(1 + (cf_count as usize) * 3 + parsed.instructions.len()); out.push(cf_count); for clause in &parsed.cf { let (kind, primary, aux) = encode_cf(*clause); out.push(kind); out.push(primary); out.push(aux); } out.extend_from_slice(&parsed.instructions); out } fn encode_cf(c: ControlFlowInstruction) -> (u32, u32, u32) { use ControlFlowInstruction::*; match c { Exec { address, count, sequence, is_end, predicated, predicate_condition, } => { let pred_bits = (predicated 
as u32) | ((predicate_condition as u32) << 1); let kind = if is_end { cf_kind::EXEC_END } else { cf_kind::EXEC } | (pred_bits << 8); (kind, address, (sequence << 8) | count) } Alloc { size, kind } => { let akind = match kind { AllocKind::Position => cf_alloc_kind::POSITION, AllocKind::Interpolators => cf_alloc_kind::INTERPOLATORS, AllocKind::Colors => cf_alloc_kind::COLORS, AllocKind::Memexport => cf_alloc_kind::MEMEXPORT, AllocKind::Other => cf_alloc_kind::OTHER, }; (cf_kind::ALLOC, akind, size) } Exit => (cf_kind::EXIT, 0, 0), LoopStart { address, loop_id } => (cf_kind::LOOP_START, address, loop_id), LoopEnd { address, loop_id } => (cf_kind::LOOP_END, address, loop_id), CondJmp { target, predicated, predicate_condition, } => { let pred_bits = (predicated as u32) | ((predicate_condition as u32) << 1); (cf_kind::COND_JMP, target, pred_bits) } CondCall { target } => (cf_kind::COND_CALL, target, 0), Return => (cf_kind::RETURN, 0, 0), Unknown { opcode } => (cf_kind::UNKNOWN, opcode as u32, 0), } } /// One instruction word set from the instruction-block section. Xenos packs /// ALU and fetch instructions identically (96 bits each); the owning exec /// clause's "sequence" bitmap decides which is which. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DecodedInstruction { /// ALU pipe (vector ALU + optional co-issued scalar ALU). Alu(AluInstruction), /// Vertex or texture fetch. Fetch(FetchInstruction), } /// Parsed shader: the control-flow clause list + the raw 32-bit instruction /// words. The uber-shader / translator is expected to index into /// `instructions` based on `(clause.address * 3, clause.count * 3)`. #[derive(Debug, Clone, Default)] pub struct ParsedShader { pub cf: Vec, /// Raw instruction dwords. Each 3-dword triple is one ALU or fetch /// instruction; the owning `Exec` clause's `sequence` bitmap picks the /// kind. pub instructions: Vec, } /// Decode a shader blob. 
`raw_dwords` is a host-endian slice of the entire /// microcode buffer (control flow + instructions). Heuristic: CF dword count /// is encoded in the first word's low 12 bits of the last exec clause — /// canary iterates until it hits a clause of kind `Exit`. We do the same. pub fn parse_shader(raw_dwords: &[u32]) -> ParsedShader { let mut cf = Vec::new(); // CF clauses are 48-bit (word1 lo 16 + word0 = 48 or so per canary's // layout). Walk pairs of 3 dwords per pair of clauses. let mut i = 0usize; while i + 2 < raw_dwords.len() { let a = decode_cf_pair(raw_dwords[i], raw_dwords[i + 1], raw_dwords[i + 2]); let (first, second) = a; let seen_exit = matches!( first, ControlFlowInstruction::Exit | ControlFlowInstruction::Unknown { .. } ) || matches!( second, ControlFlowInstruction::Exit | ControlFlowInstruction::Unknown { .. } ); cf.push(first); cf.push(second); i += 3; if seen_exit { break; } } // Everything after `i` dwords is the instruction block. let instructions = raw_dwords[i..].to_vec(); ParsedShader { cf, instructions } } #[cfg(test)] mod tests { use super::*; #[test] fn empty_blob_parses_empty() { let p = parse_shader(&[]); assert!(p.cf.is_empty()); assert!(p.instructions.is_empty()); } #[test] fn pack_for_wgsl_layout_is_correct() { // Build a tiny ParsedShader by hand and verify the packed form. 
let parsed = ParsedShader { cf: vec![ ControlFlowInstruction::Exec { address: 0x10, count: 3, sequence: 0b1010, is_end: false, predicated: false, predicate_condition: false, }, ControlFlowInstruction::Exit, ], instructions: vec![0x1111, 0x2222, 0x3333], }; let packed = pack_for_wgsl(&parsed); assert_eq!(packed[0], 2, "cf_count"); // First clause: EXEC, address=0x10, aux = (sequence<<8)|count = 0x0A03 assert_eq!(packed[1] & 0xFF, cf_kind::EXEC); assert_eq!(packed[2], 0x10); assert_eq!(packed[3], (0b1010 << 8) | 3); // Second clause: EXIT assert_eq!(packed[4] & 0xFF, cf_kind::EXIT); // Instruction block starts at 1 + 2*3 = 7 assert_eq!(packed[7..], [0x1111, 0x2222, 0x3333]); } #[test] fn trivial_exit_clause_stops_parsing() { // Two clauses: [NOP (kind=0), EXIT (kind=1)] encoded per canary. // Exit clause is opcode 1 in the top 4 bits of the upper 16 bits. let w0 = 0u32; // clause A body let w1 = (1u32 << 12) << 16; // upper 16 bits = 0x1000 → opcode=1 (EXIT) for clause A let w2 = 0u32; let p = parse_shader(&[w0, w1, w2, 0xDEAD_BEEF]); assert!(!p.cf.is_empty()); // Exit detected → remaining dword is instruction data. assert_eq!(p.instructions, vec![0xDEAD_BEEF]); } }