//! Xenos control-flow clause decoder.
//!
//! A shader's CF block is a sequence of 48-bit clauses packed two-per-
//! three-dword row. Each clause encodes an opcode and type-specific fields
//! (exec addr/count, loop start/end, branch target, etc.).
//!
//! Spec at `xenia-canary/src/xenia/gpu/ucode.h:87-256`. We cover the subset
//! the uber-shader needs: `Exec*`, `Loop*`, `Alloc`, `Jmp`, `Call/Ret`,
//! `Exit`. Unknown opcodes are classified as `Unknown { opcode }` so the
//! translator can log + degrade.

/// Bit position of the 4-bit opcode inside a 48-bit clause payload.
const OPCODE_SHIFT: u32 = 44;

/// Payload bit carrying `condition_` for the predicate-gated exec opcodes
/// (word1 bit 10). The bit just below it (payload bit 41) is
/// `is_predicate_clean_`, which must not be mistaken for the condition.
const PRED_CONDITION_BIT: u32 = 42;

/// Parsed representation of one CF clause.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ControlFlowInstruction {
    /// `kExec` / `kExecEnd` (and the conditional exec variants) — execute a
    /// range of ALU/fetch instructions.
    Exec {
        /// Index of the first instruction of this clause inside the
        /// instruction block, expressed in **triple units** (each
        /// instruction is 3 dwords).
        address: u32,
        /// Number of triples to execute.
        count: u32,
        /// The ALU-vs-fetch sequence bitmap (2 bits per instruction).
        sequence: u32,
        /// True when this clause ends the shader.
        is_end: bool,
        /// True if predicated; skip when predicate != predicate_condition.
        predicated: bool,
        /// Predicate value required for the clause to run. Only meaningful
        /// when `predicated` is true; always false otherwise.
        predicate_condition: bool,
    },
    /// `kLoopStart` — begin a `aL` loop referencing a loop constant.
    LoopStart { address: u32, loop_id: u32 },
    /// `kLoopEnd` — close the loop; `address` points at the matching start.
    LoopEnd { address: u32, loop_id: u32 },
    /// `kCondJmp` — conditional jump to another CF index.
    CondJmp {
        target: u32,
        predicated: bool,
        predicate_condition: bool,
    },
    /// `kCondCall` — call into another CF subroutine.
    CondCall { target: u32 },
    /// `kReturn` — return from subroutine.
    Return,
    /// `kAlloc` — pre-allocate export registers (position, interpolators,
    /// colors).
    Alloc { size: u32, kind: AllocKind },
    /// `kNop` — fills space in the CF block; executes nothing, does not end
    /// the shader. (Xenos opcode 0.)
    Nop,
    /// `kMarkVsFetchDone` — hint that no more vertex fetches will be
    /// performed. (Xenos opcode 15.) Non-terminating.
    MarkVsFetchDone,
    /// Exit the shader (terminal). Synthesized — Xenos has no dedicated exit
    /// opcode; the shader ends after an `Exec`/`CondExec` clause with the
    /// END bit set (`is_end`). Retained for callers/tests that reference it;
    /// the decoder in this file never produces it.
    Exit,
    /// Unknown / unhandled opcode.
    Unknown { opcode: u8 },
}

/// Export target types for `kAlloc` clauses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AllocKind {
    Position,
    Interpolators,
    Colors,
    Memexport,
    Other,
}

impl AllocKind {
    /// Map the low three bits of `b` onto an export kind. Values 0..=3 select
    /// `Position`, `Interpolators`, `Colors` and `Memexport` respectively;
    /// 4..=7 collapse to `Other`.
    fn from_bits(b: u32) -> Self {
        match b & 0x7 {
            0 => AllocKind::Position,
            1 => AllocKind::Interpolators,
            2 => AllocKind::Colors,
            3 => AllocKind::Memexport,
            _ => AllocKind::Other,
        }
    }
}

/// Decode one row (three consecutive CF dwords) into two CF clauses.
///
/// Word layout per canary (`ucode.h:218-256`):
/// - word0 + lo16(word1) → CF_A's 48-bit payload
/// - hi16(word1) + word2 → CF_B's 48-bit payload
///
/// The opcode lives in the top 4 bits of the 48-bit payload (= bits 44..47).
///
/// Returns `(CF_A, CF_B)` in row order.
pub fn decode_cf_pair(
    word0: u32,
    word1: u32,
    word2: u32,
) -> (ControlFlowInstruction, ControlFlowInstruction) {
    // Build each 48-bit value as u64; LE within the clause.
    let a = u64::from(word0) | (u64::from(word1 & 0xFFFF) << 32);
    let b = u64::from(word1 >> 16) | (u64::from(word2) << 16);
    (decode_single(a), decode_single(b))
}

/// Decode a single 48-bit clause payload (bits 48..63 of `payload` are
/// ignored).
fn decode_single(payload: u64) -> ControlFlowInstruction {
    // Top 4 bits of the 48-bit payload.
    let opcode = ((payload >> OPCODE_SHIFT) & 0xF) as u8;

    // GPUBUG-103 (iterate-3P): clause-level predication is determined by the
    // *opcode*, not by free bits. The 48-bit CF payload is word0 = bits 0..31,
    // word1 = bits 32..47. Per canary `ucode.h`:
    //   * `ControlFlowExecInstruction` (kExec/kExecEnd, opcodes 1/2): NOT
    //     predicate-gated — it runs unconditionally.
    //   * `ControlFlowCondExecInstruction` (kCondExec/kCondExecEnd, 3/4): gated
    //     by a *bool constant*, `condition_` at word1 bit 10 = payload bit 42.
    //     We don't model bool-constant gating in the WGSL paths (the bool is
    //     virtually always set for these), so treat as unconditional.
    //   * `ControlFlowCondExecPredInstruction` (kCondExecPred/...End/Clean...,
    //     5/6/13/14): gated by the *predicate register*. Word1 is laid out as
    //     vc_lo_(2) | pad(7) | is_predicate_clean_(1) | condition_(1) |
    //     address_mode_(1) | opcode_(4), so `condition_` is word1 bit 10 =
    //     payload bit 42 — the same position as in the bool-constant form.
    //     Word1 bit 9 (payload bit 41) is `is_predicate_clean_`; reading it as
    //     the condition yields an unrelated polarity.
    // The original code read bits 28/29 (which fall inside `sequence_`/`vc_hi_`)
    // and stamped `predicated=true` on plenty of plain `kExec` clauses — which
    // made the P7 translator reject EVERY splash VS as `cf_cond`, forcing the
    // interpreter (placeholder geometry) for all draws.
    let is_pred_gated = matches!(opcode, 5 | 6 | 13 | 14);
    let predicated = is_pred_gated;
    let predicate_condition = is_pred_gated && ((payload >> PRED_CONDITION_BIT) & 1) != 0;

    // Xenos `ControlFlowOpcode` (canary `ucode.h:86-160`):
    //   0 kNop, 1 kExec, 2 kExecEnd, 3 kCondExec, 4 kCondExecEnd,
    //   5 kCondExecPred, 6 kCondExecPredEnd, 7 kLoopStart, 8 kLoopEnd,
    //   9 kCondCall, 10 kReturn, 11 kCondJmp, 12 kAlloc,
    //   13 kCondExecPredClean, 14 kCondExecPredCleanEnd, 15 kMarkVsFetchDone.
    // All exec variants share the address(12)/count(3)/sequence(12) layout
    // of `ControlFlowExecInstruction` (payload bit 15, between count and
    // sequence, is not decoded); the `*End` variants terminate the shader.
    // (Prior table was off-by-one — it mapped 0→Exec and 1→Exit, so a real
    // `kExec` clause was misread as a terminal `Exit`, truncating the CF
    // block and dropping every `tfetch` in it.)
    let exec = |is_end: bool| ControlFlowInstruction::Exec {
        address: (payload & 0xFFF) as u32,
        count: ((payload >> 12) & 0x7) as u32,
        sequence: ((payload >> 16) & 0xFFF) as u32,
        is_end,
        predicated,
        predicate_condition,
    };

    match opcode {
        0 => ControlFlowInstruction::Nop,
        1 | 3 | 5 | 13 => exec(false),
        2 | 4 | 6 | 14 => exec(true),
        7 => ControlFlowInstruction::LoopStart {
            address: (payload & 0x3FF) as u32,
            loop_id: ((payload >> 16) & 0x1F) as u32,
        },
        8 => ControlFlowInstruction::LoopEnd {
            address: (payload & 0x3FF) as u32,
            loop_id: ((payload >> 16) & 0x1F) as u32,
        },
        9 => ControlFlowInstruction::CondCall {
            target: (payload & 0x3FF) as u32,
        },
        10 => ControlFlowInstruction::Return,
        // NOTE(review): opcode 11 is never in the predicate-gated set above,
        // so both predicate fields are always `false` here. Confirm against
        // canary's `ControlFlowCondJmpInstruction` whether the jump's own
        // predicate/condition bits should be decoded instead.
        11 => ControlFlowInstruction::CondJmp {
            target: (payload & 0x3FF) as u32,
            predicated,
            predicate_condition,
        },
        // NOTE(review): `kind` is taken from payload bits 4..6. canary's
        // `ControlFlowAllocInstruction` appears to keep the 2-bit alloc type
        // at word1 bits 9..10 (payload bits 41..42) instead — e.g. the Alloc
        // clause of the logo *pixel* shader in the tests below has payload
        // 0xC400_0000_0000, whose bits 4..6 are clear, so it decodes as
        // `Position` here. Left unchanged because the hardware value shared by
        // interpolators/colors needs a mapping decision; confirm and fix.
        12 => ControlFlowInstruction::Alloc {
            size: (payload & 0x7) as u32,
            kind: AllocKind::from_bits(((payload >> 4) & 0x7) as u32),
        },
        15 => ControlFlowInstruction::MarkVsFetchDone,
        // Unreachable in practice (the opcode is masked to 4 bits); kept so
        // the match over `u8` is exhaustive.
        other => ControlFlowInstruction::Unknown { opcode: other },
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Split a 48-bit clause payload into the `(word0, word1)` pair that
    /// places it in the first (CF_A) slot of a row.
    fn clause_a_words(payload: u64) -> (u32, u32) {
        (
            (payload & 0xFFFF_FFFF) as u32,
            ((payload >> 32) & 0xFFFF) as u32,
        )
    }

    #[test]
    fn opcode_nop_and_exec_decode() {
        // Xenos opcode 0 = kNop (non-terminating padding).
        let (word0, word1) = clause_a_words(0);
        assert_eq!(
            decode_cf_pair(word0, word1, 0).0,
            ControlFlowInstruction::Nop
        );

        // Xenos opcode 1 = kExec (executes instructions; NOT a terminal exit).
        let (word0, word1) = clause_a_words(1u64 << 44);
        match decode_cf_pair(word0, word1, 0).0 {
            ControlFlowInstruction::Exec { is_end, .. } => assert!(!is_end),
            other => panic!("opcode 1 should be non-end Exec, got {other:?}"),
        }

        // Xenos opcode 15 = kMarkVsFetchDone (non-terminating hint).
        let (word0, word1) = clause_a_words(15u64 << 44);
        assert_eq!(
            decode_cf_pair(word0, word1, 0).0,
            ControlFlowInstruction::MarkVsFetchDone
        );
    }

    #[test]
    fn real_logo_shader_has_tfetch_clauses() {
        // The publisher-logo pixel shader E59B2B3DA4AA9008 (captured from the
        // canary oracle, byte-identical to the microcode our guest IM_LOADs).
        // Regression for iterate-3M: the old off-by-one opcode table decoded
        // its leading `kExec` (opcode 1) as a terminal `Exit`, truncating the
        // CF block so the `tfetch2D` never appeared → flat splash.
        let ucode: [u32; 24] = [
            0x00011002, 0x00001200, 0xC4000000, //
            0x00004003, 0x00002200, 0x00000000, //
            0x10082021, 0x1F1FF688, 0x00004000, //
            0xC8080001, 0x001B1B00, 0xC1020000, //
            0xC8070000, 0x00C0C000, 0xC1020000, //
            0xC8070001, 0x00C01B00, 0xC1000100, //
            0xC80F8000, 0x00000000, 0xC2010100, //
            0x00000000, 0x00000000, 0x00000000,
        ];
        let p = crate::ucode::parse_shader(&ucode);
        let exec_clauses = p
            .cf
            .iter()
            .filter(|c| matches!(c, ControlFlowInstruction::Exec { .. }))
            .count();
        assert!(exec_clauses >= 1, "expected >=1 Exec clause, cf={:?}", p.cf);
        let slots = crate::shader_metrics::tfetch_slots(&p);
        assert!(
            !slots.is_empty(),
            "expected tfetch slots, got none; cf={:?}",
            p.cf
        );
    }

    #[test]
    fn opcode_exec_end_carries_address_count() {
        // opcode 2 (ExecEnd), address=4, count=2, sequence=0.
        let (word0, word1) = clause_a_words((2u64 << 44) | (2u64 << 12) | 4);
        match decode_cf_pair(word0, word1, 0).0 {
            ControlFlowInstruction::Exec {
                address,
                count,
                is_end,
                ..
            } => {
                assert_eq!(address, 4);
                assert_eq!(count, 2);
                assert!(is_end);
            }
            other => panic!("expected Exec, got {other:?}"),
        }
    }

    #[test]
    fn cond_exec_pred_condition_is_payload_bit_42() {
        // opcode 5 (kCondExecPred) with `condition_` (payload bit 42) set.
        let (word0, word1) = clause_a_words((5u64 << 44) | (1u64 << 42));
        match decode_cf_pair(word0, word1, 0).0 {
            ControlFlowInstruction::Exec {
                is_end,
                predicated,
                predicate_condition,
                ..
            } => {
                assert!(!is_end);
                assert!(predicated);
                assert!(predicate_condition);
            }
            other => panic!("expected Exec, got {other:?}"),
        }

        // opcode 6 (kCondExecPredEnd) with only `is_predicate_clean_`
        // (payload bit 41) set: predicated, but the condition is false.
        let (word0, word1) = clause_a_words((6u64 << 44) | (1u64 << 41));
        match decode_cf_pair(word0, word1, 0).0 {
            ControlFlowInstruction::Exec {
                is_end,
                predicated,
                predicate_condition,
                ..
            } => {
                assert!(is_end);
                assert!(predicated);
                assert!(!predicate_condition);
            }
            other => panic!("expected Exec, got {other:?}"),
        }

        // Plain kExec is never predicate-gated, whatever bit 42 holds.
        let (word0, word1) = clause_a_words((1u64 << 44) | (1u64 << 42));
        match decode_cf_pair(word0, word1, 0).0 {
            ControlFlowInstruction::Exec {
                predicated,
                predicate_condition,
                ..
            } => {
                assert!(!predicated);
                assert!(!predicate_condition);
            }
            other => panic!("expected Exec, got {other:?}"),
        }
    }

    #[test]
    fn second_clause_comes_from_word1_hi_and_word2() {
        // First CF row of the logo shader: CF_A is a kExec at address 2 with
        // one instruction, CF_B (hi16(word1) + word2) is a kAlloc.
        let (a, b) = decode_cf_pair(0x0001_1002, 0x0000_1200, 0xC400_0000);
        match a {
            ControlFlowInstruction::Exec {
                address,
                count,
                sequence,
                is_end,
                ..
            } => {
                assert_eq!(address, 2);
                assert_eq!(count, 1);
                assert_eq!(sequence, 1);
                assert!(!is_end);
            }
            other => panic!("expected Exec, got {other:?}"),
        }
        assert!(
            matches!(b, ControlFlowInstruction::Alloc { size: 0, .. }),
            "expected Alloc, got {b:?}"
        );
    }
}