//! Xenos ALU (vector + scalar) instruction decoder.
//!
//! An ALU instruction is 96 bits = 3 dwords. As decoded by [`decode_alu`],
//! the three dwords are read as follows:
//! - word0: source register bytes, per-operand temp/constant selectors,
//!   predication bits and the `ps` (previous scalar) selector
//! - word1: not interpreted yet; available through [`AluInstruction::raw`]
//! - word2: opcodes, write masks, destination registers and the export flag
//!
//! NOTE(review): an earlier revision of this header described word0 as
//! "operand modifier flags + destination info", word1 as "source register /
//! swizzle fields" and word2 as "opcode + write mask + export target". That
//! does not match what the decoder below reads — confirm the intended layout
//! against the field map in `ucode.h:900-1400`.
//!
//! This decoder captures the minimal shape the uber-shader needs; flags we
//! don't interpret yet are retained as raw bits in `raw` for downstream
//! inspection.

/// Decoded ALU instruction.
///
/// Every field is extracted by [`decode_alu`]; the bit positions quoted below
/// are the ones that function actually reads.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AluInstruction {
    /// Vector ALU opcode (6 bits, word2 bits 0-5). See [`vop`].
    pub vector_opcode: u8,
    /// Scalar ALU opcode (6 bits, word2 bits 6-11). See [`sop`].
    pub scalar_opcode: u8,
    /// Destination register index for the vector result (7 bits, word2 bits
    /// 16-22).
    pub vector_dest: u8,
    /// Destination register index for the scalar result (7 bits, word2 bits
    /// 24-30).
    pub scalar_dest: u8,
    /// 4-bit write mask for the vector result (x/y/z/w), word2 bits 12-15.
    pub vector_write_mask: u8,
    /// 4-bit write mask for the scalar result, word2 bits 8-11.
    ///
    /// NOTE(review): these bits lie inside the 6-bit `scalar_opcode` field
    /// (word2 bits 6-11), so the two values are not independent — confirm the
    /// intended position against `ucode.h`.
    pub scalar_write_mask: u8,
    /// Set when the instruction should write to the export bank (position,
    /// interpolators, color, etc.) instead of the general register file.
    /// Word2 bit 23.
    pub vector_dest_is_export: bool,
    /// Selects `ps` (previous scalar result) as the scalar operand when set.
    /// Word0 bit 26.
    pub scalar_src_is_ps: bool,
    /// Source register indices (at most 3 for vector ops). The src bytes
    /// are the canary `srcN_reg` fields (8 bits each); for **temp-typed**
    /// operands (see `src_a_is_temp` etc.), bit 7 of the byte is the abs
    /// flag and bit 6 is the loop-relative flag — bits 5:0 give the temp
    /// index. For **constant-typed** operands the full byte is the
    /// constant index.
    ///
    /// `src_a` is word0 bits 0-7.
    pub src_a: u8,
    /// Second source byte (word0 bits 8-15); same encoding as `src_a`.
    pub src_b: u8,
    /// Third source byte (word0 bits 16-23); same encoding as `src_a`.
    pub src_c: u8,
    /// Per-operand "is temporary" flag — when true, the corresponding
    /// `src_X` byte indexes a general register (r#); when false, it
    /// indexes an ALU constant (c#). Decoded from word-0 bits 29-31
    /// (canary's `src3_sel`/`src2_sel`/`src1_sel`). GPUBUG-101.
    ///
    /// `src_a_is_temp` is word0 bit 29.
    pub src_a_is_temp: bool,
    /// Temp/constant selector for `src_b` (word0 bit 30).
    pub src_b_is_temp: bool,
    /// Temp/constant selector for `src_c` (word0 bit 31).
    pub src_c_is_temp: bool,
    /// Set when the instruction is predicated; skipped if the predicate
    /// doesn't match `predicate_condition`. Word0 bit 27.
    pub predicated: bool,
    /// Predicate value required for a predicated instruction to execute.
    /// Word0 bit 28.
    pub predicate_condition: bool,
    /// Raw dwords — preserved verbatim so the translator / interpreter can
    /// reach into fields we haven't parsed explicitly yet.
    pub raw: [u32; 3],
}

/// Decode a 3-dword ALU triple.
///
/// `words` is `[word0, word1, word2]`. Only word0 and word2 are interpreted;
/// word1 is carried through untouched in [`AluInstruction::raw`]. Decoding is
/// infallible: every bit pattern yields an `AluInstruction`, and no validation
/// of opcode values is performed.
#[must_use]
pub fn decode_alu(words: [u32; 3]) -> AluInstruction {
    // Extracts the `width`-bit field of `word` whose least significant bit is
    // `shift`. Every field decoded here is at most 8 bits wide, so the
    // narrowing cast cannot drop set bits.
    const fn field(word: u32, shift: u32, width: u32) -> u8 {
        ((word >> shift) & ((1u32 << width) - 1)) as u8
    }

    // Tests a single bit of `word`.
    const fn flag(word: u32, bit: u32) -> bool {
        (word >> bit) & 1 != 0
    }

    // word1 carries no field we interpret yet; it stays reachable via `raw`.
    let [w0, _, w2] = words;

    AluInstruction {
        vector_opcode: field(w2, 0, 6),
        scalar_opcode: field(w2, 6, 6),
        vector_dest: field(w2, 16, 7),
        scalar_dest: field(w2, 24, 7),
        vector_write_mask: field(w2, 12, 4),
        // NOTE(review): bits 8-11 overlap the scalar opcode field (bits 6-11)
        // read just above; kept as-is pending confirmation against ucode.h.
        scalar_write_mask: field(w2, 8, 4),
        vector_dest_is_export: flag(w2, 23),
        scalar_src_is_ps: flag(w0, 26),
        src_a: field(w0, 0, 8),
        src_b: field(w0, 8, 8),
        src_c: field(w0, 16, 8),
        // Word-0 bits 29-31 are the per-operand temp-vs-constant
        // selector (canary `src3_sel`/`src2_sel`/`src1_sel`,
        // ucode.h:2078-2086). Our `src_a` is canary's third operand
        // (low byte of w0), so its selector is bit 29.
        src_a_is_temp: flag(w0, 29),
        src_b_is_temp: flag(w0, 30),
        src_c_is_temp: flag(w0, 31),
        predicated: flag(w0, 27),
        predicate_condition: flag(w0, 28),
        raw: words,
    }
}

/// Vector ALU opcodes we reference by name. Values match canary's
/// `AluVectorOpcode` enum in `ucode.h:1354`.
pub mod vop {
    pub const ADD: u8 = 0;
    pub const MUL: u8 = 1;
    pub const MAX: u8 = 2;
    pub const MIN: u8 = 3;
    pub const SEQ: u8 = 4;
    pub const SGT: u8 = 5;
    pub const SGE: u8 = 6;
    pub const SNE: u8 = 7;
    pub const FRC: u8 = 8;
    pub const TRUNC: u8 = 9;
    pub const FLOOR: u8 = 10;
    pub const MAD: u8 = 11;
    pub const CND_EQ: u8 = 12;
    pub const CND_GE: u8 = 13;
    pub const CND_GT: u8 = 14;
    pub const DOT4: u8 = 15;
    pub const DOT3: u8 = 16;
    pub const DOT2_ADD: u8 = 17;
    pub const CUBE: u8 = 18;
    pub const MAX4: u8 = 19;
    pub const SETP_EQ_PUSH: u8 = 20;
    pub const SETP_NE_PUSH: u8 = 21;
    pub const SETP_GT_PUSH: u8 = 22;
    pub const SETP_GE_PUSH: u8 = 23;
    pub const KILL_EQ: u8 = 24;
    pub const KILL_GT: u8 = 25;
    pub const KILL_GE: u8 = 26;
    pub const KILL_NE: u8 = 27;
    pub const DST: u8 = 28;
    pub const MAX_A: u8 = 29;
}

/// Scalar ALU opcodes. Values match canary's `AluScalarOpcode` enum in
/// `ucode.h:1001`.
///
/// Value 41 has no named constant in this table.
pub mod sop {
    pub const ADDS: u8 = 0;
    pub const ADDS_PREV: u8 = 1;
    pub const MULS: u8 = 2;
    pub const MULS_PREV: u8 = 3;
    pub const MULS_PREV2: u8 = 4;
    pub const MAXS: u8 = 5;
    pub const MINS: u8 = 6;
    pub const SEQS: u8 = 7;
    pub const SGTS: u8 = 8;
    pub const SGES: u8 = 9;
    pub const SNES: u8 = 10;
    pub const FRCS: u8 = 11;
    pub const TRUNCS: u8 = 12;
    pub const FLOORS: u8 = 13;
    pub const EXP: u8 = 14;
    pub const LOGC: u8 = 15;
    pub const LOG: u8 = 16;
    pub const RCPC: u8 = 17;
    pub const RCPF: u8 = 18;
    pub const RCP: u8 = 19;
    pub const RSQC: u8 = 20;
    pub const RSQF: u8 = 21;
    pub const RSQ: u8 = 22;
    pub const MAXAS: u8 = 23;
    pub const MAXASF: u8 = 24;
    pub const SUBS: u8 = 25;
    pub const SUBS_PREV: u8 = 26;
    pub const SETP_EQ: u8 = 27;
    pub const SETP_NE: u8 = 28;
    pub const SETP_GT: u8 = 29;
    pub const SETP_GE: u8 = 30;
    pub const SETP_INV: u8 = 31;
    pub const SETP_POP: u8 = 32;
    pub const SETP_CLR: u8 = 33;
    pub const SETP_RSTR: u8 = 34;
    pub const KILLS_EQ: u8 = 35;
    pub const KILLS_GT: u8 = 36;
    pub const KILLS_GE: u8 = 37;
    pub const KILLS_NE: u8 = 38;
    pub const KILLS_ONE: u8 = 39;
    pub const SQRT: u8 = 40;
    pub const MULSC0: u8 = 42;
    pub const MULSC1: u8 = 43;
    pub const ADDSC0: u8 = 44;
    pub const ADDSC1: u8 = 45;
    pub const SUBSC0: u8 = 46;
    pub const SUBSC1: u8 = 47;
    pub const SIN: u8 = 48;
    pub const COS: u8 = 49;
    pub const RETAIN_PREV: u8 = 50;
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Regression: our table previously drifted from canary's values (e.g.
    /// `MAXS=6` when canary says 5, shifting everything through SQRT). Pin
    /// the most-often-used scalar + vector opcodes here.
    #[test]
    fn opcodes_match_canary_values() {
        // Scalar.
        assert_eq!(sop::MAXS, 5);
        assert_eq!(sop::MINS, 6);
        assert_eq!(sop::SEQS, 7);
        assert_eq!(sop::EXP, 14);
        assert_eq!(sop::LOG, 16);
        assert_eq!(sop::RCP, 19);
        assert_eq!(sop::RSQ, 22);
        assert_eq!(sop::SUBS, 25);
        assert_eq!(sop::SETP_EQ, 27);
        assert_eq!(sop::KILLS_EQ, 35);
        assert_eq!(sop::SQRT, 40);
        assert_eq!(sop::SIN, 48);
        assert_eq!(sop::RETAIN_PREV, 50);
        // Vector.
        assert_eq!(vop::SNE, 7);
        assert_eq!(vop::CND_EQ, 12);
        assert_eq!(vop::MAX4, 19);
        assert_eq!(vop::KILL_EQ, 24);
        assert_eq!(vop::DST, 28);
    }

    #[test]
    fn decode_extracts_opcodes_and_dests() {
        // Build a minimal ALU word:
        //   vector_opcode = ADD (0), scalar_opcode = RCP (19),
        //   vector_dest = 3, scalar_dest = 7, vector_write_mask = 0xF
        let w2 = (vop::ADD as u32)
            | ((sop::RCP as u32) << 6)
            | (0xF << 12) // vector_write_mask
            | (3u32 << 16) // vector_dest
            | (7u32 << 24); // scalar_dest
        let alu = decode_alu([0, 0, w2]);
        assert_eq!(alu.vector_opcode, vop::ADD);
        assert_eq!(alu.scalar_opcode, sop::RCP);
        assert_eq!(alu.vector_dest, 3);
        assert_eq!(alu.scalar_dest, 7);
        assert_eq!(alu.vector_write_mask, 0xF);
    }

    #[test]
    fn decode_extracts_sources_and_predicate() {
        // word0: three source bytes, `ps` selector (bit 26), predicated
        // (bit 27), predicate condition clear (bit 28), and temp selectors
        // for src_a (bit 29) and src_c (bit 31) only.
        let w0 = 0x12u32
            | (0x34 << 8)
            | (0x56 << 16)
            | (1 << 26)
            | (1 << 27)
            | (1 << 29)
            | (1 << 31);
        let alu = decode_alu([w0, 0xDEAD_BEEF, 0]);
        assert_eq!((alu.src_a, alu.src_b, alu.src_c), (0x12, 0x34, 0x56));
        assert!(alu.scalar_src_is_ps);
        assert!(alu.predicated);
        assert!(!alu.predicate_condition);
        assert!(alu.src_a_is_temp);
        assert!(!alu.src_b_is_temp);
        assert!(alu.src_c_is_temp);
        // word1 is not interpreted but must survive in `raw`.
        assert_eq!(alu.raw, [w0, 0xDEAD_BEEF, 0]);
    }
}