diff --git a/crates/xenia-gpu/src/translator.rs b/crates/xenia-gpu/src/translator.rs index 843c219..1bb9902 100644 --- a/crates/xenia-gpu/src/translator.rs +++ b/crates/xenia-gpu/src/translator.rs @@ -485,6 +485,22 @@ fn src_operand(src_byte: u8, is_temp: bool, swizzle: u8, negate: bool) -> String } fn vector_expr(op: u8, a: &str, b: &str, c: &str) -> Option { + // Semantics mirror the runtime interpreter's `exec_vector_op` + // (`shaders/xenos_interp.wgsl`), which in turn mirrors canary's + // `AluVectorOpcode` (ucode.h:1001+). Side-effecting ops (kill*, setp_push) + // need per-invocation state the AOT emitter doesn't track yet → still + // `None` (interpreter fallback). + let cmp4 = |op: &str| { + format!( + "vec4(select(0.0,1.0,{a}.x{op}{b}.x), select(0.0,1.0,{a}.y{op}{b}.y), select(0.0,1.0,{a}.z{op}{b}.z), select(0.0,1.0,{a}.w{op}{b}.w))" + ) + }; + // CND* : per-lane select(c, b, a {op} 0.0) — b where the test holds, else c. + let cnd4 = |op: &str| { + format!( + "vec4(select({c}.x,{b}.x,{a}.x{op}0.0), select({c}.y,{b}.y,{a}.y{op}0.0), select({c}.z,{b}.z,{a}.z{op}0.0), select({c}.w,{b}.w,{a}.w{op}0.0))" + ) + }; let s = match op { vop::ADD => format!("({a} + {b})"), vop::MUL => format!("({a} * {b})"), @@ -493,37 +509,63 @@ fn vector_expr(op: u8, a: &str, b: &str, c: &str) -> Option { vop::MAD => format!("({a} * {b} + {c})"), vop::DOT4 => format!("vec4(dot({a}, {b}))"), vop::DOT3 => format!("vec4(dot({a}.xyz, {b}.xyz))"), - vop::DOT2_ADD => format!( - "vec4({a}.x * {b}.x + {a}.y * {b}.y + {c}.x)" - ), - vop::SEQ => format!( - "vec4(select(0.0,1.0,{a}.x=={b}.x), select(0.0,1.0,{a}.y=={b}.y), select(0.0,1.0,{a}.z=={b}.z), select(0.0,1.0,{a}.w=={b}.w))" - ), - vop::SGT => format!( - "vec4(select(0.0,1.0,{a}.x>{b}.x), select(0.0,1.0,{a}.y>{b}.y), select(0.0,1.0,{a}.z>{b}.z), select(0.0,1.0,{a}.w>{b}.w))" - ), - vop::SGE => format!( - "vec4(select(0.0,1.0,{a}.x>={b}.x), select(0.0,1.0,{a}.y>={b}.y), select(0.0,1.0,{a}.z>={b}.z), select(0.0,1.0,{a}.w>={b}.w))" - ), - vop::SNE => format!( - 
"vec4(select(0.0,1.0,{a}.x!={b}.x), select(0.0,1.0,{a}.y!={b}.y), select(0.0,1.0,{a}.z!={b}.z), select(0.0,1.0,{a}.w!={b}.w))" - ), + vop::DOT2_ADD => format!("vec4({a}.x * {b}.x + {a}.y * {b}.y + {c}.x)"), + vop::SEQ => cmp4("=="), + vop::SGT => cmp4(">"), + vop::SGE => cmp4(">="), + vop::SNE => cmp4("!="), + vop::CND_EQ => cnd4("=="), + vop::CND_GE => cnd4(">="), + vop::CND_GT => cnd4(">"), vop::FRC => format!("fract({a})"), + vop::TRUNC => format!("trunc({a})"), vop::FLOOR => format!("floor({a})"), + vop::MAX4 => format!("vec4(max(max({a}.x,{a}.y), max({a}.z,{a}.w)))"), + // dst = (1, src0.y*src1.y, src0.z, src1.w) (canary kDst) + vop::DST => format!("vec4(1.0, {a}.y * {b}.y, {a}.z, {b}.w)"), _ => return None, }; Some(s) } fn scalar_expr(op: u8, a: &str, b: &str, prev: &str) -> Option { + // Semantics mirror the runtime interpreter's `exec_scalar_op` + // (`shaders/xenos_interp.wgsl`) / canary's `AluScalarOpcode` + // (ucode.h:1001+). Side-effecting ops (setp*, kills*, maxas*) need + // per-invocation predicate/kill/address state the AOT emitter doesn't + // track yet → still `None` (interpreter fallback). let s = match op { sop::ADDS => format!("({a} + {b})"), sop::ADDS_PREV => format!("({a} + {prev})"), sop::MULS => format!("({a} * {b})"), sop::MULS_PREV => format!("({a} * {prev})"), + // muls_prev2 / LIT emulation (canary kMulsPrev2): guard against + // -FLT_MAX / non-finite ps & b, and b <= 0. 
+ sop::MULS_PREV2 => format!( + "select({a} * {prev}, -3.4028235e38, {prev} == -3.4028235e38 || !(\ + {prev} == {prev}) || abs({prev}) > 3.4028235e38 || !({b} == {b}) || \ + abs({b}) > 3.4028235e38 || {b} <= 0.0)" + ), sop::MAXS => format!("max({a}, {b})"), sop::MINS => format!("min({a}, {b})"), - sop::RCP => format!("xe_rcp({a})"), + sop::SEQS => format!("select(0.0, 1.0, {a} == 0.0)"), + sop::SGTS => format!("select(0.0, 1.0, {a} > 0.0)"), + sop::SGES => format!("select(0.0, 1.0, {a} >= 0.0)"), + sop::SNES => format!("select(0.0, 1.0, {a} != 0.0)"), + sop::FRCS => format!("fract({a})"), + sop::TRUNCS => format!("trunc({a})"), + sop::FLOORS => format!("floor({a})"), + sop::SUBS => format!("({a} - {b})"), + sop::SUBS_PREV => format!("({a} - {prev})"), + sop::EXP => format!("exp2({a})"), + sop::LOG | sop::LOGC => format!("select(log2({a}), 0.0, {a} == 1.0)"), + sop::RCP | sop::RCPC | sop::RCPF => format!("xe_rcp({a})"), + sop::RSQ | sop::RSQC | sop::RSQF => { + format!("select(0.0, inverseSqrt({a}), {a} > 0.0)") + } + sop::SQRT => format!("select(0.0, sqrt({a}), {a} >= 0.0)"), + sop::SIN => format!("sin({a})"), + sop::COS => format!("cos({a})"), sop::RETAIN_PREV => prev.to_string(), _ => return None, }; diff --git a/crates/xenia-gpu/src/ucode/control_flow.rs b/crates/xenia-gpu/src/ucode/control_flow.rs index f8e60dd..966609a 100644 --- a/crates/xenia-gpu/src/ucode/control_flow.rs +++ b/crates/xenia-gpu/src/ucode/control_flow.rs @@ -96,10 +96,26 @@ pub fn decode_cf_pair(word0: u32, word1: u32, word2: u32) -> (ControlFlowInstruc fn decode_single(payload: u64) -> ControlFlowInstruction { // Top 4 bits of the 48-bit payload. let opcode = ((payload >> 44) & 0xF) as u8; - // Predicate bit + condition live at the 28..30 range for exec/jmp. Rough - // extraction — good enough for the interpreter, which logs unknowns. 
- let predicated = ((payload >> 28) & 1) != 0; - let predicate_condition = ((payload >> 29) & 1) != 0; + + // GPUBUG-103 (iterate-3P): clause-level predication is determined by the + // *opcode*, not by free bits. The 48-bit CF payload is word0 = bits 0..31, + // word1 = bits 32..47. Per canary `ucode.h`: + // * `ControlFlowExecInstruction` (kExec/kExecEnd, opcodes 1/2): NOT + // predicate-gated — it runs unconditionally. + // * `ControlFlowCondExecInstruction` (kCondExec/kCondExecEnd, 3/4): gated + // by a *bool constant*, `condition_` at word1 bit 10 = payload bit 42. + // We don't model bool-constant gating in the WGSL paths (the bool is + // virtually always set for these), so treat as unconditional. + // * `ControlFlowCondExecPredInstruction` (kCondExecPred/...End/Clean..., + // 5/6/13/14): gated by the *predicate register*; `condition_` at word1 + // bit 9 = payload bit 41. + // The prior code read bits 28/29 (which fall inside `sequence_`/`vc_hi_`) + // and stamped `predicated=true` on plenty of plain `kExec` clauses — which + // made the P7 translator reject EVERY splash VS as `cf_cond`, forcing the + // interpreter (placeholder geometry) for all draws. + let is_pred_gated = matches!(opcode, 5 | 6 | 13 | 14); + let predicated = is_pred_gated; + let predicate_condition = is_pred_gated && ((payload >> 41) & 1) != 0; // Xenos `ControlFlowOpcode` (canary `ucode.h:86-160`): // 0 kNop, 1 kExec, 2 kExecEnd, 3 kCondExec, 4 kCondExecEnd,