//! Host-side static analysis over a [`ParsedShader`], emitted once per unique //! shader blob. Produces the observability the plan's P3b/P3c sections call //! for (`gpu.shader.interpret{stage,kind}` + `gpu.shader.reject{reason}`), so //! the HUD can show when a game is reaching ops the WGSL interpreter falls //! back on. //! //! Analysis is intentionally cheap: it scans each exec clause's instruction //! triples, classifies them as ALU / vertex-fetch / texture-fetch using the //! owning clause's sequence bitmap, and bumps counters accordingly. No GPU //! readback is required — `reject` reasons are inferred from opcode values //! alone. use metrics::counter; use crate::ucode::alu::{decode_alu, sop, vop}; use crate::ucode::control_flow::ControlFlowInstruction; use crate::ucode::fetch::{FetchInstruction, decode_fetch}; use crate::ucode::ParsedShader; /// Walk `parsed` once and emit `gpu.shader.interpret` + `gpu.shader.reject` /// counters. `stage` should be `"vs"` or `"ps"`. pub fn emit_for(parsed: &ParsedShader, stage: &'static str) { let mut alu_count: u64 = 0; let mut vfetch_count: u64 = 0; let mut tfetch_count: u64 = 0; let mut rejects: Vec<(&'static str, u64)> = Vec::new(); let mut features: Vec<&'static str> = Vec::new(); for clause in &parsed.cf { match clause { ControlFlowInstruction::Exec { address, count, sequence, .. } => { for i in 0..(*count as usize) { let triple_idx = *address as usize + i; let base = triple_idx * 3; if base + 2 >= parsed.instructions.len() { break; } let words = [ parsed.instructions[base], parsed.instructions[base + 1], parsed.instructions[base + 2], ]; // sequence bit layout: 2 bits per triple, hi bit = is-fetch. let is_fetch = ((sequence >> (i * 2 + 1)) & 1) != 0; if is_fetch { match decode_fetch(words) { FetchInstruction::Vertex(_) => vfetch_count += 1, FetchInstruction::Texture(tf) => { tfetch_count += 1; match tf.dimension { 0 => mark_feature(&mut features, "tfetch_1d"), 2 => mark_feature(&mut features, "tfetch_3d"), 3 => mark_feature(&mut features, "tfetch_cube"), _ => {} } if tf.dimension != 1 { bump(&mut rejects, "texfetch_dimension"); } } FetchInstruction::Unknown { .. } => { bump(&mut rejects, "fetch_unknown"); } } } else { alu_count += 1; let alu = decode_alu(words); if !vec_op_supported(alu.vector_opcode) { bump(&mut rejects, "alu_vec_unsupported"); } if !scl_op_supported(alu.scalar_opcode) { bump(&mut rejects, "alu_scl_unsupported"); } // Feature-of-interest detection for future phases. // Transcendentals + kill + setp + cube/max4 are the // high-value signals: they tell us which of the // deferred capabilities Sylpheed actually exercises. match alu.vector_opcode { v if v == vop::CUBE => mark_feature(&mut features, "vec_cube"), v if v == vop::MAX4 => mark_feature(&mut features, "vec_max4"), v if v == vop::KILL_EQ || v == vop::KILL_GT || v == vop::KILL_GE || v == vop::KILL_NE => { mark_feature(&mut features, "vec_kill"); } v if v == vop::CND_EQ || v == vop::CND_GE || v == vop::CND_GT => { mark_feature(&mut features, "vec_cnd"); } _ => {} } match alu.scalar_opcode { s if s == sop::EXP || s == sop::LOG || s == sop::LOGC || s == sop::SIN || s == sop::COS => { mark_feature(&mut features, "scl_transcendental"); } s if s == sop::RSQ || s == sop::RSQC || s == sop::RSQF || s == sop::SQRT => { mark_feature(&mut features, "scl_sqrt_family"); } s if s == sop::SETP_EQ || s == sop::SETP_NE || s == sop::SETP_GT || s == sop::SETP_GE || s == sop::SETP_INV || s == sop::SETP_POP || s == sop::SETP_CLR || s == sop::SETP_RSTR => { mark_feature(&mut features, "scl_setp"); } s if s == sop::KILLS_EQ || s == sop::KILLS_GT || s == sop::KILLS_GE || s == sop::KILLS_NE || s == sop::KILLS_ONE => { mark_feature(&mut features, "scl_kills"); } _ => {} } if alu.predicated { mark_feature(&mut features, "alu_predicated"); } } } } ControlFlowInstruction::LoopStart { .. } | ControlFlowInstruction::LoopEnd { .. } => { mark_feature(&mut features, "cf_loop"); bump(&mut rejects, "cf_loop"); } ControlFlowInstruction::CondJmp { .. } => { mark_feature(&mut features, "cf_cond_jmp"); bump(&mut rejects, "cf_cond_jmp"); } ControlFlowInstruction::CondCall { .. } | ControlFlowInstruction::Return => { mark_feature(&mut features, "cf_call_return"); bump(&mut rejects, "cf_call_return"); } ControlFlowInstruction::Unknown { .. } => { bump(&mut rejects, "cf_unknown"); } _ => {} } } counter!("gpu.shader.interpret", "stage" => stage, "kind" => "alu") .increment(alu_count); counter!("gpu.shader.interpret", "stage" => stage, "kind" => "vfetch") .increment(vfetch_count); counter!("gpu.shader.interpret", "stage" => stage, "kind" => "tfetch") .increment(tfetch_count); for (reason, n) in rejects { counter!("gpu.shader.reject", "stage" => stage, "reason" => reason).increment(n); } for name in features { counter!("gpu.feature.used", "stage" => stage, "name" => name).increment(1); } } fn mark_feature(buf: &mut Vec<&'static str>, name: &'static str) { if !buf.contains(&name) { buf.push(name); } } fn bump(buf: &mut Vec<(&'static str, u64)>, reason: &'static str) { for entry in buf.iter_mut() { if entry.0 == reason { entry.1 += 1; return; } } buf.push((reason, 1)); } fn vec_op_supported(op: u8) -> bool { matches!( op, vop::ADD | vop::MUL | vop::MAX | vop::MIN | vop::SEQ | vop::SGT | vop::SGE | vop::SNE | vop::FRC | vop::TRUNC | vop::FLOOR | vop::MAD | vop::CND_EQ | vop::CND_GE | vop::CND_GT | vop::DOT4 | vop::DOT3 | vop::DOT2_ADD | vop::MAX4 | vop::KILL_EQ | vop::KILL_GT | vop::KILL_GE | vop::KILL_NE | vop::DST ) } fn scl_op_supported(op: u8) -> bool { matches!( op, sop::ADDS | sop::ADDS_PREV | sop::MULS | sop::MULS_PREV | sop::MAXS | sop::MINS | sop::SEQS | sop::SGTS | sop::SGES | sop::SNES | sop::FRCS | sop::TRUNCS | sop::FLOORS | sop::EXP | sop::LOG | sop::LOGC | sop::RCP | sop::RCPC | sop::RCPF | sop::RSQ | sop::RSQC | sop::RSQF | sop::SQRT | sop::SUBS | sop::SUBS_PREV | sop::SETP_EQ | sop::SETP_NE | sop::SETP_GT | sop::SETP_GE | sop::SETP_INV | sop::SETP_POP | sop::SETP_CLR | sop::SETP_RSTR | sop::KILLS_EQ | sop::KILLS_GT | sop::KILLS_GE | sop::KILLS_NE | sop::KILLS_ONE | sop::SIN | sop::COS | sop::RETAIN_PREV ) } #[cfg(test)] mod tests { use super::*; use crate::ucode::alu::{sop, vop}; use crate::ucode::control_flow::ControlFlowInstruction; /// Build a minimal `ParsedShader` with one `Exec` clause containing /// `count` ALU triples and assert the `alu` counter path works. #[test] fn emit_for_runs_on_synthetic_shader() { let alu_w2 = (vop::ADD as u32) | ((sop::ADDS as u32) << 6) | (0xF << 12); let shader = ParsedShader { cf: vec![ ControlFlowInstruction::Exec { address: 0, count: 2, sequence: 0, // all ALU (no is-fetch bits) is_end: false, predicated: false, predicate_condition: false, }, ControlFlowInstruction::Exit, ], instructions: vec![0, 0, alu_w2, 0, 0, alu_w2], }; // Just smoke: doesn't panic. Counters are validated via metrics // exporters elsewhere; we only assert this doesn't throw on a // well-formed ParsedShader. emit_for(&shader, "vs"); } /// P8: a shader containing `LoopStart` should mark `cf_loop` as used /// so the HUD can surface which deferred feature a game triggers. #[test] fn feature_detection_flags_loops_and_kills() { let kill_alu_w2 = (vop::KILL_EQ as u32) | ((sop::RETAIN_PREV as u32) << 6) | (0xF << 12); let shader = ParsedShader { cf: vec![ ControlFlowInstruction::LoopStart { address: 0, loop_id: 0, }, ControlFlowInstruction::Exec { address: 0, count: 1, sequence: 0, is_end: true, predicated: false, predicate_condition: false, }, ], instructions: vec![0, 0, kill_alu_w2], }; // Smoke: emits cleanly. emit_for(&shader, "ps"); } #[test] fn unsupported_ops_classified_as_rejects() { // Opcode 63 is outside our supported sets for both pipes. let alu_w2 = 63u32 | (63u32 << 6) | (0xF << 12); let shader = ParsedShader { cf: vec![ ControlFlowInstruction::Exec { address: 0, count: 1, sequence: 0, is_end: true, predicated: false, predicate_condition: false, }, ], instructions: vec![0, 0, alu_w2], }; // Again: smoke — but also confirm our static tables reject op 63. assert!(!vec_op_supported(63)); assert!(!scl_op_supported(63)); emit_for(&shader, "ps"); } }