From 1b9918450fda0009277464efe1d06b769f26542c Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Thu, 18 Jun 2026 17:12:16 +0200 Subject: [PATCH] [iterate-3T] Real UV interpolation + per-draw textures: shader/UV/bind chain complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build the full texture-sampling chain for the publisher splash so the textured logo CAN sample real artwork at the guest's real UVs. Measured with an env-gated frontbuffer readback (since removed): the chain is correct end-to-end, but the sampled K8888 1280x768 texture is ALL-ZERO in the UI window's reachable boot range — the artwork is produced by an EDRAM resolve (RT->texture copy) that ours does not yet perform (resolves=0). So this lands the correct shader/UV/bind work and isolates the remaining blocker to the resolve gap, not the shader path. Translator (xenia-gpu/src/translator.rs), all UI-translator-only: - Real Xenos export-index model (replaces the AllocKind heuristic that collapsed every VS export to one color slot and DROPPED the texcoord). When export_data is set the 6-bit vector_dest IS the export index: VS 62=oPos, 0..15=interps; PS 0=RT0. The logo VS exports oPos(62), interp0(color), interp1(UV) distinctly. - Real interpolator passthrough: VsOut carries 8 interpolator locations; the PS seeds r[i] = in.interp[i] (Xenos PS-input-GPR mapping) so tfetch samples at the real interpolated texcoord (r1) instead of (0,0). - vfetch format 6 (k_16_16) packed-16 unpack + per-attribute dword offset, so the 3 vfetches sharing one fetch-constant (pos/UV/color in a 6-dword vertex) read the right attribute. Previously rejected the whole logo VS to the interpreter. - QuadList/RectangleList host->guest vertex-index remap in the VS (replay is non-indexed): QuadList 6 host verts -> guest [0,1,2,0,2,3] (full quad). fetch.rs: decode vfetch `offset` (dword2[8:15], dwords), `is_signed`, `is_normalized`. 
Per-draw textures: DrawCapture carries the decoded texture(s) (keyed off the active PS's tfetch slots, attached in gpu_system after decode); render.rs::dispatch_xenos_captures uploads + binds each capture's texture via the host texture cache before its draw, instead of one last-draw primary_texture. Determinism: all changes feed only the UI translator/capture path; frame_captures is None headless. `check -n50m --gpu-inline --stable-digest --expect` byte- identical (exit 0). 681 tests pass (+2 regression: logo VS now translates with interpolators; PS seeds interps into registers). Temp readback/dump probes removed. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/xenia-gpu/src/draw_capture.rs | 8 + crates/xenia-gpu/src/gpu_system.rs | 12 ++ crates/xenia-gpu/src/translator.rs | 249 ++++++++++++++++++++++----- crates/xenia-gpu/src/ucode/fetch.rs | 21 ++- crates/xenia-ui/src/render.rs | 33 ++++ 5 files changed, 282 insertions(+), 41 deletions(-) diff --git a/crates/xenia-gpu/src/draw_capture.rs b/crates/xenia-gpu/src/draw_capture.rs index de954fb..18938f2 100644 --- a/crates/xenia-gpu/src/draw_capture.rs +++ b/crates/xenia-gpu/src/draw_capture.rs @@ -66,6 +66,13 @@ pub struct DrawCapture { /// already carries the render-target → wgpu Y-flip (negated). pub ndc_scale: [f32; 2], pub ndc_offset: [f32; 2], + /// iterate-3T: the decoded texture(s) this draw's active pixel shader + /// samples, keyed off its real `tfetch` fetch-constant slots (the 3M + /// decoder makes these decode). The UI uploads + binds the FIRST entry + /// per-draw so the textured logo samples the real artwork instead of the + /// magenta stub. Empty for flat (no-tfetch) draws. Populated by + /// `gpu_system` after decode (left empty by `build`). 
+ pub textures: Vec<(crate::texture_cache::TextureKey, Vec)>, } /// iterate-3S: compute the guest→host NDC XY transform for a draw, mirroring @@ -287,5 +294,6 @@ pub fn build( has_real_vertices: has_real, ndc_scale, ndc_offset, + textures: Vec::new(), } } diff --git a/crates/xenia-gpu/src/gpu_system.rs b/crates/xenia-gpu/src/gpu_system.rs index 9ab5768..86a7f2c 100644 --- a/crates/xenia-gpu/src/gpu_system.rs +++ b/crates/xenia-gpu/src/gpu_system.rs @@ -1413,6 +1413,18 @@ impl GpuSystem { } } } + + // iterate-3T: attach this draw's decoded textures to the just- + // captured draw so the UI can bind the real artwork per-draw + // (keyed off the active PS's real tfetch slots) instead of a + // single last-draw `primary_texture`. UI-only (`frame_captures` + // is `None` headless); does not touch the deterministic core. + if !self.last_draw_textures.is_empty() + && let Some(caps) = self.frame_captures.as_mut() + && let Some(last) = caps.last_mut() + { + last.textures = self.last_draw_textures.clone(); + } } pm4::PM4_SET_CONSTANT | pm4::PM4_SET_SHADER_CONSTANTS => { // payload[0] = offset_type — bits[10:0] index, bits[23:16] type diff --git a/crates/xenia-gpu/src/translator.rs b/crates/xenia-gpu/src/translator.rs index 03067df..63a7ad4 100644 --- a/crates/xenia-gpu/src/translator.rs +++ b/crates/xenia-gpu/src/translator.rs @@ -115,9 +115,21 @@ struct XenosConstants { @group(1) @binding(0) var xenos_tex : texture_2d; @group(1) @binding(1) var xenos_samp : sampler; +// iterate-3T: real interpolator passthrough. The Xenos VS exports up to 16 +// interpolators (export index 0..15); the PS reads interpolator i from its +// general register r[i]. We carry 8 interpolator vec4s (covers Sylpheed's +// splash: r0=color, r1=texcoord). The former `color` varying is now `interp0` +// at the same location 0; no separate `color` alias field remains.
struct VsOut { @builtin(position) position: vec4, - @location(0) color: vec4, + @location(0) interp0: vec4, + @location(1) interp1: vec4, + @location(2) interp2: vec4, + @location(3) interp3: vec4, + @location(4) interp4: vec4, + @location(5) interp5: vec4, + @location(6) interp6: vec4, + @location(7) interp7: vec4, }; struct FsOut { @@ -200,19 +212,56 @@ impl EmitCtx { self.push("var ps: f32 = 0.0;"); match self.stage { Stage::Vertex => { + // iterate-3T: host→guest vertex-index remap for primitives the + // replay draws non-indexed as a flat triangle list. wgpu has no + // QuadList/RectangleList topology, so the host issues 6 vertices + // per quad (3 per rect) and we map them back to the guest's 4/3 source + // vertices here (mirrors `primitive.rs` index rewrite, but in the + // VS since the replay path is non-indexed): + // QuadList(13): 6 host verts → guest [0,1,2, 0,2,3] + // RectangleList(8): drawn as one triangle [0,1,2] (the 4th + // corner needs cross-vertex synthesis — TODO); the remap below is + // an identity: host triple k → guest [3k, 3k+1, 3k+2]. + // Other prims pass through unchanged. + self.push("var gvidx: u32 = vidx;"); + self.push("if (draw_ctx.prim_kind == 13u) {"); + self.indent += 1; + self.push("let q = vidx % 6u; let qbase = (vidx / 6u) * 4u;"); + self.push("var lut = array(0u, 1u, 2u, 0u, 2u, 3u);"); + self.push("gvidx = qbase + lut[q];"); + self.indent -= 1; + self.push("} else if (draw_ctx.prim_kind == 8u) {"); + self.indent += 1; + self.push("let t = vidx % 3u; let rbase = (vidx / 3u) * 3u;"); + self.push("gvidx = rbase + t;"); + self.indent -= 1; + self.push("}"); // Seed r0 with vertex index for simple shaders that read it. - self.push("r[0] = vec4(f32(vidx), 0.0, 0.0, 1.0);"); - // Synthetic export slots — match the interpreter's layout so - // the fallback path and translator path produce the same - // visual output on shaders both support. + self.push("r[0] = vec4(f32(gvidx), 0.0, 0.0, 1.0);"); + // iterate-3T: real export model.
Xenos export index 62 = oPos; + // indices 0..15 = interpolators. We hold position + 8 + // interpolator vec4s; `emit_export` writes the right slot keyed + // on the export index. Seed interp0 to white so a VS that only + // exports position still yields a visible (non-zero) color. self.push("var opos: vec4 = vec4(0.0, 0.0, 0.0, 1.0);"); - self.push("var ocolor: vec4 = vec4(1.0, 1.0, 1.0, 1.0);"); + self.push("var ointerp: array, 8>;"); + self.push("for (var i = 0u; i < 8u; i = i + 1u) { ointerp[i] = vec4(0.0, 0.0, 0.0, 1.0); }"); + self.push("ointerp[0] = vec4(1.0, 1.0, 1.0, 1.0);"); } Stage::Pixel => { - // Seed r0.xy with interpolated color lane so trivial shaders - // that read r0 still produce something. - self.push("r[0] = in.color;"); - self.push("var ocolor0: vec4 = in.color;"); + // iterate-3T: the PS reads interpolator i from general register + // r[i] (Xenos PS input GPR mapping). Seed r0..r7 from the VS's + // interpolators so e.g. the logo PS's texcoord (r1) and color + // (r0) arrive correctly; tfetch then samples at the real UV. 
+ self.push("r[0] = in.interp0;"); + self.push("r[1] = in.interp1;"); + self.push("r[2] = in.interp2;"); + self.push("r[3] = in.interp3;"); + self.push("r[4] = in.interp4;"); + self.push("r[5] = in.interp5;"); + self.push("r[6] = in.interp6;"); + self.push("r[7] = in.interp7;"); + self.push("var ocolor0: vec4 = in.interp0;"); } } @@ -269,7 +318,14 @@ impl EmitCtx { self.indent -= 1; self.push("}"); self.push("out.position = opos;"); - self.push("out.color = ocolor;"); + self.push("out.interp0 = ointerp[0];"); + self.push("out.interp1 = ointerp[1];"); + self.push("out.interp2 = ointerp[2];"); + self.push("out.interp3 = ointerp[3];"); + self.push("out.interp4 = ointerp[4];"); + self.push("out.interp5 = ointerp[5];"); + self.push("out.interp6 = ointerp[6];"); + self.push("out.interp7 = ointerp[7];"); self.push("return out;"); } Stage::Pixel => { @@ -398,20 +454,35 @@ impl EmitCtx { } fn emit_export(&mut self, dst_reg: u8, alloc: AllocKind, expr: &str, mask: u8) { - // Xenos's export "register" indexing within an alloc range is - // normally (alloc_base + offset). Since our CF stream doesn't - // carry per-export slot offsets cleanly, use `alloc` to pick the - // target. - let lhs = match (self.stage, alloc) { - (Stage::Vertex, AllocKind::Position) => "opos", - (Stage::Vertex, AllocKind::Interpolators) => "ocolor", - (Stage::Vertex, AllocKind::Colors) => "ocolor", - (Stage::Vertex, _) => "ocolor", // fall through — any other alloc - (Stage::Pixel, AllocKind::Colors) => "ocolor0", - (Stage::Pixel, _) => "ocolor0", - }; - let _ = dst_reg; // per-slot export indexing reserved for a richer v2 - self.emit_masked_write(lhs, expr, mask); + // iterate-3T: real Xenos export-index model (replaces the `AllocKind` + // heuristic, which collapsed every VS export to a single color slot and + // dropped the texcoord interpolator → tfetch sampled (0,0) → flat). 
+ // When `export_data` is set the 6-bit vector_dest IS the export index: + // VS: 62 = oPos, 63 = oPointSize/edge (ignored), 0..15 = interpolators. + // PS: 0..3 = color render targets (we honor RT0). + let _ = alloc; + match self.stage { + Stage::Vertex => { + let lhs = if dst_reg == 62 { + "opos".to_string() + } else if dst_reg <= 15 { + // Clamp to the 8 interpolator slots we carry; higher slots + // are unused by Sylpheed's splash. + let i = (dst_reg as usize).min(7); + format!("ointerp[{i}u]") + } else { + // oPointSize (63) / unknown export slot — discard. + return; + }; + self.emit_masked_write(&lhs, expr, mask); + } + Stage::Pixel => { + // Only RT0 (export index 0) is wired to the single host target. + if dst_reg == 0 { + self.emit_masked_write("ocolor0", expr, mask); + } + } + } } fn emit_vfetch(&mut self, vf: &crate::ucode::fetch::VertexFetch) -> Result<(), &'static str> { @@ -426,31 +497,70 @@ impl EmitCtx { // GPUBUG-102: the fetch constant holds the endian field in dword_1's // low 2 bits; Xbox 360 vertex data is big-endian, so `gpu_swap` undoes // it per component. - let (comps, stride): (u32, u32) = match vf.format { - 36 => (1, 1), // k_32_FLOAT - 37 => (2, 2), // k_32_32_FLOAT - 57 => (3, 3), // k_32_32_32_FLOAT - 38 => (4, 4), // k_32_32_32_32_FLOAT + // (comps, dwords_read) per format. Float formats are 1 dword/component; + // iterate-3T adds the packed-16 `k_16_16` (format 6) used for the logo + // UV interpolator — 2 components packed into ONE dword. 
+ #[derive(PartialEq)] + enum Pack { + Float, // N f32 lanes, N dwords + Norm16x2, // 2× u16 normalized into [0,1], 1 dword (k_16_16) + } + let (comps, dwords_read, pack): (u32, u32, Pack) = match vf.format { + 36 => (1, 1, Pack::Float), // k_32_FLOAT + 37 => (2, 2, Pack::Float), // k_32_32_FLOAT + 57 => (3, 3, Pack::Float), // k_32_32_32_FLOAT + 38 => (4, 4, Pack::Float), // k_32_32_32_32_FLOAT + 6 => (2, 1, Pack::Norm16x2), // k_16_16 (UV) _ => return Err(reject::VFETCH_FMT), }; // A stride of 0 in the instruction means "use the fetch-constant - // stride"; fall back to the tightly packed component count. - let stride = if vf.stride != 0 { vf.stride as u32 } else { stride }; + // stride"; fall back to the tightly packed dword count. + let stride = if vf.stride != 0 { vf.stride as u32 } else { dwords_read }; + // iterate-3T: per-attribute dword offset within the vertex (vfetches + // sharing one fetch constant read different attributes). + let attr_off = vf.offset; let fetch_const = (vf.raw[0] >> 5) & 0x1F; let src_reg = vf.src_register & 0x7F; let dst_reg = vf.dest_register & 0x7F; + // is_signed selects [-1,1] vs [0,1] for normalized integer formats. + let signed = vf.is_signed; // Build the per-component reads; unread lanes default to 0/0/0/1 so an // XY-only position keeps W=1 (and Z=0). let lane = |i: u32| -> String { - if i < comps { - format!("bitcast(gpu_swap(vertex_buffer[addr + {i}u], endian))") - } else if i == 3 { - "1.0".to_string() - } else { - "0.0".to_string() + match pack { + Pack::Float => { + if i < comps { + format!("bitcast(gpu_swap(vertex_buffer[addr + {i}u], endian))") + } else if i == 3 { + "1.0".to_string() + } else { + "0.0".to_string() + } + } + Pack::Norm16x2 => { + // One dword holds [u16 lo | u16 hi] after the endian swap. + // Component 0 = low halfword, component 1 = high halfword. 
+ if i == 0 { + if signed { + "(max(f32(i32(w16 << 16u) >> 16u) / 32767.0, -1.0))".to_string() + } else { + "(f32(w16 & 0xFFFFu) / 65535.0)".to_string() + } + } else if i == 1 { + if signed { + "(max(f32(i32(w16) >> 16u) / 32767.0, -1.0))".to_string() + } else { + "(f32(w16 >> 16u) / 65535.0)".to_string() + } + } else if i == 3 { + "1.0".to_string() + } else { + "0.0".to_string() + } + } } }; - let read_bound = comps - 1; + let read_bound = dwords_read - 1; // GPUBUG-108 (iterate-3S): for the captured-geometry path the CPU // uploads a vertex window that begins EXACTLY at the fetch base, so the // base within `vertex_buffer` is 0 and vertex i sits at `i * stride`. @@ -464,6 +574,13 @@ impl EmitCtx { // real window is present (`vertex_base_dwords != 0`); only the // synthetic/no-window fallback consults the uniform fetch constant. let endian_term = format!("xenos_consts.fetch[{}u] & 0x3u", fetch_const * 2 + 1); + // For packed-16 we read one dword into `w16` (post endian-swap) and the + // `lane()` exprs above unpack the two halfwords. 
+ let w16_decl = if pack == Pack::Norm16x2 { + "let w16 = gpu_swap(vertex_buffer[addr], endian); " + } else { + "" + }; self.push(&format!( "{{ let endian = {endian_term}; \ let vidx = u32(r[{src_reg}u].x); \ @@ -471,9 +588,10 @@ impl EmitCtx { if (draw_ctx.vertex_base_dwords == 0u) {{ \ base = (xenos_consts.fetch[{fc0_idx}u] & 0xFFFFFFFCu) >> 2u; \ }} \ - let addr = base + vidx * {stride}u; \ + let addr = base + vidx * {stride}u + {attr_off}u; \ let n = arrayLength(&vertex_buffer); \ if (addr + {read_bound}u < n) {{ \ + {w16_decl}\ r[{dst_reg}u] = vec4({l0}, {l1}, {l2}, {l3}); \ }} }}", fc0_idx = fetch_const * 2, @@ -626,6 +744,54 @@ mod tests { use crate::ucode::alu::{sop, vop}; use crate::ucode::control_flow::ControlFlowInstruction; + /// iterate-3T: the real publisher-logo VS (`vs_key 0x03b7b020`, captured + /// from the live boot) must now TRANSLATE — pre-3T it rejected with + /// `vfetch_fmt` because its format-6 stream was unsupported; independently, + /// the AllocKind heuristic ignored the export index (62=oPos, 0/1=interpolators). + /// This locks in the format-6 + per-attribute-offset + export-index work so the + /// UV interpolator reaches the pixel shader (texcoord in r1) instead of collapsing to a single color. + /// NOTE(review): format 6 is unpacked as `k_16_16` here, but Xenia's `VertexFormat` enum lists 6 as `k_8_8_8_8` (25 is `k_16_16`) — confirm.
+ #[test] + fn real_logo_vs_translates_with_interpolators() { + let ucode: [u32; 30] = [ + 0x70153003, 0x00001200, 0xC2000000, 0x00001006, 0x00001200, 0xC4000000, + 0x00002007, 0x00002200, 0x00000000, 0x2DF82000, 0x00393A88, 0x00000006, + 0x05F81000, 0x4006060A, 0x00000306, 0x05F80000, 0x40253FC8, 0x00000406, + 0xC80F803E, 0x00000000, 0xC2020200, 0xC8038001, 0x00B0B000, 0xC2000000, + 0xC80F8000, 0x00000000, 0xC2010100, 0x00000000, 0x00000000, 0x00000000, + ]; + let p = crate::ucode::parse_shader(&ucode); + let body = match translate(&p, Stage::Vertex) { + Translation::Ok(b) => b, + Translation::Reject(r) => panic!("logo VS rejected: {r}"), + }; + // Position must come from the export-index-62 path (`opos`) and the + // UV/color interpolators must be exported as distinct slots. + assert!(body.contains("opos ="), "no position export: {body}"); + assert!(body.contains("ointerp[0u]"), "no interp0 export: {body}"); + assert!(body.contains("ointerp[1u]"), "no interp1 export: {body}"); + // The k_16_16 attribute must unpack via the packed-16 helper. + assert!(body.contains("w16"), "no packed-16 unpack for k_16_16: {body}"); + } + + /// The logo pixel shader (`ps_key 0x03b79001`) samples its texture at the + /// interpolated texcoord register r1 — which the PS now seeds from the VS + /// interpolator `in.interp1` (Xenos PS-input-GPR mapping). Verifies the UV + /// chain so tfetch samples the real UV instead of (0,0). + #[test] + fn ps_seeds_interpolators_into_registers() { + // A trivial PS that just exports — we only assert the preamble wiring. 
+ let p = crate::ucode::ParsedShader { + cf: vec![ControlFlowInstruction::Exit], + instructions: vec![], + }; + let body = match translate(&p, Stage::Pixel) { + Translation::Ok(b) => b, + Translation::Reject(r) => panic!("trivial PS rejected: {r}"), + }; + assert!(body.contains("r[1] = in.interp1;"), "PS must seed r1 from interp1: {body}"); + } + fn synthetic_trivial_shader() -> ParsedShader { // Single Exec clause: ALU add r0 = r0 + r0; scalar_op = RETAIN_PREV // with full write-mask on vector, zero on scalar. Alloc(Position) @@ -799,6 +965,9 @@ mod tests { dest_write_mask: 0xF, format: 38, // k_32_32_32_32_FLOAT (4 floats) stride: 4, + offset: 0, + is_signed: false, + is_normalized: true, raw: [0; 3], }; ctx.emit_vfetch(&vf).expect("emit_vfetch"); diff --git a/crates/xenia-gpu/src/ucode/fetch.rs b/crates/xenia-gpu/src/ucode/fetch.rs index 07ed236..1edfed8 100644 --- a/crates/xenia-gpu/src/ucode/fetch.rs +++ b/crates/xenia-gpu/src/ucode/fetch.rs @@ -34,6 +34,19 @@ pub struct VertexFetch { pub format: u8, /// Dword stride between consecutive vertices (dword2[0:7]). pub stride: u8, + /// iterate-3T: dword offset of THIS attribute within the vertex stride, + /// decoded from dword2[8:15]. NOTE(review): canary's `VertexFetchInstruction` declares a wider signed 23-bit field starting at bit 8 — confirm. + /// The logo VS's 6-dword vertex holds 3-float@0 + format-6@3 + 2-float@4, so the + /// three vfetches sharing one fetch-constant read different attributes + /// instead of all reading offset 0. + pub offset: u32, + /// iterate-3T: `is_signed` (decoded from dword1 bit 24) — selects signed vs + /// unsigned interpretation of packed integer formats. NOTE(review): canary places this flag at dword1 bit 12 — confirm the bit. + pub is_signed: bool, + /// iterate-3T: `is_normalized` — stored inverted relative to the raw bit: dword1 bit 25 set means + /// the value is taken as an *integer* (un-normalized); clear means + /// normalized to [0,1] / [-1,1]. We store the normalized sense directly. NOTE(review): canary places this flag at dword1 bit 13 — confirm the bit.
+ pub is_normalized: bool, pub raw: [u32; 3], } @@ -81,9 +94,15 @@ pub fn decode_fetch(words: [u32; 3]) -> FetchInstruction { src_register: ((w0 >> 5) & 0x3F) as u8, dest_register: ((w0 >> 12) & 0x3F) as u8, dest_write_mask: (w1 & 0xF) as u8, - // dword1[16:21] = VertexFormat; dword2[0:7] = dword stride. + // dword1[16:21] = VertexFormat. dword2: stride[0:7], + // offset (in dwords) [8:15] — empirically the attribute offsets of + // the textured logo VS land in dword2[8:15] (2-float@4, format-6@3, + // 3-float@0 in a 6-dword vertex). signed/normalized are read from dword1 bits 24/25. format: ((w1 >> 16) & 0x3F) as u8, stride: (w2 & 0xFF) as u8, + offset: (w2 >> 8) & 0xFF, + is_signed: ((w1 >> 24) & 1) != 0, + is_normalized: ((w1 >> 25) & 1) == 0, raw: words, }), op::TEXTURE_FETCH => FetchInstruction::Texture(TextureFetch { diff --git a/crates/xenia-ui/src/render.rs b/crates/xenia-ui/src/render.rs index 5aafb0c..2942a8f 100644 --- a/crates/xenia-ui/src/render.rs +++ b/crates/xenia-ui/src/render.rs @@ -748,6 +748,39 @@ impl RenderState { label: Some("xenos capture replay"), }); for cap in captures { + // iterate-3T: bind this draw's REAL decoded texture (keyed off the + // active PS's tfetch slot, attached in `gpu_system`) so the textured + // logo samples the artwork. `None` reverts to the magenta stub for + // flat draws. Each `set_texture_view` rebuilds the tex bind group; + // the subsequent `render_one*` reads it, so per-draw binding works + // even though all draws share one encoder. + { + let Self { + device, + queue, + xenos_pipeline, + host_texture_cache, + .. + } = self; + match cap.textures.first() { + Some((key, bytes)) => { + // Stable version: identical (key,bytes) across draws + // reuse the uploaded wgpu texture (the splash artwork is + // static). A genuine content change arrives as a new key + // (base_address/dims) from the decoder.
+ let cached = xenia_gpu::texture_cache::CachedTexture { + key: *key, + version_when_uploaded: 1, + bytes: bytes.clone(), + }; + host_texture_cache.upload(device, queue, &cached); + if let Some(view) = host_texture_cache.view_for(key) { + xenos_pipeline.set_texture_view(device, Some(view)); + } + } + None => xenos_pipeline.set_texture_view(device, None), + } + } let raw_vs = shader_blobs.get(&cap.vs_key).cloned().unwrap_or_default(); let raw_ps = shader_blobs.get(&cap.ps_key).cloned().unwrap_or_default(); let parsed_vs = xenia_gpu::ucode::parse_shader(&raw_vs);