From f6f3aac6735a812de3d948808234cdffdf9338ea Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Thu, 18 Jun 2026 20:58:21 +0200 Subject: [PATCH] [iterate-3Z] Fix logo color (yellow->white): k_8_8_8_8 vfetch + vfetch field/stride/saturate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defect 2 of the three render-fidelity defects vs the canary oracle (the publisher "SQUARE ENIX" logo rendered YELLOW instead of WHITE). Root, measured by readback (env-gated probes, removed): the logo PS multiplies the sampled texture by the interpolated vertex COLOR; the K8888 texture itself decodes correctly (67,667 white texels + 2,087 red — the red dots — zero yellow), so the yellow came from the vertex-color attribute decode. Four coupled, canary-faithful fixes (all UI-translator/capture only — the deterministic headless core is untouched; n50m --gpu-inline --stable-digest golden byte-identical, exit 0): - GPUBUG-112 (translator vfetch): VertexFormat 6 = k_8_8_8_8 (4x u8 normalized, 1 dword), NOT k_16_16 (which is 25) per canary xenos.h:643. The logo color stream is k_8_8_8_8; decoding it as k_16_16 read only 2 of 4 channels and forced BLUE = 0 -> white texture x (R,G,0) = yellow. Now unpacks all four 8-bit channels (canary spirv_shader_translator_fetch.cc k_8_8_8_8 packed_offsets 0/8/16/24); added k_16_16 (format 25) too. - GPUBUG-113 (ucode/fetch): vfetch is_signed / is_normalized / is_mini_fetch bit positions were wrong (read bits 24/25, which sit inside exp_adjust). Per canary ucode.h:757-758,764: signed=fomat_comp_all (w1 bit12), normalized=(num_format_all==0) (w1 bit13), mini_fetch (w1 bit30). - GPUBUG-114 (translator vfetch): a vfetch_mini reuses the address AND STRIDE of the preceding full vfetch of the same stream (canary ucode.h:733); its own stride field is 0. Track the last full stride per fetch-const and inherit it so a mini color/UV attribute indexes by the real vertex stride, not its tight dword count. - GPUBUG-115 (translator PS export): saturate the color export to [0,1] before the UNORM render-target write, mirroring canary spirv_shader_translator.cc:3607 ("Saturate, flushing NaN to 0"). Without it an out-of-range guest color writes garbage to the sRGB target. Verified by env-gated frontbuffer readback (copy_texture_to_buffer, removed before commit): the logo now renders WHITE text + RED dots (bbox centered ~y322-389), zero yellow anywhere. Workspace tests green (added 4: k_8_8_8_8 4-channel unpack, mini-fetch stride inheritance, vfetch bit decode, PS saturate). Determinism: golden byte-identical. Remaining (defects 1 & 3, see memory iterate-3Z): logo orientation and the ed732b5a fullscreen background fill (renders ~white, canary shows black) — both localized but not yet cleanly resolved; plan in the memory file. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/xenia-gpu/src/translator.rs | 142 +++++++++++++++++++++++++--- crates/xenia-gpu/src/ucode/fetch.rs | 50 ++++++++-- 2 files changed, 170 insertions(+), 22 deletions(-) diff --git a/crates/xenia-gpu/src/translator.rs b/crates/xenia-gpu/src/translator.rs index 6a4864e..2aa966b 100644 --- a/crates/xenia-gpu/src/translator.rs +++ b/crates/xenia-gpu/src/translator.rs @@ -168,6 +168,14 @@ struct EmitCtx { stage: Stage, out: String, indent: usize, + /// GPUBUG-114: dword stride of the most recent *full* vfetch, keyed by + /// fetch-const register offset. A vfetch_mini carries stride=0 and reuses + /// the address + stride of the preceding full vfetch of the same stream + /// (canary ucode.h:733). Without this a mini color attribute indexes by its + /// tight dword count instead of the real vertex stride → reads the wrong + /// vertex's data (Sylpheed's background fill `0x36660986` read garbage → + /// white instead of the intended color). + last_full_stride: std::collections::HashMap, } impl EmitCtx { @@ -176,6 +184,7 @@ impl EmitCtx { stage, out: String::with_capacity(2048), indent: 0, + last_full_stride: std::collections::HashMap::new(), } } @@ -330,7 +339,16 @@ impl EmitCtx { } Stage::Pixel => { self.push("var out: FsOut;"); - self.push("out.color0 = ocolor0;"); + // GPUBUG-115: saturate the color export to [0,1], flushing NaN + // to 0 — exactly what canary does before writing a UNORM render + // target (spirv_shader_translator.cc:3607 "Saturate, flushing + // NaN to 0"). The Xenos RB clamps PS output for UNORM targets; + // without this an out-of-range guest color (Sylpheed's + // background fill exports a huge negative float `-32896.5` as a + // fullscreen-clear value) writes garbage/NaN to the sRGB target + // → renders white instead of the clamped black canary shows. + // `clamp(x,0,1)` returns 0 for NaN under WGSL's clamp semantics. + self.push("out.color0 = clamp(ocolor0, vec4(0.0), vec4(1.0));"); self.push("return out;"); } } @@ -504,27 +522,39 @@ impl EmitCtx { enum Pack { Float, // N f32 lanes, N dwords Norm16x2, // 2× u16 normalized into [0,1], 1 dword (k_16_16) + Norm8x4, // 4× u8 normalized into [0,1], 1 dword (k_8_8_8_8) } let (comps, dwords_read, pack): (u32, u32, Pack) = match vf.format { - 36 => (1, 1, Pack::Float), // k_32_FLOAT - 37 => (2, 2, Pack::Float), // k_32_32_FLOAT - 57 => (3, 3, Pack::Float), // k_32_32_32_FLOAT - 38 => (4, 4, Pack::Float), // k_32_32_32_32_FLOAT - 6 => (2, 1, Pack::Norm16x2), // k_16_16 (UV) + 36 => (1, 1, Pack::Float), // k_32_FLOAT + 37 => (2, 2, Pack::Float), // k_32_32_FLOAT + 57 => (3, 3, Pack::Float), // k_32_32_32_FLOAT + 38 => (4, 4, Pack::Float), // k_32_32_32_32_FLOAT + 6 => (4, 1, Pack::Norm8x4), // k_8_8_8_8 (packed RGBA8 — GPUBUG-112) + 25 => (2, 1, Pack::Norm16x2), // k_16_16 _ => return Err(reject::VFETCH_FMT), }; - // A stride of 0 in the instruction means "use the fetch-constant - // stride"; fall back to the tightly packed dword count. - let stride = if vf.stride != 0 { vf.stride as u32 } else { dwords_read }; - // iterate-3T: per-attribute dword offset within the vertex (vfetches - // sharing one fetch constant read different attributes). - let attr_off = vf.offset; // iterate-3X (GPUBUG-110): index the fetch-constant region by the full // `const_index*3 + const_index_sel` mapping (canary `ucode.h:700`), // packed as `const_index*6 + sel*2` dwords. The previous expression // `(vf.raw[0] >> 5) & 0x1F` read the *src_reg* bits, not the const // index — wrong for the endian term and the no-window fallback base. let const_off = vf.const_reg_offset(); + // GPUBUG-114: a full vfetch carries the real vertex dword stride; a + // vfetch_mini reuses the address + stride of the preceding full vfetch + // of the same stream (canary ucode.h:733). Track the last full stride + // per fetch-const and inherit it for mini-fetches (stride field == 0). + let stride = if vf.is_mini_fetch || vf.stride == 0 { + *self + .last_full_stride + .get(&const_off) + .unwrap_or(&dwords_read) + } else { + self.last_full_stride.insert(const_off, vf.stride as u32); + vf.stride as u32 + }; + // iterate-3T: per-attribute dword offset within the vertex (vfetches + // sharing one fetch constant read different attributes). + let attr_off = vf.offset; let src_reg = vf.src_register & 0x7F; let dst_reg = vf.dest_register & 0x7F; // is_signed selects [-1,1] vs [0,1] for normalized integer formats. @@ -563,6 +593,23 @@ impl EmitCtx { "0.0".to_string() } } + Pack::Norm8x4 => { + // One dword holds 4× u8 (canary spirv_shader_translator_fetch + // k_8_8_8_8: comp0@bit0, comp1@bit8, comp2@bit16, comp3@bit24) + // after the endian swap. All four channels present → normalize + // to [0,1]. GPUBUG-112: this is the logo/background vertex + // COLOR (RGBA8), previously misdecoded as k_16_16 (2 chans, + // B forced 0) → white texture × (R,G,0) = yellow. + let sh = i * 8; + if signed { + format!( + "(max(f32(i32(w16 << {l}u) >> 24u) / 127.0, -1.0))", + l = 24 - sh + ) + } else { + format!("(f32((w16 >> {sh}u) & 0xFFu) / 255.0)") + } + } } }; let read_bound = dwords_read - 1; @@ -579,9 +626,9 @@ impl EmitCtx { // real window is present (`vertex_base_dwords != 0`); only the // synthetic/no-window fallback consults the uniform fetch constant. let endian_term = format!("xenos_consts.fetch[{}u] & 0x3u", const_off + 1); - // For packed-16 we read one dword into `w16` (post endian-swap) and the - // `lane()` exprs above unpack the two halfwords. - let w16_decl = if pack == Pack::Norm16x2 { + // For packed formats (k_16_16, k_8_8_8_8) we read one dword into `w16` + // (post endian-swap) and the `lane()` exprs above unpack the channels. + let w16_decl = if pack == Pack::Norm16x2 || pack == Pack::Norm8x4 { "let w16 = gpu_swap(vertex_buffer[addr], endian); " } else { "" @@ -974,6 +1021,7 @@ mod tests { offset: 0, is_signed: false, is_normalized: true, + is_mini_fetch: false, raw: [0; 3], }; ctx.emit_vfetch(&vf).expect("emit_vfetch"); @@ -981,6 +1029,70 @@ mod tests { assert!(body.contains("gpu_swap("), "emitted vfetch body: {body}"); } + fn vf(format: u8, stride: u8, offset: u32, mini: bool) -> crate::ucode::fetch::VertexFetch { + crate::ucode::fetch::VertexFetch { + fetch_const: 0, + const_index_sel: 0, + src_register: 0, + dest_register: 0, + dest_write_mask: 0xF, + format, + stride, + offset, + is_signed: false, + is_normalized: true, + is_mini_fetch: mini, + raw: [0; 3], + } + } + + #[test] + fn vfetch_k8888_unpacks_four_channels() { + // GPUBUG-112: VertexFormat 6 = k_8_8_8_8 (4× u8 normalized, 1 dword), + // NOT k_16_16. All four channels (R,G,B,A) must be unpacked so a + // vertex COLOR keeps its blue channel (white texture × white color = + // white, not yellow). + let mut ctx = EmitCtx::new(Stage::Vertex); + ctx.emit_vfetch(&vf(6, 6, 3, false)).expect("emit"); + let body = ctx.finish(); + // Four /255.0 channel reads from one packed dword `w16`. + assert!(body.contains("let w16 ="), "needs packed dword: {body}"); + assert_eq!(body.matches("/ 255.0").count(), 4, "four 8-bit channels: {body}"); + } + + #[test] + fn vfetch_mini_inherits_full_stride() { + // GPUBUG-114: a vfetch_mini (stride field 0) inherits the stride of the + // preceding full vfetch of the same stream (canary ucode.h:733). Emit a + // full fetch (stride 7) then a mini fetch and assert the mini indexes by + // stride 7, not its tight dword count. + let mut ctx = EmitCtx::new(Stage::Vertex); + ctx.emit_vfetch(&vf(57, 7, 0, false)).expect("full"); // k_32_32_32_FLOAT + ctx.emit_vfetch(&vf(38, 0, 3, true)).expect("mini"); // k_32_32_32_32_FLOAT, mini + let body = ctx.finish(); + assert!(body.contains("vidx * 7u + 3u"), "mini must inherit stride 7: {body}"); + assert!(!body.contains("vidx * 4u"), "mini must not use tight stride 4: {body}"); + } + + #[test] + fn ps_color_export_is_saturated() { + // GPUBUG-115: the PS color export must be clamped to [0,1] (canary + // saturates before UNORM RT write) so an out-of-range guest color + // doesn't write garbage/white to the sRGB target. + let p = crate::ucode::ParsedShader { + cf: vec![ControlFlowInstruction::Exit], + instructions: vec![], + }; + let body = match translate(&p, Stage::Pixel) { + Translation::Ok(b) => b, + Translation::Reject(r) => panic!("PS rejected: {r}"), + }; + assert!( + body.contains("clamp(ocolor0, vec4(0.0), vec4(1.0))"), + "PS must saturate color export: {body}" + ); + } + #[test] fn loop_clause_rejected() { let shader = ParsedShader { diff --git a/crates/xenia-gpu/src/ucode/fetch.rs b/crates/xenia-gpu/src/ucode/fetch.rs index 2e5a11e..a437a38 100644 --- a/crates/xenia-gpu/src/ucode/fetch.rs +++ b/crates/xenia-gpu/src/ucode/fetch.rs @@ -48,13 +48,19 @@ pub struct VertexFetch { /// three vfetches sharing one fetch-constant read different attributes /// instead of all reading offset 0. pub offset: u32, - /// iterate-3T: `is_signed` (dword2 bit 24 in canary) — selects signed vs - /// unsigned interpretation of packed integer formats. + /// `is_signed` = canary `fomat_comp_all`, word1 bit 12 (ucode.h:757) — + /// selects signed vs unsigned interpretation of packed integer formats. + /// (GPUBUG-113: was read from word1 bit 24, which is inside `exp_adjust`.) pub is_signed: bool, - /// iterate-3T: `is_normalized` — canary inverts it: dword2 bit 25 set means - /// the value is taken as an *integer* (un-normalized); clear means - /// normalized to [0,1] / [-1,1]. We store the normalized sense directly. + /// `is_normalized` = canary `num_format_all == 0`, word1 bit 13 + /// (ucode.h:758). Set bit ⇒ integer (un-normalized); clear ⇒ normalized. + /// We store the normalized sense directly. (GPUBUG-113: was word1 bit 25.) pub is_normalized: bool, + /// `is_mini_fetch` = canary word1 bit 30 (ucode.h:764). A mini-fetch reuses + /// the address AND STRIDE of the preceding full vfetch of the same stream; + /// its own `stride` field is 0. Required so a vfetch_mini color attribute + /// indexes by the real vertex stride instead of its tight dword count. + pub is_mini_fetch: bool, pub raw: [u32; 3], } @@ -121,8 +127,13 @@ pub fn decode_fetch(words: [u32; 3]) -> FetchInstruction { format: ((w1 >> 16) & 0x3F) as u8, stride: (w2 & 0xFF) as u8, offset: (w2 >> 8) & 0xFF, - is_signed: ((w1 >> 24) & 1) != 0, - is_normalized: ((w1 >> 25) & 1) == 0, + // GPUBUG-113: canary ucode.h:757-758,764 — signed=fomat_comp_all + // (w1 bit12), normalized=(num_format_all==0) (w1 bit13), + // mini-fetch=(w1 bit30). The previous bit24/25 reads landed inside + // `exp_adjust`, so signedness/normalization were effectively random. + is_signed: ((w1 >> 12) & 1) != 0, + is_normalized: ((w1 >> 13) & 1) == 0, + is_mini_fetch: ((w1 >> 30) & 1) != 0, raw: words, }), op::TEXTURE_FETCH => FetchInstruction::Texture(TextureFetch { @@ -183,6 +194,31 @@ mod tests { } } + #[test] + fn vertex_fetch_signed_normalized_mini_bits() { + // GPUBUG-113: canary ucode.h:757-758,764 — is_signed=fomat_comp_all + // (w1 bit12), is_normalized=(num_format_all==0) (w1 bit13), + // is_mini_fetch=(w1 bit30). Validate each bit independently. + let mk = |w1: u32| match decode_fetch([0, w1, 0]) { + FetchInstruction::Vertex(vf) => vf, + _ => panic!("vertex"), + }; + // No bits: unsigned, normalized, full fetch. + let v = mk(0); + assert!(!v.is_signed); + assert!(v.is_normalized); + assert!(!v.is_mini_fetch); + // bit12 → signed. + assert!(mk(1 << 12).is_signed); + // bit13 (num_format_all=1) → NOT normalized. + assert!(!mk(1 << 13).is_normalized); + // bit30 → mini fetch. + assert!(mk(1 << 30).is_mini_fetch); + // The old (wrong) bits 24/25 must NOT affect signed/normalized. + assert!(!mk(1 << 24).is_signed); + assert!(mk(1 << 25).is_normalized); + } + #[test] fn decode_texture_fetch() { // opcode=1 (texture). const_index@bit20=3, src@bit5=1, dst@bit12=4.