Files
xenia-rs/crates/xenia-gpu/src/primitive.rs
MechaCat02 2f55d1fd7d [iterate-2X] Texture pipeline: un-stub RectangleList + draw-time texture decode
Two faithful, deterministic GPU-backend changes that make the texture path
correct for whatever textured draw the splash eventually dispatches. Both are
currently inert on Sylpheed (the textured logo draw is still gated downstream
— see below), but neither shifts the stable-digest golden, so they land safely.

1. Un-stub RectangleList primitive expansion (primitive.rs). The splash submits
   2819 RectangleList draws at 200M, all of which were REJECTED by the P3 stub
   (`gpu.primitive.rejected{rectangle_list}`) → only ~592 flat point/quad draws
   rasterized. Mirror canary's intent (primitive_processor.cc:389-456
   kRectangleListAsTriangleStrip) within our CPU index-rewrite idiom: emit each
   rect's 3 real vertices as one TriangleList triangle (v0,v1,v2), rejected=false,
   faithful host_vertex_count. The full quad (synthesized 4th corner v3=v0+v2-v1)
   needs real vertex fetch in vs_main — left as a documented TODO. Rejection
   warnings drop 2819→0.

2. Draw-time texture decode keyed off the active PS's real tfetch slots
   (gpu_system.rs + exports.rs vd_swap). Previously vd_swap decoded a hardcoded
   fetch-constant slot 0 at swap time. Now the DRAW handler parses the bound
   pixel shader (ucode::parse_shader), collects its tfetch fetch_const slots via
   new shader_metrics::tfetch_slots, reads each 6-dword fetch constant, and
   decode+caches it into GpuSystem::last_draw_textures. vd_swap publishes the
   first of these (UI binds one texture today), falling back to the legacy slot-0
   probe on flat-only frames. New span_max_version helper walks page_version over
   the trait (draw-time &dyn MemoryAccess lacks the heap's inherent
   max_page_version). Pure function of guest writes — deterministic.

Status: texture_decodes stays 0 on Sylpheed because all 6 live shaders are flat
(no tfetch); canary's textured logo shaders E59B2B3D/F7B1457 are not yet
dispatched by ours (a downstream title-state gate, the next frontier). The full
P5 decode→publish→upload→sample path is already wired; this makes the decode
side key off the real shader instead of a guess.

Validation: stable-digest golden sylpheed_n50m unchanged (draws=718 swaps=147
tex=0), regenerated twice byte-identical; 200M run shows 0 RectangleList
rejections. cargo test --workspace green (677, +2: rectangle_list_expansion,
tfetch_slots_extracts_texture_fetch_constants). No temp hooks. Branch only;
not pushed/merged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 21:34:43 +02:00

265 lines
9.5 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Primitive processor — normalize Xenos primitives into host-GPU forms.
//!
//! wgpu only exposes `PrimitiveTopology::{PointList, LineList, LineStrip,
//! TriangleList, TriangleStrip}`. For everything else (fans, quads,
//! rectangles) we rewrite indices on the CPU side so the host just sees a
//! triangle list. Ground truth: `xenia-canary/src/xenia/gpu/primitive_processor.h/cc`.
//!
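//! Scope: list, strip, fan, quad, and rectangle primitives are all accepted.
//! Rectangles are only partially expanded for now: the CPU rewrite emits the
//! `(v0, v1, v2)` triangle of each rect and does not yet synthesize the fourth
//! corner — see `expand_rectangles`.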
use crate::draw_state::{IndexSize, PrimitiveType};
/// Host primitive topology — a subset of wgpu's that we commit to.
///
/// Every Xenos primitive handled by `process` ends up as one of these; shapes
/// with no direct host equivalent (fans, quads, rectangles) are rewritten on
/// the CPU and reported as `TriangleList`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HostTopology {
/// Produced for Xenos `PointList` draws (passed through unchanged).
PointList,
/// Produced for Xenos `LineList` draws (passed through unchanged).
LineList,
/// Produced for Xenos `LineStrip` draws (passed through unchanged).
LineStrip,
/// Produced for Xenos `TriangleList` draws and for every CPU-side
/// expansion (fans, quads, rectangles). Also the placeholder topology
/// carried by rejected draws.
TriangleList,
/// Produced for Xenos `TriangleStrip` draws (passed through unchanged).
TriangleStrip,
}
/// Result of primitive processing.
///
/// Returned by `process`; tells the caller which host topology to bind, which
/// index stream to use, how many host vertices to draw, and whether the draw
/// should be skipped altogether.
#[derive(Debug, Clone)]
pub struct ProcessedPrimitive {
/// Host topology the draw must be issued with.
pub topology: HostTopology,
/// When the Xenos primitive needed client-side rewriting (fans, quads,
/// rectangles), this buffer holds the rewritten index sequence. Entries
/// are always `u32`, regardless of the width of the guest index buffer.
/// `None` means the input index buffer is usable as-is.
pub rewritten_indices: Option<Vec<u32>>,
/// Post-processing vertex count — equals the input count when indices
/// pass through unchanged, and the length of `rewritten_indices` when the
/// primitive was expanded. Zero for rejected draws.
pub host_vertex_count: u32,
/// `true` if we rejected the primitive (unsupported shape) and the
/// caller should skip this draw. Logged via `tracing::warn!`.
pub rejected: bool,
}
/// Normalize a single Xenos draw into something the host GPU can consume.
///
/// * `primitive` — the guest primitive type of the draw.
/// * `vertex_count` — number of guest vertices (or indices) in the draw.
/// * `indices` — `None` for `AutoIndex` draws; otherwise the decoded index
///   stream (already endian-converted / widened to `u32` by the caller).
///
/// Shapes the host supports natively are forwarded untouched. Fans, quads and
/// rectangles are rewritten into a triangle list on the CPU. `None` and
/// unknown primitive types are rejected: a warning is logged, the
/// `gpu.primitive.rejected` counter is bumped, and the returned value has
/// `rejected == true` with a zero vertex count.
pub fn process(
    primitive: PrimitiveType,
    vertex_count: u32,
    indices: Option<&[u32]>,
) -> ProcessedPrimitive {
    match primitive {
        // Shapes with no host equivalent: rewrite the index stream.
        PrimitiveType::TriangleFan => expand_fan(indices, vertex_count),
        PrimitiveType::QuadList => expand_quads(indices, vertex_count),
        PrimitiveType::RectangleList => expand_rectangles(indices, vertex_count),

        // Shapes the host draws natively: nothing to rewrite.
        PrimitiveType::PointList => pass_through(HostTopology::PointList, vertex_count),
        PrimitiveType::LineList => pass_through(HostTopology::LineList, vertex_count),
        PrimitiveType::LineStrip => pass_through(HostTopology::LineStrip, vertex_count),
        PrimitiveType::TriangleList => pass_through(HostTopology::TriangleList, vertex_count),
        PrimitiveType::TriangleStrip => pass_through(HostTopology::TriangleStrip, vertex_count),

        // Anything else cannot be drawn; tell the caller to skip it.
        PrimitiveType::None | PrimitiveType::Unknown(_) => {
            tracing::warn!(?primitive, "gpu: rejecting unsupported primitive");
            metrics::counter!("gpu.primitive.rejected").increment(1);
            ProcessedPrimitive {
                rejected: true,
                // Placeholder topology, no indices, zero vertices.
                ..pass_through(HostTopology::TriangleList, 0)
            }
        }
    }
}
/// Build the result for a primitive the host can draw as-is: the given
/// topology, no rewritten index buffer, the vertex count unchanged, and the
/// draw not rejected.
fn pass_through(topology: HostTopology, vertex_count: u32) -> ProcessedPrimitive {
    let host_vertex_count = vertex_count;
    ProcessedPrimitive {
        rejected: false,
        rewritten_indices: None,
        host_vertex_count,
        topology,
    }
}
/// Convert a triangle fan to a triangle list. Fan indices `[0, 1, 2, 3, 4]`
/// expand to triangles `(0,1,2), (0,2,3), (0,3,4)` — 3 × (n-2) host indices.
///
/// `indices` is the decoded index stream, or `None` for auto-indexed draws
/// (vertex `i` is then simply index `i`). `vertex_count` is the number of fan
/// vertices the draw claims to have.
///
/// If the index stream holds fewer entries than `vertex_count`, only the
/// entries that are actually present are used, rather than panicking on an
/// out-of-bounds read. Fewer than three usable vertices yield an empty,
/// non-rejected triangle list.
fn expand_fan(indices: Option<&[u32]>, vertex_count: u32) -> ProcessedPrimitive {
    // Clamp to what the index stream really contains so `buf[i]` below can
    // never index past the end of the slice.
    let claimed = vertex_count as usize;
    let usable = indices.map_or(claimed, |buf| claimed.min(buf.len()));

    if usable < 3 {
        // Not even one triangle: nothing to draw, but not an error either.
        return ProcessedPrimitive {
            topology: HostTopology::TriangleList,
            rewritten_indices: Some(Vec::new()),
            host_vertex_count: 0,
            rejected: false,
        };
    }

    let fetch = |i: usize| -> u32 {
        match indices {
            Some(buf) => buf[i],
            // `i < usable <= vertex_count`, so the value fits in a `u32`.
            None => i as u32,
        }
    };

    // Every triangle shares the first vertex of the fan.
    let apex = fetch(0);
    let mut out = Vec::with_capacity(3 * (usable - 2));
    for i in 1..usable - 1 {
        out.extend_from_slice(&[apex, fetch(i), fetch(i + 1)]);
    }

    let host_vertex_count = out.len() as u32;
    ProcessedPrimitive {
        topology: HostTopology::TriangleList,
        rewritten_indices: Some(out),
        host_vertex_count,
        rejected: false,
    }
}
/// Convert a quad list (groups of 4) to a triangle list (groups of 6).
///
/// Each quad `(a, b, c, d)` becomes the two triangles `(a, b, c)` and
/// `(a, c, d)`. Trailing vertices that do not form a complete quad are
/// dropped.
///
/// `indices` is the decoded index stream, or `None` for auto-indexed draws
/// (vertex `i` is then simply index `i`). If the index stream holds fewer
/// entries than `vertex_count`, only the complete quads that are actually
/// present are expanded, rather than panicking on an out-of-bounds read.
fn expand_quads(indices: Option<&[u32]>, vertex_count: u32) -> ProcessedPrimitive {
    // Clamp to what the index stream really contains so `buf[i]` below can
    // never index past the end of the slice.
    let claimed = vertex_count as usize;
    let usable = indices.map_or(claimed, |buf| claimed.min(buf.len()));
    let quad_count = usable / 4;

    let fetch = |i: usize| -> u32 {
        match indices {
            Some(buf) => buf[i],
            // `i < usable <= vertex_count`, so the value fits in a `u32`.
            None => i as u32,
        }
    };

    let mut out = Vec::with_capacity(6 * quad_count);
    for q in 0..quad_count {
        let base = q * 4;
        let (a, b, c, d) = (fetch(base), fetch(base + 1), fetch(base + 2), fetch(base + 3));
        // Split the quad along the a–c diagonal.
        out.extend_from_slice(&[a, b, c, a, c, d]);
    }

    let host_vertex_count = out.len() as u32;
    ProcessedPrimitive {
        topology: HostTopology::TriangleList,
        rewritten_indices: Some(out),
        host_vertex_count,
        rejected: false,
    }
}
/// Rectangle lists: a Xenos-specific primitive where each group of 3
/// vertices defines a rectangle; the 4th corner is extrapolated as
/// `v3 = v0 + v2 - v1` (parallelogram completion). Canary expands this in a
/// host vertex-shader variant (`kRectangleListAsTriangleStrip`,
/// `primitive_processor.cc:389-456`): a 4-vertex triangle strip per rect with
/// the 4th corner synthesized *in the VS* from the host-vertex index.
///
/// Our replay pipeline has no host-VS corner synthesis (and the procedural
/// `vs_main` does not consume `rewritten_indices` yet), so we mirror the
/// `expand_quads`/`expand_fan` CPU idiom and emit the 3 real vertices of each
/// rect as one triangle list `(v0,v1,v2)` — the visible lower half of the
/// rect. This un-rejects the draw and gives a faithful `host_vertex_count`.
///
/// `indices` is the decoded index stream, or `None` for auto-indexed draws.
/// Trailing vertices that do not form a complete group of 3 are dropped. If
/// the index stream holds fewer entries than `vertex_count`, only the complete
/// groups that are actually present are emitted, rather than panicking on an
/// out-of-bounds read.
///
/// Each call bumps the `gpu.primitive.expanded{shape="rectangle_list"}`
/// counter once.
///
/// TODO: once `vs_main` does real vertex fetch + interpolation, upgrade to the
/// full quad — 6 indices `[v0,v1,v2, v2,v1,v3]` with a synthesized `v3` corner
/// — mirroring canary's `kRectangleListAsTriangleStrip`.
fn expand_rectangles(indices: Option<&[u32]>, vertex_count: u32) -> ProcessedPrimitive {
    // Clamp to what the index stream really contains so `buf[i]` below can
    // never index past the end of the slice.
    let claimed = vertex_count as usize;
    let usable = indices.map_or(claimed, |buf| claimed.min(buf.len()));
    let rect_count = usable / 3;

    let fetch = |i: usize| -> u32 {
        match indices {
            Some(buf) => buf[i],
            // `i < usable <= vertex_count`, so the value fits in a `u32`.
            None => i as u32,
        }
    };

    let mut out = Vec::with_capacity(3 * rect_count);
    for r in 0..rect_count {
        let base = r * 3;
        // Only the three guest-supplied corners; no synthesized 4th corner yet.
        out.extend_from_slice(&[fetch(base), fetch(base + 1), fetch(base + 2)]);
    }

    let host_vertex_count = out.len() as u32;
    metrics::counter!("gpu.primitive.expanded", "shape" => "rectangle_list").increment(1);
    ProcessedPrimitive {
        topology: HostTopology::TriangleList,
        rewritten_indices: Some(out),
        host_vertex_count,
        rejected: false,
    }
}
/// Decode a raw guest index buffer into host-order `u32` indices. The
/// primitive processor normalizes to u32 so downstream wgpu pipeline
/// descriptors stay simple.
///
/// * `raw` — the index bytes as stored by the guest (big-endian).
/// * `size` — width of each guest index: 16-bit values are widened to `u32`,
///   32-bit values are only byte-swapped.
/// * `count` — maximum number of indices to decode.
///
/// Returns at most `count` indices. If `raw` is too short, decoding stops at
/// the last complete index, so the result may be shorter than `count`. The
/// allocation is sized from the indices actually decoded, so an oversized
/// `count` with a short buffer cannot trigger a huge allocation.
pub fn widen_indices(raw: &[u8], size: IndexSize, count: u32) -> Vec<u32> {
    let wanted = count as usize;
    match size {
        IndexSize::Sixteen => raw
            .chunks_exact(2)
            .take(wanted)
            // Xenos indices are big-endian on the wire.
            .map(|b| u32::from(u16::from_be_bytes([b[0], b[1]])))
            .collect(),
        IndexSize::ThirtyTwo => raw
            .chunks_exact(4)
            .take(wanted)
            // Xenos indices are big-endian on the wire.
            .map(|b| u32::from_be_bytes([b[0], b[1], b[2], b[3]]))
            .collect(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A natively supported topology comes back untouched.
    #[test]
    fn triangle_list_passes_through() {
        let out = process(PrimitiveType::TriangleList, 6, None);
        assert!(!out.rejected);
        assert_eq!(out.host_vertex_count, 6);
        assert_eq!(out.rewritten_indices, None);
        assert_eq!(out.topology, HostTopology::TriangleList);
    }

    /// Fan of 5 vertices → triangles (0,1,2), (0,2,3), (0,3,4).
    #[test]
    fn fan_to_list_expands_correctly() {
        let out = process(PrimitiveType::TriangleFan, 5, None);
        assert_eq!(out.topology, HostTopology::TriangleList);
        assert_eq!(out.host_vertex_count, 9);
        let expected: Vec<u32> = vec![0, 1, 2, 0, 2, 3, 0, 3, 4];
        assert_eq!(out.rewritten_indices, Some(expected));
    }

    /// Two quads → four triangles, each quad split along its first diagonal.
    #[test]
    fn quad_list_expansion() {
        let out = process(PrimitiveType::QuadList, 8, None);
        let expected: Vec<u32> = vec![0, 1, 2, 0, 2, 3, 4, 5, 6, 4, 6, 7];
        assert_eq!(out.rewritten_indices, Some(expected));
    }

    /// 2 rects (6 verts) → one triangle (v0,v1,v2) per rect, not rejected.
    #[test]
    fn rectangle_list_expansion() {
        let out = process(PrimitiveType::RectangleList, 6, None);
        assert!(!out.rejected);
        assert_eq!(out.topology, HostTopology::TriangleList);
        assert_eq!(out.host_vertex_count, 6);
        let expected: Vec<u32> = (0..6).collect();
        assert_eq!(out.rewritten_indices, Some(expected));
    }

    /// 3 indices [1, 2, 0x1234] stored as big-endian u16.
    #[test]
    fn widen_u16_indices_big_endian() {
        let wire: [u8; 6] = [0, 1, 0, 2, 0x12, 0x34];
        let widened = widen_indices(&wire, IndexSize::Sixteen, 3);
        assert_eq!(widened, [1u32, 2, 0x1234]);
    }

    /// Unknown primitive codes are flagged for the caller to skip.
    #[test]
    fn rejects_unknown_primitive() {
        let out = process(PrimitiveType::Unknown(0x2A), 3, None);
        assert!(out.rejected);
    }
}