Files
xenia-rs/crates/xenia-ui/src/xenos_pipeline.rs
MechaCat02 e2b8860e10 Add xenia-ui crate; switch analysis store to DuckDB
Workspace gains a new xenia-ui member that owns the winit/wgpu
window, the Xenos display pipeline (xenos_pipeline + render +
texture_cache_host), HUD font/blit shaders, and the input-bridge
plumbing the app uses to surface guest framebuffers and overlays.

Workspace dependencies grow accordingly: rusqlite is replaced with
duckdb (analysis pipeline now writes DuckDB stores), and tracing /
metrics / pprof / winit / wgpu / gilrs / pollster / crossbeam /
bytemuck are added at workspace level so xenia-ui and xenia-app
share versions. Cargo.lock regenerated.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:26:48 +02:00

644 lines
25 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Host pipeline that consumes PM4 `DRAW_INDX*` captures.
//!
//! Drives [`xenia_gpu::shaders::XENOS_INTERP_WGSL`]. This file owns the
//! wgpu bind-group + pipeline + buffer surface the Xenos WGSL interpreter
//! binds to. The WGSL module is expected to declare:
//!
//! ```text
//! @group(0) @binding(0) var<uniform> xenos_draw : XenosDrawConstants; // 16 B
//! @group(0) @binding(1) var<uniform> xenos_consts : XenosConstants; // ~9.2 KB
//! @group(0) @binding(2) var<storage, read> vs_ucode : array<u32>;
//! @group(0) @binding(3) var<storage, read> ps_ucode : array<u32>;
//! @group(0) @binding(4) var<storage, read> vertex_buffer : array<u32>;
//! ```
//!
//! Texture bindings (M6) are a single-slot stub for P3b:
//!
//! ```text
//! @group(1) @binding(0) var xenos_tex : texture_2d<f32>;
//! @group(1) @binding(1) var xenos_samp : sampler;
//! ```
//!
//! The bound texture is a 1×1 magenta placeholder. Real per-slot guest
//! texture uploads + format decode land with the texture cache (P5).
use bytemuck::{Pod, Zeroable};
use wgpu::util::DeviceExt;
use xenia_gpu::shaders::XENOS_INTERP_WGSL;
use xenia_gpu::xenos_constants::XenosConstantsBlock;
/// Per-draw constants mirroring the WGSL `XenosDrawConstants` uniform
/// block. Ordering / padding matches `xenos_interp.wgsl` exactly.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct DrawConstants {
draw_index: u32,
vertex_count: u32,
prim_kind: u32,
_pad: u32,
}
/// Submitted to [`XenosPipeline::render_one`] to render one captured draw.
#[derive(Clone, Copy, Debug)]
pub struct DrawRequest {
/// Monotonic draw counter; shader uses it for per-draw colour rotation.
pub draw_index: u32,
/// Host-normalised vertex count (after primitive-processor rewrite).
pub vertex_count: u32,
/// Xenos primitive-type code; shader may branch on it in P3b+.
pub prim_kind: u32,
}
/// Reasonable upper bound on a single shader blob (dwords). Most Xbox 360
/// shaders are ≪ 4 KB; 64 KB is orders-of-magnitude slack.
const UCODE_BUFFER_MAX_DWORDS: u64 = 16 * 1024; // 64 KB each for VS & PS
/// 16 MB of vertex data — enough for any realistic Xenos draw.
const VERTEX_BUFFER_MAX_BYTES: u64 = 16 * 1024 * 1024;
pub struct XenosPipeline {
pipeline: wgpu::RenderPipeline,
draw_ctx_buffer: wgpu::Buffer,
constants_buffer: wgpu::Buffer,
vs_ucode_buffer: wgpu::Buffer,
ps_ucode_buffer: wgpu::Buffer,
vertex_buffer: wgpu::Buffer,
bind_group: wgpu::BindGroup,
/// P5: swapped per-draw when a new cached texture becomes active.
tex_bind_group: wgpu::BindGroup,
/// Layout + sampler retained so `set_texture_view` can rebuild
/// `tex_bind_group` on the fly without re-reading the pipeline.
tex_bgl: wgpu::BindGroupLayout,
sampler: wgpu::Sampler,
/// Fallback 1×1 magenta texture — used when no guest texture has been
/// uploaded yet or when a draw references an unsupported format.
dummy_view: wgpu::TextureView,
/// P7 — retained pipeline layout + compiled-pipeline cache for
/// Xenos→WGSL translator output. Keyed on `(vs_blob_key, ps_blob_key)`
/// so every (vs, ps) pair gets compiled once and re-used for every
/// subsequent draw. Interpreter pipeline remains the fallback.
pipeline_layout: wgpu::PipelineLayout,
translated_cache: std::collections::HashMap<(u32, u32), wgpu::RenderPipeline>,
pub target_format: wgpu::TextureFormat,
}
impl XenosPipeline {
pub fn new(
device: &wgpu::Device,
queue: &wgpu::Queue,
target_format: wgpu::TextureFormat,
) -> Self {
let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: Some("xenos_interp.wgsl"),
source: wgpu::ShaderSource::Wgsl(XENOS_INTERP_WGSL.into()),
});
let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("xenos bind group layout"),
entries: &[
// b0: draw_ctx (16 B uniform)
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Uniform,
has_dynamic_offset: false,
min_binding_size: std::num::NonZeroU64::new(
std::mem::size_of::<DrawConstants>() as u64,
),
},
count: None,
},
// b1: XenosConstants read-only storage (~9.2 KB). Not uniform
// because the block contains packed `array<u32>` fields and
// WGSL's uniform address space would require 16-byte stride.
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: std::num::NonZeroU64::new(
XenosConstantsBlock::SIZE as u64,
),
},
count: None,
},
// b2: vs_ucode (read-only storage)
wgpu::BindGroupLayoutEntry {
binding: 2,
visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
// b3: ps_ucode (read-only storage)
wgpu::BindGroupLayoutEntry {
binding: 3,
visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
// b4: vertex_buffer (read-only storage)
wgpu::BindGroupLayoutEntry {
binding: 4,
visibility: wgpu::ShaderStages::VERTEX,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
],
});
let tex_bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("xenos tex bind group layout"),
entries: &[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Texture {
sample_type: wgpu::TextureSampleType::Float { filterable: true },
view_dimension: wgpu::TextureViewDimension::D2,
multisampled: false,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
count: None,
},
],
});
let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("xenos pipeline layout"),
bind_group_layouts: &[&bgl, &tex_bgl],
push_constant_ranges: &[],
});
// Buffer allocation. `queue.write_buffer` uses COPY_DST; all
// interpreter-facing buffers need it.
let initial_draw = DrawConstants {
draw_index: 0,
vertex_count: 3,
prim_kind: 4,
_pad: 0,
};
let draw_ctx_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("xenos draw ctx"),
contents: bytemuck::bytes_of(&initial_draw),
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
});
let constants_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("xenos constants"),
size: XenosConstantsBlock::SIZE as u64,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let vs_ucode_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("xenos vs ucode"),
size: UCODE_BUFFER_MAX_DWORDS * 4,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let ps_ucode_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("xenos ps ucode"),
size: UCODE_BUFFER_MAX_DWORDS * 4,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let vertex_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("xenos vertex buffer"),
size: VERTEX_BUFFER_MAX_BYTES,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
// Dummy 1×1 magenta texture — placeholder until P5's texture cache
// lands. Every `interpret_texture_fetch` samples this for now so the
// interpreter can exercise textureSample paths without a real cache.
let dummy_tex = device.create_texture(&wgpu::TextureDescriptor {
label: Some("xenos dummy texture"),
size: wgpu::Extent3d {
width: 1,
height: 1,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8Unorm,
usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
view_formats: &[],
});
// Magenta (255, 0, 255, 255) so a missing-texture read visibly stands
// out on-screen when the interpreter does sample it.
queue.write_texture(
wgpu::ImageCopyTexture {
texture: &dummy_tex,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
&[0xFFu8, 0x00, 0xFF, 0xFF],
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: Some(4),
rows_per_image: Some(1),
},
wgpu::Extent3d {
width: 1,
height: 1,
depth_or_array_layers: 1,
},
);
let dummy_view = dummy_tex.create_view(&wgpu::TextureViewDescriptor::default());
let dummy_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
label: Some("xenos dummy sampler"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
});
let tex_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("xenos tex bind group"),
layout: &tex_bgl,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::TextureView(&dummy_view),
},
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::Sampler(&dummy_sampler),
},
],
});
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("xenos bind group"),
layout: &bgl,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: draw_ctx_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 1,
resource: constants_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 2,
resource: vs_ucode_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 3,
resource: ps_ucode_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 4,
resource: vertex_buffer.as_entire_binding(),
},
],
});
let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
label: Some("xenos_interp pipeline"),
layout: Some(&layout),
vertex: wgpu::VertexState {
module: &shader,
entry_point: "vs_main",
compilation_options: Default::default(),
buffers: &[],
},
fragment: Some(wgpu::FragmentState {
module: &shader,
entry_point: "fs_main",
compilation_options: Default::default(),
targets: &[Some(wgpu::ColorTargetState {
format: target_format,
blend: Some(wgpu::BlendState {
color: wgpu::BlendComponent {
src_factor: wgpu::BlendFactor::SrcAlpha,
dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha,
operation: wgpu::BlendOperation::Add,
},
alpha: wgpu::BlendComponent::OVER,
}),
write_mask: wgpu::ColorWrites::ALL,
})],
}),
primitive: wgpu::PrimitiveState {
topology: wgpu::PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: wgpu::FrontFace::Ccw,
cull_mode: None,
polygon_mode: wgpu::PolygonMode::Fill,
unclipped_depth: false,
conservative: false,
},
depth_stencil: None,
multisample: wgpu::MultisampleState::default(),
multiview: None,
cache: None,
});
Self {
pipeline,
draw_ctx_buffer,
constants_buffer,
vs_ucode_buffer,
ps_ucode_buffer,
vertex_buffer,
bind_group,
tex_bind_group,
tex_bgl,
sampler: dummy_sampler,
dummy_view,
pipeline_layout: layout,
translated_cache: std::collections::HashMap::new(),
target_format,
}
}
/// P7 — does the translator cache already have a pipeline for this
/// (vs, ps) pair?
pub fn has_translated(&self, vs_blob_key: u32, ps_blob_key: u32) -> bool {
self.translated_cache
.contains_key(&(vs_blob_key, ps_blob_key))
}
/// P7 — fetch a cached translator pipeline. `None` if not yet built.
pub fn translated_pipeline(
&self,
vs_blob_key: u32,
ps_blob_key: u32,
) -> Option<&wgpu::RenderPipeline> {
self.translated_cache
.get(&(vs_blob_key, ps_blob_key))
}
/// P7 — compile a translator-produced WGSL module into a
/// `wgpu::RenderPipeline` and insert it into the cache keyed on
/// `(vs_blob_key, ps_blob_key)`. Returns `true` on success. Duplicate
/// inserts are no-ops. Emits `gpu.shader.compile_ok` on success.
pub fn insert_translated(
&mut self,
device: &wgpu::Device,
vs_blob_key: u32,
ps_blob_key: u32,
wgsl: &str,
) -> bool {
let key = (vs_blob_key, ps_blob_key);
if self.translated_cache.contains_key(&key) {
return true;
}
let shader = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: Some("xenos translated module"),
source: wgpu::ShaderSource::Wgsl(wgsl.to_string().into()),
})
})) {
Ok(m) => m,
Err(_) => {
metrics::counter!("gpu.shader.compile_err", "stage" => "module")
.increment(1);
return false;
}
};
let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
label: Some("xenos translated pipeline"),
layout: Some(&self.pipeline_layout),
vertex: wgpu::VertexState {
module: &shader,
entry_point: "vs_main",
compilation_options: Default::default(),
buffers: &[],
},
fragment: Some(wgpu::FragmentState {
module: &shader,
entry_point: "fs_main",
compilation_options: Default::default(),
targets: &[Some(wgpu::ColorTargetState {
format: self.target_format,
blend: Some(wgpu::BlendState {
color: wgpu::BlendComponent {
src_factor: wgpu::BlendFactor::SrcAlpha,
dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha,
operation: wgpu::BlendOperation::Add,
},
alpha: wgpu::BlendComponent::OVER,
}),
write_mask: wgpu::ColorWrites::ALL,
})],
}),
primitive: wgpu::PrimitiveState {
topology: wgpu::PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: wgpu::FrontFace::Ccw,
cull_mode: None,
polygon_mode: wgpu::PolygonMode::Fill,
unclipped_depth: false,
conservative: false,
},
depth_stencil: None,
multisample: wgpu::MultisampleState::default(),
multiview: None,
cache: None,
});
self.translated_cache.insert(key, pipeline);
metrics::counter!("gpu.shader.compile_ok").increment(1);
true
}
/// Render one draw with the translator-produced pipeline instead of
/// the interpreter. Mirrors [`render_one`] except the bound pipeline
/// is swapped for `pipeline`.
pub fn render_one_with_pipeline(
&self,
queue: &wgpu::Queue,
encoder: &mut wgpu::CommandEncoder,
target_view: &wgpu::TextureView,
req: DrawRequest,
pipeline: &wgpu::RenderPipeline,
) {
let cb = DrawConstants {
draw_index: req.draw_index,
vertex_count: req.vertex_count.max(3),
prim_kind: req.prim_kind,
_pad: 0,
};
queue.write_buffer(&self.draw_ctx_buffer, 0, bytemuck::bytes_of(&cb));
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: Some("xenos translated draw"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: target_view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Load,
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
});
pass.set_pipeline(pipeline);
pass.set_bind_group(0, &self.bind_group, &[]);
pass.set_bind_group(1, &self.tex_bind_group, &[]);
let rounded = req.vertex_count.div_ceil(3) * 3;
pass.draw(0..rounded.max(3), 0..1);
}
/// Number of distinct translator pipelines cached. Surfaced to the HUD.
pub fn translated_pipeline_count(&self) -> usize {
self.translated_cache.len()
}
/// P5 — swap the active texture bound at `@group(1) @binding(0)`.
/// `view` is typically a wgpu texture view obtained from the
/// [`TextureCacheHost`]. Pass `None` to revert to the built-in dummy
/// magenta stub.
pub fn set_texture_view(&mut self, device: &wgpu::Device, view: Option<&wgpu::TextureView>) {
let bound = view.unwrap_or(&self.dummy_view);
self.tex_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("xenos tex bind group (rebind)"),
layout: &self.tex_bgl,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::TextureView(bound),
},
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::Sampler(&self.sampler),
},
],
});
}
/// Clear `target_view` to `color`, store.
pub fn clear(
&self,
encoder: &mut wgpu::CommandEncoder,
target_view: &wgpu::TextureView,
color: [f64; 4],
) {
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: Some("xenos frontbuffer clear"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: target_view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Clear(wgpu::Color {
r: color[0],
g: color[1],
b: color[2],
a: color[3],
}),
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
});
let _ = &mut pass;
}
/// Upload shader microcode + constants once (before the batch of draws
/// that share them). Skips zero-length blobs.
pub fn upload_shader_and_constants(
&self,
queue: &wgpu::Queue,
vs_ucode: &[u32],
ps_ucode: &[u32],
constants: &XenosConstantsBlock,
) {
queue.write_buffer(&self.constants_buffer, 0, bytemuck::bytes_of(constants));
if !vs_ucode.is_empty() {
let bytes: &[u8] = bytemuck::cast_slice(vs_ucode);
let max = (UCODE_BUFFER_MAX_DWORDS * 4) as usize;
queue.write_buffer(&self.vs_ucode_buffer, 0, &bytes[..bytes.len().min(max)]);
}
if !ps_ucode.is_empty() {
let bytes: &[u8] = bytemuck::cast_slice(ps_ucode);
let max = (UCODE_BUFFER_MAX_DWORDS * 4) as usize;
queue.write_buffer(&self.ps_ucode_buffer, 0, &bytes[..bytes.len().min(max)]);
}
}
/// Upload vertex data (as raw big-endian dwords — the WGSL side will
/// bswap as needed during format unpacking).
pub fn upload_vertex_data(&self, queue: &wgpu::Queue, data: &[u32]) {
if data.is_empty() {
return;
}
let bytes: &[u8] = bytemuck::cast_slice(data);
let max = VERTEX_BUFFER_MAX_BYTES as usize;
queue.write_buffer(&self.vertex_buffer, 0, &bytes[..bytes.len().min(max)]);
}
/// Render one captured draw.
pub fn render_one(
&self,
queue: &wgpu::Queue,
encoder: &mut wgpu::CommandEncoder,
target_view: &wgpu::TextureView,
req: DrawRequest,
) {
let cb = DrawConstants {
draw_index: req.draw_index,
vertex_count: req.vertex_count.max(3),
prim_kind: req.prim_kind,
_pad: 0,
};
queue.write_buffer(&self.draw_ctx_buffer, 0, bytemuck::bytes_of(&cb));
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: Some("xenos draw"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: target_view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Load,
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
});
pass.set_pipeline(&self.pipeline);
pass.set_bind_group(0, &self.bind_group, &[]);
pass.set_bind_group(1, &self.tex_bind_group, &[]);
let rounded = req.vertex_count.div_ceil(3) * 3;
pass.draw(0..rounded.max(3), 0..1);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn draw_constants_layout_matches_wgsl_uniform() {
assert_eq!(std::mem::size_of::<DrawConstants>(), 16);
}
}