//! Host pipeline that consumes PM4 `DRAW_INDX*` captures. //! //! Drives [`xenia_gpu::shaders::XENOS_INTERP_WGSL`]. This file owns the //! wgpu bind-group + pipeline + buffer surface the Xenos WGSL interpreter //! binds to. The WGSL module is expected to declare: //! //! ```text //! @group(0) @binding(0) var xenos_draw : XenosDrawConstants; // 16 B //! @group(0) @binding(1) var xenos_consts : XenosConstants; // ~9.2 KB //! @group(0) @binding(2) var vs_ucode : array; //! @group(0) @binding(3) var ps_ucode : array; //! @group(0) @binding(4) var vertex_buffer : array; //! ``` //! //! Texture bindings (M6) are a single-slot stub for P3b: //! //! ```text //! @group(1) @binding(0) var xenos_tex : texture_2d; //! @group(1) @binding(1) var xenos_samp : sampler; //! ``` //! //! The bound texture is a 1×1 magenta placeholder. Real per-slot guest //! texture uploads + format decode land with the texture cache (P5). use bytemuck::{Pod, Zeroable}; use wgpu::util::DeviceExt; use xenia_gpu::shaders::XENOS_INTERP_WGSL; use xenia_gpu::xenos_constants::XenosConstantsBlock; /// Per-draw constants mirroring the WGSL `XenosDrawConstants` uniform /// block. Ordering / padding matches `xenos_interp.wgsl` exactly. #[repr(C)] #[derive(Clone, Copy, Pod, Zeroable)] struct DrawConstants { draw_index: u32, vertex_count: u32, prim_kind: u32, /// iterate-3O: guest dword base of the uploaded `vertex_buffer` window. /// The WGSL subtracts this from the absolute vertex-fetch address. vertex_base_dwords: u32, /// iterate-3S: guest→host NDC XY transform (mirrors canary /// `GetHostViewportInfo`). `clip.xy = pos.xy * ndc_scale + ndc_offset*pos.w`. /// Y is pre-flipped for wgpu. 16 bytes so the block stays 16-byte aligned. ndc_scale: [f32; 2], ndc_offset: [f32; 2], } /// iterate-3Y: the per-draw host color/blend/write-mask render state, decoded /// from the guest registers (`RB_BLENDCONTROL0` / `RB_COLOR_MASK`). Used both /// as part of the pipeline-cache key and to build the `wgpu::ColorTargetState`. 
/// Mirrors canary's `GetColorBlendStateForRenderTarget` (D3D12 /// `pipeline_cache.cc`): the factors come straight from `RB_BLENDCONTROL`, /// and a zero write-mask forces the no-blend `One,Zero` equation. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] pub struct RenderState { /// `RB_BLENDCONTROL0` raw value (RT0). `0x00010001` (One,Zero / One,Zero, /// Add) is the opaque case. pub blend_control: u32, /// RT0 nibble of `RB_COLOR_MASK` (bit0=R … bit3=A). 0 = write nothing. pub color_mask: u8, } impl RenderState { /// Fully-opaque, all-channels state (the legacy fixed behaviour). Used for /// procedural/synthetic draws that have no captured guest state. pub const OPAQUE: RenderState = RenderState { blend_control: 0x0001_0001, color_mask: 0xF, }; /// Map a Xenos `BlendFactor` (5-bit field) to a wgpu `BlendFactor`, /// mirroring canary `kBlendFactorMap` (D3D12 `pipeline_cache.cc:1504`). fn map_factor(f: u32) -> wgpu::BlendFactor { match f { 0 => wgpu::BlendFactor::Zero, 1 => wgpu::BlendFactor::One, 4 => wgpu::BlendFactor::Src, 5 => wgpu::BlendFactor::OneMinusSrc, 6 => wgpu::BlendFactor::SrcAlpha, 7 => wgpu::BlendFactor::OneMinusSrcAlpha, 8 => wgpu::BlendFactor::Dst, 9 => wgpu::BlendFactor::OneMinusDst, 10 => wgpu::BlendFactor::DstAlpha, 11 => wgpu::BlendFactor::OneMinusDstAlpha, 12 => wgpu::BlendFactor::Constant, 13 => wgpu::BlendFactor::OneMinusConstant, 14 => wgpu::BlendFactor::Constant, 15 => wgpu::BlendFactor::OneMinusConstant, 16 => wgpu::BlendFactor::SrcAlphaSaturated, // 2/3 and >16 are undefined on Xenos; canary maps to Zero. _ => wgpu::BlendFactor::Zero, } } /// Map a Xenos `BlendFactor` for the *alpha* channel, mirroring canary /// `kBlendFactorAlphaMap` (color-mode factors collapse to alpha). 
fn map_factor_alpha(f: u32) -> wgpu::BlendFactor { match f { 4 => wgpu::BlendFactor::SrcAlpha, 5 => wgpu::BlendFactor::OneMinusSrcAlpha, 8 => wgpu::BlendFactor::DstAlpha, 9 => wgpu::BlendFactor::OneMinusDstAlpha, other => Self::map_factor(other), } } fn map_op(o: u32) -> wgpu::BlendOperation { match o { 0 => wgpu::BlendOperation::Add, 1 => wgpu::BlendOperation::Subtract, 2 => wgpu::BlendOperation::Min, 3 => wgpu::BlendOperation::Max, 4 => wgpu::BlendOperation::ReverseSubtract, _ => wgpu::BlendOperation::Add, } } /// Build the `wgpu::ColorTargetState` for this draw. fn color_target(&self, format: wgpu::TextureFormat) -> wgpu::ColorTargetState { let bc = self.blend_control; let color_src = bc & 0x1F; let color_op = (bc >> 5) & 0x7; let color_dst = (bc >> 8) & 0x1F; let alpha_src = (bc >> 16) & 0x1F; let alpha_op = (bc >> 21) & 0x7; let alpha_dst = (bc >> 24) & 0x1F; // wgpu requires `blend: None` when nothing would be written; also the // `One,Zero,Add` identity is the opaque case (canary's no-blend), which // we express as `blend: None` so it's a plain overwrite. 
let is_opaque = color_src == 1 && color_dst == 0 && color_op == 0 && alpha_src == 1 && alpha_dst == 0 && alpha_op == 0; let blend = if is_opaque { None } else { Some(wgpu::BlendState { color: wgpu::BlendComponent { src_factor: Self::map_factor(color_src), dst_factor: Self::map_factor(color_dst), operation: Self::map_op(color_op), }, alpha: wgpu::BlendComponent { src_factor: Self::map_factor_alpha(alpha_src), dst_factor: Self::map_factor_alpha(alpha_dst), operation: Self::map_op(alpha_op), }, }) }; let mut write_mask = wgpu::ColorWrites::empty(); if self.color_mask & 0x1 != 0 { write_mask |= wgpu::ColorWrites::RED; } if self.color_mask & 0x2 != 0 { write_mask |= wgpu::ColorWrites::GREEN; } if self.color_mask & 0x4 != 0 { write_mask |= wgpu::ColorWrites::BLUE; } if self.color_mask & 0x8 != 0 { write_mask |= wgpu::ColorWrites::ALPHA; } wgpu::ColorTargetState { format, blend, write_mask, } } } /// Submitted to [`XenosPipeline::render_one`] to render one captured draw. #[derive(Clone, Copy, Debug)] pub struct DrawRequest { /// Monotonic draw counter; shader uses it for per-draw colour rotation. pub draw_index: u32, /// Host-normalised vertex count (after primitive-processor rewrite). pub vertex_count: u32, /// Xenos primitive-type code; shader may branch on it in P3b+. pub prim_kind: u32, /// iterate-3O: guest dword base of the per-draw vertex window uploaded to /// `vertex_buffer` (b4). 0 = no real vertex window (procedural fallback). pub vertex_base_dwords: u32, /// iterate-3S: guest→host NDC XY transform (Y pre-flipped). When all-zero /// the shader leaves the position untransformed (procedural fallback). pub ndc_scale: [f32; 2], pub ndc_offset: [f32; 2], } /// Reasonable upper bound on a single shader blob (dwords). Most Xbox 360 /// shaders are ≪ 4 KB; 64 KB is orders-of-magnitude slack. const UCODE_BUFFER_MAX_DWORDS: u64 = 16 * 1024; // 64 KB each for VS & PS /// 16 MB of vertex data — enough for any realistic Xenos draw. 
const VERTEX_BUFFER_MAX_BYTES: u64 = 16 * 1024 * 1024; pub struct XenosPipeline { /// Interpreter pipeline with the legacy fixed (alpha-blend) state. Kept as /// the default; per-state variants are built lazily in `interp_cache`. pipeline: wgpu::RenderPipeline, /// iterate-3Y: the interpreter WGSL module, retained so per-render-state /// interpreter pipelines can be compiled on demand. interp_shader: wgpu::ShaderModule, /// iterate-3Y: interpreter pipelines keyed on the per-draw `RenderState` /// (blend + write mask), so flat/alpha/opaque draws composite correctly /// even when their (vs,ps) didn't translate. interp_cache: std::collections::HashMap, draw_ctx_buffer: wgpu::Buffer, constants_buffer: wgpu::Buffer, vs_ucode_buffer: wgpu::Buffer, ps_ucode_buffer: wgpu::Buffer, vertex_buffer: wgpu::Buffer, bind_group: wgpu::BindGroup, /// P5: swapped per-draw when a new cached texture becomes active. tex_bind_group: wgpu::BindGroup, /// Layout + sampler retained so `set_texture_view` can rebuild /// `tex_bind_group` on the fly without re-reading the pipeline. tex_bgl: wgpu::BindGroupLayout, sampler: wgpu::Sampler, /// Fallback 1×1 magenta texture — used when no guest texture has been /// uploaded yet or when a draw references an unsupported format. dummy_view: wgpu::TextureView, /// P7 — retained pipeline layout + compiled-pipeline cache for /// Xenos→WGSL translator output. Keyed on `(vs_blob_key, ps_blob_key)` /// so every (vs, ps) pair gets compiled once and re-used for every /// subsequent draw. Interpreter pipeline remains the fallback. pipeline_layout: wgpu::PipelineLayout, /// iterate-3Y: cached translator pipelines keyed on the shader pair AND the /// per-draw render state, so the same (vs,ps) with different blend/mask /// composites correctly. The translated WGSL module is itself cached per /// (vs,ps) so re-translation only happens once. 
translated_cache: std::collections::HashMap<(u32, u32, RenderState), wgpu::RenderPipeline>, translated_modules: std::collections::HashMap<(u32, u32), wgpu::ShaderModule>, pub target_format: wgpu::TextureFormat, } impl XenosPipeline { pub fn new( device: &wgpu::Device, queue: &wgpu::Queue, target_format: wgpu::TextureFormat, ) -> Self { let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: Some("xenos_interp.wgsl"), source: wgpu::ShaderSource::Wgsl(XENOS_INTERP_WGSL.into()), }); let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { label: Some("xenos bind group layout"), entries: &[ // b0: draw_ctx (16 B uniform) wgpu::BindGroupLayoutEntry { binding: 0, visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, has_dynamic_offset: false, min_binding_size: std::num::NonZeroU64::new( std::mem::size_of::() as u64, ), }, count: None, }, // b1: XenosConstants read-only storage (~9.2 KB). Not uniform // because the block contains packed `array` fields and // WGSL's uniform address space would require 16-byte stride. 
wgpu::BindGroupLayoutEntry { binding: 1, visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: std::num::NonZeroU64::new( XenosConstantsBlock::SIZE as u64, ), }, count: None, }, // b2: vs_ucode (read-only storage) wgpu::BindGroupLayoutEntry { binding: 2, visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: None, }, count: None, }, // b3: ps_ucode (read-only storage) wgpu::BindGroupLayoutEntry { binding: 3, visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: None, }, count: None, }, // b4: vertex_buffer (read-only storage) wgpu::BindGroupLayoutEntry { binding: 4, visibility: wgpu::ShaderStages::VERTEX, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: None, }, count: None, }, ], }); let tex_bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { label: Some("xenos tex bind group layout"), entries: &[ wgpu::BindGroupLayoutEntry { binding: 0, visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, ty: wgpu::BindingType::Texture { sample_type: wgpu::TextureSampleType::Float { filterable: true }, view_dimension: wgpu::TextureViewDimension::D2, multisampled: false, }, count: None, }, wgpu::BindGroupLayoutEntry { binding: 1, visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), count: None, }, ], }); let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: Some("xenos pipeline layout"), 
bind_group_layouts: &[&bgl, &tex_bgl], push_constant_ranges: &[], }); // Buffer allocation. `queue.write_buffer` uses COPY_DST; all // interpreter-facing buffers need it. let initial_draw = DrawConstants { draw_index: 0, vertex_count: 3, prim_kind: 4, vertex_base_dwords: 0, ndc_scale: [0.0, 0.0], ndc_offset: [0.0, 0.0], }; let draw_ctx_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: Some("xenos draw ctx"), contents: bytemuck::bytes_of(&initial_draw), usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, }); let constants_buffer = device.create_buffer(&wgpu::BufferDescriptor { label: Some("xenos constants"), size: XenosConstantsBlock::SIZE as u64, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, mapped_at_creation: false, }); let vs_ucode_buffer = device.create_buffer(&wgpu::BufferDescriptor { label: Some("xenos vs ucode"), size: UCODE_BUFFER_MAX_DWORDS * 4, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, mapped_at_creation: false, }); let ps_ucode_buffer = device.create_buffer(&wgpu::BufferDescriptor { label: Some("xenos ps ucode"), size: UCODE_BUFFER_MAX_DWORDS * 4, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, mapped_at_creation: false, }); let vertex_buffer = device.create_buffer(&wgpu::BufferDescriptor { label: Some("xenos vertex buffer"), size: VERTEX_BUFFER_MAX_BYTES, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, mapped_at_creation: false, }); // Dummy 1×1 magenta texture — placeholder until P5's texture cache // lands. Every `interpret_texture_fetch` samples this for now so the // interpreter can exercise textureSample paths without a real cache. 
let dummy_tex = device.create_texture(&wgpu::TextureDescriptor { label: Some("xenos dummy texture"), size: wgpu::Extent3d { width: 1, height: 1, depth_or_array_layers: 1, }, mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, format: wgpu::TextureFormat::Rgba8Unorm, usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, view_formats: &[], }); // iterate-3Y: transparent black (0,0,0,0). When a textured draw's // real texture can't be resolved (e.g. its sampler slot is shadowed by // a vertex-fetch constant), sampling a *transparent* texel makes the // draw a no-op under its real premultiplied-alpha blend — instead of // fabricating an opaque magenta that overpaints everything (the old // debug stub). This removes a fake rather than adding one: we never // invent visible pixels for an unresolved texture. queue.write_texture( wgpu::ImageCopyTexture { texture: &dummy_tex, mip_level: 0, origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, &[0x00u8, 0x00, 0x00, 0x00], wgpu::ImageDataLayout { offset: 0, bytes_per_row: Some(4), rows_per_image: Some(1), }, wgpu::Extent3d { width: 1, height: 1, depth_or_array_layers: 1, }, ); let dummy_view = dummy_tex.create_view(&wgpu::TextureViewDescriptor::default()); let dummy_sampler = device.create_sampler(&wgpu::SamplerDescriptor { label: Some("xenos dummy sampler"), address_mode_u: wgpu::AddressMode::ClampToEdge, address_mode_v: wgpu::AddressMode::ClampToEdge, address_mode_w: wgpu::AddressMode::ClampToEdge, mag_filter: wgpu::FilterMode::Linear, min_filter: wgpu::FilterMode::Linear, mipmap_filter: wgpu::FilterMode::Nearest, ..Default::default() }); let tex_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { label: Some("xenos tex bind group"), layout: &tex_bgl, entries: &[ wgpu::BindGroupEntry { binding: 0, resource: wgpu::BindingResource::TextureView(&dummy_view), }, wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::Sampler(&dummy_sampler), 
}, ], }); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { label: Some("xenos bind group"), layout: &bgl, entries: &[ wgpu::BindGroupEntry { binding: 0, resource: draw_ctx_buffer.as_entire_binding(), }, wgpu::BindGroupEntry { binding: 1, resource: constants_buffer.as_entire_binding(), }, wgpu::BindGroupEntry { binding: 2, resource: vs_ucode_buffer.as_entire_binding(), }, wgpu::BindGroupEntry { binding: 3, resource: ps_ucode_buffer.as_entire_binding(), }, wgpu::BindGroupEntry { binding: 4, resource: vertex_buffer.as_entire_binding(), }, ], }); let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { label: Some("xenos_interp pipeline"), layout: Some(&layout), vertex: wgpu::VertexState { module: &shader, entry_point: "vs_main", compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, entry_point: "fs_main", compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: target_format, blend: Some(wgpu::BlendState { color: wgpu::BlendComponent { src_factor: wgpu::BlendFactor::SrcAlpha, dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, operation: wgpu::BlendOperation::Add, }, alpha: wgpu::BlendComponent::OVER, }), write_mask: wgpu::ColorWrites::ALL, })], }), primitive: wgpu::PrimitiveState { topology: wgpu::PrimitiveTopology::TriangleList, strip_index_format: None, front_face: wgpu::FrontFace::Ccw, cull_mode: None, polygon_mode: wgpu::PolygonMode::Fill, unclipped_depth: false, conservative: false, }, depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, cache: None, }); Self { pipeline, interp_shader: shader, interp_cache: std::collections::HashMap::new(), draw_ctx_buffer, constants_buffer, vs_ucode_buffer, ps_ucode_buffer, vertex_buffer, bind_group, tex_bind_group, tex_bgl, sampler: dummy_sampler, dummy_view, pipeline_layout: layout, translated_cache: std::collections::HashMap::new(), translated_modules: 
std::collections::HashMap::new(), target_format, } } /// P7 — has the translator already produced a WGSL *module* for this /// (vs, ps) pair? (A per-render-state pipeline may still need building.) pub fn has_translated(&self, vs_blob_key: u32, ps_blob_key: u32) -> bool { self.translated_modules .contains_key(&(vs_blob_key, ps_blob_key)) } /// P7 — compile a translator-produced WGSL module and cache it keyed on /// `(vs_blob_key, ps_blob_key)`. The actual `RenderPipeline` (which also /// depends on the per-draw blend/mask state) is built lazily by /// [`render_one_translated`]. Returns `true` on success. pub fn insert_translated( &mut self, device: &wgpu::Device, vs_blob_key: u32, ps_blob_key: u32, wgsl: &str, ) -> bool { let key = (vs_blob_key, ps_blob_key); if self.translated_modules.contains_key(&key) { return true; } let shader = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { device.create_shader_module(wgpu::ShaderModuleDescriptor { label: Some("xenos translated module"), source: wgpu::ShaderSource::Wgsl(wgsl.to_string().into()), }) })) { Ok(m) => m, Err(_) => { metrics::counter!("gpu.shader.compile_err", "stage" => "module") .increment(1); return false; } }; self.translated_modules.insert(key, shader); metrics::counter!("gpu.shader.compile_ok").increment(1); true } /// iterate-3Y: ensure a translator pipeline exists for `(vs,ps,rstate)`, /// building it from the cached module + the per-draw color/blend target. 
///
/// Returns `true` when a pipeline for the key is cached (already present or
/// freshly built). Returns `false` when no module is cached for
/// `(vs_key, ps_key)`, or when pipeline creation panics — e.g. a translated
/// module that does not fit the shared pipeline layout. Module creation in
/// `insert_translated` is guarded the same way, so a bad translation makes
/// the caller fall back to the interpreter instead of aborting.
fn ensure_translated_for_state(
    &mut self,
    device: &wgpu::Device,
    vs_key: u32,
    ps_key: u32,
    rstate: RenderState,
) -> bool {
    let pkey = (vs_key, ps_key, rstate);
    if self.translated_cache.contains_key(&pkey) {
        return true;
    }
    let Some(module) = self.translated_modules.get(&(vs_key, ps_key)) else {
        return false;
    };
    let layout = &self.pipeline_layout;
    let target = rstate.color_target(self.target_format);
    let built = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
            label: Some("xenos translated pipeline"),
            layout: Some(layout),
            vertex: wgpu::VertexState {
                module,
                entry_point: "vs_main",
                compilation_options: Default::default(),
                buffers: &[],
            },
            fragment: Some(wgpu::FragmentState {
                module,
                entry_point: "fs_main",
                compilation_options: Default::default(),
                targets: &[Some(target)],
            }),
            primitive: wgpu::PrimitiveState {
                topology: wgpu::PrimitiveTopology::TriangleList,
                strip_index_format: None,
                front_face: wgpu::FrontFace::Ccw,
                cull_mode: None,
                polygon_mode: wgpu::PolygonMode::Fill,
                unclipped_depth: false,
                conservative: false,
            },
            depth_stencil: None,
            multisample: wgpu::MultisampleState::default(),
            multiview: None,
            cache: None,
        })
    }));
    let Ok(pipeline) = built else {
        // Same counter as the module stage, distinguished by the label.
        metrics::counter!("gpu.shader.compile_err", "stage" => "pipeline").increment(1);
        return false;
    };
    self.translated_cache.insert(pkey, pipeline);
    true
}

/// iterate-3Y: ensure an interpreter pipeline exists for `rstate`.
///
/// Does nothing when `interp_cache` already holds an entry for `rstate`;
/// otherwise builds a pipeline from the retained interpreter module and the
/// colour target derived from `rstate`, then caches it.
fn ensure_interp_for_state(&mut self, device: &wgpu::Device, rstate: RenderState) {
    if !self.interp_cache.contains_key(&rstate) {
        let interp = &self.interp_shader;
        let color_targets = [Some(rstate.color_target(self.target_format))];
        let triangle_list = wgpu::PrimitiveState {
            topology: wgpu::PrimitiveTopology::TriangleList,
            strip_index_format: None,
            front_face: wgpu::FrontFace::Ccw,
            cull_mode: None,
            polygon_mode: wgpu::PolygonMode::Fill,
            unclipped_depth: false,
            conservative: false,
        };
        let descriptor = wgpu::RenderPipelineDescriptor {
            label: Some("xenos interp pipeline (per-state)"),
            layout: Some(&self.pipeline_layout),
            vertex: wgpu::VertexState {
                module: interp,
                entry_point: "vs_main",
                compilation_options: Default::default(),
                buffers: &[],
            },
            fragment: Some(wgpu::FragmentState {
                module: interp,
                entry_point: "fs_main",
                compilation_options: Default::default(),
                targets: &color_targets,
            }),
            primitive: triangle_list,
            depth_stencil: None,
            multisample: wgpu::MultisampleState::default(),
            multiview: None,
            cache: None,
        };
        let built = device.create_render_pipeline(&descriptor);
        self.interp_cache.insert(rstate, built);
    }
}

/// iterate-3Y: render one draw through the translator pipeline built for
/// this draw's render state. Returns `false` if no module is cached for
/// `(vs,ps)` (caller should fall back to the interpreter).
///
/// On success the per-draw constants are written to the uniform buffer and a
/// render pass that loads and stores `target_view` is recorded into `encoder`.
/// The shader sees `vertex_count` clamped to at least 3, while the draw call
/// itself covers the count rounded up to a whole number of triangles.
pub fn render_one_translated(
    &mut self,
    device: &wgpu::Device,
    queue: &wgpu::Queue,
    encoder: &mut wgpu::CommandEncoder,
    target_view: &wgpu::TextureView,
    req: DrawRequest,
    vs_key: u32,
    ps_key: u32,
    rstate: RenderState,
) -> bool {
    let ready = self.ensure_translated_for_state(device, vs_key, ps_key, rstate);
    if !ready {
        return false;
    }

    let DrawRequest {
        draw_index,
        vertex_count,
        prim_kind,
        vertex_base_dwords,
        ndc_scale,
        ndc_offset,
    } = req;
    let constants = DrawConstants {
        draw_index,
        vertex_count: vertex_count.max(3),
        prim_kind,
        vertex_base_dwords,
        ndc_scale,
        ndc_offset,
    };
    queue.write_buffer(&self.draw_ctx_buffer, 0, bytemuck::bytes_of(&constants));

    let cache_key = (vs_key, ps_key, rstate);
    let pipeline = self.translated_cache.get(&cache_key).expect("just ensured");

    // Keep what is already in the target: load, draw on top, store.
    let attachment = wgpu::RenderPassColorAttachment {
        view: target_view,
        resolve_target: None,
        ops: wgpu::Operations {
            load: wgpu::LoadOp::Load,
            store: wgpu::StoreOp::Store,
        },
    };
    let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
        label: Some("xenos translated draw"),
        color_attachments: &[Some(attachment)],
        depth_stencil_attachment: None,
        timestamp_writes: None,
        occlusion_query_set: None,
    });
    pass.set_pipeline(pipeline);
    pass.set_bind_group(0, &self.bind_group, &[]);
    pass.set_bind_group(1, &self.tex_bind_group, &[]);

    // Whole triangles only, and never fewer than one.
    let whole_triangles = vertex_count.div_ceil(3) * 3;
    let draw_end = whole_triangles.max(3);
    pass.draw(0..draw_end, 0..1);
    true
}

/// Number of distinct translator pipelines cached. Surfaced to the HUD.
pub fn translated_pipeline_count(&self) -> usize {
    let cached = &self.translated_cache;
    cached.len()
}

/// P5 — swap the active texture bound at `@group(1) @binding(0)`.
/// `view` is typically a wgpu texture view obtained from the
/// [`TextureCacheHost`]. Pass `None` to revert to the built-in 1×1
/// transparent-black placeholder.
///
/// The group-1 bind group is rebuilt on every call, reusing the retained
/// layout and sampler.
pub fn set_texture_view(&mut self, device: &wgpu::Device, view: Option<&wgpu::TextureView>) {
    let active_view = match view {
        Some(guest_view) => guest_view,
        None => &self.dummy_view,
    };
    let entries = [
        wgpu::BindGroupEntry {
            binding: 0,
            resource: wgpu::BindingResource::TextureView(active_view),
        },
        wgpu::BindGroupEntry {
            binding: 1,
            resource: wgpu::BindingResource::Sampler(&self.sampler),
        },
    ];
    let rebuilt = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: Some("xenos tex bind group (rebind)"),
        layout: &self.tex_bgl,
        entries: &entries,
    });
    self.tex_bind_group = rebuilt;
}

/// Clear `target_view` to `color`, store.
///
/// `color` is `[r, g, b, a]`. The pass records no draws: it is opened with
/// a clear load-op and ended straight away.
pub fn clear(
    &self,
    encoder: &mut wgpu::CommandEncoder,
    target_view: &wgpu::TextureView,
    color: [f64; 4],
) {
    let [r, g, b, a] = color;
    let attachment = wgpu::RenderPassColorAttachment {
        view: target_view,
        resolve_target: None,
        ops: wgpu::Operations {
            load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a }),
            store: wgpu::StoreOp::Store,
        },
    };
    let pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
        label: Some("xenos frontbuffer clear"),
        color_attachments: &[Some(attachment)],
        depth_stencil_attachment: None,
        timestamp_writes: None,
        occlusion_query_set: None,
    });
    // Dropping the pass ends it; the clear happens via the load-op.
    drop(pass);
}

/// Upload shader microcode + constants once (before the batch of draws
/// that share them). Skips zero-length blobs.
///
/// The constants block is always written. Each microcode blob is cut off at
/// `UCODE_BUFFER_MAX_DWORDS` dwords; a skipped (empty) blob leaves the
/// previous buffer contents in place.
pub fn upload_shader_and_constants(
    &self,
    queue: &wgpu::Queue,
    vs_ucode: &[u32],
    ps_ucode: &[u32],
    constants: &XenosConstantsBlock,
) {
    queue.write_buffer(&self.constants_buffer, 0, bytemuck::bytes_of(constants));

    let capacity = (UCODE_BUFFER_MAX_DWORDS * 4) as usize;
    let uploads = [
        (vs_ucode, &self.vs_ucode_buffer),
        (ps_ucode, &self.ps_ucode_buffer),
    ];
    for (ucode, buffer) in uploads {
        if ucode.is_empty() {
            continue;
        }
        let bytes: &[u8] = bytemuck::cast_slice(ucode);
        let len = bytes.len().min(capacity);
        queue.write_buffer(buffer, 0, &bytes[..len]);
    }
}

/// Upload vertex data (as raw big-endian dwords — the WGSL side will
/// bswap as needed during format unpacking).
pub fn upload_vertex_data(&self, queue: &wgpu::Queue, data: &[u32]) { if data.is_empty() { return; } let bytes: &[u8] = bytemuck::cast_slice(data); let max = VERTEX_BUFFER_MAX_BYTES as usize; queue.write_buffer(&self.vertex_buffer, 0, &bytes[..bytes.len().min(max)]); } /// Render one captured draw through the interpreter, using the per-draw /// `rstate` (blend/write-mask) so flat draws composite correctly even /// when their (vs,ps) didn't translate. `RenderState::OPAQUE` reproduces /// the legacy fixed behaviour for procedural/synthetic draws. pub fn render_one( &mut self, device: &wgpu::Device, queue: &wgpu::Queue, encoder: &mut wgpu::CommandEncoder, target_view: &wgpu::TextureView, req: DrawRequest, rstate: RenderState, ) { self.ensure_interp_for_state(device, rstate); let cb = DrawConstants { draw_index: req.draw_index, vertex_count: req.vertex_count.max(3), prim_kind: req.prim_kind, vertex_base_dwords: req.vertex_base_dwords, ndc_scale: req.ndc_scale, ndc_offset: req.ndc_offset, }; queue.write_buffer(&self.draw_ctx_buffer, 0, bytemuck::bytes_of(&cb)); let pipeline = self .interp_cache .get(&rstate) .expect("just ensured"); let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { label: Some("xenos draw"), color_attachments: &[Some(wgpu::RenderPassColorAttachment { view: target_view, resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Load, store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, timestamp_writes: None, occlusion_query_set: None, }); pass.set_pipeline(pipeline); pass.set_bind_group(0, &self.bind_group, &[]); pass.set_bind_group(1, &self.tex_bind_group, &[]); let rounded = req.vertex_count.div_ceil(3) * 3; pass.draw(0..rounded.max(3), 0..1); } } #[cfg(test)] mod tests { use super::*; #[test] fn draw_constants_layout_matches_wgsl_uniform() { assert_eq!(std::mem::size_of::(), 32); } }