//! Extract draw state from the Xenos register file at `PM4_DRAW_INDX` time. //! //! This is the "what are we drawing?" snapshot: primitive type, vertex count, //! index buffer (if any), viewport, scissor, blend, depth state, and enough //! handles for a future translator / uber-shader to pull fetch constants + //! shader blobs. Ground truth: `xenia-canary/src/xenia/gpu/draw_util.h` and //! the PM4 handler at `pm4_command_processor_implement.h:1128-1151`. //! //! We only extract what the P3 uber-shader actually consumes; the rest is //! reserved for later phases. use crate::register_file::RegisterFile; /// Primitive type (Xenos `PrimitiveType` enum from `xenos.h`). #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PrimitiveType { None, PointList, LineList, LineStrip, TriangleList, TriangleFan, TriangleStrip, RectangleList, QuadList, Unknown(u8), } impl PrimitiveType { pub fn from_bits(b: u32) -> Self { match b & 0x3F { 0 => PrimitiveType::None, 1 => PrimitiveType::PointList, 2 => PrimitiveType::LineList, 3 => PrimitiveType::LineStrip, 4 => PrimitiveType::TriangleList, 5 => PrimitiveType::TriangleFan, 6 => PrimitiveType::TriangleStrip, 8 => PrimitiveType::RectangleList, 13 => PrimitiveType::QuadList, other => PrimitiveType::Unknown(other as u8), } } } /// How the draw was issued per `VGT_DRAW_INITIATOR.source_select`: /// 0=DMA, 1=Immediate (in-packet indices), 2=AutoIndex. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum IndexSource { /// Index buffer fetched from `VGT_DMA_BASE` / `VGT_DMA_SIZE`. Dma { base_address: u32, size_dwords: u32, index_size: IndexSize, }, /// Indices follow the `DRAW_INDX_2` packet header inline. Immediate { index_size: IndexSize }, /// No index buffer; generate `0..vertex_count - 1` on the host. AutoIndex, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum IndexSize { /// 16-bit indices. Sixteen, /// 32-bit indices. ThirtyTwo, } /// Snapshot of one draw call's state, sampled from the register file. 
#[derive(Debug, Clone, Copy)]
pub struct DrawState {
    // NOTE(review): the `Option` fields in this struct carry no type argument
    // in this copy of the file; presumably the element types were lost in
    // transit — confirm against the repository before building.
    /// Primitive topology of the draw.
    pub primitive: PrimitiveType,
    /// Number of indices / vertices issued by the draw.
    pub vertex_count: u32,
    /// Where the indices come from (DMA buffer, inline, or auto-generated).
    pub index_source: IndexSource,
    /// Viewport scale/offset, stored as floats.
    pub viewport: Viewport,
    /// Window scissor rectangle.
    pub scissor: Scissor,
    /// RB_COLOR_INFO for each of the 4 possible color render targets; `None`
    /// where the target is not bound.
    pub color_info: [Option; 4],
    /// Depth/stencil target description; `None` when no depth target is bound.
    pub depth_info: Option,
    /// Raw `RB_MODECONTROL` register value.
    pub rb_modecontrol: u32,
    /// Raw `RB_COLORCONTROL` register value.
    pub rb_colorcontrol: u32,
    /// Raw `RB_DEPTHCONTROL` register value.
    pub rb_depthcontrol: u32,
    /// P4: per-color-target blend state. Index matches `color_info`.
    pub rb_blendcontrol: [u32; 4],
    /// P4: stencil state.
    pub rb_stencilrefmask: u32,
    /// P4: stencil state, `_BF` variant of the register (presumably
    /// back-face — confirm against the register table).
    pub rb_stencilrefmask_bf: u32,
    /// P4: pixel offset applied at rasterization.
    pub pa_sc_window_offset: u32,
    /// P4: resolve destination registers (`RB_COPY_*`). These are set by
    /// the guest just before triggering a TILE_FLUSH event and describe
    /// where an EDRAM→texture copy should land.
    pub rb_copy_control: u32,
    /// Raw `RB_COPY_DEST_BASE` (unmasked; see [`ResolveInfo::dest_base`]).
    pub rb_copy_dest_base: u32,
    /// Raw `RB_COPY_DEST_PITCH` (pitch in the low half, height in the high half).
    pub rb_copy_dest_pitch: u32,
    /// Raw `RB_COPY_DEST_INFO` (endian, array flag, format, exponent bias).
    pub rb_copy_dest_info: u32,
    /// Key of the VS blob that was active at draw time (from
    /// `GpuSystem::active_vs_key`). `None` = no VS loaded yet; the draw is
    /// meaningless and will be rejected by the dispatcher.
    pub vs_blob_key: Option,
    /// Key of the PS blob that was active at draw time.
    pub ps_blob_key: Option,
}

/// Viewport transform: per-axis scale and offset, as floats.
#[derive(Debug, Clone, Copy, Default)]
pub struct Viewport {
    pub scale_x: f32,
    pub scale_y: f32,
    pub scale_z: f32,
    pub offset_x: f32,
    pub offset_y: f32,
    pub offset_z: f32,
}

/// Scissor rectangle as top-left (`tl_*`) and bottom-right (`br_*`) corners.
#[derive(Debug, Clone, Copy, Default)]
pub struct Scissor {
    pub tl_x: u16,
    pub tl_y: u16,
    pub br_x: u16,
    pub br_y: u16,
}

/// Decoded `RB_COLOR_INFO` for one color render target.
#[derive(Debug, Clone, Copy)]
pub struct ColorTargetInfo {
    /// EDRAM tile base for this color target (`RB_COLOR_INFO.base_tiles`).
    pub base_tiles: u16,
    /// Color format (`RB_COLOR_INFO.color_format`).
    pub format: u8,
}

/// Decoded `RB_DEPTH_INFO` for the depth/stencil render target.
#[derive(Debug, Clone, Copy)]
pub struct DepthTargetInfo {
    /// EDRAM tile base for depth/stencil.
    pub base_tiles: u16,
    /// 0=D24S8, 1=D24FS8 (per `xenos.h:404-408`).
    pub format: u8,
}

/// Resolve source: either one of four color render targets or the depth RT.
/// Packed into `RB_COPY_CONTROL.copy_src_select` (bits [2:0]): 0..=3 pick
/// color0..3, 4 picks depth. Canary `registers.h:853`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolveSource {
    /// Color render target with the given index (0..=3).
    Color(u8),
    /// The depth render target.
    Depth,
}

/// Resolve rectangle in pixel coordinates at the destination resolution,
/// 8-pixel aligned per Canary's `kResolveAlignmentPixels = 8`. MSAA scaling
/// is kept separate — `sample_count_log2_x/y` tell the resolve how many
/// samples to step per destination pixel.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ResolveCoordinates {
    /// Left edge of the rectangle, in pixels.
    pub x0: u32,
    /// Top edge of the rectangle, in pixels.
    pub y0: u32,
    /// Rectangle width in pixels; 0 means "empty resolve".
    pub width: u32,
    /// Rectangle height in pixels; 0 means "empty resolve".
    pub height: u32,
    /// 1 iff 4x MSAA (samples laid out 2x wider than pixels).
    pub sample_count_log2_x: u32,
    /// 1 iff 2x+ MSAA (samples laid out 2x taller than pixels).
    pub sample_count_log2_y: u32,
}

/// Decoded resolve state — describes how a `TILE_FLUSH` event should copy
/// EDRAM bytes to a guest-memory tiled texture. Canary equivalent:
/// `draw_util::ResolveInfo` at `draw_util.h:627`. Bit-field layout in
/// `RB_COPY_CONTROL / RB_COPY_DEST_INFO / RB_COPY_DEST_PITCH` comes from
/// `registers.h:853-897`.
#[derive(Debug, Clone, Copy)]
pub struct ResolveInfo {
    /// Which source RT (0..=3=color, 4=depth). Raw register bits.
    pub copy_src_select: u8,
    /// Sample selector for MSAA sources. See `xenos::CopySampleSelect`.
    pub copy_sample_select: u8,
    /// Enable clear of the source render target after the copy.
    pub color_clear_enable: bool,
    /// Enable clear of the depth render target after the copy.
    pub depth_clear_enable: bool,
    /// 0 = raw tile copy (same format), 1 = convert to `copy_dest_format`.
    /// 2 = constantOne, 3 = null (no copy).
    pub copy_command: u8,
    /// Guest-memory destination address, already masked to the 29-bit
    /// Xenon physical range (`& 0x1FFF_FFFF`).
    pub dest_base: u32,
    /// Destination pitch in pixels (0..=16383). Byte pitch = pitch * bpp
    /// after the caller pitch-aligns to `kStoragePitchHeightAlignmentBlocks
    /// = 32`.
    pub dest_pitch_pixels: u32,
    /// Destination height in pixels (same 14-bit range as the pitch).
    pub dest_height_pixels: u32,
    /// Destination format (`xenos::ColorFormat`, 6 bits).
    pub dest_format: u8,
    /// Byte-swap mode applied before the write (`xenos::Endian128`, 0..=5).
    pub dest_endian: u8,
    /// Signed [-32, 31] exponent bias applied during conversion.
    pub dest_exp_bias: i8,
    /// Decoded resolve source (color0..3 or depth).
    pub source: ResolveSource,
    /// 8-pixel-aligned resolve rectangle.
    pub coords: ResolveCoordinates,
    /// Source format: `ColorRenderTargetFormat` when color,
    /// `DepthRenderTargetFormat` when depth.
    pub source_format: u8,
    /// EDRAM tile origin of the source RT (from `RB_COLOR_INFO.color_base`
    /// or `RB_DEPTH_INFO.depth_base`, 11-bit mod 2048).
    ///
    /// NOTE(review): the decoder in this file extracts the field with a
    /// 12-bit mask (`& 0xFFF`), so the "11-bit" wording above should be
    /// confirmed against the register definition.
    pub source_base_tiles: u16,
    /// `GetSurfacePitchTiles(surface_pitch, msaa, is_64bpp)` — how many
    /// 80-sample-wide tiles make up one EDRAM row.
    pub surface_pitch_tiles: u32,
    /// MSAA mode from `RB_SURFACE_INFO`.
    pub msaa: crate::render_target_cache::MsaaSamples,
    /// True iff the source color format is 64bpp (doubles EDRAM pitch/base).
    pub source_is_64bpp: bool,
    /// `RB_COLOR_CLEAR` — constant written into EDRAM when
    /// `color_clear_enable` is set.
    pub color_clear_value: u32,
    /// `RB_COLOR_CLEAR_LO` — second 32-bit lane for 64bpp clear.
    pub color_clear_value_lo: u32,
    /// `RB_DEPTH_CLEAR` — constant written into EDRAM depth tiles on
    /// `depth_clear_enable`.
    pub depth_clear_value: u32,
    /// `RB_COPY_DEST_INFO.copy_dest_array` — 2D (false) vs 3D/stacked (true).
    pub copy_dest_array: bool,
}

/// `GetSurfacePitchTiles(pitch_pixels, msaa, is_64bpp)` — ported from
/// `xenos.h:465-476`. Returns the number of 80-sample-wide EDRAM tiles
/// that make up one row of a surface with `pitch_pixels`-pixel pitch.
///
/// At 4x MSAA samples span twice the pixel width, so the sample pitch
/// doubles.
64bpp formats pack two EDRAM tiles per color value, so the /// effective tile pitch doubles again. #[inline] pub fn surface_pitch_tiles( pitch_pixels: u32, msaa: crate::render_target_cache::MsaaSamples, is_64bpp: bool, ) -> u32 { use crate::render_target_cache::MsaaSamples; const EDRAM_TILE_WIDTH_SAMPLES: u32 = 80; let pitch_samples = pitch_pixels << u32::from(msaa == MsaaSamples::X4); let pitch_tiles = pitch_samples.div_ceil(EDRAM_TILE_WIDTH_SAMPLES); pitch_tiles << u32::from(is_64bpp) } /// Canary `ColorRenderTargetFormat` is 64bpp iff its numeric value is one /// of {5, 7, 15} — i.e. `k_16_16_16_16`, `k_16_16_16_16_FLOAT`, or /// `k_32_32_FLOAT`. `xenos.h:297-317` + the enum's `IsColorRenderTarget /// Format64bpp` helper. #[inline] pub fn color_render_target_format_is_64bpp(fmt: u8) -> bool { matches!(fmt, 5 | 7 | 15) } /// `kResolveAlignmentPixels` from Canary (`draw_util.cc:925` area). pub const RESOLVE_ALIGNMENT_PIXELS: u32 = 8; /// Clamp a raw resolve rectangle to the `PA_SC_WINDOW_SCISSOR_*` registers /// and align to the 8-pixel grid. Caller passes `i32` because the VF0 /// derivation can produce negative bounding-box values; this helper clamps /// them to the non-negative window defined by the scissor. /// /// Returns `(x0, y0, width, height)` in pixels, all non-negative, all /// 8-pixel-aligned, `width`/`height` already `>= 0`. Width/height of 0 /// signals "empty resolve; skip". pub fn resolve_rect_apply_scissor_and_align_8( rf: &RegisterFile, x0_in: i32, y0_in: i32, x1_in: i32, y1_in: i32, ) -> (u32, u32, u32, u32) { let tl = rf.read(reg::PA_SC_WINDOW_SCISSOR_TL); let br = rf.read(reg::PA_SC_WINDOW_SCISSOR_BR); let tl_x = (tl & 0x3FFF) as i32; let tl_y = ((tl >> 16) & 0x3FFF) as i32; let br_x = (br & 0x3FFF) as i32; let br_y = ((br >> 16) & 0x3FFF) as i32; // Clamp only when the scissor is a non-degenerate window; otherwise // leave the input rect alone (Canary's `kResolveAlignmentPixels` will // still 8-align it below). 
let (mut x0, mut y0, mut x1, mut y1) = (x0_in, y0_in, x1_in, y1_in); if br_x > tl_x && br_y > tl_y { let clamp = |v: i32, lo: i32, hi: i32| v.max(lo).min(hi); x0 = clamp(x0, tl_x, br_x); y0 = clamp(y0, tl_y, br_y); x1 = clamp(x1, tl_x, br_x); y1 = clamp(y1, tl_y, br_y); } if x1 < x0 { x1 = x0; } if y1 < y0 { y1 = y0; } // 8-pixel align. Floor top-left; ceil bottom-right. let align_mask = (RESOLVE_ALIGNMENT_PIXELS as i32) - 1; x0 &= !align_mask; y0 &= !align_mask; x1 = (x1 + align_mask) & !align_mask; y1 = (y1 + align_mask) & !align_mask; let x0u = x0.max(0) as u32; let y0u = y0.max(0) as u32; let x1u = x1.max(0) as u32; let y1u = y1.max(0) as u32; ( x0u, y0u, x1u.saturating_sub(x0u), y1u.saturating_sub(y0u), ) } /// Parse vertex fetch constant 0 (Canary `xe_gpu_vertex_fetch_t`, /// `xenos.h:1158-1172`) and derive the resolve bounding-box in pixel units. /// Returns `None` when the fetch isn't the 6-float vertex buffer the /// resolve shader expects (type != kVertex or size != 6). /// /// This mirrors `draw_util.cc:950-1014` minus window-offset and half-pixel /// nudging — the pitfalls there are (a) handling endian via `GpuSwap` and /// (b) Fixed16p8 top-left rounding `(v + 127) >> 8`. Both are replicated. /// /// The returned rect is in *pixel* coordinates, *pre-scissor-clamp* and /// *pre-alignment*. Caller feeds it through /// [`resolve_rect_apply_scissor_and_align_8`]. pub fn vertex_fetch_0_rect( rf: &RegisterFile, mem: &dyn xenia_memory::access::MemoryAccess, ) -> Option<(i32, i32, i32, i32)> { const CONST_BASE_FETCH: u32 = 0x4800; let dword_0 = rf.read(CONST_BASE_FETCH); let dword_1 = rf.read(CONST_BASE_FETCH + 1); // type:2 at bits [1:0]; kVertex = 3 per xenos.h:1147-1152. let fetch_type = dword_0 & 0x3; if fetch_type != 3 { return None; } // size:24 at bits [25:2] of dword_1 — in dwords; expect 6 (3 × vec2). let size = (dword_1 >> 2) & 0x00FF_FFFF; if size != 6 { return None; } // address:30 at bits [31:2] of dword_0 — in dwords. 
let address_bytes = dword_0 & 0xFFFF_FFFC; // endian:2 at bits [1:0] of dword_1 — xenos::Endian (kNone/k8in16/k8in32/k16in32). let fetch_endian = (dword_1 & 0x3) as u8; // Read 6 floats from guest memory. `mem.read_u32` stores BE bytes as a // u32 value; to mirror Canary's "raw LE bytes → u32 → GpuSwap" we have // to re-interpret the memory as LE (flipping what `read_u32` did). let floats: [f32; 6] = std::array::from_fn(|i| { let be_u32 = mem.read_u32(address_bytes.wrapping_add(i as u32 * 4)); // `be_u32` was composed from bytes `[b0,b1,b2,b3]` as // `(b0<<24)|...|b3`. Canary reads those same bytes in host-LE, // producing `(b3<<24)|...|b0`. That's `be_u32.swap_bytes()`. let canary_le = be_u32.swap_bytes(); let swapped = gpu_swap_u32(canary_le, fetch_endian); f32::from_bits(swapped) }); // PA_SU_VTX_CNTL::pix_center: bit 0, 0 = kD3DZero (+0.5 half-pixel), // 1 = kOpenGL (no offset). Pre-fix the constant at this site read // 0x2083 (PA_SC_CLIPRECT_RULE), giving non-deterministic half-pixel // offsets that broke 3D camera matrices. Canary `register_table.inc` // line 1336 says PA_SU_VTX_CNTL is 0x2302. GPUBUG-105. const PA_SU_VTX_CNTL: u32 = 0x2302; let half_pixel_offset = if rf.read(PA_SU_VTX_CNTL) & 1 == 0 { 0.5f32 } else { 0.0f32 }; // Convert each to Fixed16p8 (multiply by 256, round). let fixed: [i32; 6] = std::array::from_fn(|i| { ((floats[i] + half_pixel_offset) * 256.0).round() as i32 }); let x0 = fixed[0].min(fixed[2]).min(fixed[4]); let y0 = fixed[1].min(fixed[3]).min(fixed[5]); let x1 = fixed[0].max(fixed[2]).max(fixed[4]); let y1 = fixed[1].max(fixed[3]).max(fixed[5]); // Top-left rounding: `(v + 127) >> 8` for both corners. let round = |v: i32| (v + 127) >> 8; Some((round(x0), round(y0), round(x1), round(y1))) } /// Canary `GpuSwapInline` on a u32. Exposed here so the vertex-fetch path /// can apply the same byte-order transform Canary's `GpuSwap` applies /// to vertex data. `xenos.h:1077-1114`. 
#[inline] fn gpu_swap_u32(value: u32, endian: u8) -> u32 { match endian & 0x3 { // kNone. 0 => value, // k8in16: swap bytes within each 16-bit word. 1 => ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8), // k8in32: full byte reversal. 2 => value.swap_bytes(), // k16in32: swap 16-bit halves. _ => value.rotate_left(16), } } impl ResolveInfo { /// Legacy entrypoint used when the caller already has a `DrawState`. It /// fills only the narrow register bits that live in `DrawState` — the /// wider coordinate / EDRAM fields require the full register file. /// /// Kept for tests that construct resolve decoders from captured draw /// states. `from_register_file` is the canonical path. pub fn from_draw_state(ds: &DrawState) -> Self { use crate::render_target_cache::MsaaSamples; let c = ds.rb_copy_control; let p = ds.rb_copy_dest_pitch; let i = ds.rb_copy_dest_info; // Sign-extend the 6-bit exp_bias from `copy_dest_info[21:16]`. let exp_raw = (i >> 16) & 0x3F; let exp_sign = ((exp_raw & 0x20) != 0) as i8; let exp_bias = (exp_raw as i8) - (exp_sign * 64); let src_sel = (c & 0x7) as u8; let source = if src_sel >= 4 { ResolveSource::Depth } else { ResolveSource::Color(src_sel) }; Self { copy_src_select: src_sel, copy_sample_select: ((c >> 4) & 0x7) as u8, color_clear_enable: ((c >> 8) & 1) != 0, depth_clear_enable: ((c >> 9) & 1) != 0, copy_command: ((c >> 20) & 0x3) as u8, dest_base: ds.rb_copy_dest_base & 0x1FFF_FFFF, dest_pitch_pixels: p & 0x3FFF, dest_height_pixels: (p >> 16) & 0x3FFF, dest_format: ((i >> 7) & 0x3F) as u8, dest_endian: (i & 0x7) as u8, dest_exp_bias: exp_bias, source, coords: ResolveCoordinates::default(), source_format: 0, source_base_tiles: 0, surface_pitch_tiles: 0, msaa: MsaaSamples::X1, source_is_64bpp: false, color_clear_value: 0, color_clear_value_lo: 0, depth_clear_value: 0, copy_dest_array: ((i >> 3) & 1) != 0, } } /// Canonical resolve decoder — reads live register values and derives the /// full rectangle / EDRAM layout. 
Mirrors canary `draw_util.cc:926-1318` /// `GetResolveInfo` with the following simplifications (all scoped in /// the landing plan and will be expanded as needs arise): /// /// * The rectangle is derived from the scissor window and /// `RB_COPY_DEST_PITCH` rather than fetched from vertex fetch 0. /// Sylpheed's splash uses a clear-resolve — there's no draw ahead /// of it — so vertex-fetch-derived geometry is not available. /// * `copy_sample_select` is kept as-is; sample averaging for 2x/4x /// MSAA is not yet applied on the read side. /// * `PA_SC_WINDOW_OFFSET` is not applied — not needed for Sylpheed /// and canary only applies it when `PA_SU_SC_MODE_CNTL.vtx_window /// _offset_enable` is set, which requires a live draw. pub fn from_register_file(rf: &RegisterFile) -> Self { use crate::render_target_cache::MsaaSamples; let c = rf.read(reg::RB_COPY_CONTROL); let i = rf.read(reg::RB_COPY_DEST_INFO); let p = rf.read(reg::RB_COPY_DEST_PITCH); let dest_base_raw = rf.read(reg::RB_COPY_DEST_BASE); // Sign-extend 6-bit exp_bias from copy_dest_info[21:16]. let exp_raw = (i >> 16) & 0x3F; let exp_sign = ((exp_raw & 0x20) != 0) as i8; let exp_bias = (exp_raw as i8) - (exp_sign * 64); let src_sel = (c & 0x7) as u8; let source = if src_sel >= 4 { ResolveSource::Depth } else { ResolveSource::Color(src_sel & 0x3) }; let rb_surface_info = rf.read(reg::RB_SURFACE_INFO); let surface_pitch_pixels = rb_surface_info & 0x3FFF; let msaa = MsaaSamples::from_raw((rb_surface_info >> 16) & 0x3); // Source format + base tiles depend on which RT we're reading. 
let (source_format, source_base_tiles, source_is_64bpp) = match source { ResolveSource::Color(idx) => { let rb = match idx { 0 => rf.read(reg::RB_COLOR_INFO_0), 1 => rf.read(reg::RB_COLOR_INFO_1), 2 => rf.read(reg::RB_COLOR_INFO_2), _ => rf.read(reg::RB_COLOR_INFO_3), }; let fmt = ((rb >> 16) & 0xF) as u8; let base = (rb & 0xFFF) as u16; (fmt, base, color_render_target_format_is_64bpp(fmt)) } ResolveSource::Depth => { let rb = rf.read(reg::RB_DEPTH_INFO); let fmt = ((rb >> 16) & 0x1) as u8; let base = (rb & 0xFFF) as u16; (fmt, base, false) } }; let pitch_tiles = surface_pitch_tiles(surface_pitch_pixels, msaa, source_is_64bpp); // --- Rectangle derivation --- // Default extent is (0, 0, dest_pitch, dest_height); subject to // scissor clamp + 8-pixel alignment. let dest_pitch = p & 0x3FFF; let dest_height = (p >> 16) & 0x3FFF; let coords_no_msaa = resolve_rect_apply_scissor_and_align_8( rf, 0, 0, dest_pitch as i32, dest_height as i32, ); let coords = ResolveCoordinates { x0: coords_no_msaa.0, y0: coords_no_msaa.1, width: coords_no_msaa.2, height: coords_no_msaa.3, sample_count_log2_x: u32::from(msaa == MsaaSamples::X4), sample_count_log2_y: u32::from(msaa != MsaaSamples::X1), }; Self { copy_src_select: src_sel, copy_sample_select: ((c >> 4) & 0x7) as u8, color_clear_enable: ((c >> 8) & 1) != 0, depth_clear_enable: ((c >> 9) & 1) != 0, copy_command: ((c >> 20) & 0x3) as u8, dest_base: dest_base_raw & 0x1FFF_FFFF, dest_pitch_pixels: dest_pitch, dest_height_pixels: dest_height, dest_format: ((i >> 7) & 0x3F) as u8, dest_endian: (i & 0x7) as u8, dest_exp_bias: exp_bias, source, coords, source_format, source_base_tiles, surface_pitch_tiles: pitch_tiles, msaa, source_is_64bpp, color_clear_value: rf.read(reg::RB_COLOR_CLEAR), color_clear_value_lo: rf.read(reg::RB_COLOR_CLEAR_LO), depth_clear_value: rf.read(reg::RB_DEPTH_CLEAR), copy_dest_array: ((i >> 3) & 1) != 0, } } /// Memory-aware variant: if vertex fetch 0 contains the D3D9-hack /// "resolve rectangle" vertices (3 
vec2 floats, Canary `draw_util.cc /// :950-1014`), use its bounding box as the resolve extent. Falls back /// to the scissor + `RB_COPY_DEST_PITCH/HEIGHT` rect when VF0 isn't a /// 6-dword vertex buffer. /// /// Used from the live TILE_FLUSH path; tests can stick with /// `from_register_file` when they don't want to program VF0. pub fn from_register_file_and_memory( rf: &RegisterFile, mem: &dyn xenia_memory::access::MemoryAccess, ) -> Self { let mut info = Self::from_register_file(rf); if let Some((x0, y0, x1, y1)) = vertex_fetch_0_rect(rf, mem) { let (rx0, ry0, rw, rh) = resolve_rect_apply_scissor_and_align_8(rf, x0, y0, x1, y1); // Only override when the VF0 rect is non-empty — an empty VF0 // means the game hasn't set one up yet and we should keep the // scissor+dest default. if rw > 0 && rh > 0 { info.coords.x0 = rx0; info.coords.y0 = ry0; info.coords.width = rw; info.coords.height = rh; } } info } } /// Register indices from `xenia-canary/src/xenia/gpu/register_table.inc`. /// Only what the extractor reads is named here. /// /// GPUBUG-103/104/105: 8 of these were previously off-by-many — pointing at /// completely different registers. Each has been re-validated against /// canary's `register_table.inc`. pub mod reg { // VGT (vertex/geometry transform) — GPUBUG-103. pub const VGT_DRAW_INITIATOR: u32 = 0x21FC; // was 0x2281 (junk). pub const VGT_DMA_BASE: u32 = 0x21FA; // was 0x2282 (junk). pub const VGT_DMA_SIZE: u32 = 0x21FB; // was 0x2283 (junk). pub const PA_CL_VPORT_XSCALE: u32 = 0x210F; pub const PA_CL_VPORT_XOFFSET: u32 = 0x2110; pub const PA_CL_VPORT_YSCALE: u32 = 0x2111; pub const PA_CL_VPORT_YOFFSET: u32 = 0x2112; pub const PA_CL_VPORT_ZSCALE: u32 = 0x2113; pub const PA_CL_VPORT_ZOFFSET: u32 = 0x2114; // GPUBUG-104: pre-fix these read PA_SC_SCREEN_SCISSOR_{TL,BR} (the // global screen scissor) instead of the per-window scissor. pub const PA_SC_WINDOW_SCISSOR_TL: u32 = 0x2081; // was 0x200E (= SCREEN_SCISSOR_TL). 
pub const PA_SC_WINDOW_SCISSOR_BR: u32 = 0x2082; // was 0x200F (= SCREEN_SCISSOR_BR). pub const RB_MODECONTROL: u32 = 0x2208; pub const RB_SURFACE_INFO: u32 = 0x2000; pub const RB_COLOR_INFO_0: u32 = 0x2001; // GPUBUG-105: per-RT color info; pre-fix indexed COHER_DEST_BASE_* // instead. Canary names them RB_COLOR1/2/3_INFO. pub const RB_COLOR_INFO_1: u32 = 0x2003; // was 0x2010 (= COHER_DEST_BASE_10). pub const RB_COLOR_INFO_2: u32 = 0x2004; // was 0x2011. pub const RB_COLOR_INFO_3: u32 = 0x2005; // was 0x2012. pub const RB_DEPTH_INFO: u32 = 0x2002; pub const RB_COLORCONTROL: u32 = 0x2202; pub const RB_DEPTHCONTROL: u32 = 0x2200; // P4 additions — per-RT blend + stencil + window offset + resolve dst. pub const RB_BLENDCONTROL_0: u32 = 0x2201; pub const RB_BLENDCONTROL_1: u32 = 0x2209; pub const RB_BLENDCONTROL_2: u32 = 0x220A; pub const RB_BLENDCONTROL_3: u32 = 0x220B; pub const RB_STENCILREFMASK: u32 = 0x210D; pub const RB_STENCILREFMASK_BF: u32 = 0x210C; pub const PA_SC_WINDOW_OFFSET: u32 = 0x2080; pub const RB_COPY_CONTROL: u32 = 0x2318; pub const RB_COPY_DEST_BASE: u32 = 0x2319; pub const RB_COPY_DEST_PITCH: u32 = 0x231A; pub const RB_COPY_DEST_INFO: u32 = 0x231B; pub const RB_DEPTH_CLEAR: u32 = 0x231D; pub const RB_COLOR_CLEAR: u32 = 0x231E; pub const RB_COLOR_CLEAR_LO: u32 = 0x231F; } /// Build a [`DrawState`] from a `VGT_DRAW_INITIATOR` value + the current /// register file. `extra_dma_base`/`extra_dma_size` can override the /// DMA fields if the caller has them from the PM4 packet payload (canary /// passes them inline with `DRAW_INDX`). 
pub fn extract( register_file: &RegisterFile, vgt_draw_initiator: u32, dma_base: Option, dma_size: Option, ) -> DrawState { // `VGT_DRAW_INITIATOR` bit layout (per canary `registers.h:315-327`): // [5:0] prim_type (PrimitiveType) // [7:6] source_select (0=DMA, 1=immediate, 2=auto) // [9:8] major_mode // [10] _pad // [11] index_size (0=16-bit, 1=32-bit) ← GPUBUG-103 // [12] not_eop // [15:13] _pad // [31:16] num_indices let prim_bits = vgt_draw_initiator & 0x3F; let source_select = (vgt_draw_initiator >> 6) & 0x3; let index_size_bit = (vgt_draw_initiator >> 11) & 0x1; let num_indices = (vgt_draw_initiator >> 16) & 0xFFFF; let index_size = if index_size_bit == 0 { IndexSize::Sixteen } else { IndexSize::ThirtyTwo }; let index_source = match source_select { 0 => IndexSource::Dma { base_address: dma_base.unwrap_or_else(|| register_file.read(reg::VGT_DMA_BASE)), size_dwords: dma_size.unwrap_or_else(|| register_file.read(reg::VGT_DMA_SIZE)), index_size, }, 1 => IndexSource::Immediate { index_size }, _ => IndexSource::AutoIndex, }; let f = |r: u32| f32::from_bits(register_file.read(r)); let viewport = Viewport { scale_x: f(reg::PA_CL_VPORT_XSCALE), scale_y: f(reg::PA_CL_VPORT_YSCALE), scale_z: f(reg::PA_CL_VPORT_ZSCALE), offset_x: f(reg::PA_CL_VPORT_XOFFSET), offset_y: f(reg::PA_CL_VPORT_YOFFSET), offset_z: f(reg::PA_CL_VPORT_ZOFFSET), }; let tl = register_file.read(reg::PA_SC_WINDOW_SCISSOR_TL); let br = register_file.read(reg::PA_SC_WINDOW_SCISSOR_BR); let scissor = Scissor { tl_x: (tl & 0x7FFF) as u16, tl_y: ((tl >> 16) & 0x7FFF) as u16, br_x: (br & 0x7FFF) as u16, br_y: ((br >> 16) & 0x7FFF) as u16, }; let rb_modecontrol = register_file.read(reg::RB_MODECONTROL); let color_mask = rb_modecontrol & 0xF; let ci = |reg: u32, present: bool| { if !present { return None; } let raw = register_file.read(reg); Some(ColorTargetInfo { base_tiles: (raw & 0xFFF) as u16, format: ((raw >> 16) & 0xF) as u8, }) }; let color_info = [ ci(reg::RB_COLOR_INFO_0, (color_mask & 0x1) != 0), 
ci(reg::RB_COLOR_INFO_1, (color_mask & 0x2) != 0), ci(reg::RB_COLOR_INFO_2, (color_mask & 0x4) != 0), ci(reg::RB_COLOR_INFO_3, (color_mask & 0x8) != 0), ]; let depth_raw = register_file.read(reg::RB_DEPTH_INFO); // Depth-surface "present" = the RB_MODECONTROL depth-enable bit at bit 4. let depth_present = (rb_modecontrol & 0x10) != 0; let depth_info = if depth_present { Some(DepthTargetInfo { base_tiles: (depth_raw & 0xFFF) as u16, format: ((depth_raw >> 16) & 0x1) as u8, }) } else { None }; DrawState { primitive: PrimitiveType::from_bits(prim_bits), vertex_count: num_indices, index_source, viewport, scissor, color_info, depth_info, rb_modecontrol, rb_colorcontrol: register_file.read(reg::RB_COLORCONTROL), rb_depthcontrol: register_file.read(reg::RB_DEPTHCONTROL), rb_blendcontrol: [ register_file.read(reg::RB_BLENDCONTROL_0), register_file.read(reg::RB_BLENDCONTROL_1), register_file.read(reg::RB_BLENDCONTROL_2), register_file.read(reg::RB_BLENDCONTROL_3), ], rb_stencilrefmask: register_file.read(reg::RB_STENCILREFMASK), rb_stencilrefmask_bf: register_file.read(reg::RB_STENCILREFMASK_BF), pa_sc_window_offset: register_file.read(reg::PA_SC_WINDOW_OFFSET), rb_copy_control: register_file.read(reg::RB_COPY_CONTROL), rb_copy_dest_base: register_file.read(reg::RB_COPY_DEST_BASE), rb_copy_dest_pitch: register_file.read(reg::RB_COPY_DEST_PITCH), rb_copy_dest_info: register_file.read(reg::RB_COPY_DEST_INFO), // P3b M1: the kernel-side caller is expected to populate these // via `DrawState { ..extract(...), vs_blob_key, ps_blob_key }` so // the pure-register extraction stays decoupled from `GpuSystem` // state. Default to None so a bare `extract()` stays valid for // unit tests. 
vs_blob_key: None, ps_blob_key: None, } } #[cfg(test)] mod tests { use super::*; fn rf() -> RegisterFile { RegisterFile::new() } #[test] fn extract_basic_triangle_list_no_rt() { let rf = rf(); // prim_type=4 (TriangleList), source=2 (auto), num_indices=6 let vgt = (6u32 << 16) | (2 << 6) | 4; let ds = extract(&rf, vgt, None, None); assert_eq!(ds.primitive, PrimitiveType::TriangleList); assert_eq!(ds.vertex_count, 6); assert!(matches!(ds.index_source, IndexSource::AutoIndex)); assert!(ds.color_info.iter().all(|c| c.is_none())); assert!(ds.depth_info.is_none()); } #[test] fn extract_dma_indices_uses_override() { let rf = rf(); let vgt = (3u32 << 16) | (0 << 6) | 4; // prim=TriList, source=DMA let ds = extract(&rf, vgt, Some(0xDEAD_0000), Some(6)); match ds.index_source { IndexSource::Dma { base_address, size_dwords, index_size, } => { assert_eq!(base_address, 0xDEAD_0000); assert_eq!(size_dwords, 6); assert_eq!(index_size, IndexSize::Sixteen); } other => panic!("expected Dma, got {other:?}"), } } #[test] fn color_and_depth_enabled_bits_are_honored() { let mut rf = rf(); // rb_modecontrol: color0 + depth enabled (bit0 + bit4) rf.write(reg::RB_MODECONTROL, 0x11); rf.write(reg::RB_COLOR_INFO_0, (2 << 16) | 0x64); // format=2, tile=0x64 rf.write(reg::RB_DEPTH_INFO, (1 << 16) | 0x32); let ds = extract(&rf, 4, None, None); let c = ds.color_info[0].unwrap(); assert_eq!(c.format, 2); assert_eq!(c.base_tiles, 0x64); let d = ds.depth_info.unwrap(); assert_eq!(d.format, 1); assert_eq!(d.base_tiles, 0x32); } /// `RB_COPY_DEST_BASE` is a raw 32-bit register, but a Xenon physical /// address is 29-bit (`& 0x1FFF_FFFF`). `ResolveInfo::from_register_file` /// must mask before writes to prevent out-of-range memory accesses. 
#[test] fn resolve_info_masks_dest_base_to_physical() { let mut rf = rf(); rf.write(reg::RB_COPY_DEST_BASE, 0xDEAD_BEEF); let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.dest_base, 0x1EAD_BEEF); } /// Scissor ∩ (0, 0, dest_pitch, dest_height), then 8-pixel-aligned per /// Canary `kResolveAlignmentPixels`. Verify that the scissor actually /// tightens the rect (not just degenerates it). #[test] fn resolve_info_derives_8px_aligned_rect_from_scissor_and_dest_pitch() { let mut rf = rf(); // Dest pitch/height 1280×720; scissor (5, 5) -> (1000, 717). rf.write(reg::RB_COPY_DEST_PITCH, (720u32 << 16) | 1280u32); rf.write(reg::PA_SC_WINDOW_SCISSOR_TL, (5u32 << 16) | 5u32); rf.write(reg::PA_SC_WINDOW_SCISSOR_BR, (717u32 << 16) | 1000u32); let info = ResolveInfo::from_register_file(&rf); // x0 floors to 0 (was 5 -> &!7 = 0), y0 same. // x1 = min(1280, 1000) = 1000; ceil-to-8 = 1000. y1 = min(720, 717) = 717, ceil = 720. assert_eq!(info.coords.x0, 0); assert_eq!(info.coords.y0, 0); assert_eq!(info.coords.width, 1000); assert_eq!(info.coords.height, 720); } /// Non-degenerate scissor outside `dest_pitch/height` clamps to the /// destination extent. #[test] fn resolve_info_scissor_cannot_widen_past_dest() { let mut rf = rf(); rf.write(reg::RB_COPY_DEST_PITCH, (16u32 << 16) | 16u32); rf.write(reg::PA_SC_WINDOW_SCISSOR_BR, (1000u32 << 16) | 1000u32); let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.coords.width, 16); assert_eq!(info.coords.height, 16); } /// Source decoding: `copy_src_select >= 4` → depth; otherwise Color(idx). 
#[test] fn resolve_info_decodes_source_select() { let mut rf = rf(); rf.write(reg::RB_COPY_CONTROL, 2); // src_select = 2 (color2) let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.source, ResolveSource::Color(2)); assert_eq!(info.copy_src_select, 2); rf.write(reg::RB_COPY_CONTROL, 4); // src_select = 4 -> depth let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.source, ResolveSource::Depth); } /// `copy_dest_info` fields: endian (bits 2:0), format (bits 12:7), /// exp_bias (bits 21:16, signed 6-bit), array (bit 3). #[test] fn resolve_info_decodes_copy_dest_info_fields() { let mut rf = rf(); // endian=2 (k8in32), format=6 (k_8_8_8_8), exp_bias=-1 (0x3F), array=1 let val = 2u32 | (1u32 << 3) | (6u32 << 7) | (0x3Fu32 << 16); rf.write(reg::RB_COPY_DEST_INFO, val); let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.dest_endian, 2); assert_eq!(info.dest_format, 6); assert_eq!(info.dest_exp_bias, -1); assert!(info.copy_dest_array); } /// Positive and negative exp_bias round-trip through the 6-bit /// sign-extension. #[test] fn resolve_info_exp_bias_sign_extends() { let mut rf = rf(); rf.write(reg::RB_COPY_DEST_INFO, 1u32 << 16); // exp_bias = +1 assert_eq!(ResolveInfo::from_register_file(&rf).dest_exp_bias, 1); rf.write(reg::RB_COPY_DEST_INFO, 0x20u32 << 16); // exp_bias = -32 assert_eq!(ResolveInfo::from_register_file(&rf).dest_exp_bias, -32); rf.write(reg::RB_COPY_DEST_INFO, 0x1Fu32 << 16); // exp_bias = +31 assert_eq!(ResolveInfo::from_register_file(&rf).dest_exp_bias, 31); } /// `RB_SURFACE_INFO`: surface_pitch (bits 13:0) and msaa_samples (bits 17:16) /// feed `surface_pitch_tiles`. 1280 px divides by 80 exactly → 16 tiles /// at 1x MSAA / 32bpp; 4x MSAA doubles the sample pitch. 
#[test] fn resolve_info_computes_surface_pitch_tiles() { let mut rf = rf(); rf.write(reg::RB_COPY_CONTROL, 0); // color0 rf.write(reg::RB_COLOR_INFO_0, 0u32 << 16); // k_8_8_8_8 -> 32bpp rf.write(reg::RB_SURFACE_INFO, 1280); // msaa=1x, pitch=1280 let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.surface_pitch_tiles, 16); assert!(!info.source_is_64bpp); // 4x MSAA widens the sample pitch by 2x. rf.write(reg::RB_SURFACE_INFO, 1280 | (2u32 << 16)); let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.surface_pitch_tiles, 32); // Non-aligned pitch rounds up. rf.write(reg::RB_SURFACE_INFO, 1281); let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.surface_pitch_tiles, 17); } /// `color_render_target_format_is_64bpp` matches the 64bpp enum values /// in `xenos::ColorRenderTargetFormat`: k_16_16_16_16 (5), /// k_16_16_16_16_FLOAT (7), k_32_32_FLOAT (15). #[test] fn color_format_64bpp_table_is_correct() { assert!(!color_render_target_format_is_64bpp(0)); assert!(!color_render_target_format_is_64bpp(4)); assert!(color_render_target_format_is_64bpp(5)); assert!(!color_render_target_format_is_64bpp(6)); assert!(color_render_target_format_is_64bpp(7)); assert!(!color_render_target_format_is_64bpp(14)); assert!(color_render_target_format_is_64bpp(15)); } /// `surface_pitch_tiles` helper: exact arithmetic including the 64bpp /// doubling. `xenos.h:465-476`. #[test] fn surface_pitch_tiles_matches_canary_helper() { use crate::render_target_cache::MsaaSamples; // 80 px, 1x, 32bpp -> 1 tile exactly. assert_eq!(surface_pitch_tiles(80, MsaaSamples::X1, false), 1); // 81 px, 1x, 32bpp -> 2 tiles (round up). assert_eq!(surface_pitch_tiles(81, MsaaSamples::X1, false), 2); // 80 px, 1x, 64bpp -> 2 tiles (64bpp doubles). assert_eq!(surface_pitch_tiles(80, MsaaSamples::X1, true), 2); // 80 px, 2x, 32bpp -> 1 tile (2x MSAA doesn't widen X). 
assert_eq!(surface_pitch_tiles(80, MsaaSamples::X2, false), 1); // 80 px, 4x, 32bpp -> 2 tiles (4x MSAA widens X 2x). assert_eq!(surface_pitch_tiles(80, MsaaSamples::X4, false), 2); // 80 px, 4x, 64bpp -> 4 tiles. assert_eq!(surface_pitch_tiles(80, MsaaSamples::X4, true), 4); } /// The color-source branch reads from `RB_COLOR_INFO_` based on /// `copy_src_select`. Verify that index-3 color targets are addressed. #[test] fn resolve_info_color_source_selects_correct_color_info() { let mut rf = rf(); rf.write(reg::RB_COPY_CONTROL, 3); // color3 rf.write(reg::RB_COLOR_INFO_3, (5u32 << 16) | 0x123); // k_16_16_16_16, base=0x123 let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.source, ResolveSource::Color(3)); assert_eq!(info.source_format, 5); assert_eq!(info.source_base_tiles, 0x123); assert!(info.source_is_64bpp); } /// Depth-source branch reads from `RB_DEPTH_INFO` and parses its /// 1-bit format. #[test] fn resolve_info_depth_source_reads_depth_info() { let mut rf = rf(); rf.write(reg::RB_COPY_CONTROL, 4); // depth rf.write(reg::RB_DEPTH_INFO, (1u32 << 16) | 0x55); // kD24FS8, base=0x55 let info = ResolveInfo::from_register_file(&rf); assert_eq!(info.source, ResolveSource::Depth); assert_eq!(info.source_format, 1); assert_eq!(info.source_base_tiles, 0x55); assert!(!info.source_is_64bpp); // depth always 32bpp } // ---- Vertex fetch 0 rectangle tests ------------------------------- /// Helper: seed a triangle covering the rectangle `(x0, y0) → (x1, y1)` /// into guest memory at `vb_addr` and program VF0 to read 6 dwords /// from it with endian = k8in32 (the standard D3D-vertex-buffer case). fn seed_vertex_fetch_0( rf: &mut RegisterFile, mem: &xenia_memory::GuestMemory, vb_addr: u32, x0: f32, y0: f32, x1: f32, y1: f32, ) { use xenia_memory::MemoryAccess; // Three (x, y) float pairs covering the rect — exactly the D3D9 // resolve triangle layout Canary expects. 
// (x0, y0), (x1, y0), (x0, y1) let floats = [x0, y0, x1, y0, x0, y1]; for (i, f) in floats.iter().enumerate() { // Write float as BE (PPC `stfs` semantics). `mem.write_u32` // already stores BE bytes; pass the raw u32 bit pattern. mem.write_u32( vb_addr + i as u32 * 4, f.to_bits(), ); } // VF0 dword 0: address (bits 31:2, in dwords) + type (bits 1:0 = 3). let addr_dwords = vb_addr / 4; let dword_0 = (addr_dwords << 2) | 3; // VF0 dword 1: size (bits 25:2 = 6) + endian (bits 1:0 = 2 = k8in32). let dword_1 = (6u32 << 2) | 2; rf.write(0x4800, dword_0); rf.write(0x4801, dword_1); } fn fresh_mem_for_vf0() -> xenia_memory::GuestMemory { use xenia_memory::page_table::MemoryProtect; let mut mem = xenia_memory::GuestMemory::new().expect("guest memory"); mem.alloc( 0x5000_0000, 0x1_0000, MemoryProtect::READ | MemoryProtect::WRITE, ) .expect("alloc"); mem } #[test] fn vf0_rect_returns_none_when_no_vertex_buffer() { let rf = rf(); let mem = fresh_mem_for_vf0(); assert!(vertex_fetch_0_rect(&rf, &mem).is_none()); } #[test] fn vf0_rect_returns_none_for_wrong_size() { let mut rf = rf(); let mem = fresh_mem_for_vf0(); // type=3 (kVertex), size=4 (wrong — should be 6), endian=2. rf.write(0x4800, (0x5000_0000u32) | 3); rf.write(0x4801, (4u32 << 2) | 2); assert!(vertex_fetch_0_rect(&rf, &mem).is_none()); } #[test] fn vf0_rect_derives_rectangle_from_three_vertices() { let mut rf = rf(); let mut mem = fresh_mem_for_vf0(); // D3D9 pixel center: +0.5 half-pixel offset applied before Fixed16p8. // Leave PA_SU_VTX_CNTL at 0 (kD3DZero). // Triangle at (0, 0) → (100, 50) → vertex 2 = (0, 50). seed_vertex_fetch_0(&mut rf, &mut mem, 0x5000_0000, 0.0, 0.0, 100.0, 50.0); let (x0, y0, x1, y1) = vertex_fetch_0_rect(&rf, &mem).expect("VF0 present"); // (0 + 0.5) * 256 = 128. (128 + 127) >> 8 = 0. So x0/y0 = 0. // (100 + 0.5) * 256 = 25728. (25728 + 127) >> 8 = 100. // (50 + 0.5) * 256 = 12928. (12928 + 127) >> 8 = 50. 
assert_eq!(x0, 0); assert_eq!(y0, 0); assert_eq!(x1, 100); assert_eq!(y1, 50); } #[test] fn from_register_file_and_memory_prefers_vf0_rect() { let mut rf = rf(); let mut mem = fresh_mem_for_vf0(); // Without VF0: dest_pitch/height defaults produce (0, 0, 1280, 720). rf.write(reg::RB_COPY_DEST_PITCH, (720u32 << 16) | 1280u32); // With VF0 pointing at a 256×128 triangle, override to that. seed_vertex_fetch_0(&mut rf, &mut mem, 0x5000_0000, 0.0, 0.0, 256.0, 128.0); let info = ResolveInfo::from_register_file_and_memory(&rf, &mem); assert_eq!(info.coords.x0, 0); assert_eq!(info.coords.y0, 0); assert_eq!(info.coords.width, 256); assert_eq!(info.coords.height, 128); } /// If VF0 is absent, fall back to the scissor+dest default. #[test] fn from_register_file_and_memory_falls_back_without_vf0() { let mut rf = rf(); let mem = fresh_mem_for_vf0(); rf.write(reg::RB_COPY_DEST_PITCH, (720u32 << 16) | 1280u32); let info = ResolveInfo::from_register_file_and_memory(&rf, &mem); assert_eq!(info.coords.width, 1280); assert_eq!(info.coords.height, 720); } /// `resolve_rect_apply_scissor_and_align_8` with no scissor just /// 8-aligns. #[test] fn scissor_helper_8_aligns_with_no_scissor() { let rf = rf(); let (x0, y0, w, h) = resolve_rect_apply_scissor_and_align_8(&rf, 5, 5, 1001, 17); assert_eq!(x0, 0); assert_eq!(y0, 0); // 1001 ceil-to-8 = 1008; 17 ceil-to-8 = 24. assert_eq!(w, 1008); assert_eq!(h, 24); } /// Negative bounding-box (VF0 can produce these) clamps to the scissor /// top-left without going below zero. #[test] fn scissor_helper_clamps_negative_to_zero() { let mut rf = rf(); // Small scissor at (0,0)..(128, 64). rf.write(reg::PA_SC_WINDOW_SCISSOR_BR, (64u32 << 16) | 128u32); let (x0, y0, w, h) = resolve_rect_apply_scissor_and_align_8(&rf, -50, -50, 80, 32); assert_eq!(x0, 0); assert_eq!(y0, 0); // x1 clamped from 80 -> 80, ceil8 -> 80. y1 32 -> 32. assert_eq!(w, 80); assert_eq!(h, 32); } }