xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)

First real GPU implementation. Ring/PM4 frontend (ring_view, ring_drain, pm4) drains the command processor; gpu_system owns the threaded backend (DrainFence RPC + parker/fence helpers from M1) and the MMIO-mapped register block (mmio_region). Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode the Xbox 360 microcode, translator.rs lowers it onto the WGSL xenos_interp interpreter shader (shaders/xenos_interp.wgsl). shader_metrics.rs counts decode/translate work. Render state: draw_state, primitive, render_target_cache, texture_cache, tiled_address (Xenos's swizzled tiled-memory layout), xenos_constants (register field constants), edram (the 10 MiB EDRAM model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs owns the typed GPU-resource handles the kernel hands out. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:29:38 +02:00
parent 5f0d6487ea
commit 79eb52c378
24 changed files with 10984 additions and 18 deletions
--- a/crates/xenia-gpu/src/ring_drain.rs
+++ b/crates/xenia-gpu/src/ring_drain.rs
@@ -0,0 +1,169 @@
+//! Ring-buffer drainer.
+//!
+//! Walks a guest PM4 ring buffer from `start_offset` forward, classifying each
+//! packet via [`crate::pm4`] and stopping when it either exhausts the packet
+//! budget it was given, consumes a `PM4_XE_SWAP` packet, or hits a header
+//! whose declared size is larger than the whole ring.
+//!
+//! It does **not** execute draws — that's deferred to a later phase. Its job
+//! is to (a) advance the read pointer far enough that games keep making
+//! progress, and (b) surface `PM4_XE_SWAP` packets so `VdSwap` can forward
+//! them to the host UI.
+
+use xenia_memory::MemoryAccess;
+
+use crate::pm4::{self, PacketKind};
+
+/// Outcome of a [`drain`] call.
+///
+/// `drain` returns the all-zero / `false` default of this struct when it is a
+/// no-op. The three `swap_*` payload fields keep those defaults unless
+/// `swap_seen` is set.
+#[derive(Default, Debug, Clone, Copy)]
+pub struct DrainResult {
+    /// Dword offset reached, relative to the start of the ring buffer
+    /// (already reduced modulo the ring size).
+    pub new_offset: u32,
+    /// How many packets were walked in this call, including a consumed
+    /// `PM4_XE_SWAP` packet.
+    pub packets_walked: u32,
+    /// True if we saw `PM4_XE_SWAP` during the walk.
+    pub swap_seen: bool,
+    /// If `swap_seen`, the guest frontbuffer *physical* address written next
+    /// to `PM4_XE_SWAP` (packet dword 2, counting the header as dword 0 and
+    /// the swap signature as dword 1).
+    pub swap_frontbuffer_phys: u32,
+    /// If `swap_seen`, the width written at packet dword 3.
+    pub swap_width: u32,
+    /// If `swap_seen`, the height written at packet dword 4.
+    pub swap_height: u32,
+}
+
+/// Walk up to `max_packets` packets starting at dword offset `start_offset`
+/// in the ring buffer at guest address `ring_base` of size
+/// `ring_size_dwords`.
+///
+/// `start_offset` is reduced modulo `ring_size_dwords` before the walk, and
+/// every advance wraps the same way. All offset arithmetic is widened to
+/// `u64`, so a ring larger than `2^31` dwords cannot overflow `u32`.
+///
+/// Returns a [`DrainResult`] with the offset reached, the number of packets
+/// walked and, if one was consumed, the `PM4_XE_SWAP` payload.
+///
+/// Walking stops when:
+/// - `max_packets` have been walked,
+/// - a `PM4_XE_SWAP` has been consumed (the swap is reported and we stop so
+///   the UI sees the frame boundary before further drain),
+/// - a header's declared total size is larger than the whole ring (that
+///   packet is not counted and the offset is left pointing at it).
+///
+/// If `ring_size_dwords` or `ring_base` is zero the drainer is a no-op and
+/// returns `DrainResult::default()` (so `new_offset` is 0, not
+/// `start_offset`).
+pub fn drain<M: MemoryAccess + ?Sized>(
+    mem: &M,
+    ring_base: u32,
+    ring_size_dwords: u32,
+    start_offset: u32,
+    max_packets: u32,
+) -> DrainResult {
+    if ring_size_dwords == 0 || ring_base == 0 {
+        return DrainResult::default();
+    }
+    // Reads the ring dword at `dword_offset`; the byte address wraps in the
+    // 32-bit guest address space.
+    let read_dword =
+        |dword_offset: u32| mem.read_u32(ring_base.wrapping_add(dword_offset.wrapping_mul(4)));
+    let mut result = DrainResult {
+        new_offset: start_offset % ring_size_dwords,
+        ..DrainResult::default()
+    };
+    for _ in 0..max_packets {
+        let offset = result.new_offset;
+        let packet = pm4::decode(read_dword(offset));
+        // Refuse to walk past the ring in a single packet.
+        if packet.total_dwords > ring_size_dwords {
+            break;
+        }
+        let is_swap = matches!(
+            packet.kind,
+            PacketKind::Type3 { opcode, .. } if opcode == pm4::PM4_XE_SWAP
+        );
+        if is_swap {
+            // Payload layout (from canary VdSwap_entry):
+            //   [0] XE_SWAP header
+            //   [1] kSwapSignature ("XNEX" = 0x584E4558)
+            //   [2] frontbuffer physical address
+            //   [3] width
+            //   [4] height
+            // NOTE: neither the signature nor the packet's declared length is
+            // checked here; the three dwords are read unconditionally.
+            let payload = |i: u32| read_dword(wrap_ring_offset(offset, i, ring_size_dwords));
+            result.swap_seen = true;
+            result.swap_frontbuffer_phys = payload(2);
+            result.swap_width = payload(3);
+            result.swap_height = payload(4);
+        }
+        result.new_offset = wrap_ring_offset(offset, packet.total_dwords, ring_size_dwords);
+        result.packets_walked += 1;
+        if is_swap {
+            // Stop at the frame boundary; the swap packet itself is consumed.
+            break;
+        }
+    }
+    result
+}
+
+/// Returns `(offset + delta) % ring_size_dwords` without overflowing `u32`.
+///
+/// `ring_size_dwords` must be non-zero.
+fn wrap_ring_offset(offset: u32, delta: u32, ring_size_dwords: u32) -> u32 {
+    let wrapped = (u64::from(offset) + u64::from(delta)) % u64::from(ring_size_dwords);
+    // The remainder is strictly below `ring_size_dwords`, so it fits in `u32`.
+    wrapped as u32
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use xenia_memory::GuestMemory;
+    use xenia_memory::page_table::MemoryProtect;
+
+    /// Guest address of the test ring buffer.
+    const RING_BASE: u32 = 0x4000_0000;
+    /// Header of a single-dword Type-2 NOP packet.
+    const NOP: u32 = 0x8000_0000;
+
+    /// Guest memory with one read/write page mapped at `RING_BASE`.
+    fn build_mem() -> GuestMemory {
+        let mut mem = GuestMemory::new().unwrap();
+        let rw = MemoryProtect::READ | MemoryProtect::WRITE;
+        mem.alloc(RING_BASE, 0x1000, rw).unwrap();
+        mem
+    }
+
+    /// Stores `val` at guest address `addr`; writes only need a shared borrow.
+    fn write_dword(mem: &GuestMemory, addr: u32, val: u32) {
+        mem.write_u32(addr, val);
+    }
+
+    /// MakePacketType3(PM4_XE_SWAP, 4) → (3<<30) | (3<<16) | (0x64<<8).
+    /// The whole packet is 5 dwords: header plus 4 payload dwords.
+    fn swap_header() -> u32 {
+        (3u32 << 30) | (3u32 << 16) | ((pm4::PM4_XE_SWAP as u32) << 8)
+    }
+
+    #[test]
+    fn walks_nops_until_budget_exhausted() {
+        let mem = build_mem();
+        // Fill 10 dwords with Type-2 NOPs.
+        for i in 0..10 {
+            write_dword(&mem, RING_BASE + i * 4, NOP);
+        }
+        let r = drain(&mem, RING_BASE, 0x400, 0, 5);
+        assert_eq!(r.packets_walked, 5);
+        assert_eq!(r.new_offset, 5);
+        assert!(!r.swap_seen);
+    }
+
+    #[test]
+    fn stops_at_swap_and_reports_payload() {
+        let mem = build_mem();
+        // Two NOPs, then a PM4_XE_SWAP packet.
+        write_dword(&mem, RING_BASE, NOP);
+        write_dword(&mem, RING_BASE + 0x04, NOP);
+        write_dword(&mem, RING_BASE + 0x08, swap_header());
+        write_dword(&mem, RING_BASE + 0x0C, pm4::SWAP_SIGNATURE);
+        write_dword(&mem, RING_BASE + 0x10, 0xDEAD_F000); // frontbuffer phys
+        write_dword(&mem, RING_BASE + 0x14, 1280);
+        write_dword(&mem, RING_BASE + 0x18, 720);
+        let r = drain(&mem, RING_BASE, 0x400, 0, 16);
+        assert!(r.swap_seen);
+        assert_eq!(r.swap_frontbuffer_phys, 0xDEAD_F000);
+        assert_eq!(r.swap_width, 1280);
+        assert_eq!(r.swap_height, 720);
+        assert_eq!(r.packets_walked, 3);
+        assert_eq!(r.new_offset, 7); // 2 NOPs (1 dword each) + 5-dword swap = 7
+    }
+
+    #[test]
+    fn wraps_around_ring() {
+        let mem = build_mem();
+        // Ring size = 4 dwords. Start at offset 3 (last dword). Write a NOP
+        // there, then the walker should wrap to offset 0.
+        write_dword(&mem, RING_BASE + 0x0C, NOP);
+        write_dword(&mem, RING_BASE, NOP);
+        let r = drain(&mem, RING_BASE, 4, 3, 2);
+        assert_eq!(r.packets_walked, 2);
+        assert_eq!(r.new_offset, 1);
+    }
+
+    #[test]
+    fn zero_ring_size_is_noop() {
+        let mem = build_mem();
+        let r = drain(&mem, RING_BASE, 0, 0, 10);
+        assert_eq!(r.packets_walked, 0);
+        assert_eq!(r.new_offset, 0);
+        assert!(!r.swap_seen);
+    }
+
+    #[test]
+    fn zero_ring_base_is_noop() {
+        let mem = build_mem();
+        // A null ring base short-circuits before any memory is read, and the
+        // requested start offset is not echoed back.
+        let r = drain(&mem, 0, 0x400, 7, 10);
+        assert_eq!(r.packets_walked, 0);
+        assert_eq!(r.new_offset, 0);
+        assert!(!r.swap_seen);
+    }
+
+    #[test]
+    fn stops_at_packet_larger_than_ring() {
+        let mem = build_mem();
+        // A 5-dword swap packet cannot fit in a 4-dword ring, so the walker
+        // must stop on its header without consuming or reporting it.
+        write_dword(&mem, RING_BASE, swap_header());
+        let r = drain(&mem, RING_BASE, 4, 0, 16);
+        assert_eq!(r.packets_walked, 0);
+        assert_eq!(r.new_offset, 0);
+        assert!(!r.swap_seen);
+    }
+}