xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)
First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).
Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.
Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
169
crates/xenia-gpu/src/ring_drain.rs
Normal file
169
crates/xenia-gpu/src/ring_drain.rs
Normal file
@@ -0,0 +1,169 @@
|
||||
//! Ring-buffer drainer.
|
||||
//!
|
||||
//! Walks a guest PM4 ring buffer from `start_offset` forward, classifying each
//! packet via [`crate::pm4`] and stopping when it either exhausts the packet
//! budget it was given, consumes a `PM4_XE_SWAP` packet, or hits a malformed
//! header whose declared size is larger than the ring itself.
|
||||
//!
|
||||
//! It does **not** execute draws — that's deferred to a later phase. Its job
|
||||
//! is to (a) advance the read pointer far enough that games keep making
|
||||
//! progress, and (b) surface `PM4_XE_SWAP` packets so `VdSwap` can forward
|
||||
//! them to the host UI.
|
||||
|
||||
use xenia_memory::MemoryAccess;
|
||||
|
||||
use crate::pm4::{self, PacketKind};
|
||||
|
||||
/// Outcome of a [`drain`] call.
///
/// All fields are plain `u32`/`bool` values, so the type is `Copy` and can be
/// compared directly (`==`, `assert_eq!`). The `Default` value — every field
/// zero / `false` — is what [`drain`] returns when it has nothing to do.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct DrainResult {
    /// Dword offset reached, relative to the start of the ring buffer.
    pub new_offset: u32,
    /// How many packets were walked in this call.
    pub packets_walked: u32,
    /// True if we saw `PM4_XE_SWAP` during the walk.
    pub swap_seen: bool,
    /// If `swap_seen`, the guest frontbuffer *physical* address carried by the
    /// `PM4_XE_SWAP` packet (dword 2, counting the packet header as dword 0).
    pub swap_frontbuffer_phys: u32,
    /// If `swap_seen`, the width written at dword 3 of the swap packet.
    pub swap_width: u32,
    /// If `swap_seen`, the height written at dword 4 of the swap packet.
    pub swap_height: u32,
}
|
||||
|
||||
/// Walk `max_packets` packets starting at dword offset `start_offset` in the
|
||||
/// ring buffer at guest address `ring_base` of size `ring_size_dwords`.
|
||||
///
|
||||
/// The offset is treated modulo `ring_size_dwords`. Walking stops when:
|
||||
/// - `max_packets` have been walked,
|
||||
/// - a `PM4_XE_SWAP` has been consumed (the swap is reported and we stop so
|
||||
/// the UI sees the frame boundary before further drain),
|
||||
/// - a header's declared total size would exceed the remaining budget,
|
||||
/// - the ring size is zero (drainer is a no-op).
|
||||
pub fn drain<M: MemoryAccess + ?Sized>(
|
||||
mem: &M,
|
||||
ring_base: u32,
|
||||
ring_size_dwords: u32,
|
||||
start_offset: u32,
|
||||
max_packets: u32,
|
||||
) -> DrainResult {
|
||||
if ring_size_dwords == 0 || ring_base == 0 {
|
||||
return DrainResult::default();
|
||||
}
|
||||
let mut result = DrainResult {
|
||||
new_offset: start_offset % ring_size_dwords,
|
||||
..DrainResult::default()
|
||||
};
|
||||
let mut offset = result.new_offset;
|
||||
for _ in 0..max_packets {
|
||||
let header_addr = ring_base.wrapping_add(offset.wrapping_mul(4));
|
||||
let header = mem.read_u32(header_addr);
|
||||
let packet = pm4::decode(header);
|
||||
// Refuse to walk past the ring in a single packet.
|
||||
if packet.total_dwords > ring_size_dwords {
|
||||
break;
|
||||
}
|
||||
// Type-3 PM4_XE_SWAP → record payload and stop.
|
||||
if let PacketKind::Type3 { opcode, .. } = packet.kind
|
||||
&& opcode == pm4::PM4_XE_SWAP {
|
||||
// Payload layout (from canary VdSwap_entry):
|
||||
// [0] XE_SWAP header
|
||||
// [1] kSwapSignature ("XNEX" = 0x584E4558)
|
||||
// [2] frontbuffer physical address
|
||||
// [3] width
|
||||
// [4] height
|
||||
let payload = |i: u32| {
|
||||
let addr =
|
||||
ring_base.wrapping_add(((offset + i) % ring_size_dwords).wrapping_mul(4));
|
||||
mem.read_u32(addr)
|
||||
};
|
||||
result.swap_seen = true;
|
||||
result.swap_frontbuffer_phys = payload(2);
|
||||
result.swap_width = payload(3);
|
||||
result.swap_height = payload(4);
|
||||
offset = (offset + packet.total_dwords) % ring_size_dwords;
|
||||
result.new_offset = offset;
|
||||
result.packets_walked += 1;
|
||||
return result;
|
||||
}
|
||||
offset = (offset + packet.total_dwords) % ring_size_dwords;
|
||||
result.new_offset = offset;
|
||||
result.packets_walked += 1;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use xenia_memory::GuestMemory;
    use xenia_memory::page_table::MemoryProtect;

    /// Guest address at which every test places its ring buffer.
    const RING_BASE: u32 = 0x4000_0000;
    /// Header value these tests use as a single-dword Type-2 NOP.
    const NOP: u32 = 0x8000_0000;

    /// Allocates one read/write 4 KiB guest page at `RING_BASE`.
    fn build_mem() -> GuestMemory {
        let mut mem = GuestMemory::new().unwrap();
        let rw = MemoryProtect::READ | MemoryProtect::WRITE;
        mem.alloc(RING_BASE, 0x1000, rw).unwrap();
        mem
    }

    /// Writes one dword into guest memory; a shared borrow is sufficient.
    fn write_dword(mem: &GuestMemory, addr: u32, val: u32) {
        mem.write_u32(addr, val);
    }

    /// MakePacketType3(PM4_XE_SWAP, 4) → (3<<30) | (3<<16) | (0x64<<8)
    fn swap_header() -> u32 {
        (3u32 << 30) | (3u32 << 16) | ((pm4::PM4_XE_SWAP as u32) << 8)
    }

    #[test]
    fn walks_nops_until_budget_exhausted() {
        let mem = build_mem();
        // Fill 10 dwords with Type-2 NOPs.
        for i in 0..10 {
            write_dword(&mem, RING_BASE + i * 4, NOP);
        }
        let r = drain(&mem, RING_BASE, 0x400, 0, 5);
        assert_eq!(r.packets_walked, 5);
        assert_eq!(r.new_offset, 5);
        assert!(!r.swap_seen);
    }

    #[test]
    fn stops_at_swap_and_reports_payload() {
        let mem = build_mem();
        // Two NOPs, then a PM4_XE_SWAP packet.
        write_dword(&mem, RING_BASE, NOP);
        write_dword(&mem, RING_BASE + 0x04, NOP);
        write_dword(&mem, RING_BASE + 0x08, swap_header());
        write_dword(&mem, RING_BASE + 0x0C, pm4::SWAP_SIGNATURE);
        write_dword(&mem, RING_BASE + 0x10, 0xDEAD_F000); // frontbuffer phys
        write_dword(&mem, RING_BASE + 0x14, 1280);
        write_dword(&mem, RING_BASE + 0x18, 720);
        let r = drain(&mem, RING_BASE, 0x400, 0, 16);
        assert!(r.swap_seen);
        assert_eq!(r.swap_frontbuffer_phys, 0xDEAD_F000);
        assert_eq!(r.swap_width, 1280);
        assert_eq!(r.swap_height, 720);
        assert_eq!(r.packets_walked, 3);
        assert_eq!(r.new_offset, 7); // 2 NOPs (1 dword each) + 5-dword swap = 7
    }

    #[test]
    fn swap_payload_wraps_around_ring() {
        let mem = build_mem();
        // Ring size = 8 dwords, swap header at offset 6: the header and
        // signature sit at the tail of the ring, the rest of the payload
        // wraps to offsets 0..=2.
        write_dword(&mem, RING_BASE + 0x18, swap_header());
        write_dword(&mem, RING_BASE + 0x1C, pm4::SWAP_SIGNATURE);
        write_dword(&mem, RING_BASE, 0xDEAD_F000); // frontbuffer phys
        write_dword(&mem, RING_BASE + 0x04, 1280);
        write_dword(&mem, RING_BASE + 0x08, 720);
        let r = drain(&mem, RING_BASE, 8, 6, 4);
        assert!(r.swap_seen);
        assert_eq!(r.swap_frontbuffer_phys, 0xDEAD_F000);
        assert_eq!(r.swap_width, 1280);
        assert_eq!(r.swap_height, 720);
        assert_eq!(r.packets_walked, 1);
        assert_eq!(r.new_offset, 3); // (6 + 5) % 8
    }

    #[test]
    fn wraps_around_ring() {
        let mem = build_mem();
        // Ring size = 4 dwords. Start at offset 3 (last dword). Write a NOP
        // there, then the walker should wrap to offset 0.
        write_dword(&mem, RING_BASE + 0x0C, NOP);
        write_dword(&mem, RING_BASE, NOP);
        let r = drain(&mem, RING_BASE, 4, 3, 2);
        assert_eq!(r.packets_walked, 2);
        assert_eq!(r.new_offset, 1);
    }

    #[test]
    fn zero_ring_size_is_noop() {
        let mem = build_mem();
        let r = drain(&mem, RING_BASE, 0, 0, 10);
        assert_eq!(r.packets_walked, 0);
        assert_eq!(r.new_offset, 0);
        assert!(!r.swap_seen);
    }

    #[test]
    fn null_ring_base_is_noop() {
        let mem = build_mem();
        // A zero ring base short-circuits before any guest memory is read and
        // yields the default (all-zero) result, regardless of `start_offset`.
        let r = drain(&mem, 0, 0x400, 3, 10);
        assert_eq!(r.packets_walked, 0);
        assert_eq!(r.new_offset, 0);
        assert!(!r.swap_seen);
    }
}
|
||||
Reference in New Issue
Block a user