Files
xenia-rs/crates/xenia-gpu/src/mmio_region.rs
MechaCat02 79eb52c378 xenia-gpu: end-to-end Xenos pipeline (PM4, ucode, EDRAM, resolve)
First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).

Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.

Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:29:38 +02:00

218 lines
9.3 KiB
Rust

//! Construct a `xenia_memory::MmioRegion` that backs the Xenos GPU register
//! aperture at guest physical `0x7FC80000` (per canary
//! `graphics_system.cc:141-144` — `memory_->AddVirtualMappedRange(0x7FC80000,
//! 0xFFFF0000, 0x0000FFFF, …)`).
//!
//! Only a handful of registers need a round-trip over the bus; everything
//! else (the ALU / fetch constants, the RBBM state machine, …) lives inside
//! `GpuSystem::register_file` and is driven by PM4 packets from the CP on
//! the same host thread.
//!
//! The read/write closures capture `Arc<AtomicU32>` mailboxes cloned from
//! [`crate::GpuMmio`]; [`crate::GpuSystem::sync_with_mmio`] samples them
//! each scheduler round.
use std::sync::atomic::Ordering;
use xenia_memory::MmioRegion;
use crate::gpu_system::{reg, GpuMmio};
/// Guest physical address at which the Xenos GPU register aperture starts.
/// Same value as canary's `graphics_system.cc:141`.
pub const APERTURE_BASE: u32 = 0x7FC8_0000;
/// Aperture length in bytes: a 64 KiB window, i.e. everything reachable
/// through the low 16 address bits.
pub const APERTURE_SIZE: u32 = 1 << 16;
/// Address mask handed to the `MmioRegion` (used by `MmioRegion::contains`)
/// so that every `0x7FC8xxxx` address hits the aperture. It keeps the high
/// 16 bits and discards the in-window offset.
pub const APERTURE_MASK: u32 = !(APERTURE_SIZE - 1);
/// Build the [`MmioRegion`] to install on the guest memory.
///
/// The returned region covers [`APERTURE_BASE`] / [`APERTURE_MASK`] /
/// [`APERTURE_SIZE`]. Its callbacks own clones of the mailboxes held by
/// `mmio`, so the region stays usable independently of the borrow passed in.
///
/// Both callbacks turn the incoming byte address into a register index with
/// `(addr & 0xFFFF) / 4` and dispatch on it:
///
/// * Reads of `CP_RB_WPTR`, `CP_RB_RPTR`, `CP_INT_STATUS`, `CP_INT_ACK` and
///   `D1MODE_VBLANK_VLINE_STATUS` return the current mailbox value.
/// * A `CP_RB_WPTR` write publishes the value, raises `wake_pending` and
///   unparks the worker thread if one is registered.
/// * A `CP_INT_ACK` write stores the value verbatim.
/// * A `D1MODE_VBLANK_VLINE_STATUS` write clears every bit that is set in
///   the written value (write-1-to-clear) as one atomic operation.
/// * Any other register reads as `0` and ignores writes; both cases are
///   logged at trace level.
pub fn build_region(mmio: &GpuMmio) -> MmioRegion {
    // Mailboxes moved into the read closure.
    let read_wptr = mmio.cp_rb_wptr.clone();
    let read_rptr = mmio.cp_rb_rptr.clone();
    let read_int_status = mmio.cp_int_status.clone();
    let read_int_ack = mmio.cp_int_ack.clone();
    let read_vblank_status = mmio.d1mode_vblank_vline_status.clone();

    // Mailboxes moved into the write closure.
    let write_wptr = mmio.cp_rb_wptr.clone();
    let write_int_ack = mmio.cp_int_ack.clone();
    let write_vblank_status = mmio.d1mode_vblank_vline_status.clone();

    // M1.7 parker — captured into the WPTR write closure to wake a
    // parked GPU worker on every guest WPTR write. In inline mode the
    // mutex holds `None`, so the unpark site is a brief lock + no-op.
    let wake_pending = mmio.wake_pending.clone();
    let worker_thread = mmio.worker_thread.clone();

    MmioRegion {
        base_address: APERTURE_BASE,
        mask: APERTURE_MASK,
        size: APERTURE_SIZE,
        read_callback: Box::new(move |addr: u32| {
            let reg_index = (addr & 0xFFFF) / 4;
            match reg_index {
                reg::CP_RB_WPTR => read_wptr.load(Ordering::Relaxed),
                reg::CP_RB_RPTR => read_rptr.load(Ordering::Relaxed),
                reg::CP_INT_STATUS => read_int_status.load(Ordering::Relaxed),
                // Games sometimes read back the ack register to check
                // interrupt ownership — serve the last-written value.
                reg::CP_INT_ACK => read_int_ack.load(Ordering::Relaxed),
                reg::D1MODE_VBLANK_VLINE_STATUS => {
                    read_vblank_status.load(Ordering::Relaxed)
                }
                _ => {
                    tracing::trace!(
                        reg = format_args!("{reg_index:#x}"),
                        addr = format_args!("{addr:#010x}"),
                        "gpu mmio: unmapped read (returning 0)"
                    );
                    0
                }
            }
        }),
        write_callback: Box::new(move |addr: u32, value: u32| {
            let reg_index = (addr & 0xFFFF) / 4;
            match reg_index {
                reg::CP_RB_WPTR => {
                    // Release: any prior writes to ring memory the guest
                    // performed before bumping WPTR must be visible to
                    // the GPU consumer that Acquire-loads this atomic.
                    write_wptr.store(value, Ordering::Release);
                    // M1.7 parker wake: set the pending bit (Release) so
                    // a worker swapping it on its way to `park_timeout`
                    // sees `was_pending == true` and skips the park; AND
                    // unpark the worker if it's already parked. Both are
                    // necessary to defend against the race window between
                    // the worker's `swap(false)` and `park_timeout()`.
                    wake_pending.store(true, Ordering::Release);
                    // A failed lock is deliberately ignored: the pending
                    // flag above has already been raised, so only the
                    // explicit unpark is skipped in that case.
                    if let Ok(guard) = worker_thread.lock() {
                        if let Some(thread) = guard.as_ref() {
                            thread.unpark();
                        }
                    }
                    tracing::trace!(
                        value,
                        addr = format_args!("{addr:#010x}"),
                        "gpu mmio: CP_RB_WPTR write"
                    );
                }
                // CP_INT_ACK clears interrupt bits; we just echo the value
                // into the mailbox so the read path can hand it back.
                reg::CP_INT_ACK => {
                    write_int_ack.store(value, Ordering::Relaxed);
                }
                // D1MODE_VBLANK_VLINE_STATUS is write-1-to-clear per the
                // AMD M56 display-controller ref. Clear any bit the guest
                // writes a 1 to (leaving other bits untouched).
                //
                // This must be a single atomic read-modify-write. A
                // separate `load` followed by `store` would overwrite, and
                // thereby lose, any status bit another holder of this
                // mailbox sets between the two operations.
                reg::D1MODE_VBLANK_VLINE_STATUS => {
                    write_vblank_status.fetch_and(!value, Ordering::Relaxed);
                }
                _ => {
                    tracing::trace!(
                        reg = format_args!("{reg_index:#x}"),
                        addr = format_args!("{addr:#010x}"),
                        value = format_args!("{value:#x}"),
                        "gpu mmio: unmapped write (dropping)"
                    );
                }
            }
        }),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh mailbox set plus the MMIO region wired to it.
    fn build() -> (GpuMmio, MmioRegion) {
        let mailboxes = GpuMmio::new();
        let region = build_region(&mailboxes);
        (mailboxes, region)
    }

    /// Guest byte address of the register with the given index: registers
    /// are 4 bytes apart, starting at `APERTURE_BASE`.
    fn reg_addr(index: u32) -> u32 {
        APERTURE_BASE + index * 4
    }

    /// Reading `D1MODE_VBLANK_VLINE_STATUS` has to expose whatever the
    /// atomic currently holds. Sylpheed's graphics-interrupt callback
    /// inspects bit 0 to learn whether vblank really fired; a constant 0
    /// would make it silently skip every frame's work.
    #[test]
    fn vblank_status_read_returns_stored_value() {
        let (mailboxes, region) = build();
        mailboxes
            .d1mode_vblank_vline_status
            .store(0x1, Ordering::Relaxed);

        let observed = (region.read_callback)(reg_addr(reg::D1MODE_VBLANK_VLINE_STATUS));

        assert_eq!(observed, 0x1);
    }

    /// The guest acknowledges the flag by writing a 1 to it: write-1-to-clear,
    /// as in the AMD M56 display-controller ref and Canary. Bits the guest
    /// did not write stay set, so higher-bit status (VLINE_INT_OCCURRED etc.)
    /// can coexist with a concurrent clear of bit 0.
    #[test]
    fn vblank_status_write_1_to_clear() {
        let (mailboxes, region) = build();
        mailboxes
            .d1mode_vblank_vline_status
            .store(0b11, Ordering::Relaxed);

        (region.write_callback)(reg_addr(reg::D1MODE_VBLANK_VLINE_STATUS), 0b01);

        let remaining = mailboxes.d1mode_vblank_vline_status.load(Ordering::Relaxed);
        assert_eq!(remaining, 0b10, "bit 0 cleared, bit 1 preserved");
    }

    /// W1TC also means that writing a 0 to a bit leaves that bit alone.
    #[test]
    fn vblank_status_write_0_is_noop() {
        let (mailboxes, region) = build();
        mailboxes
            .d1mode_vblank_vline_status
            .store(0b11, Ordering::Relaxed);

        (region.write_callback)(reg_addr(reg::D1MODE_VBLANK_VLINE_STATUS), 0x0);

        let remaining = mailboxes.d1mode_vblank_vline_status.load(Ordering::Relaxed);
        assert_eq!(remaining, 0b11);
    }

    /// Regression guard. `reg::CP_RB_WPTR` used to hold a byte offset
    /// (`0x0714`) while the dispatch compared against a *register index*
    /// (`(addr & 0xFFFF) / 4 == 0x01C5`), so guest MMIO writes to WPTR were
    /// treated as "unmapped" and the atomic never moved; only the
    /// `VdInitializeRingBuffer` / `extend_write_ptr` paths worked.
    ///
    /// This test and the CP tests after it check that each CP register is
    /// reached at its canonical `APERTURE_BASE + index*4` byte address.
    #[test]
    fn cp_rb_wptr_write_via_mmio_bus_reaches_atomic() {
        let (mailboxes, region) = build();
        let address = reg_addr(reg::CP_RB_WPTR);
        assert_eq!(address, 0x7FC8_0714, "byte offset must match Canary CP_RB_WPTR");

        (region.write_callback)(address, 0x1234_5678);

        assert_eq!(mailboxes.cp_rb_wptr.load(Ordering::Relaxed), 0x1234_5678);
    }

    /// A guest write to `CP_INT_ACK` lands in its mailbox unchanged.
    #[test]
    fn cp_int_ack_write_via_mmio_bus_reaches_atomic() {
        let (mailboxes, region) = build();
        let address = reg_addr(reg::CP_INT_ACK);
        assert_eq!(address, 0x7FC8_07D0, "byte offset must match Canary CP_INT_ACK");

        (region.write_callback)(address, 0xDEAD_BEEF);

        assert_eq!(mailboxes.cp_int_ack.load(Ordering::Relaxed), 0xDEAD_BEEF);
    }

    /// A guest read of `CP_RB_RPTR` returns the mailbox contents.
    #[test]
    fn cp_rb_rptr_read_via_mmio_bus_returns_atomic() {
        let (mailboxes, region) = build();
        mailboxes.cp_rb_rptr.store(0xCAFE_F00D, Ordering::Relaxed);

        let observed = (region.read_callback)(reg_addr(reg::CP_RB_RPTR));

        assert_eq!(observed, 0xCAFE_F00D);
    }

    /// A guest read of `CP_INT_STATUS` returns the mailbox contents.
    #[test]
    fn cp_int_status_read_via_mmio_bus_returns_atomic() {
        let (mailboxes, region) = build();
        mailboxes.cp_int_status.store(0x0000_0001, Ordering::Relaxed);

        let observed = (region.read_callback)(reg_addr(reg::CP_INT_STATUS));

        assert_eq!(observed, 0x0000_0001);
    }
}