First real GPU implementation. Ring/PM4 frontend (ring_view,
ring_drain, pm4) drains the command processor; gpu_system owns the
threaded backend (DrainFence RPC + parker/fence helpers from M1) and
the MMIO-mapped register block (mmio_region).
Xenos shader frontend: ucode/{alu,control_flow,fetch,mod}.rs decode
the Xbox 360 microcode, translator.rs lowers it onto the WGSL
xenos_interp interpreter shader (shaders/xenos_interp.wgsl).
shader_metrics.rs counts decode/translate work.
Render state: draw_state, primitive, render_target_cache,
texture_cache, tiled_address (Xenos's swizzled tiled-memory layout),
xenos_constants (register field constants), edram (the 10 MiB EDRAM
model with MSAA), and resolve.rs (TILE_FLUSH copy-out — clear-resolve
plus bitwise-equivalent 32 bpp + 64 bpp paths landed). handle.rs
owns the typed GPU-resource handles the kernel hands out.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
218 lines
9.3 KiB
Rust
218 lines
9.3 KiB
Rust
//! Construct a `xenia_memory::MmioRegion` that backs the Xenos GPU register
//! aperture at guest physical `0x7FC80000` (per canary
//! `graphics_system.cc:141-144` — `memory_->AddVirtualMappedRange(0x7FC80000,
//! 0xFFFF0000, 0x0000FFFF, …)`).
//!
//! Only a handful of registers need a round-trip over the bus; everything
//! else (the ALU / fetch constants, the RBBM state machine, …) lives inside
//! `GpuSystem::register_file` and is driven by PM4 packets from the CP on
//! the same host thread.
//!
//! The read/write closures capture `Arc<AtomicU32>` mailboxes cloned from
//! [`crate::GpuMmio`]; [`crate::GpuSystem::sync_with_mmio`] samples them
//! each scheduler round.
|
|
|
|
use std::sync::atomic::Ordering;
|
|
|
|
use xenia_memory::MmioRegion;
|
|
|
|
use crate::gpu_system::{reg, GpuMmio};
|
|
|
|
/// Xenos GPU register aperture base (guest physical address). Matches
/// canary's `graphics_system.cc:141`.
pub const APERTURE_BASE: u32 = 0x7FC8_0000;

/// Mask used by `MmioRegion::contains` so any `0x7FC8xxxx` address hits:
/// only the upper 16 address bits take part in the comparison.
pub const APERTURE_MASK: u32 = 0xFFFF_0000;

/// Total aperture size in bytes (`0x1_0000`, i.e. the whole low 16-bit
/// register window left open by [`APERTURE_MASK`]).
pub const APERTURE_SIZE: u32 = 0x0001_0000;
|
|
|
|
/// Build the [`MmioRegion`] to install on the guest memory.
|
|
pub fn build_region(mmio: &GpuMmio) -> MmioRegion {
|
|
let read_wptr = mmio.cp_rb_wptr.clone();
|
|
let read_rptr = mmio.cp_rb_rptr.clone();
|
|
let read_int_status = mmio.cp_int_status.clone();
|
|
let read_int_ack = mmio.cp_int_ack.clone();
|
|
let read_vblank_status = mmio.d1mode_vblank_vline_status.clone();
|
|
let write_wptr = mmio.cp_rb_wptr.clone();
|
|
let write_int_ack = mmio.cp_int_ack.clone();
|
|
let write_vblank_status = mmio.d1mode_vblank_vline_status.clone();
|
|
// M1.7 parker — captured into the WPTR write closure to wake a
|
|
// parked GPU worker on every guest WPTR write. In inline mode the
|
|
// mutex holds `None`, so the unpark site is a brief lock + no-op.
|
|
let wake_pending = mmio.wake_pending.clone();
|
|
let worker_thread = mmio.worker_thread.clone();
|
|
|
|
MmioRegion {
|
|
base_address: APERTURE_BASE,
|
|
mask: APERTURE_MASK,
|
|
size: APERTURE_SIZE,
|
|
read_callback: Box::new(move |addr: u32| {
|
|
let reg_index = (addr & 0xFFFF) / 4;
|
|
match reg_index {
|
|
reg::CP_RB_WPTR => read_wptr.load(Ordering::Relaxed),
|
|
reg::CP_RB_RPTR => read_rptr.load(Ordering::Relaxed),
|
|
reg::CP_INT_STATUS => read_int_status.load(Ordering::Relaxed),
|
|
// Games sometimes read-back the ack register to check interrupt ownership
|
|
// — serve the last-written value.
|
|
reg::CP_INT_ACK => read_int_ack.load(Ordering::Relaxed),
|
|
reg::D1MODE_VBLANK_VLINE_STATUS => {
|
|
read_vblank_status.load(Ordering::Relaxed)
|
|
}
|
|
_ => {
|
|
tracing::trace!(
|
|
reg = format_args!("{reg_index:#x}"),
|
|
addr = format_args!("{addr:#010x}"),
|
|
"gpu mmio: unmapped read (returning 0)"
|
|
);
|
|
0
|
|
}
|
|
}
|
|
}),
|
|
write_callback: Box::new(move |addr: u32, value: u32| {
|
|
let reg_index = (addr & 0xFFFF) / 4;
|
|
match reg_index {
|
|
reg::CP_RB_WPTR => {
|
|
// Release: any prior writes to ring memory the guest
|
|
// performed before bumping WPTR must be visible to
|
|
// the GPU consumer that Acquire-loads this atomic.
|
|
write_wptr.store(value, Ordering::Release);
|
|
// M1.7 parker wake: set the pending bit (Release) so
|
|
// a worker swapping it on its way to `park_timeout`
|
|
// sees `was_pending == true` and skips the park; AND
|
|
// unpark the worker if it's already parked. Both are
|
|
// necessary to defend against the race window between
|
|
// the worker's `swap(false)` and `park_timeout()`.
|
|
wake_pending.store(true, Ordering::Release);
|
|
if let Ok(g) = worker_thread.lock() {
|
|
if let Some(t) = g.as_ref() {
|
|
t.unpark();
|
|
}
|
|
}
|
|
tracing::trace!(
|
|
value,
|
|
addr = format_args!("{addr:#010x}"),
|
|
"gpu mmio: CP_RB_WPTR write"
|
|
);
|
|
}
|
|
// CP_INT_ACK clears interrupt bits; we just echo the value.
|
|
reg::CP_INT_ACK => {
|
|
write_int_ack.store(value, Ordering::Relaxed);
|
|
}
|
|
// D1MODE_VBLANK_VLINE_STATUS is write-1-to-clear per the
|
|
// AMD M56 display-controller ref. Clear any bit the guest
|
|
// writes a 1 to (leaving other bits untouched).
|
|
reg::D1MODE_VBLANK_VLINE_STATUS => {
|
|
let prev = write_vblank_status.load(Ordering::Relaxed);
|
|
write_vblank_status.store(prev & !value, Ordering::Relaxed);
|
|
}
|
|
_ => {
|
|
tracing::trace!(
|
|
reg = format_args!("{reg_index:#x}"),
|
|
addr = format_args!("{addr:#010x}"),
|
|
value = format_args!("{value:#x}"),
|
|
"gpu mmio: unmapped write (dropping)"
|
|
);
|
|
}
|
|
}
|
|
}),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh mailbox set plus the region built on top of it.
    fn build() -> (GpuMmio, MmioRegion) {
        let mmio = GpuMmio::new();
        let region = build_region(&mmio);
        (mmio, region)
    }

    /// Lowest register index inside the aperture that `build_region` does
    /// not route to a mailbox. Computed rather than hard-coded so the tests
    /// stay valid if the `reg::*` constants move.
    fn unmapped_reg_index() -> u32 {
        let mapped = [
            reg::CP_RB_WPTR,
            reg::CP_RB_RPTR,
            reg::CP_INT_STATUS,
            reg::CP_INT_ACK,
            reg::D1MODE_VBLANK_VLINE_STATUS,
        ];
        (0..APERTURE_SIZE / 4)
            .find(|index| !mapped.contains(index))
            .expect("the aperture holds more registers than there are mailboxes")
    }

    /// `D1MODE_VBLANK_VLINE_STATUS` read must surface the atomic's current
    /// value — Sylpheed's graphics-interrupt callback reads bit 0 to decide
    /// whether vblank actually fired; if we always return 0 the callback
    /// silently skips every frame's work.
    #[test]
    fn vblank_status_read_returns_stored_value() {
        let (mmio, region) = build();
        mmio.d1mode_vblank_vline_status
            .store(0x1, Ordering::Relaxed);
        let offset = APERTURE_BASE + reg::D1MODE_VBLANK_VLINE_STATUS * 4;
        assert_eq!((region.read_callback)(offset), 0x1);
    }

    /// Guest clears the flag by writing 1 back. Classic write-1-to-clear —
    /// AMD M56 display-controller ref and Canary's behavior. We preserve
    /// unrelated bits so higher-bit status (VLINE_INT_OCCURRED etc.) can
    /// coexist with a concurrent clear of bit 0.
    #[test]
    fn vblank_status_write_1_to_clear() {
        let (mmio, region) = build();
        mmio.d1mode_vblank_vline_status
            .store(0b11, Ordering::Relaxed);
        let offset = APERTURE_BASE + reg::D1MODE_VBLANK_VLINE_STATUS * 4;
        (region.write_callback)(offset, 0b01);
        assert_eq!(
            mmio.d1mode_vblank_vline_status.load(Ordering::Relaxed),
            0b10,
            "bit 0 cleared, bit 1 preserved"
        );
    }

    /// Write-0-to-a-bit must NOT clear that bit — classic W1TC semantics.
    #[test]
    fn vblank_status_write_0_is_noop() {
        let (mmio, region) = build();
        mmio.d1mode_vblank_vline_status
            .store(0b11, Ordering::Relaxed);
        let offset = APERTURE_BASE + reg::D1MODE_VBLANK_VLINE_STATUS * 4;
        (region.write_callback)(offset, 0x0);
        assert_eq!(
            mmio.d1mode_vblank_vline_status.load(Ordering::Relaxed),
            0b11
        );
    }

    /// Regression: prior to the fix, `reg::CP_RB_WPTR` held a byte offset
    /// (`0x0714`) while the match arm compared against a *register index*
    /// (`(addr & 0xFFFF) / 4 == 0x01C5`). Guest MMIO writes to the WPTR
    /// therefore fell through to "unmapped" and the atomic never moved;
    /// only `VdInitializeRingBuffer` / `extend_write_ptr` paths worked.
    ///
    /// Verify every CP register lands in its atomic when the guest writes
    /// at the canonical `APERTURE_BASE + index*4` byte address.
    #[test]
    fn cp_rb_wptr_write_via_mmio_bus_reaches_atomic() {
        let (mmio, region) = build();
        let offset = APERTURE_BASE + reg::CP_RB_WPTR * 4;
        assert_eq!(offset, 0x7FC8_0714, "byte offset must match Canary CP_RB_WPTR");
        (region.write_callback)(offset, 0x1234_5678);
        assert_eq!(mmio.cp_rb_wptr.load(Ordering::Relaxed), 0x1234_5678);
    }

    /// A guest WPTR write must also raise the wake flag the write closure
    /// sets for the GPU worker.
    #[test]
    fn cp_rb_wptr_write_sets_wake_pending() {
        let (mmio, region) = build();
        mmio.wake_pending.store(false, Ordering::Relaxed);
        let offset = APERTURE_BASE + reg::CP_RB_WPTR * 4;
        (region.write_callback)(offset, 0x10);
        assert!(mmio.wake_pending.load(Ordering::Acquire));
    }

    #[test]
    fn cp_int_ack_write_via_mmio_bus_reaches_atomic() {
        let (mmio, region) = build();
        let offset = APERTURE_BASE + reg::CP_INT_ACK * 4;
        assert_eq!(offset, 0x7FC8_07D0, "byte offset must match Canary CP_INT_ACK");
        (region.write_callback)(offset, 0xDEAD_BEEF);
        assert_eq!(mmio.cp_int_ack.load(Ordering::Relaxed), 0xDEAD_BEEF);
    }

    /// `CP_RB_WPTR` and `CP_INT_ACK` are the two registers served by both
    /// closures: a read must return the value last written over the bus.
    #[test]
    fn cp_rb_wptr_and_int_ack_read_back_last_written_value() {
        let (_mmio, region) = build();
        for &(index, value) in &[(reg::CP_RB_WPTR, 0x40_u32), (reg::CP_INT_ACK, 0x8000_0000_u32)] {
            let offset = APERTURE_BASE + index * 4;
            (region.write_callback)(offset, value);
            assert_eq!((region.read_callback)(offset), value);
        }
    }

    #[test]
    fn cp_rb_rptr_read_via_mmio_bus_returns_atomic() {
        let (mmio, region) = build();
        mmio.cp_rb_rptr.store(0xCAFE_F00D, Ordering::Relaxed);
        let offset = APERTURE_BASE + reg::CP_RB_RPTR * 4;
        assert_eq!((region.read_callback)(offset), 0xCAFE_F00D);
    }

    #[test]
    fn cp_int_status_read_via_mmio_bus_returns_atomic() {
        let (mmio, region) = build();
        mmio.cp_int_status.store(0x0000_0001, Ordering::Relaxed);
        let offset = APERTURE_BASE + reg::CP_INT_STATUS * 4;
        assert_eq!((region.read_callback)(offset), 0x0000_0001);
    }

    /// Registers without a mailbox read as zero.
    #[test]
    fn unmapped_read_returns_zero() {
        let (_mmio, region) = build();
        let offset = APERTURE_BASE + unmapped_reg_index() * 4;
        assert_eq!((region.read_callback)(offset), 0);
    }

    /// Writes to registers without a mailbox are dropped: none of the
    /// mailboxes may change.
    #[test]
    fn unmapped_write_leaves_mailboxes_untouched() {
        let (mmio, region) = build();
        mmio.cp_rb_wptr.store(1, Ordering::Relaxed);
        mmio.cp_rb_rptr.store(2, Ordering::Relaxed);
        mmio.cp_int_status.store(3, Ordering::Relaxed);
        mmio.cp_int_ack.store(4, Ordering::Relaxed);
        mmio.d1mode_vblank_vline_status.store(5, Ordering::Relaxed);

        let offset = APERTURE_BASE + unmapped_reg_index() * 4;
        (region.write_callback)(offset, 0xFFFF_FFFF);

        assert_eq!(mmio.cp_rb_wptr.load(Ordering::Relaxed), 1);
        assert_eq!(mmio.cp_rb_rptr.load(Ordering::Relaxed), 2);
        assert_eq!(mmio.cp_int_status.load(Ordering::Relaxed), 3);
        assert_eq!(mmio.cp_int_ack.load(Ordering::Relaxed), 4);
        assert_eq!(mmio.d1mode_vblank_vline_status.load(Ordering::Relaxed), 5);
    }
}
|