Compare commits
4 Commits
iterate-2U
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3f5d5cf5f7 | ||
|
|
2f55d1fd7d | ||
|
|
a91f4c550b | ||
|
|
66bd805726 |
@@ -2338,10 +2338,22 @@ fn coord_post_round(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if kernel.gpu.has_pending_interrupts() {
|
if kernel.gpu.has_pending_interrupts() {
|
||||||
for _pi in kernel.gpu.take_pending_interrupts() {
|
for pi in kernel.gpu.take_pending_interrupts() {
|
||||||
|
// Canary `ExecutePacketType3_INTERRUPT` dispatches the callback
|
||||||
|
// once per set bit of `cpu_mask` with that bit's index as the
|
||||||
|
// target CPU (`DispatchInterruptCallback(1, n)`). The guest's
|
||||||
|
// swap-acknowledge fence stores `cpu_mask`, and the ISR clears
|
||||||
|
// `1 << current_cpu` from it — so the ISR must run impersonating
|
||||||
|
// the masked CPU or the fence never reaches 0. Sylpheed uses a
|
||||||
|
// single-bit mask (`0x4` → CPU 2); take the lowest set bit.
|
||||||
|
let cpu = if pi.cpu_mask == 0 {
|
||||||
|
xenia_kernel::interrupts::VSYNC_TARGET_CPU
|
||||||
|
} else {
|
||||||
|
pi.cpu_mask.trailing_zeros().min(5) as u8
|
||||||
|
};
|
||||||
kernel
|
kernel
|
||||||
.interrupts
|
.interrupts
|
||||||
.queue_interrupt(xenia_kernel::INTERRUPT_SOURCE_CP);
|
.queue_interrupt(xenia_kernel::INTERRUPT_SOURCE_CP, cpu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3545,7 +3557,17 @@ fn dispatch_graphics_interrupts(
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// X_KPCR offset of `prcb_data.current_cpu` (canary `xthread.cc`
|
||||||
|
/// `SetActiveCpu` → `pcr.prcb_data.current_cpu`). The guest graphics
|
||||||
|
/// ISR reads it via `lbz r10, 268(r13)` to decide which per-CPU bit of
|
||||||
|
/// the swap-acknowledge fence to clear.
|
||||||
|
const PCR_CURRENT_CPU_OFF: u32 = 268;
|
||||||
|
|
||||||
while let Some(source) = kernel.interrupts.peek_next() {
|
while let Some(source) = kernel.interrupts.peek_next() {
|
||||||
|
let target_cpu = kernel
|
||||||
|
.interrupts
|
||||||
|
.peek_next_cpu()
|
||||||
|
.unwrap_or(xenia_kernel::interrupts::VSYNC_TARGET_CPU);
|
||||||
// Victim selection: Ready first, then Blocked (canary's
|
// Victim selection: Ready first, then Blocked (canary's
|
||||||
// `XThread::GetCurrentThread()` analog — any live thread will
|
// `XThread::GetCurrentThread()` analog — any live thread will
|
||||||
// do for borrowing context). Skip Idle/Exited/ServicingIrq.
|
// do for borrowing context). Skip Idle/Exited/ServicingIrq.
|
||||||
@@ -3615,6 +3637,19 @@ fn dispatch_graphics_interrupts(
|
|||||||
saved
|
saved
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Impersonate the interrupt's target CPU on the borrowed thread's
|
||||||
|
// PCR, mirroring canary `EmulateCPInterruptDPC` →
|
||||||
|
// `XThread::SetActiveCpu(cpu)`. The guest swap-complete ISR clears
|
||||||
|
// `1 << [pcr.current_cpu]` from the per-present swap-acknowledge
|
||||||
|
// fence; if it runs on the wrong CPU it clears the wrong bit and
|
||||||
|
// the GPU's trailing `WAIT_REG_MEM` on that fence never releases —
|
||||||
|
// stranding the present/title loop. Save/restore so borrowing a
|
||||||
|
// thread doesn't permanently rewrite its processor number.
|
||||||
|
let pcr_addr = (kernel.scheduler.ctx_mut_ref(target_ref).gpr[13] as u32)
|
||||||
|
.wrapping_add(PCR_CURRENT_CPU_OFF);
|
||||||
|
let saved_cpu = mem.read_u8(pcr_addr);
|
||||||
|
mem.write_u8(pcr_addr, target_cpu);
|
||||||
|
|
||||||
// Stash the previous `scheduler.current` (call_export reaches
|
// Stash the previous `scheduler.current` (call_export reaches
|
||||||
// it; imports the ISR calls must dispatch on the borrowed
|
// it; imports the ISR calls must dispatch on the borrowed
|
||||||
// thread). Restore on the way out.
|
// thread). Restore on the way out.
|
||||||
@@ -3707,6 +3742,7 @@ fn dispatch_graphics_interrupts(
|
|||||||
|
|
||||||
// Restore the borrowed context.
|
// Restore the borrowed context.
|
||||||
saved.restore(kernel.scheduler.ctx_mut_ref(target_ref));
|
saved.restore(kernel.scheduler.ctx_mut_ref(target_ref));
|
||||||
|
mem.write_u8(pcr_addr, saved_cpu);
|
||||||
kernel.scheduler.current = prev_current;
|
kernel.scheduler.current = prev_current;
|
||||||
kernel.interrupts.delivered += 1;
|
kernel.interrupts.delivered += 1;
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
{
|
{
|
||||||
"instructions": 50000014,
|
"instructions": 50000014,
|
||||||
"imports": 178937,
|
"imports": 352251,
|
||||||
"unimpl": 0,
|
"unimpl": 0,
|
||||||
"draws": 78,
|
"draws": 718,
|
||||||
"swaps": 4,
|
"swaps": 147,
|
||||||
"unique_render_targets": 2,
|
"unique_render_targets": 2,
|
||||||
"shader_blobs_live": 3,
|
"shader_blobs_live": 6,
|
||||||
"texture_cache_entries": 0
|
"texture_cache_entries": 0
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -78,6 +78,30 @@ pub fn physical_to_backing(addr: u32) -> u32 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Max guest page-version over the `[base, base+len)` span, walking 4 KiB
|
||||||
|
/// pages via the `MemoryAccess` trait's `page_version`.
|
||||||
|
///
|
||||||
|
/// The concrete heap exposes an inherent `max_page_version(base, len)`, but
|
||||||
|
/// the draw handler only holds `&dyn MemoryAccess` (which carries the coarser
|
||||||
|
/// `page_version(addr)` accessor). This is byte-equivalent to
|
||||||
|
/// `heap::max_page_version` and stays a pure function of the per-page write
|
||||||
|
/// counters (no wall-clock), so texture-decode timing remains deterministic.
|
||||||
|
fn span_max_version(mem: &dyn MemoryAccess, base: u32, len: u32) -> u64 {
|
||||||
|
const PAGE: u32 = 0x1000;
|
||||||
|
let last = base.saturating_add(len.saturating_sub(1));
|
||||||
|
let mut page = base & !(PAGE - 1);
|
||||||
|
let last_page = last & !(PAGE - 1);
|
||||||
|
let mut max = 0u64;
|
||||||
|
loop {
|
||||||
|
max = max.max(mem.page_version(page));
|
||||||
|
if page >= last_page {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
page = page.wrapping_add(PAGE);
|
||||||
|
}
|
||||||
|
max
|
||||||
|
}
|
||||||
|
|
||||||
/// Cached Xenos microcode blob, produced by `PM4_IM_LOAD*` packets.
|
/// Cached Xenos microcode blob, produced by `PM4_IM_LOAD*` packets.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct ShaderBlob {
|
pub struct ShaderBlob {
|
||||||
@@ -400,6 +424,12 @@ pub struct GpuSystem {
|
|||||||
/// on every texture-fetch resolution; the UI thread sees the decoded
|
/// on every texture-fetch resolution; the UI thread sees the decoded
|
||||||
/// bytes via `UiBridge::publish_texture`.
|
/// bytes via `UiBridge::publish_texture`.
|
||||||
pub texture_cache: crate::texture_cache::TextureCache,
|
pub texture_cache: crate::texture_cache::TextureCache,
|
||||||
|
/// P5b: textures decoded at the most recent `PM4_DRAW_INDX*`, keyed off
|
||||||
|
/// the *active* pixel shader's real `tfetch` fetch-constant slots (not a
|
||||||
|
/// hardcoded slot). `vd_swap` publishes the first of these to the UI so
|
||||||
|
/// the replay binds the texture the draw actually samples. Cleared and
|
||||||
|
/// repopulated each draw; empty when the active PS issues no `tfetch`.
|
||||||
|
pub last_draw_textures: Vec<(crate::texture_cache::TextureKey, Vec<u8>)>,
|
||||||
/// 10 MiB shadow of the Xenos EDRAM. Written by clear-resolves and
|
/// 10 MiB shadow of the Xenos EDRAM. Written by clear-resolves and
|
||||||
/// (future) host-render-target readback; read by the resolve byte-copy
|
/// (future) host-render-target readback; read by the resolve byte-copy
|
||||||
/// path that writes tiled pixels into guest memory. Allocated once at
|
/// path that writes tiled pixels into guest memory. Allocated once at
|
||||||
@@ -431,6 +461,7 @@ impl GpuSystem {
|
|||||||
rt_cache: crate::render_target_cache::RenderTargetCache::new(),
|
rt_cache: crate::render_target_cache::RenderTargetCache::new(),
|
||||||
last_resolve: None,
|
last_resolve: None,
|
||||||
texture_cache: crate::texture_cache::TextureCache::new(),
|
texture_cache: crate::texture_cache::TextureCache::new(),
|
||||||
|
last_draw_textures: Vec::new(),
|
||||||
edram: crate::edram::ShadowEdram::new(),
|
edram: crate::edram::ShadowEdram::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1265,6 +1296,60 @@ impl GpuSystem {
|
|||||||
);
|
);
|
||||||
self.last_draw = Some(ds);
|
self.last_draw = Some(ds);
|
||||||
self.last_primitive = Some(processed);
|
self.last_primitive = Some(processed);
|
||||||
|
|
||||||
|
// P5b: decode the textures the *active pixel shader* actually
|
||||||
|
// samples. Parse the bound PS, collect its `tfetch`
|
||||||
|
// fetch-constant slots, read each 6-dword fetch constant from
|
||||||
|
// the register file, and decode+cache it. `vd_swap` publishes
|
||||||
|
// the result. Empty for flat (no-tfetch) shaders — the
|
||||||
|
// dominant case on Sylpheed's current splash, where this stays
|
||||||
|
// inert until the textured logo draw is reached.
|
||||||
|
self.last_draw_textures.clear();
|
||||||
|
if let Some(ps_key) = self.active_ps_key {
|
||||||
|
// Collect slots under an immutable borrow of `shader_blobs`,
|
||||||
|
// then drop it before mutating `texture_cache`.
|
||||||
|
let slots: Vec<u8> = match self.shader_blobs.get(&ps_key) {
|
||||||
|
Some(blob) => {
|
||||||
|
let parsed = crate::ucode::parse_shader(&blob.dwords);
|
||||||
|
crate::shader_metrics::tfetch_slots(&parsed)
|
||||||
|
}
|
||||||
|
None => Vec::new(),
|
||||||
|
};
|
||||||
|
for slot in slots {
|
||||||
|
let mut fetch6 = [0u32; 6];
|
||||||
|
for (k, w) in fetch6.iter_mut().enumerate() {
|
||||||
|
*w = self
|
||||||
|
.register_file
|
||||||
|
.read(CONST_BASE_FETCH + slot as u32 * 6 + k as u32);
|
||||||
|
}
|
||||||
|
let Some(key) = crate::texture_cache::decode_fetch_constant(fetch6) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let bi = key.format.block_info();
|
||||||
|
let span_bytes = (key.pitch_texels as u32)
|
||||||
|
* (key.height as u32)
|
||||||
|
* (bi.bytes_per_block as u32)
|
||||||
|
/ (bi.block_w as u32);
|
||||||
|
let version = span_max_version(mem, key.base_address, span_bytes.max(4));
|
||||||
|
match self.texture_cache.ensure_cached(key, version, mem) {
|
||||||
|
Ok(entry) => {
|
||||||
|
self.last_draw_textures.push((entry.key, entry.bytes.clone()));
|
||||||
|
metrics::counter!(
|
||||||
|
"gpu.texture.decode",
|
||||||
|
"fmt" => format!("{:?}", key.format),
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
metrics::counter!(
|
||||||
|
"gpu.texture.reject",
|
||||||
|
"reason" => format!("{e:?}"),
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pm4::PM4_SET_CONSTANT | pm4::PM4_SET_SHADER_CONSTANTS => {
|
pm4::PM4_SET_CONSTANT | pm4::PM4_SET_SHADER_CONSTANTS => {
|
||||||
// payload[0] = offset_type — bits[10:0] index, bits[23:16] type
|
// payload[0] = offset_type — bits[10:0] index, bits[23:16] type
|
||||||
@@ -1544,6 +1629,15 @@ pub mod reg {
|
|||||||
/// `XE_GPU_REG_D1MODE_VBLANK_VLINE_STATUS` (Canary register_table.inc:1126).
|
/// `XE_GPU_REG_D1MODE_VBLANK_VLINE_STATUS` (Canary register_table.inc:1126).
|
||||||
/// Bit 0 = VBLANK_INT_OCCURRED.
|
/// Bit 0 = VBLANK_INT_OCCURRED.
|
||||||
pub const D1MODE_VBLANK_VLINE_STATUS: u32 = 0x1951;
|
pub const D1MODE_VBLANK_VLINE_STATUS: u32 = 0x1951;
|
||||||
|
/// `XE_GPU_REG_D1MODE_VIEWPORT_SIZE` / `AVIVO_D1MODE_VIEWPORT_SIZE`
|
||||||
|
/// (Canary `register_table.inc:1134`). Packs the active display resolution
|
||||||
|
/// as `(width << 16) | height` with 12-bit fields. The guest's
|
||||||
|
/// swap-complete interrupt callback (`sub_824CE2B8`) divides by the low
|
||||||
|
/// 12 bits (`height`) as a refresh-pacing term, so a 0 read makes its
|
||||||
|
/// `twi` divide-by-zero guard trap and abort the ISR before it clears the
|
||||||
|
/// swap-acknowledge fence. Canary returns the constant below from
|
||||||
|
/// `GraphicsSystem::ReadRegister` (graphics_system.cc:311).
|
||||||
|
pub const D1MODE_VIEWPORT_SIZE: u32 = 0x1961;
|
||||||
/// `XE_GPU_REG_VGT_EVENT_INITIATOR` — set by EVENT_WRITE.
|
/// `XE_GPU_REG_VGT_EVENT_INITIATOR` — set by EVENT_WRITE.
|
||||||
pub const VGT_EVENT_INITIATOR: u32 = 0x21F9;
|
pub const VGT_EVENT_INITIATOR: u32 = 0x21F9;
|
||||||
/// `XE_GPU_REG_COHER_STATUS_HOST` — coherency bits
|
/// `XE_GPU_REG_COHER_STATUS_HOST` — coherency bits
|
||||||
|
|||||||
@@ -58,6 +58,15 @@ pub fn build_region(mmio: &GpuMmio) -> MmioRegion {
|
|||||||
reg::D1MODE_VBLANK_VLINE_STATUS => {
|
reg::D1MODE_VBLANK_VLINE_STATUS => {
|
||||||
read_vblank_status.load(Ordering::Relaxed)
|
read_vblank_status.load(Ordering::Relaxed)
|
||||||
}
|
}
|
||||||
|
// AVIVO_D1MODE_VIEWPORT_SIZE: the active display resolution
|
||||||
|
// (1280x720) packed as `(width << 16) | height`. Canary
|
||||||
|
// serves this constant from `GraphicsSystem::ReadRegister`
|
||||||
|
// (graphics_system.cc:311). The guest swap-complete interrupt
|
||||||
|
// callback divides by the low 12 bits (`height = 0x2D0`); a 0
|
||||||
|
// read trips its `twi` divide-guard and aborts the ISR before
|
||||||
|
// it acknowledges the per-present swap fence — which strands
|
||||||
|
// the present/title loop. Mirror canary exactly.
|
||||||
|
reg::D1MODE_VIEWPORT_SIZE => 0x0500_02D0,
|
||||||
_ => {
|
_ => {
|
||||||
tracing::trace!(
|
tracing::trace!(
|
||||||
reg = format_args!("{reg_index:#x}"),
|
reg = format_args!("{reg_index:#x}"),
|
||||||
|
|||||||
@@ -5,9 +5,8 @@
|
|||||||
//! rectangles) we rewrite indices on the CPU side so the host just sees a
|
//! rectangles) we rewrite indices on the CPU side so the host just sees a
|
||||||
//! triangle list. Ground truth: `xenia-canary/src/xenia/gpu/primitive_processor.h/cc`.
|
//! triangle list. Ground truth: `xenia-canary/src/xenia/gpu/primitive_processor.h/cc`.
|
||||||
//!
|
//!
|
||||||
//! P3 scope: only the shapes Sylpheed's UI + early gameplay paths need
|
//! Scope: list, strip, fan, quad, and rectangle expansions are all handled
|
||||||
//! (list, strip, fan). Rectangle + quad expansions are stubs logged via
|
//! (rectangles via CPU triangle-list rewrite — see `expand_rectangles`).
|
||||||
//! `tracing::warn!` for later.
|
|
||||||
|
|
||||||
use crate::draw_state::{IndexSize, PrimitiveType};
|
use crate::draw_state::{IndexSize, PrimitiveType};
|
||||||
|
|
||||||
@@ -138,18 +137,43 @@ fn expand_quads(indices: Option<&[u32]>, vertex_count: u32) -> ProcessedPrimitiv
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Rectangle lists: a Xenos-specific primitive where each group of 3
|
/// Rectangle lists: a Xenos-specific primitive where each group of 3
|
||||||
/// vertices defines a right-angle rectangle by its three non-repeated
|
/// vertices defines a rectangle; the 4th corner is extrapolated as
|
||||||
/// corners (the 4th is derived). The uber-shader doesn't support this yet;
|
/// `v3 = v0 + v2 - v1` (parallelogram completion). Canary expands this in a
|
||||||
/// the ucode translator will emulate it as a geometry-stage fake. For P3
|
/// host vertex-shader variant (`kRectangleListAsTriangleStrip`,
|
||||||
/// we emit an empty draw.
|
/// `primitive_processor.cc:389-456`): a 4-vertex triangle strip per rect with
|
||||||
fn expand_rectangles(_indices: Option<&[u32]>, _vertex_count: u32) -> ProcessedPrimitive {
|
/// the 4th corner synthesized *in the VS* from the host-vertex index.
|
||||||
tracing::warn!("gpu: rectangle list primitive not yet implemented (P3 stub)");
|
///
|
||||||
metrics::counter!("gpu.primitive.rejected", "reason" => "rectangle_list").increment(1);
|
/// Our replay pipeline has no host-VS corner synthesis (and the procedural
|
||||||
|
/// `vs_main` does not consume `rewritten_indices` yet), so we mirror the
|
||||||
|
/// `expand_quads`/`expand_fan` CPU idiom and emit the 3 real vertices of each
|
||||||
|
/// rect as one triangle list `(v0,v1,v2)` — the visible lower half of the
|
||||||
|
/// rect. This un-rejects the draw and gives a faithful `host_vertex_count`.
|
||||||
|
///
|
||||||
|
/// TODO: once `vs_main` does real vertex fetch + interpolation, upgrade to the
|
||||||
|
/// full quad — 6 indices `[v0,v1,v2, v2,v1,v3]` with a synthesized `v3` corner
|
||||||
|
/// — mirroring canary's `kRectangleListAsTriangleStrip`.
|
||||||
|
fn expand_rectangles(indices: Option<&[u32]>, vertex_count: u32) -> ProcessedPrimitive {
|
||||||
|
let rect_count = vertex_count / 3;
|
||||||
|
let mut out = Vec::with_capacity(3 * rect_count as usize);
|
||||||
|
let get = |i: u32| -> u32 {
|
||||||
|
match indices {
|
||||||
|
Some(buf) => buf[i as usize],
|
||||||
|
None => i,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for r in 0..rect_count {
|
||||||
|
let base = r * 3;
|
||||||
|
out.push(get(base));
|
||||||
|
out.push(get(base + 1));
|
||||||
|
out.push(get(base + 2));
|
||||||
|
}
|
||||||
|
let host_vertex_count = out.len() as u32;
|
||||||
|
metrics::counter!("gpu.primitive.expanded", "shape" => "rectangle_list").increment(1);
|
||||||
ProcessedPrimitive {
|
ProcessedPrimitive {
|
||||||
topology: HostTopology::TriangleList,
|
topology: HostTopology::TriangleList,
|
||||||
rewritten_indices: Some(Vec::new()),
|
rewritten_indices: Some(out),
|
||||||
host_vertex_count: 0,
|
host_vertex_count,
|
||||||
rejected: true,
|
rejected: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -213,6 +237,17 @@ mod tests {
|
|||||||
assert_eq!(idx, vec![0, 1, 2, 0, 2, 3, 4, 5, 6, 4, 6, 7]);
|
assert_eq!(idx, vec![0, 1, 2, 0, 2, 3, 4, 5, 6, 4, 6, 7]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rectangle_list_expansion() {
|
||||||
|
// 2 rects (6 verts) → one triangle (v0,v1,v2) per rect, not rejected.
|
||||||
|
let p = process(PrimitiveType::RectangleList, 6, None);
|
||||||
|
let idx = p.rewritten_indices.unwrap();
|
||||||
|
assert_eq!(idx, vec![0, 1, 2, 3, 4, 5]);
|
||||||
|
assert_eq!(p.topology, HostTopology::TriangleList);
|
||||||
|
assert_eq!(p.host_vertex_count, 6);
|
||||||
|
assert!(!p.rejected);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn widen_u16_indices_big_endian() {
|
fn widen_u16_indices_big_endian() {
|
||||||
// 3 indices [1, 2, 0x1234] in BE u16.
|
// 3 indices [1, 2, 0x1234] in BE u16.
|
||||||
|
|||||||
@@ -174,6 +174,49 @@ pub fn emit_for(parsed: &ParsedShader, stage: &'static str) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collect the unique texture-fetch-constant slot indices a shader samples.
|
||||||
|
///
|
||||||
|
/// Walks the same exec-clause / sequence-bitmap path as [`emit_for`] but only
|
||||||
|
/// extracts `TextureFetch.fetch_const` slots, deduplicated and in first-seen
|
||||||
|
/// order. The GPU draw handler uses this to decide which fetch constants to
|
||||||
|
/// decode + cache at draw time (keyed off the *active* pixel shader's real
|
||||||
|
/// `tfetch` instructions rather than a hardcoded slot).
|
||||||
|
pub fn tfetch_slots(parsed: &ParsedShader) -> Vec<u8> {
|
||||||
|
let mut slots: Vec<u8> = Vec::new();
|
||||||
|
for clause in &parsed.cf {
|
||||||
|
if let ControlFlowInstruction::Exec {
|
||||||
|
address,
|
||||||
|
count,
|
||||||
|
sequence,
|
||||||
|
..
|
||||||
|
} = clause
|
||||||
|
{
|
||||||
|
for i in 0..(*count as usize) {
|
||||||
|
let base = (*address as usize + i) * 3;
|
||||||
|
if base + 2 >= parsed.instructions.len() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// sequence bit layout: 2 bits per triple, hi bit = is-fetch.
|
||||||
|
let is_fetch = ((sequence >> (i * 2 + 1)) & 1) != 0;
|
||||||
|
if !is_fetch {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let words = [
|
||||||
|
parsed.instructions[base],
|
||||||
|
parsed.instructions[base + 1],
|
||||||
|
parsed.instructions[base + 2],
|
||||||
|
];
|
||||||
|
if let FetchInstruction::Texture(tf) = decode_fetch(words) {
|
||||||
|
if !slots.contains(&tf.fetch_const) {
|
||||||
|
slots.push(tf.fetch_const);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slots
|
||||||
|
}
|
||||||
|
|
||||||
fn mark_feature(buf: &mut Vec<&'static str>, name: &'static str) {
|
fn mark_feature(buf: &mut Vec<&'static str>, name: &'static str) {
|
||||||
if !buf.contains(&name) {
|
if !buf.contains(&name) {
|
||||||
buf.push(name);
|
buf.push(name);
|
||||||
@@ -298,6 +341,46 @@ mod tests {
|
|||||||
emit_for(&shader, "vs");
|
emit_for(&shader, "vs");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `tfetch_slots` should extract the fetch-constant slot of a texture
|
||||||
|
/// fetch (and dedup), and return empty for a flat ALU-only shader.
|
||||||
|
#[test]
|
||||||
|
fn tfetch_slots_extracts_texture_fetch_constants() {
|
||||||
|
// word0: opcode TEXTURE_FETCH (0x01) in low 5 bits, fetch_const=3 in
|
||||||
|
// bits[9:5] → 0x01 | (3 << 5) = 0x61.
|
||||||
|
let tfetch_w0: u32 = 0x01 | (3u32 << 5);
|
||||||
|
let shader = ParsedShader {
|
||||||
|
cf: vec![
|
||||||
|
ControlFlowInstruction::Exec {
|
||||||
|
address: 0,
|
||||||
|
count: 2,
|
||||||
|
// triple 0 is a fetch (hi bit of its 2-bit field set),
|
||||||
|
// triple 1 is ALU. is_fetch = (sequence >> (i*2+1)) & 1.
|
||||||
|
sequence: 0b00_10,
|
||||||
|
is_end: false,
|
||||||
|
predicated: false,
|
||||||
|
predicate_condition: false,
|
||||||
|
},
|
||||||
|
ControlFlowInstruction::Exit,
|
||||||
|
],
|
||||||
|
instructions: vec![tfetch_w0, 0, 0, /* ALU triple */ 0, 0, 0],
|
||||||
|
};
|
||||||
|
assert_eq!(tfetch_slots(&shader), vec![3]);
|
||||||
|
|
||||||
|
// Flat shader: no fetch bits → no slots.
|
||||||
|
let flat = ParsedShader {
|
||||||
|
cf: vec![ControlFlowInstruction::Exec {
|
||||||
|
address: 0,
|
||||||
|
count: 1,
|
||||||
|
sequence: 0,
|
||||||
|
is_end: false,
|
||||||
|
predicated: false,
|
||||||
|
predicate_condition: false,
|
||||||
|
}],
|
||||||
|
instructions: vec![0, 0, 0],
|
||||||
|
};
|
||||||
|
assert!(tfetch_slots(&flat).is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
/// P8: a shader containing `LoopStart` should mark `cf_loop` as used
|
/// P8: a shader containing `LoopStart` should mark `cf_loop` as used
|
||||||
/// so the HUD can surface which deferred feature a game triggers.
|
/// so the HUD can surface which deferred feature a game triggers.
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -1652,6 +1652,79 @@ fn nt_set_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// XFileRenameInformation (10): move the backing file to a new path.
|
||||||
|
// Sylpheed's asset-cache decompresses each packed resource to a staging
|
||||||
|
// `cache:\<hash><tail>.tmp` then renames it into its final nested path
|
||||||
|
// `cache:\<hash>\<dir>\<file>`. Without an actual host-FS rename the
|
||||||
|
// nested target stays empty, the later read-back of the decompressed
|
||||||
|
// asset (e.g. the title logo texture `\69d8e45c\e\534ffea`) misses, and
|
||||||
|
// the logo never loads. Mirror canary `xboxkrnl_io_info.cc:226`
|
||||||
|
// (`X_FILE_RENAME_INFORMATION{ replace_existing@0, root_dir_handle@4,
|
||||||
|
// ansi_string@8 }` → `file->Rename(TranslateAnsiPath(ansi_string))`).
|
||||||
|
if info_class == 10 {
|
||||||
|
// Read the target path from the embedded ANSI_STRING at info_ptr+8.
|
||||||
|
let target_raw = match crate::path::read_ansi_string(mem, info_ptr + 8) {
|
||||||
|
Some(s) if !s.is_empty() => s,
|
||||||
|
_ => {
|
||||||
|
const STATUS_OBJECT_NAME_INVALID: u64 = 0xC000_0033;
|
||||||
|
ctx.gpr[3] = STATUS_OBJECT_NAME_INVALID;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// Resolve the destination against the host cache backing dir. We only
|
||||||
|
// support renames within the writable `cache:` mount (the only place
|
||||||
|
// a guest can create files); disc/synth entries are read-only.
|
||||||
|
let new_host = state.resolve_cache_path(&target_raw);
|
||||||
|
// Current backing host path of the handle.
|
||||||
|
let old_host = match state.objects.get(&handle) {
|
||||||
|
Some(KernelObject::File { host_path: Some(hp), .. }) => Some(hp.clone()),
|
||||||
|
Some(KernelObject::File { .. }) => None,
|
||||||
|
_ => {
|
||||||
|
ctx.gpr[3] = STATUS_INVALID_HANDLE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let status: u64 = match (old_host, new_host) {
|
||||||
|
(Some(old), Some(new)) => {
|
||||||
|
if let Some(parent) = new.parent() {
|
||||||
|
let _ = std::fs::create_dir_all(parent);
|
||||||
|
}
|
||||||
|
match std::fs::rename(&old, &new) {
|
||||||
|
Ok(()) => {
|
||||||
|
// Update the handle so subsequent I/O targets the new
|
||||||
|
// host path + guest path.
|
||||||
|
if let Some(KernelObject::File { path, host_path, .. }) =
|
||||||
|
state.objects.get_mut(&handle)
|
||||||
|
{
|
||||||
|
*path = crate::path::normalize_path(&target_raw);
|
||||||
|
*host_path = Some(new.clone());
|
||||||
|
}
|
||||||
|
tracing::info!(
|
||||||
|
"NtSetInformationFile rename cache {:?} -> {:?} ({:?})",
|
||||||
|
old, new, target_raw
|
||||||
|
);
|
||||||
|
STATUS_SUCCESS
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
"NtSetInformationFile rename {:?} -> {:?} failed: {}",
|
||||||
|
old, new, e
|
||||||
|
);
|
||||||
|
STATUS_UNSUCCESSFUL
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Non-cache (read-only VFS) source/target: acknowledge without a
|
||||||
|
// host move, matching the prior permissive behaviour.
|
||||||
|
_ => STATUS_SUCCESS,
|
||||||
|
};
|
||||||
|
if iosb_ptr != 0 {
|
||||||
|
write_io_status_block(mem, iosb_ptr, status as u32, info_length);
|
||||||
|
}
|
||||||
|
ctx.gpr[3] = status;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Handle lookup.
|
// Handle lookup.
|
||||||
let Some(KernelObject::File { size, position, host_path, .. }) = state.objects.get_mut(&handle) else {
|
let Some(KernelObject::File { size, position, host_path, .. }) = state.objects.get_mut(&handle) else {
|
||||||
ctx.gpr[3] = STATUS_INVALID_HANDLE;
|
ctx.gpr[3] = STATUS_INVALID_HANDLE;
|
||||||
@@ -2999,24 +3072,25 @@ fn vd_swap(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) {
|
|||||||
// xboxkrnl_video.cc:479. Currently skipped (see below).
|
// xboxkrnl_video.cc:479. Currently skipped (see below).
|
||||||
let _ = fetch_dwords; // silence unused — will be live again under the deferred path
|
let _ = fetch_dwords; // silence unused — will be live again under the deferred path
|
||||||
|
|
||||||
// iterate-2T: mirror xenia-canary `VdSwap_entry` (xboxkrnl_video.cc:518-548)
|
// iterate-2V: mirror xenia-canary `VdSwap_entry` (xboxkrnl_video.cc:518-548)
|
||||||
// FAITHFULLY. The game reserves 64 dwords (256 bytes) in the primary ring
|
// FAITHFULLY. The game reserves 64 dwords (256 bytes) in the primary ring
|
||||||
// at `buffer_ptr`; canary writes a `PM4_TYPE0(SHADER_CONSTANT_FETCH_00_0)`
|
// at `buffer_ptr`; canary writes a `PM4_TYPE0(SHADER_CONSTANT_FETCH_00_0)`
|
||||||
// fetch-constant patch followed by `PM4_TYPE3(PM4_XE_SWAP)`, then pads with
|
// fetch-constant patch followed by `PM4_TYPE3(PM4_XE_SWAP)`, then pads with
|
||||||
// NOPs. We do the same, then bump WPTR by 64 so the drain consumes the
|
// NOPs — and **NEVER touches `CP_RB_WPTR`**. The game advances the primary
|
||||||
// PM4_XE_SWAP **in command-stream order** — i.e. AFTER any in-stream
|
// ring write-pointer itself via its own doorbell once it has finished
|
||||||
// callback-arming Type-0 writes the game already queued.
|
// populating the reserved slot, so VdSwap only fills the bytes.
|
||||||
//
|
//
|
||||||
// Why this matters (the iterate-2T root): the previous M2b short-circuit
|
// iterate-2V FIX (the bug this removes): a prior revision bumped the
|
||||||
// called `notify_xe_swap` directly from the HLE, which synthesized a CP
|
// primary ring `CP_RB_WPTR` out-of-band here (`extend_write_ptr_by(64)`).
|
||||||
// swap-complete interrupt OUT OF BAND. When that interrupt reached the
|
// But `buffer_ptr` (~0x4add6efc) is NOT inside the primary ring (base
|
||||||
// graphics ISR (`sub_824BE9A0`) before D3D had armed its swap-callback
|
// ~0x4adcd000, 8192 dwords) — it lives ~10k dwords past it, in the
|
||||||
// slot (`[gfx+10772]+16` still the `0xBADF00D` placeholder), the ISR hit
|
// renderer indirect-buffer region. The bogus WPTR bump pushed the GPU
|
||||||
// its "ERR[D3D]: Unanticipated CPU_INTERRUPT. Sign of a corrupt command
|
// read-pointer PAST the guest's real write-pointer, the drain treated the
|
||||||
// buffer?" assert (`twi` at 0x824BE9DC). Routing the swap through the ring
|
// overshoot as a circular wrap, and **re-executed the splash's draw
|
||||||
// packet keeps the interrupt naturally ordered after arming, matching
|
// indirect-buffers ~2×** — inflating draws to 78 (real splash ≈ 28; 12
|
||||||
// canary (whose VdSwap raises NO interrupt itself; swap-complete CP
|
// INDIRECT_BUFFERs vs the real 6). Canary's `VdSwap_entry` writes the
|
||||||
// interrupts come only from in-stream `PM4_INTERRUPT` packets).
|
// block and returns; the swap-complete CP interrupt comes only from the
|
||||||
|
// game's own in-stream `PM4_INTERRUPT` packets, never from VdSwap.
|
||||||
if buffer_ptr != 0 {
|
if buffer_ptr != 0 {
|
||||||
let mut off = 0u32;
|
let mut off = 0u32;
|
||||||
let mut put = |i: &mut u32, v: u32| {
|
let mut put = |i: &mut u32, v: u32| {
|
||||||
@@ -3052,12 +3126,15 @@ fn vd_swap(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) {
|
|||||||
put(&mut off, xenia_gpu::pm4::make_packet_type2());
|
put(&mut off, xenia_gpu::pm4::make_packet_type2());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
state.gpu.extend_write_ptr_by(64);
|
// NOTE: We deliberately do NOT bump `CP_RB_WPTR` here (see the iterate-2V
|
||||||
|
// comment above). The drain below consumes only the packets the game has
|
||||||
|
// legitimately advanced the write-pointer over.
|
||||||
|
|
||||||
// Drain the ring; the PM4_XE_SWAP we just queued (and any in-stream
|
// Drain the ring up to whatever the game has actually submitted; any
|
||||||
// PM4_INTERRUPT) executes in order. The PM4_XE_SWAP handler calls
|
// in-stream `PM4_INTERRUPT` / draw packets execute in order. The
|
||||||
// `notify_xe_swap` for host swap bookkeeping; no synthetic interrupt is
|
// reserved-slot PM4_XE_SWAP is consumed by the GPU only once the game
|
||||||
// raised (see `notify_xe_swap`).
|
// advances its own doorbell over it. The swap-counter safety net below
|
||||||
|
// keeps host swap bookkeeping live in the meantime.
|
||||||
let drained = state.gpu.drain_to_current_wptr(mem);
|
let drained = state.gpu.drain_to_current_wptr(mem);
|
||||||
tracing::debug!(drained, "VdSwap: drained PM4 packets");
|
tracing::debug!(drained, "VdSwap: drained PM4 packets");
|
||||||
|
|
||||||
@@ -3112,27 +3189,27 @@ fn vd_swap(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) {
|
|||||||
);
|
);
|
||||||
ui.publish_assets(blobs, constants);
|
ui.publish_assets(blobs, constants);
|
||||||
|
|
||||||
// P5: try to decode the primary texture (fetch constant slot 0).
|
// P5b: publish the texture the last draw's *active pixel shader*
|
||||||
// Slot 0 is the convention most games use for their main bound
|
// actually sampled. The GPU draw handler decodes the PS's real
|
||||||
// texture at draw time; full N-slot binding waits for P6+. If the
|
// `tfetch` fetch-constant slots into `last_draw_textures`; we publish
|
||||||
// slot is unset or the format isn't supported (magenta stub kicks
|
// the first (the UI binds a single texture today). When the last draw
|
||||||
// in host-side), we skip.
|
// used a flat (no-tfetch) shader the list is empty, so we fall back to
|
||||||
//
|
// the legacy slot-0 probe to preserve behavior on flat-only frames.
|
||||||
// Texture fetch constants live at `CONST_BASE_FETCH + slot*6` in
|
let published = gpu_inline.last_draw_textures.first().cloned().or_else(|| {
|
||||||
// the register file; we read the 6 dwords, decode the key, hit
|
// Fallback: probe fetch constant slot 0 directly. Texture fetch
|
||||||
// the CPU cache (with page-version freshness), and clone the
|
// constants live at `CONST_BASE_FETCH + slot*6` in the register
|
||||||
// decoded bytes across the bridge.
|
// file; read 6 dwords, decode the key, hit the CPU cache with
|
||||||
const TEX_SLOT: u32 = 0;
|
// page-version freshness, clone the bytes across the bridge.
|
||||||
let mut fetch6 = [0u32; 6];
|
const TEX_SLOT: u32 = 0;
|
||||||
for (i, slot) in fetch6.iter_mut().enumerate() {
|
let mut fetch6 = [0u32; 6];
|
||||||
*slot = gpu_inline
|
for (i, slot) in fetch6.iter_mut().enumerate() {
|
||||||
.register_file
|
*slot = gpu_inline
|
||||||
.read(xenia_gpu::gpu_system::CONST_BASE_FETCH + TEX_SLOT * 6 + i as u32);
|
.register_file
|
||||||
}
|
.read(xenia_gpu::gpu_system::CONST_BASE_FETCH + TEX_SLOT * 6 + i as u32);
|
||||||
let published = if let Some(key) = xenia_gpu::texture_cache::decode_fetch_constant(fetch6)
|
}
|
||||||
{
|
let key = xenia_gpu::texture_cache::decode_fetch_constant(fetch6)?;
|
||||||
// Span over the entire tiled texture footprint to pick the
|
// Span over the entire tiled texture footprint to pick the max
|
||||||
// max page version covering it.
|
// page version covering it.
|
||||||
let bi = key.format.block_info();
|
let bi = key.format.block_info();
|
||||||
let span_bytes = (key.pitch_texels as u32)
|
let span_bytes = (key.pitch_texels as u32)
|
||||||
* (key.height as u32)
|
* (key.height as u32)
|
||||||
@@ -3150,9 +3227,7 @@ fn vd_swap(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
});
|
||||||
None
|
|
||||||
};
|
|
||||||
metrics::gauge!("gpu.texture_cache.entries")
|
metrics::gauge!("gpu.texture_cache.entries")
|
||||||
.set(gpu_inline.texture_cache.len() as f64);
|
.set(gpu_inline.texture_cache.len() as f64);
|
||||||
ui.publish_texture(published);
|
ui.publish_texture(published);
|
||||||
@@ -5579,6 +5654,67 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `NtSetInformationFile` class 10 (`XFileRenameInformation`) must move
|
||||||
|
/// the backing host file to the new `cache:` path and update the handle.
|
||||||
|
/// Mirrors Sylpheed's asset-cache `.tmp` → `\<hash>\<dir>\<file>` move;
|
||||||
|
/// without it the nested target stays empty and the decompressed asset
|
||||||
|
/// (logo texture) never reads back. Faithful to canary `file->Rename`.
|
||||||
|
#[test]
|
||||||
|
fn nt_set_information_file_rename_moves_cache_file() {
|
||||||
|
let (mut ctx, mut mem, mut state) = fresh();
|
||||||
|
// Real temp cache root + a staging `.tmp` file with known bytes.
|
||||||
|
let root = std::env::temp_dir().join(format!("xenia-rs-rename-test-{}", std::process::id()));
|
||||||
|
let _ = std::fs::remove_dir_all(&root);
|
||||||
|
std::fs::create_dir_all(&root).unwrap();
|
||||||
|
let old_host = root.join("69d8e45ce534ffea.tmp");
|
||||||
|
std::fs::write(&old_host, b"LOGOTEX!").unwrap();
|
||||||
|
state.cache_root = Some(root.clone());
|
||||||
|
// Open handle whose backing host_path is the staging file.
|
||||||
|
let handle = state.alloc_handle_for(KernelObject::File {
|
||||||
|
path: "69d8e45ce534ffea.tmp".to_string(),
|
||||||
|
size: 8,
|
||||||
|
position: 0,
|
||||||
|
data: Arc::new(Vec::new()),
|
||||||
|
dir_enum_pos: None,
|
||||||
|
host_path: Some(old_host.clone()),
|
||||||
|
});
|
||||||
|
// X_FILE_RENAME_INFORMATION { replace@0, root_dir@4, ANSI_STRING@8 }.
|
||||||
|
// ANSI_STRING { len u16, max u16, buf u32 } at info_ptr+8; buffer holds
|
||||||
|
// the target path "cache:\69d8e45c\e\534ffea".
|
||||||
|
let info_ptr = SCRATCH_BASE + 0x100;
|
||||||
|
let str_buf = SCRATCH_BASE + 0x200;
|
||||||
|
let target = b"cache:\\69d8e45c\\e\\534ffea";
|
||||||
|
for (i, b) in target.iter().enumerate() {
|
||||||
|
mem.write_u8(str_buf + i as u32, *b);
|
||||||
|
}
|
||||||
|
mem.write_u32(info_ptr, 0); // replace_existing
|
||||||
|
mem.write_u32(info_ptr + 4, 0); // root_dir_handle
|
||||||
|
mem.write_u16(info_ptr + 8, target.len() as u16); // ANSI_STRING.Length
|
||||||
|
mem.write_u16(info_ptr + 10, target.len() as u16); // MaximumLength
|
||||||
|
mem.write_u32(info_ptr + 12, str_buf); // Buffer
|
||||||
|
let iosb_ptr = SCRATCH_BASE + 0x140;
|
||||||
|
ctx.gpr[3] = handle as u64;
|
||||||
|
ctx.gpr[4] = iosb_ptr as u64;
|
||||||
|
ctx.gpr[5] = info_ptr as u64;
|
||||||
|
ctx.gpr[6] = 16;
|
||||||
|
ctx.gpr[7] = 10; // XFileRenameInformation
|
||||||
|
nt_set_information_file(&mut ctx, &mut mem, &mut state);
|
||||||
|
assert_eq!(ctx.gpr[3], STATUS_SUCCESS);
|
||||||
|
// Staging file gone; nested target exists with the same bytes.
|
||||||
|
let new_host = root.join("69d8e45c").join("e").join("534ffea");
|
||||||
|
assert!(!old_host.exists(), "staging .tmp should be moved away");
|
||||||
|
assert_eq!(std::fs::read(&new_host).unwrap(), b"LOGOTEX!");
|
||||||
|
// Handle now points at the new host + guest path.
|
||||||
|
match state.objects.get(&handle) {
|
||||||
|
Some(KernelObject::File { host_path: Some(hp), path, .. }) => {
|
||||||
|
assert_eq!(hp, &new_host);
|
||||||
|
assert_eq!(path, "cache:/69d8e45c/e/534ffea");
|
||||||
|
}
|
||||||
|
_ => panic!("file handle lost or host_path missing"),
|
||||||
|
}
|
||||||
|
let _ = std::fs::remove_dir_all(&root);
|
||||||
|
}
|
||||||
|
|
||||||
/// Read-only VFS — truncating to a different size must fail with
|
/// Read-only VFS — truncating to a different size must fail with
|
||||||
/// `STATUS_UNSUCCESSFUL`, matching Canary's error path when
|
/// `STATUS_UNSUCCESSFUL`, matching Canary's error path when
|
||||||
/// `file->SetLength(...)` can't honour the request.
|
/// `file->SetLength(...)` can't honour the request.
|
||||||
|
|||||||
@@ -30,6 +30,12 @@ use xenia_cpu::ThreadRef;
|
|||||||
pub const INTERRUPT_SOURCE_VSYNC: u32 = 0;
|
pub const INTERRUPT_SOURCE_VSYNC: u32 = 0;
|
||||||
pub const INTERRUPT_SOURCE_CP: u32 = 1;
|
pub const INTERRUPT_SOURCE_CP: u32 = 1;
|
||||||
|
|
||||||
|
/// The processor the graphics ISR impersonates for a v-sync interrupt.
|
||||||
|
/// Canary hard-codes this: `MarkVblank` → `DispatchInterruptCallback(0, 2)`
|
||||||
|
/// (graphics_system.cc:478). CP interrupts instead use the bit index of the
|
||||||
|
/// `PM4_INTERRUPT` `cpu_mask`.
|
||||||
|
pub const VSYNC_TARGET_CPU: u8 = 2;
|
||||||
|
|
||||||
/// Guest-registered V-sync / graphics-interrupt callback (from
|
/// Guest-registered V-sync / graphics-interrupt callback (from
|
||||||
/// `VdSetGraphicsInterruptCallback`).
|
/// `VdSetGraphicsInterruptCallback`).
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
@@ -145,9 +151,16 @@ pub type PendingLocalIrq = [std::sync::atomic::AtomicU8;
|
|||||||
pub struct InterruptState {
|
pub struct InterruptState {
|
||||||
/// Registered callback (set by `VdSetGraphicsInterruptCallback`).
|
/// Registered callback (set by `VdSetGraphicsInterruptCallback`).
|
||||||
pub callback: Option<GraphicsInterruptCallback>,
|
pub callback: Option<GraphicsInterruptCallback>,
|
||||||
/// Bounded FIFO of pending interrupt sources awaiting injection.
|
/// Bounded FIFO of pending interrupts awaiting injection, as
|
||||||
/// Push-back on queue, pop-front on inject. Over-cap pushes drop.
|
/// `(source, target_cpu)`. Push-back on queue, pop-front on inject.
|
||||||
pub pending: VecDeque<u32>,
|
/// Over-cap pushes drop. `target_cpu` is the processor the graphics
|
||||||
|
/// ISR must impersonate (canary `XThread::SetActiveCpu` / the
|
||||||
|
/// `DispatchInterruptCallback(source, cpu)` argument): the bit index
|
||||||
|
/// of the CP `PM4_INTERRUPT` `cpu_mask` for source=1, and a fixed `2`
|
||||||
|
/// for vsync (canary `DispatchInterruptCallback(0, 2)`). The ISR reads
|
||||||
|
/// it from the PCR (`[r13+268]`) to clear the matching per-CPU bit of
|
||||||
|
/// the swap-acknowledge fence.
|
||||||
|
pub pending: VecDeque<(u32, u8)>,
|
||||||
/// When `Some`, some HW thread is currently running a callback; on
|
/// When `Some`, some HW thread is currently running a callback; on
|
||||||
/// return-to-sentinel we restore this and clear the flag.
|
/// return-to-sentinel we restore this and clear the flag.
|
||||||
pub saved: Option<SavedCallbackCtx>,
|
pub saved: Option<SavedCallbackCtx>,
|
||||||
@@ -211,8 +224,9 @@ impl InterruptState {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Queue an interrupt for the next safe injection point.
|
/// Queue an interrupt for the next safe injection point. `cpu` is the
|
||||||
pub fn queue_interrupt(&mut self, source: u32) {
|
/// processor the ISR must impersonate (see `pending`).
|
||||||
|
pub fn queue_interrupt(&mut self, source: u32, cpu: u8) {
|
||||||
if self.callback.is_none() {
|
if self.callback.is_none() {
|
||||||
self.dropped += 1;
|
self.dropped += 1;
|
||||||
return;
|
return;
|
||||||
@@ -221,18 +235,23 @@ impl InterruptState {
|
|||||||
self.dropped += 1;
|
self.dropped += 1;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
self.pending.push_back(source);
|
self.pending.push_back((source, cpu));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Peek at the next pending source without removing it.
|
/// Peek at the next pending source without removing it.
|
||||||
pub fn peek_next(&self) -> Option<u32> {
|
pub fn peek_next(&self) -> Option<u32> {
|
||||||
self.pending.front().copied()
|
self.pending.front().map(|&(source, _)| source)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Peek at the target CPU of the next pending interrupt.
|
||||||
|
pub fn peek_next_cpu(&self) -> Option<u8> {
|
||||||
|
self.pending.front().map(|&(_, cpu)| cpu)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pop the next pending source (called by the injector after it has
|
/// Pop the next pending source (called by the injector after it has
|
||||||
/// committed to dispatching it).
|
/// committed to dispatching it).
|
||||||
pub fn take_next(&mut self) -> Option<u32> {
|
pub fn take_next(&mut self) -> Option<u32> {
|
||||||
self.pending.pop_front()
|
self.pending.pop_front().map(|(source, _)| source)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// **Legacy** — instruction-count v-sync ticker. Kept for unit tests
|
/// **Legacy** — instruction-count v-sync ticker. Kept for unit tests
|
||||||
@@ -249,7 +268,7 @@ impl InterruptState {
|
|||||||
let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD;
|
let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD;
|
||||||
self.vsync_accumulator %= VSYNC_INSTR_PERIOD;
|
self.vsync_accumulator %= VSYNC_INSTR_PERIOD;
|
||||||
for _ in 0..periods {
|
for _ in 0..periods {
|
||||||
self.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
self.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
|
||||||
}
|
}
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
@@ -288,7 +307,7 @@ impl InterruptState {
|
|||||||
self.last_vsync_instant = Some(anchor + advance);
|
self.last_vsync_instant = Some(anchor + advance);
|
||||||
let to_queue = (periods as usize).min(INTERRUPT_QUEUE_CAP);
|
let to_queue = (periods as usize).min(INTERRUPT_QUEUE_CAP);
|
||||||
for _ in 0..to_queue {
|
for _ in 0..to_queue {
|
||||||
self.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
self.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
|
||||||
}
|
}
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
@@ -306,7 +325,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn queue_interrupt_drops_without_callback() {
|
fn queue_interrupt_drops_without_callback() {
|
||||||
let mut s = InterruptState::default();
|
let mut s = InterruptState::default();
|
||||||
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
|
||||||
assert_eq!(s.dropped, 1);
|
assert_eq!(s.dropped, 1);
|
||||||
assert!(s.pending.is_empty());
|
assert!(s.pending.is_empty());
|
||||||
}
|
}
|
||||||
@@ -315,9 +334,9 @@ mod tests {
|
|||||||
fn queue_interrupt_fifo_preserves_order() {
|
fn queue_interrupt_fifo_preserves_order() {
|
||||||
let mut s = InterruptState::default();
|
let mut s = InterruptState::default();
|
||||||
s.set_callback(0x1000, 0xAB);
|
s.set_callback(0x1000, 0xAB);
|
||||||
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
|
||||||
s.queue_interrupt(INTERRUPT_SOURCE_CP);
|
s.queue_interrupt(INTERRUPT_SOURCE_CP, 2);
|
||||||
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
|
||||||
assert_eq!(s.dropped, 0);
|
assert_eq!(s.dropped, 0);
|
||||||
// FIFO: take_next hands them out in push order.
|
// FIFO: take_next hands them out in push order.
|
||||||
assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC));
|
assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC));
|
||||||
@@ -331,11 +350,11 @@ mod tests {
|
|||||||
let mut s = InterruptState::default();
|
let mut s = InterruptState::default();
|
||||||
s.set_callback(0x1000, 0xAB);
|
s.set_callback(0x1000, 0xAB);
|
||||||
for _ in 0..INTERRUPT_QUEUE_CAP {
|
for _ in 0..INTERRUPT_QUEUE_CAP {
|
||||||
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
|
||||||
}
|
}
|
||||||
// Over-cap: drops rather than evicting the oldest.
|
// Over-cap: drops rather than evicting the oldest.
|
||||||
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
|
||||||
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC);
|
s.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
|
||||||
assert_eq!(s.dropped, 2);
|
assert_eq!(s.dropped, 2);
|
||||||
assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP);
|
assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ use xenia_memory::{GuestMemory, MemoryAccess};
|
|||||||
/// u16 Length
|
/// u16 Length
|
||||||
/// u16 MaximumLength
|
/// u16 MaximumLength
|
||||||
/// u32 Buffer (guest pointer)
|
/// u32 Buffer (guest pointer)
|
||||||
fn read_ansi_string(mem: &GuestMemory, ptr: u32) -> Option<String> {
|
pub fn read_ansi_string(mem: &GuestMemory, ptr: u32) -> Option<String> {
|
||||||
if ptr == 0 {
|
if ptr == 0 {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user