diff --git a/crates/xenia-app/tests/golden/sylpheed_n50m.json b/crates/xenia-app/tests/golden/sylpheed_n50m.json index 70f7a6c..0c2f40e 100644 --- a/crates/xenia-app/tests/golden/sylpheed_n50m.json +++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json @@ -1,9 +1,9 @@ { - "instructions": 50000014, - "imports": 178937, + "instructions": 19274336, + "imports": 72513, "unimpl": 0, - "draws": 78, - "swaps": 4, + "draws": 28, + "swaps": 2, "unique_render_targets": 2, "shader_blobs_live": 3, "texture_cache_entries": 0 diff --git a/crates/xenia-kernel/src/exports.rs b/crates/xenia-kernel/src/exports.rs index df89131..a211432 100644 --- a/crates/xenia-kernel/src/exports.rs +++ b/crates/xenia-kernel/src/exports.rs @@ -2999,24 +2999,25 @@ fn vd_swap(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // xboxkrnl_video.cc:479. Currently skipped (see below). let _ = fetch_dwords; // silence unused — will be live again under the deferred path - // iterate-2T: mirror xenia-canary `VdSwap_entry` (xboxkrnl_video.cc:518-548) + // iterate-2V: mirror xenia-canary `VdSwap_entry` (xboxkrnl_video.cc:518-548) // FAITHFULLY. The game reserves 64 dwords (256 bytes) in the primary ring // at `buffer_ptr`; canary writes a `PM4_TYPE0(SHADER_CONSTANT_FETCH_00_0)` // fetch-constant patch followed by `PM4_TYPE3(PM4_XE_SWAP)`, then pads with - // NOPs. We do the same, then bump WPTR by 64 so the drain consumes the - // PM4_XE_SWAP **in command-stream order** — i.e. AFTER any in-stream - // callback-arming Type-0 writes the game already queued. + // NOPs — and **NEVER touches `CP_RB_WPTR`**. The game advances the primary + // ring write-pointer itself via its own doorbell once it has finished + // populating the reserved slot, so VdSwap only fills the bytes. // - // Why this matters (the iterate-2T root): the previous M2b short-circuit - // called `notify_xe_swap` directly from the HLE, which synthesized a CP - // swap-complete interrupt OUT OF BAND. 
When that interrupt reached the - // graphics ISR (`sub_824BE9A0`) before D3D had armed its swap-callback - // slot (`[gfx+10772]+16` still the `0xBADF00D` placeholder), the ISR hit - // its "ERR[D3D]: Unanticipated CPU_INTERRUPT. Sign of a corrupt command - // buffer?" assert (`twi` at 0x824BE9DC). Routing the swap through the ring - // packet keeps the interrupt naturally ordered after arming, matching - // canary (whose VdSwap raises NO interrupt itself; swap-complete CP - // interrupts come only from in-stream `PM4_INTERRUPT` packets). + // iterate-2V FIX (the bug this removes): a prior revision bumped the + // primary ring `CP_RB_WPTR` out-of-band here (`extend_write_ptr_by(64)`). + // But `buffer_ptr` (~0x4add6efc) is NOT inside the primary ring (base + // ~0x4adcd000, 8192 dwords) — it lives ~10k dwords past the ring base (~2k dwords beyond the ring's end), in the + // renderer indirect-buffer region. The bogus WPTR bump pushed the GPU + // read-pointer PAST the guest's real write-pointer, the drain treated the + // overshoot as a circular wrap, and **re-executed the splash's draw + // indirect-buffers ~2×** — inflating draws to 78 (real splash ≈ 28; 12 + // INDIRECT_BUFFERs vs the real 6). Canary's `VdSwap_entry` writes the + // block and returns; the swap-complete CP interrupt comes only from the + // game's own in-stream `PM4_INTERRUPT` packets, never from VdSwap. if buffer_ptr != 0 { let mut off = 0u32; let mut put = |i: &mut u32, v: u32| { @@ -3052,12 +3053,15 @@ fn vd_swap(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { put(&mut off, xenia_gpu::pm4::make_packet_type2()); } } - state.gpu.extend_write_ptr_by(64); + // NOTE: We deliberately do NOT bump `CP_RB_WPTR` here (see the iterate-2V + // comment above). The drain below consumes only the packets the game has + // legitimately advanced the write-pointer over. - // Drain the ring; the PM4_XE_SWAP we just queued (and any in-stream - // PM4_INTERRUPT) executes in order. 
The PM4_XE_SWAP handler calls - // `notify_xe_swap` for host swap bookkeeping; no synthetic interrupt is - // raised (see `notify_xe_swap`). + // Drain the ring up to whatever the game has actually submitted; any + // in-stream `PM4_INTERRUPT` / draw packets execute in order. The + // reserved-slot PM4_XE_SWAP is consumed by the GPU only once the game + // advances its own doorbell over it. The swap-counter safety net below + // keeps host swap bookkeeping live in the meantime. let drained = state.gpu.drain_to_current_wptr(mem); tracing::debug!(drained, "VdSwap: drained PM4 packets");