diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs
index 5a41154..bcdba26 100644
--- a/crates/xenia-app/src/main.rs
+++ b/crates/xenia-app/src/main.rs
@@ -2151,7 +2151,13 @@ fn coord_pre_round(
     let fired = if kernel.parallel_active {
         kernel.interrupts.tick_vsync_wallclock()
     } else {
-        kernel.interrupts.tick_vsync_instr(stats.instruction_count)
+        // iterate-3AJ: present-anchored — pass the guest's live present
+        // (`VdSwap`) count so vsync tracks the real present rate once the
+        // guest is presenting (≈1 vblank/present), instead of firing a
+        // fixed instruction quantum that over-fires ~66× during one heavy
+        // splash asset-load frame and collapsed the logo fade-in.
+        let presents = kernel.gpu.swaps_seen();
+        kernel.interrupts.tick_vsync_instr(stats.instruction_count, presents)
     };
     if fired {
         use std::sync::atomic::Ordering;
diff --git a/crates/xenia-app/tests/golden/sylpheed_n50m.json b/crates/xenia-app/tests/golden/sylpheed_n50m.json
index 9687b99..a6e7f75 100644
--- a/crates/xenia-app/tests/golden/sylpheed_n50m.json
+++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json
@@ -1,9 +1,9 @@
 {
-  "instructions": 50000014,
-  "imports": 352251,
+  "instructions": 50000007,
+  "imports": 333453,
   "unimpl": 0,
-  "draws": 718,
-  "swaps": 147,
+  "draws": 1274,
+  "swaps": 259,
   "unique_render_targets": 2,
   "shader_blobs_live": 6,
   "texture_cache_entries": 1
diff --git a/crates/xenia-gpu/src/handle.rs b/crates/xenia-gpu/src/handle.rs
index fa3db8a..467b156 100644
--- a/crates/xenia-gpu/src/handle.rs
+++ b/crates/xenia-gpu/src/handle.rs
@@ -444,6 +444,23 @@ impl GpuBackend {
         }
     }
 
+    /// Current guest present (`VdSwap`) count, read by the present-anchored
+    /// vsync ticker (iterate-3AJ) every scheduler round. Inline mode reads
+    /// the live counter directly; threaded mode reads the last-published
+    /// digest mirror under a brief lock (the `--parallel` path uses the
+    /// wall-clock vsync ticker anyway, so exact freshness is not
+    /// load-bearing). A poisoned digest lock still yields the mirrored
+    /// count instead of 0, so the counter never appears to run backwards.
+    pub fn swaps_seen(&self) -> u64 {
+        match self {
+            GpuBackend::Inline(s) => s.stats.swaps_seen,
+            GpuBackend::Threaded(h) => {
+                let digest = h.digest.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
+                digest.stats.swaps_seen
+            }
+        }
+    }
+
     /// Forward [`GpuSystem::has_pending_interrupts`] under inline mode;
     /// under threaded mode peek the `int_rx` channel.
     pub fn has_pending_interrupts(&self) -> bool {
diff --git a/crates/xenia-kernel/src/interrupts.rs b/crates/xenia-kernel/src/interrupts.rs
index aa7cbf7..84ce584 100644
--- a/crates/xenia-kernel/src/interrupts.rs
+++ b/crates/xenia-kernel/src/interrupts.rs
@@ -183,6 +183,28 @@ pub struct InterruptState {
     /// ticker. `tick_vsync_instr` diffs against this to advance
     /// `vsync_accumulator`.
     pub last_instr_count: u64,
+    /// **iterate-3AJ — present-anchored vsync.** Set `true` once the guest
+    /// has presented at least one frame (a `VdSwap`). Before this, the
+    /// vsync ticker uses the legacy fixed instruction-quantum cadence so
+    /// the boot present-loop bootstrap (iterate-2W) still gets the vsyncs
+    /// it needs *before* the first present. After this, vsync is anchored
+    /// to the guest's real present rate (≈1 vblank per present, as on real
+    /// hardware where the title double-buffers at vblank), with only a
+    /// small capped instruction-quantum *fallback* for frames where the
+    /// guest genuinely stops presenting (heavy asset load). This stops the
+    /// proxy from firing ~66 vsyncs during one heavy load frame, which
+    /// collapsed the splash-logo intro fade-in (the guest's vsync counter
+    /// jumped 0→66 in one frame instead of ramping smoothly).
+    pub vsync_present_anchored: bool,
+    /// Last observed guest present (`VdSwap`) count. `tick_vsync_instr`
+    /// diffs the live count against this each call to emit one vblank per
+    /// new present once `vsync_present_anchored` is set.
+    pub last_present_count: u64,
+    /// How many *fallback* (non-present-driven) vsyncs have fired in the
+    /// current dry (no-present) window. Reset to 0 whenever a present
+    /// occurs. Capped at [`DRY_FALLBACK_CAP`] so one heavy non-presenting
+    /// frame cannot fire a long burst of vsyncs (the fade-in regression).
+    pub dry_fallback_fired: u32,
     /// Wall-clock anchor for the production v-sync ticker. `None` until
     /// the first `tick_vsync_wallclock` call (lazy init so unit tests
     /// that never invoke that function don't construct an Instant).
@@ -208,6 +230,21 @@ pub struct InterruptState {
 /// determinism.
 pub const VSYNC_INSTR_PERIOD: u64 = 150_000;
 
+/// **iterate-3AJ — present-anchored vsync fallback.**
+///
+/// Once the guest is in its present loop (`vsync_present_anchored`), each
+/// guest present emits exactly one vblank — vsync *is* the present cadence,
+/// as on real Xbox 360 hardware where the title double-buffers at vblank.
+/// For a frame where the guest stops presenting (e.g. the ~1.1 s splash
+/// asset-load), we still need *some* vsyncs to keep timers / the present
+/// loop alive, but one per [`VSYNC_INSTR_PERIOD`] would reproduce the
+/// ~66-vsync spike that collapsed the fade-in. So the fallback fires one
+/// vblank per `VSYNC_INSTR_PERIOD` of *non-presenting* instructions, at most
+/// this many per dry window; after that nothing fires until the next
+/// present resets the counter. A heavy load frame therefore advances the
+/// guest vsync counter by ≤ this cap (like canary's 0/5/10/2/1… ramp), not 66.
+pub const DRY_FALLBACK_CAP: u32 = 4;
+
 /// Wall-clock period for the **production** v-sync ticker. 16.667 ms
 /// targets exactly 60 Hz. KRNBUG-D08 — converting from the
 /// instruction-count proxy fixes the `--parallel` rate drop while
@@ -254,23 +291,83 @@ impl InterruptState {
         self.pending.pop_front().map(|(source, _)| source)
     }
 
-    /// **Legacy** — instruction-count v-sync ticker. Kept for unit tests
-    /// that need a deterministic clock source. Production code calls
-    /// `tick_vsync_wallclock` instead. Returns `true` if at least one
-    /// v-sync was queued.
-    pub fn tick_vsync_instr(&mut self, current_instr_count: u64) -> bool {
+    /// **Present-anchored** instruction-paced v-sync ticker (the lockstep
+    /// production path; also used by unit tests for a deterministic clock).
+    ///
+    /// `current_instr_count` is the running retired-instruction count.
+    /// `present_count` is the guest's running `VdSwap` count (monotonic).
+    ///
+    /// Two regimes:
+    ///
+    /// 1. **Bootstrap** (`!vsync_present_anchored`, i.e. before the guest's
+    ///    first present): legacy fixed-quantum cadence — one vsync per
+    ///    [`VSYNC_INSTR_PERIOD`] retired instructions. The boot present loop
+    ///    (iterate-2W) needs vsyncs delivered *before* it can present, so
+    ///    this regime is unchanged from the original ticker. The first
+    ///    observed present flips `vsync_present_anchored`.
+    ///
+    /// 2. **Present-anchored** (after the first present): one vblank per
+    ///    guest present (vsync *is* the present cadence on real hardware),
+    ///    plus a capped instruction-quantum fallback — at most one per call
+    ///    and [`DRY_FALLBACK_CAP`] per dry window — so a frame where the
+    ///    guest stops presenting (heavy asset load) still ticks a *few*
+    ///    vsyncs, not the ~66 that collapsed the splash fade-in.
+    ///
+    /// Returns `true` if at least one v-sync was queued.
+    pub fn tick_vsync_instr(&mut self, current_instr_count: u64, present_count: u64) -> bool {
         let delta = current_instr_count.saturating_sub(self.last_instr_count);
         self.last_instr_count = current_instr_count;
         self.vsync_accumulator = self.vsync_accumulator.saturating_add(delta);
-        if self.vsync_accumulator < VSYNC_INSTR_PERIOD {
-            return false;
+
+        let new_presents = present_count.saturating_sub(self.last_present_count);
+        self.last_present_count = present_count;
+        if new_presents > 0 {
+            self.vsync_present_anchored = true;
         }
-        let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD;
-        self.vsync_accumulator %= VSYNC_INSTR_PERIOD;
-        for _ in 0..periods {
+
+        // Regime 1 — bootstrap: legacy fixed instruction quantum. Preserves
+        // the iterate-2W present-loop bootstrap exactly (vsyncs must fire
+        // before the guest can present).
+        if !self.vsync_present_anchored {
+            if self.vsync_accumulator < VSYNC_INSTR_PERIOD {
+                return false;
+            }
+            let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD;
+            self.vsync_accumulator %= VSYNC_INSTR_PERIOD;
+            for _ in 0..periods {
+                self.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
+            }
+            return true;
+        }
+
+        // Regime 2 — present-anchored.
+        let mut queued = false;
+
+        if new_presents > 0 {
+            // One vblank per guest present. `queue_interrupt` caps the FIFO,
+            // so a burst of presents in one round can't flood. A fresh present
+            // zeroes the dry-window state (instruction credit + fallback count).
+            for _ in 0..new_presents {
+                self.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
+            }
+            self.vsync_accumulator = 0;
+            self.dry_fallback_fired = 0;
+            queued = true;
+        } else if self.vsync_accumulator >= VSYNC_INSTR_PERIOD
+            && self.dry_fallback_fired < DRY_FALLBACK_CAP
+        {
+            // Dry tick (no new present): the guest stopped presenting (heavy
+            // load). Fire at most ONE fallback vsync per call, consuming one
+            // period of credit, until DRY_FALLBACK_CAP is reached — enough to
+            // keep timers/the present loop alive without the ~66-vsync spike.
+            // NOTE(review): past the cap nothing fires until the next present.
+            self.vsync_accumulator -= VSYNC_INSTR_PERIOD;
+            self.dry_fallback_fired += 1;
             self.queue_interrupt(INTERRUPT_SOURCE_VSYNC, VSYNC_TARGET_CPU);
+            queued = true;
         }
-        true
+
+        queued
     }
 
     /// **Production** — wall-clock v-sync ticker. Fires
@@ -364,9 +461,10 @@ mod tests {
         let mut s = InterruptState::default();
         s.set_callback(0x1000, 0xAB);
         assert_eq!(VSYNC_INSTR_PERIOD, 150_000);
-        assert!(!s.tick_vsync_instr(VSYNC_INSTR_PERIOD - 1));
+        // present_count = 0 → bootstrap regime (legacy fixed quantum).
+        assert!(!s.tick_vsync_instr(VSYNC_INSTR_PERIOD - 1, 0));
         assert!(s.pending.is_empty());
-        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD));
+        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD, 0));
         assert_eq!(s.peek_next(), Some(INTERRUPT_SOURCE_VSYNC));
     }
 
@@ -376,10 +474,59 @@ mod tests {
         // be delivered, not lost.
         let mut s = InterruptState::default();
         s.set_callback(0x1000, 0xAB);
-        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD * 3 + 10));
+        // present_count = 0 → bootstrap regime drains all 3 periods at once.
+        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD * 3 + 10, 0));
         assert_eq!(s.pending.len(), 3);
     }
 
+    #[test]
+    fn tick_vsync_instr_present_anchors_after_first_present() {
+        // iterate-3AJ: once the guest presents, vsync tracks presents (one
+        // vblank per present), NOT the fixed instruction quantum.
+        let mut s = InterruptState::default();
+        s.set_callback(0x1000, 0xAB);
+        // Bootstrap: instruction quantum fires (present_count still 0).
+        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD, 0));
+        assert_eq!(s.pending.len(), 1);
+        let _ = s.take_next();
+        // First present arrives 3 periods later: the legacy quantum would
+        // queue 3 vsyncs here; anchored mode queues exactly 1 (the present).
+        assert!(s.tick_vsync_instr(VSYNC_INSTR_PERIOD * 4, 1));
+        assert!(s.vsync_present_anchored);
+        assert_eq!(s.pending.len(), 1);
+    }
+
+    #[test]
+    fn tick_vsync_instr_heavy_dry_frame_capped_not_spiking() {
+        // iterate-3AJ regression guard. One heavy non-presenting frame
+        // retires ~10M instructions; the legacy ticker turned that into ~66
+        // vsyncs (10M / 150k) at once, so the guest vsync counter leapt
+        // 0→66 and the fade-in was skipped. Present-anchored ticking must
+        // stop at DRY_FALLBACK_CAP for the whole dry window.
+        const CHUNK_INSTRS: u64 = 100_000;
+        const CHUNKS: u64 = 100; // 100 × 100k = 10M instructions
+        let mut state = InterruptState::default();
+        state.set_callback(0x1000, 0xAB);
+        // A single present switches the ticker into anchored mode; discard
+        // the vblank it queues so only fallbacks are counted below.
+        let anchor_instrs: u64 = VSYNC_INSTR_PERIOD;
+        assert!(state.tick_vsync_instr(anchor_instrs, 1));
+        while state.take_next().is_some() {}
+        // Replay the heavy frame chunk by chunk (as coord_pre_round would)
+        // with the present count frozen at 1, draining after each hit.
+        let mut fallback_vsyncs = 0usize;
+        for chunk in 1..=CHUNKS {
+            if state.tick_vsync_instr(anchor_instrs + chunk * CHUNK_INSTRS, 1) {
+                fallback_vsyncs += std::iter::from_fn(|| state.take_next()).count();
+            }
+        }
+        assert_eq!(
+            fallback_vsyncs, DRY_FALLBACK_CAP as usize,
+            "a heavy dry frame must cap fallback vsyncs at DRY_FALLBACK_CAP, \
+             not fire ~66"
+        );
+    }
+
     #[test]
     fn tick_vsync_wallclock_first_call_sets_anchor() {
         // First call seeds the anchor and never fires. KRNBUG-D08: