From 9a93152981fc1ac829c6766ef63634e1790af343 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sat, 6 Jun 2026 18:58:40 +0200 Subject: [PATCH] Iterate-2.BE: host-driven synchronous graphics ISR delivery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the victim-thread-mutate-then-wait scheme for vsync / CP interrupts with synchronous in-line dispatch on the coordinator host thread. Mirrors canary's EmulateCPInterruptDPC -> Processor::Execute path (kernel_state.cc:1370, processor.cc:413): pick a guest thread, borrow its PpcContext, jam ISR PC + args in, run the interpreter inline until LR_HALT_SENTINEL, restore the borrowed context. Why: audit-059 measured gpu.interrupt.delivered{source=0} = 54 over 3.9 s vs canary's 4712 over 30 s. Per-second shortfall ~11×. Old asynchronous LR-sentinel injection (try_inject_graphics_interrupt) needed a Ready or Blocked guest thread to land on; once the Sylpheed main thread and worker threads all idled post-boot, no victim was available and every queued vsync got dropped. Host-driven dispatch decouples delivery from guest-thread readiness. Smoke test (lockstep): unchanged 54 — under current Sylpheed boot trajectory the ticker is gated by guest-instruction progress, not victim availability; lockstep stalls into idle-advance after ~5M instructions of real work and the synthetic tick_vsync_instr stops firing. Under --parallel (wallclock ticker) gpu.interrupt.delivered climbs to ~1131 over a 128 s run, confirming the synchronous dispatcher itself works as intended. Architectural piece is now in place; raising the lockstep delivery rate requires ticking the synthetic vsync inside coord_idle_advance, which is a separate change. Changes: - crates/xenia-kernel/src/interrupts.rs: doc-comment update only. SavedCallbackCtx + CALLBACK_STACK_PAD retained — the audio callback path (audit-048) still uses the asynchronous LR-sentinel inject on a dedicated per-client worker. 
- crates/xenia-app/src/main.rs: * dispatch_graphics_interrupts(kernel, mem, &mut stats, &mut decode_cache, thunk_map): new fn. Drains the full FIFO per call. Victim selection same shape (Ready preferred, else Blocked, skip Idle/Exited/ServicingIrq), but the call is synchronous - we run step_cached + import-thunk dispatch inline on the borrowed ctx until pc == LR_HALT_SENTINEL. MAX_INSTRS_PER_ISR = 1M safety budget. * coord_pre_round: graphics-IRQ injection call removed. Audio path unchanged (still calls try_inject_audio_callback). * run_execution + run_execution_parallel: each now owns a persistent isr_decode_cache and calls dispatch_graphics_interrupts after coord_pre_round. * try_inject_graphics_interrupt: deleted (118 LOC). No new public APIs, no new dependencies, no changes to xenia-cpu. Tests: workspace 765 passed / 0 failed / 4 ignored (parallel_stress + sylpheed_n50m, all gated). Kernel 127/127, app 5/5, cpu 288/288. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-app/src/main.rs | 385 ++++++++++++++++++-------- crates/xenia-kernel/src/interrupts.rs | 17 +- 2 files changed, 279 insertions(+), 123 deletions(-) diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 0418ec9..1b14bbc 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -1990,7 +1990,13 @@ fn coord_pre_round( } kernel.fire_due_timers(); - try_inject_graphics_interrupt(kernel); + // Graphics-interrupt delivery is no longer done here — see + // `dispatch_graphics_interrupts`, called from the outer loop with + // `mem` and `&mut stats` in scope. The audio path still uses the + // asynchronous LR-sentinel inject because each XAudio client has a + // dedicated worker thread (audit-048 Plan B) that the callback + // runs on; we just queue the source and the worker_prologue's + // halt-sentinel restore path closes the loop. 
if kernel.xaudio_tick_enabled { try_inject_audio_callback(kernel); } @@ -2595,12 +2601,21 @@ fn run_execution( let mut workers: [WorkerCtx; xenia_cpu::scheduler::HW_THREAD_COUNT] = std::array::from_fn(|i| WorkerCtx::new(i as u8, force_per_instr)); + // Iterate-2.BE — decode cache used by the synchronous ISR + // dispatcher. ISRs are short (~40 PPC instructions) but fire + // every ~16.7 ms, so persisting the cache across calls avoids + // re-decoding the same handful of pages 60×/s. + let mut isr_decode_cache = xenia_cpu::decoder::DecodeCache::new(); + 'outer: loop { // Per-round prologue: budget / shutdown / heartbeat / vsync / - // timers / graphics-interrupt injection. Carved into + // timers / audio-interrupt injection. Carved into // `coord_pre_round` so the parallel scheduler (Step 03+) can // call the same coordination logic between phaser barriers - // without duplicating it from the lockstep path. + // without duplicating it from the lockstep path. The + // graphics-interrupt dispatch is hoisted out — it runs + // *synchronously* (host-driven, iterate-2.BE) and needs `mem` + // + `&mut stats` which aren't in `coord_pre_round`'s scope. match coord_pre_round( kernel, &stats, @@ -2612,6 +2627,13 @@ fn run_execution( RoundCtl::BreakOuter => break, RoundCtl::Continue => {} } + dispatch_graphics_interrupts( + kernel, + mem, + &mut stats, + &mut isr_decode_cache, + thunk_map, + ); // Snapshot round schedule. `round_schedule` also advances rng state // when seeded; mutation is intentional. @@ -2789,6 +2811,10 @@ fn run_execution_parallel( let throttle_start = Instant::now(); + // Iterate-2.BE — decode cache for the synchronous ISR dispatcher. + // Lives on the coordinator (this) thread; workers never touch it. 
+ let mut isr_decode_cache = xenia_cpu::decoder::DecodeCache::new(); + const COORD_ID: u8 = xenia_cpu::scheduler::HW_THREAD_COUNT as u8; // = 6 const PARTY_COUNT: u32 = xenia_cpu::scheduler::HW_THREAD_COUNT as u32 + 1; @@ -3025,6 +3051,22 @@ fn run_execution_parallel( } let mut guard = pre_outcome.1; + // Iterate-2.BE — host-driven synchronous ISR dispatch. + // Runs under the kernel lock while workers are still parked + // at the phaser B2 barrier (the coordinator hasn't published + // the runnable mask or arrived at the phaser yet), so no + // contention with worker steps. + { + let mut s = stats_mtx.lock().expect("stats mutex poisoned"); + dispatch_graphics_interrupts( + &mut *guard, + mem, + &mut *s, + &mut isr_decode_cache, + thunk_map, + ); + } + guard.scheduler.begin_round(); let order = guard.scheduler.round_schedule(); @@ -3140,146 +3182,255 @@ fn run_execution_parallel( stats_mtx.into_inner().expect("stats mutex poisoned") } -/// First-Pixels M2 — inject a queued graphics interrupt into HW thread 0 -/// when it's safe to do so (callback registered, no interrupt already -/// running). Called at the top of each scheduler round. +/// Iterate-2.BE — host-driven synchronous dispatch of all queued +/// graphics interrupts. Mirrors canary's +/// [`EmulateCPInterruptDPC`](../../../../xenia-canary/src/xenia/kernel/kernel_state.cc#L1370) +/// → [`Processor::Execute`](../../../../xenia-canary/src/xenia/cpu/processor.cc#L413) +/// path: pick a guest thread, borrow its `PpcContext`, jam the ISR +/// PC + args into it, and **run the interpreter inline on the host +/// thread** until the ISR returns to `LR_HALT_SENTINEL`. Then restore +/// the borrowed context and continue. 
/// -/// Unlike the earlier P6 version which only delivered when HW 0 was -/// `Ready`, this one also delivers when HW 0 is `Blocked`: the injector -/// stashes the block reason into the new `HwState::ServicingIrq(reason)` -/// variant, flips the thread to that state so `round_schedule` runs it, -/// and — on callback return to `LR_HALT_SENTINEL` — the restore path -/// re-creates `Blocked(reason)`, unless a `wake()` during the callback -/// (e.g. `KeSetEvent` → `wake_eligible_waiters`) flipped it to `Ready`, -/// in which case the wait was resolved and we leave it. +/// Drains the full pending FIFO each call — canary's frame-limiter +/// runs at its own cadence and our queue can already hold up to +/// `INTERRUPT_QUEUE_CAP` coalesced v-sync events. /// -/// This is the fix that unblocks games (like Sylpheed) which gate their -/// main loop on a v-sync callback signaling an event the main thread -/// waits on. The earlier "only-when-Ready" policy dropped 397 of 399 -/// observed v-syncs on a 1 B-instruction Sylpheed probe; now they -/// actually get delivered. -fn try_inject_graphics_interrupt(kernel: &mut xenia_kernel::KernelState) { +/// Why this replaces the prior victim-mutate-then-wait scheme: with +/// the old asynchronous injection, when every guest thread idled (post +/// boot, when Sylpheed's main thread reaches its WAIT_FOREVER on the +/// vsync-driven PKEVENT and all worker threads are likewise Blocked), +/// the next scheduler round had no `Ready` victim and `Blocked` ones +/// still required at least one round of execution to reach the +/// callback. Audit-059 measured `gpu.interrupt.delivered = 54` over +/// 3.9 s vs canary's 4712 over 30 s — an ~11× per-second shortfall. Host-driven dispatch +/// makes delivery rate a function of wall clock, not guest-thread +/// readiness. 
+/// +/// Victim selection still mirrors the canary precedent: prefer Ready +/// (no state mangling), else any Blocked thread (we temporarily flip +/// to `ServicingIrq(reason)` for the duration of the inline run so +/// `call_export` etc. see a coherent thread state, and restore the +/// `Blocked(reason)` on the way out unless the ISR itself signaled a +/// wake). Idle / Exited / already-ServicingIrq slots are skipped — if +/// nothing remains the source is dropped (still the right behavior; +/// canary's `XThread::GetCurrentThread()` would assert). +/// +/// All execution while in-flight runs against the borrowed thread's +/// `ctx`. We set `scheduler.current = Some(target_ref)` so kernel +/// imports (`KeSetEvent`, `KeReleaseSemaphore`, etc.) reach the right +/// context, then restore the previous `current` on the way out. The +/// dispatch is single-threaded — under `--parallel` it runs on the +/// coordinator with workers parked at the phaser barrier, so there is +/// no contention. +fn dispatch_graphics_interrupts( + kernel: &mut xenia_kernel::KernelState, + mem: &xenia_memory::GuestMemory, + stats: &mut ExecStats, + decode_cache: &mut xenia_cpu::decoder::DecodeCache, + thunk_map: &HashMap, +) { + use xenia_cpu::interpreter::{step_cached, StepResult}; use xenia_cpu::scheduler::HwState; + const LR_HALT: u32 = xenia_cpu::context::LR_HALT_SENTINEL as u32; + /// Defensive cap so a runaway ISR can't lock the coordinator on + /// the per-tick dispatch. Real Sylpheed vsync ISR is ~40 PPC + /// instructions; canary's `Processor::Execute` has no analogous + /// cap because it runs on a dedicated host thread, but we run + /// inline on the coordinator so a budget is prudent. 
+ const MAX_INSTRS_PER_ISR: u64 = 1_000_000; - if kernel.interrupts.is_in_callback() { - return; - } let Some(cb) = kernel.interrupts.callback else { - // No callback registered; drain any pending entries (they - // wouldn't have made it into the queue per `queue_interrupt`'s - // own `callback.is_none()` guard, but be defensive). kernel.interrupts.pending.clear(); return; }; - let Some(source) = kernel.interrupts.peek_next() else { + // Audio injection (audit-048 Plan B) still uses the asynchronous + // LR-sentinel path. If an audio callback is mid-flight we must not + // try to clobber the borrowed context — bail until the audio path + // returns through the worker_prologue restore. + if kernel.interrupts.is_in_callback() { return; - }; - - // Canary's `EmulateCPInterruptDPC` (kernel_state.cc:1373) dispatches on - // whatever the current thread happens to be — real hardware fires the - // interrupt on CPU 2 and the kernel impersonates a DPC on top of - // whichever thread is active. Hard-anchoring to HW 0 breaks the moment - // `main()` returns: Sylpheed's main thread exits right after init, the - // render worker spins on a `PKEVENT` inside the interrupt callback's - // user_data struct (`user_data + 0x5C`), and because HW 0 is now - // `Exited(_)` our injector drops every subsequent vsync — the PKEVENT - // is never signaled and the worker polls forever. - // - // Pick the first HW thread we can plausibly run the callback on: - // 1. Prefer `Ready` (no state-mangling needed) - // 2. Else take a `Blocked(reason)` thread and swap to - // `ServicingIrq(reason)` so the round scheduler runs it; the - // LR-sentinel restore path reinstates the block on callback return - // 3. Skip `Idle`, `Exited`, or already-`ServicingIrq` slots - // - // The callback itself just signals a game-side event and returns — it - // doesn't care which HW thread it ran on. - // Pass 1: find any Ready thread across all slots. 
- let mut victim: Option<xenia_cpu::ThreadRef> = None; - 'outer_ready: for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() { - for (idx, t) in slot.runqueue.iter().enumerate() { - if matches!(t.state, HwState::Ready) { - victim = Some(xenia_cpu::ThreadRef::new(hw_id as u8, idx as u16)); - break 'outer_ready; - } - } } - // Pass 2: any Blocked thread (we'll flip it to ServicingIrq). - if victim.is_none() { - 'outer_blocked: for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() { + + while let Some(source) = kernel.interrupts.peek_next() { + // Victim selection: Ready first, then Blocked (canary's + // `XThread::GetCurrentThread()` analog — any live thread will + // do for borrowing context). Skip Idle/Exited/ServicingIrq. + let mut victim: Option<xenia_cpu::ThreadRef> = None; + 'outer_ready: for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() { for (idx, t) in slot.runqueue.iter().enumerate() { - if matches!(t.state, HwState::Blocked(_)) { + if matches!(t.state, HwState::Ready) { victim = Some(xenia_cpu::ThreadRef::new(hw_id as u8, idx as u16)); - break 'outer_blocked; + break 'outer_ready; } } } - } - let Some(target_ref) = victim else { - // All threads Idle/Exited/already servicing — nothing to inject on. 
- kernel.interrupts.take_next(); - kernel.interrupts.dropped += 1; - return; - }; - - let t = kernel.scheduler.thread_mut(target_ref); - let prev_state = t.state.clone(); - match prev_state { - HwState::Ready => {} - HwState::Blocked(reason) => { - t.state = HwState::ServicingIrq(reason); + if victim.is_none() { + 'outer_blocked: for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() { + for (idx, t) in slot.runqueue.iter().enumerate() { + if matches!(t.state, HwState::Blocked(_)) { + victim = Some(xenia_cpu::ThreadRef::new(hw_id as u8, idx as u16)); + break 'outer_blocked; + } + } + } } - _ => unreachable!("victim selection above filtered out other variants"), - } + let Some(target_ref) = victim else { + // No donor at all — drop and exit (no point looping if the + // next source has the same problem). + kernel.interrupts.take_next(); + kernel.interrupts.dropped += 1; + return; + }; - let _ = kernel.interrupts.take_next(); - let t = kernel.scheduler.thread_mut(target_ref); - let saved = xenia_kernel::SavedCallbackCtx::capture(&t.ctx, source); - kernel.interrupts.injected_ref = Some(target_ref); - t.ctx.pc = cb.callback_pc; - t.ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL; - // Canary `Processor::Execute` decrements the guest SP by 176 before - // running the callback and restores on return (see Canary - // processor.cc:383). Without this pad the callback's - // `__savegprlr_N` prologue stomps the interrupted function's - // already-saved LR at [r1-8], so when the interrupted function - // later returns via `__restgprlr_N -> bclr` it jumps to - // `LR_HALT_SENTINEL` and the thread exits prematurely. Matching - // restore lives in `SavedCallbackCtx::restore` (which now also - // restores r1). 
- t.ctx.gpr[1] = t - .ctx - .gpr[1] - .wrapping_sub(xenia_kernel::interrupts::CALLBACK_STACK_PAD as u64); - t.ctx.gpr[3] = source as u64; - t.ctx.gpr[4] = cb.user_data as u64; - kernel.interrupts.saved = Some(saved); - metrics::counter!("gpu.interrupt.delivered", "source" => format!("{source}")) - .increment(1); - tracing::debug!( - source, - hw_id = target_ref.hw_id, - idx = target_ref.idx, - callback = format_args!("{:#010x}", cb.callback_pc), - "graphics interrupt: injecting" - ); + // Commit: pop the queue, flag temporary state. + let _ = kernel.interrupts.take_next(); + let prev_state = kernel.scheduler.thread(target_ref).state.clone(); + let was_blocked = matches!(prev_state, HwState::Blocked(_)); + if let HwState::Blocked(reason) = prev_state.clone() { + kernel.scheduler.thread_mut(target_ref).state = + HwState::ServicingIrq(reason); + } + + // Save the borrowed ctx fields the ISR will clobber. Matches + // canary's processor.cc:387-394 (save prev lr, run, restore). + let saved = { + let t = kernel.scheduler.thread_mut(target_ref); + let saved = xenia_kernel::SavedCallbackCtx::capture(&t.ctx, source); + t.ctx.pc = cb.callback_pc; + t.ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL; + // Canary processor.cc:383 — pad SP so the callback's + // __savegprlr_N prologue doesn't stomp the interrupted + // function's saved LR at [r1-8]. + t.ctx.gpr[1] = t + .ctx + .gpr[1] + .wrapping_sub(xenia_kernel::interrupts::CALLBACK_STACK_PAD as u64); + t.ctx.gpr[3] = source as u64; + t.ctx.gpr[4] = cb.user_data as u64; + saved + }; + + // Stash the previous `scheduler.current` (call_export reaches + // it; imports the ISR calls must dispatch on the borrowed + // thread). Restore on the way out. 
+ let prev_current = kernel.scheduler.current; + kernel.scheduler.current = Some(target_ref); + + metrics::counter!("gpu.interrupt.delivered", "source" => format!("{source}")) + .increment(1); + tracing::debug!( + source, + hw_id = target_ref.hw_id, + idx = target_ref.idx, + callback = format_args!("{:#010x}", cb.callback_pc), + "graphics interrupt: dispatching synchronously (iterate-2.BE)" + ); + + // Inline interpreter loop on the borrowed context until the + // ISR returns to LR_HALT_SENTINEL (its `blr` writes + // `lr → pc`). Per-instruction step handles imports via + // thunk_map (the ISR typically just calls `KeSetEvent`). + let mut isr_instrs: u64 = 0; + loop { + let pc = kernel.scheduler.ctx_mut_ref(target_ref).pc; + if pc == LR_HALT { + break; + } + if isr_instrs >= MAX_INSTRS_PER_ISR { + tracing::warn!( + pc = format_args!("{:#010x}", pc), + isr_instrs, + "graphics ISR exceeded MAX_INSTRS_PER_ISR; aborting" + ); + break; + } + + // Import-thunk intercept: same shape as worker_prologue's + // step 2 (line ~2287). 
+ if let Some((module, ordinal, _name)) = thunk_map.get(&pc) { + let module = *module; + let ordinal_u32 = *ordinal as u32; + kernel.call_export(module, ordinal_u32, mem); + let post_ref = kernel.scheduler.current; + let c = match post_ref { + Some(r) => kernel.scheduler.ctx_mut_ref(r), + None => kernel.scheduler.ctx_mut_ref(target_ref), + }; + c.pc = c.lr as u32; + c.cycle_count += 1; + c.timebase += 1; + stats.instruction_count += 1; + stats.import_count += 1; + isr_instrs += 1; + continue; + } + + if !mem.is_mapped(pc) { + tracing::error!( + pc = format_args!("{:#010x}", pc), + isr_instrs, + "graphics ISR hit unmapped PC; aborting" + ); + break; + } + + let ctx = kernel.scheduler.ctx_mut_ref(target_ref); + let page_ver = mem.page_version(ctx.pc); + let r = step_cached(ctx, mem, decode_cache, page_ver); + stats.instruction_count += 1; + isr_instrs += 1; + match r { + StepResult::Continue => {} + StepResult::SystemCall => { + tracing::warn!("graphics ISR hit `sc` instruction; aborting"); + break; + } + StepResult::Trap => { + tracing::warn!("graphics ISR hit trap; aborting"); + break; + } + StepResult::Halted => break, + StepResult::Unimplemented(op) => { + tracing::warn!(?op, "graphics ISR hit unimplemented opcode; aborting"); + break; + } + } + } + + // Restore the borrowed context. + saved.restore(kernel.scheduler.ctx_mut_ref(target_ref)); + kernel.scheduler.current = prev_current; + kernel.interrupts.delivered += 1; + + // Restore thread state. If the ISR signaled a wake on the + // borrowed thread (e.g. canary `KeSetEvent` → scheduler wake) + // the state may already be Ready; only re-block if still + // ServicingIrq. + if was_blocked { + let t = kernel.scheduler.thread_mut(target_ref); + if let HwState::ServicingIrq(reason) = t.state.clone() { + t.state = HwState::Blocked(reason); + } + } + } } /// AUDIT-032 Plan B — inject a pending XAudio buffer-complete callback /// into the **dedicated audio worker** registered for the head-of-queue -/// client. 
Mirrors -/// [`try_inject_graphics_interrupt`] (same SP-pad, same saved-context -/// restore-on-sentinel) but the target thread is fixed at registration -/// time instead of selected via the random-victim policy. The pre-fix +/// client. Uses the asynchronous LR-sentinel injection mechanism (same +/// SP-pad, same `SavedCallbackCtx` restore-on-sentinel as the pre-iterate-2.BE +/// graphics path) but the target thread is fixed at registration time +/// instead of selected via the random-victim policy. The pre-fix /// random-victim path corrupted unrelated thread state /// (APUBUG-PRODUCER-001 "HW-thread hijack"); per-client workers eliminate /// that whole class of regression. /// -/// Mutual exclusion with the graphics path is via the shared -/// `interrupts.saved` slot — if a graphics callback is already in flight, -/// `is_in_callback()` returns true and we bail until it returns to the -/// `LR_HALT_SENTINEL`. +/// Mutual exclusion with the graphics path (which is now synchronous — +/// see `dispatch_graphics_interrupts`) is via the shared +/// `interrupts.saved` slot — if an audio callback is already in flight, +/// `is_in_callback()` returns true and `dispatch_graphics_interrupts` +/// defers until it returns to the `LR_HALT_SENTINEL`. fn try_inject_audio_callback(kernel: &mut xenia_kernel::KernelState) { use xenia_cpu::scheduler::HwState; diff --git a/crates/xenia-kernel/src/interrupts.rs b/crates/xenia-kernel/src/interrupts.rs index 55f0e2f..e336834 100644 --- a/crates/xenia-kernel/src/interrupts.rs +++ b/crates/xenia-kernel/src/interrupts.rs @@ -8,13 +8,18 @@ //! guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`). //! //! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310) -//! dispatches the callback on HW thread 0. We follow the same convention. +//! dispatches the callback on HW thread 0. We follow the same convention +//! 
of borrowing a guest context, but as of iterate-2.BE the dispatch +//! itself is **synchronous and host-driven**: the main loop runs the ISR +//! inline on the borrowed guest context, mirroring canary's +//! `EmulateCPInterruptDPC → Processor::Execute` path +//! ([kernel_state.cc:1370](../../../../xenia-canary/src/xenia/kernel/kernel_state.cc#L1370), +//! [processor.cc:413](../../../../xenia-canary/src/xenia/cpu/processor.cc#L413)). +//! The donor is the first Ready (else Blocked) thread on any HW slot, not HW 0. //! -//! The delivery model is cooperative: we inject the callback entry into HW -//! thread 0 at the top of a scheduler round when it's safe (not mid-export, -//! not already inside another interrupt). When the callback returns to -//! [`LR_HALT_SENTINEL`] the main loop restores the saved [`PpcContext`] -//! fields and the HW thread picks up where it left off. +//! The audio callback path (audit-048) still uses asynchronous LR-sentinel +//! injection on a dedicated per-client worker thread; the +//! [`SavedCallbackCtx`] machinery below remains in use there. use std::collections::VecDeque; use std::time::{Duration, Instant};