From f3b7e8b7600bc59b507765e4c59014152e08c30a Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sat, 13 Jun 2026 10:02:02 +0200 Subject: [PATCH] [iterate-2F] Scheduler anti-starvation floor: fix job-4 handoff render gate The lockstep scheduler's pick_runnable is strict priority (max_by_key (priority, -idx)). On a cooperative single-host HW slot, a CPU-bound spinner that never blocks (the silph poll loop pinned by affinity to hw=5) wins pick_runnable every round forever, permanently starving a co-located peer (the submitter, tid6) that the spinner is actually waiting on. On real hardware those threads run on separate SMT contexts concurrently, so the spinner never starves the submitter; ours collapses them onto one slot with no anti-starvation, turning priority (or equal-priority index order) into permanent starvation. The starved submitter never dequeued job-4 -> the worker-hub (tid5) blocked INFINITE on completion event 0x1080 -> silph (tid13) wedged on 0x1078 -> no vsync -> draws_seen=0, the publisher splash never renders. (decrement_quantum's within-slot rotation is dead: begin_slot_visit unconditionally re-pick_runnable()s each round, discarding the rotated running_idx. The fix is therefore evaluated at pick time, not via that discarded rotation.) Fix (Option A, bounded anti-starvation, deterministic): - Add per-thread steps_starved counter to GuestThread. - begin_slot_visit increments it for every Ready peer passed over this visit, resets it to 0 for the picked thread. - pick_runnable selects by effective_priority: once steps_starved reaches STARVE_LIMIT (4096) the thread is lifted to i32::MAX and wins exactly one pick, then resets. The genuinely higher-priority thread still wins ~4095/4096 visits -- the boost grants periodic forward progress only, it does NOT invert priority. Pure function of counter/priority/index -> deterministic (no wall-clock, no RNG). Cascade (lockstep exec, XENIA_CACHE_PERSIST=1, -n 200M): - submitter dequeue sub_82458508 now fires 4x (was 3x); the 4th job (buf 0x40baa2c0) is dequeued at cycle 6.15M. - hub tid5 leaves Blocked(0x1080) -> now Ready (no more INFINITE wait). - GPU packets 0 -> 116,101,363 (command stream now flowing). - tid13 (silph::UImpl) advances past the old 0x1078 wedge to a NEW downstream wait (handle 0x10a0); 3 new threads spawn (tid14/15/16). - draws_seen still 0 -> the splash's first draw is a NEW downstream gate, not this starvation. Determinism: two cold lockstep `check -n 5M` runs byte-identical (full and stable digests). New n50m stable digest deterministic across two cold runs. Golden re-baselined: instructions 50000007->50000003, imports 92317->90296 (trajectory shift from the changed pick order). Tests: 666/666 (+1 test_anti_starvation_bounded_progress). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../xenia-app/tests/golden/sylpheed_n50m.json | 4 +- crates/xenia-cpu/src/scheduler.rs | 110 +++++++++++++++++- 2 files changed, 107 insertions(+), 7 deletions(-) diff --git a/crates/xenia-app/tests/golden/sylpheed_n50m.json b/crates/xenia-app/tests/golden/sylpheed_n50m.json index bde4037..cce349d 100644 --- a/crates/xenia-app/tests/golden/sylpheed_n50m.json +++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json @@ -1,6 +1,6 @@ { - "instructions": 50000007, - "imports": 92317, + "instructions": 50000003, + "imports": 90296, "unimpl": 0, "draws": 0, "swaps": 1, diff --git a/crates/xenia-cpu/src/scheduler.rs b/crates/xenia-cpu/src/scheduler.rs index 6a4b837..54409b5 100644 --- a/crates/xenia-cpu/src/scheduler.rs +++ b/crates/xenia-cpu/src/scheduler.rs @@ -35,6 +35,20 @@ pub const INITIAL_GUEST_TID: u32 = 1; /// Axis 1 carries the field on every thread but doesn't decrement yet. pub const QUANTUM_DEFAULT: u32 = 50_000; +/// Anti-starvation floor. On a cooperative single-host slot, strict-priority +/// `pick_runnable` lets a high-priority CPU-bound spinner (e.g. a pri-15 +/// time-critical poll loop pinned by affinity) win every round forever, +/// permanently starving a co-located lower-priority peer that the spinner is +/// actually *waiting on* — a deadlock that never occurs on real hardware, +/// where SMT contexts run those threads concurrently. +/// +/// Once a Ready thread has been passed over this many consecutive slot +/// visits, `pick_runnable` grants it ONE pick (then its counter resets). The +/// limit is large enough that the genuinely-higher-priority thread still wins +/// the overwhelming majority of visits (here: ~4095/4096); the boost only +/// guarantees *bounded* forward progress, it does not invert priority. +pub const STARVE_LIMIT: u32 = 4096; + /// Above this depth, `spawn` prunes `Exited` entries from a slot's runqueue /// before pushing the new thread. Keeps peer `ThreadRef`s stable on the /// common (low-depth) path — a game that spawns a handful of long-lived @@ -117,6 +131,12 @@ pub struct GuestThread { /// Axis 3 instruction budget. Decremented per retired step on this /// thread; on zero, slot rotates within same-priority tier. pub quantum_remaining: u32, + /// Anti-starvation counter. Incremented each slot visit this thread is + /// Ready but NOT picked; reset to 0 when picked. When it reaches + /// `STARVE_LIMIT`, `pick_runnable` grants this thread one boosted pick so + /// a monopolizing higher-priority peer on the same slot cannot starve it + /// indefinitely. Deterministic: a pure function of pick history. + pub steps_starved: u32, /// SpawnParams.entry — the BL target the trampoline jumped to. /// Persisted so kernel exports can filter syscalls by spawning /// chain (e.g. the silph UI auto-signal POC). 0 for the initial @@ -144,6 +164,7 @@ impl GuestThread { affinity_mask: 0xFF, ideal_processor: None, quantum_remaining: QUANTUM_DEFAULT, + steps_starved: 0, start_entry: 0, start_context: 0, } @@ -218,15 +239,35 @@ impl Default for HwSlot { impl HwSlot { /// Index of the highest-priority Ready/ServicingIrq thread in this /// slot's runqueue. Tiebreak: prefer lower index (deterministic). + /// + /// Selection is by *effective* priority: a Ready thread that has been + /// passed over for `STARVE_LIMIT` consecutive visits is boosted so it + /// wins exactly one pick, then [`Scheduler::begin_slot_visit`] resets its + /// counter. This restores the guest-visible invariant that every Ready + /// thread makes forward progress, without inverting the intended priority + /// order (a starved thread only beats its monopolizer once per + /// `STARVE_LIMIT` visits). The boost is a pure function of the per-thread + /// counters/priority/index, so picks stay deterministic. pub fn pick_runnable(&self) -> Option { self.runqueue .iter() .enumerate() .filter(|(_, t)| matches!(t.state, HwState::Ready | HwState::ServicingIrq(_))) - .max_by_key(|(i, t)| (t.priority, -(*i as i64))) + .max_by_key(|(i, t)| (Self::effective_priority(t), -(*i as i64))) .map(|(i, _)| i) } + /// Priority used for selection. A thread starved for `STARVE_LIMIT` + /// visits is lifted to `i32::MAX` so it wins the next pick regardless of + /// peer priority; otherwise its nominal priority is used unchanged. + fn effective_priority(t: &GuestThread) -> i32 { + if t.steps_starved >= STARVE_LIMIT { + i32::MAX + } else { + t.priority + } + } + /// How many non-Exited threads currently live on this slot (used by /// placement policies). pub fn live_depth(&self) -> usize { @@ -790,10 +831,22 @@ impl Scheduler { /// stashes `self.current` so exports can reach it. pub fn begin_slot_visit(&mut self, hw_id: u8) { let slot = &mut self.slots[hw_id as usize]; - slot.running_idx = slot.pick_runnable(); - self.current = slot - .running_idx - .map(|idx| ThreadRef::new(hw_id, idx as u16)); + let picked = slot.pick_runnable(); + slot.running_idx = picked; + // Anti-starvation bookkeeping: reset the picked thread's counter, + // increment every other Ready peer that was passed over this visit. + // Once a passed-over thread reaches STARVE_LIMIT it wins the next + // pick_runnable (effective_priority -> i32::MAX), then lands here as + // `picked` and resets — bounding any thread's starvation. Pure + // function of pick history, so it stays deterministic. + for (i, t) in slot.runqueue.iter_mut().enumerate() { + if Some(i) == picked { + t.steps_starved = 0; + } else if matches!(t.state, HwState::Ready | HwState::ServicingIrq(_)) { + t.steps_starved = t.steps_starved.saturating_add(1); + } + } + self.current = picked.map(|idx| ThreadRef::new(hw_id, idx as u16)); } /// Clear `current` at the end of each per-slot visit. @@ -1962,6 +2015,53 @@ mod tests { assert_eq!(t.quantum_remaining, QUANTUM_DEFAULT, "quantum reloaded"); } + #[test] + fn test_anti_starvation_bounded_progress() { + // Reproduces the Sylpheed render-gate deadlock: a high-priority + // CPU-bound spinner (the pri-15 poll loop) co-located on one slot + // with a pri-0 worker (the submitter) the spinner is waiting on. + // Strict priority would starve the worker forever; the anti-starve + // floor must hand it a pick within STARVE_LIMIT+1 visits, then the + // spinner reclaims the slot (priority is NOT inverted). + let mut s = mk_empty_scheduler(); + let mut spinner = SpawnParams::default(); + spinner.guest_tid = 1; + spinner.thread_handle = 0x1000; + spinner.affinity_mask = 0b0001; + spinner.pcr_base = 0x4000_0000; + spinner.priority = 15; + s.spawn(spinner, &mut NullPcr).unwrap(); + let mut worker = SpawnParams::default(); + worker.guest_tid = 2; + worker.thread_handle = 0x1004; + worker.affinity_mask = 0b0001; + worker.pcr_base = 0x4000_1000; + worker.priority = 0; + s.spawn(worker, &mut NullPcr).unwrap(); + + let mut worker_picks = 0u32; + let mut spinner_picks = 0u32; + // Both stay Ready (the spinner never blocks — that's the bug shape). + for _ in 0..(STARVE_LIMIT + 2) { + s.begin_slot_visit(0); + match s.thread(s.current.unwrap()).tid { + 1 => spinner_picks += 1, + 2 => worker_picks += 1, + other => panic!("unexpected tid {other}"), + } + s.end_slot_visit(); + } + assert_eq!( + worker_picks, 1, + "starved worker gets exactly one bounded pick within STARVE_LIMIT+2 visits" + ); + assert_eq!( + spinner_picks, + STARVE_LIMIT + 1, + "high-priority spinner still dominates — priority is not inverted" + ); + } + #[test] fn test_cooperative_yield_does_not_need_quantum() { let mut s = mk_empty_scheduler();