ITERATE-2.V: scheduler priority aging closes 18-day AUDIT-049 wedge
Priority aging in xenia-cpu/scheduler.rs:pick_runnable
(effective_priority = base + age_bonus(now_round - last_run_round),
capped at +31, AGING_ROUNDS_PER_BONUS=1). Strict-priority was parking
priority=0 threads behind CPU-bound priority=15 audio mixer
(sub_824D1328 guest spinwait at PC=0x824d1404 on CPU5). Aging
eventually picks the starved thread, breaking the producer-consumer
cycle that had caused the 5-tid wedge at PC=0x824ac578 since AUDIT-049 (10 May).
Cascade observed: tid=13 clean exit; events 121K -> 13M (107x); last
host_ns 767ms -> 51,011ms (66x); 8 new threads spawn; VdSwap 1 -> 2.
Complete two-day iterate sequence (2026-05-27 -> 2026-05-28):
- 2.F: VdSwap drain timeout 900ms -> 1ms (xenia-gpu/handle.rs); 876x
perf win on VdSwap kernel callback
- 2.H: vA0000000 physical heap bucket added (state.rs, exports.rs);
ctx_ptrs now in 0xA0000000-0xBFFFFFFF range matching canary
- 2.L: Phase-A diff harness categorized [return_value mismatch],
[status mismatch], [args_resolved.path mismatch] tags
(tools/diff-events/diff_events.py); closes reading-error #41
(silent test-harness state leak invalidating trace diffs)
- 2.M: always-on exit-thread-state.json sibling to Phase-A JSONL
(event_log.rs + xenia-app/main.rs); closes reading-error #42
(Phase-A blind to blocked-forever waits)
- 2.Q: signal.match kernel instrumentation in NtSetEvent /
NtReleaseSemaphore / KeSetEvent / KeReleaseSemaphore
(exports.rs); emits target_handle + waiter_count + waiter_tids
- 2.T: wake.requested kernel instrumentation in wake_eligible_waiters
(exports.rs); emits target_tid + transition + new_state
- 2.V: scheduler priority aging (xenia-cpu/scheduler.rs) [keystone]
Plus accumulated WIP from earlier May (contention_manifest,
phase_b_snapshot, xam/xaudio enhancements, analysis db, xex loader,
xenia-app main loop, etc.). Audit-runs/ artifacts remain untracked
per project convention.
Tests: 300 xenia-cpu / 227 xenia-kernel / 5 xenia-app / 19 xenia-path
/ 30+ smaller suites -- all PASS, 0 regressions. Determinism preserved
(2x cold runs bit-identical at 13,003,881 events post-2.V).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -117,17 +117,27 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::addis => {
|
||||
// Xbox 360 user mode is 32-bit ABI (MSR.SF=0), so addis must
|
||||
// produce a value whose upper 32 bits don't pollute downstream
|
||||
// 64-bit arithmetic. The PPC ISA in 64-bit mode sign-extends
|
||||
// simm16 before the shift, producing 0xFFFFFFFF_xxxx0000 for
|
||||
// negative simm16 (high bit set). When this value flows into
|
||||
// a 64-bit subfc against a zero-extended lwz value, the unsigned
|
||||
// 64-bit comparison yields wrong CA. Truncate to 32 bits to
|
||||
// simulate 32-bit ABI behavior.
|
||||
let ra_val = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] };
|
||||
let result = ra_val.wrapping_add((instr.simm16() as i64 as u64) << 16);
|
||||
ctx.gpr[instr.rd()] = result as u32 as u64;
|
||||
// Phase C+23: `addis` (and the `lis` simplified mnemonic) must
|
||||
// sign-extend the shifted immediate to the full 64 bits before
|
||||
// storing into the GPR, matching canary's HIR emitter
|
||||
// (`InstrEmit_addis` in `ppc_emit_alu.cc`: `EXTS16(SI) << 16`
|
||||
// as a 64-bit constant). Game code commonly builds a negative
|
||||
// 32-bit value via `lis rN, 0xFFFB; ori rN, rN, 0x6C20`
|
||||
// (yielding the i32 -300,000 for a 30ms `KeWait` timeout) and
|
||||
// then stores it as a 64-bit doubleword via `std`. Without
|
||||
// sign extension the high half on the wire was 0x00000000,
|
||||
// turning the timeout into a positive ~4.3-billion-tick
|
||||
// absolute deadline (~7 minutes) instead of a 30ms relative
|
||||
// wait — surfacing as `wait.begin.timeout_ns=429466729600`
|
||||
// on canary tid=12 → ours tid=7 idx=3 sister chain
|
||||
// (cold-vs-cold C+22 baseline). Defensive 32-bit truncation
|
||||
// for the arithmetic chain consumers (`subfcx`/`addex`/etc.)
|
||||
// is already implemented at each consumer site (see PPCBUG-002/
|
||||
// 007/etc.), so widening `addis` here does NOT regress them.
|
||||
let ra_val = if instr.ra() == 0 { 0i64 } else { ctx.gpr[instr.ra()] as i64 };
|
||||
let shifted = (instr.simm16() as i64) << 16;
|
||||
let result = ra_val.wrapping_add(shifted);
|
||||
ctx.gpr[instr.rd()] = result as u64;
|
||||
ctx.pc += 4;
|
||||
}
|
||||
PpcOpcode::addic => {
|
||||
@@ -4934,6 +4944,92 @@ mod tests {
|
||||
assert_eq!(ctx.gpr[3], 0x10000);
|
||||
}
|
||||
|
||||
/// Phase C+23 regression: `addis rD, 0, simm` with a negative immediate
/// (the `lis` simplified form) must leave a value sign-extended to the
/// full 64 bits in the destination GPR, matching canary's HIR emitter.
///
/// Game code builds a negative 32-bit value with
/// `lis r11, 0xFFFB; ori r11, r11, 0x6C20` and then writes it out as a
/// 64-bit doubleword via `std`. If the upper half were 0x00000000 instead
/// of 0xFFFFFFFF, a 30 ms relative `KeWaitForSingleObject` timeout would
/// become a positive absolute deadline roughly 7 minutes away. Anchored by
/// the cold-vs-cold sister chain canary tid=12 → ours tid=7 idx=3
/// divergence.
#[test]
fn addis_with_negative_simm_sign_extends_to_64_bits() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();

    // D-form encoding of `addis r11, 0, 0xFFFB` (`lis r11, 0xFFFB`):
    // primary opcode 15, rD = 11, rA = 0, 16-bit immediate 0xFFFB.
    let lis_r11 = (15u32 << 26) | (11u32 << 21) | (0u32 << 16) | 0xFFFBu32;
    write_instr(&mut mem, 0, lis_r11);

    ctx.pc = 0;
    step(&mut ctx, &mut mem);

    assert_eq!(
        ctx.gpr[11], 0xFFFFFFFF_FFFB0000u64,
        "addis with negative simm must sign-extend to 64 bits"
    );
}
|
||||
|
||||
/// Phase C+23 regression: the complete `lis + ori + std` sequence that
/// Sylpheed uses to build the −300,000 tick timeout for its 30 ms `KeWait`
/// calls must store 0xFFFFFFFFFFFB6C20, not 0x00000000FFFB6C20.
///
/// The zero-extended form is the proximate cause of the
/// `wait.begin.timeout_ns = 429466729600` divergence on canary tid=12
/// → ours tid=7 idx=3 in the cold-vs-cold C+22 baseline.
#[test]
fn lis_ori_std_negative_timeout_writes_sign_extended_doubleword() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();

    // r1 plays the role of the stack pointer; the doubleword is stored at
    // r1 + 8 = 0x108.
    ctx.gpr[1] = 0x100;

    // lis r11, 0xFFFB       -> r11 = 0xFFFFFFFF_FFFB0000 (op=15, D-form)
    let lis = (15u32 << 26) | (11u32 << 21) | (0u32 << 16) | 0xFFFBu32;
    // ori r11, r11, 0x6C20  -> r11 = 0xFFFFFFFF_FFFB6C20
    // (op=24, D-form: rs(11) | ra(11) | uimm)
    let ori = (24u32 << 26) | (11u32 << 21) | (11u32 << 16) | 0x6C20u32;
    // std r11, 8(r1)        -> mem[0x108..0x110] = 0xFFFFFFFF_FFFB6C20
    // (op=62, DS-form: the low two bits of the displacement field hold
    // xo=0, hence the 0xFFFC mask)
    let std_op = (62u32 << 26) | (11u32 << 21) | (1u32 << 16) | (8u32 & 0xFFFCu32);

    write_instr(&mut mem, 0, lis);
    write_instr(&mut mem, 4, ori);
    write_instr(&mut mem, 8, std_op);
    ctx.pc = 0;

    step(&mut ctx, &mut mem); // lis
    assert_eq!(ctx.gpr[11], 0xFFFFFFFF_FFFB0000u64);

    step(&mut ctx, &mut mem); // ori
    assert_eq!(ctx.gpr[11], 0xFFFFFFFF_FFFB6C20u64);

    step(&mut ctx, &mut mem); // std
    let stored = mem.read_u64(0x108);
    assert_eq!(
        stored, 0xFFFFFFFF_FFFB6C20u64,
        "std must persist all 64 bits of the sign-extended GPR"
    );

    // Read back as a signed count of 100 ns NT timeout ticks: it has to be
    // −300,000 (a 30 ms relative wait), NOT +4,294,667,296 (the broken
    // C+22 value).
    let ticks = stored as i64;
    assert_eq!(ticks, -300_000i64);
    assert_eq!(ticks.wrapping_mul(100), -30_000_000i64);
}
|
||||
|
||||
/// Phase C+23 regression: `addis` with a non-zero rA must still perform
/// the canonical add using 64-bit semantics.
///
/// Arithmetic chains rely on this when they combine a sign-extended `lis`
/// high half with a later `addi` low half. Equivalent to canary's HIR
/// `Add(LoadGPR(rA), const_i64(simm << 16))`.
#[test]
fn addis_with_nonzero_ra_adds_in_64_bit() {
    let mut ctx = PpcContext::new();
    let mut mem = TestMem::new();

    // With r4 = 0x1234, `addis r5, r4, 0xFFFE` computes
    //   r5 = r4 + (-2 << 16) = 0x1234 + 0xFFFFFFFF_FFFE0000.
    ctx.gpr[4] = 0x1234;
    let addis = (15u32 << 26) | (5u32 << 21) | (4u32 << 16) | 0xFFFEu32;
    write_instr(&mut mem, 0, addis);

    ctx.pc = 0;
    step(&mut ctx, &mut mem);

    assert_eq!(
        ctx.gpr[5], 0xFFFFFFFF_FFFE1234u64,
        "addis must add the sign-extended shifted immediate to rA in 64 bits"
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_lwz_stw() {
|
||||
let mut ctx = PpcContext::new();
|
||||
|
||||
@@ -42,6 +42,19 @@ pub const QUANTUM_DEFAULT: u32 = 50_000;
|
||||
/// gets one when the slot fills up.
|
||||
const PRUNE_DEPTH_THRESHOLD: usize = 4;
|
||||
|
||||
/// Number of scheduler rounds that must elapse for one extra point of
/// age-priority bonus.
///
/// At the current value of 1, a thread gains one bonus point for every
/// round that has passed since it was last picked, so a low-priority
/// thread eventually out-ranks a higher-priority same-slot peer. Iterate
/// 2.V: closes the strict-priority starvation hole that left tid=6 (pri=0)
/// on CPU5 indefinitely behind a CPU-bound tid=10 (pri=15).
///
/// The age is measured in scheduler `round_count`, which is fully
/// deterministic (no host_ns / wallclock dependency).
///
/// NOTE(review): the exact round on which the starved thread wins also
/// depends on the bonus the competing peer has accrued itself and on the
/// lower-index tiebreak in `pick_runnable`; do not read "N rounds for a
/// priority gap of N" as exact.
///
/// Must stay non-zero: it is used as a divisor.
const AGING_ROUNDS_PER_BONUS: u64 = 1;

/// Upper bound on the age-priority bonus.
///
/// 31 easily overwhelms any realistic NT priority-class difference (max is
/// ~31) without saturating i32 math.
const MAX_AGE_BONUS: i32 = 31;
|
||||
|
||||
/// Stable identity for a guest thread across all scheduler tables.
|
||||
///
|
||||
/// The positional `idx` is only valid while the source slot's runqueue
|
||||
@@ -117,6 +130,14 @@ pub struct GuestThread {
|
||||
/// Axis 3 instruction budget. Decremented per retired step on this
|
||||
/// thread; on zero, slot rotates within same-priority tier.
|
||||
pub quantum_remaining: u32,
|
||||
/// Iterate 2.V: scheduler `round_count` at the last time this thread
|
||||
/// was picked to run on its slot (via `begin_slot_visit` or the
|
||||
/// `decrement_quantum` rotation hand-off). Used by `pick_runnable`
|
||||
/// to compute an age-priority bonus so a CPU-bound high-priority
|
||||
/// peer can't strict-priority-starve a same-slot Ready peer forever.
|
||||
/// Initialized to the scheduler's `round_count` at spawn so a fresh
|
||||
/// thread doesn't inherit a giant age bonus on its first pick.
|
||||
pub last_run_round: u64,
|
||||
}
|
||||
|
||||
impl GuestThread {
|
||||
@@ -136,6 +157,7 @@ impl GuestThread {
|
||||
affinity_mask: 0xFF,
|
||||
ideal_processor: None,
|
||||
quantum_remaining: QUANTUM_DEFAULT,
|
||||
last_run_round: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -206,14 +228,23 @@ impl Default for HwSlot {
|
||||
}
|
||||
|
||||
impl HwSlot {
|
||||
/// Index of the highest-priority Ready/ServicingIrq thread in this
|
||||
/// slot's runqueue. Tiebreak: prefer lower index (deterministic).
|
||||
pub fn pick_runnable(&self) -> Option<usize> {
|
||||
/// Index of the highest *effective* priority Ready/ServicingIrq
|
||||
/// thread in this slot's runqueue. Effective priority = base priority
|
||||
/// + age bonus, where age = scheduler rounds since the thread was
|
||||
/// last picked. The age bonus prevents strict-priority starvation:
|
||||
/// without it, a CPU-bound priority=15 peer pinned to the same slot
|
||||
/// would deterministically beat any Ready priority=0 peer forever
|
||||
/// (closes iterate 2.V's root-cause wedge — tid=10 vs tid=6 on CPU5).
|
||||
/// Tiebreak on equal effective priority: lower idx wins (deterministic).
|
||||
///
|
||||
/// `now_round` is the scheduler's current `round_count`; passing it in
|
||||
/// keeps this method side-effect-free and decouples it from `Scheduler`.
|
||||
pub fn pick_runnable(&self, now_round: u64) -> Option<usize> {
|
||||
self.runqueue
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, t)| matches!(t.state, HwState::Ready | HwState::ServicingIrq(_)))
|
||||
.max_by_key(|(i, t)| (t.priority, -(*i as i64)))
|
||||
.max_by_key(|(i, t)| (effective_priority(t, now_round), -(*i as i64)))
|
||||
.map(|(i, _)| i)
|
||||
}
|
||||
|
||||
@@ -228,10 +259,31 @@ impl HwSlot {
|
||||
|
||||
}
|
||||
|
||||
/// Compute the effective scheduling priority of `t` at scheduler round
|
||||
/// `now_round`. Adds a deterministic age bonus equal to
|
||||
/// `(now_round - t.last_run_round) / AGING_ROUNDS_PER_BONUS`, capped at
|
||||
/// `MAX_AGE_BONUS`. `saturating_sub` guards against the case where
|
||||
/// `last_run_round` was set in a future round (shouldn't happen, but
|
||||
/// keeps the math defensive). See module-level docs at
|
||||
/// `AGING_ROUNDS_PER_BONUS` for rationale.
|
||||
#[inline]
|
||||
fn effective_priority(t: &GuestThread, now_round: u64) -> i32 {
|
||||
let age = now_round.saturating_sub(t.last_run_round);
|
||||
let bonus_u64 = age / AGING_ROUNDS_PER_BONUS;
|
||||
let bonus = bonus_u64.min(MAX_AGE_BONUS as u64) as i32;
|
||||
t.priority.saturating_add(bonus)
|
||||
}
|
||||
|
||||
/// Scheduler ordering / quantum mode, normally selected from the
/// environment by `OrderMode::from_env`.
#[derive(Debug, Clone, Copy)]
pub enum OrderMode {
    /// Default mode; `from_env` falls back to it when no other mode is
    /// requested.
    Fixed,
    /// Seeded mode. `seed` is the value the scheduler derives its initial
    /// RNG state from.
    Seeded { seed: u64 },
    /// Stage 0 quantum-preemption spike. Replaces `QUANTUM_DEFAULT` at every
    /// quantum-reload site with `ticks`, so the scheduler rotates between
    /// same-priority peers more (or less) frequently. Used to probe whether
    /// scheduling shape alone unblocks the 104,607 RtlEnterCS cap before
    /// committing to the contention-replay manifest stages.
    ScanQuantum { ticks: u32 },
}
|
||||
|
||||
impl OrderMode {
|
||||
@@ -244,6 +296,14 @@ impl OrderMode {
|
||||
.unwrap_or(0xC0FFEE_C0FFEE);
|
||||
OrderMode::Seeded { seed }
|
||||
}
|
||||
Some("quantum") | Some("Quantum") | Some("QUANTUM") => {
|
||||
let ticks = std::env::var("XENIA_SCHED_QUANTUM")
|
||||
.ok()
|
||||
.and_then(|s| s.parse::<u32>().ok())
|
||||
.filter(|&t| t > 0)
|
||||
.unwrap_or(1000);
|
||||
OrderMode::ScanQuantum { ticks }
|
||||
}
|
||||
_ => OrderMode::Fixed,
|
||||
}
|
||||
}
|
||||
@@ -369,7 +429,7 @@ impl Scheduler {
|
||||
pub fn new() -> Self {
|
||||
let order = OrderMode::from_env();
|
||||
let rng_state = match order {
|
||||
OrderMode::Fixed => 0,
|
||||
OrderMode::Fixed | OrderMode::ScanQuantum { .. } => 0,
|
||||
OrderMode::Seeded { seed } => seed.max(1),
|
||||
};
|
||||
Scheduler {
|
||||
@@ -379,7 +439,15 @@ impl Scheduler {
|
||||
order,
|
||||
rng_state,
|
||||
timed_waits: Vec::new(),
|
||||
tls_slot_count: 0,
|
||||
// Match canary's `kDefaultTlsSlotCount = 1024` (xthread.cc:335).
|
||||
// Per-thread `tls_values` vec is sized to this count when spawned
|
||||
// (see [`Self::install_main_thread`] / [`Self::spawn`]). Cost is
|
||||
// 4 KiB per guest thread; 24 KiB across the 6 HW slots. Without
|
||||
// this, `tls_values` starts empty and any `lwz rN, off(rTLS)`
|
||||
// before the first `ExAllocateTls` reads guest memory zeros
|
||||
// (matches canary observably) while host-side `tls_values[idx]`
|
||||
// accesses panic on out-of-range until the lazy grow kicks in.
|
||||
tls_slot_count: 1024,
|
||||
non_empty_runnable: 0,
|
||||
rotation_cursor: 0,
|
||||
reservation_table: None,
|
||||
@@ -614,6 +682,13 @@ impl Scheduler {
|
||||
t.priority = params.priority;
|
||||
t.affinity_mask = mask;
|
||||
t.ideal_processor = params.ideal_processor;
|
||||
// Stage 0 — honor ScanQuantum reload on the freshly-spawned thread;
|
||||
// `default_fields` set QUANTUM_DEFAULT before the scheduler was reachable.
|
||||
t.quantum_remaining = Self::quantum_for(self.order);
|
||||
// Iterate 2.V — pin the age-bonus baseline so a freshly-spawned
|
||||
// thread doesn't inherit a large age bonus from the scheduler's
|
||||
// accumulated round_count.
|
||||
t.last_run_round = self.round_count;
|
||||
// M3.7 — populate the inter-thread reservation handle + slot id
|
||||
// so the interpreter can route lwarx/stwcx through the table.
|
||||
t.ctx.hw_id = slot_id;
|
||||
@@ -663,6 +738,11 @@ impl Scheduler {
|
||||
t.pcr_base = pcr_base;
|
||||
t.tls_base = tls_base;
|
||||
t.tls_values = vec![0; self.tls_slot_count];
|
||||
// Stage 0 — same ScanQuantum override as `spawn`; default_fields
|
||||
// doesn't know about the scheduler's order.
|
||||
t.quantum_remaining = Self::quantum_for(self.order);
|
||||
// Iterate 2.V — same baseline pin as `spawn`.
|
||||
t.last_run_round = self.round_count;
|
||||
self.slots[0].runqueue.push(t);
|
||||
mem.write_pcr_id(pcr_base, 0);
|
||||
self.recompute_slot_runnable(0);
|
||||
@@ -742,9 +822,17 @@ impl Scheduler {
|
||||
/// Called by the step loop at the top of each per-slot visit. Picks the
|
||||
/// highest-priority Ready thread on the slot, sets `running_idx`, and
|
||||
/// stashes `self.current` so exports can reach it.
|
||||
///
|
||||
/// Iterate 2.V: passes the scheduler's `round_count` to `pick_runnable`
|
||||
/// for age-priority computation, then stamps the winner's
|
||||
/// `last_run_round` so the next round's age math starts from now.
|
||||
pub fn begin_slot_visit(&mut self, hw_id: u8) {
|
||||
let now_round = self.round_count;
|
||||
let slot = &mut self.slots[hw_id as usize];
|
||||
slot.running_idx = slot.pick_runnable();
|
||||
slot.running_idx = slot.pick_runnable(now_round);
|
||||
if let Some(idx) = slot.running_idx {
|
||||
slot.runqueue[idx].last_run_round = now_round;
|
||||
}
|
||||
self.current = slot
|
||||
.running_idx
|
||||
.map(|idx| ThreadRef::new(hw_id, idx as u16));
|
||||
@@ -765,6 +853,18 @@ impl Scheduler {
|
||||
///
|
||||
/// Returns `true` if a rotation occurred (purely informational;
|
||||
/// callers don't need to act on it).
|
||||
/// Quantum reload value to use given the current `OrderMode`. Returns
|
||||
/// `QUANTUM_DEFAULT` for `Fixed`/`Seeded` so existing baselines stay
|
||||
/// byte-identical; returns `ticks.max(1)` for `ScanQuantum` so the Stage
|
||||
/// 0 spike can sweep faster rotations.
|
||||
#[inline]
|
||||
fn quantum_for(order: OrderMode) -> u32 {
|
||||
match order {
|
||||
OrderMode::ScanQuantum { ticks } => ticks.max(1),
|
||||
_ => QUANTUM_DEFAULT,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decrement_quantum(&mut self) -> bool {
|
||||
let Some(r) = self.current else { return false; };
|
||||
let slot = &mut self.slots[r.hw_id as usize];
|
||||
@@ -778,7 +878,7 @@ impl Scheduler {
|
||||
return false;
|
||||
}
|
||||
let my_pri = t.priority;
|
||||
t.quantum_remaining = QUANTUM_DEFAULT;
|
||||
t.quantum_remaining = Self::quantum_for(self.order);
|
||||
// Scan the rest of the runqueue for a same-priority Ready peer.
|
||||
// Priority-higher peers are already going to win the next
|
||||
// `pick_runnable` on this slot, so we only need to find an *equal*
|
||||
@@ -795,6 +895,9 @@ impl Scheduler {
|
||||
}
|
||||
let cand = &slot.runqueue[i];
|
||||
if cand.priority == my_pri && matches!(cand.state, HwState::Ready) {
|
||||
// Iterate 2.V — pin age-bonus baseline on the freshly
|
||||
// promoted thread so the next round sees age 0 for it.
|
||||
slot.runqueue[i].last_run_round = self.round_count;
|
||||
slot.running_idx = Some(i);
|
||||
self.current = Some(ThreadRef::new(r.hw_id, i as u16));
|
||||
return true;
|
||||
@@ -846,7 +949,7 @@ impl Scheduler {
|
||||
_ => return,
|
||||
}
|
||||
t.state = HwState::Ready;
|
||||
t.quantum_remaining = QUANTUM_DEFAULT;
|
||||
t.quantum_remaining = Self::quantum_for(self.order);
|
||||
self.timed_waits.retain(|&(_, tr)| tr != r);
|
||||
self.recompute_slot_runnable(r.hw_id);
|
||||
}
|
||||
@@ -868,7 +971,7 @@ impl Scheduler {
|
||||
}
|
||||
if t.suspend_count == 0 && matches!(t.state, HwState::Blocked(BlockReason::Suspended)) {
|
||||
t.state = HwState::Ready;
|
||||
t.quantum_remaining = QUANTUM_DEFAULT;
|
||||
t.quantum_remaining = Self::quantum_for(self.order);
|
||||
}
|
||||
self.recompute_slot_runnable(r.hw_id);
|
||||
prev
|
||||
@@ -1121,7 +1224,7 @@ impl Scheduler {
|
||||
BlockReason::Suspended
|
||||
}
|
||||
};
|
||||
t.quantum_remaining = QUANTUM_DEFAULT;
|
||||
t.quantum_remaining = Self::quantum_for(self.order);
|
||||
self.recompute_slot_runnable(r.hw_id);
|
||||
tracing::info!(
|
||||
"scheduler: advanced to deadline {} waking hw={} idx={}",
|
||||
@@ -1182,6 +1285,7 @@ impl Scheduler {
|
||||
/// `ctx_mut_ref(r).gpr[3]`. Returns the refs that were woken.
|
||||
pub fn unblock_on_deadlock(&mut self) -> Vec<ThreadRef> {
|
||||
let mut woken = Vec::new();
|
||||
let quantum = Self::quantum_for(self.order);
|
||||
for (hw_id, slot) in self.slots.iter_mut().enumerate() {
|
||||
for (idx, t) in slot.runqueue.iter_mut().enumerate() {
|
||||
if matches!(
|
||||
@@ -1191,7 +1295,7 @@ impl Scheduler {
|
||||
| HwState::Blocked(BlockReason::CriticalSection(_))
|
||||
) {
|
||||
t.state = HwState::Ready;
|
||||
t.quantum_remaining = QUANTUM_DEFAULT;
|
||||
t.quantum_remaining = quantum;
|
||||
woken.push(ThreadRef::new(hw_id as u8, idx as u16));
|
||||
}
|
||||
}
|
||||
@@ -1916,4 +2020,146 @@ mod tests {
|
||||
assert_eq!(s.thread(r).state, HwState::Ready);
|
||||
assert_eq!(s.thread(r).quantum_remaining, QUANTUM_DEFAULT);
|
||||
}
|
||||
|
||||
// ---- Stage 0: OrderMode::ScanQuantum --------------------------------
|
||||
|
||||
/// `Fixed` ordering reloads the stock quantum.
#[test]
fn quantum_for_fixed_returns_default() {
    let reload = Scheduler::quantum_for(OrderMode::Fixed);
    assert_eq!(reload, QUANTUM_DEFAULT);
}
|
||||
|
||||
/// `Seeded` ordering reloads the stock quantum as well; the seed value
/// plays no part in the quantum.
#[test]
fn quantum_for_seeded_returns_default() {
    let reload = Scheduler::quantum_for(OrderMode::Seeded { seed: 0xC0FFEE });
    assert_eq!(reload, QUANTUM_DEFAULT);
}
|
||||
|
||||
/// `ScanQuantum` reloads exactly the configured tick count.
#[test]
fn quantum_for_scan_quantum_returns_ticks() {
    let reload = Scheduler::quantum_for(OrderMode::ScanQuantum { ticks: 1000 });
    assert_eq!(reload, 1000);
}
|
||||
|
||||
/// A zero tick count is clamped up to 1.
#[test]
fn quantum_for_scan_quantum_floor_is_one() {
    // ticks=0 would deadlock the rotation; quantum_for clamps to >=1.
    let reload = Scheduler::quantum_for(OrderMode::ScanQuantum { ticks: 0 });
    assert_eq!(reload, 1);
}
|
||||
|
||||
fn mk_scheduler_with_order(order: OrderMode) -> Scheduler {
|
||||
let mut s = Scheduler::new();
|
||||
s.order = order;
|
||||
s
|
||||
}
|
||||
|
||||
/// A thread spawned while the scheduler is in `ScanQuantum` mode starts
/// with the configured tick count as its quantum.
#[test]
fn spawn_under_scan_quantum_uses_ticks() {
    let mut s = mk_scheduler_with_order(OrderMode::ScanQuantum { ticks: 7 });
    s.spawn(worker_spawn_params(1, 0x1000), &mut NullPcr)
        .expect("spawning the worker thread should succeed");

    let spawned = ThreadRef { hw_id: 0, idx: 0, generation: 0 };
    assert_eq!(s.thread(spawned).quantum_remaining, 7);
}
|
||||
|
||||
/// The initial thread installed while the scheduler is in `ScanQuantum`
/// mode starts with the configured tick count as its quantum.
#[test]
fn install_initial_under_scan_quantum_uses_ticks() {
    let mut s = mk_scheduler_with_order(OrderMode::ScanQuantum { ticks: 42 });

    let mut ctx = PpcContext::new();
    ctx.pc = 0x8200_0000;

    s.install_initial_thread(
        ctx,
        0x7000_0000,
        0x10_0000,
        0x7FFF_0000,
        0x7FFE_0000,
        0x1000,
        &mut NullPcr,
    );

    let initial = ThreadRef { hw_id: 0, idx: 0, generation: 0 };
    assert_eq!(s.thread(initial).quantum_remaining, 42);
}
|
||||
|
||||
/// Waking a blocked thread in `ScanQuantum` mode reloads its quantum with
/// the configured tick count rather than `QUANTUM_DEFAULT`.
#[test]
fn wake_ref_under_scan_quantum_reloads_ticks_not_default() {
    let mut s = mk_scheduler_with_order(OrderMode::ScanQuantum { ticks: 13 });

    let params = SpawnParams {
        guest_tid: 2,
        thread_handle: 0x2000,
        affinity_mask: 0b0010,
        pcr_base: 0x4000_1000,
        ..SpawnParams::default()
    };
    s.spawn(params, &mut NullPcr)
        .expect("spawning the worker thread should succeed");

    let r = ThreadRef { hw_id: 1, idx: 0, generation: 0 };

    // Park the thread on a wait with a nearly exhausted quantum so the
    // reload performed by the wake is observable.
    s.thread_mut(r).state = HwState::Blocked(BlockReason::WaitAny {
        handles: vec![0xDEAD],
        deadline: None,
    });
    s.thread_mut(r).quantum_remaining = 1;

    s.wake_ref(r);
    assert_eq!(s.thread(r).quantum_remaining, 13);
}
|
||||
|
||||
/// With two equal-priority threads on slot 0 and `ticks = 4`, the fourth
/// quantum decrement hands the slot to the peer, and the peer's quantum is
/// the configured tick count.
#[test]
fn decrement_quantum_under_scan_quantum_rotates_after_ticks() {
    let mut s = mk_scheduler_with_order(OrderMode::ScanQuantum { ticks: 4 });

    for tid in [1u32, 2] {
        let params = SpawnParams {
            guest_tid: tid,
            thread_handle: 0x1000 + tid * 4,
            affinity_mask: 0b0001,
            pcr_base: 0x4000_0000 + tid * 0x1000,
            ..SpawnParams::default()
        };
        s.spawn(params, &mut NullPcr)
            .expect("spawning the worker thread should succeed");
    }

    s.begin_slot_visit(0);
    let first_tid = s.thread(s.current.unwrap()).tid;

    // ticks=4: three decrements stay on first, the fourth rotates.
    for _ in 0..3 {
        assert!(!s.decrement_quantum());
    }
    assert!(s.decrement_quantum(), "fourth tick should rotate");

    let now_current = s.current.unwrap();
    let second_tid = s.thread(now_current).tid;
    assert_ne!(first_tid, second_tid);

    // And the freshly-current thread also gets ticks=4, not DEFAULT.
    assert_eq!(s.thread(now_current).quantum_remaining, 4);
}
|
||||
|
||||
/// `OrderMode::from_env` selects `ScanQuantum` for
/// `XENIA_SCHED_ORDER=quantum`, reads the tick count from
/// `XENIA_SCHED_QUANTUM`, and falls back to 1000 ticks when that value
/// is 0.
#[test]
fn order_from_env_parses_quantum_arm() {
    /// Puts an environment variable back to the value it had when captured
    /// (or removes it again) on drop, so a failing assertion cannot leak
    /// the temporary values into sibling tests.
    struct EnvRestore {
        key: &'static str,
        saved: Option<String>,
    }

    impl EnvRestore {
        fn capture(key: &'static str) -> Self {
            EnvRestore {
                key,
                saved: std::env::var(key).ok(),
            }
        }
    }

    impl Drop for EnvRestore {
        fn drop(&mut self) {
            // SAFETY: same single-writer assumption as the test body below.
            unsafe {
                match self.saved.take() {
                    Some(v) => std::env::set_var(self.key, v),
                    None => std::env::remove_var(self.key),
                }
            }
        }
    }

    let _restore_order = EnvRestore::capture("XENIA_SCHED_ORDER");
    let _restore_quantum = EnvRestore::capture("XENIA_SCHED_QUANTUM");

    // SAFETY: mutating the process environment is only sound while no other
    // thread reads or writes it concurrently.
    // NOTE(review): libtest runs tests on parallel threads by default, and
    // `Scheduler::new()` reads these variables through
    // `OrderMode::from_env()`, so this assumption only holds under
    // `--test-threads=1` or once every env-touching test is serialized
    // behind a shared Mutex.
    unsafe {
        std::env::set_var("XENIA_SCHED_ORDER", "quantum");
        std::env::set_var("XENIA_SCHED_QUANTUM", "250");
    }
    match OrderMode::from_env() {
        OrderMode::ScanQuantum { ticks } => assert_eq!(ticks, 250),
        other => panic!("expected ScanQuantum, got {:?}", other),
    }

    // ticks=0 falls back to the 1000 default (filter(>0)).
    // SAFETY: see above.
    unsafe { std::env::set_var("XENIA_SCHED_QUANTUM", "0") };
    match OrderMode::from_env() {
        OrderMode::ScanQuantum { ticks } => assert_eq!(ticks, 1000),
        other => panic!("expected ScanQuantum, got {:?}", other),
    }

    // The `EnvRestore` guards put both variables back when they go out of
    // scope, on success and on panic alike.
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user