ITERATE-2.V: scheduler priority aging closes 18-day AUDIT-049 wedge
Priority aging in xenia-cpu/scheduler.rs:pick_runnable
(effective_priority = base + age_bonus(now_round - last_run_round),
capped at +31, AGING_ROUNDS_PER_BONUS=1). Strict-priority was parking
priority=0 threads behind CPU-bound priority=15 audio mixer
(sub_824D1328 guest spinwait at PC=0x824d1404 on CPU5). Aging
eventually picks the starved thread, breaking the producer-consumer
cycle that has caused the 5-tid wedge at PC=0x824ac578 since AUDIT-049 (10 May).
Cascade observed: tid=13 clean exit; events 121K -> 13M (107x); last
host_ns 767ms -> 51,011ms (66x); 8 new threads spawn; VdSwap 1 -> 2.
Complete two-day iterate sequence (2026-05-27 -> 2026-05-28):
- 2.F: VdSwap drain timeout 900ms -> 1ms (xenia-gpu/handle.rs); 876x
perf win on VdSwap kernel callback
- 2.H: vA0000000 physical heap bucket added (state.rs, exports.rs);
ctx_ptrs now in 0xA0000000-0xBFFFFFFF range matching canary
- 2.L: Phase-A diff harness categorized [return_value mismatch],
[status mismatch], [args_resolved.path mismatch] tags
(tools/diff-events/diff_events.py); closes reading-error #41
(silent test-harness state leak invalidating trace diffs)
- 2.M: always-on exit-thread-state.json sibling to Phase-A JSONL
(event_log.rs + xenia-app/main.rs); closes reading-error #42
(Phase-A blind to blocked-forever waits)
- 2.Q: signal.match kernel instrumentation in NtSetEvent /
NtReleaseSemaphore / KeSetEvent / KeReleaseSemaphore
(exports.rs); emits target_handle + waiter_count + waiter_tids
- 2.T: wake.requested kernel instrumentation in wake_eligible_waiters
(exports.rs); emits target_tid + transition + new_state
- 2.V: scheduler priority aging (xenia-cpu/scheduler.rs) [keystone]
Plus accumulated WIP from earlier May (contention_manifest,
phase_b_snapshot, xam/xaudio enhancements, analysis db, xex loader,
xenia-app main loop, etc.). Audit-runs/ artifacts remain untracked
per project convention.
Tests: 300 xenia-cpu / 227 xenia-kernel / 5 xenia-app / 19 xenia-path
/ 30+ smaller suites -- all PASS, 0 regressions. Determinism preserved
(2x cold runs bit-identical at 13,003,881 events post-2.V).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -339,6 +339,23 @@ pub struct GpuSystem {
|
||||
/// `GpuSystem::new` and lives for the whole GPU lifetime — no
|
||||
/// per-frame churn.
|
||||
pub edram: crate::edram::ShadowEdram,
|
||||
/// 256-entry `DC_LUT_30_COLOR` gamma ramp (10-bit BGR packed per entry).
|
||||
/// Mirrors canary's `gamma_ramp_256_entry_table_` array on
|
||||
/// `CommandProcessor` (`command_processor.cc:130-148`). Pre-loaded
|
||||
/// with the linear sRGB ramp at construction so any code path that
|
||||
/// queries gamma before the guest writes its own ramp sees the same
|
||||
/// initial values as canary. MMIO read/write index handling for
|
||||
/// `DC_LUT_RW_INDEX` is NOT yet wired in ours, so guests can't access
|
||||
/// these bytes today; the field exists for state parity and to give
|
||||
/// future MMIO handlers a populated buffer.
|
||||
pub gamma_ramp_256: Vec<u32>,
|
||||
/// 128-entry per-channel `DC_LUT_PWL_DATA` gamma ramp (base/delta pairs,
|
||||
/// stored interleaved RGB → 384 u32 entries). Layout matches
|
||||
/// `gamma_ramp_pwl_rgb_[i][j]` in canary (`command_processor.cc:141-148`):
|
||||
/// index = `i * 3 + j` where `i ∈ [0,128)` and `j ∈ {0,1,2}` for R/G/B.
|
||||
/// Same status as `gamma_ramp_256`: state-parity only until MMIO
|
||||
/// handlers are added.
|
||||
pub gamma_ramp_pwl: Vec<u32>,
|
||||
}
|
||||
|
||||
impl GpuSystem {
|
||||
@@ -365,9 +382,47 @@ impl GpuSystem {
|
||||
last_resolve: None,
|
||||
texture_cache: crate::texture_cache::TextureCache::new(),
|
||||
edram: crate::edram::ShadowEdram::new(),
|
||||
gamma_ramp_256: Self::default_gamma_ramp_256(),
|
||||
gamma_ramp_pwl: Self::default_gamma_ramp_pwl(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Produce the initial 256-entry gamma table, one packed
/// `DC_LUT_30_COLOR` word per entry.
///
/// Entry `i` (for `i ∈ [0,256)`) scales the 8-bit index to a 10-bit
/// channel value with `i * 0x3FF / 0xFF` (integer division) and stores
/// that same value in all three channels, yielding a grey ramp from
/// `0` up to `0x3FF`. The formula is documented as mirroring canary's
/// `command_processor.cc:134-140`.
///
/// Packing within each word: blue occupies bits `[0..10)`, green bits
/// `[10..20)`, red bits `[20..30)`; the top two bits stay zero.
fn default_gamma_ramp_256() -> Vec<u32> {
    (0u32..256)
        .map(|step| {
            // 8-bit step widened to the 10-bit range shared by every channel.
            let channel = step * 0x3FF / 0xFF;
            let blue = channel;
            let green = channel << 10;
            let red = channel << 20;
            blue | green | red
        })
        .collect()
}
|
||||
|
||||
/// Produce the initial piecewise-linear gamma table: 128 segments, each
/// written three times in a row (once per R/G/B channel), for 384 words
/// in total. The element for segment `i`, channel `j` sits at index
/// `i * 3 + j`.
///
/// For segment `i ∈ [0,128)`:
/// * `base  = (i * 0xFFFF / 0x7F) & !0x3F` — low 6 bits cleared;
/// * `delta = 0x200` for every segment except the last (`i == 0x7F`),
///   which gets `0`.
///
/// Each `DC_LUT_PWL_DATA` word carries `base` in its low 16 bits and
/// `delta` in its high 16 bits. The formula is documented as mirroring
/// canary's `command_processor.cc:141-148`.
fn default_gamma_ramp_pwl() -> Vec<u32> {
    let mut ramp = Vec::with_capacity(128 * 3);
    for segment in 0u32..128 {
        let base = (segment * 0xFFFF / 0x7F) & !0x3Fu32;
        let delta: u32 = if segment >= 0x7F { 0 } else { 0x200 };
        let packed = ((delta & 0xFFFF) << 16) | (base & 0xFFFF);
        // Identical word for the R, G and B slots of this segment.
        ramp.extend([packed; 3]);
    }
    ramp
}
|
||||
|
||||
/// P8 — insert a shader blob + bump the FIFO so long-running games
|
||||
/// don't grow `shader_blobs` without bound. Caps at [`SHADER_BLOB_CAP`].
|
||||
/// Never evicts the currently-active VS/PS blobs (if they ended up at
|
||||
|
||||
@@ -390,7 +390,17 @@ impl GpuBackend {
|
||||
// fires; the safety-net fallback warning fired twice for
|
||||
// each Sylpheed run.
|
||||
let target = s.mmio.cp_rb_wptr.load(Ordering::Acquire);
|
||||
s.drain_until_wptr(mem, target, Duration::from_millis(900))
|
||||
// GPUBUG-DRAIN-001 (iterate-2F, 2026-05-27): cap the inline
|
||||
// drain at 1 ms so vd_swap does not block the main guest
|
||||
// thread for ~900 ms per swap. Canary's `VdSwap_entry`
|
||||
// returns in ~6.6 us — no synchronous drain. The 900 ms
|
||||
// deadline parked tid=1 long enough to starve the post-swap
|
||||
// worker fan-out at `sub_825070F0`, which in turn left
|
||||
// tid=13's wait predicate unsatisfiable (wedge at
|
||||
// PC=0x821CB1DC). Remaining packets stay queued in the
|
||||
// ring; the next drain (next vd_swap or kernel-callback
|
||||
// boundary) consumes them.
|
||||
s.drain_until_wptr(mem, target, Duration::from_millis(1))
|
||||
}
|
||||
GpuBackend::Threaded(h) => {
|
||||
let target_wptr = h.mmio.cp_rb_wptr.load(Ordering::Acquire);
|
||||
@@ -560,7 +570,12 @@ impl GpuWorker {
|
||||
// empty (rptr == wptr after modulo) or a packet
|
||||
// returns `Idle`/`Blocked`.
|
||||
self.system.sync_with_mmio();
|
||||
let deadline = Instant::now() + Duration::from_millis(900);
|
||||
// GPUBUG-DRAIN-001 (iterate-2F, 2026-05-27): cap at
|
||||
// 1 ms so the CPU's `recv_timeout(1s)` returns
|
||||
// promptly. Canary doesn't synchronously drain in
|
||||
// VdSwap; mirroring that frees tid=1 to spawn
|
||||
// post-swap workers in time.
|
||||
let deadline = Instant::now() + Duration::from_millis(1);
|
||||
while self.system.is_ready(&*memory) {
|
||||
if Instant::now() >= deadline {
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user