From 93f60a3ba030878e38caaa3f854bfb6f1c0a149c Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sat, 13 Jun 2026 18:08:46 +0200 Subject: [PATCH] [iterate-2M] PCR+0x10C (PRCB.current_cpu): init per-HW-thread to unwedge spin-barrier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our implementation never initialized the PRCB `current_cpu` byte at PCR+0x10C (prcb_data@0x100 + current_cpu@0xC). Canary sets it from `GetFakeCpuNumber(affinity)` (xthread.cc:847 `pcr->prcb_data.current_cpu = cpu_index`), which equals the HW thread id we already write at PCR+0x2C. Left unwritten, it read 0 for every thread. Guest spin-barrier `sub_824D1328` (used by the audio/update pump threads at entries 0x824D2878 / 0x824D2940, our tid 9 / tid 10) indexes a per-HW-thread occupancy byte array via `lbz r11, 268(r13)` then `stbx ..., [array+index]`. With index 0 for all threads, every thread marked slot 0; the multi-byte rendezvous signature it then spins on (`ld [obj+0x164]` compared against the packed per-slot expectation) could never assemble. Both pump threads busy-spun at pc 0x824d140c/0x824d1410 forever (Ready, 5M+ barrier iterations) and never ran their `KeSetEvent` loops — so the events they signal (the 21k-per-thread heartbeat in canary) never fired, starving the downstream worker handshake. Fix: write `hw_id` to PCR+0x10C alongside PCR+0x2C in both the static thread image init (thread.rs) and the dynamic PcrWriter (state.rs, used by scheduler spawn + affinity migration) so the two stay in sync. Runtime-verified on BOTH engines. Post-fix, the pump threads escape the barrier (barrier iterations 5M+ -> 3) and advance into their loop bodies, now correctly Blocked(WaitAny) at pc 0x824d28d0 / 0x824d29c0 (was spinning at 0x824d140c). Imports at n50M: 339,766 -> 451,508; deterministic (two cold runs byte-identical). Draws still 0 (a later, separate render gate). Golden re-baselined (instructions 50000000 -> 50000003). cargo test --workspace: 672 passed, 0 failed. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/xenia-app/tests/golden/sylpheed_n50m.json | 4 ++-- crates/xenia-kernel/src/state.rs | 10 ++++++++++ crates/xenia-kernel/src/thread.rs | 5 +++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/crates/xenia-app/tests/golden/sylpheed_n50m.json b/crates/xenia-app/tests/golden/sylpheed_n50m.json index 066a518..552dc43 100644 --- a/crates/xenia-app/tests/golden/sylpheed_n50m.json +++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json @@ -1,6 +1,6 @@ { - "instructions": 50000000, - "imports": 339766, + "instructions": 50000003, + "imports": 451508, "unimpl": 0, "draws": 0, "swaps": 2, diff --git a/crates/xenia-kernel/src/state.rs b/crates/xenia-kernel/src/state.rs index 0d6c8cb..0f427ae 100644 --- a/crates/xenia-kernel/src/state.rs +++ b/crates/xenia-kernel/src/state.rs @@ -17,6 +17,16 @@ impl PcrWriter for GuestMemoryPcr<'_> { // `GuestMemory::write_u32` takes `&self` post-M2 trait flip; the // wrapping `&'a GuestMemory` is sufficient. self.0.write_u32(pcr_base + 0x2C, hw_id as u32); + // PRCB.current_cpu byte at PCR+0x10C (prcb_data@0x100 + current_cpu@0xC). + // Canary writes `GetFakeCpuNumber(affinity)` here (xthread.cc:847 + // `pcr->prcb_data.current_cpu = cpu_index`), which equals the HW thread + // id we already compute. Guest spin-barriers (e.g. sub_824D1328, used by + // the audio/update pump threads at entries 0x824D2878/0x824D2940) index a + // per-HW-thread occupancy array by `lbz r11, 268(r13)` = this byte. Left + // unwritten it stayed 0 for every thread, so all threads collided on + // slot 0 and the multi-thread rendezvous signature never assembled — + // the pump threads spun forever and never fired their KeSetEvent loops. 
+ self.0.write_u8(pcr_base + 0x10C, hw_id); } } diff --git a/crates/xenia-kernel/src/thread.rs b/crates/xenia-kernel/src/thread.rs index 9f4bc53..96cc09b 100644 --- a/crates/xenia-kernel/src/thread.rs +++ b/crates/xenia-kernel/src/thread.rs @@ -57,6 +57,11 @@ pub fn allocate_thread_image( mem.write_u32(pcr_base, tls_base); mem.write_u32(pcr_base + 0x2C, hw_thread_id as u32); mem.write_u32(pcr_base + 0x100, 0x1000); + // +0x10C prcb_data.current_cpu — canary `pcr->prcb_data.current_cpu` + // (PRCB@0x100 + current_cpu@0xC). Guest spin-barriers index a + // per-HW-thread slot array by `lbz r11, 268(r13)` = this byte; it + // must equal the HW thread id (== PCR+0x2C). See state.rs PcrWriter. + mem.write_u8(pcr_base + 0x10C, hw_thread_id); mem.write_u32(pcr_base + 0x150, 0); Some(ThreadImage {