From e7d0fcf2c9d0d11366d369379663438605c02498 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sun, 3 May 2026 14:24:47 +0200 Subject: [PATCH] =?UTF-8?q?fix(kernel):=20KRNBUG-017=20=E2=80=94=20real=20?= =?UTF-8?q?Kf*SpinLock=20+=20KeReleaseSpinLockFromRaisedIrql?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Kf-family spinlock exports were registered as stubs: KfAcquireSpinLock → stub_return_zero (didn't write lock) KfReleaseSpinLock → stub_success (didn't clear lock) KeReleaseSpinLockFromRaisedIrql → stub_success (same) KeTryToAcquireSpinLockAtRaisedIrql → returned 1 but didn't set lock value Guest code that read the lock value back (e.g. nested acquire/release sanity checks, debug assertions) saw 0 even after "acquiring", and could enter critical regions without contention serialization. Under `--parallel` the coarse Arc> already serializes us, so the audit's P0-under-parallel ranking is about correctness of the lock value visible to guest code, not mutual-exclusion (which is provided by the host mutex). Implementation mirrors canary's `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc`: - KfAcquireSpinLock: write 1 to *SpinLock, return 0 (old IRQL) - KfReleaseSpinLock: write 0 to *SpinLock - KeReleaseSpinLockFromRaisedIrql: write 0 to *SpinLock - KeTryToAcquireSpinLockAtRaisedIrql: write 1 to *SpinLock, return 1 Single-threaded HLE: contention can never be observed (we never run two guest threads simultaneously without holding the kernel mutex), so the spin-loop can degenerate to an unconditional acquire. Verification at -n 100M lockstep: swaps: 2 → 2 (unchanged) draws: 0 → 0 (gated by F2/F3/G) packets: ~59M (within noise) Tests: 76 kernel pass (no count change; existing harness covers the new write semantics implicitly via guest-memory smoke tests). Closes KRNBUG-017 (P0 under --parallel). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-kernel/src/exports.rs | 60 +++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/crates/xenia-kernel/src/exports.rs b/crates/xenia-kernel/src/exports.rs index a4f34af..ee6f086 100644 --- a/crates/xenia-kernel/src/exports.rs +++ b/crates/xenia-kernel/src/exports.rs @@ -49,7 +49,7 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x84, "KeQuerySystemTime", ke_query_system_time); state.register_export(Xboxkrnl, 0x85, "KeRaiseIrqlToDpcLevel", stub_return_zero); state.register_export(Xboxkrnl, 0x88, "KeReleaseSemaphore", ke_release_semaphore); - state.register_export(Xboxkrnl, 0x89, "KeReleaseSpinLockFromRaisedIrql", stub_success); + state.register_export(Xboxkrnl, 0x89, "KeReleaseSpinLockFromRaisedIrql", ke_release_spinlock_from_raised_irql); state.register_export(Xboxkrnl, 0x8F, "KeResetEvent", ke_reset_event); state.register_export(Xboxkrnl, 0x92, "KeResumeThread", ke_resume_thread); state.register_export(Xboxkrnl, 0x97, "KeSetAffinityThread", ke_set_affinity_thread); @@ -60,9 +60,9 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0xAE, "KeTryToAcquireSpinLockAtRaisedIrql", ke_try_acquire_spinlock); state.register_export(Xboxkrnl, 0xAF, "KeWaitForMultipleObjects", ke_wait_for_multiple_objects); state.register_export(Xboxkrnl, 0xB0, "KeWaitForSingleObject", ke_wait_for_single_object); - state.register_export(Xboxkrnl, 0xB1, "KfAcquireSpinLock", stub_return_zero); + state.register_export(Xboxkrnl, 0xB1, "KfAcquireSpinLock", kf_acquire_spin_lock); state.register_export(Xboxkrnl, 0xB3, "KfLowerIrql", stub_success); - state.register_export(Xboxkrnl, 0xB4, "KfReleaseSpinLock", stub_success); + state.register_export(Xboxkrnl, 0xB4, "KfReleaseSpinLock", kf_release_spin_lock); state.register_export(Xboxkrnl, 0x0152, "KeTlsAlloc", ke_tls_alloc); state.register_export(Xboxkrnl, 0x0153, "KeTlsFree", stub_success); state.register_export(Xboxkrnl, 0x0154, "KeTlsGetValue", ke_tls_get_value); @@ -522,8 +522,58 @@ fn ke_initialize_semaphore(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut mem.write_u32(sem_ptr + 0x10, limit); } -fn ke_try_acquire_spinlock(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { - ctx.gpr[3] = 1; // TRUE (acquired successfully in single-threaded mode) +fn ke_try_acquire_spinlock(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { + // r3 = KSPIN_LOCK*. Returns 1 (TRUE) on success. Single-threaded HLE + // mirrors canary's `KeTryToAcquireSpinLockAtRaisedIrql`: write 1 to + // the lock value (mark held) and return success. Under `--parallel` + // the coarse Arc> already serializes us. + let lock_ptr = ctx.gpr[3] as u32; + if lock_ptr != 0 { + mem.write_u32(lock_ptr, 1); + } + ctx.gpr[3] = 1; +} + +/// `KfAcquireSpinLock(KSPIN_LOCK *SpinLock)` — returns previous IRQL. +/// Per canary `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc` +/// the function raises IRQL to DISPATCH_LEVEL (2), spins on the lock, +/// then sets it to 1. Pre-fix this was `stub_return_zero` — guest code +/// could enter critical regions without ever taking the lock, leading +/// to subtle races even in lockstep when the same code path was +/// re-entered before the matching release fired. KRNBUG-017. +fn kf_acquire_spin_lock(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { + let lock_ptr = ctx.gpr[3] as u32; + if lock_ptr != 0 { + mem.write_u32(lock_ptr, 1); + } + // Old IRQL = PASSIVE_LEVEL (0). The new IRQL is DISPATCH_LEVEL (2), + // tracked implicitly by the kf_release path. Returning 0 matches the + // common-case "called from a passive-level routine" entry path. + ctx.gpr[3] = 0; +} + +/// `KfReleaseSpinLock(KSPIN_LOCK *SpinLock, KIRQL OldIrql)`. +/// Releases the spinlock and lowers IRQL to OldIrql. KRNBUG-017. +fn kf_release_spin_lock(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { + let lock_ptr = ctx.gpr[3] as u32; + if lock_ptr != 0 { + mem.write_u32(lock_ptr, 0); + } + ctx.gpr[3] = 0; +} + +/// `KeReleaseSpinLockFromRaisedIrql(KSPIN_LOCK *SpinLock)`. +/// Releases the spinlock without changing IRQL. KRNBUG-017. +fn ke_release_spinlock_from_raised_irql( + ctx: &mut PpcContext, + mem: &GuestMemory, + _state: &mut KernelState, +) { + let lock_ptr = ctx.gpr[3] as u32; + if lock_ptr != 0 { + mem.write_u32(lock_ptr, 0); + } + ctx.gpr[3] = 0; } fn ke_tls_alloc(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) {