use std::collections::HashMap;

use xenia_cpu::scheduler::{PcrWriter, Scheduler};
use xenia_cpu::{PpcContext, ThreadRef};
use xenia_memory::{GuestMemory, MemoryAccess};
use xenia_vfs::VfsDevice;

use crate::audit::{HandleAudit, HandleAuditEntry};
use crate::objects::KernelObject;
use crate::ui_bridge::UiBridge;

/// Adapter that performs the PCR+0x2C write against guest memory.
///
/// `Scheduler::spawn` and Axis 4's migration go through the [`PcrWriter`]
/// trait, so `xenia-cpu` does not have to depend on the memory crate.
pub struct GuestMemoryPcr<'a>(pub &'a GuestMemory);

impl PcrWriter for GuestMemoryPcr<'_> {
    /// Store `hw_id`, widened to 32 bits, at `pcr_base + 0x2C`.
    fn write_pcr_id(&mut self, pcr_base: u32, hw_id: u8) {
        // PCR offset that receives the hardware-thread id.
        const PCR_HW_ID_OFFSET: u32 = 0x2C;

        // `GuestMemory::write_u32` takes `&self` post-M2 trait flip, so the
        // shared `&'a GuestMemory` held by the wrapper is all we need.
        let mem: &GuestMemory = self.0;
        let slot_addr = pcr_base + PCR_HW_ID_OFFSET;
        mem.write_u32(slot_addr, u32::from(hw_id));
    }
}

/// Function signature for HLE kernel exports.
///
/// The first argument is the **currently running** HW thread's `PpcContext`,
/// which the caller has temporarily moved out of the scheduler slot to avoid
/// aliasing. Exports that only touch register/GPR state use `ctx` directly;
/// exports that need scheduler state (spawn/park/wake/tls/etc.) reach
/// through `state.scheduler` — note that `state.scheduler.hw_threads[current]`
/// holds a placeholder `PpcContext` for the duration of the call, not the
/// live one passed as `ctx`.
pub type KernelExportFn = fn(&mut PpcContext, &GuestMemory, &mut KernelState);

/// Module identifier for kernel exports.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ModuleId {
    Xboxkrnl,
    Xam,
    Xbdm,
}

/// Pseudo-`HMODULE` values returned by `XexGetModuleHandle` and accepted by
/// `XexGetProcedureAddress`. Distinct from real loaded-image bases
/// (>=0x82000000) and from kernel handles (0x1000+, allocated by
/// `alloc_handle`). The 0xFFFE_xxxx prefix is unused by both guest segments
/// and our handle allocator.
pub const HMODULE_XBOXKRNL: u32 = 0xFFFE_0001; pub const HMODULE_XAM: u32 = 0xFFFE_0002; /// Central kernel state tracking all guest OS state. pub struct KernelState { exports: HashMap<(ModuleId, u32), (&'static str, KernelExportFn)>, /// M2.4: bump allocator for kernel handles. `AtomicU32` so concurrent /// HLE calls under M3 can `fetch_add` without a lock. `Relaxed` is /// fine — the allocated value is a fresh ID with no prior payload to /// publish; observers (the kernel object table) are guarded by /// their own synchronization. next_handle: std::sync::atomic::AtomicU32, /// Scheduler managing all emulated HW threads + their per-slot /// runqueues. Starts empty — the app installs the initial guest thread /// on slot 0 via `KernelState::install_initial_thread` once it has the /// entry address. pub scheduler: Scheduler, /// TLS slot allocator — index counter only. Per-thread *values* live on /// `GuestThread::tls_values` (see scheduler). M2.4: `AtomicU32`. pub next_tls_index: std::sync::atomic::AtomicU32, /// Critical-section waiter map: guest `cs_ptr` → guest threads parked /// on it. Critical sections are in guest memory (not kernel objects), /// so their waiter list lives here rather than on an object. pub cs_waiters: HashMap>, /// Kernel object table: handle → object pub objects: HashMap, /// Bump allocator for guest heap (NtAllocateVirtualMemory etc.). /// M2.4: `AtomicU32` for lock-free concurrent allocation. pub heap_cursor: std::sync::atomic::AtomicU32, /// Stack allocator cursor for MmCreateKernelStack. M2.4: atomic. pub stack_cursor: std::sync::atomic::AtomicU32, /// GPU command buffer address (set by VdGetSystemCommandBuffer) pub gpu_command_buffer: u32, /// GPU backend. M1.4: was `xenia_gpu::GpuSystem` directly, now a /// [`xenia_gpu::GpuBackend`] enum so the kernel can hold either an /// inline `GpuSystem` (synchronous, default) or a `GpuHandle` proxy /// pointing at a worker thread (`--gpu-thread`). 
Forwarding methods /// on the enum keep call sites in [`crate::exports`] terse. pub gpu: xenia_gpu::GpuBackend, /// Monotonic packet number returned by `XamInputGetState`. Games detect /// input changes by watching this increment. pub input_packet_number: u32, /// Previous gamepad snapshot; `input_packet_number` only advances when /// the state bytes actually change, matching host XInput semantics. pub last_input_bytes: u128, /// Image base of the loaded XEX (for XexExecutableModuleHandle etc.) pub image_base: u32, /// Next thread ID. M2.4: atomic. pub next_thread_id: std::sync::atomic::AtomicU32, /// Virtual file system for NtCreateFile/NtReadFile/etc. The app mounts /// the disc image or host directory into this slot; file I/O handlers /// route all reads through it. pub vfs: Option>, /// Bridge to the host UI. `None` when running headless. Installed by /// `cmd_exec` when the user passes `--ui`. pub ui: Option, /// P6 — graphics interrupt + synthetic v-sync bookkeeping. Registers /// the callback set by `VdSetGraphicsInterruptCallback` and tracks /// the paused-context snapshot while HW thread 0 is running it. pub interrupts: crate::interrupts::InterruptState, /// Per-handle refcount. Since `NtDuplicateObject` aliases (returns the /// source handle value as the "new" handle rather than minting a fresh /// id), a single handle commonly has multiple logical references. This /// map tracks that count so a stray `NtClose` on one reference doesn't /// destroy the object while another reference is still live. Canary's /// `ObjectTable::ReleaseHandle` (object_table.cc:189) is the parity /// reference. Initialized to 1 in `alloc_handle_for`; incremented in /// `nt_duplicate_object` when `DUPLICATE_CLOSE_SOURCE` is absent; /// decremented in `nt_close` which drops the underlying object only /// when the count reaches zero. pub handle_refcount: HashMap, /// Pending timer expirations — `(deadline, handle)` sorted ascending by /// deadline. 
Pushed by `arm_timer`, popped by `fire_due_timers`. Kept in /// lockstep with the per-`Timer` object's `deadline` field via the /// `arm_timer`/`disarm_timer` helpers. See the plan's step 3/6 for the /// design rationale — timer deadlines coexist with /// `Scheduler::timed_waits` but track a different class (signaled object /// fires, not thread wake-ups). pub pending_timer_fires: Vec<(u64, u32)>, /// Per-handle signal/wait/wake audit trail. Default `enabled=false` → /// every record method is a no-op. Flip via `--trace-handles`/ /// `XENIA_TRACE_HANDLES` to diagnose missing-signal deadlocks (handles /// 0x10FC / 0x1014 / 0x1104 / 0x10DC / 0x10F0 specifically). See /// [`crate::audit`] for layout. pub audit: HandleAudit, /// M2.2 — banked reservation table for `lwarx`/`stwcx.` under M3's /// per-HW-thread parallelism. Always allocated. Consulted by the /// interpreter when `reservations.is_enabled()` is true; otherwise /// the legacy per-`PpcContext` fields drive observable behavior. /// Settable via `--reservations-table` / `XENIA_RESERVATIONS_TABLE=1` /// for golden verification, or implicitly under `--parallel`. /// See [`xenia_cpu::ReservationTable`] for the concurrency model. pub reservations: std::sync::Arc, /// Map from `(module, ordinal)` to the guest-side import-thunk address /// resolved at load time. Reverse of `xenia-app/src/main.rs`'s /// `thunk_map`. Populated from xenia-app's Phase 1 (record_type==1 /// only). Used by `xex_get_procedure_address` to resolve ordinals back /// to callable thunks. thunks_by_ordinal: HashMap<(ModuleId, u16), u32>, /// First-Pixels diagnostic latch. Set the first time /// `RtlRaiseException` fires with code `0xE06D7363` (MSVC C++ throw) /// so the deep stack-walk + `runtime_error` decode in /// `rtl_raise_exception` only emits once per run, regardless of how /// many subsequent throws fire. Reset on each fresh process start. 
pub cxx_throw_logged: bool, } impl KernelState { /// Construct a kernel with the supplied GPU backend. /// /// The caller (typically `cmd_exec_inner`) decides whether to install /// an inline backend (default) or a threaded one (`--gpu-thread`). /// Most existing call sites build via [`Self::new`], which defaults to /// an inline backend; the threaded constructor lives at /// [`Self::with_gpu`]. pub fn with_gpu(gpu: xenia_gpu::GpuBackend) -> Self { // Scheduler starts empty; the app installs the initial thread on // slot 0 via `install_initial_thread` right after construction. let mut scheduler = Scheduler::new(); use std::sync::atomic::AtomicU32; let reservations = std::sync::Arc::new(xenia_cpu::ReservationTable::new()); // M3.7 — wire the reservation table to the scheduler so // `spawn`/`install_initial_thread` populate every PpcContext's // `reservation_table` clone. The table is `disabled` by // default; `--reservations-table` / `XENIA_RESERVATIONS_TABLE` // / M3 spawn flip it on. scheduler.set_reservation_table(Some(reservations.clone())); let mut state = Self { exports: HashMap::new(), next_handle: AtomicU32::new(0x1000), scheduler, next_tls_index: AtomicU32::new(0), cs_waiters: HashMap::new(), objects: HashMap::new(), heap_cursor: AtomicU32::new(0x4000_0000), // Start of user heap region stack_cursor: AtomicU32::new(0x7100_0000), // Above main stack gpu_command_buffer: 0, gpu, input_packet_number: 0, last_input_bytes: 0, image_base: 0, next_thread_id: AtomicU32::new(1), vfs: None, ui: None, interrupts: crate::interrupts::InterruptState::default(), handle_refcount: HashMap::new(), pending_timer_fires: Vec::new(), audit: HandleAudit::default(), reservations, thunks_by_ordinal: HashMap::new(), cxx_throw_logged: false, }; crate::exports::register_exports(&mut state); crate::xam::register_exports(&mut state); state } /// Default constructor — installs an inline `GpuSystem`. Kept for /// callers that don't (yet) thread a `GpuBackend` choice through. 
pub fn new() -> Self { Self::with_gpu(xenia_gpu::GpuBackend::Inline(xenia_gpu::GpuSystem::new())) } pub fn register_export( &mut self, module: ModuleId, ordinal: u32, name: &'static str, func: KernelExportFn, ) { self.exports.insert((module, ordinal), (name, func)); } /// Record an import-thunk address resolved at load time. Called once /// per `record_type==1` import in xenia-app's Phase 1. Idempotent: a /// duplicate ordinal overwrites (later wins; in practice the loader /// emits each ordinal once per module). pub fn register_thunk(&mut self, module: ModuleId, ordinal: u16, address: u32) { self.thunks_by_ordinal.insert((module, ordinal), address); } /// Resolve a `(module, ordinal)` to its registered thunk address. pub fn resolve_thunk(&self, module: ModuleId, ordinal: u16) -> Option { self.thunks_by_ordinal.get(&(module, ordinal)).copied() } /// Map a pseudo-`HMODULE` (as returned by `XexGetModuleHandle`) back /// to its `ModuleId`. Returns `None` for unknown handles, including /// the loaded XEX's `image_base` (which is *not* a kernel module). pub fn module_id_from_hmodule(&self, handle: u32) -> Option { match handle { HMODULE_XBOXKRNL => Some(ModuleId::Xboxkrnl), HMODULE_XAM => Some(ModuleId::Xam), _ => None, } } /// Dispatch a kernel export on the current HW thread. Uses `mem::replace` /// to temporarily move the active `PpcContext` out of its scheduler slot, /// so the export function can receive `&mut ctx` while also getting /// `&mut self` (which contains the scheduler). Without this, the export /// signature would have to avoid aliasing via a bundle struct — see the /// approved plan's ExportCtx section for the alternative we rejected. /// /// While the export runs, `scheduler.hw_threads[current_hw_id].ctx` holds /// a freshly-constructed placeholder. Exports that reach through /// `state.scheduler` must not touch the current slot's `ctx` field. 
/// /// **Perf note (First-Pixels M1):** this function fires ~250K/s on /// Sylpheed (1 import per 40 guest instructions). A former /// `#[tracing::instrument]` attribute + two `tracing::info!` call /// sites made up ~28% of `run_execution` wall time on a post-Tier-3 /// profile — most of it in `tracing::span::Span::new` + /// `Layered::new_span` + `ErrorLayer::on_new_span`. The span was at /// `level = "debug"` but the span **construction** happened /// unconditionally; only the emit was level-gated. Removing the /// attribute + the two `info!` lines recovers the overhead without /// losing any observability — the `metrics::counter!("kernel.calls", /// "name" => name)` below still tracks per-export counts, and /// unimplemented lookups still emit a `warn!`. pub fn call_export( &mut self, module: ModuleId, ordinal: u32, mem: &GuestMemory, ) -> bool { // The thread whose ctx we're swapping out must be addressed by // `ThreadRef`, not `hw_id` — under per-slot runqueues a bare // `hw_id` alone can't distinguish multiple threads on the same // slot, and Axis 4 migration can change the slot underneath us. let r = self .scheduler .current .expect("call_export: no current thread"); let mut ctx = std::mem::replace( self.scheduler.ctx_mut_ref(r), PpcContext::new(), ); let result = if let Some(&(name, func)) = self.exports.get(&(module, ordinal)) { metrics::counter!("kernel.calls", "name" => name).increment(1); tracing::trace!(target: "probe_calls", "hw={} call={} r3={:#x} r4={:#x} r5={:#x} lr={:#x}", r.hw_id, name, ctx.gpr[3], ctx.gpr[4], ctx.gpr[5], ctx.lr); func(&mut ctx, mem, self); true } else { metrics::counter!("kernel.unimplemented").increment(1); tracing::warn!( module = ?module, ordinal = format_args!("{:#x}", ordinal), "unimplemented kernel export" ); ctx.gpr[3] = 0; false }; // Restore the (possibly mutated) ctx by ThreadRef. 
Axis 4 // self-migration (KeSetAffinityThread(NtCurrentThread, ...)) // updates `scheduler.current` in place; re-read here so we // restore onto the thread's new slot, not its old one. let final_ref = self.scheduler.current.unwrap_or(r); *self.scheduler.ctx_mut_ref(final_ref) = ctx; result } /// Axis 4: `KeSetAffinityThread` orchestration. Drives the scheduler's /// migration and fixes up every `ThreadRef` held outside the /// scheduler (kernel object waiter lists, critical-section waiters, /// `interrupts.injected_ref`). Returns the previous mask. pub fn set_affinity(&mut self, handle: u32, new_mask: u8, mem: &GuestMemory) -> u8 { let Some(r) = self.scheduler.find_by_handle(handle) else { return 0; }; let (old_mask, _new_ref, fixup) = self.scheduler.set_affinity_ref( r, new_mask, &mut GuestMemoryPcr(mem), ); if let Some(fx) = fixup { use crate::objects::KernelObject; for obj in self.objects.values_mut() { match obj { KernelObject::Event { waiters, .. } | KernelObject::Semaphore { waiters, .. } | KernelObject::Thread { waiters, .. } | KernelObject::Mutex { waiters, .. } => { for w in waiters.iter_mut() { fx.apply(w); } } _ => {} } } for list in self.cs_waiters.values_mut() { for w in list.iter_mut() { fx.apply(w); } } if let Some(ref mut ir) = self.interrupts.injected_ref { fx.apply(ir); } } old_mask } /// Install the initial (main) guest thread on HW slot 0. Called once at /// startup after the app allocates the main stack/PCR/TLS blocks. pub fn install_initial_thread( &mut self, ctx: PpcContext, stack_base: u32, stack_size: u32, pcr_base: u32, tls_base: u32, thread_handle: u32, mem: &GuestMemory, ) { self.scheduler.install_initial_thread( ctx, stack_base, stack_size, pcr_base, tls_base, thread_handle, &mut GuestMemoryPcr(mem), ); } pub fn export_name(&self, module: ModuleId, ordinal: u32) -> Option<&'static str> { self.exports.get(&(module, ordinal)).map(|&(name, _)| name) } pub fn alloc_handle(&mut self) -> u32 { // M2.4: lock-free fetch_add. 
Relaxed is sufficient — IDs are // opaque tokens; no payload is sequenced against the counter. self.next_handle .fetch_add(4, std::sync::atomic::Ordering::Relaxed) } pub fn alloc_handle_for(&mut self, obj: KernelObject) -> u32 { let h = self.alloc_handle(); self.objects.insert(h, obj); // Each fresh handle starts with one logical reference (the creator). // `NtDuplicateObject` bumps this; `NtClose` decrements; the object is // only dropped when the count reaches zero. See `nt_close` for the // aliased-handle rationale. self.handle_refcount.insert(h, 1); h } // ===== Handle audit hooks ===== // // These are no-ops when `audit.enabled == false`, so call sites can // unconditionally invoke them without a hot-path branch in release builds // (the `inline` `if !enabled return` short-circuits before any work). /// Build a [`HandleAuditEntry`] describing the *current* call-site — /// captures cycle (slot-0 timebase), current `tid`, and `lr` from the /// passed `PpcContext`. fn audit_entry(&self, lr: u32, source: &'static str, aux: u64) -> HandleAuditEntry { let hw_id = self.scheduler.current_hw_id().unwrap_or(0); let cycle = self.scheduler.ctx(hw_id).timebase; let tid = self.scheduler.tid(hw_id).unwrap_or(0); HandleAuditEntry { cycle, tid, lr, source, aux } } /// Record the creation of a fresh handle. `kind` is one of the stable /// labels documented on [`crate::audit::HandleAuditTrail::kind`]. pub fn audit_create(&mut self, handle: u32, kind: &'static str, lr: u32, source: &'static str) { if !self.audit.enabled { return; } let entry = self.audit_entry(lr, source, 0); self.audit.record_create(handle, kind, entry); } /// Record a Set/Pulse/Release/etc. call against a handle. `aux` is the /// previous signal state (or per-export-specific data). 
pub fn audit_signal(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) { if !self.audit.enabled { return; } let entry = self.audit_entry(lr, source, aux); self.audit.record_signal(handle, entry); } /// Record a `Wait*` call against a handle. `aux` packs `(alertable as u64) /// | (timeout_kind << 8)` etc. — schema is informal; the dump just prints /// it. pub fn audit_wait(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) { if !self.audit.enabled { return; } let entry = self.audit_entry(lr, source, aux); self.audit.record_wait(handle, entry); } /// Record a wake event (called from `wake_eligible_waiters`). `aux` /// is the status code stamped into the woken thread's `gpr[3]`. pub fn audit_wake(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) { if !self.audit.enabled { return; } let entry = self.audit_entry(lr, source, aux); self.audit.record_wake(handle, entry); } /// Read a TLS slot for the currently running HW thread. pub fn tls_get(&self, index: u32) -> u64 { self.scheduler.tls_get(index) } /// Write a TLS slot for the currently running HW thread. pub fn tls_set(&mut self, index: u32, value: u64) { self.scheduler.tls_set(index, value); } /// Allocate a new global TLS slot index. Grows every HW thread's /// `tls_values` array to match. pub fn tls_alloc(&mut self) -> u32 { use std::sync::atomic::Ordering; // M2.4: atomic bump. The Scheduler::tls_grow_to call still needs // a coherent post-bump value, so we read the new size from the // fetch_add return. let idx = self.next_tls_index.fetch_add(1, Ordering::Relaxed); let new_size = idx + 1; self.scheduler.tls_grow_to(new_size as usize); idx } /// Allocate guest memory from the heap bump allocator. /// Returns the base address of the allocated region. pub fn heap_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option { use std::sync::atomic::Ordering; let aligned_size = (size + 0xFFF) & !0xFFF; // Page-align // M2.4: atomic bump, then verify post-bump invariants. 
If the // bump pushed us past the heap-region ceiling, the cursor stays // advanced — subsequent allocations also fail, matching the // pre-M2 sequential semantics. We don't try to "undo" the bump // because that opens a CAS-loop race for marginal benefit (a // failing alloc near the limit is already game-over). let base = self.heap_cursor.fetch_add(aligned_size, Ordering::Relaxed); let new_top = base.checked_add(aligned_size)?; if new_top > 0x6FFF_FFFF { return None; } let protect = xenia_memory::page_table::MemoryProtect::READ | xenia_memory::page_table::MemoryProtect::WRITE; mem.alloc(base, aligned_size, protect).ok()?; Some(base) } /// Allocate a kernel stack. pub fn stack_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option { use std::sync::atomic::Ordering; let aligned_size = (size + 0xFFF) & !0xFFF; let base = self.stack_cursor.fetch_add(aligned_size, Ordering::Relaxed); let protect = xenia_memory::page_table::MemoryProtect::READ | xenia_memory::page_table::MemoryProtect::WRITE; mem.alloc(base, aligned_size, protect).ok()?; Some(base + aligned_size) // Return top of stack } // ===== Timer subsystem ===== /// Idempotent arm — removes any prior entry for `handle`, then inserts /// the new `(deadline, handle)` pair and re-sorts by deadline ascending. /// The per-`Timer` object's `deadline` field must be set separately by /// the caller (see `NtSetTimerEx` in exports.rs) — this helper only /// manages the central pending-fires list so `fire_due_timers` has a /// sorted head to peek. pub fn arm_timer(&mut self, handle: u32, deadline: u64) { self.pending_timer_fires.retain(|&(_, h)| h != handle); self.pending_timer_fires.push((deadline, handle)); self.pending_timer_fires.sort_by_key(|&(d, _)| d); } /// Idempotent disarm — strip any entry for `handle`. Safe to call /// regardless of prior state; `NtClose`, `NtCancelTimer`, and the /// periodic-rearm guard all invoke this. 
pub fn disarm_timer(&mut self, handle: u32) { self.pending_timer_fires.retain(|&(_, h)| h != handle); } /// Peek the earliest pending timer deadline. Paired with /// `Scheduler::earliest_wait_deadline` by the main loop's "advance to /// next event" coordination — the earlier of the two drives /// `advance_all_timebases_to`. pub fn earliest_timer_deadline(&self) -> Option { self.pending_timer_fires.first().map(|&(d, _)| d) } /// Fire every timer whose deadline is `<= now` (derived from slot 0's /// timebase, matching `parse_timeout`'s "current thread" fallback). /// For each fire: mark the timer `signaled=true`, clear its /// `deadline`, rearm if periodic, then wake eligible waiters via /// `exports::wake_eligible_waiters`. Returns `true` iff any timer /// fired — the caller uses this to decide whether the scheduler round /// needs a follow-up `advance_to_next_wake_if_due` step. pub fn fire_due_timers(&mut self) -> bool { let now = self.scheduler.ctx(0).timebase; let mut fired = false; loop { let Some(&(deadline, handle)) = self.pending_timer_fires.first() else { break; }; if deadline > now { break; } self.pending_timer_fires.remove(0); // Mark signaled + capture period before any rearm so we don't // double-borrow the object while calling wake_eligible_waiters. let periodic_next = if let Some(KernelObject::Timer { signaled, deadline: obj_deadline, period_ticks, .. }) = self.objects.get_mut(&handle) { *signaled = true; *obj_deadline = None; if *period_ticks > 0 { Some(now + *period_ticks) } else { None } } else { // Closed handle — its entry lingered because disarm on // NtClose was missed, OR fire_due_timers picked up a // race. Skip silently; nothing to wake. None }; if let Some(next) = periodic_next { if let Some(KernelObject::Timer { deadline, .. 
}) = self.objects.get_mut(&handle) { *deadline = Some(next); } self.arm_timer(handle, next); } crate::exports::wake_eligible_waiters(self, handle); fired = true; } fired } /// Handle deadline-expiry cleanup for a thread whose wait timed out. /// Called by the main loop right after `Scheduler::advance_to_next_wake` /// returns a `Some((ref, reason))`. Stamps `STATUS_TIMEOUT` into the /// woken thread's `gpr[3]` and scrubs its `ThreadRef` out of any /// handle's waiter list so a later signal can't consume the /// auto-reset slot into a stale waiter. /// /// `BlockReason::DelayUntil` is a pure sleep and expects /// `STATUS_SUCCESS` — the default pre-populated value in /// `ke_delay_execution_thread` — so we leave `gpr[3]` alone for it. pub fn handle_timeout_wake( &mut self, r: ThreadRef, reason: xenia_cpu::scheduler::BlockReason, ) { use xenia_cpu::scheduler::BlockReason; const STATUS_TIMEOUT: u64 = 0x0000_0102; match reason { BlockReason::WaitAny { handles, .. } | BlockReason::WaitAll { handles, .. } => { self.scheduler.ctx_mut_ref(r).gpr[3] = STATUS_TIMEOUT; for h in handles { if let Some(obj) = self.objects.get_mut(&h) { if let Some(waiters) = obj.waiters_mut() { waiters.retain(|&w| w != r); } } } } BlockReason::DelayUntil(_) => { // Pure sleep → default STATUS_SUCCESS is correct; no handles // to scrub. } BlockReason::CriticalSection(cs_ptr) => { self.scheduler.ctx_mut_ref(r).gpr[3] = STATUS_TIMEOUT; if let Some(list) = self.cs_waiters.get_mut(&cs_ptr) { list.retain(|&w| w != r); } } BlockReason::Suspended => {} } } } impl Default for KernelState { fn default() -> Self { Self::new() } } #[cfg(test)] mod tests { use super::*; use xenia_memory::GuestMemory; /// Ten consecutive `heap_alloc(0x14)` calls must return distinct /// page-aligned addresses. 
A previous bug had kernel exports passing 0 as /// `size`, causing the bump allocator to return the same address every /// time — 10 "allocations" that all aliased 0x40105000 and silently /// corrupted the guest's static-constructor state. #[test] fn heap_alloc_advances_for_nonzero_size() { let mut mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let mut seen = Vec::new(); for _ in 0..10 { let addr = state .heap_alloc(0x14, &mut mem) .expect("heap must have room for 0x14 bytes"); assert_eq!(addr & 0xFFF, 0, "heap returns page-aligned addresses"); assert!(!seen.contains(&addr), "heap returned duplicate address {addr:#x}"); seen.push(addr); } } /// `heap_alloc(0)` must not advance the cursor (it has nothing to do). /// The kernel exports that previously hit this path did so because they /// read the wrong argument register; guarded at the export boundary now. #[test] fn heap_alloc_zero_is_noop_in_cursor() { use std::sync::atomic::Ordering; let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let before = state.heap_cursor.load(Ordering::Relaxed); let _ = state.heap_alloc(0, &mem); let after = state.heap_cursor.load(Ordering::Relaxed); assert_eq!(before, after, "zero-size alloc must not advance heap cursor"); } /// M2.4: concurrent handle allocations must produce distinct values. /// Ten threads each allocate 100 handles via `alloc_handle`; the union /// must contain exactly 1000 distinct values, and the maximum equals /// `0x1000 + 4 * (1000 - 1)` (ascending step is 4 per the kernel /// allocator's policy). #[test] fn concurrent_alloc_handle_distinct() { use std::collections::HashSet; use std::sync::Mutex; use std::sync::atomic::{AtomicU32, Ordering}; // Use a free-standing AtomicU32 mirroring `next_handle`'s semantics; // we can't easily share `&mut KernelState` across threads. The // production code uses the same `fetch_add(4, Relaxed)` recipe. 
let counter = std::sync::Arc::new(AtomicU32::new(0x1000)); let collected: std::sync::Arc>> = std::sync::Arc::new(Mutex::new(HashSet::new())); let mut handles = Vec::new(); for _ in 0..10 { let c = counter.clone(); let s = collected.clone(); handles.push(std::thread::spawn(move || { let mut local = Vec::with_capacity(100); for _ in 0..100 { local.push(c.fetch_add(4, Ordering::Relaxed)); } let mut g = s.lock().unwrap(); for v in local { g.insert(v); } })); } for h in handles { h.join().unwrap(); } let set = collected.lock().unwrap(); assert_eq!( set.len(), 1000, "expected 1000 distinct handles, got {}", set.len() ); assert!(set.iter().all(|h| (h - 0x1000) % 4 == 0)); } }