use std::collections::HashMap;

use xenia_cpu::scheduler::{PcrWriter, Scheduler};
use xenia_cpu::{PpcContext, ThreadRef};
use xenia_memory::{GuestMemory, MemoryAccess};
use xenia_vfs::VfsDevice;

use crate::audit::{HandleAudit, HandleAuditEntry};
use crate::objects::KernelObject;
use crate::ui_bridge::UiBridge;

/// Thin [`PcrWriter`] adapter over a borrowed [`GuestMemory`].
///
/// `Scheduler::spawn` and Axis 4's migration path need to write PCR+0x2C;
/// routing that write through this wrapper lets them do so without
/// `xenia-cpu` depending on the memory crate.
pub struct GuestMemoryPcr<'a>(pub &'a GuestMemory);

impl PcrWriter for GuestMemoryPcr<'_> {
    /// Store `hw_id`, widened to 32 bits, at guest address `pcr_base + 0x2C`.
    fn write_pcr_id(&mut self, pcr_base: u32, hw_id: u8) {
        // `GuestMemory::write_u32` takes `&self` post-M2 trait flip, so the
        // shared borrow held by the wrapper is all we need.
        let GuestMemoryPcr(mem) = self;
        let slot = pcr_base + 0x2C;
        mem.write_u32(slot, u32::from(hw_id));
    }
}

/// Signature shared by every HLE kernel export.
///
/// The leading `&mut PpcContext` is the context of the HW thread that is
/// **currently running**; the caller temporarily moves it out of its
/// scheduler slot so it cannot alias `state`. Exports that only touch
/// register/GPR state work on `ctx` directly. Exports that need scheduler
/// state (spawn/park/wake/tls/etc.) reach through `state.scheduler`, keeping
/// in mind that `state.scheduler.hw_threads[current]` holds a placeholder
/// `PpcContext` for the duration of the call — not the live one passed as
/// `ctx`.
pub type KernelExportFn = fn(&mut PpcContext, &GuestMemory, &mut KernelState);

/// Identifies which kernel module an export belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ModuleId {
    Xboxkrnl,
    Xam,
    Xbdm,
}

/// Pseudo-`HMODULE` values returned by `XexGetModuleHandle` and accepted by
/// `XexGetProcedureAddress`. Distinct from real loaded-image bases
/// (>=0x82000000) and from kernel handles (0x1000+, allocated by
/// `alloc_handle`). The 0xFFFE_xxxx prefix is unused by both guest segments
/// and our handle allocator.
pub const HMODULE_XBOXKRNL: u32 = 0xFFFE_0001;
/// Pseudo-`HMODULE` value for the `Xam` module; see [`HMODULE_XBOXKRNL`].
pub const HMODULE_XAM: u32 = 0xFFFE_0002;

/// Central kernel state tracking all guest OS state.
// NOTE(review): several field types below (e.g. `HashMap>`, `Option`,
// `VecDeque`, `Vec`) appear to have lost their generic arguments — this looks
// like an extraction artifact. Restore them from version control before
// building; they are reproduced here exactly as found.
pub struct KernelState {
    /// Registered HLE exports keyed by `(module, ordinal)`; the value is the
    /// export's `(name, handler)` pair (see `register_export`).
    exports: HashMap<(ModuleId, u32), (&'static str, KernelExportFn)>,
    /// M2.4: bump allocator for kernel handles. `AtomicU32` so concurrent
    /// HLE calls under M3 can `fetch_add` without a lock. `Relaxed` is
    /// fine — the allocated value is a fresh ID with no prior payload to
    /// publish; observers (the kernel object table) are guarded by
    /// their own synchronization.
    next_handle: std::sync::atomic::AtomicU32,
    /// AUDIT-059 R34: FIFO free list of closed handle slots, mirroring
    /// canary's slab/free-list `ObjectTable`. Without this, ours' bump
    /// allocator monotonically grows so a recycled slot in canary
    /// (e.g. `F8000098` reused 130× per 30s) corresponds to a fresh,
    /// never-reused slot in ours — the kernel-object identity drifts.
    /// Recycling closes that gap and (per AUDIT-042 / R30) may
    /// side-effect-unwedge γ-cluster #2 by letting silph signals land
    /// on the same handle slot the wait registered for. Population is
    /// gated on `KernelState::release_handle_slot` (only IDs in
    /// `[HANDLE_BASE, 0xF000_0000)` are recycled — synthetic XAudio
    /// handles at `0xF000_0000+` are reserved and must never be reused).
    free_handles: std::collections::VecDeque,
    /// Scheduler managing all emulated HW threads + their per-slot
    /// runqueues. Starts empty — the app installs the initial guest thread
    /// on slot 0 via `KernelState::install_initial_thread` once it has the
    /// entry address.
    pub scheduler: Scheduler,
    /// TLS slot allocator — index counter only. Per-thread *values* live on
    /// `GuestThread::tls_values` (see scheduler). M2.4: `AtomicU32`.
    pub next_tls_index: std::sync::atomic::AtomicU32,
    /// Critical-section waiter map: guest `cs_ptr` → guest threads parked
    /// on it. Critical sections are in guest memory (not kernel objects),
    /// so their waiter list lives here rather than on an object.
    pub cs_waiters: HashMap>,
    /// Kernel object table: handle → object
    pub objects: HashMap,
    /// Bump allocator for guest heap (NtAllocateVirtualMemory etc.).
    /// M2.4: `AtomicU32` for lock-free concurrent allocation.
    pub heap_cursor: std::sync::atomic::AtomicU32,
    /// Stack allocator cursor for MmCreateKernelStack. M2.4: atomic.
    pub stack_cursor: std::sync::atomic::AtomicU32,
    /// GPU command buffer address (set by VdGetSystemCommandBuffer)
    pub gpu_command_buffer: u32,
    /// GPU backend. M1.4: was `xenia_gpu::GpuSystem` directly, now a
    /// [`xenia_gpu::GpuBackend`] enum so the kernel can hold either an
    /// inline `GpuSystem` (synchronous, default) or a `GpuHandle` proxy
    /// pointing at a worker thread (`--gpu-thread`). Forwarding methods
    /// on the enum keep call sites in [`crate::exports`] terse.
    pub gpu: xenia_gpu::GpuBackend,
    /// Monotonic packet number returned by `XamInputGetState`. Games detect
    /// input changes by watching this increment.
    pub input_packet_number: u32,
    /// Previous gamepad snapshot; `input_packet_number` only advances when
    /// the state bytes actually change, matching host XInput semantics.
    pub last_input_bytes: u128,
    /// Image base of the loaded XEX (for XexExecutableModuleHandle etc.)
    pub image_base: u32,
    /// `XEX_HEADER_SYSTEM_FLAGS` (key `0x00030000`) parsed from the loaded
    /// XEX header. Queried by `XexCheckExecutablePrivilege`: privilege bit
    /// `n` is set iff `(xex_system_flags & (1 << n)) != 0`. Zero before the
    /// app installs the loaded image — that matches canary's behavior when
    /// no executable module is registered (returns 0).
    pub xex_system_flags: u32,
    /// One-shot log gate for `XexCheckExecutablePrivilege`: tracks which
    /// privilege numbers have already produced a `tracing::info!` line so
    /// the import-hot path doesn't spam at -n 500M.
    pub xex_priv_logged: std::collections::HashSet,
    /// Whether the first listener whose mask covers `kXNotifySystem` has
    /// been registered — gate for the two startup system notifications
    /// per `kernel_state.cc:1020-1025`.
    pub has_notified_startup: bool,
    /// Same for `kXNotifyLive` per `kernel_state.cc:1026-1032`.
    pub has_notified_live_startup: bool,
    /// Next thread ID. M2.4: atomic.
    pub next_thread_id: std::sync::atomic::AtomicU32,
    /// Virtual file system for NtCreateFile/NtReadFile/etc. The app mounts
    /// the disc image or host directory into this slot; file I/O handlers
    /// route all reads through it.
    pub vfs: Option>,
    /// AUDIT-038 — host directory backing the persistent `cache:` mount
    /// (mirrors canary's `cache:` → `\CACHE` symlink in xenia_main.cc:649,
    /// implemented atop `HostPathDevice`). When `Some`, opens of `cache:\*`
    /// paths go through real `std::fs` I/O against this directory; when
    /// `None`, they fall back to the legacy "Synthesized empty file" stub
    /// (which doesn't persist writes — see audit-037 for the record-layout
    /// divergence that motivated this fix). Set up by [`init_cache_root`]
    /// at startup; cleared at the same time so lockstep digests stay
    /// reproducible across reruns.
    pub cache_root: Option,
    /// Bridge to the host UI. `None` when running headless. Installed by
    /// `cmd_exec` when the user passes `--ui`.
    pub ui: Option,
    /// P6 — graphics interrupt + synthetic v-sync bookkeeping. Registers
    /// the callback set by `VdSetGraphicsInterruptCallback` and tracks
    /// the paused-context snapshot while HW thread 0 is running it.
    pub interrupts: crate::interrupts::InterruptState,
    /// XAudio render-driver clients + buffer-complete callback ticker.
    /// Mirrors canary's [`xenia/apu/audio_system.cc`] worker — registered
    /// guest callbacks can fire at the audio frame rate so guest threads
    /// parked on audio-buffer events get woken (APUBUG-PRODUCER-001).
    /// Shares the [`crate::interrupts::InterruptState::saved`] /
    /// `injected_ref` slot at injection time; mutual exclusion with
    /// graphics interrupts is enforced by the injector's
    /// `is_in_callback()` guard.
    pub xaudio: crate::xaudio::XAudioState,
    /// AUDIT-032 Plan B (default true). When true, the round prologue
    /// runs the XAudio ticker + `try_inject_audio_callback`. Pre-fix this
    /// was off by default because injection used random-victim selection
    /// (APUBUG-PRODUCER-001 HW-thread hijack) which corrupted unrelated
    /// state. With dedicated per-client audio workers spawned at
    /// `XAudioRegisterRenderDriverClient`, injection only ever runs on
    /// the registered worker so it is safe to leave on. Lockstep goldens
    /// `sylpheed_n*m.json` will drift on this fix and need re-baselining
    /// (handled out-of-band). The `--xaudio-tick` flag / `XENIA_XAUDIO_TICK=1`
    /// env var now act as explicit-override; flipping it off restores the
    /// pre-fix path (no audio callbacks fire at all).
    pub xaudio_tick_enabled: bool,
    /// Per-handle refcount. Since `NtDuplicateObject` aliases (returns the
    /// source handle value as the "new" handle rather than minting a fresh
    /// id), a single handle commonly has multiple logical references. This
    /// map tracks that count so a stray `NtClose` on one reference doesn't
    /// destroy the object while another reference is still live. Canary's
    /// `ObjectTable::ReleaseHandle` (object_table.cc:189) is the parity
    /// reference. Initialized to 1 in `alloc_handle_for`; incremented in
    /// `nt_duplicate_object` when `DUPLICATE_CLOSE_SOURCE` is absent;
    /// decremented in `nt_close` which drops the underlying object only
    /// when the count reaches zero.
    pub handle_refcount: HashMap,
    /// Pending timer expirations — `(deadline, handle)` sorted ascending by
    /// deadline. Pushed by `arm_timer`, popped by `fire_due_timers`. Kept in
    /// lockstep with the per-`Timer` object's `deadline` field via the
    /// `arm_timer`/`disarm_timer` helpers. See the plan's step 3/6 for the
    /// design rationale — timer deadlines coexist with
    /// `Scheduler::timed_waits` but track a different class (signaled object
    /// fires, not thread wake-ups).
    pub pending_timer_fires: Vec<(u64, u32)>,
    /// Per-handle signal/wait/wake audit trail. Default `enabled=false` →
    /// every record method is a no-op. Flip via `--trace-handles`/
    /// `XENIA_TRACE_HANDLES` to diagnose missing-signal deadlocks (handles
    /// 0x10FC / 0x1014 / 0x1104 / 0x10DC / 0x10F0 specifically). See
    /// [`crate::audit`] for layout.
    pub audit: HandleAudit,
    /// M2.2 — banked reservation table for `lwarx`/`stwcx.` under M3's
    /// per-HW-thread parallelism. Always allocated. Consulted by the
    /// interpreter when `reservations.is_enabled()` is true; otherwise
    /// the legacy per-`PpcContext` fields drive observable behavior.
    /// Settable via `--reservations-table` / `XENIA_RESERVATIONS_TABLE=1`
    /// for golden verification, or implicitly under `--parallel`.
    /// See [`xenia_cpu::ReservationTable`] for the concurrency model.
    pub reservations: std::sync::Arc,
    /// True when the runtime was started with `--parallel`. Read by the
    /// v-sync ticker (KRNBUG-D08): lockstep uses the deterministic
    /// instruction-count proxy so the `sylpheed_n*m.json` goldens stay
    /// bit-stable; `--parallel` uses wall-clock so the rate doesn't
    /// drop to ~2 v-syncs / 100M as the instruction-count proxy did.
    /// Set once at startup and never mutated.
    pub parallel_active: bool,
    /// Map from `(module, ordinal)` to the guest-side import-thunk address
    /// resolved at load time. Reverse of `xenia-app/src/main.rs`'s
    /// `thunk_map`. Populated from xenia-app's Phase 1 (record_type==1
    /// only). Used by `xex_get_procedure_address` to resolve ordinals back
    /// to callable thunks.
    thunks_by_ordinal: HashMap<(ModuleId, u16), u32>,
    /// First-Pixels diagnostic latch. Set the first time
    /// `RtlRaiseException` fires with code `0xE06D7363` (MSVC C++ throw)
    /// so the deep stack-walk + `runtime_error` decode in
    /// `rtl_raise_exception` only emits once per run, regardless of how
    /// many subsequent throws fire. Reset on each fresh process start.
    pub cxx_throw_logged: bool,
    /// Cached primary ring base/size, set during `VdInitializeRingBuffer`.
    /// Used by `vd_swap` (KRNBUG-Vd-04) so the kernel can write PM4
    /// packets directly into ring memory without going through the GPU
    /// backend (which lives on the worker thread under `--gpu-thread`).
    pub ring_base: u32,
    /// Size half of the cached primary-ring base/size pair (see `ring_base`);
    /// presumably counted in dwords, per the field name.
    pub ring_size_dwords: u32,
    /// Diagnostic. PCs at which the worker prologue fires a one-shot
    /// stack/back-chain dump capturing live `r3` (= `this` in MSVC
    /// PPC ctors), `lr` (= return site), and the cycle/tid that hit
    /// the PC. Populated from `--ctor-probe=0x8217C850,0x...` /
    /// `XENIA_CTOR_PROBE`. Empty by default → check is a single
    /// `is_empty()` test, no extra cost on the unprobed hot path.
    /// Read-only diagnostic — no guest state is mutated, so the
    /// `sylpheed_n*m.json` lockstep digest is preserved.
    ///
    /// **Why a per-PC probe instead of per-handle?** The MSVC ctors
    /// at `sub_8217C850` (and friends) don't preserve `this` in r31
    /// across the inner `bl` to `silph::Event::Construct`, so the
    /// AUDIT-002 multi-frame back-chain at `NtCreateEvent` only
    /// recovers stack-relative pointers — never the pool-element
    /// `this`. Hooking the ctor's PRE-prologue PC captures r3 = this
    /// before any save/restore can clobber it.
    pub ctor_probe_pcs: std::collections::HashSet,
    /// Diagnostic. Optional per-PC dispatcher snapshot. Maps a probe PC
    /// to a `(dispatcher_addr, offset)` pair; when the PC fires, the
    /// helper additionally logs the value of `[dispatcher_addr +
    /// offset]` — i.e. exactly what the producer's `lwz r3, OFF(r3)`
    /// is about to read after the `bl outer_getter` returns the
    /// dispatcher pointer in r3. Populated from the `PC@DISP:OFF`
    /// extended syntax of `--pc-probe` / `--ctor-probe`. Read-only
    /// load — does not mutate guest state.
    pub pc_probe_consumers: HashMap,
    /// Diagnostic. Comma-separated set of guest PCs that, when reached,
    /// emit a single compact one-line `BRANCH-PROBE` record. The line
    /// includes (pc, tid, hw, cycle, r3, lr, cr0.{lt,gt,eq}, cr6.{lt,gt,eq})
    /// — designed for tracing every conditional-branch fire inside a
    /// candidate-gate function (sub_824A9710 etc.) so the LAST PC
    /// reached before function epilogue identifies the exit branch.
    /// Distinct from `ctor_probe_pcs` because that helper emits 8
    /// frames of back-chain per hit — too noisy for branch tracing.
    pub branch_probe_pcs: std::collections::HashSet,
    /// AUDIT-2BF — diagnostic. PCs at which to emit a structured one-line
    /// `AUDIT-PC-PROBE` record on every fire, designed for the silph init
    /// chain virtual-dispatch site at `sub_82172BA0+0x1E8` (PC
    /// `0x82172D88`, a `bctrl` after a 3-deep load of vtable slot 6). The
    /// emitted line carries (pc, tid, hw, cycle, lr, r3, r11) plus four
    /// guest-memory dereferences off `r3`: `[r3+0]` (vtable), `[[r3+0]+24]`
    /// (slot 6 method pointer = the bctrl target), `[r3+0x0C]` (audit-059
    /// round-9 canary-known auxiliary handle `0xF80000D8`), and `[r3+0x30]`
    /// (canary-known embedded sub-object vtable `0x820A1870`). Distinct
    /// from `branch_probe_pcs` because that helper only logs registers (no
    /// memory) and from `lr_trace_pcs` because that emits JSON intended
    /// for canary diffing, not the four hard-coded indirect dereferences
    /// needed here. Read-only — no guest state mutation. Lockstep
    /// digest unaffected. Settable via `--audit-pc-probe-hex` /
    /// `XENIA_AUDIT_PC_PROBE`.
    pub audit_pc_probe_pcs: std::collections::HashSet,
    /// AUDIT-2BF round 14 — diagnostic. Optional guest VA. When set, each
    /// `AUDIT-PC-PROBE` fire emits a paired `AUDIT-MEM-READ` line with
    /// `addr`, `*addr` (singleton value), `**addr` (vtable), `***addr+0`
    /// (vtable[0] = first virtual method), and `***addr+24` (vtable[6]
    /// in 4-byte stride = slot 6 = silph chain bctrl target). Three-deep
    /// dereference to resolve the vtable[0] target at the bctrl site
    /// `0x822F1B4C` inside `sub_822F1AA8`. Read-only; lockstep digest
    /// unaffected. Settable via `--audit-mem-read-hex` /
    /// `XENIA_AUDIT_MEM_READ`.
    pub audit_mem_read_addr: Option,
    /// AUDIT-052 — diagnostic. When set, each `AUDIT-PC-PROBE` fire
    /// additionally emits an `AUDIT-R3-DUMP` line with N bytes of guest
    /// memory dumped from `r3` as `u32` lanes (4-byte aligned only).
    /// Sized for audit-051's 80-byte stack-local struct at `r31+96`
    /// inside `sub_82452DC0` (probe `sub_8245B000` entry where
    /// `r3 == parent's r31+96`). Read-only; lockstep digest unaffected.
    /// Settable via `--audit-r3-dump-bytes` /
    /// `XENIA_AUDIT_R3_DUMP_BYTES`.
    pub audit_r3_dump_bytes: Option,
    /// M12 — diagnostic. PCs at which to emit a structured JSONL record
    /// per fire, designed for diffing against xenia-canary's
    /// `--log_lr_on_pc` patch output. Each line carries
    /// `(pc, tid, hw, cycle, r3, r4, r5, r6, lr)` — a superset of what
    /// canary logs. Settable via `--lr-trace` / `XENIA_LR_TRACE`. Stdout
    /// by default; redirect with `--lr-trace-out=PATH`. Read-only;
    /// lockstep digest unaffected.
    pub lr_trace_pcs: std::collections::HashSet,
    /// M12 — optional file writer for `--lr-trace` output. `None` means
    /// stdout.
    pub lr_trace_writer: Option>,
    /// Diagnostic. Guest addresses to dump (64 bytes each, hex + u32
    /// lanes) at end-of-run. Populated from `--dump-addr=0x828F3D08,
    /// 0x828F4070`. Used to inspect static dispatcher / job-queue /
    /// pool struct layouts identified by AUDIT-003. Read-only — the
    /// dump is performed by `dump_thread_diagnostic`, never during
    /// the hot interpreter loop, so lockstep determinism is unaffected.
    pub dump_addrs: Vec,
    /// `--dump-section=BASE:LEN:PATH` end-of-run snapshot, page-gated by `is_mapped`.
    pub dump_section: Option<(u32, u32, std::path::PathBuf)>,
    /// AUDIT-2.BF — synthetic silph::WorkerCtx spawn one-shot latch. Set on
    /// first call to [`crate::silph_synth::spawn_silph_workers`] (triggered
    /// by the first observation of a load-bearing VFS path such as
    /// `dat/movie`), then reused — subsequent triggers are no-ops.
    pub silph_synth_done: bool,
    /// AUDIT-2.BF — VA of the synthesised silph::WorkerCtx. Zero before the
    /// first spawn; set to the ctx base by `spawn_silph_workers`. Held on
    /// the kernel state so future export hooks can find it (no caller does
    /// yet — placeholder for round 19+ wiring).
    pub silph_synth_ctx: u32,
    /// AUDIT-2.BF — kernel handles for the 4 synthetic worker threads.
    pub silph_synth_handles: [Option; 4],
    /// AUDIT-2.BF — `ThreadRef` cache for the 4 synthetic workers.
    pub silph_synth_refs: [Option; 4],
    /// ITERATE-2C Phase D — auto-signal delay for silph::UImpl
    /// `NtCreateEvent` calls (see [`Self::maybe_register_silph_autosignal`]).
    /// `None` = feature disabled; populated once from
    /// `XENIA_SILPH_UI_AUTOSIGNAL_DELAY=` at construction.
    pub silph_autosignal_delay: Option,
    /// ITERATE-2C Phase D — pending auto-signal queue. Drained each
    /// outer round by [`Self::fire_due_silph_autosignals`].
    pub silph_autosignal_pending: Vec,
    /// ITERATE-2C Phase D — most recent `stats.instruction_count`
    /// deposited by the scheduler loop (see
    /// [`Self::set_now_cycle_hint`]). Used by
    /// [`Self::maybe_register_silph_autosignal`] to compute absolute
    /// deadlines, since `nt_create_event` doesn't see `ExecStats`.
    pub last_cycle_hint: u64,
    /// ITERATE-2C Phase D — one-shot diagnostic latch. Flipped by
    /// [`Self::fire_due_silph_autosignals`] on the first visit where
    /// the pending queue is non-empty but no entry is due yet.
    pub silph_autosignal_diag_logged: bool,
}

/// ITERATE-2C Phase D — one queued auto-signal. `deadline_cycle` is
/// absolute (cycle hint at register time + configured delay).
#[derive(Debug, Clone, Copy)]
pub struct AutoSignalPending {
    /// Kernel handle queued for auto-signal.
    pub handle: u32,
    /// Absolute cycle count at which the entry becomes due.
    pub deadline_cycle: u64,
}

impl KernelState {
    /// Construct a kernel with the supplied GPU backend.
    ///
    /// The caller (typically `cmd_exec_inner`) decides whether to install
    /// an inline backend (default) or a threaded one (`--gpu-thread`).
    /// Most existing call sites build via [`Self::new`], which defaults to
    /// an inline backend; the threaded constructor lives at
    /// [`Self::with_gpu`].
    ///
    /// Besides populating every field with its initial value, this registers
    /// the `crate::exports` and `crate::xam` export tables and sets up the
    /// cache root; a cache-root failure is logged and otherwise ignored.
    pub fn with_gpu(gpu: xenia_gpu::GpuBackend) -> Self {
        // Scheduler starts empty; the app installs the initial thread on
        // slot 0 via `install_initial_thread` right after construction.
        let mut scheduler = Scheduler::new();
        use std::sync::atomic::AtomicU32;
        let reservations = std::sync::Arc::new(xenia_cpu::ReservationTable::new());
        // M3.7 — wire the reservation table to the scheduler so
        // `spawn`/`install_initial_thread` populate every PpcContext's
        // `reservation_table` clone. The table is `disabled` by
        // default; `--reservations-table` / `XENIA_RESERVATIONS_TABLE`
        // / M3 spawn flip it on.
        scheduler.set_reservation_table(Some(reservations.clone()));
        let mut state = Self {
            exports: HashMap::new(),
            next_handle: AtomicU32::new(0x1000),
            free_handles: std::collections::VecDeque::new(),
            scheduler,
            next_tls_index: AtomicU32::new(0),
            cs_waiters: HashMap::new(),
            objects: HashMap::new(),
            heap_cursor: AtomicU32::new(0x4000_0000), // Start of user heap region
            stack_cursor: AtomicU32::new(0x7100_0000), // Above main stack
            gpu_command_buffer: 0,
            gpu,
            input_packet_number: 0,
            last_input_bytes: 0,
            image_base: 0,
            xex_system_flags: 0,
            xex_priv_logged: std::collections::HashSet::new(),
            has_notified_startup: false,
            has_notified_live_startup: false,
            next_thread_id: AtomicU32::new(1),
            vfs: None,
            cache_root: None,
            ui: None,
            interrupts: crate::interrupts::InterruptState::default(),
            xaudio: crate::xaudio::XAudioState::default(),
            // AUDIT-032: dedicated audio worker per client (Plan B in
            // `xaudio_register_render_driver`) — not victim hijack, so safe
            // to enable by default. Previously gated off because the
            // random-victim selection corrupted unrelated thread state.
            xaudio_tick_enabled: true,
            handle_refcount: HashMap::new(),
            pending_timer_fires: Vec::new(),
            audit: HandleAudit::default(),
            reservations,
            thunks_by_ordinal: HashMap::new(),
            cxx_throw_logged: false,
            ring_base: 0,
            ring_size_dwords: 0,
            parallel_active: false,
            ctor_probe_pcs: std::collections::HashSet::new(),
            pc_probe_consumers: HashMap::new(),
            branch_probe_pcs: std::collections::HashSet::new(),
            audit_pc_probe_pcs: std::collections::HashSet::new(),
            audit_mem_read_addr: None,
            audit_r3_dump_bytes: None,
            lr_trace_pcs: std::collections::HashSet::new(),
            lr_trace_writer: None,
            dump_addrs: Vec::new(),
            dump_section: None,
            silph_synth_done: false,
            silph_synth_ctx: 0,
            silph_synth_handles: [None; 4],
            silph_synth_refs: [None; 4],
            // An unset or unparsable env value leaves the feature disabled.
            silph_autosignal_delay: std::env::var("XENIA_SILPH_UI_AUTOSIGNAL_DELAY")
                .ok()
                .and_then(|v| v.parse::().ok()),
            silph_autosignal_pending: Vec::new(),
            last_cycle_hint: 0,
            silph_autosignal_diag_logged: false,
        };
        crate::exports::register_exports(&mut state);
        crate::xam::register_exports(&mut state);
        // AUDIT-054 — cache root selection. Defaults to AUDIT-038's
        // per-process tmpdir + wipe (lockstep determinism + avoids
        // the journal-append-on-reboot self-inconsistency Sylpheed's
        // `.tmp` writes produce). Opt-in to persistence via
        // `XENIA_CACHE_ROOT=` (explicit) or
        // `XENIA_CACHE_PERSIST=1` (`$XDG_DATA_HOME/xenia-rs/cache`).
        // Errors are non-fatal (cache I/O degrades to the synth-stub
        // fallback) but logged.
        let (root, wipe) = resolve_default_cache_root();
        let init_result = if wipe {
            state.init_cache_root(root.clone())
        } else {
            // Non-wiping path: make sure the directory exists, then mount it
            // without clearing its contents.
            std::fs::create_dir_all(&root).map(|()| {
                state.cache_root = Some(root.clone());
            })
        };
        if let Err(e) = init_result {
            tracing::warn!(
                "Failed to initialise cache root at {:?}: {} — cache:/* opens \
                 will fall back to the synth-empty-file stub",
                root,
                e
            );
        }
        state
    }

    /// Default constructor — installs an inline `GpuSystem`. Kept for
    /// callers that don't (yet) thread a `GpuBackend` choice through.
pub fn new() -> Self {
    // Inline (synchronous) GPU backend; everything else is delegated to
    // `with_gpu`.
    let inline_gpu = xenia_gpu::GpuSystem::new();
    Self::with_gpu(xenia_gpu::GpuBackend::Inline(inline_gpu))
}

/// Register (or replace) the HLE handler for `(module, ordinal)`.
///
/// `name` is stored next to `func` and is what `export_name` and the
/// per-export call counter in `call_export` report. Registering the same
/// `(module, ordinal)` twice keeps only the later entry.
pub fn register_export(
    &mut self,
    module: ModuleId,
    ordinal: u32,
    name: &'static str,
    func: KernelExportFn,
) {
    self.exports.insert((module, ordinal), (name, func));
}

/// AUDIT-038 — install a host directory as the backing store for the
/// `cache:` mount. The directory is unconditionally cleared (and then
/// re-created) on entry so two consecutive runs see byte-identical
/// initial state — required for the `sylpheed_n*m.json` lockstep
/// goldens. Mirrors canary's `xenia_main.cc:611-651` setup, which
/// `RegisterSymbolicLink("cache:", "\\CACHE")` against a per-emulator
/// host path.
///
/// # Errors
///
/// Returns any I/O error from the clear/create steps so the caller can
/// surface it; `cache_root` is left untouched in that case. A root that
/// does not exist yet is not an error.
pub fn init_cache_root(&mut self, root: std::path::PathBuf) -> std::io::Result<()> {
    // Clear-then-recreate. Determinism beats incremental persistence
    // here: Sylpheed's cache subsystem treats a missing/empty cache
    // identically to a stale one (cache-miss → reconstruct), so
    // wiping is safe and gives reproducible boots.
    //
    // Remove unconditionally and tolerate `NotFound` instead of probing
    // with `exists()` first: the probe races with concurrent removal and
    // reports `false` for a dangling symlink, which would then make
    // `create_dir_all` fail.
    match std::fs::remove_dir_all(&root) {
        Ok(()) => {}
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        Err(e) => return Err(e),
    }
    std::fs::create_dir_all(&root)?;
    self.cache_root = Some(root);
    Ok(())
}

/// AUDIT-054 — direct (non-wiping) cache-root install for tests
/// that want byte-for-byte control over what's already on disk
/// when the kernel boots. Skips the `init_cache_root` clear pass and
/// performs no filesystem access of its own.
pub fn set_cache_root(&mut self, root: std::path::PathBuf) {
    self.cache_root = Some(root);
}

/// Resolve a guest VFS path (e.g. `cache:\d4ea4615e46ee8ca.tmp`) to
/// the host-FS path that backs it. Returns `None` if the path doesn't
/// start with a `cache:`, `cache0:` or `cache1:` prefix (matched
/// case-insensitively and followed by `\` or `/`) or if no cache root
/// is mounted (legacy synth-stub fallback).
///
/// Path-traversal guard: empty, `.` and `..` components are dropped
/// wherever they appear, so a malicious guest can't escape the cache
/// directory. Backslashes are treated as path separators.
pub fn resolve_cache_path(&self, raw: &str) -> Option { let root = self.cache_root.as_ref()?; let lower = raw.to_ascii_lowercase(); // Match any of the writable cache prefixes (case-insensitive). // canary uses separate `\CACHE0`/`\CACHE1` host dirs for cache0:/ // cache1:, but Sylpheed only references `cache:`; collapse all // three to one backing root until a future game splits them. let after_prefix = if let Some(rest) = lower.strip_prefix("cache:\\") { &raw[raw.len() - rest.len()..] } else if let Some(rest) = lower.strip_prefix("cache:/") { &raw[raw.len() - rest.len()..] } else if let Some(rest) = lower.strip_prefix("cache0:\\") .or_else(|| lower.strip_prefix("cache0:/")) .or_else(|| lower.strip_prefix("cache1:\\")) .or_else(|| lower.strip_prefix("cache1:/")) { &raw[raw.len() - rest.len()..] } else { return None; }; let normalised = after_prefix.replace('\\', "/"); // Strip leading slashes + path-traversal segments. let clean: std::path::PathBuf = normalised .split('/') .filter(|s| !s.is_empty() && *s != "." && *s != "..") .collect(); Some(root.join(clean)) } /// Record an import-thunk address resolved at load time. Called once /// per `record_type==1` import in xenia-app's Phase 1. Idempotent: a /// duplicate ordinal overwrites (later wins; in practice the loader /// emits each ordinal once per module). pub fn register_thunk(&mut self, module: ModuleId, ordinal: u16, address: u32) { self.thunks_by_ordinal.insert((module, ordinal), address); } /// Resolve a `(module, ordinal)` to its registered thunk address. pub fn resolve_thunk(&self, module: ModuleId, ordinal: u16) -> Option { self.thunks_by_ordinal.get(&(module, ordinal)).copied() } /// Map a pseudo-`HMODULE` (as returned by `XexGetModuleHandle`) back /// to its `ModuleId`. Returns `None` for unknown handles, including /// the loaded XEX's `image_base` (which is *not* a kernel module). 
pub fn module_id_from_hmodule(&self, handle: u32) -> Option { match handle { HMODULE_XBOXKRNL => Some(ModuleId::Xboxkrnl), HMODULE_XAM => Some(ModuleId::Xam), _ => None, } } /// Dispatch a kernel export on the current HW thread. Uses `mem::replace` /// to temporarily move the active `PpcContext` out of its scheduler slot, /// so the export function can receive `&mut ctx` while also getting /// `&mut self` (which contains the scheduler). Without this, the export /// signature would have to avoid aliasing via a bundle struct — see the /// approved plan's ExportCtx section for the alternative we rejected. /// /// While the export runs, `scheduler.hw_threads[current_hw_id].ctx` holds /// a freshly-constructed placeholder. Exports that reach through /// `state.scheduler` must not touch the current slot's `ctx` field. /// /// **Perf note (First-Pixels M1):** this function fires ~250K/s on /// Sylpheed (1 import per 40 guest instructions). A former /// `#[tracing::instrument]` attribute + two `tracing::info!` call /// sites made up ~28% of `run_execution` wall time on a post-Tier-3 /// profile — most of it in `tracing::span::Span::new` + /// `Layered::new_span` + `ErrorLayer::on_new_span`. The span was at /// `level = "debug"` but the span **construction** happened /// unconditionally; only the emit was level-gated. Removing the /// attribute + the two `info!` lines recovers the overhead without /// losing any observability — the `metrics::counter!("kernel.calls", /// "name" => name)` below still tracks per-export counts, and /// unimplemented lookups still emit a `warn!`. pub fn call_export( &mut self, module: ModuleId, ordinal: u32, mem: &GuestMemory, ) -> bool { // The thread whose ctx we're swapping out must be addressed by // `ThreadRef`, not `hw_id` — under per-slot runqueues a bare // `hw_id` alone can't distinguish multiple threads on the same // slot, and Axis 4 migration can change the slot underneath us. 
let r = self .scheduler .current .expect("call_export: no current thread"); let mut ctx = std::mem::replace( self.scheduler.ctx_mut_ref(r), PpcContext::new(), ); let result = if let Some(&(name, func)) = self.exports.get(&(module, ordinal)) { metrics::counter!("kernel.calls", "name" => name).increment(1); tracing::trace!(target: "probe_calls", "hw={} call={} r3={:#x} r4={:#x} r5={:#x} lr={:#x}", r.hw_id, name, ctx.gpr[3], ctx.gpr[4], ctx.gpr[5], ctx.lr); func(&mut ctx, mem, self); true } else { metrics::counter!("kernel.unimplemented").increment(1); tracing::warn!( module = ?module, ordinal = format_args!("{:#x}", ordinal), "unimplemented kernel export" ); ctx.gpr[3] = 0; false }; // Restore the (possibly mutated) ctx by ThreadRef. Axis 4 // self-migration (KeSetAffinityThread(NtCurrentThread, ...)) // updates `scheduler.current` in place; re-read here so we // restore onto the thread's new slot, not its old one. let final_ref = self.scheduler.current.unwrap_or(r); *self.scheduler.ctx_mut_ref(final_ref) = ctx; result } /// Axis 4: `KeSetAffinityThread` orchestration. Drives the scheduler's /// migration and fixes up every `ThreadRef` held outside the /// scheduler (kernel object waiter lists, critical-section waiters, /// `interrupts.injected_ref`). Returns the previous mask. pub fn set_affinity(&mut self, handle: u32, new_mask: u8, mem: &GuestMemory) -> u8 { let Some(r) = self.scheduler.find_by_handle(handle) else { return 0; }; let (old_mask, _new_ref, fixup) = self.scheduler.set_affinity_ref( r, new_mask, &mut GuestMemoryPcr(mem), ); if let Some(fx) = fixup { use crate::objects::KernelObject; for obj in self.objects.values_mut() { match obj { KernelObject::Event { waiters, .. } | KernelObject::Semaphore { waiters, .. } | KernelObject::Thread { waiters, .. } | KernelObject::Mutex { waiters, .. 
} => { for w in waiters.iter_mut() { fx.apply(w); } } _ => {} } } for list in self.cs_waiters.values_mut() { for w in list.iter_mut() { fx.apply(w); } } if let Some(ref mut ir) = self.interrupts.injected_ref { fx.apply(ir); } } old_mask } /// Install the initial (main) guest thread on HW slot 0. Called once at /// startup after the app allocates the main stack/PCR/TLS blocks. pub fn install_initial_thread( &mut self, ctx: PpcContext, stack_base: u32, stack_size: u32, pcr_base: u32, tls_base: u32, thread_handle: u32, mem: &GuestMemory, ) { self.scheduler.install_initial_thread( ctx, stack_base, stack_size, pcr_base, tls_base, thread_handle, &mut GuestMemoryPcr(mem), ); } pub fn export_name(&self, module: ModuleId, ordinal: u32) -> Option<&'static str> { self.exports.get(&(module, ordinal)).map(|&(name, _)| name) } pub fn alloc_handle(&mut self) -> u32 { // AUDIT-059 R34: prefer recycling a closed slot (FIFO, matching // canary's `ObjectTable` slab) before bumping. The Arc> already serializes us; no extra synchronization. if let Some(slot) = self.free_handles.pop_front() { return slot; } // M2.4: lock-free fetch_add. Relaxed is sufficient — IDs are // opaque tokens; no payload is sequenced against the counter. self.next_handle .fetch_add(4, std::sync::atomic::Ordering::Relaxed) } /// AUDIT-059 R34. Return a freshly-closed handle slot to the FIFO /// recycle queue. No-op for the synthetic XAudio range (`>= 0xF000_0000`, /// AUDIT-048) and the reserved `< 0x1000` band. Call site: `nt_close`'s /// `objects.remove` branch when refcount reaches zero. /// /// ABA guard (subsystem-audit 2026-06-12): never recycle a slot that a /// thread is still parked on. Without this, a closed slot could be /// re-minted for a new object and a signal on that new object would wake /// the stale waiter that was blocked on the OLD object at the same slot. /// Such a slot is simply leaked (it stays out of `free_handles`), /// reproducing the pre-R34 bump-only behaviour for that rare case. 
pub fn release_handle_slot(&mut self, handle: u32) { if handle < 0x1000 || handle >= 0xF000_0000 { return; } if self.scheduler.any_thread_waiting_on(handle) { return; } self.free_handles.push_back(handle); } pub fn alloc_handle_for(&mut self, obj: KernelObject) -> u32 { let h = self.alloc_handle(); self.objects.insert(h, obj); // Each fresh handle starts with one logical reference (the creator). // `NtDuplicateObject` bumps this; `NtClose` decrements; the object is // only dropped when the count reaches zero. See `nt_close` for the // aliased-handle rationale. self.handle_refcount.insert(h, 1); h } // ===== Handle audit hooks ===== // // These are no-ops when `audit.enabled == false`, so call sites can // unconditionally invoke them without a hot-path branch in release builds // (the `inline` `if !enabled return` short-circuits before any work). /// Build a [`HandleAuditEntry`] describing the *current* call-site — /// captures cycle (slot-0 timebase), current `tid`, and `lr` from the /// passed `PpcContext`. fn audit_entry(&self, lr: u32, source: &'static str, aux: u64) -> HandleAuditEntry { let hw_id = self.scheduler.current_hw_id().unwrap_or(0); let cycle = self.scheduler.ctx(hw_id).timebase; let tid = self.scheduler.tid(hw_id).unwrap_or(0); HandleAuditEntry { cycle, tid, lr, source, aux } } /// Record the creation of a fresh handle. `kind` is one of the stable /// labels documented on [`crate::audit::HandleAuditTrail::kind`]. pub fn audit_create(&mut self, handle: u32, kind: &'static str, lr: u32, source: &'static str) { if !self.audit.enabled { return; } let entry = self.audit_entry(lr, source, 0); self.audit.record_create(handle, kind, entry); } /// KRNBUG-AUDIT-002. Variant of `audit_create` that additionally /// captures a 6-frame guest stack trace at allocation time when the /// handle is in `audit.focus`. Outside the focus set this falls back /// to plain `audit_create` (no stack walk → no extra cost on the hot /// path of unfocused handle creation). 
///
/// The walk reads the PPC EABI back-chain: `[r1] = prev_sp`, and the
/// LR saved by *that* prev frame's prologue lives at `[prev_sp - 8]`.
/// Frame 0 is the live frame `(ctx.gpr[1], ctx.lr)`. Frames 1..N walk
/// upward. A read returning 0 / 0xFFFF_FFFF, or a self-loop, ends the
/// walk early. This is read-only — guest memory and CPU state are not
/// mutated, so lockstep determinism is unaffected (a parallel run with
/// no focus is byte-identical to one without this code path).
pub fn audit_create_with_ctx(
    &mut self,
    handle: u32,
    kind: &'static str,
    ctx: &PpcContext,
    mem: &GuestMemory,
    source: &'static str,
) {
    if !self.audit.enabled {
        return;
    }
    let live_lr = ctx.lr as u32;
    let entry = self.audit_entry(live_lr, source, 0);
    if self.audit.focus.contains(&handle) {
        // Focused handle: pay for the back-chain walk and class probes.
        let frames = walk_guest_back_chain(ctx.gpr[1] as u32, live_lr, mem, 6);
        let class_probes = probe_create_stack_classes(ctx, &frames, mem);
        self.audit
            .record_create_with_stack_and_probes(handle, kind, entry, frames, class_probes);
    } else {
        // Unfocused handle: plain creation record only.
        self.audit.record_create(handle, kind, entry);
    }
}

/// Log a Set/Pulse/Release-style call against `handle`. `aux` carries the
/// previous signal state (or whatever per-export data the caller packs).
/// Does nothing when auditing is disabled.
pub fn audit_signal(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) {
    if self.audit.enabled {
        let record = self.audit_entry(lr, source, aux);
        self.audit.record_signal(handle, record);
    }
}

/// Log a `Wait*` call against `handle`. `aux` packs
/// `(alertable as u64) | (timeout_kind << 8)` and similar — the schema is
/// informal; the dump simply prints it. Does nothing when auditing is
/// disabled.
pub fn audit_wait(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) {
    if self.audit.enabled {
        let record = self.audit_entry(lr, source, aux);
        self.audit.record_wait(handle, record);
    }
}

/// Record a wake event (called from `wake_eligible_waiters`). `aux`
/// is the status code stamped into the woken thread's `gpr[3]`.
pub fn audit_wake(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) {
    // Disabled audit trail → nothing to record.
    if self.audit.enabled {
        let record = self.audit_entry(lr, source, aux);
        self.audit.record_wake(handle, record);
    }
}

/// ITERATE-2C Phase D — store the latest scheduler instruction count in
/// `last_cycle_hint` so absolute auto-signal deadlines can be computed
/// from it. Called once per outer round from the app's
/// `coord_pre_round` site.
///
/// The store is unconditional. Within this part of the file the hint is
/// read by `maybe_register_silph_autosignal`, and only once an
/// auto-signal delay is configured.
pub fn set_now_cycle_hint(&mut self, now_cycle: u64) {
    self.last_cycle_hint = now_cycle;
}

/// ITERATE-2C Phase D — register a freshly-allocated event for
/// auto-signal after the configured delay, **iff** the creating
/// thread matches the silph::UImpl tid=13 chain that wedges in
/// audit-049. Filter:
///
/// * Env `XENIA_SILPH_UI_AUTOSIGNAL_DELAY` set (= delay non-None)
/// * Frame-1 LR (the guest caller's post-bl PC, walked one step up
///   from the live thunk-wrapper frame) is in
///   `[0x821CB15C, 0x821CB160]` — this is the `NtCreateEvent` call
///   site inside `sub_821CB030+0x128`. The live `ctx.lr` is the
///   thunk wrapper's return slot (e.g. `0x824a9f6c`), so we walk
///   one back-chain step to reach the actual guest caller.
/// * Creating thread's `start_entry == 0x821748F0` (silph trampoline)
/// * Creating thread's `start_context == 0x4024a840`
///
/// On match, the handle is queued with `deadline = last_cycle_hint +
/// delay`. Drained by [`Self::fire_due_silph_autosignals`] from the
/// outer scheduler loop.
pub fn maybe_register_silph_autosignal( &mut self, handle: u32, ctx: &PpcContext, mem: &GuestMemory, ) { let Some(delay) = self.silph_autosignal_delay else { return; }; let Some((entry, start_ctx)) = self.scheduler.current_thread_entry_and_ctx() else { return; }; if entry != 0x821748F0 || start_ctx != 0x4024_a840 { return; } let frames = walk_guest_back_chain(ctx.gpr[1] as u32, ctx.lr as u32, mem, 2); let caller_lr = match frames.get(1) { Some((_, lr)) => *lr, None => return, }; if !(0x821CB15C..=0x821CB160).contains(&caller_lr) { return; } let deadline = self.last_cycle_hint.saturating_add(delay); self.silph_autosignal_pending .push(AutoSignalPending { handle, deadline_cycle: deadline }); tracing::info!( "silph autosignal: scheduled handle={:#x} caller_lr={:#x} for cycle {} (now={}, delay={})", handle, caller_lr, deadline, self.last_cycle_hint, delay, ); } /// ITERATE-2C Phase D — drain pending entries whose deadline has /// passed. Each fires by setting `Event { signaled = true }` and /// invoking the existing `wake_eligible_waiters` to release blocked /// waiters. No-op when the queue is empty (the common case). pub fn fire_due_silph_autosignals(&mut self, now_cycle: u64) { if self.silph_autosignal_pending.is_empty() { return; } let any_due = self .silph_autosignal_pending .iter() .any(|p| p.deadline_cycle <= now_cycle); if !any_due { // Diagnostic for the Phase D POC: log first time we visit // with a non-empty queue but nothing due yet. 
if !self.silph_autosignal_diag_logged { self.silph_autosignal_diag_logged = true; if let Some(first) = self.silph_autosignal_pending.first() { tracing::info!( "silph autosignal: tick (first visit, none due) now={} pending={} first_deadline={}", now_cycle, self.silph_autosignal_pending.len(), first.deadline_cycle, ); } } } let mut i = 0; while i < self.silph_autosignal_pending.len() { if self.silph_autosignal_pending[i].deadline_cycle <= now_cycle { let p = self.silph_autosignal_pending.swap_remove(i); let prev = match self.objects.get_mut(&p.handle) { Some(KernelObject::Event { signaled, .. }) => { let was = *signaled; *signaled = true; Some(was) } _ => None, }; tracing::info!( "silph autosignal: firing handle={:#x} prev_signaled={:?} at cycle {}", p.handle, prev, now_cycle, ); self.audit_signal(p.handle, 0, "silph_autosignal", prev.unwrap_or(false) as u64); crate::exports::wake_eligible_waiters(self, p.handle); // do not advance i — swap_remove pulled a new entry into i } else { i += 1; } } } /// Diagnostic. If the live PC for HW slot `hw_id` is in /// `self.ctor_probe_pcs`, emit a single `CTOR-PROBE` line with /// the current cycle, tid, hw_id, sp, r3, lr, plus an 8-frame /// back-chain walk. Read-only — no guest state is mutated, so a /// run with the probe set is byte-identical to one without (the /// probe only adds println noise). /// /// Intended call site: top of `worker_prologue`, after `pc` has /// been read but before any thunk-dispatch / step-block branch. /// Fires once per hit — if the same PC is reached again (e.g. /// the bridge ctor sub_8217C850 called 8 times by the static- /// init driver), it fires 8 times, which is exactly what we want /// for pool-element identification. 
pub fn fire_ctor_probe_if_match(&self, hw_id: u8, mem: &GuestMemory) {
    // Hot-path exit: no probe PCs configured.
    if self.ctor_probe_pcs.is_empty() {
        return;
    }
    let cpu = self.scheduler.ctx(hw_id);
    let pc = cpu.pc;
    if !self.ctor_probe_pcs.contains(&pc) {
        return;
    }

    let tid = self.scheduler.tid(hw_id).unwrap_or(0);
    let (sp, r3, lr) = (cpu.gpr[1] as u32, cpu.gpr[3] as u32, cpu.lr as u32);
    let cycle = cpu.cycle_count;
    let frames = walk_guest_back_chain(sp, lr, mem, 8);

    println!(
        "CTOR-PROBE pc={:#010x} tid={} hw={} cycle={} sp={:#010x} r3={:#010x} lr={:#010x}",
        pc, tid, hw_id, cycle, sp, r3, lr,
    );

    // Optional paired read: a consumer registered for this PC names a
    // (base, offset) field to dereference.
    if let Some(&(disp, off)) = self.pc_probe_consumers.get(&pc) {
        let field_val = mem.read_u32(disp.wrapping_add(off));
        println!(
            " CTOR-PROBE consumer disp={:#010x} off={} field={:#010x} (= [disp+off])",
            disp, off, field_val,
        );
    }

    // One line per walked frame, with the words at fp-12 / fp-16 (the
    // saved r31 / r30 slots).
    for (i, &(fp, frame_lr)) in frames.iter().enumerate() {
        let saved_r31 = mem.read_u32(fp.wrapping_sub(12));
        let saved_r30 = mem.read_u32(fp.wrapping_sub(16));
        println!(
            " CTOR-PROBE frame={} fp={:#010x} lr={:#010x} saved-r31={:#010x} saved-r30={:#010x}",
            i, fp, frame_lr, saved_r31, saved_r30,
        );
    }
}

/// Diagnostic. If the live PC for HW slot `hw_id` is in
/// `self.branch_probe_pcs`, emit one compact `BRANCH-PROBE` line
/// with (pc, tid, hw, cycle, r3, lr, cr0.{lt,gt,eq}, cr6.{lt,gt,eq}).
/// No back-chain walk — designed for tracing every conditional
/// branch fire inside a candidate-gate function. Read-only.
/// Lockstep digest unaffected.
pub fn fire_branch_probe_if_match(&self, hw_id: u8) { if self.branch_probe_pcs.is_empty() { return; } let ctx = self.scheduler.ctx(hw_id); let pc = ctx.pc; if !self.branch_probe_pcs.contains(&pc) { return; } let tid = self.scheduler.tid(hw_id).unwrap_or(0); let r3 = ctx.gpr[3] as u32; let lr = ctx.lr as u32; let cycle = ctx.cycle_count; let cr0 = &ctx.cr[0]; let cr6 = &ctx.cr[6]; println!( "BRANCH-PROBE pc={:#010x} tid={} hw={} cycle={} r3={:#010x} lr={:#010x} cr0={}{}{} cr6={}{}{}", pc, tid, hw_id, cycle, r3, lr, if cr0.lt { 'L' } else { '.' }, if cr0.gt { 'G' } else { '.' }, if cr0.eq { 'E' } else { '.' }, if cr6.lt { 'L' } else { '.' }, if cr6.gt { 'G' } else { '.' }, if cr6.eq { 'E' } else { '.' }, ); } /// AUDIT-2BF — diagnostic. If the live PC for HW slot `hw_id` is in /// `self.audit_pc_probe_pcs`, emit a single one-line /// `AUDIT-PC-PROBE` record with (pc, tid, hw, cycle, lr, r3, r11) /// plus four guest-memory dereferences off r3: `[r3+0]` (vtable), /// `[[r3+0]+24]` (slot 6 method = bctrl target), `[r3+0x0C]` /// (auxiliary handle field), `[r3+0x30]` (embedded sub-object /// vtable field). Tuned for the silph init chain virtual-dispatch /// site at `sub_82172BA0+0x1E8` (PC `0x82172D88`). /// /// Read-only. No guest-state mutation; lockstep digest unaffected. /// Empty set is the common case → single `is_empty()` test on the /// hot path. pub fn fire_audit_pc_probe_if_match(&self, hw_id: u8, mem: &GuestMemory) { if self.audit_pc_probe_pcs.is_empty() { return; } let ctx = self.scheduler.ctx(hw_id); let pc = ctx.pc; if !self.audit_pc_probe_pcs.contains(&pc) { return; } let tid = self.scheduler.tid(hw_id).unwrap_or(0); let r3 = ctx.gpr[3] as u32; let r11 = ctx.gpr[11] as u32; let lr = ctx.lr as u32; let cycle = ctx.cycle_count; // Memory dereferences. Guest pointers may be unmapped/garbage; // `read_u32` returns 0 for unmapped pages (heap.rs:510 returns // a default), so an all-zero block in the output reliably // indicates an invalid `r3`. 
let vtable = mem.read_u32(r3); let slot6_method = if vtable != 0 { mem.read_u32(vtable.wrapping_add(24)) } else { 0 }; let aux_handle = mem.read_u32(r3.wrapping_add(0x0C)); let sub_vt = mem.read_u32(r3.wrapping_add(0x30)); println!( "AUDIT-PC-PROBE pc={:#010x} tid={} hw={} cycle={} lr={:#010x} r3={:#010x} r11={:#010x} \ [r3+0]={:#010x} [[r3+0]+24]={:#010x} [r3+0x0C]={:#010x} [r3+0x30]={:#010x}", pc, tid, hw_id, cycle, lr, r3, r11, vtable, slot6_method, aux_handle, sub_vt, ); // AUDIT-2BF round 14 — paired memory-read. When // `audit_mem_read_addr` is set, dereference 3 deep: singleton // pointer → vtable → vtable[0] / vtable[24]. Defensively // null-checks each level. `read_u32` returns 0 for unmapped // pages so all-zero output is the unmapped/uninitialized // signature. if let Some(addr) = self.audit_mem_read_addr { let val = mem.read_u32(addr); let vt = if val != 0 { mem.read_u32(val) } else { 0 }; let m0 = if vt != 0 { mem.read_u32(vt) } else { 0 }; let m6 = if vt != 0 { mem.read_u32(vt.wrapping_add(24)) } else { 0 }; println!( "AUDIT-MEM-READ addr={:#010x} val={:#010x} vtable={:#010x} \ vtable[0]={:#010x} vtable[24]={:#010x} pc={:#010x} tid={} cycle={}", addr, val, vt, m0, m6, pc, tid, cycle, ); } // AUDIT-052 — dump N bytes of guest memory from r3 as u32 lanes // when `audit_r3_dump_bytes` is set. Sized for the 80-byte // stack-local struct at sub_82452DC0's `r31+96` (probe is // sub_8245B000 entry where r3 IS the struct ptr). Output // format: `AUDIT-R3-DUMP pc=… r3=… +0x00=… +0x04=… …`. 
if let Some(n) = self.audit_r3_dump_bytes { let n = n.min(256) & !3u32; // cap 256B, 4-byte align let mut out = String::with_capacity(64 + (n as usize) * 16); use std::fmt::Write as _; let _ = write!( &mut out, "AUDIT-R3-DUMP pc={:#010x} tid={} cycle={} r3={:#010x}", pc, tid, cycle, r3, ); let mut off: u32 = 0; while off < n { let v = mem.read_u32(r3.wrapping_add(off)); let _ = write!(&mut out, " +0x{:02x}={:#010x}", off, v); off = off.wrapping_add(4); } println!("{}", out); } } /// M12 — diagnostic. If the live PC for HW slot `hw_id` is in /// `self.lr_trace_pcs`, emit one JSONL record. Format mirrors what /// xenia-canary's `--log_lr_on_pc` patch emits, plus the cycle /// counter. Read-only; lockstep digest unaffected. pub fn fire_lr_trace_if_match(&self, hw_id: u8) { if self.lr_trace_pcs.is_empty() { return; } let ctx = self.scheduler.ctx(hw_id); let pc = ctx.pc; if !self.lr_trace_pcs.contains(&pc) { return; } let tid = self.scheduler.tid(hw_id).unwrap_or(0); let r3 = ctx.gpr[3] as u32; let r4 = ctx.gpr[4] as u32; let r5 = ctx.gpr[5] as u32; let r6 = ctx.gpr[6] as u32; let lr = ctx.lr as u32; let cycle = ctx.cycle_count; let line = format!( "{{\"pc\":\"{:#010x}\",\"tid\":{},\"hw\":{},\"cycle\":{},\ \"r3\":\"{:#010x}\",\"r4\":\"{:#010x}\",\"r5\":\"{:#010x}\",\ \"r6\":\"{:#010x}\",\"lr\":\"{:#010x}\"}}\n", pc, tid, hw_id, cycle, r3, r4, r5, r6, lr, ); match &self.lr_trace_writer { Some(mu) => { if let Ok(mut f) = mu.lock() { use std::io::Write; let _ = f.write_all(line.as_bytes()); } } None => { // Stdout path; small alloc, fine for diagnostic use. print!("{line}"); } } } /// Read a TLS slot for the currently running HW thread. pub fn tls_get(&self, index: u32) -> u64 { self.scheduler.tls_get(index) } /// Write a TLS slot for the currently running HW thread. pub fn tls_set(&mut self, index: u32, value: u64) { self.scheduler.tls_set(index, value); } /// Allocate a new global TLS slot index. Grows every HW thread's /// `tls_values` array to match. 
pub fn tls_alloc(&mut self) -> u32 { use std::sync::atomic::Ordering; // M2.4: atomic bump. The Scheduler::tls_grow_to call still needs // a coherent post-bump value, so we read the new size from the // fetch_add return. let idx = self.next_tls_index.fetch_add(1, Ordering::Relaxed); let new_size = idx + 1; self.scheduler.tls_grow_to(new_size as usize); idx } /// Allocate guest memory from the heap bump allocator. /// Returns the base address of the allocated region. pub fn heap_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option { use std::sync::atomic::Ordering; let aligned_size = (size + 0xFFF) & !0xFFF; // Page-align // M2.4: atomic bump, then verify post-bump invariants. If the // bump pushed us past the heap-region ceiling, the cursor stays // advanced — subsequent allocations also fail, matching the // pre-M2 sequential semantics. We don't try to "undo" the bump // because that opens a CAS-loop race for marginal benefit (a // failing alloc near the limit is already game-over). let base = self.heap_cursor.fetch_add(aligned_size, Ordering::Relaxed); let new_top = base.checked_add(aligned_size)?; if new_top > 0x6FFF_FFFF { return None; } let protect = xenia_memory::page_table::MemoryProtect::READ | xenia_memory::page_table::MemoryProtect::WRITE; mem.alloc(base, aligned_size, protect).ok()?; Some(base) } /// Allocate a kernel stack. pub fn stack_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option { use std::sync::atomic::Ordering; let aligned_size = (size + 0xFFF) & !0xFFF; let base = self.stack_cursor.fetch_add(aligned_size, Ordering::Relaxed); let protect = xenia_memory::page_table::MemoryProtect::READ | xenia_memory::page_table::MemoryProtect::WRITE; mem.alloc(base, aligned_size, protect).ok()?; Some(base + aligned_size) // Return top of stack } // ===== Timer subsystem ===== /// Idempotent arm — removes any prior entry for `handle`, then inserts /// the new `(deadline, handle)` pair and re-sorts by deadline ascending. 
/// The per-`Timer` object's `deadline` field must be set separately by
/// the caller (see `NtSetTimerEx` in exports.rs) — this helper only
/// manages the central pending-fires list so `fire_due_timers` has a
/// sorted head to peek.
pub fn arm_timer(&mut self, handle: u32, deadline: u64) {
    // Drop any previous arming of this handle so it appears at most once.
    self.disarm_timer(handle);
    self.pending_timer_fires.push((deadline, handle));
    // Stable sort: entries with equal deadlines keep insertion order.
    self.pending_timer_fires.sort_by_key(|&(d, _)| d);
}

/// Idempotent disarm — strip any entry for `handle`. Safe to call
/// regardless of prior state; `NtClose`, `NtCancelTimer`, and the
/// periodic-rearm guard all invoke this.
pub fn disarm_timer(&mut self, handle: u32) {
    self.pending_timer_fires.retain(|&(_, h)| h != handle);
}

/// Peek the earliest pending timer deadline, or `None` when no timer is
/// armed. Paired with `Scheduler::earliest_wait_deadline` by the main
/// loop's "advance to next event" coordination — the earlier of the two
/// drives `advance_all_timebases_to`.
pub fn earliest_timer_deadline(&self) -> Option<u64> {
    self.pending_timer_fires.first().map(|&(d, _)| d)
}

/// Coherent "now" basis for deadline arithmetic, gated on execution mode.
///
/// In **lockstep** (`parallel_active == false`) this returns exactly the
/// pre-existing per-thread `ctx(hw_id).timebase` each call site read
/// before, so the deterministic lockstep trace is byte-identical (no
/// golden re-baseline). In **parallel** (`parallel_active == true`) the
/// per-thread timebases are incoherent (workers extract/zero their slots
/// while stepping unlocked), so we return the scheduler's single
/// monotonic `global_clock` instead — the basis that breaks the
/// timebase-desync livelock. Callers pass the `hw_id` they would have
/// used for the lockstep `ctx()` read (slot 0 for coordinator-side
/// drains, the current thread's slot for in-guest waits).
pub fn now_basis_at(&self, hw_id: u8) -> u64 {
    if self.parallel_active {
        self.scheduler.global_clock()
    } else {
        self.scheduler.ctx(hw_id).timebase
    }
}

/// Fire every timer whose deadline is `<= now`, where `now` is
/// `now_basis_at(0)` (slot 0's timebase in lockstep, the scheduler's
/// global clock in parallel mode).
///
/// For each due entry: pop it from the pending list, mark the timer
/// `signaled = true`, set its `deadline` to the next periodic deadline
/// (or `None` for one-shot timers), re-arm it if periodic, then wake
/// eligible waiters via `exports::wake_eligible_waiters`.
///
/// Returns `true` iff at least one pending entry was processed — the
/// caller uses this to decide whether the scheduler round needs a
/// follow-up `advance_to_next_wake_if_due` step.
pub fn fire_due_timers(&mut self) -> bool {
    let now = self.now_basis_at(0);
    let mut fired = false;
    while let Some(&(deadline, handle)) = self.pending_timer_fires.first() {
        if deadline > now {
            break;
        }
        self.pending_timer_fires.remove(0);
        // Update the object in one borrow and carry only the rearm
        // deadline out, so the object is not borrowed while
        // `arm_timer` / `wake_eligible_waiters` run.
        let rearm_at = match self.objects.get_mut(&handle) {
            Some(KernelObject::Timer {
                signaled,
                deadline: obj_deadline,
                period_ticks,
                ..
            }) => {
                *signaled = true;
                // Saturate rather than overflow: the period comes from
                // the guest, and a plain `+` would panic in debug builds
                // (or wrap to a deadline in the past) on a huge value.
                let next = (*period_ticks > 0).then(|| now.saturating_add(*period_ticks));
                *obj_deadline = next;
                next
            }
            // Not a timer (any more) — the entry lingered because disarm
            // on NtClose was missed, OR fire_due_timers picked up a race.
            // Nothing to update or rearm.
            _ => None,
        };
        if let Some(next) = rearm_at {
            self.arm_timer(handle, next);
        }
        crate::exports::wake_eligible_waiters(self, handle);
        fired = true;
    }
    fired
}

/// Handle deadline-expiry cleanup for a thread whose wait timed out.
/// Called by the main loop right after `Scheduler::advance_to_next_wake`
/// returns a `Some((ref, reason))`.
Stamps `STATUS_TIMEOUT` into the /// woken thread's `gpr[3]` and scrubs its `ThreadRef` out of any /// handle's waiter list so a later signal can't consume the /// auto-reset slot into a stale waiter. /// /// `BlockReason::DelayUntil` is a pure sleep and expects /// `STATUS_SUCCESS` — the default pre-populated value in /// `ke_delay_execution_thread` — so we leave `gpr[3]` alone for it. pub fn handle_timeout_wake( &mut self, r: ThreadRef, reason: xenia_cpu::scheduler::BlockReason, ) { use xenia_cpu::scheduler::BlockReason; const STATUS_TIMEOUT: u64 = 0x0000_0102; match reason { BlockReason::WaitAny { handles, .. } | BlockReason::WaitAll { handles, .. } => { self.scheduler.ctx_mut_ref(r).gpr[3] = STATUS_TIMEOUT; for h in handles { if let Some(obj) = self.objects.get_mut(&h) { if let Some(waiters) = obj.waiters_mut() { waiters.retain(|&w| w != r); } } } } BlockReason::DelayUntil(_) => { // Pure sleep → default STATUS_SUCCESS is correct; no handles // to scrub. } BlockReason::CriticalSection(cs_ptr) => { self.scheduler.ctx_mut_ref(r).gpr[3] = STATUS_TIMEOUT; if let Some(list) = self.cs_waiters.get_mut(&cs_ptr) { list.retain(|&w| w != r); } } BlockReason::Suspended => {} } } } impl Default for KernelState { fn default() -> Self { Self::new() } } /// AUDIT-054 — pick the cache root path + wipe-on-init mode for a /// fresh `KernelState`. /// /// Default behaviour matches AUDIT-038: per-process tmpdir + full /// wipe so two consecutive runs see byte-identical initial state /// (lockstep / oracle determinism). AUDIT-054 found that Sylpheed's /// `cache:\.tmp` journal-style writes append on each boot, so /// a naive persistent root makes the on-disk state self-inconsistent /// after the second boot (`runtime_error` throws from version-check /// on reload). Opt-in to persistence via env: /// * `XENIA_CACHE_ROOT=` — explicit persistent path. Caller /// is responsible for wiping when needed. 
/// * `XENIA_CACHE_PERSIST=1` — use `$XDG_DATA_HOME/xenia-rs/cache` /// (or `$HOME/.local/share/xenia-rs/cache`) without wiping. /// /// Returns `(root, wipe)` where `wipe = true` triggers the /// `init_cache_root` clear-then-recreate dance. fn resolve_default_cache_root() -> (std::path::PathBuf, bool) { if let Ok(p) = std::env::var("XENIA_CACHE_ROOT") { if !p.is_empty() { return (std::path::PathBuf::from(p), false); } } let persist = std::env::var("XENIA_CACHE_PERSIST") .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) .unwrap_or(false); if persist { if let Ok(xdg) = std::env::var("XDG_DATA_HOME") { if !xdg.is_empty() { return ( std::path::PathBuf::from(xdg).join("xenia-rs/cache"), false, ); } } if let Ok(home) = std::env::var("HOME") { if !home.is_empty() { return ( std::path::PathBuf::from(home).join(".local/share/xenia-rs/cache"), false, ); } } } static NEXT_CACHE_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0); let id = NEXT_CACHE_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); ( std::env::temp_dir().join(format!( "xenia-rs-cache-{}-{}", std::process::id(), id )), true, ) } /// KRNBUG-AUDIT-003. Outcome of probing a guest pointer as the `this` /// of a C++ object: read `[this]` as the vtable, then attempt MSVC /// RTTI to recover the decorated class name. Pure read; lockstep-safe. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ClassReadout { /// MSVC RTTI was intact. `mangled` is the decorated name as stored /// in the TypeDescriptor (`.?AVEvent@silph@@` form). Named { vtable: u32, mangled: String }, /// `[this]` looked like a vtable pointer but RTTI was stripped (or /// the COL/TypeDescriptor chain didn't yield a printable name). /// `virtuals` are the first 4 vtable slots — resolve via the /// analysis DB's `functions` table for offline class identification. 
VtableOnly { vtable: u32, virtuals: [u32; 4] }, /// Either `this` itself isn't a plausible heap pointer or `[this]` /// doesn't land in the image's read-only-data range. Caller skips. NotAnObject, } /// Probe a candidate `this` pointer as a C++ object on the guest heap. /// Read-only; safe to call from the diagnostic dump path. Behaviour: /// 1. Reject non-heap candidate pointers (anything outside the user/ /// image range). /// 2. Read `[this]` as vtable; reject if it's not in the image range /// where MSVC stores read-only `vftable` symbols. /// 3. MSVC RTTI traversal: /// vtable[-4 bytes] = RTTICompleteObjectLocator* /// COL+0x0c = TypeDescriptor* /// TypeDescriptor+0x08 = mangled name (NUL-terminated ASCII) /// If every link looks plausible AND the name starts with `.?A` /// (the MSVC class-name prefix), return `Named`. /// 4. Otherwise return `VtableOnly` with the first 4 virtual slots /// so the caller can resolve method names via the analysis DB. pub fn read_class_at_this(this: u32, mem: &GuestMemory) -> ClassReadout { if !is_likely_guest_heap_ptr(this) { return ClassReadout::NotAnObject; } let vtable = mem.read_u32(this); if !is_likely_image_ptr(vtable) { return ClassReadout::NotAnObject; } let col = mem.read_u32(vtable.wrapping_sub(4)); if is_likely_image_ptr(col) { let type_desc = mem.read_u32(col.wrapping_add(12)); if is_likely_image_ptr(type_desc) { let name = read_ascii_cstring(mem, type_desc.wrapping_add(8), 128); if name.starts_with(".?A") { return ClassReadout::Named { vtable, mangled: name, }; } } } let virtuals = [ mem.read_u32(vtable), mem.read_u32(vtable.wrapping_add(4)), mem.read_u32(vtable.wrapping_add(8)), mem.read_u32(vtable.wrapping_add(12)), ]; // False-positive guard: when [this] points at the entry of a // function (e.g. 
the CRT static-init iterator with r31 holding a // pointer into the init-fn array), `vtable` is the function PC and // the "first virtuals" are the function's prologue *instructions* // — words like 0x7D8802A6 (`mflr r12`) which are NOT in the image // pointer range. A real C++ vtable's first slot is always a member // function pointer in the image range. Require the first slot AND // the second slot to look like image-range function pointers, // else return `NotAnObject`. if !is_likely_image_ptr(virtuals[0]) || !is_likely_image_ptr(virtuals[1]) { return ClassReadout::NotAnObject; } ClassReadout::VtableOnly { vtable, virtuals } } /// KRNBUG-AUDIT-003. At handle creation time, walk the captured frames /// and probe each frame's most-likely `this` candidates for an MSVC C++ /// class name. Returns one pre-formatted line per hit (Named or /// VtableOnly); silent on `NotAnObject` so the noise floor stays low. /// /// Candidates per frame: /// * Frame 0 (live): ctx.gpr[31] (canonical C++ `this`), ctx.gpr[30] /// (often a secondary captured `this` in nested method calls), and /// ctx.gpr[3] (the live first arg — at the moment NtCreateEvent is /// entered, this is `&Event` being constructed). /// * Frame K ≥ 1: read `[fp - 12]` and `[fp - 16]` — the standard /// PPC EABI `__savegprlr_NN` spill area where the callee's prologue /// placed the caller's r31 / r30 just before its `stwu`. So those /// slots hold the value of the function-at-frame-K's r31 / r30 /// captured at the moment IT made the bl into the next frame down. /// /// Read-only; never mutates guest state. 
pub fn probe_create_stack_classes( ctx: &PpcContext, frames: &[(u32, u32)], mem: &GuestMemory, ) -> Vec { let mut out = Vec::new(); for (idx, (fp, lr)) in frames.iter().enumerate() { let (raw_r31, raw_r30, raw_r3) = if idx == 0 { (ctx.gpr[31] as u32, ctx.gpr[30] as u32, ctx.gpr[3] as u32) } else { ( mem.read_u32(fp.wrapping_sub(12)), mem.read_u32(fp.wrapping_sub(16)), 0, ) }; // Emit one always-on raw line per frame so the back-chain plus // saved-register dump is captured even when the RTTI probe is // silent. Investigators can resolve the raw values offline via // the analysis DB (lookup of vtable / static-init iterator // pointers / etc. is otherwise impossible from logs alone). if idx == 0 { out.push(format!( "frame={} lr={:#010x} live r31={:#010x} r30={:#010x} r3={:#010x}", idx, lr, raw_r31, raw_r30, raw_r3, )); } else { out.push(format!( "frame={} lr={:#010x} saved-r31={:#010x} saved-r30={:#010x}", idx, lr, raw_r31, raw_r30, )); } let candidates: [(u32, &'static str); 3] = if idx == 0 { [(raw_r31, "r31"), (raw_r30, "r30"), (raw_r3, "r3")] } else { [ (raw_r31, "saved-r31"), (raw_r30, "saved-r30"), (0, ""), ] }; for (this_ptr, label) in candidates { if label.is_empty() { continue; } match read_class_at_this(this_ptr, mem) { ClassReadout::Named { vtable, mangled } => { out.push(format!( " → frame={} {}={:#010x} vtable={:#010x} class={}", idx, label, this_ptr, vtable, mangled, )); } ClassReadout::VtableOnly { vtable, virtuals } => { out.push(format!( " → frame={} {}={:#010x} vtable={:#010x} virtuals=[{:#010x},{:#010x},{:#010x},{:#010x}] (RTTI stripped)", idx, label, this_ptr, vtable, virtuals[0], virtuals[1], virtuals[2], virtuals[3], )); } ClassReadout::NotAnObject => {} } } } out } /// Heap-pointer plausibility: Xbox 360 user heap is 0x40000000–0x50000000; /// the image and read-only-data are 0x82000000–0x83000000. Allow both — /// dispatcher objects in Sylpheed live in static-init pools (image rdata) /// AND in heap-allocated singletons. 
fn is_likely_guest_heap_ptr(p: u32) -> bool {
    // Either the user-heap window or the image window qualifies.
    (0x4000_0000..=0x4FFF_FFFF).contains(&p) || (0x8200_0000..=0x82FF_FFFF).contains(&p)
}

/// Returns `true` when `p` lies in `0x8200_0000..=0x82FF_FFFF`, the window
/// in which the module image (and therefore vtables and RTTI records) is
/// expected to be mapped.
fn is_likely_image_ptr(p: u32) -> bool {
    (0x8200_0000..=0x82FF_FFFF).contains(&p)
}

/// Reads printable ASCII from guest memory starting at `addr`.
///
/// Stops at the first NUL byte or after `max` bytes, whichever comes first,
/// and returns what was collected up to that point. Any byte outside
/// `0x20..=0x7E` makes the whole read yield the empty string, signalling
/// that `addr` most likely does not point at a name.
fn read_ascii_cstring(mem: &GuestMemory, addr: u32, max: usize) -> String {
    let mut text = String::with_capacity(max);
    for offset in 0..max {
        match mem.read_u8(addr.wrapping_add(offset as u32)) {
            // Terminator: keep what we have.
            0 => break,
            byte @ 0x20..=0x7E => text.push(char::from(byte)),
            // Control or non-ASCII byte: not a name at all.
            _ => return String::new(),
        }
    }
    text
}

/// Walk the PPC EABI back-chain starting from `sp` (the value in r1 at
/// the moment of capture). Returns up to `max_frames` entries of
/// `(frame_pointer, saved_lr)`. Index 0 is the live frame
/// `(sp, live_lr)` — `live_lr` is the caller-supplied current LR, since
/// it has not yet been spilled to memory by this frame's prologue.
///
/// PPC convention reminder: a function's prologue stores the caller's
/// LR at `[old_sp - 8]` *before* bumping `r1` down to the new frame. So
/// from the live `sp`, `prev_sp = mem[sp]` and the LR saved in the
/// frame above is at `mem[prev_sp - 8]`. The walk stops on a
/// 0/0xFFFFFFFF/self-loop sentinel — those guard against
/// uninitialized stacks and the topmost frame.
///
/// This is read-only; it never mutates guest memory or CPU state.
pub fn walk_guest_back_chain( sp: u32, live_lr: u32, mem: &GuestMemory, max_frames: usize, ) -> Vec<(u32, u32)> { let mut frames = Vec::with_capacity(max_frames); if max_frames == 0 { return frames; } frames.push((sp, live_lr)); let mut cur = sp; while frames.len() < max_frames { if cur == 0 || cur == 0xFFFF_FFFF { break; } let prev = mem.read_u32(cur); if prev == 0 || prev == 0xFFFF_FFFF || prev == cur { break; } let saved_lr = mem.read_u32(prev.wrapping_sub(8)); frames.push((prev, saved_lr)); cur = prev; } frames } #[cfg(test)] mod tests { use super::*; use xenia_memory::GuestMemory; /// Ten consecutive `heap_alloc(0x14)` calls must return distinct /// page-aligned addresses. A previous bug had kernel exports passing 0 as /// `size`, causing the bump allocator to return the same address every /// time — 10 "allocations" that all aliased 0x40105000 and silently /// corrupted the guest's static-constructor state. #[test] fn heap_alloc_advances_for_nonzero_size() { let mut mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let mut seen = Vec::new(); for _ in 0..10 { let addr = state .heap_alloc(0x14, &mut mem) .expect("heap must have room for 0x14 bytes"); assert_eq!(addr & 0xFFF, 0, "heap returns page-aligned addresses"); assert!(!seen.contains(&addr), "heap returned duplicate address {addr:#x}"); seen.push(addr); } } /// `heap_alloc(0)` must not advance the cursor (it has nothing to do). /// The kernel exports that previously hit this path did so because they /// read the wrong argument register; guarded at the export boundary now. 
#[test] fn heap_alloc_zero_is_noop_in_cursor() { use std::sync::atomic::Ordering; let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let before = state.heap_cursor.load(Ordering::Relaxed); let _ = state.heap_alloc(0, &mem); let after = state.heap_cursor.load(Ordering::Relaxed); assert_eq!(before, after, "zero-size alloc must not advance heap cursor"); } /// M2.4: concurrent handle allocations must produce distinct values. /// Ten threads each allocate 100 handles via `alloc_handle`; the union /// must contain exactly 1000 distinct values, and the maximum equals /// `0x1000 + 4 * (1000 - 1)` (ascending step is 4 per the kernel /// allocator's policy). #[test] fn concurrent_alloc_handle_distinct() { use std::collections::HashSet; use std::sync::Mutex; use std::sync::atomic::{AtomicU32, Ordering}; // Use a free-standing AtomicU32 mirroring `next_handle`'s semantics; // we can't easily share `&mut KernelState` across threads. The // production code uses the same `fetch_add(4, Relaxed)` recipe. let counter = std::sync::Arc::new(AtomicU32::new(0x1000)); let collected: std::sync::Arc>> = std::sync::Arc::new(Mutex::new(HashSet::new())); let mut handles = Vec::new(); for _ in 0..10 { let c = counter.clone(); let s = collected.clone(); handles.push(std::thread::spawn(move || { let mut local = Vec::with_capacity(100); for _ in 0..100 { local.push(c.fetch_add(4, Ordering::Relaxed)); } let mut g = s.lock().unwrap(); for v in local { g.insert(v); } })); } for h in handles { h.join().unwrap(); } let set = collected.lock().unwrap(); assert_eq!( set.len(), 1000, "expected 1000 distinct handles, got {}", set.len() ); assert!(set.iter().all(|h| (h - 0x1000) % 4 == 0)); } /// KRNBUG-AUDIT-002: synthesize a 3-level back-chain in mapped guest /// memory and walk it. Verifies that frame 0 is the live-LR frame and /// that subsequent frames pull `prev_sp` from `[sp]` and the saved LR /// from `[prev_sp - 8]`. 
#[test] fn back_chain_walker_resolves_synthetic_frames() { let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let base = state.heap_alloc(0x4000, &mem).expect("scratch"); // Lay out three frames inside the scratch page. Each frame gets // its own 0x100-byte slot. Frame N's `[sp + 0]` points at frame // N+1's sp, and frame N+1's `[sp - 8]` holds the LR saved by // that frame for the call into frame N. let sp0 = base + 0x100; let sp1 = base + 0x300; let sp2 = base + 0x500; // Back-chain pointers mem.write_u32(sp0, sp1); mem.write_u32(sp1, sp2); mem.write_u32(sp2, 0); // top of stack // Saved LRs (the LR of the call that reached the *next* frame // up are stored at the next frame's sp - 8) mem.write_u32(sp1.wrapping_sub(8), 0xAAAA_BBBB); mem.write_u32(sp2.wrapping_sub(8), 0xCCCC_DDDD); let frames = walk_guest_back_chain(sp0, 0x1111_2222, &mem, 6); assert_eq!(frames.len(), 3); assert_eq!(frames[0], (sp0, 0x1111_2222)); assert_eq!(frames[1], (sp1, 0xAAAA_BBBB)); assert_eq!(frames[2], (sp2, 0xCCCC_DDDD)); } /// Walker must not loop on a self-referential back-chain (a corrupted /// frame where `[sp] == sp`). #[test] fn back_chain_walker_stops_on_self_loop() { let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let base = state.heap_alloc(0x1000, &mem).expect("scratch"); let sp = base + 0x100; mem.write_u32(sp, sp); // self-loop let frames = walk_guest_back_chain(sp, 0x4242_4242, &mem, 6); assert_eq!(frames.len(), 1); assert_eq!(frames[0], (sp, 0x4242_4242)); } /// Walker must terminate on the standard top-of-stack sentinel /// (`[sp] == 0`) without spilling a bogus frame. 
#[test] fn back_chain_walker_stops_on_zero_sentinel() { let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let base = state.heap_alloc(0x1000, &mem).expect("scratch"); let sp = base + 0x100; mem.write_u32(sp, 0); let frames = walk_guest_back_chain(sp, 0x8242_0000, &mem, 6); assert_eq!(frames.len(), 1); assert_eq!(frames[0], (sp, 0x8242_0000)); } /// KRNBUG-AUDIT-003: synthesize a C++ object with intact MSVC RTTI /// in mapped guest memory. The probe must traverse vtable[-4] → /// COL → TypeDescriptor and recover the decorated mangled name. #[test] fn read_class_at_this_resolves_intact_rtti() { use xenia_memory::page_table::MemoryProtect; let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let this = state.heap_alloc(0x40, &mem).expect("heap object"); // Map an image-range page so vtable / COL / TypeDescriptor // pointers pass `is_likely_image_ptr`. let img = 0x8280_0000u32; mem.alloc(img, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) .expect("image-range page"); let vtable = img + 0x40; let col = img + 0x80; let type_desc = img + 0xC0; // [this] = vtable mem.write_u32(this, vtable); // vtable[-4] = COL (one word before the first virtual) mem.write_u32(vtable.wrapping_sub(4), col); // COL+0xC = TypeDescriptor mem.write_u32(col + 12, type_desc); // TypeDescriptor+8 = NUL-terminated mangled name let name = b".?AVAsyncQueue@silph@@\0"; for (i, b) in name.iter().enumerate() { mem.write_u8(type_desc + 8 + i as u32, *b); } let r = read_class_at_this(this, &mem); match r { ClassReadout::Named { vtable: v, mangled } => { assert_eq!(v, vtable); assert_eq!(mangled, ".?AVAsyncQueue@silph@@"); } other => panic!("expected Named, got {:?}", other), } } /// RTTI-stripped fallback: vtable looks plausible but vtable[-4] is /// zero. The probe must return `VtableOnly` with the first 4 virtual /// PCs so the caller can resolve method names via the analysis DB. 
#[test] fn read_class_at_this_falls_back_when_rtti_stripped() { use xenia_memory::page_table::MemoryProtect; let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); let this = state.heap_alloc(0x40, &mem).expect("heap object"); let img = 0x8281_0000u32; mem.alloc(img, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) .expect("image-range page"); let vtable = img + 0x100; mem.write_u32(this, vtable); // No COL — vtable[-4] left as zero, which fails `is_likely_image_ptr`. // Populate first four virtuals with image-range PCs. let virts = [0x8200_AAAA, 0x8201_BBBB, 0x8202_CCCC, 0x8203_DDDD]; for (i, v) in virts.iter().enumerate() { mem.write_u32(vtable + (i as u32) * 4, *v); } match read_class_at_this(this, &mem) { ClassReadout::VtableOnly { vtable: v, virtuals, } => { assert_eq!(v, vtable); assert_eq!(virtuals, virts); } other => panic!("expected VtableOnly, got {:?}", other), } } /// `this` outside the heap/image range, or `[this]` not in the image /// range, must yield `NotAnObject` so the dump skips the candidate /// without printing noise. #[test] fn read_class_at_this_rejects_non_objects() { use xenia_memory::page_table::MemoryProtect; let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); // Out-of-range this. assert_eq!( read_class_at_this(0x0000_1234, &mem), ClassReadout::NotAnObject ); assert_eq!( read_class_at_this(0xFFFF_FFFF, &mem), ClassReadout::NotAnObject ); // In-range `this`, but [this] is zero (unmapped → reads as 0, // which is not a plausible image pointer). let this = state.heap_alloc(0x40, &mem).expect("heap object"); assert_eq!(read_class_at_this(this, &mem), ClassReadout::NotAnObject); // In-range this, [this] points into the heap range — also rejected // because vtables live in the image rdata. 
mem.alloc(0x4500_0000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) .expect("aux heap page"); mem.write_u32(this, 0x4500_0080); assert_eq!(read_class_at_this(this, &mem), ClassReadout::NotAnObject); } /// `probe_create_stack_classes` is the integration of the back-chain /// walker output and the per-frame RTTI probe used at handle creation /// time. Build a minimal 2-frame scenario where frame 1's /// `[fp - 12]` saved-r31 slot points at a heap C++ object with intact /// MSVC RTTI, and verify the helper produces a `class=...` line. #[test] fn probe_create_stack_classes_recovers_saved_r31_class() { use xenia_memory::page_table::MemoryProtect; let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); // Heap-allocate a fake `this` and lay out vtable / COL / TD in // an image-range page. let this = state.heap_alloc(0x40, &mem).expect("heap object"); let img = 0x8282_0000u32; mem.alloc(img, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) .expect("image-range page"); let vtable = img + 0x40; let col = img + 0x80; let td = img + 0xC0; mem.write_u32(this, vtable); mem.write_u32(vtable.wrapping_sub(4), col); mem.write_u32(col + 12, td); for (i, b) in b".?AVDispatcher@silph@@\0".iter().enumerate() { mem.write_u8(td + 8 + i as u32, *b); } // Synthesize a 2-frame back-chain. Place the saved-r31 slot at // [frames[1].fp - 12] = `this`. let stack_base = state.heap_alloc(0x4000, &mem).expect("stack page"); let sp0 = stack_base + 0x100; let sp1 = stack_base + 0x300; mem.write_u32(sp1.wrapping_sub(12), this); let frames = vec![(sp0, 0x824a_9f6c), (sp1, 0x8217_8500)]; // Live ctx — r3 holds &Event (some random value, not a real // class), r31/r30 zero so frame 0 produces no hits. 
let mut ctx = PpcContext::new(); ctx.gpr[3] = 0x4000_BEEF; let probes = probe_create_stack_classes(&ctx, &frames, &mem); assert!(probes.iter().any(|s| s.contains(".?AVDispatcher@silph@@")), "expected probes to contain the dispatcher class, got {:?}", probes); assert!(probes.iter().any(|s| s.contains("frame=1")), "expected at least one frame=1 line, got {:?}", probes); } /// A NUL-terminated ASCII string is read up to `max`; non-printable /// bytes mark the candidate as bogus (return empty string). The /// `.?A` prefix gating in `read_class_at_this` then rejects them. /// `fire_ctor_probe_if_match` only emits when `pc` matches a /// configured PC. We assert it's a no-op on miss and a no-panic /// on hit (the println goes to stdout; we just check the helper /// reads the back-chain without faulting). #[test] fn fire_ctor_probe_if_match_no_op_on_empty_set() { let mem = GuestMemory::new().expect("memory init"); let state = KernelState::new(); // No probes set → must be a no-op even when the scheduler // ctx has whatever PC. state.fire_ctor_probe_if_match(0, &mem); assert!(state.ctor_probe_pcs.is_empty()); } #[test] fn fire_ctor_probe_if_match_only_fires_on_listed_pc() { // We can't easily redirect stdout under cargo-test, so this // test mostly verifies the membership check + that no panic // occurs when frame walking encounters zero/sentinel pages. // The empty-stack walk returns just `[(sp, lr)]`, exercising // the loop body once safely. let mem = GuestMemory::new().expect("memory init"); let mut state = KernelState::new(); state.ctor_probe_pcs.insert(0x8217_C850); // The default PpcContext on slot 0 has pc=0 (idle sentinel), // so the probe set membership test misses → no fire. state.fire_ctor_probe_if_match(0, &mem); // Sanity: an unrelated PC isn't claimed. 
assert!(!state.ctor_probe_pcs.contains(&0x8200_0000)); assert!(state.ctor_probe_pcs.contains(&0x8217_C850)); } #[test] fn read_ascii_cstring_handles_termination_and_garbage() { use xenia_memory::page_table::MemoryProtect; let mem = GuestMemory::new().expect("memory init"); mem.alloc(0x4000_0000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) .expect("page"); let addr = 0x4000_0100u32; // Plain NUL-terminated. mem.write_bytes(addr, b"hello\0world"); assert_eq!(read_ascii_cstring(&mem, addr, 32), "hello"); // Non-printable byte should reject the read. mem.write_u8(addr, 0x01); assert_eq!(read_ascii_cstring(&mem, addr, 32), ""); } }