Adds a read-only MSVC RTTI traversal helper (`read_class_at_this`)
and a `probe_create_stack_classes` integration that walks each
captured back-chain frame for handle creates in `--trace-handles-focus`
and probes each frame's most-likely `this` candidate (live r31/r30/r3
for frame 0; saved-r31/r30 from the prologue spill area at [fp-12]/
[fp-16] for deeper frames). False-positive guard rejects the CRT
static-init iterator pattern (vtable's first two slots must be image-
range function pointers — PPC instruction words like `mflr r12` are
not in 0x82xxxxxx).
`dump_thread_diagnostic` now takes `&GuestMemory` so the FOCUS report
prints, for each parked waiter, a WAIT-THREAD block with full back-
chain frames and per-slot saved-register dump for offline lookup.
End-to-end finding (-n 500M producer-trace):
* Handle 0x100c dispatcher = 0x828F3D08 (image rdata; verified by
sub_82181750 disasm + xref table). [this+0] = -1 sentinel — POD
job queue, NOT a C++ polymorphic class.
* Handle 0x15e0 dispatcher = 0x828F4070 (same shape).
* Handle 0x1004's 8-instance pool members still TBD (MSVC ctors
didn't preserve `this` in r31).
* 0x42450b5c is a separate audit class (heap-allocated, parks via
non-`do_wait_single` path).
Decisive xref audit: every reference to 0x828F3D08 / 0x828F4070 in
the static analysis is in a ctor or the CRT init driver. NO producer
code references either dispatcher base. Confirms `signal_attempts=0`
is unreachable-producer, not broken-producer.
Tests: 581 → 586 green (+5: RTTI-intact / RTTI-stripped / non-object
/ cstring / probe_create_stack integration). `--stable-digest -n
100M` instructions=100000002 unchanged. Master HEAD prior: 6440261.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1249 lines
55 KiB
Rust
1249 lines
55 KiB
Rust
use std::collections::HashMap;
|
||
use xenia_cpu::scheduler::{PcrWriter, Scheduler};
|
||
use xenia_cpu::{PpcContext, ThreadRef};
|
||
use xenia_memory::{GuestMemory, MemoryAccess};
|
||
use xenia_vfs::VfsDevice;
|
||
|
||
use crate::audit::{HandleAudit, HandleAuditEntry};
|
||
use crate::objects::KernelObject;
|
||
use crate::ui_bridge::UiBridge;
|
||
|
||
/// Adapter: write PCR+0x2C on guest memory. Lets `Scheduler::spawn` and
|
||
/// Axis 4's migration call through without `xenia-cpu` depending on the
|
||
/// memory crate.
|
||
pub struct GuestMemoryPcr<'a>(pub &'a GuestMemory);
|
||
impl PcrWriter for GuestMemoryPcr<'_> {
    /// Stores `hw_id`, widened to 32 bits, at `pcr_base + 0x2C` in guest
    /// memory.
    fn write_pcr_id(&mut self, pcr_base: u32, hw_id: u8) {
        // A shared `&GuestMemory` is all we need: `GuestMemory::write_u32`
        // takes `&self` since the M2 trait flip.
        let guest = self.0;
        guest.write_u32(pcr_base + 0x2C, u32::from(hw_id));
    }
}
|
||
|
||
/// Function signature for HLE kernel exports.
|
||
///
|
||
/// The first argument is the **currently running** HW thread's `PpcContext`,
|
||
/// which the caller has temporarily moved out of the scheduler slot to avoid
|
||
/// aliasing. Exports that only touch register/GPR state use `ctx` directly;
|
||
/// exports that need scheduler state (spawn/park/wake/tls/etc.) reach
|
||
/// through `state.scheduler` — note that `state.scheduler.hw_threads[current]`
|
||
/// holds a placeholder `PpcContext` for the duration of the call, not the
|
||
/// live one passed as `ctx`.
|
||
pub type KernelExportFn = fn(&mut PpcContext, &GuestMemory, &mut KernelState);
|
||
|
||
/// Identifies the kernel module an export belongs to. Used together with an
/// ordinal as the key of the export and thunk tables.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ModuleId {
    Xboxkrnl,
    Xam,
    Xbdm,
}
|
||
|
||
/// Pseudo-`HMODULE` for xboxkrnl, as returned by `XexGetModuleHandle` and
/// accepted by `XexGetProcedureAddress`.
///
/// The `0xFFFE_xxxx` prefix is used by neither guest segments nor our handle
/// allocator, so these values cannot collide with real loaded-image bases
/// (>=0x82000000) or with kernel handles (0x1000+, from `alloc_handle`).
pub const HMODULE_XBOXKRNL: u32 = 0xFFFE_0001;
/// Pseudo-`HMODULE` for XAM; same scheme as [`HMODULE_XBOXKRNL`].
pub const HMODULE_XAM: u32 = 0xFFFE_0002;
|
||
|
||
/// Central kernel state tracking all guest OS state.
|
||
pub struct KernelState {
|
||
exports: HashMap<(ModuleId, u32), (&'static str, KernelExportFn)>,
|
||
/// M2.4: bump allocator for kernel handles. `AtomicU32` so concurrent
|
||
/// HLE calls under M3 can `fetch_add` without a lock. `Relaxed` is
|
||
/// fine — the allocated value is a fresh ID with no prior payload to
|
||
/// publish; observers (the kernel object table) are guarded by
|
||
/// their own synchronization.
|
||
next_handle: std::sync::atomic::AtomicU32,
|
||
/// Scheduler managing all emulated HW threads + their per-slot
|
||
/// runqueues. Starts empty — the app installs the initial guest thread
|
||
/// on slot 0 via `KernelState::install_initial_thread` once it has the
|
||
/// entry address.
|
||
pub scheduler: Scheduler,
|
||
/// TLS slot allocator — index counter only. Per-thread *values* live on
|
||
/// `GuestThread::tls_values` (see scheduler). M2.4: `AtomicU32`.
|
||
pub next_tls_index: std::sync::atomic::AtomicU32,
|
||
/// Critical-section waiter map: guest `cs_ptr` → guest threads parked
|
||
/// on it. Critical sections are in guest memory (not kernel objects),
|
||
/// so their waiter list lives here rather than on an object.
|
||
pub cs_waiters: HashMap<u32, Vec<ThreadRef>>,
|
||
/// Kernel object table: handle → object
|
||
pub objects: HashMap<u32, KernelObject>,
|
||
/// Bump allocator for guest heap (NtAllocateVirtualMemory etc.).
|
||
/// M2.4: `AtomicU32` for lock-free concurrent allocation.
|
||
pub heap_cursor: std::sync::atomic::AtomicU32,
|
||
/// Stack allocator cursor for MmCreateKernelStack. M2.4: atomic.
|
||
pub stack_cursor: std::sync::atomic::AtomicU32,
|
||
/// GPU command buffer address (set by VdGetSystemCommandBuffer)
|
||
pub gpu_command_buffer: u32,
|
||
/// GPU backend. M1.4: was `xenia_gpu::GpuSystem` directly, now a
|
||
/// [`xenia_gpu::GpuBackend`] enum so the kernel can hold either an
|
||
/// inline `GpuSystem` (synchronous, default) or a `GpuHandle` proxy
|
||
/// pointing at a worker thread (`--gpu-thread`). Forwarding methods
|
||
/// on the enum keep call sites in [`crate::exports`] terse.
|
||
pub gpu: xenia_gpu::GpuBackend,
|
||
/// Monotonic packet number returned by `XamInputGetState`. Games detect
|
||
/// input changes by watching this increment.
|
||
pub input_packet_number: u32,
|
||
/// Previous gamepad snapshot; `input_packet_number` only advances when
|
||
/// the state bytes actually change, matching host XInput semantics.
|
||
pub last_input_bytes: u128,
|
||
/// Image base of the loaded XEX (for XexExecutableModuleHandle etc.)
|
||
pub image_base: u32,
|
||
/// Next thread ID. M2.4: atomic.
|
||
pub next_thread_id: std::sync::atomic::AtomicU32,
|
||
/// Virtual file system for NtCreateFile/NtReadFile/etc. The app mounts
|
||
/// the disc image or host directory into this slot; file I/O handlers
|
||
/// route all reads through it.
|
||
pub vfs: Option<Box<dyn VfsDevice>>,
|
||
/// Bridge to the host UI. `None` when running headless. Installed by
|
||
/// `cmd_exec` when the user passes `--ui`.
|
||
pub ui: Option<UiBridge>,
|
||
/// P6 — graphics interrupt + synthetic v-sync bookkeeping. Registers
|
||
/// the callback set by `VdSetGraphicsInterruptCallback` and tracks
|
||
/// the paused-context snapshot while HW thread 0 is running it.
|
||
pub interrupts: crate::interrupts::InterruptState,
|
||
/// XAudio render-driver clients + buffer-complete callback ticker.
|
||
/// Mirrors canary's [`xenia/apu/audio_system.cc`] worker — registered
|
||
/// guest callbacks can fire at the audio frame rate so guest threads
|
||
/// parked on audio-buffer events get woken (APUBUG-PRODUCER-001).
|
||
/// Shares the [`crate::interrupts::InterruptState::saved`] /
|
||
/// `injected_ref` slot at injection time; mutual exclusion with
|
||
/// graphics interrupts is enforced by the injector's
|
||
/// `is_in_callback()` guard.
|
||
pub xaudio: crate::xaudio::XAudioState,
|
||
/// Default false. When true, the round prologue runs the XAudio
|
||
/// ticker + `try_inject_audio_callback`. Off by default because the
|
||
/// callback firing shifts the boot trajectory under Sylpheed
|
||
/// (regresses `swaps=2`→`1` and 12×s `imports`), which would break
|
||
/// the `sylpheed_n*m.json` lockstep goldens. Flipped on by
|
||
/// `--xaudio-tick` / `XENIA_XAUDIO_TICK=1` for the diagnostic
|
||
/// producer-hunt path.
|
||
pub xaudio_tick_enabled: bool,
|
||
/// Per-handle refcount. Since `NtDuplicateObject` aliases (returns the
|
||
/// source handle value as the "new" handle rather than minting a fresh
|
||
/// id), a single handle commonly has multiple logical references. This
|
||
/// map tracks that count so a stray `NtClose` on one reference doesn't
|
||
/// destroy the object while another reference is still live. Canary's
|
||
/// `ObjectTable::ReleaseHandle` (object_table.cc:189) is the parity
|
||
/// reference. Initialized to 1 in `alloc_handle_for`; incremented in
|
||
/// `nt_duplicate_object` when `DUPLICATE_CLOSE_SOURCE` is absent;
|
||
/// decremented in `nt_close` which drops the underlying object only
|
||
/// when the count reaches zero.
|
||
pub handle_refcount: HashMap<u32, u32>,
|
||
/// Pending timer expirations — `(deadline, handle)` sorted ascending by
|
||
/// deadline. Pushed by `arm_timer`, popped by `fire_due_timers`. Kept in
|
||
/// lockstep with the per-`Timer` object's `deadline` field via the
|
||
/// `arm_timer`/`disarm_timer` helpers. See the plan's step 3/6 for the
|
||
/// design rationale — timer deadlines coexist with
|
||
/// `Scheduler::timed_waits` but track a different class (signaled object
|
||
/// fires, not thread wake-ups).
|
||
pub pending_timer_fires: Vec<(u64, u32)>,
|
||
/// Per-handle signal/wait/wake audit trail. Default `enabled=false` →
|
||
/// every record method is a no-op. Flip via `--trace-handles`/
|
||
/// `XENIA_TRACE_HANDLES` to diagnose missing-signal deadlocks (handles
|
||
/// 0x10FC / 0x1014 / 0x1104 / 0x10DC / 0x10F0 specifically). See
|
||
/// [`crate::audit`] for layout.
|
||
pub audit: HandleAudit,
|
||
/// M2.2 — banked reservation table for `lwarx`/`stwcx.` under M3's
|
||
/// per-HW-thread parallelism. Always allocated. Consulted by the
|
||
/// interpreter when `reservations.is_enabled()` is true; otherwise
|
||
/// the legacy per-`PpcContext` fields drive observable behavior.
|
||
/// Settable via `--reservations-table` / `XENIA_RESERVATIONS_TABLE=1`
|
||
/// for golden verification, or implicitly under `--parallel`.
|
||
/// See [`xenia_cpu::ReservationTable`] for the concurrency model.
|
||
pub reservations: std::sync::Arc<xenia_cpu::ReservationTable>,
|
||
/// True when the runtime was started with `--parallel`. Read by the
|
||
/// v-sync ticker (KRNBUG-D08): lockstep uses the deterministic
|
||
/// instruction-count proxy so the `sylpheed_n*m.json` goldens stay
|
||
/// bit-stable; `--parallel` uses wall-clock so the rate doesn't
|
||
/// drop to ~2 v-syncs / 100M as the instruction-count proxy did.
|
||
/// Set once at startup and never mutated.
|
||
pub parallel_active: bool,
|
||
/// Map from `(module, ordinal)` to the guest-side import-thunk address
|
||
/// resolved at load time. Reverse of `xenia-app/src/main.rs`'s
|
||
/// `thunk_map`. Populated from xenia-app's Phase 1 (record_type==1
|
||
/// only). Used by `xex_get_procedure_address` to resolve ordinals back
|
||
/// to callable thunks.
|
||
thunks_by_ordinal: HashMap<(ModuleId, u16), u32>,
|
||
/// First-Pixels diagnostic latch. Set the first time
|
||
/// `RtlRaiseException` fires with code `0xE06D7363` (MSVC C++ throw)
|
||
/// so the deep stack-walk + `runtime_error` decode in
|
||
/// `rtl_raise_exception` only emits once per run, regardless of how
|
||
/// many subsequent throws fire. Reset on each fresh process start.
|
||
pub cxx_throw_logged: bool,
|
||
/// Cached primary ring base/size, set during `VdInitializeRingBuffer`.
|
||
/// Used by `vd_swap` (KRNBUG-Vd-04) so the kernel can write PM4
|
||
/// packets directly into ring memory without going through the GPU
|
||
/// backend (which lives on the worker thread under `--gpu-thread`).
|
||
pub ring_base: u32,
|
||
pub ring_size_dwords: u32,
|
||
}
|
||
|
||
impl KernelState {
|
||
/// Construct a kernel with the supplied GPU backend.
|
||
///
|
||
/// The caller (typically `cmd_exec_inner`) decides whether to install
|
||
/// an inline backend (default) or a threaded one (`--gpu-thread`).
|
||
/// Most existing call sites build via [`Self::new`], which defaults to
|
||
/// an inline backend; the threaded constructor lives at
|
||
/// [`Self::with_gpu`].
|
||
pub fn with_gpu(gpu: xenia_gpu::GpuBackend) -> Self {
|
||
// Scheduler starts empty; the app installs the initial thread on
|
||
// slot 0 via `install_initial_thread` right after construction.
|
||
let mut scheduler = Scheduler::new();
|
||
use std::sync::atomic::AtomicU32;
|
||
let reservations = std::sync::Arc::new(xenia_cpu::ReservationTable::new());
|
||
// M3.7 — wire the reservation table to the scheduler so
|
||
// `spawn`/`install_initial_thread` populate every PpcContext's
|
||
// `reservation_table` clone. The table is `disabled` by
|
||
// default; `--reservations-table` / `XENIA_RESERVATIONS_TABLE`
|
||
// / M3 spawn flip it on.
|
||
scheduler.set_reservation_table(Some(reservations.clone()));
|
||
let mut state = Self {
|
||
exports: HashMap::new(),
|
||
next_handle: AtomicU32::new(0x1000),
|
||
scheduler,
|
||
next_tls_index: AtomicU32::new(0),
|
||
cs_waiters: HashMap::new(),
|
||
objects: HashMap::new(),
|
||
heap_cursor: AtomicU32::new(0x4000_0000), // Start of user heap region
|
||
stack_cursor: AtomicU32::new(0x7100_0000), // Above main stack
|
||
gpu_command_buffer: 0,
|
||
gpu,
|
||
input_packet_number: 0,
|
||
last_input_bytes: 0,
|
||
image_base: 0,
|
||
next_thread_id: AtomicU32::new(1),
|
||
vfs: None,
|
||
ui: None,
|
||
interrupts: crate::interrupts::InterruptState::default(),
|
||
xaudio: crate::xaudio::XAudioState::default(),
|
||
xaudio_tick_enabled: false,
|
||
handle_refcount: HashMap::new(),
|
||
pending_timer_fires: Vec::new(),
|
||
audit: HandleAudit::default(),
|
||
reservations,
|
||
thunks_by_ordinal: HashMap::new(),
|
||
cxx_throw_logged: false,
|
||
ring_base: 0,
|
||
ring_size_dwords: 0,
|
||
parallel_active: false,
|
||
};
|
||
crate::exports::register_exports(&mut state);
|
||
crate::xam::register_exports(&mut state);
|
||
state
|
||
}
|
||
|
||
/// Default constructor — installs an inline `GpuSystem`. Kept for
|
||
/// callers that don't (yet) thread a `GpuBackend` choice through.
|
||
pub fn new() -> Self {
|
||
Self::with_gpu(xenia_gpu::GpuBackend::Inline(xenia_gpu::GpuSystem::new()))
|
||
}
|
||
|
||
pub fn register_export(
|
||
&mut self,
|
||
module: ModuleId,
|
||
ordinal: u32,
|
||
name: &'static str,
|
||
func: KernelExportFn,
|
||
) {
|
||
self.exports.insert((module, ordinal), (name, func));
|
||
}
|
||
|
||
/// Record an import-thunk address resolved at load time. Called once
|
||
/// per `record_type==1` import in xenia-app's Phase 1. Idempotent: a
|
||
/// duplicate ordinal overwrites (later wins; in practice the loader
|
||
/// emits each ordinal once per module).
|
||
pub fn register_thunk(&mut self, module: ModuleId, ordinal: u16, address: u32) {
|
||
self.thunks_by_ordinal.insert((module, ordinal), address);
|
||
}
|
||
|
||
/// Resolve a `(module, ordinal)` to its registered thunk address.
|
||
pub fn resolve_thunk(&self, module: ModuleId, ordinal: u16) -> Option<u32> {
|
||
self.thunks_by_ordinal.get(&(module, ordinal)).copied()
|
||
}
|
||
|
||
/// Map a pseudo-`HMODULE` (as returned by `XexGetModuleHandle`) back
|
||
/// to its `ModuleId`. Returns `None` for unknown handles, including
|
||
/// the loaded XEX's `image_base` (which is *not* a kernel module).
|
||
pub fn module_id_from_hmodule(&self, handle: u32) -> Option<ModuleId> {
|
||
match handle {
|
||
HMODULE_XBOXKRNL => Some(ModuleId::Xboxkrnl),
|
||
HMODULE_XAM => Some(ModuleId::Xam),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
/// Dispatch a kernel export on the current HW thread. Uses `mem::replace`
|
||
/// to temporarily move the active `PpcContext` out of its scheduler slot,
|
||
/// so the export function can receive `&mut ctx` while also getting
|
||
/// `&mut self` (which contains the scheduler). Without this, the export
|
||
/// signature would have to avoid aliasing via a bundle struct — see the
|
||
/// approved plan's ExportCtx section for the alternative we rejected.
|
||
///
|
||
/// While the export runs, `scheduler.hw_threads[current_hw_id].ctx` holds
|
||
/// a freshly-constructed placeholder. Exports that reach through
|
||
/// `state.scheduler` must not touch the current slot's `ctx` field.
|
||
///
|
||
/// **Perf note (First-Pixels M1):** this function fires ~250K/s on
|
||
/// Sylpheed (1 import per 40 guest instructions). A former
|
||
/// `#[tracing::instrument]` attribute + two `tracing::info!` call
|
||
/// sites made up ~28% of `run_execution` wall time on a post-Tier-3
|
||
/// profile — most of it in `tracing::span::Span::new` +
|
||
/// `Layered::new_span` + `ErrorLayer::on_new_span`. The span was at
|
||
/// `level = "debug"` but the span **construction** happened
|
||
/// unconditionally; only the emit was level-gated. Removing the
|
||
/// attribute + the two `info!` lines recovers the overhead without
|
||
/// losing any observability — the `metrics::counter!("kernel.calls",
|
||
/// "name" => name)` below still tracks per-export counts, and
|
||
/// unimplemented lookups still emit a `warn!`.
|
||
pub fn call_export(
|
||
&mut self,
|
||
module: ModuleId,
|
||
ordinal: u32,
|
||
mem: &GuestMemory,
|
||
) -> bool {
|
||
// The thread whose ctx we're swapping out must be addressed by
|
||
// `ThreadRef`, not `hw_id` — under per-slot runqueues a bare
|
||
// `hw_id` alone can't distinguish multiple threads on the same
|
||
// slot, and Axis 4 migration can change the slot underneath us.
|
||
let r = self
|
||
.scheduler
|
||
.current
|
||
.expect("call_export: no current thread");
|
||
let mut ctx = std::mem::replace(
|
||
self.scheduler.ctx_mut_ref(r),
|
||
PpcContext::new(),
|
||
);
|
||
|
||
let result = if let Some(&(name, func)) = self.exports.get(&(module, ordinal)) {
|
||
metrics::counter!("kernel.calls", "name" => name).increment(1);
|
||
tracing::trace!(target: "probe_calls", "hw={} call={} r3={:#x} r4={:#x} r5={:#x} lr={:#x}",
|
||
r.hw_id, name, ctx.gpr[3], ctx.gpr[4], ctx.gpr[5], ctx.lr);
|
||
func(&mut ctx, mem, self);
|
||
true
|
||
} else {
|
||
metrics::counter!("kernel.unimplemented").increment(1);
|
||
tracing::warn!(
|
||
module = ?module,
|
||
ordinal = format_args!("{:#x}", ordinal),
|
||
"unimplemented kernel export"
|
||
);
|
||
ctx.gpr[3] = 0;
|
||
false
|
||
};
|
||
|
||
// Restore the (possibly mutated) ctx by ThreadRef. Axis 4
|
||
// self-migration (KeSetAffinityThread(NtCurrentThread, ...))
|
||
// updates `scheduler.current` in place; re-read here so we
|
||
// restore onto the thread's new slot, not its old one.
|
||
let final_ref = self.scheduler.current.unwrap_or(r);
|
||
*self.scheduler.ctx_mut_ref(final_ref) = ctx;
|
||
result
|
||
}
|
||
|
||
/// Axis 4: `KeSetAffinityThread` orchestration. Drives the scheduler's
|
||
/// migration and fixes up every `ThreadRef` held outside the
|
||
/// scheduler (kernel object waiter lists, critical-section waiters,
|
||
/// `interrupts.injected_ref`). Returns the previous mask.
|
||
pub fn set_affinity(&mut self, handle: u32, new_mask: u8, mem: &GuestMemory) -> u8 {
|
||
let Some(r) = self.scheduler.find_by_handle(handle) else {
|
||
return 0;
|
||
};
|
||
let (old_mask, _new_ref, fixup) = self.scheduler.set_affinity_ref(
|
||
r,
|
||
new_mask,
|
||
&mut GuestMemoryPcr(mem),
|
||
);
|
||
if let Some(fx) = fixup {
|
||
use crate::objects::KernelObject;
|
||
for obj in self.objects.values_mut() {
|
||
match obj {
|
||
KernelObject::Event { waiters, .. }
|
||
| KernelObject::Semaphore { waiters, .. }
|
||
| KernelObject::Thread { waiters, .. }
|
||
| KernelObject::Mutex { waiters, .. } => {
|
||
for w in waiters.iter_mut() {
|
||
fx.apply(w);
|
||
}
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
for list in self.cs_waiters.values_mut() {
|
||
for w in list.iter_mut() {
|
||
fx.apply(w);
|
||
}
|
||
}
|
||
if let Some(ref mut ir) = self.interrupts.injected_ref {
|
||
fx.apply(ir);
|
||
}
|
||
}
|
||
old_mask
|
||
}
|
||
|
||
/// Install the initial (main) guest thread on HW slot 0. Called once at
|
||
/// startup after the app allocates the main stack/PCR/TLS blocks.
|
||
pub fn install_initial_thread(
|
||
&mut self,
|
||
ctx: PpcContext,
|
||
stack_base: u32,
|
||
stack_size: u32,
|
||
pcr_base: u32,
|
||
tls_base: u32,
|
||
thread_handle: u32,
|
||
mem: &GuestMemory,
|
||
) {
|
||
self.scheduler.install_initial_thread(
|
||
ctx,
|
||
stack_base,
|
||
stack_size,
|
||
pcr_base,
|
||
tls_base,
|
||
thread_handle,
|
||
&mut GuestMemoryPcr(mem),
|
||
);
|
||
}
|
||
|
||
/// Returns the name registered for export `(module, ordinal)`, or `None`
/// if no such export has been registered.
pub fn export_name(&self, module: ModuleId, ordinal: u32) -> Option<&'static str> {
    let (name, _handler) = self.exports.get(&(module, ordinal))?;
    Some(*name)
}
|
||
|
||
pub fn alloc_handle(&mut self) -> u32 {
|
||
// M2.4: lock-free fetch_add. Relaxed is sufficient — IDs are
|
||
// opaque tokens; no payload is sequenced against the counter.
|
||
self.next_handle
|
||
.fetch_add(4, std::sync::atomic::Ordering::Relaxed)
|
||
}
|
||
|
||
/// Allocates a fresh handle, stores `obj` under it in the object table and
/// starts its reference count at 1. Returns the new handle.
pub fn alloc_handle_for(&mut self, obj: KernelObject) -> u32 {
    let handle = self.alloc_handle();
    self.objects.insert(handle, obj);
    // The creator holds the one initial logical reference.
    // `NtDuplicateObject` bumps the count and `NtClose` decrements it; the
    // object is dropped only at zero. See `nt_close` for the aliased-handle
    // rationale.
    self.handle_refcount.insert(handle, 1);
    handle
}
|
||
|
||
// ===== Handle audit hooks =====
|
||
//
|
||
// These are no-ops when `audit.enabled == false`, so call sites can
|
||
// unconditionally invoke them without a hot-path branch in release builds
|
||
// (the `inline` `if !enabled return` short-circuits before any work).
|
||
|
||
/// Build a [`HandleAuditEntry`] describing the *current* call-site —
|
||
/// captures cycle (slot-0 timebase), current `tid`, and `lr` from the
|
||
/// passed `PpcContext`.
|
||
fn audit_entry(&self, lr: u32, source: &'static str, aux: u64) -> HandleAuditEntry {
|
||
let hw_id = self.scheduler.current_hw_id().unwrap_or(0);
|
||
let cycle = self.scheduler.ctx(hw_id).timebase;
|
||
let tid = self.scheduler.tid(hw_id).unwrap_or(0);
|
||
HandleAuditEntry { cycle, tid, lr, source, aux }
|
||
}
|
||
|
||
/// Record the creation of a fresh handle. `kind` is one of the stable
|
||
/// labels documented on [`crate::audit::HandleAuditTrail::kind`].
|
||
pub fn audit_create(&mut self, handle: u32, kind: &'static str, lr: u32, source: &'static str) {
|
||
if !self.audit.enabled {
|
||
return;
|
||
}
|
||
let entry = self.audit_entry(lr, source, 0);
|
||
self.audit.record_create(handle, kind, entry);
|
||
}
|
||
|
||
/// KRNBUG-AUDIT-002. Variant of `audit_create` that additionally
|
||
/// captures a 6-frame guest stack trace at allocation time when the
|
||
/// handle is in `audit.focus`. Outside the focus set this falls back
|
||
/// to plain `audit_create` (no stack walk → no extra cost on the hot
|
||
/// path of unfocused handle creation).
|
||
///
|
||
/// The walk reads the PPC EABI back-chain: `[r1] = prev_sp`, and the
|
||
/// LR saved by *that* prev frame's prologue lives at `[prev_sp - 8]`.
|
||
/// Frame 0 is the live frame `(ctx.gpr[1], ctx.lr)`. Frames 1..N walk
|
||
/// upward. A read returning 0 / 0xFFFF_FFFF, or a self-loop, ends the
|
||
/// walk early. This is read-only — guest memory and CPU state are not
|
||
/// mutated, so lockstep determinism is unaffected (a parallel run with
|
||
/// no focus is byte-identical to one without this code path).
|
||
pub fn audit_create_with_ctx(
|
||
&mut self,
|
||
handle: u32,
|
||
kind: &'static str,
|
||
ctx: &PpcContext,
|
||
mem: &GuestMemory,
|
||
source: &'static str,
|
||
) {
|
||
if !self.audit.enabled {
|
||
return;
|
||
}
|
||
let lr = ctx.lr as u32;
|
||
let entry = self.audit_entry(lr, source, 0);
|
||
if !self.audit.focus.contains(&handle) {
|
||
self.audit.record_create(handle, kind, entry);
|
||
return;
|
||
}
|
||
let stack = walk_guest_back_chain(ctx.gpr[1] as u32, lr, mem, 6);
|
||
let probes = probe_create_stack_classes(ctx, &stack, mem);
|
||
self.audit
|
||
.record_create_with_stack_and_probes(handle, kind, entry, stack, probes);
|
||
}
|
||
|
||
/// Record a Set/Pulse/Release/etc. call against a handle. `aux` is the
|
||
/// previous signal state (or per-export-specific data).
|
||
pub fn audit_signal(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) {
|
||
if !self.audit.enabled {
|
||
return;
|
||
}
|
||
let entry = self.audit_entry(lr, source, aux);
|
||
self.audit.record_signal(handle, entry);
|
||
}
|
||
|
||
/// Record a `Wait*` call against a handle. `aux` packs `(alertable as u64)
|
||
/// | (timeout_kind << 8)` etc. — schema is informal; the dump just prints
|
||
/// it.
|
||
pub fn audit_wait(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) {
|
||
if !self.audit.enabled {
|
||
return;
|
||
}
|
||
let entry = self.audit_entry(lr, source, aux);
|
||
self.audit.record_wait(handle, entry);
|
||
}
|
||
|
||
/// Record a wake event (called from `wake_eligible_waiters`). `aux`
|
||
/// is the status code stamped into the woken thread's `gpr[3]`.
|
||
pub fn audit_wake(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) {
|
||
if !self.audit.enabled {
|
||
return;
|
||
}
|
||
let entry = self.audit_entry(lr, source, aux);
|
||
self.audit.record_wake(handle, entry);
|
||
}
|
||
|
||
/// Read a TLS slot for the currently running HW thread.
|
||
pub fn tls_get(&self, index: u32) -> u64 {
|
||
self.scheduler.tls_get(index)
|
||
}
|
||
|
||
/// Write a TLS slot for the currently running HW thread.
|
||
pub fn tls_set(&mut self, index: u32, value: u64) {
|
||
self.scheduler.tls_set(index, value);
|
||
}
|
||
|
||
/// Allocate a new global TLS slot index. Grows every HW thread's
|
||
/// `tls_values` array to match.
|
||
pub fn tls_alloc(&mut self) -> u32 {
|
||
use std::sync::atomic::Ordering;
|
||
// M2.4: atomic bump. The Scheduler::tls_grow_to call still needs
|
||
// a coherent post-bump value, so we read the new size from the
|
||
// fetch_add return.
|
||
let idx = self.next_tls_index.fetch_add(1, Ordering::Relaxed);
|
||
let new_size = idx + 1;
|
||
self.scheduler.tls_grow_to(new_size as usize);
|
||
idx
|
||
}
|
||
|
||
/// Allocate guest memory from the heap bump allocator.
|
||
/// Returns the base address of the allocated region.
|
||
pub fn heap_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option<u32> {
|
||
use std::sync::atomic::Ordering;
|
||
let aligned_size = (size + 0xFFF) & !0xFFF; // Page-align
|
||
// M2.4: atomic bump, then verify post-bump invariants. If the
|
||
// bump pushed us past the heap-region ceiling, the cursor stays
|
||
// advanced — subsequent allocations also fail, matching the
|
||
// pre-M2 sequential semantics. We don't try to "undo" the bump
|
||
// because that opens a CAS-loop race for marginal benefit (a
|
||
// failing alloc near the limit is already game-over).
|
||
let base = self.heap_cursor.fetch_add(aligned_size, Ordering::Relaxed);
|
||
let new_top = base.checked_add(aligned_size)?;
|
||
if new_top > 0x6FFF_FFFF {
|
||
return None;
|
||
}
|
||
let protect = xenia_memory::page_table::MemoryProtect::READ
|
||
| xenia_memory::page_table::MemoryProtect::WRITE;
|
||
mem.alloc(base, aligned_size, protect).ok()?;
|
||
Some(base)
|
||
}
|
||
|
||
/// Allocate a kernel stack.
|
||
pub fn stack_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option<u32> {
|
||
use std::sync::atomic::Ordering;
|
||
let aligned_size = (size + 0xFFF) & !0xFFF;
|
||
let base = self.stack_cursor.fetch_add(aligned_size, Ordering::Relaxed);
|
||
let protect = xenia_memory::page_table::MemoryProtect::READ
|
||
| xenia_memory::page_table::MemoryProtect::WRITE;
|
||
mem.alloc(base, aligned_size, protect).ok()?;
|
||
Some(base + aligned_size) // Return top of stack
|
||
}
|
||
|
||
// ===== Timer subsystem =====
|
||
|
||
/// Idempotent arm — removes any prior entry for `handle`, then inserts
|
||
/// the new `(deadline, handle)` pair and re-sorts by deadline ascending.
|
||
/// The per-`Timer` object's `deadline` field must be set separately by
|
||
/// the caller (see `NtSetTimerEx` in exports.rs) — this helper only
|
||
/// manages the central pending-fires list so `fire_due_timers` has a
|
||
/// sorted head to peek.
|
||
pub fn arm_timer(&mut self, handle: u32, deadline: u64) {
|
||
self.pending_timer_fires.retain(|&(_, h)| h != handle);
|
||
self.pending_timer_fires.push((deadline, handle));
|
||
self.pending_timer_fires.sort_by_key(|&(d, _)| d);
|
||
}
|
||
|
||
/// Idempotent disarm — strip any entry for `handle`. Safe to call
|
||
/// regardless of prior state; `NtClose`, `NtCancelTimer`, and the
|
||
/// periodic-rearm guard all invoke this.
|
||
pub fn disarm_timer(&mut self, handle: u32) {
|
||
self.pending_timer_fires.retain(|&(_, h)| h != handle);
|
||
}
|
||
|
||
/// Peek the earliest pending timer deadline. Paired with
|
||
/// `Scheduler::earliest_wait_deadline` by the main loop's "advance to
|
||
/// next event" coordination — the earlier of the two drives
|
||
/// `advance_all_timebases_to`.
|
||
pub fn earliest_timer_deadline(&self) -> Option<u64> {
|
||
self.pending_timer_fires.first().map(|&(d, _)| d)
|
||
}
|
||
|
||
/// Fire every timer whose deadline is `<= now` (derived from slot 0's
|
||
/// timebase, matching `parse_timeout`'s "current thread" fallback).
|
||
/// For each fire: mark the timer `signaled=true`, clear its
|
||
/// `deadline`, rearm if periodic, then wake eligible waiters via
|
||
/// `exports::wake_eligible_waiters`. Returns `true` iff any timer
|
||
/// fired — the caller uses this to decide whether the scheduler round
|
||
/// needs a follow-up `advance_to_next_wake_if_due` step.
|
||
pub fn fire_due_timers(&mut self) -> bool {
|
||
let now = self.scheduler.ctx(0).timebase;
|
||
let mut fired = false;
|
||
loop {
|
||
let Some(&(deadline, handle)) = self.pending_timer_fires.first() else {
|
||
break;
|
||
};
|
||
if deadline > now {
|
||
break;
|
||
}
|
||
self.pending_timer_fires.remove(0);
|
||
// Mark signaled + capture period before any rearm so we don't
|
||
// double-borrow the object while calling wake_eligible_waiters.
|
||
let periodic_next =
|
||
if let Some(KernelObject::Timer {
|
||
signaled,
|
||
deadline: obj_deadline,
|
||
period_ticks,
|
||
..
|
||
}) = self.objects.get_mut(&handle)
|
||
{
|
||
*signaled = true;
|
||
*obj_deadline = None;
|
||
if *period_ticks > 0 {
|
||
Some(now + *period_ticks)
|
||
} else {
|
||
None
|
||
}
|
||
} else {
|
||
// Closed handle — its entry lingered because disarm on
|
||
// NtClose was missed, OR fire_due_timers picked up a
|
||
// race. Skip silently; nothing to wake.
|
||
None
|
||
};
|
||
if let Some(next) = periodic_next {
|
||
if let Some(KernelObject::Timer { deadline, .. }) =
|
||
self.objects.get_mut(&handle)
|
||
{
|
||
*deadline = Some(next);
|
||
}
|
||
self.arm_timer(handle, next);
|
||
}
|
||
crate::exports::wake_eligible_waiters(self, handle);
|
||
fired = true;
|
||
}
|
||
fired
|
||
}
|
||
|
||
/// Handle deadline-expiry cleanup for a thread whose wait timed out.
|
||
/// Called by the main loop right after `Scheduler::advance_to_next_wake`
|
||
/// returns a `Some((ref, reason))`. Stamps `STATUS_TIMEOUT` into the
|
||
/// woken thread's `gpr[3]` and scrubs its `ThreadRef` out of any
|
||
/// handle's waiter list so a later signal can't consume the
|
||
/// auto-reset slot into a stale waiter.
|
||
///
|
||
/// `BlockReason::DelayUntil` is a pure sleep and expects
|
||
/// `STATUS_SUCCESS` — the default pre-populated value in
|
||
/// `ke_delay_execution_thread` — so we leave `gpr[3]` alone for it.
|
||
pub fn handle_timeout_wake(
|
||
&mut self,
|
||
r: ThreadRef,
|
||
reason: xenia_cpu::scheduler::BlockReason,
|
||
) {
|
||
use xenia_cpu::scheduler::BlockReason;
|
||
const STATUS_TIMEOUT: u64 = 0x0000_0102;
|
||
match reason {
|
||
BlockReason::WaitAny { handles, .. } | BlockReason::WaitAll { handles, .. } => {
|
||
self.scheduler.ctx_mut_ref(r).gpr[3] = STATUS_TIMEOUT;
|
||
for h in handles {
|
||
if let Some(obj) = self.objects.get_mut(&h) {
|
||
if let Some(waiters) = obj.waiters_mut() {
|
||
waiters.retain(|&w| w != r);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
BlockReason::DelayUntil(_) => {
|
||
// Pure sleep → default STATUS_SUCCESS is correct; no handles
|
||
// to scrub.
|
||
}
|
||
BlockReason::CriticalSection(cs_ptr) => {
|
||
self.scheduler.ctx_mut_ref(r).gpr[3] = STATUS_TIMEOUT;
|
||
if let Some(list) = self.cs_waiters.get_mut(&cs_ptr) {
|
||
list.retain(|&w| w != r);
|
||
}
|
||
}
|
||
BlockReason::Suspended => {}
|
||
}
|
||
}
|
||
}
|
||
|
||
impl Default for KernelState {
|
||
fn default() -> Self {
|
||
Self::new()
|
||
}
|
||
}
|
||
|
||
/// KRNBUG-AUDIT-003. Result of treating a guest pointer as the `this` of a
/// C++ object: `[this]` is read as the vtable pointer and MSVC RTTI is then
/// followed to try to recover the decorated class name. Produced by a pure
/// read of guest memory.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ClassReadout {
    /// MSVC RTTI was intact. `mangled` is the decorated name exactly as
    /// stored in the TypeDescriptor (e.g. `.?AVEvent@silph@@`).
    Named { vtable: u32, mangled: String },
    /// `[this]` looked like a vtable pointer but no RTTI name could be
    /// recovered (stripped RTTI, or the COL/TypeDescriptor chain did not
    /// lead to a printable `.?A…` name). `virtuals` holds the first four
    /// vtable slots so the class can be identified offline by resolving
    /// them against the analysis DB's `functions` table.
    VtableOnly { vtable: u32, virtuals: [u32; 4] },
    /// The candidate is not a plausible object: `this` is outside the
    /// accepted pointer ranges, `[this]` does not land in the image range,
    /// or (when no RTTI name was found) the first two vtable slots do not
    /// look like image-range function pointers. Callers skip these.
    NotAnObject,
}
|
||
|
||
/// Probe a candidate `this` pointer as a C++ object on the guest heap.
|
||
/// Read-only; safe to call from the diagnostic dump path. Behaviour:
|
||
/// 1. Reject non-heap candidate pointers (anything outside the user/
|
||
/// image range).
|
||
/// 2. Read `[this]` as vtable; reject if it's not in the image range
|
||
/// where MSVC stores read-only `vftable` symbols.
|
||
/// 3. MSVC RTTI traversal:
|
||
/// vtable[-4 bytes] = RTTICompleteObjectLocator*
|
||
/// COL+0x0c = TypeDescriptor*
|
||
/// TypeDescriptor+0x08 = mangled name (NUL-terminated ASCII)
|
||
/// If every link looks plausible AND the name starts with `.?A`
|
||
/// (the MSVC class-name prefix), return `Named`.
|
||
/// 4. Otherwise return `VtableOnly` with the first 4 virtual slots
|
||
/// so the caller can resolve method names via the analysis DB.
|
||
pub fn read_class_at_this(this: u32, mem: &GuestMemory) -> ClassReadout {
|
||
if !is_likely_guest_heap_ptr(this) {
|
||
return ClassReadout::NotAnObject;
|
||
}
|
||
let vtable = mem.read_u32(this);
|
||
if !is_likely_image_ptr(vtable) {
|
||
return ClassReadout::NotAnObject;
|
||
}
|
||
let col = mem.read_u32(vtable.wrapping_sub(4));
|
||
if is_likely_image_ptr(col) {
|
||
let type_desc = mem.read_u32(col.wrapping_add(12));
|
||
if is_likely_image_ptr(type_desc) {
|
||
let name = read_ascii_cstring(mem, type_desc.wrapping_add(8), 128);
|
||
if name.starts_with(".?A") {
|
||
return ClassReadout::Named {
|
||
vtable,
|
||
mangled: name,
|
||
};
|
||
}
|
||
}
|
||
}
|
||
let virtuals = [
|
||
mem.read_u32(vtable),
|
||
mem.read_u32(vtable.wrapping_add(4)),
|
||
mem.read_u32(vtable.wrapping_add(8)),
|
||
mem.read_u32(vtable.wrapping_add(12)),
|
||
];
|
||
// False-positive guard: when [this] points at the entry of a
|
||
// function (e.g. the CRT static-init iterator with r31 holding a
|
||
// pointer into the init-fn array), `vtable` is the function PC and
|
||
// the "first virtuals" are the function's prologue *instructions*
|
||
// — words like 0x7D8802A6 (`mflr r12`) which are NOT in the image
|
||
// pointer range. A real C++ vtable's first slot is always a member
|
||
// function pointer in the image range. Require the first slot AND
|
||
// the second slot to look like image-range function pointers,
|
||
// else return `NotAnObject`.
|
||
if !is_likely_image_ptr(virtuals[0]) || !is_likely_image_ptr(virtuals[1]) {
|
||
return ClassReadout::NotAnObject;
|
||
}
|
||
ClassReadout::VtableOnly { vtable, virtuals }
|
||
}
|
||
|
||
/// KRNBUG-AUDIT-003. At handle creation time, walk the captured frames
|
||
/// and probe each frame's most-likely `this` candidates for an MSVC C++
|
||
/// class name. Returns one pre-formatted line per hit (Named or
|
||
/// VtableOnly); silent on `NotAnObject` so the noise floor stays low.
|
||
///
|
||
/// Candidates per frame:
|
||
/// * Frame 0 (live): ctx.gpr[31] (canonical C++ `this`), ctx.gpr[30]
|
||
/// (often a secondary captured `this` in nested method calls), and
|
||
/// ctx.gpr[3] (the live first arg — at the moment NtCreateEvent is
|
||
/// entered, this is `&Event` being constructed).
|
||
/// * Frame K ≥ 1: read `[fp - 12]` and `[fp - 16]` — the standard
|
||
/// PPC EABI `__savegprlr_NN` spill area where the callee's prologue
|
||
/// placed the caller's r31 / r30 just before its `stwu`. So those
|
||
/// slots hold the value of the function-at-frame-K's r31 / r30
|
||
/// captured at the moment IT made the bl into the next frame down.
|
||
///
|
||
/// Read-only; never mutates guest state.
|
||
pub fn probe_create_stack_classes(
|
||
ctx: &PpcContext,
|
||
frames: &[(u32, u32)],
|
||
mem: &GuestMemory,
|
||
) -> Vec<String> {
|
||
let mut out = Vec::new();
|
||
for (idx, (fp, lr)) in frames.iter().enumerate() {
|
||
let (raw_r31, raw_r30, raw_r3) = if idx == 0 {
|
||
(ctx.gpr[31] as u32, ctx.gpr[30] as u32, ctx.gpr[3] as u32)
|
||
} else {
|
||
(
|
||
mem.read_u32(fp.wrapping_sub(12)),
|
||
mem.read_u32(fp.wrapping_sub(16)),
|
||
0,
|
||
)
|
||
};
|
||
// Emit one always-on raw line per frame so the back-chain plus
|
||
// saved-register dump is captured even when the RTTI probe is
|
||
// silent. Investigators can resolve the raw values offline via
|
||
// the analysis DB (lookup of vtable / static-init iterator
|
||
// pointers / etc. is otherwise impossible from logs alone).
|
||
if idx == 0 {
|
||
out.push(format!(
|
||
"frame={} lr={:#010x} live r31={:#010x} r30={:#010x} r3={:#010x}",
|
||
idx, lr, raw_r31, raw_r30, raw_r3,
|
||
));
|
||
} else {
|
||
out.push(format!(
|
||
"frame={} lr={:#010x} saved-r31={:#010x} saved-r30={:#010x}",
|
||
idx, lr, raw_r31, raw_r30,
|
||
));
|
||
}
|
||
let candidates: [(u32, &'static str); 3] = if idx == 0 {
|
||
[(raw_r31, "r31"), (raw_r30, "r30"), (raw_r3, "r3")]
|
||
} else {
|
||
[
|
||
(raw_r31, "saved-r31"),
|
||
(raw_r30, "saved-r30"),
|
||
(0, ""),
|
||
]
|
||
};
|
||
for (this_ptr, label) in candidates {
|
||
if label.is_empty() {
|
||
continue;
|
||
}
|
||
match read_class_at_this(this_ptr, mem) {
|
||
ClassReadout::Named { vtable, mangled } => {
|
||
out.push(format!(
|
||
" → frame={} {}={:#010x} vtable={:#010x} class={}",
|
||
idx, label, this_ptr, vtable, mangled,
|
||
));
|
||
}
|
||
ClassReadout::VtableOnly { vtable, virtuals } => {
|
||
out.push(format!(
|
||
" → frame={} {}={:#010x} vtable={:#010x} virtuals=[{:#010x},{:#010x},{:#010x},{:#010x}] (RTTI stripped)",
|
||
idx, label, this_ptr, vtable,
|
||
virtuals[0], virtuals[1], virtuals[2], virtuals[3],
|
||
));
|
||
}
|
||
ClassReadout::NotAnObject => {}
|
||
}
|
||
}
|
||
}
|
||
out
|
||
}
|
||
|
||
/// Plausibility filter for a candidate object pointer.
///
/// Accepts two inclusive ranges: `0x4000_0000..=0x4FFF_FFFF` (treated here
/// as the user heap) and `0x8200_0000..=0x82FF_FFFF` (treated here as the
/// image and its read-only data). Both are allowed because dispatcher
/// objects may live in static-init pools inside the image as well as in
/// heap allocations.
fn is_likely_guest_heap_ptr(p: u32) -> bool {
    matches!(p, 0x4000_0000..=0x4FFF_FFFF | 0x8200_0000..=0x82FF_FFFF)
}
|
||
|
||
/// Plausibility filter for pointers expected to land inside the module
/// image (vtables, RTTI structures, function pointers): accepts the
/// inclusive range `0x8200_0000..=0x82FF_FFFF` and nothing else.
fn is_likely_image_ptr(p: u32) -> bool {
    matches!(p, 0x8200_0000..=0x82FF_FFFF)
}
|
||
|
||
/// Read a NUL-terminated ASCII string from guest memory, capped at
|
||
/// `max` bytes. Returns the empty string on any non-printable byte
|
||
/// (a cheap signal that `addr` doesn't actually point at a name).
|
||
fn read_ascii_cstring(mem: &GuestMemory, addr: u32, max: usize) -> String {
|
||
let mut s = String::with_capacity(max);
|
||
for i in 0..max {
|
||
let b = mem.read_u8(addr.wrapping_add(i as u32));
|
||
if b == 0 {
|
||
return s;
|
||
}
|
||
if !(0x20..=0x7E).contains(&b) {
|
||
return String::new();
|
||
}
|
||
s.push(b as char);
|
||
}
|
||
s
|
||
}
|
||
|
||
/// Walk the PPC EABI back-chain starting from `sp` (the value in r1 at
|
||
/// the moment of capture). Returns up to `max_frames` entries of
|
||
/// `(frame_pointer, saved_lr)`. Index 0 is the live frame
|
||
/// `(sp, live_lr)` — `live_lr` is the caller-supplied current LR, since
|
||
/// it has not yet been spilled to memory by this frame's prologue.
|
||
///
|
||
/// PPC convention reminder: a function's prologue stores the caller's
|
||
/// LR at `[old_sp - 8]` *before* bumping `r1` down to the new frame. So
|
||
/// from the live `sp`, `prev_sp = mem[sp]` and the LR saved in the
|
||
/// frame above is at `mem[prev_sp - 8]`. The walk stops on a
|
||
/// 0/0xFFFFFFFF/self-loop sentinel — those guard against
|
||
/// uninitialized stacks and the topmost frame.
|
||
///
|
||
/// This is read-only; it never mutates guest memory or CPU state.
|
||
pub fn walk_guest_back_chain(
|
||
sp: u32,
|
||
live_lr: u32,
|
||
mem: &GuestMemory,
|
||
max_frames: usize,
|
||
) -> Vec<(u32, u32)> {
|
||
let mut frames = Vec::with_capacity(max_frames);
|
||
if max_frames == 0 {
|
||
return frames;
|
||
}
|
||
frames.push((sp, live_lr));
|
||
let mut cur = sp;
|
||
while frames.len() < max_frames {
|
||
if cur == 0 || cur == 0xFFFF_FFFF {
|
||
break;
|
||
}
|
||
let prev = mem.read_u32(cur);
|
||
if prev == 0 || prev == 0xFFFF_FFFF || prev == cur {
|
||
break;
|
||
}
|
||
let saved_lr = mem.read_u32(prev.wrapping_sub(8));
|
||
frames.push((prev, saved_lr));
|
||
cur = prev;
|
||
}
|
||
frames
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use xenia_memory::GuestMemory;

    /// Ten consecutive `heap_alloc(0x14)` calls must return distinct
    /// page-aligned addresses. A previous bug had kernel exports passing 0
    /// as `size`, causing the bump allocator to return the same address
    /// every time — 10 "allocations" that all aliased 0x40105000 and
    /// silently corrupted the guest's static-constructor state.
    #[test]
    fn heap_alloc_advances_for_nonzero_size() {
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        let mut seen = Vec::new();
        for _ in 0..10 {
            let addr = state
                .heap_alloc(0x14, &mem)
                .expect("heap must have room for 0x14 bytes");
            assert_eq!(addr & 0xFFF, 0, "heap returns page-aligned addresses");
            assert!(!seen.contains(&addr), "heap returned duplicate address {addr:#x}");
            seen.push(addr);
        }
    }

    /// `heap_alloc(0)` must not advance the cursor (it has nothing to do).
    /// The kernel exports that previously hit this path did so because they
    /// read the wrong argument register; guarded at the export boundary now.
    #[test]
    fn heap_alloc_zero_is_noop_in_cursor() {
        use std::sync::atomic::Ordering;
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        let before = state.heap_cursor.load(Ordering::Relaxed);
        let _ = state.heap_alloc(0, &mem);
        let after = state.heap_cursor.load(Ordering::Relaxed);
        assert_eq!(before, after, "zero-size alloc must not advance heap cursor");
    }

    /// M2.4: concurrent handle allocations must produce distinct values.
    /// Ten threads each allocate 100 handles; the union must contain
    /// exactly 1000 distinct values, every one on the 4-step grid starting
    /// at `0x1000`, with the maximum equal to `0x1000 + 4 * (1000 - 1)`.
    #[test]
    fn concurrent_alloc_handle_distinct() {
        use std::collections::HashSet;
        use std::sync::atomic::{AtomicU32, Ordering};
        use std::sync::{Arc, Mutex};

        // Use a free-standing AtomicU32 mirroring `next_handle`'s semantics;
        // we can't easily share `&mut KernelState` across threads. The
        // production code uses the same `fetch_add(4, Relaxed)` recipe.
        let counter = Arc::new(AtomicU32::new(0x1000));
        let collected: Arc<Mutex<HashSet<u32>>> = Arc::new(Mutex::new(HashSet::new()));

        let mut workers = Vec::new();
        for _ in 0..10 {
            let c = Arc::clone(&counter);
            let s = Arc::clone(&collected);
            workers.push(std::thread::spawn(move || {
                let mut local = Vec::with_capacity(100);
                for _ in 0..100 {
                    local.push(c.fetch_add(4, Ordering::Relaxed));
                }
                // Take the lock once per thread, after all allocations.
                s.lock().unwrap().extend(local);
            }));
        }
        for w in workers {
            w.join().unwrap();
        }

        let set = collected.lock().unwrap();
        assert_eq!(
            set.len(),
            1000,
            "expected 1000 distinct handles, got {}",
            set.len()
        );
        assert!(set.iter().all(|h| (h - 0x1000) % 4 == 0));
        assert_eq!(set.iter().copied().min(), Some(0x1000));
        assert_eq!(set.iter().copied().max(), Some(0x1000 + 4 * (1000 - 1)));
    }

    /// KRNBUG-AUDIT-002: synthesize a 3-level back-chain in mapped guest
    /// memory and walk it. Verifies that frame 0 is the live-LR frame and
    /// that subsequent frames pull `prev_sp` from `[sp]` and the saved LR
    /// from `[prev_sp - 8]`.
    #[test]
    fn back_chain_walker_resolves_synthetic_frames() {
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        let base = state.heap_alloc(0x4000, &mem).expect("scratch");
        // Lay out three frames inside the scratch area, each at its own
        // offset. Frame N's `[sp + 0]` points at frame N+1's sp, and frame
        // N+1's `[sp - 8]` holds the LR saved for the call into frame N.
        let sp0 = base + 0x100;
        let sp1 = base + 0x300;
        let sp2 = base + 0x500;
        // Back-chain pointers.
        mem.write_u32(sp0, sp1);
        mem.write_u32(sp1, sp2);
        mem.write_u32(sp2, 0); // top of stack
        // Saved LRs live at `sp - 8` of the frame they are reported with.
        mem.write_u32(sp1.wrapping_sub(8), 0xAAAA_BBBB);
        mem.write_u32(sp2.wrapping_sub(8), 0xCCCC_DDDD);

        let frames = walk_guest_back_chain(sp0, 0x1111_2222, &mem, 6);
        assert_eq!(frames.len(), 3);
        assert_eq!(frames[0], (sp0, 0x1111_2222));
        assert_eq!(frames[1], (sp1, 0xAAAA_BBBB));
        assert_eq!(frames[2], (sp2, 0xCCCC_DDDD));
    }

    /// Walker must not loop on a self-referential back-chain (a corrupted
    /// frame where `[sp] == sp`).
    #[test]
    fn back_chain_walker_stops_on_self_loop() {
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        let base = state.heap_alloc(0x1000, &mem).expect("scratch");
        let sp = base + 0x100;
        mem.write_u32(sp, sp); // self-loop
        let frames = walk_guest_back_chain(sp, 0x4242_4242, &mem, 6);
        assert_eq!(frames.len(), 1);
        assert_eq!(frames[0], (sp, 0x4242_4242));
    }

    /// Walker must terminate on the standard top-of-stack sentinel
    /// (`[sp] == 0`) without emitting a bogus frame.
    #[test]
    fn back_chain_walker_stops_on_zero_sentinel() {
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        let base = state.heap_alloc(0x1000, &mem).expect("scratch");
        let sp = base + 0x100;
        mem.write_u32(sp, 0);
        let frames = walk_guest_back_chain(sp, 0x8242_0000, &mem, 6);
        assert_eq!(frames.len(), 1);
        assert_eq!(frames[0], (sp, 0x8242_0000));
    }

    /// KRNBUG-AUDIT-003: synthesize a C++ object with intact MSVC RTTI in
    /// mapped guest memory. The probe must traverse vtable[-4] → COL →
    /// TypeDescriptor and recover the decorated mangled name.
    #[test]
    fn read_class_at_this_resolves_intact_rtti() {
        use xenia_memory::page_table::MemoryProtect;
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        let this = state.heap_alloc(0x40, &mem).expect("heap object");
        // Map an image-range page so vtable / COL / TypeDescriptor
        // pointers pass `is_likely_image_ptr`.
        let img = 0x8280_0000u32;
        mem.alloc(img, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .expect("image-range page");
        let vtable = img + 0x40;
        let col = img + 0x80;
        let type_desc = img + 0xC0;
        // [this] = vtable
        mem.write_u32(this, vtable);
        // vtable[-4] = COL (one word before the first virtual)
        mem.write_u32(vtable.wrapping_sub(4), col);
        // COL+0xC = TypeDescriptor
        mem.write_u32(col + 12, type_desc);
        // TypeDescriptor+8 = NUL-terminated mangled name
        let name = b".?AVAsyncQueue@silph@@\0";
        for (i, b) in name.iter().enumerate() {
            mem.write_u8(type_desc + 8 + i as u32, *b);
        }
        match read_class_at_this(this, &mem) {
            ClassReadout::Named { vtable: v, mangled } => {
                assert_eq!(v, vtable);
                assert_eq!(mangled, ".?AVAsyncQueue@silph@@");
            }
            other => panic!("expected Named, got {:?}", other),
        }
    }

    /// RTTI-stripped fallback: vtable looks plausible but vtable[-4] is
    /// zero. The probe must return `VtableOnly` with the first 4 virtual
    /// PCs so the caller can resolve method names via the analysis DB.
    #[test]
    fn read_class_at_this_falls_back_when_rtti_stripped() {
        use xenia_memory::page_table::MemoryProtect;
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        let this = state.heap_alloc(0x40, &mem).expect("heap object");
        let img = 0x8281_0000u32;
        mem.alloc(img, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .expect("image-range page");
        let vtable = img + 0x100;
        mem.write_u32(this, vtable);
        // No COL — vtable[-4] is left as zero, which fails
        // `is_likely_image_ptr`. Populate the first four virtuals with
        // image-range PCs.
        let virts = [0x8200_AAAA, 0x8201_BBBB, 0x8202_CCCC, 0x8203_DDDD];
        for (i, v) in virts.iter().enumerate() {
            mem.write_u32(vtable + (i as u32) * 4, *v);
        }
        match read_class_at_this(this, &mem) {
            ClassReadout::VtableOnly {
                vtable: v,
                virtuals,
            } => {
                assert_eq!(v, vtable);
                assert_eq!(virtuals, virts);
            }
            other => panic!("expected VtableOnly, got {:?}", other),
        }
    }

    /// `this` outside the heap/image range, or `[this]` not in the image
    /// range, must yield `NotAnObject` so the dump skips the candidate
    /// without printing noise.
    #[test]
    fn read_class_at_this_rejects_non_objects() {
        use xenia_memory::page_table::MemoryProtect;
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        // Out-of-range `this`.
        assert_eq!(
            read_class_at_this(0x0000_1234, &mem),
            ClassReadout::NotAnObject
        );
        assert_eq!(
            read_class_at_this(0xFFFF_FFFF, &mem),
            ClassReadout::NotAnObject
        );
        // In-range `this`, but nothing has been written to `[this]` yet;
        // it is expected to read as 0, which is not a plausible image
        // pointer.
        let this = state.heap_alloc(0x40, &mem).expect("heap object");
        assert_eq!(read_class_at_this(this, &mem), ClassReadout::NotAnObject);
        // In-range `this`, `[this]` points into the heap range — also
        // rejected because vtables are expected in the image range.
        mem.alloc(0x4500_0000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .expect("aux heap page");
        mem.write_u32(this, 0x4500_0080);
        assert_eq!(read_class_at_this(this, &mem), ClassReadout::NotAnObject);
    }

    /// `probe_create_stack_classes` integrates the back-chain walker output
    /// with the per-frame RTTI probe used at handle creation time. Build a
    /// minimal 2-frame scenario where frame 1's `[fp - 12]` saved-r31 slot
    /// points at a heap C++ object with intact MSVC RTTI, and verify the
    /// helper produces a `class=...` line.
    #[test]
    fn probe_create_stack_classes_recovers_saved_r31_class() {
        use xenia_memory::page_table::MemoryProtect;
        let mem = GuestMemory::new().expect("memory init");
        let mut state = KernelState::new();
        // Heap-allocate a fake `this` and lay out vtable / COL / TD in an
        // image-range page.
        let this = state.heap_alloc(0x40, &mem).expect("heap object");
        let img = 0x8282_0000u32;
        mem.alloc(img, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .expect("image-range page");
        let vtable = img + 0x40;
        let col = img + 0x80;
        let td = img + 0xC0;
        mem.write_u32(this, vtable);
        mem.write_u32(vtable.wrapping_sub(4), col);
        mem.write_u32(col + 12, td);
        for (i, b) in b".?AVDispatcher@silph@@\0".iter().enumerate() {
            mem.write_u8(td + 8 + i as u32, *b);
        }
        // Synthesize a 2-frame back-chain. Place `this` in the saved-r31
        // slot at `[frames[1].fp - 12]`.
        let stack_base = state.heap_alloc(0x4000, &mem).expect("stack page");
        let sp0 = stack_base + 0x100;
        let sp1 = stack_base + 0x300;
        mem.write_u32(sp1.wrapping_sub(12), this);
        let frames = vec![(sp0, 0x824a_9f6c), (sp1, 0x8217_8500)];
        // Live ctx — r3 holds an arbitrary value that is not a real
        // object; r31/r30 stay zero so frame 0 produces no hits.
        let mut ctx = PpcContext::new();
        ctx.gpr[3] = 0x4000_BEEF;
        let probes = probe_create_stack_classes(&ctx, &frames, &mem);
        assert!(
            probes.iter().any(|s| s.contains(".?AVDispatcher@silph@@")),
            "expected probes to contain the dispatcher class, got {:?}",
            probes
        );
        assert!(
            probes.iter().any(|s| s.contains("frame=1")),
            "expected at least one frame=1 line, got {:?}",
            probes
        );
    }

    /// A NUL-terminated ASCII string is read up to `max`; non-printable
    /// bytes mark the candidate as bogus (return empty string). The `.?A`
    /// prefix gating in `read_class_at_this` then rejects them.
    #[test]
    fn read_ascii_cstring_handles_termination_and_garbage() {
        use xenia_memory::page_table::MemoryProtect;
        let mem = GuestMemory::new().expect("memory init");
        mem.alloc(0x4000_0000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .expect("page");
        let addr = 0x4000_0100u32;
        // Plain NUL-terminated.
        mem.write_bytes(addr, b"hello\0world");
        assert_eq!(read_ascii_cstring(&mem, addr, 32), "hello");
        // The `max` cap truncates when no NUL is reached in time.
        assert_eq!(read_ascii_cstring(&mem, addr, 3), "hel");
        // Non-printable byte should reject the read.
        mem.write_u8(addr, 0x01);
        assert_eq!(read_ascii_cstring(&mem, addr, 32), "");
    }
}
|