diff --git a/crates/xenia-kernel/Cargo.toml b/crates/xenia-kernel/Cargo.toml
index 50de041..f9dd134 100644
--- a/crates/xenia-kernel/Cargo.toml
+++ b/crates/xenia-kernel/Cargo.toml
@@ -8,6 +8,10 @@ license.workspace = true
 xenia-types = { workspace = true }
 xenia-memory = { workspace = true }
 xenia-cpu = { workspace = true }
+xenia-vfs = { workspace = true }
+xenia-hid = { workspace = true }
+xenia-gpu = { workspace = true }
 tracing = { workspace = true }
+metrics = { workspace = true }
 thiserror = { workspace = true }
 anyhow = { workspace = true }
diff --git a/crates/xenia-kernel/src/audit.rs b/crates/xenia-kernel/src/audit.rs
new file mode 100644
index 0000000..204dbe8
--- /dev/null
+++ b/crates/xenia-kernel/src/audit.rs
@@ -0,0 +1,195 @@
+//! Per-handle audit trail for diagnosing HLE sync gaps.
+//!
+//! When enabled (via `--trace-handles` / `XENIA_TRACE_HANDLES=1`), the kernel
+//! records every handle's create/signal/wait/wake events into bounded
+//! rings per handle. `dump_thread_diagnostic` (in `xenia-app`) prints the
+//! trail at end-of-run, which lets a session see *who* signaled (or failed
+//! to signal) a given handle and *who* parked on it.
+//!
+//! The harness is behavior-neutral: when `enabled = false` (the default),
+//! every record method is an `#[inline]` no-op. When enabled, each record
+//! costs an O(1) HashMap probe + a `VecDeque::push_back` with a bounded
+//! `pop_front`, capping each handle at 3 rings × 32 entries (~4 KiB on 64-bit).
+//!
+//! See [project_xenia_rs_scheduler.md] note on the latent
+//! `scheduler.deadlock_recoveries` event during boot — this harness exists
+//! to identify which kernel API should signal handles
+//! `0x10FC / 0x1014 / 0x1104 / 0x10DC / 0x10F0` but doesn't.
+
+use std::collections::{HashMap, VecDeque};
+
+/// Maximum events per category per handle. Bounded so a long-running session
+/// doesn't OOM if a handle is signaled millions of times.
+pub const AUDIT_RING_CAPACITY: usize = 32;
+
+/// One audit record.
+/// Captured at the export's call site so `lr` points at
+/// the guest caller (one instruction past the `bl` to the kernel thunk).
+#[derive(Debug, Clone, Copy)]
+pub struct HandleAuditEntry {
+    /// Per-thread timebase tick at the time of the event. Useful for
+    /// ordering events across threads — same units as
+    /// `Scheduler::ctx(0).timebase`.
+    pub cycle: u64,
+    /// Guest thread id (NOT hw_id — `tid` survives migration).
+    pub tid: u32,
+    /// Caller's LR (the guest pc one past the `bl` to the export).
+    pub lr: u32,
+    /// Stable, kernel-internal label naming the source export. e.g.
+    /// "KeSetEvent", "NtSetEvent", "wake_eligible_waiters".
+    pub source: &'static str,
+    /// Free-form auxiliary data. For signals: previous_state. For waits:
+    /// `(alertable, timeout_ns_or_max)` packed. For wakes: `gpr[3]` set.
+    /// Read by callers as needed.
+    pub aux: u64,
+}
+
+/// Per-handle audit trail. Lives in `KernelState::audit.trails`.
+#[derive(Debug)]
+pub struct HandleAuditTrail {
+    /// Stable label: "Event/Manual", "Event/Auto", "Semaphore", "Timer/Manual",
+    /// "Timer/Auto", "Mutant", "Thread". Used for filtering in the dump.
+    pub kind: &'static str,
+    /// When/who/where the handle was minted.
+    pub created: HandleAuditEntry,
+    /// Bounded ring of signal events.
+    pub signals: VecDeque<HandleAuditEntry>,
+    /// Bounded ring of wait-entry events (one per `Wait*` call).
+    pub waits: VecDeque<HandleAuditEntry>,
+    /// Bounded ring of wake events (one per scheduler-side wake).
+    pub wakes: VecDeque<HandleAuditEntry>,
+}
+
+impl HandleAuditTrail {
+    /// Builds the trail for a newly created handle: stores `kind` and the
+    /// creation record, and starts all three event rings empty with room
+    /// for `AUDIT_RING_CAPACITY` entries each.
+    fn new(kind: &'static str, created: HandleAuditEntry) -> Self {
+        Self {
+            kind,
+            created,
+            signals: VecDeque::with_capacity(AUDIT_RING_CAPACITY),
+            waits: VecDeque::with_capacity(AUDIT_RING_CAPACITY),
+            wakes: VecDeque::with_capacity(AUDIT_RING_CAPACITY),
+        }
+    }
+}
+
+/// The audit table itself. Lives on `KernelState`; opt-in via `enabled`.
+#[derive(Debug, Default)]
+pub struct HandleAudit {
+    /// Trails keyed by handle value; entries are created only by
+    /// `record_create`.
+    pub trails: HashMap<u32, HandleAuditTrail>,
+    /// Master switch. While `false`, every `record_*` method returns
+    /// without touching `trails`.
+    pub enabled: bool,
+}
+
+impl HandleAudit {
+    /// Push an entry into a bounded ring, dropping the oldest when full.
+    ///
+    /// The check is `>=` rather than `==` so a ring that was over-filled
+    /// through the public `VecDeque` fields stops growing instead of
+    /// bypassing the cap forever.
+    #[inline]
+    fn push_bounded(ring: &mut VecDeque<HandleAuditEntry>, entry: HandleAuditEntry) {
+        if ring.len() >= AUDIT_RING_CAPACITY {
+            ring.pop_front();
+        }
+        ring.push_back(entry);
+    }
+
+    /// Shared front half of the `record_signal` / `record_wait` /
+    /// `record_wake` paths: yields the trail for `handle` only when
+    /// auditing is enabled and `record_create` has registered the handle.
+    #[inline]
+    fn trail_mut(&mut self, handle: u32) -> Option<&mut HandleAuditTrail> {
+        if !self.enabled {
+            return None;
+        }
+        self.trails.get_mut(&handle)
+    }
+
+    /// Starts a fresh trail for `handle`, replacing any trail already
+    /// stored under the same handle value. No-op while disabled.
+    #[inline]
+    pub fn record_create(&mut self, handle: u32, kind: &'static str, entry: HandleAuditEntry) {
+        if !self.enabled {
+            return;
+        }
+        self.trails
+            .insert(handle, HandleAuditTrail::new(kind, entry));
+    }
+
+    /// Appends a signal event to `handle`'s trail. Dropped silently when
+    /// auditing is disabled or the handle has no trail.
+    #[inline]
+    pub fn record_signal(&mut self, handle: u32, entry: HandleAuditEntry) {
+        if let Some(trail) = self.trail_mut(handle) {
+            Self::push_bounded(&mut trail.signals, entry);
+        }
+    }
+
+    /// Appends a wait-entry event to `handle`'s trail. Dropped silently
+    /// when auditing is disabled or the handle has no trail.
+    #[inline]
+    pub fn record_wait(&mut self, handle: u32, entry: HandleAuditEntry) {
+        if let Some(trail) = self.trail_mut(handle) {
+            Self::push_bounded(&mut trail.waits, entry);
+        }
+    }
+
+    /// Appends a wake event to `handle`'s trail. Dropped silently when
+    /// auditing is disabled or the handle has no trail.
+    #[inline]
+    pub fn record_wake(&mut self, handle: u32, entry: HandleAuditEntry) {
+        if let Some(trail) = self.trail_mut(handle) {
+            Self::push_bounded(&mut trail.wakes, entry);
+        }
+    }
+
+    /// Convenience: `(signal_count, wait_count, wake_count)` for a handle.
+    /// Returns `None` if no trail exists. The counts are the current ring
+    /// lengths, so each is capped at `AUDIT_RING_CAPACITY`.
+    pub fn counts(&self, handle: u32) -> Option<(usize, usize, usize)> {
+        self.trails
+            .get(&handle)
+            .map(|t| (t.signals.len(), t.waits.len(), t.wakes.len()))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn entry(cycle: u64, source: &'static str) -> HandleAuditEntry {
+        HandleAuditEntry { cycle, tid: 1, lr: 0x8200_0000, source, aux: 0 }
+    }
+
+    #[test]
+    fn disabled_audit_is_a_noop() {
+        let mut a = HandleAudit::default();
+        a.record_create(0x1000, "Event/Auto", entry(0, "NtCreateEvent"));
+        a.record_signal(0x1000, entry(1, "NtSetEvent"));
+        assert!(a.trails.is_empty());
+    }
+
+    #[test]
+    fn enabled_records_create_and_events() {
+        let mut a = HandleAudit { enabled: true, ..HandleAudit::default() };
+        a.record_create(0x1014, "Event/Auto", entry(0, "NtCreateEvent"));
+        a.record_signal(0x1014, entry(10, "NtSetEvent"));
+        a.record_wait(0x1014, entry(5, "NtWaitForSingleObjectEx"));
+        a.record_wake(0x1014, entry(11, "wake_eligible_waiters"));
+
+        let counts = a.counts(0x1014).unwrap();
+        assert_eq!(counts, (1, 1, 1));
+    }
+
+    #[test]
+    fn signal_for_unknown_handle_is_dropped() {
+        let mut a = HandleAudit { enabled: true, ..HandleAudit::default() };
+        // No `record_create` first → handle has no trail.
+        a.record_signal(0x9999, entry(1, "NtSetEvent"));
+        assert!(a.trails.is_empty());
+    }
+
+    #[test]
+    fn ring_is_bounded_to_capacity() {
+        let mut a = HandleAudit { enabled: true, ..HandleAudit::default() };
+        a.record_create(0x10FC, "Event/Auto", entry(0, "NtCreateEvent"));
+        for i in 0..(AUDIT_RING_CAPACITY * 3) as u64 {
+            a.record_signal(0x10FC, entry(i, "NtSetEvent"));
+        }
+        let trail = &a.trails[&0x10FC];
+        assert_eq!(trail.signals.len(), AUDIT_RING_CAPACITY);
+        // Oldest should have been dropped — the first remaining entry is at
+        // cycle = 2 * AUDIT_RING_CAPACITY (i.e. 64 if capacity = 32).
+        let first = trail.signals.front().unwrap();
+        assert_eq!(first.cycle, (AUDIT_RING_CAPACITY * 2) as u64);
+    }
+
+    #[test]
+    fn unknown_handle_counts_returns_none() {
+        let a = HandleAudit::default();
+        assert!(a.counts(0x10FC).is_none());
+    }
+}
diff --git a/crates/xenia-kernel/src/exports.rs b/crates/xenia-kernel/src/exports.rs
index 15a8b5e..1e570a3 100644
--- a/crates/xenia-kernel/src/exports.rs
+++ b/crates/xenia-kernel/src/exports.rs
@@ -2,10 +2,15 @@
 //! Each export mirrors a function from xboxkrnl_table.inc.
 
 use crate::objects::KernelObject;
-use crate::state::{KernelState, ModuleId};
-use xenia_cpu::PpcContext;
+use crate::state::{GuestMemoryPcr, KernelState, ModuleId};
+use crate::thread::allocate_thread_image;
+use xenia_cpu::scheduler::{BlockReason, SpawnParams};
+use xenia_cpu::{PpcContext, ThreadRef};
 use xenia_memory::{GuestMemory, MemoryAccess};
 
+// NTSTATUS constants used by wait/sync paths.
+const STATUS_TIMEOUT: u64 = 0x0000_0102;
+
 pub fn register_exports(state: &mut KernelState) {
     use ModuleId::Xboxkrnl;
 
@@ -29,7 +34,7 @@ pub fn register_exports(state: &mut KernelState) {
     state.register_export(Xboxkrnl, 0x4D, "KeAcquireSpinLockAtRaisedIrql", stub_return_zero);
     state.register_export(Xboxkrnl, 0x52, "KeBugCheck", ke_bug_check);
     state.register_export(Xboxkrnl, 0x53, "KeBugCheckEx", ke_bug_check_ex);
-    state.register_export(Xboxkrnl, 0x5A, "KeDelayExecutionThread", stub_success);
+    state.register_export(Xboxkrnl, 0x5A, "KeDelayExecutionThread", ke_delay_execution_thread);
     state.register_export(Xboxkrnl, 0x5D, "KeEnableFpuExceptions", stub_success);
     state.register_export(Xboxkrnl, 0x5F, "KeEnterCriticalRegion", stub_success);
     state.register_export(Xboxkrnl, 0x66, "KeGetCurrentProcessType", ke_get_current_process_type);
@@ -37,21 +42,24 @@ pub fn register_exports(state: &mut KernelState) {
     state.register_export(Xboxkrnl, 0x6C, "KeUnlockL2", stub_success);
     state.register_export(Xboxkrnl, 0x74, "KeInitializeSemaphore", ke_initialize_semaphore);
state.register_export(Xboxkrnl, 0x7D, "KeLeaveCriticalRegion", stub_success); - state.register_export(Xboxkrnl, 0x81, "KeQueryBasePriorityThread", stub_return_zero); + state.register_export(Xboxkrnl, 0x7F, "KePulseEvent", ke_pulse_event); + state.register_export(Xboxkrnl, 0x81, "KeQueryBasePriorityThread", ke_query_base_priority_thread); + state.register_export(Xboxkrnl, 0x82, "KeQueryIdealProcessor", ke_query_ideal_processor); state.register_export(Xboxkrnl, 0x83, "KeQueryPerformanceFrequency", ke_query_performance_frequency); state.register_export(Xboxkrnl, 0x84, "KeQuerySystemTime", ke_query_system_time); state.register_export(Xboxkrnl, 0x85, "KeRaiseIrqlToDpcLevel", stub_return_zero); - state.register_export(Xboxkrnl, 0x88, "KeReleaseSemaphore", stub_return_zero); + state.register_export(Xboxkrnl, 0x88, "KeReleaseSemaphore", ke_release_semaphore); state.register_export(Xboxkrnl, 0x89, "KeReleaseSpinLockFromRaisedIrql", stub_success); - state.register_export(Xboxkrnl, 0x8F, "KeResetEvent", stub_return_zero); - state.register_export(Xboxkrnl, 0x92, "KeResumeThread", stub_return_zero); - state.register_export(Xboxkrnl, 0x97, "KeSetAffinityThread", stub_return_zero); - state.register_export(Xboxkrnl, 0x99, "KeSetBasePriorityThread", stub_return_zero); + state.register_export(Xboxkrnl, 0x8F, "KeResetEvent", ke_reset_event); + state.register_export(Xboxkrnl, 0x92, "KeResumeThread", ke_resume_thread); + state.register_export(Xboxkrnl, 0x97, "KeSetAffinityThread", ke_set_affinity_thread); + state.register_export(Xboxkrnl, 0x98, "KeSetIdealProcessor", ke_set_ideal_processor); + state.register_export(Xboxkrnl, 0x99, "KeSetBasePriorityThread", ke_set_base_priority_thread); state.register_export(Xboxkrnl, 0x9B, "KeSetCurrentStackPointers", stub_success); - state.register_export(Xboxkrnl, 0x9D, "KeSetEvent", stub_return_zero); + state.register_export(Xboxkrnl, 0x9D, "KeSetEvent", ke_set_event); state.register_export(Xboxkrnl, 0xAE, "KeTryToAcquireSpinLockAtRaisedIrql", 
ke_try_acquire_spinlock); - state.register_export(Xboxkrnl, 0xAF, "KeWaitForMultipleObjects", stub_success); - state.register_export(Xboxkrnl, 0xB0, "KeWaitForSingleObject", stub_success); + state.register_export(Xboxkrnl, 0xAF, "KeWaitForMultipleObjects", ke_wait_for_multiple_objects); + state.register_export(Xboxkrnl, 0xB0, "KeWaitForSingleObject", ke_wait_for_single_object); state.register_export(Xboxkrnl, 0xB1, "KfAcquireSpinLock", stub_return_zero); state.register_export(Xboxkrnl, 0xB3, "KfLowerIrql", stub_success); state.register_export(Xboxkrnl, 0xB4, "KfReleaseSpinLock", stub_success); @@ -72,34 +80,42 @@ pub fn register_exports(state: &mut KernelState) { // Nt* state.register_export(Xboxkrnl, 0xCC, "NtAllocateVirtualMemory", nt_allocate_virtual_memory); - state.register_export(Xboxkrnl, 0xCD, "NtCancelTimer", stub_success); - state.register_export(Xboxkrnl, 0xCE, "NtClearEvent", stub_success); + state.register_export(Xboxkrnl, 0xCD, "NtCancelTimer", nt_cancel_timer); + state.register_export(Xboxkrnl, 0xCE, "NtClearEvent", nt_clear_event); state.register_export(Xboxkrnl, 0xCF, "NtClose", nt_close); state.register_export(Xboxkrnl, 0xD1, "NtCreateEvent", nt_create_event); state.register_export(Xboxkrnl, 0xD2, "NtCreateFile", nt_create_file); state.register_export(Xboxkrnl, 0xD5, "NtCreateSemaphore", nt_create_semaphore); state.register_export(Xboxkrnl, 0xD7, "NtCreateTimer", nt_create_timer); state.register_export(Xboxkrnl, 0xD9, "NtDeviceIoControlFile", stub_success); - state.register_export(Xboxkrnl, 0xDA, "NtDuplicateObject", stub_success); + state.register_export(Xboxkrnl, 0xDA, "NtDuplicateObject", nt_duplicate_object); state.register_export(Xboxkrnl, 0xDB, "NtFlushBuffersFile", stub_success); state.register_export(Xboxkrnl, 0xDC, "NtFreeVirtualMemory", stub_success); state.register_export(Xboxkrnl, 0xDF, "NtOpenFile", nt_open_file); + state.register_export(Xboxkrnl, 0xE2, "NtPulseEvent", nt_pulse_event); state.register_export(Xboxkrnl, 0xE4, 
"NtQueryDirectoryFile", nt_query_directory_file); state.register_export(Xboxkrnl, 0xE7, "NtQueryFullAttributesFile", nt_query_full_attributes_file); - state.register_export(Xboxkrnl, 0xE8, "NtQueryInformationFile", stub_success); + state.register_export(Xboxkrnl, 0xE8, "NtQueryInformationFile", nt_query_information_file); state.register_export(Xboxkrnl, 0xEE, "NtQueryVirtualMemory", stub_success); - state.register_export(Xboxkrnl, 0xEF, "NtQueryVolumeInformationFile", stub_success); + state.register_export(Xboxkrnl, 0xEF, "NtQueryVolumeInformationFile", nt_query_volume_information_file); state.register_export(Xboxkrnl, 0xF0, "NtReadFile", nt_read_file); - state.register_export(Xboxkrnl, 0xF3, "NtReleaseSemaphore", stub_return_zero); - state.register_export(Xboxkrnl, 0xF5, "NtResumeThread", stub_return_zero); - state.register_export(Xboxkrnl, 0xF6, "NtSetEvent", stub_success); - state.register_export(Xboxkrnl, 0xF7, "NtSetInformationFile", stub_success); - state.register_export(Xboxkrnl, 0xFA, "NtSetTimerEx", stub_success); - state.register_export(Xboxkrnl, 0xFC, "NtSuspendThread", stub_return_zero); - state.register_export(Xboxkrnl, 0xFD, "NtWaitForSingleObjectEx", stub_success); - state.register_export(Xboxkrnl, 0xFE, "NtWaitForMultipleObjectsEx", stub_success); + state.register_export(Xboxkrnl, 0xF3, "NtReleaseSemaphore", nt_release_semaphore); + state.register_export(Xboxkrnl, 0xF5, "NtResumeThread", nt_resume_thread); + state.register_export(Xboxkrnl, 0xF6, "NtSetEvent", nt_set_event); + state.register_export(Xboxkrnl, 0xF7, "NtSetInformationFile", nt_set_information_file); + state.register_export(Xboxkrnl, 0xFA, "NtSetTimerEx", nt_set_timer_ex); + // NOTE: `NtSetInformationThread` is NOT in xboxkrnl_table.inc on + // Xbox 360 — canary confirms ordinal 0xFB is + // `NtSignalAndWaitForSingleObjectEx`. 
The prior registration at 0xFB + // was silently overwritten by the registration below; the + // `nt_set_information_thread` body is retained for the direct-call + // unit test but no longer exposed as an ordinal. + state.register_export(Xboxkrnl, 0xFC, "NtSuspendThread", nt_suspend_thread); + state.register_export(Xboxkrnl, 0xFB, "NtSignalAndWaitForSingleObjectEx", nt_signal_and_wait_for_single_object_ex); + state.register_export(Xboxkrnl, 0xFD, "NtWaitForSingleObjectEx", nt_wait_for_single_object_ex); + state.register_export(Xboxkrnl, 0xFE, "NtWaitForMultipleObjectsEx", nt_wait_for_multiple_objects_ex); state.register_export(Xboxkrnl, 0xFF, "NtWriteFile", nt_write_file); - state.register_export(Xboxkrnl, 0x0101, "NtYieldExecution", stub_success); + state.register_export(Xboxkrnl, 0x0101, "NtYieldExecution", nt_yield_execution); // Object state.register_export(Xboxkrnl, 0x0103, "ObCreateSymbolicLink", stub_success); @@ -107,7 +123,7 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x0105, "ObDereferenceObject", stub_success); state.register_export(Xboxkrnl, 0x010B, "ObLookupThreadByThreadId", stub_success); state.register_export(Xboxkrnl, 0x010E, "ObOpenObjectByPointer", stub_success); - state.register_export(Xboxkrnl, 0x0110, "ObReferenceObjectByHandle", stub_success); + state.register_export(Xboxkrnl, 0x0110, "ObReferenceObjectByHandle", ob_reference_object_by_handle); // RTL state.register_export(Xboxkrnl, 0x0119, "RtlCaptureContext", rtl_capture_context); @@ -140,12 +156,12 @@ pub fn register_exports(state: &mut KernelState) { // Video state.register_export(Xboxkrnl, 0x01B1, "VdCallGraphicsNotificationRoutines", stub_success); state.register_export(Xboxkrnl, 0x01B4, "VdEnableDisableClockGating", stub_success); - state.register_export(Xboxkrnl, 0x01B6, "VdEnableRingBufferRPtrWriteBack", stub_success); - state.register_export(Xboxkrnl, 0x01B9, "VdGetCurrentDisplayGamma", stub_return_zero); + state.register_export(Xboxkrnl, 
0x01B6, "VdEnableRingBufferRPtrWriteBack", vd_enable_ring_buffer_rptr_writeback); + state.register_export(Xboxkrnl, 0x01B9, "VdGetCurrentDisplayGamma", vd_get_current_display_gamma); state.register_export(Xboxkrnl, 0x01BA, "VdGetCurrentDisplayInformation", stub_success); state.register_export(Xboxkrnl, 0x01BD, "VdGetSystemCommandBuffer", vd_get_system_command_buffer); state.register_export(Xboxkrnl, 0x01C2, "VdInitializeEngines", stub_success); - state.register_export(Xboxkrnl, 0x01C3, "VdInitializeRingBuffer", stub_success); + state.register_export(Xboxkrnl, 0x01C3, "VdInitializeRingBuffer", vd_initialize_ring_buffer); state.register_export(Xboxkrnl, 0x01C5, "VdInitializeScalerCommandBuffer", stub_success); state.register_export(Xboxkrnl, 0x01C6, "VdIsHSIOTrainingSucceeded", vd_is_hsio_training_succeeded); state.register_export(Xboxkrnl, 0x01C7, "VdPersistDisplay", stub_success); @@ -154,7 +170,7 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x0269, "VdRetrainEDRAM", stub_success); state.register_export(Xboxkrnl, 0x026A, "VdRetrainEDRAMWorker", stub_success); state.register_export(Xboxkrnl, 0x01D3, "VdSetDisplayMode", stub_success); - state.register_export(Xboxkrnl, 0x01D5, "VdSetGraphicsInterruptCallback", stub_success); + state.register_export(Xboxkrnl, 0x01D5, "VdSetGraphicsInterruptCallback", vd_set_graphics_interrupt_callback); state.register_export(Xboxkrnl, 0x01D9, "VdSetSystemCommandBufferGpuIdentifierAddress", stub_success); state.register_export(Xboxkrnl, 0x01DC, "VdShutdownEngines", stub_success); state.register_export(Xboxkrnl, 0x025B, "VdSwap", vd_swap); @@ -175,7 +191,7 @@ pub fn register_exports(state: &mut KernelState) { // Xex module state.register_export(Xboxkrnl, 0x0194, "XexCheckExecutablePrivilege", stub_return_zero); - state.register_export(Xboxkrnl, 0x0195, "XexGetModuleHandle", stub_return_zero); + state.register_export(Xboxkrnl, 0x0195, "XexGetModuleHandle", xex_get_module_handle); 
state.register_export(Xboxkrnl, 0x0197, "XexGetProcedureAddress", xex_get_procedure_address); // Exception handling @@ -184,21 +200,21 @@ pub fn register_exports(state: &mut KernelState) { // ===== Generic stubs ===== -fn stub_success(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn stub_success(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0; // STATUS_SUCCESS } -fn stub_return_zero(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn stub_return_zero(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0; } // ===== Debug ===== -fn dbg_break_point(_ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn dbg_break_point(_ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { tracing::warn!("DbgBreakPoint hit"); } -fn dbg_print(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn dbg_print(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { let str_ptr = ctx.gpr[3] as u32; if str_ptr != 0 { let s = read_cstring(mem, str_ptr); @@ -209,58 +225,268 @@ fn dbg_print(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelSta // ===== Threading ===== -fn ex_create_thread(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { - // r3 = handle_ptr, r4 = stack_size, r5 = thread_id_ptr, r6 = xapi_startup - // r7 = start_address, r8 = start_context, r9 = creation_flags +/// `ExCreateThread(handle_ptr, stack_size, thread_id_ptr, xapi_startup, +/// start_address, start_context, creation_flags)` — +/// signature per xenia-canary's xboxkrnl_threading.cc. Creation flags bit 0 = +/// CREATE_SUSPENDED; top 8 bits encode the affinity mask (logged, not +/// enforced under Model B with 1-instr quantum). 
+fn ex_create_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) {
     let handle_ptr = ctx.gpr[3] as u32;
+    let stack_size = ctx.gpr[4] as u32;
     let thread_id_ptr = ctx.gpr[5] as u32;
+    let start_address = ctx.gpr[7] as u32;
+    let start_context = ctx.gpr[8] as u32;
+    let creation_flags = ctx.gpr[9] as u32;
 
-    let tid = state.next_thread_id;
-    state.next_thread_id += 1;
-    let handle = state.alloc_handle_for(KernelObject::Thread { id: tid });
+    // Bit 0 requests a suspended start; the top byte carries the affinity mask.
+    let create_suspended = (creation_flags & 0x1) != 0;
+    let affinity = (creation_flags >> 24) & 0xFF;
 
-    if handle_ptr != 0 {
-        mem.write_u32(handle_ptr, handle);
+    let Some(image) = allocate_thread_image(state, mem, stack_size, 0) else {
+        tracing::error!("ExCreateThread: failed to allocate thread image");
+        ctx.gpr[3] = 0xC000_009A; // STATUS_INSUFFICIENT_RESOURCES
+        return;
+    };
+
+    use std::sync::atomic::Ordering;
+    let tid = state.next_thread_id.fetch_add(1, Ordering::Relaxed);
+    let handle = state.alloc_handle_for(KernelObject::Thread {
+        id: tid,
+        hw_id: None,
+        exit_code: None,
+        waiters: Vec::new(),
+    });
+
+    let tls_slot_count = state.next_tls_index.load(Ordering::Relaxed);
+    let params = SpawnParams {
+        entry: start_address,
+        start_context,
+        stack_base: image.stack_base,
+        stack_size: image.stack_size,
+        pcr_base: image.pcr_base,
+        tls_base: image.tls_base,
+        thread_handle: handle,
+        guest_tid: tid,
+        create_suspended,
+        is_initial: false,
+        tls_slot_count,
+        affinity_mask: affinity as u8,
+        priority: 0,
+        ideal_processor: None,
+    };
+    let result = state.scheduler.spawn(params, &mut GuestMemoryPcr(mem));
+    match result {
+        Ok(hw_id) => {
+            metrics::counter!("scheduler.spawn.ok").increment(1);
+            // Back-fill the hardware slot on the object minted before the spawn.
+            if let Some(KernelObject::Thread { hw_id: slot, .. }) = state.objects.get_mut(&handle) {
+                *slot = Some(hw_id);
+            }
+            // Both out-pointers are optional; guest memory is written only on success.
+            if handle_ptr != 0 {
+                mem.write_u32(handle_ptr, handle);
+            }
+            if thread_id_ptr != 0 {
+                mem.write_u32(thread_id_ptr, tid);
+            }
+            tracing::info!(
+                "ExCreateThread: tid={} handle={:#x} hw={} entry={:#010x} start_ctx={:#010x} suspended={} aff={:#04x}",
+                tid,
+                handle,
+                hw_id,
+                start_address,
+                start_context,
+                create_suspended,
+                affinity,
+            );
+            ctx.gpr[3] = STATUS_SUCCESS;
+        }
+        Err(_) => {
+            // NOTE(review): the `Thread` object allocated above is still in
+            // `state.objects` on this path, and the thread image is not handed
+            // back here — presumably both should be released; confirm against
+            // the handle-table / allocator API.
+            metrics::counter!("scheduler.spawn.rejected").increment(1);
+            tracing::error!("ExCreateThread: no free HW thread slot");
+            ctx.gpr[3] = 0xC000_009A;
+        }
     }
-    if thread_id_ptr != 0 {
-        mem.write_u32(thread_id_ptr, tid);
-    }
-    tracing::info!("ExCreateThread: handle={:#x} tid={}", handle, tid);
-    ctx.gpr[3] = 0; // STATUS_SUCCESS
 }
 
-fn ex_terminate_thread(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) {
-    tracing::info!("ExTerminateThread: exit_status={:#x}", ctx.gpr[3]);
+/// `ExTerminateThread(exit_code)` — terminates the current guest thread. The
+/// thread transitions to Exited and the main loop unschedules it. Joiners
+/// waiting on the thread handle are woken with STATUS_SUCCESS.
+fn ex_terminate_thread(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) {
+    let exit_code = ctx.gpr[3] as u32;
+    let (hw_id, tid, handle_opt) = state.scheduler.exit_current(exit_code);
+    tracing::info!(
+        "ExTerminateThread: tid={:?} hw={} exit_code={}",
+        tid,
+        hw_id,
+        exit_code
+    );
+    if let Some(handle) = handle_opt
+        && let Some(KernelObject::Thread {
+            exit_code: ec,
+            waiters,
+            ..
+        }) = state.objects.get_mut(&handle)
+    {
+        *ec = Some(exit_code);
+        // Detach the waiter list first so the borrow of `state.objects` ends
+        // before the scheduler is touched.
+        let to_wake: Vec<ThreadRef> = std::mem::take(waiters);
+        for w in to_wake {
+            state.scheduler.wake_ref(w);
+        }
+    }
+    tracing::debug!("ExTerminateThread: exit_status={:#x}", ctx.gpr[3]);
     ctx.gpr[3] = 0;
 }
 
-fn hal_return_to_firmware(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) {
+fn hal_return_to_firmware(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) {
     tracing::warn!("HalReturnToFirmware: reason={:#x}", ctx.gpr[3]);
     ctx.gpr[3] = 0;
 }
 
 // ===== Ke* =====
 
-fn ke_bug_check(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) {
+/// `KeSetBasePriorityThread(thread_handle, priority) -> i32 old_priority` —
+/// Axis 1 wiring. Sylpheed calls this from its worker-init prologue on
+/// newly-created threads to bump them to time-critical / high. Storing the
+/// value on the `GuestThread` makes `HwSlot::pick_runnable` honor it.
+fn ke_set_base_priority_thread(
+    ctx: &mut PpcContext,
+    _mem: &GuestMemory,
+    state: &mut KernelState,
+) {
+    let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32);
+    let new_pri = ctx.gpr[4] as i32;
+    // Unknown handles leave the scheduler untouched and report 0.
+    let prev = state
+        .scheduler
+        .find_by_handle(handle)
+        .map(|r| state.scheduler.set_priority_ref(r, new_pri))
+        .unwrap_or(0);
+    ctx.gpr[3] = prev as u32 as u64;
+}
+
+/// `KeQueryBasePriorityThread(thread_handle)` — writes the value returned by
+/// `Scheduler::priority_ref` to `r3`, or 0 when the handle does not resolve
+/// to a scheduler thread.
+fn ke_query_base_priority_thread(
+    ctx: &mut PpcContext,
+    _mem: &GuestMemory,
+    state: &mut KernelState,
+) {
+    let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32);
+    let pri = state
+        .scheduler
+        .find_by_handle(handle)
+        .map(|r| state.scheduler.priority_ref(r))
+        .unwrap_or(0);
+    ctx.gpr[3] = pri as u32 as u64;
+}
+
+/// `KeSetIdealProcessor(thread_handle, proc_number) -> u8 old_ideal` —
+/// Axis 5. Stores the hint on the `GuestThread` for future spawn-sibling
+/// placement; does NOT migrate a live thread (use `KeSetAffinityThread`
+/// for that).
+fn ke_set_ideal_processor(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) {
+    let thread_handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32);
+    let requested = ctx.gpr[4] as u8;
+    // An unresolvable handle leaves the scheduler untouched and reports 0xFF.
+    let previous = match state.scheduler.find_by_handle(thread_handle) {
+        Some(thread) => state.scheduler.set_ideal_ref(thread, requested),
+        None => 0xFF,
+    };
+    ctx.gpr[3] = previous as u64;
+}
+
+/// `KeQueryIdealProcessor(thread_handle)` — writes the hint reported by
+/// `Scheduler::ideal_ref` to `r3`. Falls back to 0 both when the handle does
+/// not resolve to a scheduler thread and when `ideal_ref` yields `None`.
+fn ke_query_ideal_processor(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) {
+    let thread_handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32);
+    let hint = match state.scheduler.find_by_handle(thread_handle) {
+        Some(thread) => state.scheduler.ideal_ref(thread).unwrap_or(0),
+        None => 0,
+    };
+    ctx.gpr[3] = hint as u64;
+}
+
+/// `NtSetInformationThread(handle, info_class, info_ptr, info_len)` —
+/// minimal Axis 5 wiring for priority / affinity / ideal-processor
+/// classes. Other classes return `STATUS_INVALID_INFO_CLASS`.
+///
+/// Not registered as an ordinal: Xbox 360's `xboxkrnl.exe` doesn't export
+/// this function — canary's table assigns `0xFB` to
+/// `NtSignalAndWaitForSingleObjectEx`. The body is retained only for the
+/// direct-call unit test below.
+#[allow(dead_code)]
+fn nt_set_information_thread(
+    ctx: &mut PpcContext,
+    mem: &GuestMemory,
+    state: &mut KernelState,
+) {
+    const STATUS_INVALID_INFO_CLASS: u64 = 0xC000_0003;
+    const STATUS_INFO_LENGTH_MISMATCH: u64 = 0xC000_0004;
+    // r3 = thread handle, r4 = info class, r5 = guest pointer to the payload,
+    // r6 = payload length in bytes.
+    let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32);
+    let info_class = ctx.gpr[4] as u32;
+    let info_ptr = ctx.gpr[5] as u32;
+    let info_len = ctx.gpr[6] as u32;
+    let Some(r) = state.scheduler.find_by_handle(handle) else {
+        ctx.gpr[3] = STATUS_INVALID_HANDLE;
+        return;
+    };
+    ctx.gpr[3] = match info_class {
+        // A supported class whose buffer cannot hold the 4-byte payload is a
+        // length error, not an unknown class; nothing is read or changed.
+        2 | 3 | 13 if info_len < 4 => STATUS_INFO_LENGTH_MISMATCH,
+        2 /* ThreadPriority */ => {
+            let pri = mem.read_u32(info_ptr) as i32;
+            state.scheduler.set_priority_ref(r, pri);
+            STATUS_SUCCESS
+        }
+        3 /* ThreadAffinityMask */ => {
+            // Only the low 8 bits of the guest value are used as the mask.
+            let mask = mem.read_u32(info_ptr) as u8;
+            state.set_affinity(handle, mask, mem);
+            STATUS_SUCCESS
+        }
+        13 /* ThreadIdealProcessor */ => {
+            let ideal = mem.read_u32(info_ptr) as u8;
+            state.scheduler.set_ideal_ref(r, ideal);
+            STATUS_SUCCESS
+        }
+        _ => STATUS_INVALID_INFO_CLASS,
+    };
+}
+
+/// `KeSetAffinityThread(thread_handle, new_mask) -> old_mask` — Axis 4.
+/// Drives `KernelState::set_affinity` which delegates to the scheduler
+/// and then fixes up every outstanding `ThreadRef` held in waiter lists.
+fn ke_set_affinity_thread( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let new_mask = (ctx.gpr[4] as u32) as u8; + let old = state.set_affinity(handle, new_mask, mem); + ctx.gpr[3] = old as u64; +} + +fn ke_bug_check(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { tracing::error!("KeBugCheck: code={:#x}", ctx.gpr[3]); ctx.gpr[3] = 0; } -fn ke_bug_check_ex(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn ke_bug_check_ex(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { tracing::error!("KeBugCheckEx: code={:#x} p1={:#x} p2={:#x} p3={:#x}", ctx.gpr[3], ctx.gpr[4], ctx.gpr[5], ctx.gpr[6]); ctx.gpr[3] = 0; } -fn ke_get_current_process_type(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn ke_get_current_process_type(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 1; // PROC_USER } -fn ke_query_performance_frequency(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn ke_query_performance_frequency(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 50_000_000; // 50 MHz } -fn ke_query_system_time(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn ke_query_system_time(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { let time_ptr = ctx.gpr[3] as u32; if time_ptr != 0 { let fake_time: u64 = 132_500_000_000_000_000; // ~2021 FILETIME @@ -269,44 +495,60 @@ fn ke_query_system_time(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mu } } -fn ke_initialize_semaphore(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { - // r3 = semaphore_ptr, r4 = count, r5 = limit +fn ke_initialize_semaphore(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { + // r3 = PKSEMAPHORE, r4 = initial count, r5 = limit. 
+ // Mirrors xenia-canary KeInitializeSemaphore_entry + // (xboxkrnl_threading.cc:692). `ensure_dispatcher_object` (below) + // reads type@+0, signal_state@+4, and limit@+0x10 to mint the + // kernel-side shadow on first wait/release — so dropping the count + // and limit args (the prior zero-fill) silently produced + // `Semaphore { count: 0, max: 1 }` regardless of caller intent. let sem_ptr = ctx.gpr[3] as u32; - if sem_ptr != 0 { - // Zero-init the KSEMAPHORE structure (0x14 bytes) - for i in (0..0x14).step_by(4) { - mem.write_u32(sem_ptr + i, 0); - } + let count = ctx.gpr[4] as u32; + let limit = ctx.gpr[5] as u32; + if sem_ptr == 0 { + return; } + // DISPATCHER_HEADER: type=5 (Semaphore), absolute=0, size=5 u32s, + // inserted=0, signal_state=count, then 8-byte wait_list_head, then + // limit at +0x10. + mem.write_u8(sem_ptr, 5); + mem.write_u8(sem_ptr + 0x01, 0); + mem.write_u8(sem_ptr + 0x02, 5); + mem.write_u8(sem_ptr + 0x03, 0); + mem.write_u32(sem_ptr + 0x04, count); + mem.write_u32(sem_ptr + 0x08, 0); + mem.write_u32(sem_ptr + 0x0C, 0); + mem.write_u32(sem_ptr + 0x10, limit); } -fn ke_try_acquire_spinlock(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn ke_try_acquire_spinlock(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 1; // TRUE (acquired successfully in single-threaded mode) } -fn ke_tls_alloc(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { +fn ke_tls_alloc(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { ctx.gpr[3] = state.tls_alloc() as u64; } -fn ke_tls_get_value(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { +fn ke_tls_get_value(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { let index = ctx.gpr[3] as u32; ctx.gpr[3] = state.tls_get(index); } -fn ke_tls_set_value(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { +fn ke_tls_set_value(ctx: &mut PpcContext, _mem: 
&GuestMemory, state: &mut KernelState) { let index = ctx.gpr[3] as u32; let value = ctx.gpr[4]; state.tls_set(index, value); ctx.gpr[3] = 1; // TRUE } -fn ex_get_xconfig_setting(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn ex_get_xconfig_setting(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0; // STATUS_SUCCESS (writes nothing) } // ===== Memory ===== -fn nt_allocate_virtual_memory(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { +fn nt_allocate_virtual_memory(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = base_addr_ptr (in/out), r4 = region_size_ptr (in/out) // r5 = alloc_type, r6 = protect let base_ptr = ctx.gpr[3] as u32; @@ -349,38 +591,72 @@ fn nt_allocate_virtual_memory(ctx: &mut PpcContext, mem: &mut GuestMemory, state ctx.gpr[3] = 0; // STATUS_SUCCESS } -fn mm_allocate_physical_memory_ex(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { - // r3 = size, r4 = protect, r5 = min_addr, r6 = max_addr, r7 = alignment - let size = ctx.gpr[3] as u32; +fn mm_allocate_physical_memory_ex(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // Matches xenia-canary `MmAllocatePhysicalMemoryEx_entry` — see + // `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc:489-494`. + // r3 = flags, r4 = region_size, r5 = protect_bits, + // r6 = min_addr_range, r7 = max_addr_range, r8 = alignment + // Return value is the guest address; 0 indicates failure (Xbox ABI). 
+ let flags = ctx.gpr[3] as u32; + let size = ctx.gpr[4] as u32; + if size == 0 { + tracing::warn!(flags, "MmAllocatePhysicalMemoryEx: zero-size request → returning 0"); + ctx.gpr[3] = 0; + return; + } match state.heap_alloc(size, mem) { - Some(addr) => ctx.gpr[3] = addr as u64, - None => ctx.gpr[3] = 0, + Some(addr) => { + tracing::debug!( + flags, + size = format_args!("{size:#x}"), + addr = format_args!("{addr:#010x}"), + "MmAllocatePhysicalMemoryEx" + ); + ctx.gpr[3] = addr as u64; + } + None => { + tracing::warn!( + flags, + size = format_args!("{size:#x}"), + "MmAllocatePhysicalMemoryEx: heap exhausted" + ); + ctx.gpr[3] = 0; + } } } -fn mm_create_kernel_stack(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { - // r3 = stack_size, r4 = reserved - let size = std::cmp::max(ctx.gpr[3] as u32, 0x4000); // Min 16KB +fn mm_create_kernel_stack(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // xenia-canary `MmCreateKernelStack_entry(stack_size, r4)`; returns stack top. + // `xboxkrnl_threading.cc` — see DECLARE_XBOXKRNL_EXPORT on MmCreateKernelStack. 
+ let requested = ctx.gpr[3] as u32; + let size = std::cmp::max(requested, 0x4000); // Min 16KB per canary match state.stack_alloc(size, mem) { Some(top) => { - tracing::info!("MmCreateKernelStack: top={:#010x} size={:#x}", top, size); + tracing::info!( + top = format_args!("{top:#010x}"), + size = format_args!("{size:#x}"), + "MmCreateKernelStack" + ); ctx.gpr[3] = top as u64; } - None => ctx.gpr[3] = 0, + None => { + tracing::warn!(size = format_args!("{size:#x}"), "MmCreateKernelStack: stack heap exhausted"); + ctx.gpr[3] = 0; + } } } -fn mm_get_physical_address(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn mm_get_physical_address(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { // r3 = virtual address -> return physical address - ctx.gpr[3] = ctx.gpr[3] & 0x1FFF_FFFF; // Mask to 512MB physical + ctx.gpr[3] &= 0x1FFF_FFFF; // Mask to 512MB physical } -fn mm_query_address_protect(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn mm_query_address_protect(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { // Return PAGE_READWRITE (0x04) ctx.gpr[3] = 0x04; } -fn mm_query_statistics(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn mm_query_statistics(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = stats_ptr — write fake memory statistics let ptr = ctx.gpr[3] as u32; if ptr != 0 { @@ -393,125 +669,1097 @@ fn mm_query_statistics(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut // ===== File I/O ===== -fn nt_create_file(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { - let handle = state.alloc_handle_for(KernelObject::File { path: String::new() }); - tracing::info!("NtCreateFile: handle={:#x}", handle); - ctx.gpr[3] = 0; +/// NT error codes the file handlers need. Keeping them inline avoids pulling +/// in a whole NTSTATUS module for a single file. 
+const STATUS_SUCCESS: u64 = 0x0000_0000; +const STATUS_END_OF_FILE: u64 = 0xC000_0011; +const STATUS_INVALID_HANDLE: u64 = 0xC000_0008; +const STATUS_OBJECT_NAME_NOT_FOUND: u64 = 0xC000_0034; +const STATUS_NO_MORE_FILES: u64 = 0x8000_0006; +const STATUS_SEMAPHORE_LIMIT_EXCEEDED: u64 = 0xC000_0047; +const STATUS_UNSUCCESSFUL: u64 = 0xC000_0001; +const STATUS_INVALID_INFO_CLASS: u64 = 0xC000_0003; +const STATUS_INFO_LENGTH_MISMATCH: u64 = 0xC000_0004; +/// `X_ERROR_NOT_FOUND` from xenia-canary `xenia/xbox.h`. Returned by +/// `XexGetModuleHandle` for unknown module names. +const X_ERROR_NOT_FOUND: u64 = 0x0000_048B; + +/// A sentinel byte-offset value meaning "read at current file position". +const FILE_USE_FILE_POINTER_POSITION: u64 = 0xFFFF_FFFF_FFFF_FFFE; + +/// Write an `IO_STATUS_BLOCK { status, information }` if the pointer is non-null. +fn write_io_status_block(mem: &GuestMemory, ptr: u32, status: u32, information: u32) { + if ptr == 0 { + return; + } + mem.write_u32(ptr, status); + mem.write_u32(ptr + 4, information); } -fn nt_open_file(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { - let handle = state.alloc_handle_for(KernelObject::File { path: String::new() }); - tracing::info!("NtOpenFile: handle={:#x}", handle); - ctx.gpr[3] = 0; +/// Open a VFS-backed file. Shared between NtCreateFile and NtOpenFile — the +/// create/open distinction only matters for writable volumes, which the disc +/// image isn't. +fn open_vfs_file( + mem: &GuestMemory, + state: &mut KernelState, + handle_out: u32, + io_status_block: u32, + obj_attrs_ptr: u32, +) -> u64 { + // Accept the empty-after-prefix case (e.g. `NtCreateFile("game:\")`) as + // a valid "open the partition/device root" request — Canary's + // `NtCreateFile_entry` in xboxkrnl_io.cc:39 lets empty paths through + // to the VFS, which resolves them as a directory handle on the root. 
+ // Sylpheed opens `game:\` near the end of its boot as a disc-validation + // probe; returning `STATUS_OBJECT_NAME_NOT_FOUND` makes the async worker + // see a null handle later and trigger `XamShowDirtyDiscErrorUI`. + let path = crate::path::object_attributes_to_vfs_path(mem, obj_attrs_ptr) + .unwrap_or_default(); + if path.is_empty() && obj_attrs_ptr == 0 { + if handle_out != 0 { + mem.write_u32(handle_out, 0); + } + write_io_status_block(mem, io_status_block, STATUS_OBJECT_NAME_NOT_FOUND as u32, 0); + return STATUS_OBJECT_NAME_NOT_FOUND; + } + if path.is_empty() { + // Empty path after prefix strip is the "open the device/partition + // root" case (e.g. `NtCreateFile("game:\")`). Canary's + // `NtCreateFile_entry` resolves these through the VFS and returns + // a directory handle. We don't model directory entries, so synth + // a zero-byte "file" whose `path` is empty; `nt_query_information_file` + // then reports `Directory=1` / `FILE_ATTRIBUTE_DIRECTORY` based on + // the path shape, which is how Sylpheed's disc-validation probe + // decides it found a directory and proceeds. 
+ let handle = state.alloc_handle_for(KernelObject::File { + path: String::new(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + }); + if handle_out != 0 { + mem.write_u32(handle_out, handle); + } + write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, 0); + return STATUS_SUCCESS; + } + + let vfs = match state.vfs.as_ref() { + Some(v) => v, + None => { + tracing::warn!("NtCreateFile/NtOpenFile for {:?}: no VFS mounted", path); + if handle_out != 0 { + mem.write_u32(handle_out, 0); + } + write_io_status_block(mem, io_status_block, STATUS_OBJECT_NAME_NOT_FOUND as u32, 0); + return STATUS_OBJECT_NAME_NOT_FOUND; + } + }; + + match vfs.read_file(&path) { + Ok(bytes) => { + let size = bytes.len() as u64; + let handle = state.alloc_handle_for(KernelObject::File { + path: path.clone(), + size, + position: 0, + data: std::sync::Arc::new(bytes), + dir_enum_pos: None, + }); + if handle_out != 0 { + mem.write_u32(handle_out, handle); + } + write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, 0); + tracing::info!("File opened: path={:?} size={} handle={:#x}", path, size, handle); + STATUS_SUCCESS + } + Err(e) => { + // When the VFS can't resolve a path we synthesize a zero-byte + // virtual file rather than returning NOT_FOUND. Two rationales: + // + // 1. **Writable system partitions** (`cache:/`, `cache0:`, + // `cache1:`, `partition0:`, `partition1:`) aren't backed by + // the disc — Canary mounts them on host directories + // ([xenia_main.cc:612-651](xenia-canary/src/xenia/app/xenia_main.cc)). + // We skip the host mount for now, so opens there always miss + // without this fallback. + // + // 2. **Disc files that didn't make it into the ISO rip** (e.g., + // Sylpheed's `dat/files.tbl`, which the retail disc shipped + // but our dump doesn't contain). 
Returning NOT_FOUND makes + // Sylpheed's boot validator call `XamShowDirtyDiscErrorUI` + // → dashboard exit; see Canary's `XamShowDirtyDiscErrorUI` + // at xam_ui.cc:562 for the "bad or unimplemented file IO + // calls" framing. + // + // A zero-byte file lets the game's existence probe succeed, its + // read return EOF, and its "is the content here" sanity checks + // pass. If the game actually needs the bytes for gameplay we'll + // see a fresh failure downstream and can decide what to stub next. + let handle = state.alloc_handle_for(KernelObject::File { + path: path.clone(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + }); + if handle_out != 0 { + mem.write_u32(handle_out, handle); + } + write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, 0); + tracing::info!( + "Synthesized empty file for missing path: path={:?} err={} handle={:#x}", + path, + e, + handle + ); + STATUS_SUCCESS + } + } } -fn nt_read_file(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { - ctx.gpr[3] = 0xC000_0011; // STATUS_END_OF_FILE +fn nt_create_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle_out, r4 = desired_access, r5 = obj_attrs, r6 = io_status_block, + // r7 = allocation_size, r8 = file_attributes, r9 = share_access, r10 = create_disposition + let handle_out = ctx.gpr[3] as u32; + let obj_attrs_ptr = ctx.gpr[5] as u32; + let io_status_block = ctx.gpr[6] as u32; + ctx.gpr[3] = open_vfs_file(mem, state, handle_out, io_status_block, obj_attrs_ptr); } -fn nt_write_file(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { - ctx.gpr[3] = 0; // STATUS_SUCCESS (discard data) +fn nt_open_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle_out, r4 = desired_access, r5 = obj_attrs, + // r6 = io_status_block, r7 = share_access, r8 = open_options + let handle_out = ctx.gpr[3] as u32; + let obj_attrs_ptr = 
ctx.gpr[5] as u32; + let io_status_block = ctx.gpr[6] as u32; + ctx.gpr[3] = open_vfs_file(mem, state, handle_out, io_status_block, obj_attrs_ptr); } -fn nt_query_full_attributes_file(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { - ctx.gpr[3] = 0xC000_0034; // STATUS_OBJECT_NAME_NOT_FOUND +/// Signal an NT-style completion event on synchronous I/O completion. +/// +/// `NtReadFile` / `NtWriteFile` take an event handle at r4. The NT contract +/// is: on a real async driver, the event pulses when the I/O finishes. +/// Games that use the common "issue I/O then wait on the event" idiom will +/// deadlock if we return `STATUS_SUCCESS` without signaling — observed on +/// Sylpheed with four stuck threads parked on `WaitAny { handles: [evt] }` +/// that nothing else could wake. We finish I/O synchronously so we signal +/// immediately on *every* completion path (success, EOF, invalid-handle). +/// No-op when the caller passes a null handle (synchronous-wait style). +fn signal_io_completion_event(state: &mut KernelState, event_handle: u32) { + if event_handle == 0 { + return; + } + let prev = if let Some(KernelObject::Event { signaled, .. 
}) = state.objects.get_mut(&event_handle) { + let was = *signaled; + *signaled = true; + was as u64 + } else { + 0 + }; + state.audit_signal(event_handle, 0, "signal_io_completion_event", prev); + wake_eligible_waiters(state, event_handle); } -fn nt_query_directory_file(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { - ctx.gpr[3] = 0xC000_0034; // STATUS_OBJECT_NAME_NOT_FOUND -} - -fn nt_close(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { +fn nt_read_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle, r4 = event, r5 = apc_routine, r6 = apc_ctx, + // r7 = io_status_block, r8 = buffer, r9 = length, r10 = byte_offset_ptr let handle = ctx.gpr[3] as u32; - state.objects.remove(&handle); + let event_handle = ctx.gpr[4] as u32; + let io_status_block = ctx.gpr[7] as u32; + let buffer = ctx.gpr[8] as u32; + let length = ctx.gpr[9] as u32; + let byte_offset_ptr = ctx.gpr[10] as u32; + + let Some(KernelObject::File { path, size, position, data, .. }) = state.objects.get_mut(&handle) else { + tracing::warn!("NtReadFile: invalid handle {:#x}", handle); + ctx.gpr[3] = STATUS_INVALID_HANDLE; + write_io_status_block(mem, io_status_block, STATUS_INVALID_HANDLE as u32, 0); + signal_io_completion_event(state, event_handle); + return; + }; + + // If the caller supplied an explicit byte offset (not 0xFFFFFFFFFFFFFFFE) + // seek to it; otherwise continue from the stored cursor. 
+ let start_pos = if byte_offset_ptr != 0 { + let offset = mem.read_u64(byte_offset_ptr); + if offset != FILE_USE_FILE_POINTER_POSITION && offset != u64::MAX { + *position = offset; + } + *position + } else { + *position + }; + + let total = *size; + if start_pos >= total { + write_io_status_block(mem, io_status_block, STATUS_END_OF_FILE as u32, 0); + ctx.gpr[3] = STATUS_END_OF_FILE; + signal_io_completion_event(state, event_handle); + return; + } + + let avail = (total - start_pos).min(length as u64) as usize; + if avail == 0 { + write_io_status_block(mem, io_status_block, STATUS_END_OF_FILE as u32, 0); + ctx.gpr[3] = STATUS_END_OF_FILE; + signal_io_completion_event(state, event_handle); + return; + } + + let start = start_pos as usize; + let end = start + avail; + let slice = &data[start..end]; + mem.write_bulk(buffer, slice); + *position = start_pos + avail as u64; + + tracing::info!( + "NtReadFile: {} bytes from {:?} @ {} (handle={:#x})", + avail, path, start_pos, handle, + ); + write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, avail as u32); + ctx.gpr[3] = STATUS_SUCCESS; + signal_io_completion_event(state, event_handle); +} + +fn nt_write_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // We don't back anything writable, so discard. Still report the full + // length as written via IO_STATUS_BLOCK so the caller doesn't retry. + let event_handle = ctx.gpr[4] as u32; + let io_status_block = ctx.gpr[7] as u32; + let length = ctx.gpr[9] as u32; + write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, length); + ctx.gpr[3] = STATUS_SUCCESS; + signal_io_completion_event(state, event_handle); +} + +/// Minimal `NtQueryInformationFile`. Handles `FileStandardInformation` (5), +/// `FilePositionInformation` (14) and `FileNetworkOpenInformation` (34); any +/// other class, or a buffer too small for the class, gets zeros + success.
+fn nt_query_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle, r4 = io_status_block, r5 = file_info, r6 = length, r7 = class + let handle = ctx.gpr[3] as u32; + let io_status_block = ctx.gpr[4] as u32; + let file_info = ctx.gpr[5] as u32; + let length = ctx.gpr[6] as u32; + let class = ctx.gpr[7] as u32; + + let Some(KernelObject::File { size, position, path, .. }) = state.objects.get(&handle) else { + ctx.gpr[3] = STATUS_INVALID_HANDLE; + write_io_status_block(mem, io_status_block, STATUS_INVALID_HANDLE as u32, 0); + return; + }; + + // Root-of-device opens (`game:\`, `cache:\`, `partition0`) strip to + // an empty string post-prefix — see `open_vfs_file`'s synth path. + // Games query these as directories (DirectoryObject probe), and + // reporting `Directory=0` makes Sylpheed treat the open as "found a + // non-directory where I expected a directory" and call + // `XamShowDirtyDiscErrorUI`. Canary's `NtQueryInformationFile` pulls + // the real file-system entry's kind; we key on path shape since we + // don't model directory entries. + let is_directory = path.is_empty() + || path.ends_with('/') + || path.ends_with(':'); + let size = *size; + let position = *position; + + // `FILE_ATTRIBUTE_DIRECTORY` (NT / Xbox) — advertised in + // `FileNetworkOpenInformation.FileAttributes`; Sylpheed's async-I/O + // worker queries with class=34 and the calling code checks this bit + // to decide whether the open resolved to a directory before + // continuing down the non-error path. 
+ const FILE_ATTRIBUTE_DIRECTORY: u32 = 0x10; + const FILE_ATTRIBUTE_NORMAL: u32 = 0x80; + let written: u32 = match class { + // FileStandardInformation: AllocationSize(i64), EndOfFile(i64), NumberOfLinks(u32), DeletePending(u8), Directory(u8), pad(u16) + 5 if length >= 24 => { + mem.write_u64(file_info, size); + mem.write_u64(file_info + 8, size); + mem.write_u32(file_info + 16, 1); + mem.write_u8(file_info + 20, 0); + mem.write_u8(file_info + 21, if is_directory { 1 } else { 0 }); + mem.write_u16(file_info + 22, 0); + 24 + } + // FilePositionInformation: CurrentByteOffset(i64) + 14 if length >= 8 => { + mem.write_u64(file_info, position); + 8 + } + // FileNetworkOpenInformation: timestamps(4x i64) @ 0..32, + // AllocationSize(i64) @ 32, EndOfFile(i64) @ 40, FileAttributes(u32) @ 48 + // Sylpheed's async-validation worker asks for this (`length=56`) + // and the caller checks `FileAttributes & FILE_ATTRIBUTE_DIRECTORY` + // right after. Without populating the attributes the bit is + // clear, the caller decides the open "found a non-directory + // where a directory was expected", and the outer routine calls + // `XamShowDirtyDiscErrorUI` → `XamLoaderLaunchTitle` → garbage. + 34 if length >= 56 => { + // Zero timestamps (we don't track real times). + for off in (0..32).step_by(8) { + mem.write_u64(file_info + off, 0); + } + mem.write_u64(file_info + 32, size); + mem.write_u64(file_info + 40, size); + let attrs = if is_directory { + FILE_ATTRIBUTE_DIRECTORY + } else { + FILE_ATTRIBUTE_NORMAL + }; + mem.write_u32(file_info + 48, attrs); + mem.write_u32(file_info + 52, 0); // pad + 56 + } + _ => { + // Zero out whatever the caller asked for — conservative default. + for i in 0..length { + mem.write_u8(file_info + i, 0); + } + length + } + }; + + write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, written); + ctx.gpr[3] = STATUS_SUCCESS; +} + +/// `NtSetInformationFile(FileHandle, IoStatusBlock*, FileInformation, +/// Length, FileInformationClass)`. 
Mirrors Canary +/// [xboxkrnl_io_info.cc:180-304](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io_info.cc). +/// +/// Validates `info_class` (must have a defined minimum size) and +/// `info_length` (must meet that minimum); returns +/// `STATUS_INVALID_INFO_CLASS` / `STATUS_INFO_LENGTH_MISMATCH` in those +/// cases. The only class with real side-effects in xenia-rs is +/// `XFilePositionInformation` (14) — seek updates the file's cursor. +/// Read-only VFS means `XFileEndOfFileInformation` (20, truncate) can +/// only succeed if the new length equals the current size, otherwise +/// returns `STATUS_UNSUCCESSFUL`. Other classes acknowledge the write +/// but have no backing store. +fn nt_set_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle, r4 = io_status_block, r5 = info_ptr, + // r6 = info_length, r7 = info_class. + let handle = ctx.gpr[3] as u32; + let iosb_ptr = ctx.gpr[4] as u32; + let info_ptr = ctx.gpr[5] as u32; + let info_length = ctx.gpr[6] as u32; + let info_class = ctx.gpr[7] as u32; + + // Matches Canary's `GetSetFileInfoMinimumLength`. A return of 0 means + // "class we don't recognise for SetInfo" → STATUS_INVALID_INFO_CLASS. + let min_length = match info_class { + 4 => 40, // XFileBasicInformation (times + attributes) + 10 => 16, // XFileRenameInformation + 13 => 4, // XFileDispositionInformation (delete_file u32) + 14 => 8, // XFilePositionInformation (i64 current offset) + 16 | 31 => 4, // XFileModeInformation / XFileIoPriorityInformation + 19 | 20 | 23 => 8, // XFileAllocationInformation / EndOfFileInformation / MountPartitionInformation + 11 => 16, // XFileLinkInformation + 24 => 152, // XFileMountPartitionsInformation + 30 => 8, // XFileCompletionInformation (handle + key, 2 dwords) + _ => 0, + }; + if min_length == 0 { + ctx.gpr[3] = STATUS_INVALID_INFO_CLASS; + return; + } + if info_length < min_length { + ctx.gpr[3] = STATUS_INFO_LENGTH_MISMATCH; + return; + } + + // Handle lookup. 
+ let Some(KernelObject::File { size, position, .. }) = state.objects.get_mut(&handle) else { + ctx.gpr[3] = STATUS_INVALID_HANDLE; + return; + }; + + let (status, out_length): (u64, u32) = match info_class { + // XFilePositionInformation (14): i64 new byte offset. + 14 => { + let new_offset = mem.read_u64(info_ptr); + // Canary clamps nothing — it assigns directly. Game is + // responsible for staying within the file; reads past EOF + // return STATUS_END_OF_FILE from NtReadFile. + *position = new_offset; + (STATUS_SUCCESS, 8) + } + // XFileEndOfFileInformation (20): i64 new length. Read-only VFS + // → only a no-op truncate-to-same-size succeeds. + 20 => { + let new_eof = mem.read_u64(info_ptr); + if new_eof == *size { + (STATUS_SUCCESS, 8) + } else { + (STATUS_UNSUCCESSFUL, 8) + } + } + // XFileAllocationInformation (19): pre-allocation hint. Canary + // explicitly `XELOGW`s and reports out_length=8; we do the same. + 19 => (STATUS_SUCCESS, 8), + // XFileBasicInformation (4): times + attributes. Read-only VFS + // can't persist these, but acknowledge the write to match Canary's + // behaviour on a read-only entry. + 4 => (STATUS_SUCCESS, 40), + // XFileDispositionInformation (13): delete-on-close. Read-only VFS + // → log the bit and succeed; the file is never actually removed. + 13 => { + let delete_flag = mem.read_u32(info_ptr) != 0; + tracing::debug!( + handle = format_args!("{handle:#x}"), + delete = delete_flag, + "NtSetInformationFile: disposition (read-only VFS, no-op)" + ); + (STATUS_SUCCESS, 0) + } + // Other recognised classes: accept and report back the minimum + // length so callers don't bail on zero-information. 
+ _ => (STATUS_SUCCESS, min_length), + }; + + if iosb_ptr != 0 { + write_io_status_block(mem, iosb_ptr, status as u32, out_length); + } + ctx.gpr[3] = status; +} + +fn nt_query_full_attributes_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = obj_attrs, r4 = network_open_info + let obj_attrs_ptr = ctx.gpr[3] as u32; + let out = ctx.gpr[4] as u32; + + let path = match crate::path::object_attributes_to_vfs_path(mem, obj_attrs_ptr) { + Some(p) if !p.is_empty() => p, + _ => { + ctx.gpr[3] = STATUS_OBJECT_NAME_NOT_FOUND; + return; + } + }; + + let Some(vfs) = state.vfs.as_ref() else { + ctx.gpr[3] = STATUS_OBJECT_NAME_NOT_FOUND; + return; + }; + + match vfs.stat(&path) { + Ok(entry) => { + // FILE_NETWORK_OPEN_INFORMATION (56 bytes): 4 × FILETIME, + // AllocationSize(i64), EndOfFile(i64), FileAttributes(u32), pad(u32) + let filetime: u64 = 132_500_000_000_000_000; + if out != 0 { + mem.write_u32(out, (filetime >> 32) as u32); + mem.write_u32(out + 4, filetime as u32); + mem.write_u32(out + 8, (filetime >> 32) as u32); + mem.write_u32(out + 12, filetime as u32); + mem.write_u32(out + 16, (filetime >> 32) as u32); + mem.write_u32(out + 20, filetime as u32); + mem.write_u32(out + 24, (filetime >> 32) as u32); + mem.write_u32(out + 28, filetime as u32); + mem.write_u64(out + 32, entry.size); + mem.write_u64(out + 40, entry.size); + let attrs: u32 = if entry.is_directory { 0x10 } else { 0x80 }; + mem.write_u32(out + 48, attrs); + mem.write_u32(out + 52, 0); + } + ctx.gpr[3] = STATUS_SUCCESS; + } + Err(_) => { + ctx.gpr[3] = STATUS_OBJECT_NAME_NOT_FOUND; + } + } +} + +fn nt_query_volume_information_file(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { + // r3 = handle, r4 = io_status_block, r5 = info, r6 = length, r7 = class + let io_status_block = ctx.gpr[4] as u32; + let info = ctx.gpr[5] as u32; + let length = ctx.gpr[6] as u32; + let class = ctx.gpr[7] as u32; + + // FileFsSizeInformation (class 3): 24 bytes + // 
TotalAllocationUnits(i64), AvailableAllocationUnits(i64), + // SectorsPerAllocationUnit(u32), BytesPerSector(u32) + let written: u32 = match class { + 3 if length >= 24 => { + mem.write_u64(info, 0x10_0000); // ~2GB at 2KB sectors + mem.write_u64(info + 8, 0); + mem.write_u32(info + 16, 1); + mem.write_u32(info + 20, 2048); + 24 + } + _ => { + for i in 0..length { + mem.write_u8(info + i, 0); + } + length + } + }; + + write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, written); + ctx.gpr[3] = STATUS_SUCCESS; +} + +/// Enumerate the immediate children of a directory handle, writing +/// `X_FILE_DIRECTORY_INFORMATION` entries into the caller's buffer. +/// Mirrors Canary [xboxkrnl_io.cc:516-557](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc) +/// and the entry layout in +/// [xfile.h:35-73](xenia-canary/src/xenia/kernel/xfile.h). +/// +/// Pagination: each call consumes `dir_enum_pos` on the File handle. +/// `None` = fresh handle → start at index 0; `Some(N)` = resume from +/// N-th matching entry. On exhaustion the cursor stays past the end +/// and subsequent calls return `STATUS_NO_MORE_FILES`. The `restart_scan` +/// flag (9th arg, on the stack) is not yet threaded through; callers +/// that want to rescan must close and re-open the directory handle. +fn nt_query_directory_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3=file_handle, r4=event_handle, r5=apc_routine, r6=apc_context, + // r7=io_status_block, r8=file_info_ptr, r9=length, r10=file_name, + // sp+... = restart_scan. + let handle = ctx.gpr[3] as u32; + let event_handle = ctx.gpr[4] as u32; + let iosb_ptr = ctx.gpr[7] as u32; + let info_ptr = ctx.gpr[8] as u32; + let length = ctx.gpr[9] as u32; + + // Canary requires at least one fixed prefix + some filename room. 
+ const ENTRY_FIXED_SIZE: u32 = 0x40; // bytes 0..64 fixed fields + const CANARY_MIN_LENGTH: u32 = 72; // xboxkrnl_io.cc:521 + const FILE_ATTRIBUTE_DIRECTORY: u32 = 0x10; + const FILE_ATTRIBUTE_NORMAL: u32 = 0x80; + if length < CANARY_MIN_LENGTH { + ctx.gpr[3] = STATUS_INFO_LENGTH_MISMATCH; + signal_io_completion_event(state, event_handle); + return; + } + + // Look up the handle and snapshot the directory prefix. + let dir_path = match state.objects.get(&handle) { + Some(KernelObject::File { path, .. }) => path.clone(), + _ => { + if iosb_ptr != 0 { + write_io_status_block(mem, iosb_ptr, STATUS_INVALID_HANDLE as u32, 0); + } + ctx.gpr[3] = STATUS_INVALID_HANDLE; + signal_io_completion_event(state, event_handle); + return; + } + }; + + // Gather the directory's immediate children from the VFS. An empty + // `dir_path` refers to the disc root; non-empty paths match entries + // whose name starts with `dir_path + "/"` and whose suffix (relative + // to that prefix) contains no further slashes. + let prefix: String = if dir_path.is_empty() { + String::new() + } else if dir_path.ends_with('/') { + dir_path.clone() + } else { + format!("{}/", dir_path) + }; + let entries: Vec = match state.vfs.as_ref() { + Some(vfs) => vfs + .list_root() + .unwrap_or_default() + .into_iter() + .filter_map(|e| { + let relative: &str = if prefix.is_empty() { + e.name.as_str() + } else { + match e.name.strip_prefix(prefix.as_str()) { + Some(s) => s, + None => return None, + } + }; + if relative.is_empty() || relative.contains('/') { + return None; + } + Some(xenia_vfs::VfsEntry { + name: relative.to_string(), + is_directory: e.is_directory, + size: e.size, + offset: e.offset, + }) + }) + .collect(), + None => Vec::new(), + }; + + // Load / initialise the enumeration cursor. + let start_index = match state.objects.get_mut(&handle) { + Some(KernelObject::File { dir_enum_pos, .. 
}) => { + let pos = dir_enum_pos.unwrap_or(0); + *dir_enum_pos = Some(pos); + pos + } + _ => 0, + }; + + if start_index >= entries.len() { + if iosb_ptr != 0 { + write_io_status_block(mem, iosb_ptr, STATUS_NO_MORE_FILES as u32, 0); + } + ctx.gpr[3] = STATUS_NO_MORE_FILES; + signal_io_completion_event(state, event_handle); + return; + } + + // Pack as many entries as fit into `length`. `NextEntryOffset` is the + // byte distance to the next entry from the start of the current one; + // 0 marks the last entry. Entries are 8-byte aligned per Canary. + let mut cursor: u32 = 0; + let mut emitted: usize = 0; + let mut last_entry_offset: Option = None; + for (i, entry) in entries.iter().enumerate().skip(start_index) { + let name_bytes = entry.name.as_bytes(); + let name_len = name_bytes.len() as u32; + let raw_size = ENTRY_FIXED_SIZE + name_len; + let aligned_size = (raw_size + 7) & !7; + if cursor + raw_size > length { + // Entry wouldn't fit — leave the buffer truncated and stop. + break; + } + let base = info_ptr + cursor; + mem.write_u32(base + 0x00, 0); // next_entry_offset (patched later) + mem.write_u32(base + 0x04, i as u32); // file_index + // Timestamps zeroed — xenia-rs doesn't track them. + mem.write_u64(base + 0x08, 0); + mem.write_u64(base + 0x10, 0); + mem.write_u64(base + 0x18, 0); + mem.write_u64(base + 0x20, 0); + mem.write_u64(base + 0x28, entry.size); + mem.write_u64(base + 0x30, entry.size); + let attrs = if entry.is_directory { + FILE_ATTRIBUTE_DIRECTORY + } else { + FILE_ATTRIBUTE_NORMAL + }; + mem.write_u32(base + 0x38, attrs); + mem.write_u32(base + 0x3C, name_len); + for (k, &b) in name_bytes.iter().enumerate() { + mem.write_u8(base + ENTRY_FIXED_SIZE + k as u32, b); + } + // Patch the previous entry's next_entry_offset to point here. 
+ if let Some(prev_base) = last_entry_offset { + mem.write_u32(prev_base + 0x00, cursor - (prev_base - info_ptr)); + } + last_entry_offset = Some(base); + cursor = std::cmp::min(cursor + aligned_size, length); + emitted += 1; + if cursor + ENTRY_FIXED_SIZE > length { + // No room for another fixed header; stop before truncating. + break; + } + } + + // Advance cursor on the handle. + if let Some(KernelObject::File { dir_enum_pos, .. }) = state.objects.get_mut(&handle) { + *dir_enum_pos = Some(start_index + emitted); + } + + if emitted == 0 { + if iosb_ptr != 0 { + write_io_status_block(mem, iosb_ptr, STATUS_NO_MORE_FILES as u32, 0); + } + ctx.gpr[3] = STATUS_NO_MORE_FILES; + } else { + if iosb_ptr != 0 { + write_io_status_block(mem, iosb_ptr, STATUS_SUCCESS as u32, cursor); + } + ctx.gpr[3] = STATUS_SUCCESS; + } + signal_io_completion_event(state, event_handle); +} + +fn nt_close(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { + let handle = ctx.gpr[3] as u32; + // Aliased refcount: `NtDuplicateObject` returns the *source* handle as the + // "new" handle (we don't mint fresh values), so the game commonly holds + // two logical references to the same handle value. Without refcount, the + // first `NtClose` wipes the object while the second reference is still + // live, which traps any later wait on that handle (Sylpheed's + // create→dup(SAME_ACCESS)→set→close pattern at 0x8246079c manifests this + // — main thread then parks forever on the closed handle). Mirror Canary's + // `ObjectTable::ReleaseHandle` (object_table.cc:189): decrement the + // per-handle refcount and only drop the object when it reaches zero. 
+ let remaining = state + .handle_refcount + .get_mut(&handle) + .map(|c| { + *c = c.saturating_sub(1); + *c + }) + .unwrap_or(0); + if remaining == 0 { + state.objects.remove(&handle); + state.handle_refcount.remove(&handle); + // If the object was an armed Timer, strip its pending-fire entry + // so a later scheduler round doesn't try to signal a dead handle. + // `disarm_timer` is a no-op for non-timer handles. + state.disarm_timer(handle); + } ctx.gpr[3] = 0; } -fn nt_create_event(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { +fn nt_create_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = handle_ptr, r4 = obj_attrs, r5 = event_type, r6 = initial_state let handle_ptr = ctx.gpr[3] as u32; let manual_reset = ctx.gpr[5] != 0; let signaled = ctx.gpr[6] != 0; - let handle = state.alloc_handle_for(KernelObject::Event { manual_reset, signaled }); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset, + signaled, + waiters: Vec::new(), + }); + state.audit_create( + handle, + if manual_reset { "Event/Manual" } else { "Event/Auto" }, + ctx.lr as u32, + "NtCreateEvent", + ); if handle_ptr != 0 { mem.write_u32(handle_ptr, handle); } ctx.gpr[3] = 0; } -fn nt_create_semaphore(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { +fn nt_create_semaphore(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = handle_ptr, r4 = obj_attrs, r5 = initial_count, r6 = max_count let handle_ptr = ctx.gpr[3] as u32; let count = ctx.gpr[5] as i32; let max = ctx.gpr[6] as i32; - let handle = state.alloc_handle_for(KernelObject::Semaphore { count, max }); + let handle = state.alloc_handle_for(KernelObject::Semaphore { + count, + max, + waiters: Vec::new(), + }); + state.audit_create(handle, "Semaphore", ctx.lr as u32, "NtCreateSemaphore"); if handle_ptr != 0 { mem.write_u32(handle_ptr, handle); } ctx.gpr[3] = 0; } -fn nt_create_timer(ctx: &mut PpcContext, mem: &mut 
GuestMemory, state: &mut KernelState) { +/// `NtCreateTimer(OUT handle_ptr, obj_attributes, timer_type)` — mint a +/// Timer kernel object in the handle table. `timer_type` selects between +/// NotificationTimer (0, manual-reset) and SynchronizationTimer (1, +/// auto-reset); any other value returns `STATUS_INVALID_PARAMETER` +/// matching Canary's `assert_always` on bad types (xtimer.cc:32). +/// Named-object dedup (Canary's `LookupNamedObject`) is out of +/// scope — Sylpheed uses anonymous timers. +fn nt_create_timer(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + const STATUS_INVALID_PARAMETER: u64 = 0xC000_000D; let handle_ptr = ctx.gpr[3] as u32; - let handle = state.alloc_handle_for(KernelObject::Timer); + let timer_type = ctx.gpr[5] as u32; + if timer_type > 1 { + ctx.gpr[3] = STATUS_INVALID_PARAMETER; + return; + } + let handle = state.alloc_handle_for(KernelObject::Timer { + manual_reset: timer_type == 0, + signaled: false, + deadline: None, + period_ticks: 0, + period_ms: 0, + callback_routine: 0, + callback_arg: 0, + waiters: Vec::new(), + }); + state.audit_create( + handle, + if timer_type == 0 { "Timer/Manual" } else { "Timer/Auto" }, + ctx.lr as u32, + "NtCreateTimer", + ); if handle_ptr != 0 { mem.write_u32(handle_ptr, handle); } - ctx.gpr[3] = 0; + ctx.gpr[3] = STATUS_SUCCESS; +} + +/// `NtSetTimerEx(handle, due_time_ptr, routine, mode, routine_arg, resume, +/// period_ms, unk_zero)` — arm a Timer object. Mirrors Canary's +/// [`NtSetTimerEx_entry`](xboxkrnl_threading.cc:897): reads i64 `due_time` +/// (100ns units; negative = relative), converts to an absolute deadline +/// on our tick timebase (same `/100` scale as `parse_timeout`), stores +/// `period_ms` for periodic rearm, and registers the fire in +/// `state.pending_timer_fires` via `arm_timer`. +/// +/// APC delivery (`routine != 0`) is deferred — the timer still signals +/// itself on fire, and any `Wait*`-on-the-timer-handle waiter wakes +/// correctly. 
If a real-world probe shows `timer_apc` warns firing, +/// that's the signal to lift the APC subsystem into its own PR. +fn nt_set_timer_ex(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + const STATUS_INVALID_HANDLE: u64 = 0xC000_0008; + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let due_time_ptr = ctx.gpr[4] as u32; + let routine = ctx.gpr[5] as u32; + let _mode = ctx.gpr[6] as u32; + let routine_arg = ctx.gpr[7] as u32; + let _resume = ctx.gpr[8] as u32; + let period_ms = ctx.gpr[9] as u32; + + // Look up handle + confirm it's a Timer. We pull the current hw's + // timebase separately (immutable borrow) before any mutation of the + // object to keep the borrow-checker happy. + let hw_id = state.scheduler.current_hw_id().unwrap_or(0); + let now = state.scheduler.ctx(hw_id).timebase; + + // Read signed i64 due_time (big-endian hi/lo — same pattern as + // parse_timeout). Negative = relative-from-now, positive = absolute + // (FILETIME). We treat magnitude as relative for both signs; games on + // Xbox 360 overwhelmingly pass negative values for timers, and the + // positive-absolute path is handled best-effort for bring-up. + let hi = mem.read_u32(due_time_ptr) as i32; + let lo = mem.read_u32(due_time_ptr + 4); + let raw = ((hi as i64) << 32) | (lo as i64 & 0xFFFF_FFFF); + let magnitude = raw.unsigned_abs().max(1); + let abs_deadline = now.saturating_add(magnitude / 100); + // period_ms → ticks: ms × 1,000,000 ns / 100 ns-per-tick-divisor = + // ms × 10_000 (raw ticks) ÷ 100 (our scale factor) = ms × 100. Matches + // the same divisor `parse_timeout` applies. + let period_ticks = (period_ms as u64) * 100; + + match state.objects.get_mut(&handle) { + Some(KernelObject::Timer { + signaled, + deadline, + period_ticks: obj_period_ticks, + period_ms: obj_period_ms, + callback_routine, + callback_arg, + .. 
+ }) => { + *signaled = false; + *deadline = Some(abs_deadline); + *obj_period_ticks = period_ticks; + *obj_period_ms = period_ms; + *callback_routine = routine; + *callback_arg = routine_arg; + } + _ => { + ctx.gpr[3] = STATUS_INVALID_HANDLE; + return; + } + } + + if routine != 0 { + tracing::warn!( + target: "timer_apc", + routine = format_args!("{:#010x}", routine), + arg = format_args!("{:#010x}", routine_arg), + handle = format_args!("{:#010x}", handle), + "NtSetTimerEx: routine != 0 — APC delivery deferred; timer self-signal still works" + ); + } + + state.arm_timer(handle, abs_deadline); + ctx.gpr[3] = STATUS_SUCCESS; +} + +/// `NtCancelTimer(handle, OUT current_state_ptr)` — disarm a Timer. The +/// OUT pointer receives `0` per Canary's +/// [`NtCancelTimer_entry`](xboxkrnl_threading.cc:938-940), regardless of +/// prior signaled state. The Timer object stays in the handle table +/// (closed via NtClose); subsequent rearm via `NtSetTimerEx` is fine. +fn nt_cancel_timer(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + const STATUS_INVALID_HANDLE: u64 = 0xC000_0008; + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let current_state_ptr = ctx.gpr[4] as u32; + match state.objects.get_mut(&handle) { + Some(KernelObject::Timer { deadline, .. 
}) => { + *deadline = None; + } + _ => { + ctx.gpr[3] = STATUS_INVALID_HANDLE; + return; + } + } + state.disarm_timer(handle); + if current_state_ptr != 0 { + mem.write_u32(current_state_ptr, 0); + } + ctx.gpr[3] = STATUS_SUCCESS; } // ===== RTL ===== -fn rtl_initialize_critical_section(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { - // r3 = critical_section_ptr (28 bytes on Xbox 360) +// ----- RTL_CRITICAL_SECTION layout (Xbox 360 NT): ----- +// +0x00 DebugInfo (unused here) +// +0x04 LockCount (signed) +// +0x08 RecursionCount (signed; -1 while unlocked) +// +0x0C OwningThread (guest thread id, 0 when free) +// +0x10 LockSemaphore (unused) +// +0x14 SpinCount +// +// We enforce real mutual exclusion by reading/writing OwningThread and +// RecursionCount. Parked HW ids live in `KernelState::cs_waiters[cs_ptr]`. + +// X_RTL_CRITICAL_SECTION layout (28 bytes, Canary `xboxkrnl_rtl.cc:536-543`): +// +0x00: X_DISPATCH_HEADER (16 bytes) +// +0x00: type (u8) = 1 (EventSynchronizationObject / auto-reset) +// +0x01: absolute (u8) = spin-count/256 +// +0x02: size (u8) +// +0x03: inserted (u8) +// +0x04: signal_state (i32) +// +0x08: WaitListHead (two u32 pointers) +// +0x10: lock_count (i32) — starts at -1; first acquire → 0 +// +0x14: recursion_count (i32) — starts at 0; first acquire → 1 +// +0x18: owning_thread (u32) — 0 unless held +const CS_OFFS_TYPE: u32 = 0x00; +const CS_OFFS_LOCK_COUNT: u32 = 0x10; +const CS_OFFS_RECURSION_COUNT: u32 = 0x14; +const CS_OFFS_OWNING_THREAD: u32 = 0x18; +const CS_STRUCT_SIZE: u32 = 0x1C; + +fn rtl_initialize_critical_section( + ctx: &mut PpcContext, + mem: &GuestMemory, + _state: &mut KernelState, +) { let cs_ptr = ctx.gpr[3] as u32; if cs_ptr != 0 { - for i in (0..28).step_by(4) { + // Zero the whole struct, then set dispatcher type=1 and + // lock_count=-1 per Canary `xeRtlInitializeCriticalSection`. 
+ for i in (0..CS_STRUCT_SIZE).step_by(4) { mem.write_u32(cs_ptr + i, 0); } - // Set recursion count to -1 (unlocked) - mem.write_u32(cs_ptr + 8, 0xFFFF_FFFF_u32); + mem.write_u8(cs_ptr + CS_OFFS_TYPE, 1); + mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, 0xFFFF_FFFF_u32); // -1 } ctx.gpr[3] = 0; } -fn rtl_enter_critical_section(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { - // r3 = critical_section_ptr - // For single-threaded: increment lock count, always succeed +fn rtl_enter_critical_section( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { let cs_ptr = ctx.gpr[3] as u32; - if cs_ptr != 0 { - let lock_count = mem.read_u32(cs_ptr + 4) as i32; - mem.write_u32(cs_ptr + 4, (lock_count + 1) as u32); - let recursion = mem.read_u32(cs_ptr + 8) as i32; - mem.write_u32(cs_ptr + 8, (recursion + 1) as u32); + if cs_ptr == 0 { + ctx.gpr[3] = 0; + return; + } + let current_tid = ctx.thread_id; + let owner = mem.read_u32(cs_ptr + CS_OFFS_OWNING_THREAD); + + // "Effective owner" — if the stored tid doesn't correspond to any live HW + // thread, the CS memory is either uninitialized (.data junk from the XEX + // image) or the previous owner already exited. Treat it as free. 
+ let owner_is_live = + owner != 0 && state.scheduler.find_by_tid(owner).is_some(); + + if owner == 0 || !owner_is_live { + if owner != 0 { + tracing::debug!( + "rtl_enter_cs: cs={:#010x} stored owner={} has no live HW thread — claiming", + cs_ptr, + owner + ); + } + mem.write_u32(cs_ptr + CS_OFFS_OWNING_THREAD, current_tid); + mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, 0); // -1 → 0 on first lock + mem.write_u32(cs_ptr + CS_OFFS_RECURSION_COUNT, 1); + ctx.gpr[3] = 0; + return; + } + if owner == current_tid { + let lc = mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32; + mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, (lc + 1) as u32); + let rc = mem.read_u32(cs_ptr + CS_OFFS_RECURSION_COUNT) as i32; + mem.write_u32(cs_ptr + CS_OFFS_RECURSION_COUNT, (rc + 1) as u32); + ctx.gpr[3] = 0; + return; + } + // Truly contended against a live peer — park. + let lc = mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32; + mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, (lc + 1) as u32); + let current_ref = state.scheduler.current_ref(); + state + .cs_waiters + .entry(cs_ptr) + .or_default() + .push(current_ref); + tracing::debug!( + "rtl_enter_cs: hw={} park on cs={:#010x} owner_tid={}", + current_ref.hw_id, + cs_ptr, + owner + ); + ctx.gpr[3] = 0; + state + .scheduler + .park_current(BlockReason::CriticalSection(cs_ptr)); +} + +fn rtl_leave_critical_section( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + let cs_ptr = ctx.gpr[3] as u32; + if cs_ptr == 0 { + ctx.gpr[3] = 0; + return; + } + + let lc = mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32; + let rc = mem.read_u32(cs_ptr + CS_OFFS_RECURSION_COUNT) as i32; + if rc > 1 { + // Still nested; decrement both counts and keep ownership. + mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, (lc - 1) as u32); + mem.write_u32(cs_ptr + CS_OFFS_RECURSION_COUNT, (rc - 1) as u32); + ctx.gpr[3] = 0; + return; + } + // Fully releasing — wake the next waiter (if any) and transfer ownership. 
+ mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, (lc - 1) as u32); + mem.write_u32(cs_ptr + CS_OFFS_RECURSION_COUNT, 0); + mem.write_u32(cs_ptr + CS_OFFS_OWNING_THREAD, 0); + if let Some(queue) = state.cs_waiters.get_mut(&cs_ptr) + && !queue.is_empty() { + let next_ref = queue.remove(0); + // Find the woken thread's guest tid and hand it the lock. + let next_tid = state.scheduler.thread(next_ref).tid; + mem.write_u32(cs_ptr + CS_OFFS_OWNING_THREAD, next_tid); + mem.write_u32(cs_ptr + CS_OFFS_RECURSION_COUNT, 1); + state.scheduler.wake_ref(next_ref); + } + ctx.gpr[3] = 0; +} + +fn rtl_try_enter_critical_section( + ctx: &mut PpcContext, + mem: &GuestMemory, + _state: &mut KernelState, +) { + let cs_ptr = ctx.gpr[3] as u32; + if cs_ptr == 0 { + ctx.gpr[3] = 0; + return; + } + let current_tid = ctx.thread_id; + let owner = mem.read_u32(cs_ptr + CS_OFFS_OWNING_THREAD); + if owner == 0 { + mem.write_u32(cs_ptr + CS_OFFS_OWNING_THREAD, current_tid); + mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, 0); + mem.write_u32(cs_ptr + CS_OFFS_RECURSION_COUNT, 1); + ctx.gpr[3] = 1; + return; + } + if owner == current_tid { + let lc = mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32; + mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, (lc + 1) as u32); + let rc = mem.read_u32(cs_ptr + CS_OFFS_RECURSION_COUNT) as i32; + mem.write_u32(cs_ptr + CS_OFFS_RECURSION_COUNT, (rc + 1) as u32); + ctx.gpr[3] = 1; + return; } ctx.gpr[3] = 0; } -fn rtl_leave_critical_section(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { - let cs_ptr = ctx.gpr[3] as u32; - if cs_ptr != 0 { - let lock_count = mem.read_u32(cs_ptr + 4) as i32; - mem.write_u32(cs_ptr + 4, (lock_count - 1) as u32); - let recursion = mem.read_u32(cs_ptr + 8) as i32; - mem.write_u32(cs_ptr + 8, (recursion - 1) as u32); - } - ctx.gpr[3] = 0; -} - -fn rtl_try_enter_critical_section(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { - // Always succeed in single-threaded mode - let cs_ptr = ctx.gpr[3] as 
u32; - if cs_ptr != 0 { - let lock_count = mem.read_u32(cs_ptr + 4) as i32; - mem.write_u32(cs_ptr + 4, (lock_count + 1) as u32); - let recursion = mem.read_u32(cs_ptr + 8) as i32; - mem.write_u32(cs_ptr + 8, (recursion + 1) as u32); - } - ctx.gpr[3] = 1; // TRUE -} - -fn rtl_init_ansi_string(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn rtl_init_ansi_string(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { let dest_ptr = ctx.gpr[3] as u32; let src_ptr = ctx.gpr[4] as u32; if src_ptr != 0 { @@ -531,7 +1779,7 @@ fn rtl_init_ansi_string(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mu } } -fn rtl_init_unicode_string(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn rtl_init_unicode_string(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { let dest_ptr = ctx.gpr[3] as u32; let src_ptr = ctx.gpr[4] as u32; if src_ptr != 0 { @@ -551,7 +1799,7 @@ fn rtl_init_unicode_string(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: } } -fn rtl_capture_context(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn rtl_capture_context(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = context_ptr — write CPU registers to CONTEXT structure let ptr = ctx.gpr[3] as u32; if ptr != 0 { @@ -562,7 +1810,7 @@ fn rtl_capture_context(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut } } -fn rtl_compare_memory_ulong(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn rtl_compare_memory_ulong(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = source, r4 = length, r5 = pattern let source = ctx.gpr[3] as u32; let length = ctx.gpr[4] as u32; @@ -579,7 +1827,7 @@ fn rtl_compare_memory_ulong(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: ctx.gpr[3] = matched as u64; } -fn rtl_fill_memory_ulong(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn 
rtl_fill_memory_ulong(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = destination, r4 = length, r5 = pattern let dest = ctx.gpr[3] as u32; let length = ctx.gpr[4] as u32; @@ -590,13 +1838,13 @@ fn rtl_fill_memory_ulong(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &m } } -fn rtl_image_xex_header_field(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn rtl_image_xex_header_field(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { // r3 = xex_header_ptr, r4 = field_id // Return 0 for all fields ctx.gpr[3] = 0; } -fn rtl_multi_byte_to_unicode_n(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn rtl_multi_byte_to_unicode_n(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = unicode_str, r4 = max_bytes_out, r5 = bytes_written_ptr // r6 = multi_byte_str, r7 = multi_byte_len let uni_ptr = ctx.gpr[3] as u32; @@ -617,7 +1865,7 @@ fn rtl_multi_byte_to_unicode_n(ctx: &mut PpcContext, mem: &mut GuestMemory, _sta ctx.gpr[3] = 0; } -fn rtl_nt_status_to_dos_error(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn rtl_nt_status_to_dos_error(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { // Simple mapping for common cases let status = ctx.gpr[3] as u32; ctx.gpr[3] = match status { @@ -628,17 +1876,224 @@ fn rtl_nt_status_to_dos_error(ctx: &mut PpcContext, _mem: &mut GuestMemory, _sta }; } -fn rtl_raise_exception(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { - tracing::warn!("RtlRaiseException: record_ptr={:#010x}", ctx.gpr[3]); - // Don't halt — just log and return +fn rtl_raise_exception(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // X_EXCEPTION_RECORD layout (big-endian, Xbox; mirrors + // xenia-canary/src/xenia/kernel/kernel.h:227-236, total 0x50 bytes): + // +0x00 DWORD ExceptionCode + // +0x04 DWORD ExceptionFlags + // +0x08 PVOID 
ExceptionRecord (chain) + // +0x0C PVOID ExceptionAddress + // +0x10 DWORD NumberParameters + // +0x14 ULONG_PTR ExceptionInformation[15] <-- info[0] starts here + // + // For MSVC C++ throws (code = 0xE06D7363) the parameter convention is: + // info[0] = magic (0x19930520) + // info[1] = thrown object pointer + // info[2] = ThrowInfo* (TI descriptor in .rdata) + let record_ptr = ctx.gpr[3] as u32; + if record_ptr == 0 { + tracing::warn!(tid = ctx.thread_id, "RtlRaiseException: null record"); + return; + } + let code = mem.read_u32(record_ptr); + let flags = mem.read_u32(record_ptr + 0x04); + let addr = mem.read_u32(record_ptr + 0x0C); + let nparams = mem.read_u32(record_ptr + 0x10); + let info0 = if nparams > 0 { mem.read_u32(record_ptr + 0x14) } else { 0 }; + let info1 = if nparams > 1 { mem.read_u32(record_ptr + 0x18) } else { 0 }; + let info2 = if nparams > 2 { mem.read_u32(record_ptr + 0x1C) } else { 0 }; + + tracing::warn!( + tid = ctx.thread_id, + record = format_args!("{record_ptr:#010x}"), + code = format_args!("{code:#010x}"), + flags = format_args!("{flags:#010x}"), + exception_addr = format_args!("{addr:#010x}"), + caller_lr = format_args!("{:#010x}", ctx.lr as u32), + nparams, + info0 = format_args!("{info0:#010x}"), + info1 = format_args!("{info1:#010x}"), + info2 = format_args!("{info2:#010x}"), + "RtlRaiseException (stubbed return)", + ); + + // One-shot deep diagnostic for MSVC C++ throws. Mirrors the latch + // pattern used elsewhere (see render.rs:693-707 first_dispatch_logged). + // Fires once per process start; subsequent throws still log the + // header line above but don't repeat the expensive stack walk + decode. + if code == 0xE06D_7363 && !state.cxx_throw_logged { + state.cxx_throw_logged = true; + + // Walk the PPC frame chain ~6 levels back from r1. + // PPC/EABI prologue: `mflr r12; stw r12, -8(r1); stwu r1, -F(r1)`. + // After prologue, [r1] = back-chain to old_r1, and the LR saved + // in *that* frame's prologue lives at [old_r1 - 8]. 
+ // Walking up: prev_sp = mem.read_u32(sp); + // saved_lr_for_that_frame = mem.read_u32(prev_sp - 8); + // Level 0 is the live frame: its return address is in ctx.lr + // (no need to read the stack). + let mut frames: Vec<(u32, u32)> = Vec::with_capacity(8); + frames.push((ctx.gpr[1] as u32, ctx.lr as u32)); + let mut sp = ctx.gpr[1] as u32; + for _ in 0..6 { + if sp == 0 || sp == 0xFFFF_FFFF { break; } + let prev_sp = mem.read_u32(sp); + if prev_sp == 0 || prev_sp == sp || prev_sp == 0xFFFF_FFFF { + break; + } + let saved_lr = mem.read_u32(prev_sp.wrapping_sub(8)); + frames.push((prev_sp, saved_lr)); + sp = prev_sp; + } + for (i, (fp, lr)) in frames.iter().enumerate() { + tracing::warn!( + level = i, + frame_ptr = format_args!("{fp:#010x}"), + saved_lr = format_args!("{lr:#010x}"), + "cxx_throw stack frame", + ); + } + + // Extract lhs — the "not valid instance" pointer — from __CxxThrow wrapper's + // saved r30. sub_825F23D8 (__CxxThrow) does `std r30, -24(r1)` in its prologue + // where r1 = sub_82454770's current SP = frames[2].0 (L2 frame pointer). + // `std` is a 64-bit big-endian store; the 32-bit guest address is in the + // lower 4 bytes at [frames[2].0 - 24 + 4] = [frames[2].0 - 20]. + if frames.len() >= 3 { + let l2_fp = frames[2].0; + let lhs = mem.read_u32(l2_fp.wrapping_sub(20)); + tracing::warn!( + l2_fp = format_args!("{l2_fp:#010x}"), + lhs = format_args!("{lhs:#010x}"), + "cxx_throw lhs (not-registered instance)", + ); + + // Walk the instance registry BST at 0x828F3DA8 to show what IS registered. + // Layout: [+0..+27]=CriticalSection (28 bytes), [+28..+31]=some field, + // [+32]=sentinel heap ptr, [+36]=node count. + // Sentinel (heap-allocated): [+0]=left,[+4]=next,[+8]=right,[+12]=key,[+17]=is_valid(1). + // A real node has is_valid=0. 
+ let registry_base = 0x828F3DA8_u32; + let sentinel_ptr = mem.read_u32(registry_base + 32); + let node_count = mem.read_u32(registry_base + 36); + tracing::warn!( + sentinel = format_args!("{sentinel_ptr:#010x}"), + node_count, + "cxx_throw registry state", + ); + if sentinel_ptr != 0 { + // Replicate validator sub_82454600's BST ceil search: + // Find min key >= lhs. If candidate_key == lhs → should be valid. + let root = mem.read_u32(sentinel_ptr.wrapping_add(4)); + let mut node = root; + let mut candidate = sentinel_ptr; // "no candidate" marker + let mut steps = 0_u32; + loop { + if mem.read_u8(node.wrapping_add(17)) != 0 { + break; // sentinel (is_valid != 0) + } + if steps >= 128 { + break; // guard against runaway + } + let key = mem.read_u32(node.wrapping_add(12)); + if key >= lhs { + candidate = node; + node = mem.read_u32(node); // go left (node[+0]) + } else { + node = mem.read_u32(node.wrapping_add(8)); // go right (node[+8]) + } + steps += 1; + } + let (candidate_key, candidate_is_sentinel) = if candidate != sentinel_ptr { + (mem.read_u32(candidate.wrapping_add(12)), false) + } else { + (0, true) + }; + tracing::warn!( + root = format_args!("{root:#010x}"), + root_key = format_args!("{:#010x}", mem.read_u32(root.wrapping_add(12))), + lhs = format_args!("{lhs:#010x}"), + candidate = format_args!("{candidate:#010x}"), + candidate_key = format_args!("{candidate_key:#010x}"), + candidate_is_sentinel, + steps, + match_found = (candidate_key == lhs && !candidate_is_sentinel), + "cxx_throw BST ceil search", + ); + } else { + tracing::warn!("cxx_throw registry: sentinel_ptr is null"); + } + } + + // Decode runtime_error::what() — verified layout via the + // destructor at sub_8216DBC0 (it does `addi r3, obj, 12` + // before calling the std::string destructor). 
MSVC layout + // for this CRT: + // +0x00 vtbl* + // +0x04 char* _Mywhat (lazy; set by what(); often 0 at throw) + // +0x08 uint8_t _Mydofree + // +0x0C std::string _Mystr { + // union _Bx { char _Buf[16]; char* _Ptr; } (+0x0C..+0x1C) + // size_t _Mysize (+0x1C) + // size_t _Myres (+0x20) capacity + // } + // SSO: when _Myres < 16, chars are inline at +0x0C; otherwise + // +0x0C is a heap char*. Log BOTH interpretations + raw + // _Mysize/_Myres so the right one is obvious from the values. + if info1 != 0 { + let mut sso_buf = [0u8; 16]; + mem.read_bytes(info1.wrapping_add(0x0C), &mut sso_buf); + let nul = sso_buf.iter().position(|&b| b == 0).unwrap_or(16); + let sso_msg = String::from_utf8_lossy(&sso_buf[..nul]).into_owned(); + + let heap_ptr = mem.read_u32(info1.wrapping_add(0x0C)); + let heap_msg = if heap_ptr != 0 + && heap_ptr != info1.wrapping_add(0x0C) + && (0x10000..0xC000_0000).contains(&heap_ptr) + { + read_cstring(mem, heap_ptr) + } else { + String::new() + }; + + let mysize = mem.read_u32(info1.wrapping_add(0x1C)); + let myres = mem.read_u32(info1.wrapping_add(0x20)); + let mywhat = mem.read_u32(info1.wrapping_add(0x04)); + let mywhat_str = if mywhat != 0 && (0x10000..0xC000_0000).contains(&mywhat) { + read_cstring(mem, mywhat) + } else { + String::new() + }; + + tracing::warn!( + obj = format_args!("{info1:#010x}"), + throwinfo = format_args!("{info2:#010x}"), + magic = format_args!("{info0:#010x}"), + mysize, + myres, + heap_ptr = format_args!("{heap_ptr:#010x}"), + mywhat_ptr = format_args!("{mywhat:#010x}"), + mywhat = %mywhat_str, + sso_msg = %sso_msg, + heap_msg = %heap_msg, + "cxx_throw runtime_error decoded", + ); + } + } + + // Keep the existing stub-return semantics: Canary's RtlRaiseException + // also returns rather than unwinds (xboxkrnl_debug.cc:131-151 — the + // TODO comment there reads "unwinding. This is going to suck."). 
+ // The Canary-aligned path is to fix the upstream HLE that triggered + // the throw, not to implement SEH dispatch here. } -fn rtl_unwind(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn rtl_unwind(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { tracing::warn!("RtlUnwind: target_frame={:#010x}", ctx.gpr[3]); // Stub — in a real implementation this would walk the stack } -fn stub_sprintf(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn stub_sprintf(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { let dest = ctx.gpr[3] as u32; let fmt = ctx.gpr[4] as u32; if fmt != 0 && dest != 0 { @@ -655,7 +2110,7 @@ fn stub_sprintf(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut Kernel ctx.gpr[3] = 0; } -fn stub_vsnprintf(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn stub_vsnprintf(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = buffer, r4 = count, r5 = format, r6 = va_list let dest = ctx.gpr[3] as u32; let fmt = ctx.gpr[5] as u32; @@ -675,7 +2130,32 @@ fn stub_vsnprintf(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut Kern // ===== Video ===== -fn vd_query_video_mode(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +/// `VdGetCurrentDisplayGamma(type_ptr, power_ptr)` — matches Canary's +/// impl (xboxkrnl_video.cc:119). Writes the active gamma ramp kind and +/// its power exponent. Returning without writing leaves stack garbage for +/// the game to consume; Sylpheed's boot sequence branches on the type and, +/// with uninitialized bytes, takes the "unknown gamma → abort init" exit +/// path — `main()` then returns to the CRT entry and the title terminates +/// before the render loop starts. 
+fn vd_get_current_display_gamma( + ctx: &mut PpcContext, + mem: &GuestMemory, + _state: &mut KernelState, +) { + let type_ptr = ctx.gpr[3] as u32; + let power_ptr = ctx.gpr[4] as u32; + if type_ptr != 0 { + mem.write_u32(type_ptr, 2); // BT.709 / TV gamma — the Xbox 360 default + } + if power_ptr != 0 { + // float 2.22222 ≈ 0x4011C720, matches Canary's + // `kernel_display_gamma_power` cvar default. + mem.write_u32(power_ptr, 0x4011_C720); + } + ctx.gpr[3] = 0; +} + +fn vd_query_video_mode(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { let mode_ptr = ctx.gpr[3] as u32; if mode_ptr != 0 { mem.write_u32(mode_ptr, 1280); @@ -687,46 +2167,395 @@ fn vd_query_video_mode(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut ctx.gpr[3] = 0; } -fn vd_get_system_command_buffer(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { - // r3 = cmd_buffer_ptr_ptr, r4 = cmd_buffer_size_ptr - let buf_ptr_ptr = ctx.gpr[3] as u32; - let buf_size_ptr = ctx.gpr[4] as u32; - - if state.gpu_command_buffer == 0 { - // Allocate a 64KB command buffer - if let Some(addr) = state.heap_alloc(0x10000, mem) { - state.gpu_command_buffer = addr; +fn vd_get_system_command_buffer( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + // Matches `VdGetSystemCommandBuffer_entry` in + // `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc:330-334`: + // void VdGetSystemCommandBuffer_entry(lpunknown_t p0_ptr, lpunknown_t p1_ptr) { + // p0_ptr.Zero(0x94); + // xe::store_and_swap(p0_ptr, 0xBEEF0000); + // xe::store_and_swap(p1_ptr, 0xBEEF0001); + // } + // Games pass two out-pointers; the first points at a 148-byte block they + // expect zeroed, and the first dword of each block is a "token" that + // xenia-canary hard-codes. The tokens aren't further dereferenced — they + // are later fed back to Vd* calls and checked for non-zero. 
+ let p0_ptr = ctx.gpr[3] as u32; + let p1_ptr = ctx.gpr[4] as u32; + if p0_ptr != 0 { + for i in (0..0x94u32).step_by(4) { + mem.write_u32(p0_ptr + i, 0); } + mem.write_u32(p0_ptr, 0xBEEF_0000); } + if p1_ptr != 0 { + mem.write_u32(p1_ptr, 0xBEEF_0001); + } + state.gpu_command_buffer = p0_ptr; // kept for informational use in --ui HUD + ctx.gpr[3] = 0; +} - if buf_ptr_ptr != 0 { - mem.write_u32(buf_ptr_ptr, state.gpu_command_buffer); - } - if buf_size_ptr != 0 { - mem.write_u32(buf_size_ptr, 0x10000); +fn vd_is_hsio_training_succeeded(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { + ctx.gpr[3] = 1; // TRUE +} + +fn vd_initialize_ring_buffer(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { + // Matches `VdInitializeRingBuffer_entry` at + // `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc:313-319`: + // r3 = ring buffer guest address (physical, WRITE_COMBINE) + // r4 = log2(size) in bytes + let ptr = ctx.gpr[3] as u32; + let size_log2 = ctx.gpr[4] as u32; + state.gpu.initialize_ring_buffer(ptr, size_log2); + ctx.gpr[3] = 0; +} + +fn vd_enable_ring_buffer_rptr_writeback( + ctx: &mut PpcContext, + _mem: &GuestMemory, + state: &mut KernelState, +) { + // Matches `VdEnableRingBufferRPtrWriteBack_entry` at + // `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc:322-326`. + let ptr = ctx.gpr[3] as u32; + let block_log2 = ctx.gpr[4] as u32; + state.gpu.enable_rptr_writeback(ptr, block_log2); + ctx.gpr[3] = 0; +} + +fn vd_set_graphics_interrupt_callback( + ctx: &mut PpcContext, + _mem: &GuestMemory, + state: &mut KernelState, +) { + // r3 = callback, r4 = user_data. P6: store the callback so the synthetic + // v-sync ticker + PM4_INTERRUPT path can invoke it. Zero means "unregister". 
+ let cb = ctx.gpr[3] as u32; + let user = ctx.gpr[4] as u32; + if cb == 0 { + state.interrupts.callback = None; + tracing::info!("VdSetGraphicsInterruptCallback: unregistered"); + } else { + state.interrupts.set_callback(cb, user); + tracing::info!( + "VdSetGraphicsInterruptCallback({:#010x}, {:#010x}) — callback armed", + cb, + user + ); } ctx.gpr[3] = 0; } -fn vd_is_hsio_training_succeeded(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { - ctx.gpr[3] = 1; // TRUE -} +fn vd_swap(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // Argument order from xenia-canary VdSwap_entry: + // r3 = buffer_ptr (slot the game reserved in the primary ring) + // r4 = fetch_ptr (6-dword D3D9 texture fetch header) + // r5 = unk2 (system writeback ptr — ignored here) + // r6 = unk3 (system cmd buf — ignored) + // r7 = unk4 (system cmd buf — ignored) + // r8 = frontbuffer_ptr (*u32, guest writes its virtual FB address) + // r9 = texture_format_ptr(*u32) + // r10 = color_space_ptr (*u32) + // stack[0] = width_ptr (*u32) — we decode from fetch instead + // stack[1] = height_ptr (*u32) — same + let buffer_ptr = ctx.gpr[3] as u32; + let fetch_ptr = ctx.gpr[4] as u32; + let frontbuffer_ptr = ctx.gpr[8] as u32; + let texture_format_ptr = ctx.gpr[9] as u32; + let color_space_ptr = ctx.gpr[10] as u32; -fn vd_swap(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { - tracing::info!("VdSwap (frame boundary)"); + // Decode the D3D9 texture fetch header — 6 dwords. The interesting bits + // are base_address (dword_1) and size_2d (dword_2). Mirrors + // xenia-canary/src/xenia/gpu/xenos.h xe_gpu_texture_fetch_t. + let mut fetch_dwords = [0u32; 6]; + if fetch_ptr != 0 { + for (i, slot) in fetch_dwords.iter_mut().enumerate() { + *slot = mem.read_u32(fetch_ptr + (i as u32) * 4); + } + } + // dword_1 bits 12:31 hold base_address shifted right by 12. 
+ let frontbuffer_virt = (fetch_dwords[1] >> 12) << 12; + // dword_2: width in bits 0..12 (width-1), height in bits 13..25 (height-1). + // Fall back to the reported video mode when the fetch is empty. + let (width, height) = if fetch_dwords[2] != 0 { + let w = (fetch_dwords[2] & 0x1FFF) + 1; + let h = ((fetch_dwords[2] >> 13) & 0x1FFF) + 1; + (w, h) + } else { + (1280, 720) + }; + // The guest also writes the virtual frontbuffer address to *frontbuffer_ptr. + // Prefer that when the fetch-derived address is zero. + let frontbuffer_addr = if frontbuffer_virt != 0 { + frontbuffer_virt + } else if frontbuffer_ptr != 0 { + mem.read_u32(frontbuffer_ptr) + } else { + 0 + }; + let texture_format = if texture_format_ptr != 0 { + mem.read_u32(texture_format_ptr) + } else { + 0 + }; + let color_space = if color_space_ptr != 0 { + mem.read_u32(color_space_ptr) + } else { + 0 + }; + + // First-Pixels M2b — two-part commit path. + // + // 1) Fill the guest's reserved 64-dword slot with PM4 Type-2 NOPs + // (0x8000_0000). Some titles consume `buffer_ptr..+256` after + // VdSwap returns and assume they're skippable. Matches the prior + // behaviour. + if buffer_ptr != 0 { + for i in 0..64u32 { + mem.write_u32(buffer_ptr + i * 4, 0x8000_0000); + } + } + + // 2) Advance the ring's write pointer by 64 dwords (the slot the + // game "reserved" via VdSwap's buffer_ptr convention). Despite + // `buffer_ptr` being in the system command buffer rather than the + // primary ring, the 64-dword bump correctly exposes packets the + // game wrote into the primary ring since our last `sync_with_mmio`. + // Empirically (pre-M2b) this path drained 512 packets through 1 B + // guest instructions — the setup packets that D3D9-init writes. + // + // M1.5: bump routes through the shared MMIO atomic so both + // backends produce the same observable WPTR sequence. Inline + // picks it up on its next `sync_with_mmio`; threaded's worker + // observes the same atomic. 
+ state.gpu.extend_write_ptr_by(64); + + // Drain the exposed packets — the D3D9-init setup the game batched + // into the ring plus any leftovers. The synthetic `PM4_XE_SWAP` + // packet the prior code wrote at `buffer_ptr` is **not** written + // anymore; the drain's `ring.base + rptr*4` walk couldn't find it + // anyway (see the pre-M2b `swaps=0 with packets=512` failure mode). + // + // M1.5: backend-aware drain. Inline: synchronous `sync_with_mmio + drain`. + // Threaded: posts `DrainFence` + blocks on reply (1 s defensive timeout + // on CPU; 900 ms internal deadline on worker). + let drained = state.gpu.drain_to_current_wptr(mem); + tracing::debug!(drained, "VdSwap: drained PM4 packets"); + + // 3) Fire the swap notification — bumps `swaps_seen`, records + // `last_swap`, enqueues an `InterruptSource::Swap` interrupt for + // the scheduler-round graphics callback path. M1.5: backend-aware; + // threaded sends `NotifyXeSwap` (fire-and-forget). + if frontbuffer_addr != 0 && width > 0 && height > 0 { + state.gpu.notify_xe_swap(frontbuffer_addr, width, height); + } + + // The remaining vd_swap work (UI publish: shader blobs, constants, + // texture cache, frontbuffer detile, ui.notify_swap) reads + // `state.gpu`'s internal state directly. In threaded mode that state + // lives on the worker thread; the UI bridge itself is `None` under + // `--gpu-thread` today (run_with_ui panics if both flags are set), so + // the early-return below is exact rather than a workaround. + let Some(gpu_inline) = state.gpu.as_inline_mut() else { + ctx.gpr[3] = 0; + return; + }; + + // Prefer the swap info the executor learned from PM4_XE_SWAP (that's + // the source of truth after draining). 
+ let swap = gpu_inline.last_swap.unwrap_or(xenia_gpu::SwapNotification { + frame_index: gpu_inline.swap_counter, + frontbuffer_phys: frontbuffer_addr, + width, + height, + }); + + // P3b: publish the shader blob map + constants snapshot to the UI so + // the Xenos uber-shader has what it needs to execute captured draws. + // Do this before `notify_swap` so by the time the UI processes the + // SwapInfo the matching assets are visible through `UiHandles`. + if let Some(ref ui) = state.ui { + let blobs: std::collections::HashMap> = gpu_inline + .shader_blobs + .iter() + .map(|(k, b)| (*k, b.dwords.clone())) + .collect(); + let constants = xenia_gpu::xenos_constants::XenosConstantsBlock::snapshot( + &gpu_inline.register_file, + ); + ui.publish_assets(blobs, constants); + + // P5: try to decode the primary texture (fetch constant slot 0). + // Slot 0 is the convention most games use for their main bound + // texture at draw time; full N-slot binding waits for P6+. If the + // slot is unset or the format isn't supported (magenta stub kicks + // in host-side), we skip. + // + // Texture fetch constants live at `CONST_BASE_FETCH + slot*6` in + // the register file; we read the 6 dwords, decode the key, hit + // the CPU cache (with page-version freshness), and clone the + // decoded bytes across the bridge. + const TEX_SLOT: u32 = 0; + let mut fetch6 = [0u32; 6]; + for (i, slot) in fetch6.iter_mut().enumerate() { + *slot = gpu_inline + .register_file + .read(xenia_gpu::gpu_system::CONST_BASE_FETCH + TEX_SLOT * 6 + i as u32); + } + let published = if let Some(key) = xenia_gpu::texture_cache::decode_fetch_constant(fetch6) + { + // Span over the entire tiled texture footprint to pick the + // max page version covering it. 
+ let bi = key.format.block_info(); + let span_bytes = (key.pitch_texels as u32) + * (key.height as u32) + * (bi.bytes_per_block as u32) + / (bi.block_w as u32); + let version = mem.max_page_version(key.base_address, span_bytes.max(4)); + match gpu_inline.texture_cache.ensure_cached(key, version, mem) { + Ok(entry) => Some((entry.key, entry.bytes.clone())), + Err(e) => { + metrics::counter!( + "gpu.texture.reject", + "reason" => format!("{:?}", e), + ) + .increment(1); + None + } + } + } else { + None + }; + metrics::gauge!("gpu.texture_cache.entries") + .set(gpu_inline.texture_cache.len() as f64); + ui.publish_texture(published); + } + // Notify the UI. + if let Some(ui) = state.ui.clone() { + let (last_prim, last_verts) = match gpu_inline.last_draw { + Some(ds) => { + // PrimitiveType variants without Display; encode as raw bits. + let code = match ds.primitive { + xenia_gpu::draw_state::PrimitiveType::None => 0, + xenia_gpu::draw_state::PrimitiveType::PointList => 1, + xenia_gpu::draw_state::PrimitiveType::LineList => 2, + xenia_gpu::draw_state::PrimitiveType::LineStrip => 3, + xenia_gpu::draw_state::PrimitiveType::TriangleList => 4, + xenia_gpu::draw_state::PrimitiveType::TriangleFan => 5, + xenia_gpu::draw_state::PrimitiveType::TriangleStrip => 6, + xenia_gpu::draw_state::PrimitiveType::RectangleList => 8, + xenia_gpu::draw_state::PrimitiveType::QuadList => 13, + xenia_gpu::draw_state::PrimitiveType::Unknown(x) => x as u32, + }; + (code, ds.vertex_count) + } + None => (0, 0), + }; + let instructions_total: u64 = state + .scheduler + .slots + .iter() + .flat_map(|slot| slot.runqueue.iter()) + .map(|t| t.ctx.cycle_count) + .sum(); + // P4: CPU-side detile of the guest frontbuffer. We treat the + // frontbuffer as a tiled k_8_8_8_8 image (the overwhelmingly + // common format games resolve to), read it out of guest memory, + // run it through `tiled_2d` / `detile_2d`, and hand the resulting + // linear RGBA8 bytes to the UI via a dedicated bridge closure. 
+ // The UI upgrades the previous "no frontbuffer content" placeholder + // path to real game output. Failures (OOB reads, malformed fetch + // headers) silently skip the publish. + if swap.frontbuffer_phys != 0 && swap.width > 0 && swap.height > 0 { + let pitch_aligned = + xenia_gpu::tiled_address::align_pitch_to_macro_tile(swap.width); + let total_tiled_bytes = (pitch_aligned * swap.height * 4) as usize; + // The guest address is 32-bit virtual but in the physical heap; + // safer to cap the read at the known total size to avoid OOB. + let mut tiled = Vec::with_capacity(total_tiled_bytes); + let mut ok = true; + for i in 0..total_tiled_bytes { + // read_u8 is cheap — the VirtualMemory handler returns 0 + // for unmapped pages so we get a recognisable dark frame + // rather than a crash if the address turned out bogus. + let addr = swap.frontbuffer_phys.wrapping_add(i as u32); + tiled.push(mem.read_u8(addr)); + if addr < swap.frontbuffer_phys { + ok = false; + break; + } + } + if ok { + let mut linear = vec![0u8; (swap.width * swap.height * 4) as usize]; + if xenia_gpu::tiled_address::detile_2d( + &tiled, + &mut linear, + swap.width, + swap.height, + pitch_aligned, + 4, + ) + .is_ok() + { + ui.publish_frontbuffer(swap.width, swap.height, linear); + } + } + } + ui.notify_swap( + crate::ui_bridge::SwapInfo { + frontbuffer_addr: swap.frontbuffer_phys, + width: swap.width, + height: swap.height, + texture_format, + color_space, + frame_index: swap.frame_index, + draws_total: gpu_inline.stats.draws_seen, + packets_total: gpu_inline.stats.packets_executed, + last_draw_prim: last_prim, + last_draw_vertex_count: last_verts, + indirect_buffer_jumps: gpu_inline.stats.indirect_buffer_jumps, + wait_reg_mem_blocks: gpu_inline.stats.wait_reg_mem_blocks, + instructions_total, + vs_blob_key: gpu_inline.active_vs_key.unwrap_or(0), + ps_blob_key: gpu_inline.active_ps_key.unwrap_or(0), + resolves_total: gpu_inline.stats.resolves_total, + resolves_copied_total: 
gpu_inline.stats.resolves_copied_total, + resolves_skipped_total: gpu_inline.stats.resolves_skipped_total, + unique_render_targets: gpu_inline.stats.unique_render_targets, + interrupts_delivered: state.interrupts.delivered, + interrupts_dropped: state.interrupts.dropped, + }, + mem, + ); + } + tracing::info!( + frame = swap.frame_index, + fb = format_args!("{:#010x}", swap.frontbuffer_phys), + width = swap.width, + height = swap.height, + fmt = texture_format, + cs = color_space, + drained, + buffer_ptr = format_args!("{buffer_ptr:#010x}"), + fetch_ptr = format_args!("{fetch_ptr:#010x}"), + "VdSwap complete" + ); ctx.gpr[3] = 0; } // ===== Audio ===== -fn xaudio_register_render_driver(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { +fn xaudio_register_render_driver(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { let handle = state.alloc_handle(); tracing::info!("XAudioRegisterRenderDriverClient: handle={:#x}", handle); // r3 = callback_ptr, r4 = driver_ptr -> write handle ctx.gpr[3] = 0; } -fn xma_create_context(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { +fn xma_create_context(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { let handle = state.alloc_handle(); tracing::info!("XMACreateContext: handle={:#x}", handle); ctx.gpr[3] = handle as u64; @@ -734,19 +2563,1016 @@ fn xma_create_context(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut // ===== Xex ===== -fn xex_get_procedure_address(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xex_get_procedure_address(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // Mirrors xenia-canary XexGetProcedureAddress_entry + // (xboxkrnl_modules.cc:195): r3 = hmodule, r4 = ordinal, + // r5 = lpdword_t out_function_ptr. Returns NTSTATUS in r3; on success + // writes the resolved thunk address to *out_function_ptr. 
+ let hmodule = ctx.gpr[3] as u32; let ordinal = ctx.gpr[4] as u32; - tracing::warn!("XexGetProcedureAddress: ordinal={:#x} not found", ordinal); - ctx.gpr[3] = 0xC000_0034; // STATUS_OBJECT_NAME_NOT_FOUND + let out_ptr = ctx.gpr[5] as u32; + if out_ptr != 0 { + mem.write_u32(out_ptr, 0); + } + + let Some(module) = state.module_id_from_hmodule(hmodule) else { + tracing::warn!( + "XexGetProcedureAddress: unknown hmodule={:#x} ordinal={:#x}", + hmodule, + ordinal, + ); + ctx.gpr[3] = STATUS_INVALID_HANDLE; + return; + }; + match state.resolve_thunk(module, ordinal as u16) { + Some(addr) => { + if out_ptr != 0 { + mem.write_u32(out_ptr, addr); + } + ctx.gpr[3] = STATUS_SUCCESS; + } + None => { + tracing::warn!( + "XexGetProcedureAddress: ordinal {:#x} not registered for {:?}", + ordinal, + module, + ); + // STATUS_DRIVER_ENTRYPOINT_NOT_FOUND == 0xC000_0034. + ctx.gpr[3] = STATUS_OBJECT_NAME_NOT_FOUND; + } + } } // ===== Exception handling ===== -fn c_specific_handler(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn c_specific_handler(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { tracing::warn!("__C_specific_handler called (exception handling stub)"); ctx.gpr[3] = 1; // ExceptionContinueSearch } +// ===== Synchronization (events / semaphores / waits) ===== + +/// Is the handle currently signaled / acquirable? For events and semaphores +/// this tests the counting state; for thread handles it's true once the +/// thread has exited. +pub(crate) fn handle_signaled(state: &KernelState, handle: u32) -> bool { + match state.objects.get(&handle) { + Some(KernelObject::Event { signaled, .. }) => *signaled, + Some(KernelObject::Timer { signaled, .. }) => *signaled, + Some(KernelObject::Semaphore { count, .. }) => *count > 0, + Some(KernelObject::Thread { exit_code, .. }) => exit_code.is_some(), + _ => false, + } +} + +/// Refresh a PKEVENT/PKSEMAPHORE shadow from the guest's dispatcher +/// struct. 
Handle-keyed Nt objects (small integer keys) are managed +/// entirely by the kernel and don't need this — but pointer-keyed Ke +/// shadows can desync when the guest signals the dispatcher via a direct +/// memory write (e.g. Sylpheed's graphics-interrupt callback writes +/// `SignalState = 1` into its user_data struct instead of going through +/// `KeSetEvent`). Before a wait check, we re-load byte +4 and reconcile +/// the shadow's `signaled` / `count` with guest memory so the wait +/// reflects the current dispatcher state. +/// +/// Without this, tid=5's render-dispatcher poll loop on the Sylpheed +/// intro spun 4.5M times per 100M instructions with only 11K resolved +/// wakes — the callback was firing but the shadow stayed unsignaled, +/// so every wait deadlined to `STATUS_TIMEOUT` and the worker looped +/// without ever running its real render path. +fn refresh_pkevent_shadow_from_guest(state: &mut KernelState, mem: &GuestMemory, ptr: u32) { + if ptr < 0x1_0000 { + return; + } + let Some(obj) = state.objects.get_mut(&ptr) else { + return; + }; + let signal_state = mem.read_u32(ptr + 4); + match obj { + KernelObject::Event { signaled, .. } | KernelObject::Timer { signaled, .. } => { + if signal_state != 0 { + *signaled = true; + } + // Intentionally only pull the rising edge from guest + // memory. If the guest wrote 0 but the shadow says + // signaled=true because a `KeSetEvent` hasn't been + // consumed yet, we'd spuriously clear; leave clearing + // to `KeResetEvent` / auto-reset `handle_consume`. + } + KernelObject::Semaphore { count, .. } => { + let guest_count = signal_state as i32; + if guest_count > *count { + *count = guest_count; + } + } + _ => {} + } +} + +/// Consume one signal slot on a handle (auto-reset events, semaphore +/// decrement, mutex-ish). Assumes `handle_signaled` just returned true. 
+pub(crate) fn handle_consume(state: &mut KernelState, handle: u32) { + match state.objects.get_mut(&handle) { + Some(KernelObject::Event { + manual_reset, + signaled, + .. + }) + | Some(KernelObject::Timer { + manual_reset, + signaled, + .. + }) => { + if !*manual_reset { + *signaled = false; + } + } + Some(KernelObject::Semaphore { count, .. }) => { + if *count > 0 { + *count -= 1; + } + } + _ => {} + } +} + +/// Register a guest thread as a waiter on a handle (for later wake). +pub(crate) fn handle_enqueue_waiter(state: &mut KernelState, handle: u32, r: ThreadRef) { + match state.objects.get_mut(&handle) { + Some(KernelObject::Event { waiters, .. }) + | Some(KernelObject::Semaphore { waiters, .. }) + | Some(KernelObject::Thread { waiters, .. }) + | Some(KernelObject::Timer { waiters, .. }) + | Some(KernelObject::Mutex { waiters, .. }) => { + if !waiters.contains(&r) { + waiters.push(r); + } + } + _ => {} + } +} + +/// Remove a ThreadRef from every waiter list it might be on. Called on wake +/// so a thread woken on one of its WaitAny handles doesn't linger as a +/// waiter on the others. +pub(crate) fn handle_remove_waiter_everywhere(state: &mut KernelState, r: ThreadRef) { + for obj in state.objects.values_mut() { + if let Some(waiters) = obj.waiters_mut() { + waiters.retain(|&w| w != r); + } + } + for list in state.cs_waiters.values_mut() { + list.retain(|&w| w != r); + } +} + +/// Parse a PowerPC-style LARGE_INTEGER timeout pointer. +/// Returns `None` for "wait forever" (null pointer), `Some(0)` for +/// "poll / don't block" (timeout value 0), else `Some(abs_deadline)`. +/// Xbox 360 timeouts are signed 100-ns units; negative = relative. +/// We convert to an absolute deadline on the current thread's timebase. 
+pub(crate) fn parse_timeout(state: &KernelState, timeout_ptr: u32, mem: &GuestMemory) -> Option> { + if timeout_ptr == 0 { + return Some(None); // wait infinitely + } + let hi = mem.read_u32(timeout_ptr) as i32; + let lo = mem.read_u32(timeout_ptr + 4); + let raw = ((hi as i64) << 32) | (lo as i64 & 0xFFFF_FFFF); + if raw == 0 { + return Some(Some(0)); // poll + } + let hw_id = state.scheduler.current_hw_id().unwrap_or(0); + let now = state.scheduler.ctx(hw_id).timebase; + // Negative = relative, positive = absolute wall-clock. Our timebase is a + // plain instruction counter, so we treat all timeouts as "time-units + // after now" regardless of sign, using the magnitude. + let magnitude = raw.unsigned_abs(); + // Scale: 100-ns units → ~1 tick per ns is fine for emulation (games just + // want monotonic progress). Divide by 100 so multi-millisecond timeouts + // don't exceed u64 and wake quickly. + let deadline = now.saturating_add(magnitude.max(1) / 100); + Some(Some(deadline)) +} + +/// Resolve NT pseudo-handles to real kernel handles, matching Canary's +/// [`ObjectTable::TranslateHandle`](https://github.com/xenia-canary/xenia-canary/blob/canary/src/xenia/kernel/util/object_table.cc): +/// +/// * `0xFFFFFFFE` — `NtCurrentThread()` → the currently running thread's handle +/// * `0xFFFFFFFF` — `NtCurrentProcess()` → 0 (not meaningful in our HLE) +/// * anything else passes through untouched +/// +/// Every kernel function that accepts a handle argument should translate +/// first. Canary does this centrally in `LookupObject` — we don't have the +/// same chokepoint, so the pattern is "call this at the top of each Ob/Ke/Nt +/// entry point that consumes a handle". 
+/// +/// Without this, Sylpheed's worker-thread prologue calls +/// `ObReferenceObjectByHandle((HANDLE)-2, ...)` (= "get my own thread"), +/// gets `STATUS_INVALID_HANDLE`, and proceeds with a null "thread object +/// pointer" through `KeSetAffinityThread` — the worker then exits without +/// running its real body, leaving the main thread parked forever on the +/// completion event. +fn resolve_pseudo_handle(state: &KernelState, handle: u32) -> u32 { + match handle { + 0xFFFF_FFFF => 0, + 0xFFFF_FFFE => { + let hw_id = state.scheduler.current_hw_id().unwrap_or(0); + state.scheduler.thread_handle(hw_id).unwrap_or(0) + } + h => h, + } +} + +/// Lazily register a shadow kernel object for a guest `PKEVENT` / `PKSEMAPHORE` +/// pointer on first touch from a `Ke*` sync function. +/// +/// Background: on Xenon the `Nt*` family takes `HANDLE` integers (allocated +/// by us via `alloc_handle`), but the `Ke*` family takes pointers to +/// dispatcher structs in guest memory. `KeInitializeEvent` is an inline +/// helper baked into the game's code — it writes the DISPATCHER_HEADER in +/// place and we never see the call. As a result, when the game later calls +/// e.g. `KeSetEvent(&kevent)`, our handle-lookup misses and the operation +/// silently no-ops, leaving waiters parked forever. That was the root cause +/// of Sylpheed's 562K/50M `KeResetEvent` poll-loop on pointer `0x42450b5c`. +/// +/// We mint a shadow [`KernelObject`] in `state.objects` keyed by the guest +/// pointer (pointers live above the handle range — `next_handle` starts at +/// `0x1000` and bumps by 4, so collisions with a real handle are impossible +/// for any sane pointer). Subsequent Ke/Nt operations hit the shadow. +/// +/// Xenon DISPATCHER_HEADER layout (big-endian): +/// +0 Type (u8) 0=NotificationEvent, 1=SynchronizationEvent, +/// 5=Semaphore. Others unsupported (Mutant/Timer +/// paths fall back to the prior no-op behavior). 
+/// +1 Absolute (u8) +/// +2 Size (u8) in u32 words +/// +3 Inserted (u8) +/// +4 SignalState (i32) +/// +8 WaitListHead (2 × u32) LIST_ENTRY +/// For KSEMAPHORE, `Limit` (i32) follows at +0x10. +/// +/// Caveat: the shadow is authoritative once created. If the guest writes +/// directly into the dispatcher struct bypassing the kernel API, the shadow +/// drifts — but well-behaved NT code never does that. +fn ensure_dispatcher_object(state: &mut KernelState, mem: &GuestMemory, ptr: u32) { + // Pointer-vs-handle discriminator: our handles are small (<= low + // tens of thousands for any realistic session). Anything higher is + // almost certainly a guest pointer. Also bail if already registered. + if ptr < 0x1_0000 || state.objects.contains_key(&ptr) { + return; + } + let ty = mem.read_u8(ptr); + let signal_state = mem.read_u32(ptr + 4); + let obj = match ty { + 0 => KernelObject::Event { + manual_reset: true, + signaled: signal_state != 0, + waiters: Vec::new(), + }, + 1 => KernelObject::Event { + manual_reset: false, + signaled: signal_state != 0, + waiters: Vec::new(), + }, + 5 => { + let limit = mem.read_u32(ptr + 0x10) as i32; + KernelObject::Semaphore { + count: signal_state as i32, + max: limit.max(1), + waiters: Vec::new(), + } + } + // KTIMER DISPATCHER_HEADER: type=8 NotificationTimer (manual-reset), + // type=9 SynchronizationTimer (auto-reset). Mint a disarmed shadow — + // deadline/period live in KTIMER's extended fields (+0x20 onward) + // which we don't mirror; games that want the timer armed go through + // NtSetTimerEx / KeSetTimer (handle-based), and Sylpheed uses the + // handle path exclusively. 
+ 8 | 9 => KernelObject::Timer { + manual_reset: ty == 8, + signaled: signal_state != 0, + deadline: None, + period_ticks: 0, + period_ms: 0, + callback_routine: 0, + callback_arg: 0, + waiters: Vec::new(), + }, + _ => return, + }; + state.objects.insert(ptr, obj); +} + +/// Set `gpr[3]` on a just-woken HW thread to reflect which handle in its +/// wait set was the one that fired. Canary's `WaitMultiple` returns +/// `STATUS_WAIT_0 + index` on WaitAny success; games branch on it. The +/// default pre-populated status is `STATUS_SUCCESS` (== WAIT_0), which only +/// matches when the first handle is the signaling one — anything else +/// looks like a spurious index-0 wake to the caller. +fn set_wake_status_for_waitany(state: &mut KernelState, r: ThreadRef, signaled_handle: u32) { + use xenia_cpu::scheduler::{BlockReason, HwState}; + let Some(t) = state.scheduler.try_thread_mut(r) else { + return; + }; + let idx = match &t.state { + HwState::Blocked(BlockReason::WaitAny { handles, .. }) + | HwState::ServicingIrq(BlockReason::WaitAny { handles, .. }) => { + handles.iter().position(|&h| h == signaled_handle) + } + _ => None, + }; + if let Some(i) = idx { + t.ctx.gpr[3] = i as u64; + } +} + +/// Wake all waiters whose predicate now holds on the given handle (manual +/// reset fans out; auto-reset/semaphore wakes one and consumes). +pub(crate) fn wake_eligible_waiters(state: &mut KernelState, handle: u32) { + loop { + let Some(obj) = state.objects.get_mut(&handle) else { + return; + }; + let (manual_reset, should_signal, consume) = match obj { + KernelObject::Event { + manual_reset, + signaled, + waiters, + } + | KernelObject::Timer { + manual_reset, + signaled, + waiters, + .. + } => { + if *signaled && !waiters.is_empty() { + (*manual_reset, true, !*manual_reset) + } else { + return; + } + } + KernelObject::Semaphore { + count, waiters, .. 
+ } => { + if *count > 0 && !waiters.is_empty() { + (false, true, true) + } else { + return; + } + } + KernelObject::Thread { + exit_code, waiters, .. + } => { + if exit_code.is_some() && !waiters.is_empty() { + (true, true, false) + } else { + return; + } + } + _ => return, + }; + if !should_signal { + return; + } + let winner = match obj { + KernelObject::Event { waiters, .. } + | KernelObject::Timer { waiters, .. } + | KernelObject::Semaphore { waiters, .. } + | KernelObject::Thread { waiters, .. } => { + if manual_reset { + // Take the whole queue at once; manual-reset fires once + // and stays signaled so every parked waiter clears. + let list = std::mem::take(waiters); + for w in list { + set_wake_status_for_waitany(state, w, handle); + state.scheduler.wake_ref(w); + handle_remove_waiter_everywhere(state, w); + // scheduler.wake_ref also loses timed-waits entry + if state.audit.enabled { + // Record one wake per thread woken. `aux` carries + // the resolved status (gpr[3]) we just set. + let status = state.scheduler.thread(w).ctx.gpr[3]; + state.audit_wake(handle, 0, "wake_eligible_waiters/manual", status); + } + } + return; + } else { + waiters.remove(0) + } + } + _ => return, + }; + if consume { + handle_consume(state, handle); + } + set_wake_status_for_waitany(state, winner, handle); + state.scheduler.wake_ref(winner); + handle_remove_waiter_everywhere(state, winner); + if state.audit.enabled { + let status = state.scheduler.thread(winner).ctx.gpr[3]; + state.audit_wake(handle, 0, "wake_eligible_waiters/auto", status); + } + // continue loop for semaphores that may wake more + } +} + +fn ke_set_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = PKEVENT on Ke* (guest pointer). See `ensure_dispatcher_object` + // for why we need the lazy-shadow step here. + let h = ctx.gpr[3] as u32; + ensure_dispatcher_object(state, mem, h); + let previous = match state.objects.get_mut(&h) { + Some(KernelObject::Event { signaled, .. 
}) => { + let prev = *signaled; + *signaled = true; + prev as u32 + } + _ => 0, + }; + state.audit_signal(h, ctx.lr as u32, "KeSetEvent", previous as u64); + wake_eligible_waiters(state, h); + ctx.gpr[3] = previous as u64; +} + +fn ke_reset_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + let h = ctx.gpr[3] as u32; + ensure_dispatcher_object(state, mem, h); + let previous = match state.objects.get_mut(&h) { + Some(KernelObject::Event { signaled, .. }) => { + let prev = *signaled; + *signaled = false; + prev as u32 + } + _ => 0, + }; + ctx.gpr[3] = previous as u64; +} + +fn nt_set_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + let handle = ctx.gpr[3] as u32; + let prev_ptr = ctx.gpr[4] as u32; + let previous = match state.objects.get_mut(&handle) { + Some(KernelObject::Event { signaled, .. }) => { + let prev = *signaled; + *signaled = true; + prev as u32 + } + _ => 0, + }; + state.audit_signal(handle, ctx.lr as u32, "NtSetEvent", previous as u64); + wake_eligible_waiters(state, handle); + if prev_ptr != 0 { + mem.write_u32(prev_ptr, previous); + } + ctx.gpr[3] = STATUS_SUCCESS; +} + +fn nt_clear_event(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { + let handle = ctx.gpr[3] as u32; + if let Some(KernelObject::Event { signaled, .. }) = state.objects.get_mut(&handle) { + *signaled = false; + } + ctx.gpr[3] = STATUS_SUCCESS; +} + +/// Pulse an event: wake current waiters as if signaled, then leave the event +/// in the non-signaled state. For manual-reset events this wakes *all* +/// parked waiters at once; for auto-reset events it wakes at most one (the +/// first in the FIFO) and implicitly consumes the pulse. +/// +/// Canary impl: [xboxkrnl_threading.cc::KePulseEvent_entry](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc) +/// → [xevent.cc::XEvent::Pulse](xenia-canary/src/xenia/kernel/xevent.cc). 
+fn pulse_event_on_object(state: &mut KernelState, key: u32) -> u32 { + // Capture previous state; then temporarily mark the event signaled so + // `wake_eligible_waiters` does the right wake-all vs wake-one split. + let previous = match state.objects.get_mut(&key) { + Some(KernelObject::Event { signaled, .. }) => { + let prev = *signaled; + *signaled = true; + prev as u32 + } + _ => return 0, + }; + wake_eligible_waiters(state, key); + // Pulse leaves the event non-signaled regardless of type — manual-reset + // would otherwise stay latched after `wake_eligible_waiters`, and auto- + // reset with no waiters would linger signaled until the first wait. + if let Some(KernelObject::Event { signaled, .. }) = state.objects.get_mut(&key) { + *signaled = false; + } + previous +} + +fn ke_pulse_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = PKEVENT (guest pointer), r4 = increment, r5 = wait (ignored). + let h = ctx.gpr[3] as u32; + ensure_dispatcher_object(state, mem, h); + let previous = pulse_event_on_object(state, h); + state.audit_signal(h, ctx.lr as u32, "KePulseEvent", previous as u64); + ctx.gpr[3] = previous as u64; +} + +fn nt_pulse_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle, r4 = previous_state_ptr (optional). + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let prev_ptr = ctx.gpr[4] as u32; + if !state.objects.contains_key(&handle) { + ctx.gpr[3] = STATUS_INVALID_HANDLE; + return; + } + let previous = pulse_event_on_object(state, handle); + state.audit_signal(handle, ctx.lr as u32, "NtPulseEvent", previous as u64); + if prev_ptr != 0 { + mem.write_u32(prev_ptr, previous); + } + ctx.gpr[3] = STATUS_SUCCESS; +} + +/// Attempt `*count += adjust` with the cap at `max`. Returns `(previous, +/// updated)` where `updated == false` means the adjustment would have +/// exceeded `max` (or overflowed `i32`) and the count was left untouched. 
+fn try_release_semaphore(count: &mut i32, max: i32, adjust: i32) -> (i32, bool) { + let prev = *count; + match count.checked_add(adjust) { + Some(new) if new <= max => { + *count = new; + (prev, true) + } + _ => (prev, false), + } +} + +fn ke_release_semaphore(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = PKSEMAPHORE, r4 = adjustment. Ke-form returns the previous + // count directly (never a status); if the release would exceed + // `Limit` the count silently stays put — Canary `xeKeReleaseSemaphore` + // at xboxkrnl_threading.cc:707-722 marks the success return of + // `ReleaseSemaphore` `[[maybe_unused]]`. + let h = ctx.gpr[3] as u32; + ensure_dispatcher_object(state, mem, h); + let adjust = ctx.gpr[4] as i32; + let previous = match state.objects.get_mut(&h) { + Some(KernelObject::Semaphore { count, max, .. }) => { + let (prev, _updated) = try_release_semaphore(count, *max, adjust); + prev + } + _ => 0, + }; + state.audit_signal(h, ctx.lr as u32, "KeReleaseSemaphore", previous as u64); + wake_eligible_waiters(state, h); + ctx.gpr[3] = previous as u64; +} + +fn nt_release_semaphore(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle, r4 = release_count, r5 = previous_count* (optional). + // Canary `NtReleaseSemaphore_entry` (xboxkrnl_threading.cc:771-797) + // returns `X_STATUS_SEMAPHORE_LIMIT_EXCEEDED` (0xC000_0047) when the + // post-release count would exceed `Limit`, AND does NOT update the + // count in that case. `previous_count` is written regardless. + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let release = ctx.gpr[4] as i32; + let prev_ptr = ctx.gpr[5] as u32; + let (previous, status) = match state.objects.get_mut(&handle) { + Some(KernelObject::Semaphore { count, max, .. 
}) => { + let (prev, updated) = try_release_semaphore(count, *max, release); + if updated { + (prev, STATUS_SUCCESS) + } else { + (prev, STATUS_SEMAPHORE_LIMIT_EXCEEDED) + } + } + Some(_) | None => { + ctx.gpr[3] = STATUS_INVALID_HANDLE; + return; + } + }; + state.audit_signal(handle, ctx.lr as u32, "NtReleaseSemaphore", previous as u64); + if status == STATUS_SUCCESS { + wake_eligible_waiters(state, handle); + } + if prev_ptr != 0 { + mem.write_u32(prev_ptr, previous as u32); + } + ctx.gpr[3] = status; +} + +/// Single-handle wait with timeout. If the handle is already signaled, consume +/// and return success. Otherwise park the current HW thread and set ctx.gpr[3] +/// to STATUS_SUCCESS — when a waker arrives the thread resumes at its caller's +/// return address with success already in r3. Timeout=0 never parks. +fn do_wait_single(ctx: &mut PpcContext, state: &mut KernelState, handle: u32, timeout_ptr: u32, mem: &GuestMemory) { + state.audit_wait(handle, ctx.lr as u32, "do_wait_single", 0); + if handle_signaled(state, handle) { + handle_consume(state, handle); + ctx.gpr[3] = STATUS_SUCCESS; + return; + } + let deadline_opt = parse_timeout(state, timeout_ptr, mem); + let deadline = match deadline_opt { + Some(Some(0)) => { + ctx.gpr[3] = STATUS_TIMEOUT; + return; + } + Some(Some(d)) => Some(d), + Some(None) => None, + None => None, + }; + let current_ref = state.scheduler.current_ref(); + handle_enqueue_waiter(state, handle, current_ref); + tracing::debug!( + "wait_single: hw={} handle={:#x} park{}", + current_ref.hw_id, + handle, + match deadline { + Some(d) => format!(" until_tick={}", d), + None => " forever".into(), + } + ); + // Pre-populate the return code — most wakes resolve as STATUS_SUCCESS; + // timeouts overwrite via the scheduler's deadline-wake path. + ctx.gpr[3] = STATUS_SUCCESS; + state.scheduler.park_current(BlockReason::WaitAny { + handles: vec![handle], + deadline, + }); +} + +/// Multi-handle wait. 
`wait_type` 0 = WaitAll, 1 = WaitAny (NT convention). +fn do_wait_multiple( + ctx: &mut PpcContext, + state: &mut KernelState, + handles: Vec, + wait_all: bool, + timeout_ptr: u32, + mem: &GuestMemory, +) { + if state.audit.enabled { + // Pack (wait_all flag) | (handle_count << 1) into aux for the trail. + let aux = (wait_all as u64) | ((handles.len() as u64) << 1); + for &h in &handles { + state.audit_wait(h, ctx.lr as u32, "do_wait_multiple", aux); + } + } + let already_ok = if wait_all { + handles.iter().all(|&h| handle_signaled(state, h)) + } else { + handles.iter().any(|&h| handle_signaled(state, h)) + }; + if already_ok { + // Canary's `XObject::WaitMultiple` returns the **index** of the + // first-signaled handle for WaitAny (`STATUS_WAIT_0 + n`), not + // plain `STATUS_SUCCESS`. `STATUS_WAIT_0` is numerically 0, so + // index 0 still looks like success, but index 1+ matters: games + // commonly dispatch on the index. Sylpheed's worker prologue does + // `wait_any([start_event, work_sem])` and branches on the result: + // 0 means "start-event fired" (cleanup/exit), 1 means "sem fired" + // (run user proc then signal completion). Returning 0 for a sem + // wake made the worker always take the cleanup branch and exit + // without ever signaling the completion event. 
+ if wait_all { + for &h in &handles { + handle_consume(state, h); + } + ctx.gpr[3] = STATUS_SUCCESS; + } else if let Some((idx, &h)) = handles + .iter() + .enumerate() + .find(|&(_, &h)| handle_signaled(state, h)) + { + handle_consume(state, h); + ctx.gpr[3] = idx as u64; // STATUS_WAIT_0 + idx + } else { + ctx.gpr[3] = STATUS_SUCCESS; + } + return; + } + let deadline_opt = parse_timeout(state, timeout_ptr, mem); + let deadline = match deadline_opt { + Some(Some(0)) => { + ctx.gpr[3] = STATUS_TIMEOUT; + return; + } + Some(Some(d)) => Some(d), + Some(None) => None, + None => None, + }; + let current_ref = state.scheduler.current_ref(); + for &h in &handles { + handle_enqueue_waiter(state, h, current_ref); + } + ctx.gpr[3] = STATUS_SUCCESS; + let reason = if wait_all { + BlockReason::WaitAll { + handles: handles.clone(), + deadline, + } + } else { + BlockReason::WaitAny { handles, deadline } + }; + state.scheduler.park_current(reason); +} + +fn nt_wait_for_single_object_ex( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + // r3 = handle, r4 = wait_mode, r5 = alertable, r6 = timeout_ptr + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let timeout_ptr = ctx.gpr[6] as u32; + do_wait_single(ctx, state, handle, timeout_ptr, mem); +} + +/// `NtSignalAndWaitForSingleObjectEx(signal_handle, wait_handle, wait_mode, +/// alertable, timeout_ptr)` — atomically signal one kernel object and wait on +/// another. Matches Canary's `NtSignalAndWaitForSingleObjectEx_entry` +/// (xboxkrnl_threading.cc:1103). Common producer/consumer handshake primitive: +/// producer calls `NSAWFSO(work_done, work_free)` so the consumer's wait +/// resolves at the same instant the producer starts waiting for the next +/// bucket. 
+/// +/// Before this export existed games that relied on the primitive saw the +/// call surface as `unimplemented kernel export`, their threads proceeded +/// without the signal being fired, and the paired consumer-thread wait +/// would block indefinitely. Sylpheed's I/O dispatcher uses this for its +/// async file-query completion signaling. +fn nt_signal_and_wait_for_single_object_ex( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + // r3 = signal_handle, r4 = wait_handle, r5 = wait_mode, r6 = alertable, r7 = timeout_ptr + let signal_handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let wait_handle = resolve_pseudo_handle(state, ctx.gpr[4] as u32); + let timeout_ptr = ctx.gpr[7] as u32; + + // Signal phase — mirror `nt_set_event` for Event handles; if the + // handle is unknown we return `STATUS_INVALID_HANDLE` without waiting, + // matching Canary's "lookup both, fail fast if either missing" guard. + let signal_prev: u64 = match state.objects.get_mut(&signal_handle) { + Some(KernelObject::Event { signaled, .. }) => { + let was = *signaled; + *signaled = true; + was as u64 + } + Some(KernelObject::Semaphore { count, .. }) => { + let was = *count as u64; + *count = count.saturating_add(1); + was + } + _ => { + ctx.gpr[3] = STATUS_INVALID_HANDLE; + return; + } + }; + state.audit_signal( + signal_handle, + ctx.lr as u32, + "NtSignalAndWaitForSingleObjectEx", + signal_prev, + ); + wake_eligible_waiters(state, signal_handle); + + // Then fall into the normal single-wait path on wait_handle. 
+ do_wait_single(ctx, state, wait_handle, timeout_ptr, mem); +} + +fn ke_wait_for_single_object( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + // r3 = PKEVENT (guest pointer), r4 = wait_reason, r5 = wait_mode, + // r6 = alertable, r7 = timeout_ptr + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + ensure_dispatcher_object(state, mem, handle); + refresh_pkevent_shadow_from_guest(state, mem, handle); + let timeout_ptr = ctx.gpr[7] as u32; + do_wait_single(ctx, state, handle, timeout_ptr, mem); +} + +fn nt_wait_for_multiple_objects_ex( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + // r3 = count, r4 = handles_ptr, r5 = wait_type (0=All, 1=Any), + // r6 = wait_mode, r7 = alertable, r8 = timeout_ptr + let count = ctx.gpr[3] as u32; + let handles_ptr = ctx.gpr[4] as u32; + let wait_type = ctx.gpr[5] as u32; + let timeout_ptr = ctx.gpr[8] as u32; + let handles: Vec = (0..count) + .map(|i| resolve_pseudo_handle(state, mem.read_u32(handles_ptr + i * 4))) + .collect(); + let wait_all = wait_type == 0; + do_wait_multiple(ctx, state, handles, wait_all, timeout_ptr, mem); +} + +fn ke_wait_for_multiple_objects( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + // r3 = count, r4 = objects_ptr (array of PKEVENT/PKSEMAPHORE pointers), + // r5 = wait_type, r6 = wait_reason, r7 = wait_mode, r8 = alertable, + // r9 = timeout_ptr, r10 = wait_blocks (ignored) + let count = ctx.gpr[3] as u32; + let handles_ptr = ctx.gpr[4] as u32; + let wait_type = ctx.gpr[5] as u32; + let timeout_ptr = ctx.gpr[9] as u32; + let handles: Vec = (0..count) + .map(|i| resolve_pseudo_handle(state, mem.read_u32(handles_ptr + i * 4))) + .collect(); + for &h in &handles { + ensure_dispatcher_object(state, mem, h); + refresh_pkevent_shadow_from_guest(state, mem, h); + } + let wait_all = wait_type == 0; + do_wait_multiple(ctx, state, handles, wait_all, timeout_ptr, mem); +} + +fn ke_delay_execution_thread( 
+ ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + // r3 = wait_mode, r4 = alertable, r5 = interval_ptr (LARGE_INTEGER 100-ns) + let interval_ptr = ctx.gpr[5] as u32; + let deadline_opt = parse_timeout(state, interval_ptr, mem); + let deadline = match deadline_opt { + Some(Some(0)) => { + // Yield-like — return immediately. + ctx.gpr[3] = STATUS_SUCCESS; + return; + } + Some(Some(d)) => d, + Some(None) => u64::MAX, // KeDelayExecution with NULL interval = sleep forever (unusual) + None => u64::MAX, + }; + ctx.gpr[3] = STATUS_SUCCESS; + state + .scheduler + .park_current(BlockReason::DelayUntil(deadline)); +} + +fn nt_yield_execution(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { + // The next round of the scheduler already hands control to another HW + // thread, so we don't need to park. Just return success. + ctx.gpr[3] = STATUS_SUCCESS; +} + +fn ke_resume_thread(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { + // r3 = thread_ptr (KTHREAD). We don't track KTHREAD ↔ HW mapping through + // guest memory addresses, so accept and succeed. Real NtResumeThread + // below handles the handle-based path properly. 
+ ctx.gpr[3] = 0; + let _ = state; +} + +fn nt_resume_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle, r4 = prev_suspend_count_ptr + let handle = ctx.gpr[3] as u32; + let prev_ptr = ctx.gpr[4] as u32; + let prev = state + .scheduler + .find_by_handle(handle) + .map(|r| state.scheduler.resume_ref(r)) + .unwrap_or(0); + if prev_ptr != 0 { + mem.write_u32(prev_ptr, prev); + } + ctx.gpr[3] = STATUS_SUCCESS; +} + +fn nt_suspend_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle, r4 = prev_suspend_count_ptr + let handle = ctx.gpr[3] as u32; + let prev_ptr = ctx.gpr[4] as u32; + let prev = state + .scheduler + .find_by_handle(handle) + .map(|r| state.scheduler.suspend_ref(r)) + .unwrap_or(0); + if prev_ptr != 0 { + mem.write_u32(prev_ptr, prev); + } + ctx.gpr[3] = STATUS_SUCCESS; +} + +// ===== Object & module lookup ===== + +fn xex_get_module_handle(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // Mirrors xenia-canary XexGetModuleHandle_entry + // (xboxkrnl_modules.cc:42): r3 = lpstring_t module_name, + // r4 = lpdword_t hmodule_ptr. Returns NTSTATUS in r3; writes the + // resolved handle to *hmodule_ptr. `X_ERROR_NOT_FOUND` for unknown + // names. Distinct pseudo-handles for kernel modules so a follow-up + // `XexGetProcedureAddress` can route to the right ordinal table. 
+ let name_ptr = ctx.gpr[3] as u32; + let out_ptr = ctx.gpr[4] as u32; + if out_ptr != 0 { + mem.write_u32(out_ptr, 0); + } + + let resolved: Option = if name_ptr == 0 { + Some(state.image_base) + } else { + let name = read_cstring(mem, name_ptr); + if name.is_empty() || name.eq_ignore_ascii_case("default.xex") { + Some(state.image_base) + } else if name.eq_ignore_ascii_case("xboxkrnl.exe") { + Some(crate::state::HMODULE_XBOXKRNL) + } else if name.eq_ignore_ascii_case("xam.xex") { + Some(crate::state::HMODULE_XAM) + } else { + None + } + }; + + match resolved { + Some(h) => { + if out_ptr != 0 { + mem.write_u32(out_ptr, h); + } + ctx.gpr[3] = STATUS_SUCCESS; + } + None => ctx.gpr[3] = X_ERROR_NOT_FOUND, + } +} + +/// `NtDuplicateObject(handle, new_handle_ptr, options)` — per Canary's +/// `NtDuplicateObject_entry`: +/// * r3 = source handle (pseudo-handles like `(HANDLE)-2` are common — the +/// Canary comment explicitly notes "this function seems to be used to get +/// the current thread handle") +/// * r4 = new_handle_ptr (if zero, the call is actually a close) +/// * r5 = options (bit 0 = DUPLICATE_CLOSE_SOURCE) +/// +/// Canary allocates a fresh handle id that refcounts the same underlying +/// `XObject`. We don't refcount, so we alias: write the *source* handle back +/// as the "new" handle. The game then uses it interchangeably, and both ids +/// resolve to the same `KernelObject` entry. +/// +/// A prior `stub_success` left `*new_handle_ptr` uninitialized — Sylpheed's +/// thread-dispatch prologue does `NtDuplicateObject(event, &dup)` then passes +/// `dup` to the worker, and the worker does `NtSetEvent(dup)` to signal +/// completion. With the stub, `dup` was stack garbage → set-event lookup +/// failed silently → main thread blocked forever on the source event. 
+fn nt_duplicate_object(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + let source = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let out_ptr = ctx.gpr[4] as u32; + let options = ctx.gpr[5] as u32; + + if !state.objects.contains_key(&source) { + if out_ptr != 0 { + mem.write_u32(out_ptr, 0); + } + ctx.gpr[3] = STATUS_INVALID_HANDLE; + return; + } + if out_ptr != 0 { + mem.write_u32(out_ptr, source); + } + // Aliased-handle refcount: since we return the source handle as the "new" + // handle (no fresh id), every duplicate must bump the per-handle refcount + // so the later `NtClose` pair (one for source, one for dup) doesn't + // destroy the object mid-flight. `DUPLICATE_CLOSE_SOURCE` (bit 0) closes + // the source in Canary (xboxkrnl_ob.cc:389), so in our aliased model the + // source-close cancels the dup-gain: net refcount is unchanged. Without + // `CLOSE_SOURCE`, both the source and the dup are separately live and we + // need +1. + const DUPLICATE_CLOSE_SOURCE: u32 = 0x0000_0001; + if options & DUPLICATE_CLOSE_SOURCE == 0 + && let Some(c) = state.handle_refcount.get_mut(&source) + { + *c += 1; + } + ctx.gpr[3] = STATUS_SUCCESS; +} + +fn ob_reference_object_by_handle(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = handle, r4 = object_type, r5 = out_object_ptr + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let out_ptr = ctx.gpr[5] as u32; + if handle == 0 || !state.objects.contains_key(&handle) { + ctx.gpr[3] = STATUS_INVALID_HANDLE; + if out_ptr != 0 { + mem.write_u32(out_ptr, 0); + } + return; + } + if out_ptr != 0 { + // We don't maintain real KTHREAD/KEVENT structs in guest memory, so + // pass back the handle as a stable cookie — downstream Ke* calls + // that take a "thread pointer" (e.g. KeSetAffinityThread) then look + // up the same handle via `state.objects`. Matches Canary semantics + // for our HLE without requiring a host-visible object-struct backing. 
+ mem.write_u32(out_ptr, handle); + } + ctx.gpr[3] = STATUS_SUCCESS; +} + // ===== Helpers ===== fn read_cstring(mem: &GuestMemory, addr: u32) -> String { @@ -761,3 +3587,1525 @@ fn read_cstring(mem: &GuestMemory, addr: u32) -> String { } s } + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + use xenia_memory::page_table::MemoryProtect; + + /// Scratch region the nt_read_file/nt_write_file tests write into + /// (iosb + buffer). A single committed page is plenty. + const SCRATCH_BASE: u32 = 0x4000_0000; + + fn fresh() -> (PpcContext, GuestMemory, KernelState) { + let mut mem = GuestMemory::new().expect("memory init"); + mem.alloc(SCRATCH_BASE, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) + .expect("scratch page must commit"); + let mut state = KernelState::new(); + // Under per-slot runqueues, most kernel exports reach through + // `scheduler.current` — tests that exercise those paths need a + // live thread installed on slot 0 first. Older tests (file I/O + // etc.) don't touch it and are unaffected. + state.install_initial_thread( + PpcContext::default(), + 0x7000_0000, + 0x10_0000, + SCRATCH_BASE + 0x800, + SCRATCH_BASE + 0xC00, + 0x1000, + &mut mem, + ); + state.scheduler.begin_slot_visit(0); + (PpcContext::default(), mem, state) + } + + fn make_file(state: &mut KernelState, bytes: Vec) -> u32 { + let size = bytes.len() as u64; + state.alloc_handle_for(KernelObject::File { + path: "test.bin".to_string(), + size, + position: 0, + data: Arc::new(bytes), + dir_enum_pos: None, + }) + } + + fn make_event(state: &mut KernelState) -> u32 { + state.alloc_handle_for(KernelObject::Event { + manual_reset: true, + signaled: false, + waiters: Vec::new(), + }) + } + + fn event_signaled(state: &KernelState, h: u32) -> bool { + match state.objects.get(&h) { + Some(KernelObject::Event { signaled, .. 
}) => *signaled, + _ => panic!("expected Event at handle {:#x}", h), + } + } + + /// Axis 4: `KeSetAffinityThread` actually migrates between slots + /// now. Spawn a secondary thread with affinity 0x02 (slot 1 only), + /// then call the export to move it to slot 4. + #[test] + fn ke_set_affinity_thread_migrates_and_returns_old() { + let (mut ctx, mut mem, mut state) = fresh(); + // Pre-fresh() set up the main thread on slot 0. Spawn a worker + // on slot 1 via ex_create_thread so the handle / PCR are real. + // Simpler: inject directly via scheduler.spawn. + use xenia_cpu::scheduler::SpawnParams; + let pcr_base = SCRATCH_BASE + 0x500; + mem.write_u32(pcr_base + 0x2C, 0xDEAD_BEEF); // sentinel + let params = SpawnParams { + entry: 0x8200_0000, + start_context: 0, + stack_base: 0x7200_0000, + stack_size: 0x10000, + pcr_base, + tls_base: 0, + thread_handle: 0x2000, + guest_tid: 42, + create_suspended: false, + is_initial: false, + tls_slot_count: 0, + affinity_mask: 0b0000_0010, + priority: 0, + ideal_processor: None, + }; + state + .scheduler + .spawn(params, &mut crate::state::GuestMemoryPcr(&mut mem)) + .unwrap(); + // Confirm PCR was written by the spawn (sanity). + assert_eq!(mem.read_u32(pcr_base + 0x2C), 1); + + // Now call KeSetAffinityThread(handle=0x2000, new_mask=0x20). + ctx.gpr[3] = 0x2000; + ctx.gpr[4] = 0x20; // slot 5 only + ke_set_affinity_thread(&mut ctx, &mut mem, &mut state); + // Return value = previous mask = 0x02. + assert_eq!(ctx.gpr[3], 0x02); + // PCR rewritten to 5. + assert_eq!(mem.read_u32(pcr_base + 0x2C), 5); + // Thread now on slot 5. + let r = state.scheduler.find_by_handle(0x2000).expect("still alive"); + assert_eq!(r.hw_id, 5); + } + + /// Axis 5: `KeSetIdealProcessor` stores a hint on the thread + /// without migrating it; query round-trips. + #[test] + fn ke_set_ideal_processor_round_trips() { + let (mut ctx, mut mem, mut state) = fresh(); + // Main thread handle is 0x1000. 
+ ctx.gpr[3] = 0x1000; + ctx.gpr[4] = 3; + ke_set_ideal_processor(&mut ctx, &mut mem, &mut state); + // Prior was 0xFF (unset sentinel). + assert_eq!(ctx.gpr[3], 0xFF); + ctx.gpr[3] = 0x1000; + ke_query_ideal_processor(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 3); + } + + /// Axis 5: `NtSetInformationThread` class `ThreadAffinityMask` + /// routes through `KernelState::set_affinity` and actually migrates. + #[test] + fn nt_set_information_thread_affinity_migrates() { + let (mut ctx, mut mem, mut state) = fresh(); + // Park info buffer in scratch. + let info_ptr = SCRATCH_BASE + 0x40; + mem.write_u32(info_ptr, 0x08); // mask = slot 3 + ctx.gpr[3] = 0x1000; // main handle + ctx.gpr[4] = 3; // ThreadAffinityMask + ctx.gpr[5] = info_ptr as u64; + ctx.gpr[6] = 4; // info_len + nt_set_information_thread(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + // Main should have migrated to slot 3. + let r = state.scheduler.find_by_handle(0x1000).expect("still alive"); + assert_eq!(r.hw_id, 3); + } + + /// Priority wiring — `KeSetBasePriorityThread` stores on the + /// `GuestThread` and `KeQueryBasePriorityThread` reads it back. + #[test] + fn ke_set_base_priority_round_trips() { + let (mut ctx, mut mem, mut state) = fresh(); + // fresh() installs the main thread with handle 0x1000. + // Query the current priority first — default 0. + ctx.gpr[3] = 0x1000; + ke_query_base_priority_thread(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0); + // Set priority to 7 (high-ish). + ctx.gpr[3] = 0x1000; + ctx.gpr[4] = 7u64; + ke_set_base_priority_thread(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "old priority was 0"); + // Query again — now 7. + ctx.gpr[3] = 0x1000; + ke_query_base_priority_thread(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 7); + } + + /// The regression we're guarding against: Sylpheed parks a thread on the + /// event it handed to `NtReadFile`. 
Historically our HLE ignored r4 and + /// left the event unsignaled — the wait never released. Completion must + /// signal the event regardless of whether the read succeeds. + #[test] + fn nt_read_file_signals_completion_event_on_success() { + let (mut ctx, mut mem, mut state) = fresh(); + let file = make_file(&mut state, vec![0x11, 0x22, 0x33, 0x44]); + let evt = make_event(&mut state); + let iosb: u32 = 0x4000_0000; + let buf: u32 = 0x4000_0100; + // r3 = file, r4 = event, r7 = iosb, r8 = buf, r9 = len, r10 = 0 (use cursor) + ctx.gpr[3] = file as u64; + ctx.gpr[4] = evt as u64; + ctx.gpr[7] = iosb as u64; + ctx.gpr[8] = buf as u64; + ctx.gpr[9] = 4; + ctx.gpr[10] = 0; + nt_read_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "STATUS_SUCCESS expected"); + assert!(event_signaled(&state, evt), "event must be signaled on success"); + } + + #[test] + fn nt_read_file_signals_event_on_eof() { + let (mut ctx, mut mem, mut state) = fresh(); + let file = make_file(&mut state, vec![0x01, 0x02]); + // Seek cursor past end by issuing a first read that drains it. + if let Some(KernelObject::File { position, .. 
}) = state.objects.get_mut(&file) { + *position = 2; + } + let evt = make_event(&mut state); + ctx.gpr[3] = file as u64; + ctx.gpr[4] = evt as u64; + ctx.gpr[7] = 0x4000_0000; + ctx.gpr[8] = 0x4000_0100; + ctx.gpr[9] = 4; + ctx.gpr[10] = 0; + nt_read_file(&mut ctx, &mut mem, &mut state); + assert!(event_signaled(&state, evt), "EOF path must still signal"); + } + + #[test] + fn nt_read_file_signals_event_on_invalid_handle() { + let (mut ctx, mut mem, mut state) = fresh(); + let evt = make_event(&mut state); + ctx.gpr[3] = 0xDEAD_BEEF; // bogus file handle + ctx.gpr[4] = evt as u64; + ctx.gpr[7] = 0x4000_0000; + ctx.gpr[8] = 0x4000_0100; + ctx.gpr[9] = 4; + ctx.gpr[10] = 0; + nt_read_file(&mut ctx, &mut mem, &mut state); + assert!(event_signaled(&state, evt), "invalid-handle path must still signal"); + } + + /// Many callers pass r4 = 0 (synchronous-wait style). The signal helper + /// must no-op rather than corrupt the handle table or panic. + #[test] + fn nt_read_file_accepts_null_event_handle() { + let (mut ctx, mut mem, mut state) = fresh(); + let file = make_file(&mut state, vec![0xAA; 8]); + ctx.gpr[3] = file as u64; + ctx.gpr[4] = 0; + ctx.gpr[7] = 0x4000_0000; + ctx.gpr[8] = 0x4000_0100; + ctx.gpr[9] = 8; + ctx.gpr[10] = 0; + nt_read_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "STATUS_SUCCESS expected with null event"); + } + + #[test] + fn nt_write_file_signals_completion_event() { + let (mut ctx, mut mem, mut state) = fresh(); + let evt = make_event(&mut state); + ctx.gpr[3] = 0x1234; // file handle not consulted on the discard path + ctx.gpr[4] = evt as u64; + ctx.gpr[7] = 0x4000_0000; + ctx.gpr[9] = 16; + nt_write_file(&mut ctx, &mut mem, &mut state); + assert!(event_signaled(&state, evt), "write must signal too"); + } + + /// Verify `FileStandardInformation` reports `Directory=1` for empty-path + /// (device-root) synthesized file handles. 
Sylpheed calls + /// `NtCreateFile("game:\\")` then `NtQueryInformationFile` on the returned + /// handle as a disc-validation probe — seeing `Directory=0` triggers its + /// `XamShowDirtyDiscErrorUI` path. + #[test] + fn nt_query_information_file_reports_directory_for_root_synth() { + let (mut ctx, mut mem, mut state) = fresh(); + // Synth a "game:\" style empty-path file, matching what `open_vfs_file` + // produces when the prefix-strip leaves nothing behind. + let h = state.alloc_handle_for(KernelObject::File { + path: String::new(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + }); + let info_buf = SCRATCH_BASE + 0x600; + ctx.gpr[3] = h as u64; // handle + ctx.gpr[4] = SCRATCH_BASE as u64; // iosb + ctx.gpr[5] = info_buf as u64; // file_info + ctx.gpr[6] = 24; // length + ctx.gpr[7] = 5; // FileStandardInformation + nt_query_information_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "STATUS_SUCCESS expected"); + assert_eq!( + mem.read_u8(info_buf + 21), + 1, + "Directory byte must be 1 for root-of-device synth" + ); + } + + /// `NtQueryDirectoryFile` takes an optional completion event at r4 + /// (Canary `xboxkrnl_io.cc:516`). The handler must signal that event + /// so waiters wake up, and must write the IOSB at r7 (the prior stub + /// mis-used r4, clobbering low guest memory). Without a VFS mounted + /// the handler finds no children and reports + /// `STATUS_NO_MORE_FILES`; the event still has to fire. + #[test] + fn nt_query_directory_file_signals_completion_event_and_uses_correct_iosb_reg() { + let (mut ctx, mut mem, mut state) = fresh(); + let evt = make_event(&mut state); + // A root-shaped synth directory — exactly what `NtCreateFile("game:\\")` + // produces when the prefix-strip leaves nothing behind. 
+ let handle = state.alloc_handle_for(KernelObject::File { + path: String::new(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + }); + let buf = SCRATCH_BASE + 0x100; + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = evt as u64; + ctx.gpr[7] = SCRATCH_BASE as u64; // IOSB must land here + ctx.gpr[8] = buf as u64; + ctx.gpr[9] = 128; // length >= 72 (Canary minimum) + ctx.gpr[10] = 0; + nt_query_directory_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_NO_MORE_FILES); + assert_eq!(mem.read_u32(SCRATCH_BASE), STATUS_NO_MORE_FILES as u32); + assert!(event_signaled(&state, evt), "completion event must be signaled"); + } + + /// Info-length-mismatch (Canary: length < 72 → STATUS_INFO_LENGTH_MISMATCH). + #[test] + fn nt_query_directory_file_rejects_short_buffer() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::File { + path: String::new(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + }); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; + ctx.gpr[7] = 0; + ctx.gpr[8] = SCRATCH_BASE as u64; + ctx.gpr[9] = 16; // below 72 + ctx.gpr[10] = 0; + nt_query_directory_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INFO_LENGTH_MISMATCH); + } + + /// Minimal `VfsDevice` impl that returns a hard-coded entry list — + /// lets us drive `NtQueryDirectoryFile` through a real enumeration + /// path without needing a disc image on disk. + struct StubVfs { + entries: Vec, + } + impl xenia_vfs::VfsDevice for StubVfs { + fn name(&self) -> &str { "stub" } + fn list_root(&self) -> Result, xenia_vfs::VfsError> { + Ok(self.entries.clone()) + } + fn read_file(&self, _path: &str) -> Result, xenia_vfs::VfsError> { + Err(xenia_vfs::VfsError::NotFound("stub".into())) + } + fn stat(&self, _path: &str) -> Result { + Err(xenia_vfs::VfsError::NotFound("stub".into())) + } + } + + /// Real enumeration of the root directory. 
The stub VFS exposes two + /// top-level entries and one nested entry; `NtQueryDirectoryFile` + /// must return the two top-level ones and skip the grandchild. + #[test] + fn nt_query_directory_file_enumerates_root_children() { + let (mut ctx, mut mem, mut state) = fresh(); + state.vfs = Some(Box::new(StubVfs { + entries: vec![ + xenia_vfs::VfsEntry { + name: "default.xex".into(), + is_directory: false, + size: 0x1000, + offset: 0, + }, + xenia_vfs::VfsEntry { + name: "dat".into(), + is_directory: true, + size: 0, + offset: 0, + }, + // A grandchild — must NOT appear in root enumeration. + xenia_vfs::VfsEntry { + name: "dat/tables.pak".into(), + is_directory: false, + size: 0x2000, + offset: 0, + }, + ], + })); + let handle = state.alloc_handle_for(KernelObject::File { + path: String::new(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + }); + let buf = SCRATCH_BASE + 0x100; + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; + ctx.gpr[7] = SCRATCH_BASE as u64; + ctx.gpr[8] = buf as u64; + ctx.gpr[9] = 512; + ctx.gpr[10] = 0; + nt_query_directory_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + // First entry header lives at `buf`: file_name_length at +0x3C, + // attributes at +0x38, name bytes starting at +0x40. Verify both + // entries land in the buffer by walking the linked list via + // NextEntryOffset. + let mut cursor: u32 = 0; + let mut names: Vec = Vec::new(); + loop { + let entry_base = buf + cursor; + let name_len = mem.read_u32(entry_base + 0x3C) as usize; + let mut bytes = Vec::with_capacity(name_len); + for i in 0..name_len as u32 { + bytes.push(mem.read_u8(entry_base + 0x40 + i)); + } + names.push(String::from_utf8(bytes).unwrap()); + let next = mem.read_u32(entry_base); + if next == 0 { + break; + } + cursor += next; + } + assert_eq!(names, vec!["default.xex", "dat"]); + // A second call on the same handle must return NO_MORE_FILES — + // the cursor has advanced past the end. 
+ ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; + ctx.gpr[7] = SCRATCH_BASE as u64; + ctx.gpr[8] = buf as u64; + ctx.gpr[9] = 512; + nt_query_directory_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_NO_MORE_FILES); + } + + /// Invalid handle → STATUS_INVALID_HANDLE, IOSB gets the error, and + /// the completion event still fires so callers don't hang. + #[test] + fn nt_query_directory_file_invalid_handle_still_signals() { + let (mut ctx, mut mem, mut state) = fresh(); + let evt = make_event(&mut state); + ctx.gpr[3] = 0xDEAD_BEEF; + ctx.gpr[4] = evt as u64; + ctx.gpr[7] = SCRATCH_BASE as u64; + ctx.gpr[8] = SCRATCH_BASE as u64 + 0x100; + ctx.gpr[9] = 128; + ctx.gpr[10] = 0; + nt_query_directory_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INVALID_HANDLE); + assert_eq!(mem.read_u32(SCRATCH_BASE), STATUS_INVALID_HANDLE as u32); + assert!(event_signaled(&state, evt)); + } + + /// `NtSignalAndWaitForSingleObjectEx` signals handle A, then does a + /// single wait on handle B. If A is already signaled via the atomic + /// set, any waiter on A wakes immediately; the caller then parks on + /// B (or returns success if B is already signaled). Canary reference: + /// `xboxkrnl_threading.cc:1103` — `XObject::SignalAndWait`. + #[test] + fn nt_signal_and_wait_signals_first_then_waits() { + let (mut ctx, mut mem, mut state) = fresh(); + // Pre-signaled event we'll wait on — so the whole call returns success. 
+ let wait_h = state.alloc_handle_for(KernelObject::Event { + manual_reset: true, + signaled: true, + waiters: Vec::new(), + }); + let signal_h = make_event(&mut state); // starts unsignaled + ctx.gpr[3] = signal_h as u64; + ctx.gpr[4] = wait_h as u64; + ctx.gpr[7] = 0; // timeout_ptr = null → infinite, but wait-handle already signaled + nt_signal_and_wait_for_single_object_ex(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "wait returns STATUS_SUCCESS"); + assert!(event_signaled(&state, signal_h), "signal handle set"); + } + + /// An unknown signal handle must return `STATUS_INVALID_HANDLE` and + /// NOT fall through to the wait — matches Canary's early-return guard. + #[test] + fn nt_signal_and_wait_rejects_unknown_signal_handle() { + let (mut ctx, mut mem, mut state) = fresh(); + ctx.gpr[3] = 0xDEAD_BEEF; + ctx.gpr[4] = 0x1234_5678; + ctx.gpr[7] = 0; + nt_signal_and_wait_for_single_object_ex(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INVALID_HANDLE); + } + + /// `FileNetworkOpenInformation` (class 34) — 56 bytes, `FileAttributes` + /// at +48 must carry `FILE_ATTRIBUTE_DIRECTORY` (0x10) for root synths. + /// Sylpheed's async worker asks for this class and the caller dispatches + /// on the attributes bits; a zeroed buffer meant `Directory` was clear + /// and forced the dirty-disc path. 
+ #[test] + fn nt_query_information_file_network_open_sets_dir_attribute() { + let (mut ctx, mut mem, mut state) = fresh(); + let h = state.alloc_handle_for(KernelObject::File { + path: String::new(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + }); + let info_buf = SCRATCH_BASE + 0x200; + ctx.gpr[3] = h as u64; + ctx.gpr[4] = SCRATCH_BASE as u64; + ctx.gpr[5] = info_buf as u64; + ctx.gpr[6] = 56; + ctx.gpr[7] = 34; // FileNetworkOpenInformation + nt_query_information_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0); + let attrs = mem.read_u32(info_buf + 48); + assert_eq!(attrs, 0x10, "FILE_ATTRIBUTE_DIRECTORY expected for root synth"); + } + + /// Normal file paths must still report `Directory=0` so games reading + /// actual files (`dat/tables.pak`, `config.ini`) don't see them as + /// directories. + #[test] + fn nt_query_information_file_reports_file_for_normal_path() { + let (mut ctx, mut mem, mut state) = fresh(); + let h = state.alloc_handle_for(KernelObject::File { + path: "dat/tables.pak".to_string(), + size: 964, + position: 0, + data: std::sync::Arc::new(vec![0; 964]), + dir_enum_pos: None, + }); + let info_buf = SCRATCH_BASE + 0x700; + ctx.gpr[3] = h as u64; + ctx.gpr[4] = SCRATCH_BASE as u64; + ctx.gpr[5] = info_buf as u64; + ctx.gpr[6] = 24; + ctx.gpr[7] = 5; + nt_query_information_file(&mut ctx, &mut mem, &mut state); + assert_eq!(mem.read_u8(info_buf + 21), 0, "normal file not directory"); + } + + // ===== PKEVENT shim ===== + + /// Write a DISPATCHER_HEADER at the given guest pointer. + /// ty: 0 = Notification (manual-reset), 1 = Synchronization (auto-reset), + /// 5 = Semaphore. 
+ fn write_dispatcher_header(mem: &GuestMemory, ptr: u32, ty: u8, signal_state: u32) { + mem.write_u8(ptr, ty); + mem.write_u8(ptr + 1, 0); // Absolute + mem.write_u8(ptr + 2, 4); // Size (u32 words) — four words is plausible + mem.write_u8(ptr + 3, 0); // Inserted + mem.write_u32(ptr + 4, signal_state); + // WaitListHead (8 bytes) — zero-init is fine; shadow owns waiters. + mem.write_u32(ptr + 8, 0); + mem.write_u32(ptr + 12, 0); + } + + #[test] + fn ke_set_event_shadows_pkevent_pointer() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0x100; + write_dispatcher_header(&mut mem, kevent_ptr, 1, 0); // synchronization, unsignaled + ctx.gpr[3] = kevent_ptr as u64; + ke_set_event(&mut ctx, &mut mem, &mut state); + // Shadow must have been minted AND signaled. + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { manual_reset, signaled, .. }) => { + assert!(!*manual_reset, "type=1 must be auto-reset"); + assert!(*signaled, "ke_set_event must signal the shadow"); + } + other => panic!("expected Event shadow at pkevent_ptr, got {:?}", other), + } + } + + #[test] + fn ke_reset_event_shadows_pkevent_pointer() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0x200; + // Initial signal state = 1 in guest memory → shadow starts signaled. + write_dispatcher_header(&mut mem, kevent_ptr, 0, 1); // notification + ctx.gpr[3] = kevent_ptr as u64; + ke_reset_event(&mut ctx, &mut mem, &mut state); + // After reset, shadow exists and is unsignaled; gpr[3] reports previous=1. + assert_eq!(ctx.gpr[3], 1, "previous state must be reported"); + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { manual_reset, signaled, .. }) => { + assert!(*manual_reset, "type=0 must be manual-reset"); + assert!(!*signaled, "ke_reset_event must clear the shadow"); + } + other => panic!("expected Event shadow, got {:?}", other), + } + } + + /// End-to-end: set + wait across the same PKEVENT pointer. 
This is the + /// exact contract Sylpheed relies on — without the shim, KeWait parks + /// on a nonexistent handle and KeSet no-ops, so the wait never resolves. + #[test] + fn ke_set_then_wait_on_pkevent_returns_success() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0x300; + write_dispatcher_header(&mut mem, kevent_ptr, 1, 0); // synchronization + // First signal the event. + ctx.gpr[3] = kevent_ptr as u64; + ke_set_event(&mut ctx, &mut mem, &mut state); + // Now wait with timeout = 0 (poll). Since it's signaled, the auto- + // reset consumes the signal and we should get STATUS_SUCCESS. + // Timeout pointer at scratch top: LARGE_INTEGER = 0. + let timeout_ptr = SCRATCH_BASE + 0x800; + mem.write_u32(timeout_ptr, 0); + mem.write_u32(timeout_ptr + 4, 0); + ctx.gpr[3] = kevent_ptr as u64; + ctx.gpr[7] = timeout_ptr as u64; + ke_wait_for_single_object(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "STATUS_SUCCESS expected on signaled wait"); + // Auto-reset: signal must have been consumed. + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { signaled, .. }) => assert!(!*signaled), + other => panic!("expected Event shadow, got {:?}", other), + } + } + + /// Semaphore shim: header type 5, Limit at +0x10. + #[test] + fn ke_release_semaphore_shadows_pksemaphore_pointer() { + let (mut ctx, mut mem, mut state) = fresh(); + let ksem_ptr = SCRATCH_BASE + 0x400; + write_dispatcher_header(&mut mem, ksem_ptr, 5, 2); // initial count 2 + mem.write_u32(ksem_ptr + 0x10, 10); // Limit + ctx.gpr[3] = ksem_ptr as u64; + ctx.gpr[4] = 1; // adjust = +1 + ke_release_semaphore(&mut ctx, &mut mem, &mut state); + match state.objects.get(&ksem_ptr) { + Some(KernelObject::Semaphore { count, max, .. 
}) => { + assert_eq!(*count, 3, "count was 2, +1 → 3"); + assert_eq!(*max, 10); + } + other => panic!("expected Semaphore shadow, got {:?}", other), + } + } + + /// Regression guard: genuine Nt handles must still work unchanged — + /// the shim's lower-bound check (`ptr < 0x1_0000`) skips our handle + /// range (0x1000 + 4·N). + #[test] + fn ke_set_event_leaves_nt_handles_intact() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset: true, + signaled: false, + waiters: Vec::new(), + }); + assert!(handle < 0x1_0000, "handle must be in low range"); + ctx.gpr[3] = handle as u64; + ke_set_event(&mut ctx, &mut mem, &mut state); + // Shadow must NOT have been created at the handle key (already exists); + // the existing Event just flips to signaled. + match state.objects.get(&handle) { + Some(KernelObject::Event { signaled, .. }) => assert!(*signaled), + _ => panic!("handle lookup broken"), + } + } + + /// Type bytes we don't understand (e.g., Mutant=2, Timer=8) must leave + /// the handle table untouched rather than conjuring wrong-typed shadows. + #[test] + fn ensure_dispatcher_object_ignores_unknown_type() { + let (mut _ctx, mut mem, mut state) = fresh(); + let ptr = SCRATCH_BASE + 0x500; + write_dispatcher_header(&mut mem, ptr, 2, 0); // Mutant — unsupported + ensure_dispatcher_object(&mut state, &mem, ptr); + assert!(!state.objects.contains_key(&ptr), "no shadow for unknown type"); + } + + /// `KePulseEvent` on a manual-reset event must wake every parked waiter + /// and leave the event unsignaled afterwards. This models the transient- + /// signal idiom that `NtSetEvent`+`NtClearEvent` cannot express atomically. 
+ #[test] + fn ke_pulse_event_manual_reset_wakes_all_and_leaves_unsignaled() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0x600; + write_dispatcher_header(&mut mem, kevent_ptr, 0, 0); // manual-reset, unsignaled + // Mint the shadow and park two fake waiters. + ctx.gpr[3] = kevent_ptr as u64; + ke_reset_event(&mut ctx, &mut mem, &mut state); + match state.objects.get_mut(&kevent_ptr) { + Some(KernelObject::Event { waiters, .. }) => { + // Fake waiter refs — wake_ref silently no-ops on + // out-of-bounds so the test only observes list drainage. + waiters.push(ThreadRef { hw_id: 2, idx: 0, generation: 0 }); + waiters.push(ThreadRef { hw_id: 3, idx: 0, generation: 0 }); + } + _ => panic!("shadow not minted"), + } + // Pulse. + ctx.gpr[3] = kevent_ptr as u64; + ke_pulse_event(&mut ctx, &mut mem, &mut state); + // Previous state = 0 (unsignaled). + assert_eq!(ctx.gpr[3], 0); + // Event must be unsignaled post-pulse, and waiter list drained. + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { signaled, waiters, .. }) => { + assert!(!*signaled, "pulse leaves event non-signaled"); + assert!(waiters.is_empty(), "all manual-reset waiters must be woken"); + } + _ => panic!("shadow vanished"), + } + } + + /// Auto-reset pulse wakes exactly one waiter (the head of the FIFO) and + /// consumes the transient signal, matching `NtSetEvent` on an auto-reset + /// event with no linger. + #[test] + fn ke_pulse_event_auto_reset_wakes_one() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0x700; + write_dispatcher_header(&mut mem, kevent_ptr, 1, 0); // auto-reset, unsignaled + ctx.gpr[3] = kevent_ptr as u64; + ke_reset_event(&mut ctx, &mut mem, &mut state); + match state.objects.get_mut(&kevent_ptr) { + Some(KernelObject::Event { waiters, .. }) => { + // Fake waiter refs — wake_ref silently no-ops on + // out-of-bounds so the test only observes list drainage. 
+ waiters.push(ThreadRef { hw_id: 2, idx: 0, generation: 0 }); + waiters.push(ThreadRef { hw_id: 3, idx: 0, generation: 0 }); + } + _ => panic!("shadow not minted"), + } + ctx.gpr[3] = kevent_ptr as u64; + ke_pulse_event(&mut ctx, &mut mem, &mut state); + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { signaled, waiters, .. }) => { + assert!(!*signaled, "pulse leaves auto-reset event non-signaled"); + assert_eq!(waiters.len(), 1, "auto-reset pulse wakes exactly one waiter"); + } + _ => panic!("shadow vanished"), + } + } + + /// `NtPulseEvent` must return `STATUS_SUCCESS` + write prior state to + /// the optional `previous_state_ptr` (r4). If the handle is invalid, + /// it must return `STATUS_INVALID_HANDLE` without touching memory. + #[test] + fn nt_pulse_event_writes_previous_state_and_clears() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset: true, + signaled: true, // initially signaled → prior = 1 + waiters: Vec::new(), + }); + let prev_ptr = SCRATCH_BASE + 0x10; + mem.write_u32(prev_ptr, 0xFFFF_FFFF); // sentinel + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = prev_ptr as u64; + nt_pulse_event(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + assert_eq!(mem.read_u32(prev_ptr), 1, "previous state was signaled=1"); + match state.objects.get(&handle) { + Some(KernelObject::Event { signaled, .. 
}) => { + assert!(!*signaled, "nt_pulse_event must leave event cleared"); + } + _ => panic!("handle lost"), + } + } + + #[test] + fn nt_pulse_event_invalid_handle_returns_status() { + let (mut ctx, mut mem, mut state) = fresh(); + ctx.gpr[3] = 0xDEAD_BEEF; // not in object table + ctx.gpr[4] = 0; + nt_pulse_event(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INVALID_HANDLE); + } + + /// `NtReleaseSemaphore` must return `STATUS_SEMAPHORE_LIMIT_EXCEEDED` + /// (0xC000_0047) when the post-release count would exceed `Limit`, + /// and must *not* update the count in that case. The prior + /// saturating-add behaviour silently clamped to i32::MAX, masking + /// overflow from games that key work-queue logic on the status code. + #[test] + fn nt_release_semaphore_rejects_over_limit() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Semaphore { + count: 3, + max: 5, + waiters: Vec::new(), + }); + let prev_ptr = SCRATCH_BASE + 0x40; + mem.write_u32(prev_ptr, 0xFFFF_FFFF); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 10; // 3 + 10 = 13 > max=5 → reject + ctx.gpr[5] = prev_ptr as u64; + nt_release_semaphore(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SEMAPHORE_LIMIT_EXCEEDED); + assert_eq!(mem.read_u32(prev_ptr), 3, "previous count written even on reject"); + match state.objects.get(&handle) { + Some(KernelObject::Semaphore { count, .. }) => { + assert_eq!(*count, 3, "count must not change on reject"); + } + _ => panic!("handle lost"), + } + } + + /// A normal release inside the limit increments `count` and returns + /// `STATUS_SUCCESS` with the previous count written out. 
+ #[test] + fn nt_release_semaphore_normal_path_updates_count() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Semaphore { + count: 2, + max: 5, + waiters: Vec::new(), + }); + let prev_ptr = SCRATCH_BASE + 0x50; + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 2; // 2 + 2 = 4 <= 5 → ok + ctx.gpr[5] = prev_ptr as u64; + nt_release_semaphore(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + assert_eq!(mem.read_u32(prev_ptr), 2); + match state.objects.get(&handle) { + Some(KernelObject::Semaphore { count, .. }) => assert_eq!(*count, 4), + _ => panic!("handle lost"), + } + } + + /// Invalid handle path: Canary returns `STATUS_INVALID_HANDLE` + /// without touching any state. Previous behaviour silently returned + /// `STATUS_SUCCESS` with `previous = 0`, which games couldn't tell + /// from a genuine release. + #[test] + fn nt_release_semaphore_invalid_handle_returns_status() { + let (mut ctx, mut mem, mut state) = fresh(); + ctx.gpr[3] = 0xDEAD_BEEF; + ctx.gpr[4] = 1; + ctx.gpr[5] = 0; + nt_release_semaphore(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INVALID_HANDLE); + } + + /// `RtlInitializeCriticalSection` must lay out the guest-visible + /// X_RTL_CRITICAL_SECTION per Canary `xboxkrnl_rtl.cc:536-553`: + /// dispatcher-header type=1 at +0x00, lock_count=-1 at +0x10, + /// recursion_count=0 at +0x14, owning_thread=0 at +0x18. Prior to + /// this fix xenia-rs wrote lock_count at +0x04 (landing inside the + /// dispatcher header's signal_state field) and owning_thread at + /// +0x0C (landing inside the WaitListHead). Any game that reads a + /// pre-initialized CS from its `.data` segment — Canary's comment + /// at line 533-534 notes this is common — would see garbage. 
+ #[test] + fn rtl_initialize_critical_section_lays_out_canary_struct() { + let (mut ctx, mut mem, mut state) = fresh(); + let cs_ptr = SCRATCH_BASE + 0x100; + // Pre-fill with a sentinel so we can see every byte we touch. + for i in (0..28).step_by(4) { + mem.write_u32(cs_ptr + i, 0xDEAD_BEEF); + } + ctx.gpr[3] = cs_ptr as u64; + rtl_initialize_critical_section(&mut ctx, &mut mem, &mut state); + assert_eq!(mem.read_u8(cs_ptr + CS_OFFS_TYPE), 1, "type = synchronization"); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT), 0xFFFF_FFFF, "lock_count = -1"); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_RECURSION_COUNT), 0); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_OWNING_THREAD), 0); + } + + /// End-to-end: init → enter → nested enter → leave → leave. The + /// CS must roll back through `(lc=0,rc=2) → (lc=0,rc=1) → (lc=-1, + /// rc=0,owner=0)` with the correct field offsets, and owner must + /// land at +0x18 — not anywhere else. + #[test] + fn rtl_critical_section_nested_enter_leave_roundtrip() { + let (mut ctx, mut mem, mut state) = fresh(); + // Install a live guest TID on the current HW slot so + // `rtl_enter_critical_section`'s `find_by_tid` sees us as a + // genuine owner. `find_by_tid` filters out `HwState::Idle` — + // the default placeholder state — so we also flip to Ready. + // Without both, `owner_is_live` stays false on self-recursion + // and the nested-enter branch is never taken. + let tid: u32 = 42; + // Update the live thread planted by `fresh()` on slot 0 so + // `find_by_tid(42)` resolves it. + state.scheduler.slots[0].runqueue[0].tid = tid; + state.scheduler.slots[0].runqueue[0].state = xenia_cpu::scheduler::HwState::Ready; + ctx.thread_id = tid; + let cs_ptr = SCRATCH_BASE + 0x200; + ctx.gpr[3] = cs_ptr as u64; + rtl_initialize_critical_section(&mut ctx, &mut mem, &mut state); + // First enter → owner = tid, LC = 0, RC = 1. 
+ ctx.gpr[3] = cs_ptr as u64; + rtl_enter_critical_section(&mut ctx, &mut mem, &mut state); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_OWNING_THREAD), tid); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32, 0); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_RECURSION_COUNT), 1); + // Nested enter (same tid) → LC = 1, RC = 2. + ctx.gpr[3] = cs_ptr as u64; + rtl_enter_critical_section(&mut ctx, &mut mem, &mut state); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32, 1); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_RECURSION_COUNT), 2); + // First leave → LC = 0, RC = 1, owner stays. + ctx.gpr[3] = cs_ptr as u64; + rtl_leave_critical_section(&mut ctx, &mut mem, &mut state); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_OWNING_THREAD), tid); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32, 0); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_RECURSION_COUNT), 1); + // Second leave → LC = -1, RC = 0, owner cleared. + ctx.gpr[3] = cs_ptr as u64; + rtl_leave_critical_section(&mut ctx, &mut mem, &mut state); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_OWNING_THREAD), 0); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32, -1); + assert_eq!(mem.read_u32(cs_ptr + CS_OFFS_RECURSION_COUNT), 0); + } + + /// `NtSetInformationFile` class 14 (`XFilePositionInformation`) must + /// update the file cursor. Read back via `NtQueryInformationFile` + /// class 14 — round-trip proves both sides agree on the layout. 
+ #[test] + fn nt_set_information_file_position_updates_cursor() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = make_file(&mut state, vec![0u8; 0x100]); + let info_ptr = SCRATCH_BASE + 0x20; + let iosb_ptr = SCRATCH_BASE + 0x40; + mem.write_u64(info_ptr, 0x40); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = iosb_ptr as u64; + ctx.gpr[5] = info_ptr as u64; + ctx.gpr[6] = 8; + ctx.gpr[7] = 14; // XFilePositionInformation + nt_set_information_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + assert_eq!(mem.read_u32(iosb_ptr), STATUS_SUCCESS as u32); + assert_eq!(mem.read_u32(iosb_ptr + 4), 8); + match state.objects.get(&handle) { + Some(KernelObject::File { position, .. }) => assert_eq!(*position, 0x40), + _ => panic!("file handle lost"), + } + } + + /// Read-only VFS — truncating to a different size must fail with + /// `STATUS_UNSUCCESSFUL`, matching Canary's error path when + /// `file->SetLength(...)` can't honour the request. + #[test] + fn nt_set_information_file_truncate_to_different_size_fails() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = make_file(&mut state, vec![0u8; 0x100]); + let info_ptr = SCRATCH_BASE + 0x80; + mem.write_u64(info_ptr, 0x200); // new EOF != current 0x100 + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; + ctx.gpr[5] = info_ptr as u64; + ctx.gpr[6] = 8; + ctx.gpr[7] = 20; // XFileEndOfFileInformation + nt_set_information_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_UNSUCCESSFUL); + } + + #[test] + fn nt_set_information_file_invalid_class_returns_status() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = make_file(&mut state, Vec::new()); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; + ctx.gpr[5] = 0; + ctx.gpr[6] = 0; + ctx.gpr[7] = 999; // not a defined class + nt_set_information_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INVALID_INFO_CLASS); + } + + #[test] + fn 
nt_set_information_file_short_buffer_returns_length_mismatch() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = make_file(&mut state, Vec::new()); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; + ctx.gpr[5] = SCRATCH_BASE as u64; + ctx.gpr[6] = 4; // class 14 needs 8 + ctx.gpr[7] = 14; + nt_set_information_file(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INFO_LENGTH_MISMATCH); + } + + /// `KeReleaseSemaphore` is lenient: it never reports errors, but the + /// count must still cap at `Limit` (Canary's underlying primitive + /// `XSemaphore::ReleaseSemaphore` enforces the cap, even though the + /// Ke wrapper discards the success bool). + #[test] + fn ke_release_semaphore_silently_caps_at_limit() { + let (mut ctx, mut mem, mut state) = fresh(); + let ksem_ptr = SCRATCH_BASE + 0x60; + // Dispatcher header: type=5 (Semaphore), signal_state/count=4, Limit=5. + write_dispatcher_header(&mut mem, ksem_ptr, 5, 4); + mem.write_u32(ksem_ptr + 0x10, 5); // Limit + ctx.gpr[3] = ksem_ptr as u64; + ctx.gpr[4] = 10; // 4 + 10 > 5 → reject silently + ke_release_semaphore(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 4, "Ke returns previous count even on cap"); + match state.objects.get(&ksem_ptr) { + Some(KernelObject::Semaphore { count, .. }) => { + assert_eq!(*count, 4, "count must not exceed Limit even via Ke-form"); + } + _ => panic!("shadow missing"), + } + } + + // ===== Timer subsystem ===== + + /// Helper: write a LARGE_INTEGER (i64 in big-endian hi/lo u32 pair) to + /// guest memory. Matches the format `parse_timeout` / `nt_set_timer_ex` + /// read from. 
+ fn write_large_integer(mem: &GuestMemory, ptr: u32, raw: i64) { + mem.write_u32(ptr, (raw >> 32) as u32); + mem.write_u32(ptr + 4, raw as u32); + } + + #[test] + fn nt_create_timer_sync_type_creates_auto_reset() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[4] = 0; // obj_attributes — ignored + ctx.gpr[5] = 1; // SynchronizationTimer + nt_create_timer(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + let handle = mem.read_u32(handle_ptr); + match state.objects.get(&handle) { + Some(KernelObject::Timer { + manual_reset, + signaled, + deadline, + waiters, + .. + }) => { + assert!(!*manual_reset, "type=1 is SynchronizationTimer (auto-reset)"); + assert!(!*signaled); + assert!(deadline.is_none()); + assert!(waiters.is_empty()); + } + other => panic!("expected Timer at handle {:#x}, got {:?}", handle, other), + } + } + + #[test] + fn nt_create_timer_notification_type_creates_manual_reset() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[5] = 0; // NotificationTimer + nt_create_timer(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + let handle = mem.read_u32(handle_ptr); + match state.objects.get(&handle) { + Some(KernelObject::Timer { manual_reset, .. }) => assert!(*manual_reset), + _ => panic!("expected Timer"), + } + } + + #[test] + fn nt_create_timer_invalid_type_returns_invalid_parameter() { + let (mut ctx, mut mem, mut state) = fresh(); + ctx.gpr[3] = (SCRATCH_BASE + 0x20) as u64; + ctx.gpr[5] = 42; // invalid + nt_create_timer(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0xC000_000D); // STATUS_INVALID_PARAMETER + assert!( + state.objects.is_empty() + || state + .objects + .values() + .all(|o| !matches!(o, KernelObject::Timer { .. 
})), + "no Timer object must be minted on invalid type" + ); + } + + #[test] + fn nt_set_timer_ex_schedules_pending_fire() { + let (mut ctx, mut mem, mut state) = fresh(); + // Create the timer first. + let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[5] = 1; + nt_create_timer(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + + // Arm with -1_000_000 (= 100ms) relative. + let due_time_ptr = SCRATCH_BASE + 0x40; + write_large_integer(&mut mem, due_time_ptr, -1_000_000); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = due_time_ptr as u64; + ctx.gpr[5] = 0; // routine + ctx.gpr[6] = 1; // mode + ctx.gpr[7] = 0; // routine_arg + ctx.gpr[8] = 0; // resume + ctx.gpr[9] = 0; // period_ms + nt_set_timer_ex(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + + assert_eq!(state.pending_timer_fires.len(), 1); + let (deadline, h) = state.pending_timer_fires[0]; + assert_eq!(h, handle); + assert!(deadline > 0, "deadline must advance past now"); + match state.objects.get(&handle) { + Some(KernelObject::Timer { + deadline: obj_d, + signaled, + .. + }) => { + assert_eq!(*obj_d, Some(deadline)); + assert!(!*signaled, "arm clears any stale signaled flag"); + } + _ => panic!("Timer vanished"), + } + } + + #[test] + fn nt_set_timer_ex_rearm_replaces_entry() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[5] = 1; + nt_create_timer(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + let due_time_ptr = SCRATCH_BASE + 0x40; + // First arm. + write_large_integer(&mut mem, due_time_ptr, -1_000_000); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = due_time_ptr as u64; + ctx.gpr[5] = 0; + ctx.gpr[6] = 1; + ctx.gpr[7] = 0; + ctx.gpr[8] = 0; + ctx.gpr[9] = 0; + nt_set_timer_ex(&mut ctx, &mut mem, &mut state); + // Second arm (later). 
+ write_large_integer(&mut mem, due_time_ptr, -5_000_000); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = due_time_ptr as u64; + nt_set_timer_ex(&mut ctx, &mut mem, &mut state); + assert_eq!( + state.pending_timer_fires.len(), + 1, + "rearm must replace, not duplicate" + ); + } + + #[test] + fn nt_cancel_timer_disarms_and_writes_zero() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[5] = 1; + nt_create_timer(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + let due_time_ptr = SCRATCH_BASE + 0x40; + write_large_integer(&mut mem, due_time_ptr, -1_000_000); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = due_time_ptr as u64; + ctx.gpr[5] = 0; + ctx.gpr[6] = 1; + ctx.gpr[7] = 0; + ctx.gpr[8] = 0; + ctx.gpr[9] = 0; + nt_set_timer_ex(&mut ctx, &mut mem, &mut state); + + let prev_ptr = SCRATCH_BASE + 0x60; + mem.write_u32(prev_ptr, 0xDEAD_BEEF); // sentinel — must be overwritten to 0 + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = prev_ptr as u64; + nt_cancel_timer(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + assert_eq!(mem.read_u32(prev_ptr), 0, "canary always writes 0"); + assert!(state.pending_timer_fires.is_empty()); + match state.objects.get(&handle) { + Some(KernelObject::Timer { deadline, .. }) => assert!(deadline.is_none()), + _ => panic!("Timer gone after cancel — must stay in table"), + } + } + + #[test] + fn nt_cancel_timer_invalid_handle_returns_status() { + let (mut ctx, mut mem, mut state) = fresh(); + ctx.gpr[3] = 0xDEAD_BEEF; + ctx.gpr[4] = 0; + nt_cancel_timer(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INVALID_HANDLE); + } + + #[test] + fn timer_fire_wakes_auto_reset_waiter_and_consumes_signal() { + let (mut ctx, mut mem, mut state) = fresh(); + // Arm an auto-reset timer with deadline slightly in the future. 
+ let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[5] = 1; + nt_create_timer(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + // Deadline: now + 1000 ticks. Directly set the state to avoid + // dependence on parse_timeout's divisor. + let now = state.scheduler.ctx(0).timebase; + let deadline = now + 1000; + match state.objects.get_mut(&handle) { + Some(KernelObject::Timer { + deadline: obj_d, .. + }) => *obj_d = Some(deadline), + _ => panic!("no timer"), + } + state.arm_timer(handle, deadline); + + // Park the current (initial) thread on a WaitForSingleObject of the + // timer handle. `do_wait_single` sees signaled=false, enqueues the + // current ref, and parks via `park_current`. + ctx.gpr[3] = handle as u64; + ctx.gpr[6] = 0; // NULL timeout → wait forever + nt_wait_for_single_object_ex(&mut ctx, &mut mem, &mut state); + let initial_ref = state.scheduler.current_ref(); + match state.scheduler.thread(initial_ref).state { + xenia_cpu::scheduler::HwState::Blocked(_) => {} + ref other => panic!("expected Blocked after wait, got {:?}", other), + } + + // Advance time past the deadline; fire_due_timers should signal and + // wake the waiter. + state.scheduler.advance_all_timebases_to(deadline); + let fired = state.fire_due_timers(); + assert!(fired); + + // After fire on auto-reset: signaled cleared via handle_consume, no + // pending entry, waiter promoted to Ready. + match state.objects.get(&handle) { + Some(KernelObject::Timer { + signaled, waiters, .. 
+ }) => { + assert!(!*signaled, "auto-reset consumed on single-waiter wake"); + assert!(waiters.is_empty(), "waiter dequeued by wake_eligible_waiters"); + } + _ => panic!("timer lost"), + } + assert!(state.pending_timer_fires.is_empty()); + match state.scheduler.thread(initial_ref).state { + xenia_cpu::scheduler::HwState::Ready => {} + ref other => panic!("expected Ready after fire, got {:?}", other), + } + } + + #[test] + fn timer_fire_manual_reset_wakes_all_and_stays_signaled() { + let (mut ctx, mut mem, mut state) = fresh(); + // Manual-reset timer. + let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[5] = 0; // NotificationTimer (manual-reset) + nt_create_timer(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + + let now = state.scheduler.ctx(0).timebase; + let deadline = now + 1000; + match state.objects.get_mut(&handle) { + Some(KernelObject::Timer { + deadline: obj_d, .. + }) => *obj_d = Some(deadline), + _ => unreachable!(), + } + state.arm_timer(handle, deadline); + + // Park two synthetic waiters (out-of-bounds refs — `wake_ref` + // silently no-ops on them; we only care about the drain-all + // semantics of manual-reset.) + match state.objects.get_mut(&handle) { + Some(KernelObject::Timer { waiters, .. }) => { + waiters.push(ThreadRef { hw_id: 2, idx: 0, generation: 0 }); + waiters.push(ThreadRef { hw_id: 3, idx: 0, generation: 0 }); + } + _ => unreachable!(), + } + + state.scheduler.advance_all_timebases_to(deadline); + assert!(state.fire_due_timers()); + match state.objects.get(&handle) { + Some(KernelObject::Timer { + signaled, waiters, .. 
+ }) => { + assert!(*signaled, "manual-reset stays signaled after fire"); + assert!(waiters.is_empty(), "manual-reset drains all waiters"); + } + _ => unreachable!(), + } + } + + #[test] + fn periodic_timer_rearms_after_fire() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[5] = 1; + nt_create_timer(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + + let now = state.scheduler.ctx(0).timebase; + let deadline = now + 1000; + let period_ticks = 500; + match state.objects.get_mut(&handle) { + Some(KernelObject::Timer { + deadline: obj_d, + period_ticks: obj_p, + .. + }) => { + *obj_d = Some(deadline); + *obj_p = period_ticks; + } + _ => unreachable!(), + } + state.arm_timer(handle, deadline); + + state.scheduler.advance_all_timebases_to(deadline); + assert!(state.fire_due_timers()); + + // After fire, a new entry must sit at deadline + period_ticks. + assert_eq!(state.pending_timer_fires.len(), 1); + let (new_deadline, h) = state.pending_timer_fires[0]; + assert_eq!(h, handle); + assert_eq!(new_deadline, deadline + period_ticks); + match state.objects.get(&handle) { + Some(KernelObject::Timer { deadline: obj_d, .. }) => { + assert_eq!(*obj_d, Some(new_deadline)); + } + _ => unreachable!(), + } + } + + #[test] + fn nt_close_scrubs_pending_timer_fires() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x20; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[5] = 1; + nt_create_timer(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + // Arm. + let due_time_ptr = SCRATCH_BASE + 0x40; + write_large_integer(&mut mem, due_time_ptr, -1_000_000); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = due_time_ptr as u64; + ctx.gpr[5] = 0; + ctx.gpr[6] = 1; + ctx.gpr[7] = 0; + ctx.gpr[8] = 0; + ctx.gpr[9] = 0; + nt_set_timer_ex(&mut ctx, &mut mem, &mut state); + assert_eq!(state.pending_timer_fires.len(), 1); + // Close. 
+ ctx.gpr[3] = handle as u64; + nt_close(&mut ctx, &mut mem, &mut state); + assert!( + state.pending_timer_fires.is_empty(), + "nt_close must scrub pending timer entry" + ); + assert!(!state.objects.contains_key(&handle)); + } + + #[test] + fn advance_to_next_wake_returns_ref_and_reason_for_timeout_path() { + let (mut ctx, mut mem, mut state) = fresh(); + // Create an event (unsignaled), park current thread on it with a + // finite deadline via NtWaitForSingleObjectEx. + let ev = state.alloc_handle_for(KernelObject::Event { + manual_reset: false, + signaled: false, + waiters: Vec::new(), + }); + let timeout_ptr = SCRATCH_BASE + 0x80; + write_large_integer(&mut mem, timeout_ptr, -1_000_000); + ctx.gpr[3] = ev as u64; + ctx.gpr[6] = timeout_ptr as u64; + nt_wait_for_single_object_ex(&mut ctx, &mut mem, &mut state); + let initial_ref = state.scheduler.current_ref(); + + // Current thread must be parked with that handle in its waiter list. + match state.objects.get(&ev) { + Some(KernelObject::Event { waiters, .. }) => { + assert!(waiters.contains(&initial_ref), "waiter enqueued"); + } + _ => unreachable!(), + } + + // Advance past the deadline. `advance_to_next_wake` returns the + // woken ref + its block reason; the main loop would then stamp + // STATUS_TIMEOUT and scrub waiter lists via `handle_timeout_wake`. + let (r, reason) = state + .scheduler + .advance_to_next_wake() + .expect("deadline exists"); + assert_eq!(r, initial_ref); + state.handle_timeout_wake(r, reason); + + // Post-wake: gpr[3] == STATUS_TIMEOUT (0x102) AND the waiter list + // scrubbed. Prior code returned 0 and left the waiter stranded. + assert_eq!(state.scheduler.ctx_mut_ref(r).gpr[3], 0x0000_0102); + match state.objects.get(&ev) { + Some(KernelObject::Event { waiters, .. 
}) => { + assert!( + !waiters.contains(&initial_ref), + "waiter scrubbed from handle list on timeout" + ); + } + _ => unreachable!(), + } + } + + /// Ordinal 0xFB must resolve to `NtSignalAndWaitForSingleObjectEx` + /// (canary's table) — the former `NtSetInformationThread` + /// registration collided and was removed. + #[test] + fn ordinal_0xfb_maps_to_nt_signal_and_wait() { + let state = KernelState::new(); + let name = state + .export_name(crate::state::ModuleId::Xboxkrnl, 0xFB) + .expect("0xFB must be registered"); + assert_eq!(name, "NtSignalAndWaitForSingleObjectEx"); + } + + /// `KeInitializeSemaphore` must seed the count and limit fields in + /// guest memory so that `ensure_dispatcher_object` later mints the + /// kernel-side shadow with the caller's parameters — not the + /// zero-fill default of `count=0, max=1`. + #[test] + fn ke_initialize_semaphore_seeds_count_and_limit() { + let (mut ctx, mem, mut state) = fresh(); + let sem_ptr = SCRATCH_BASE + 0x500; + ctx.gpr[3] = sem_ptr as u64; + ctx.gpr[4] = 3; + ctx.gpr[5] = 7; + ke_initialize_semaphore(&mut ctx, &mem, &mut state); + assert_eq!(mem.read_u8(sem_ptr), 5, "type=5 (semaphore)"); + assert_eq!(mem.read_u32(sem_ptr + 0x04), 3, "signal_state=count"); + assert_eq!(mem.read_u32(sem_ptr + 0x10), 7, "limit"); + + // Round-trip: KeReleaseSemaphore mints the shadow via + // `ensure_dispatcher_object`, which reads the fields we just wrote. + ctx.gpr[3] = sem_ptr as u64; + ctx.gpr[4] = 1; + ke_release_semaphore(&mut ctx, &mem, &mut state); + match state.objects.get(&sem_ptr) { + Some(KernelObject::Semaphore { count, max, .. 
}) => { + assert_eq!(*count, 4, "3 + 1 = 4"); + assert_eq!(*max, 7, "limit must propagate from r5, not default to 1"); + } + other => panic!("expected Semaphore shadow, got {:?}", other), + } + assert_eq!(ctx.gpr[3], 3, "previous count must be 3 (post-init, pre-release)"); + } + + /// `XexGetProcedureAddress` must honor r3=hmodule, look up the + /// (module, ordinal) in the thunk reverse-map, and write the address + /// to *r5. Three branches: success, unknown ordinal, unknown hmodule. + #[test] + fn xex_get_procedure_address_resolves_registered_thunk() { + let (mut ctx, mem, mut state) = fresh(); + state.register_thunk(crate::state::ModuleId::Xboxkrnl, 0x12, 0x8200_1234); + let out_ptr = SCRATCH_BASE + 0x600; + + // Success path. + mem.write_u32(out_ptr, 0xDEAD_BEEF); + ctx.gpr[3] = crate::state::HMODULE_XBOXKRNL as u64; + ctx.gpr[4] = 0x12; + ctx.gpr[5] = out_ptr as u64; + xex_get_procedure_address(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "STATUS_SUCCESS"); + assert_eq!(mem.read_u32(out_ptr), 0x8200_1234, "thunk address written"); + + // Unknown ordinal: STATUS_OBJECT_NAME_NOT_FOUND, *out cleared. + // Reset r3 because the prior call overwrote it with the status code. + mem.write_u32(out_ptr, 0xDEAD_BEEF); + ctx.gpr[3] = crate::state::HMODULE_XBOXKRNL as u64; + ctx.gpr[4] = 0x99; + xex_get_procedure_address(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0xC000_0034); + assert_eq!(mem.read_u32(out_ptr), 0); + + // Unknown hmodule: STATUS_INVALID_HANDLE. + ctx.gpr[3] = 0xCAFE_BABE; + ctx.gpr[4] = 0x12; + xex_get_procedure_address(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0xC000_0008); + } + + /// `XexGetModuleHandle` must return distinct pseudo-handles for the + /// main image, xboxkrnl.exe, and xam.xex; write the handle to *r4 + /// (not r3); and return NTSTATUS in r3 (`X_ERROR_NOT_FOUND` for + /// unknown names). 
+ #[test] + fn xex_get_module_handle_distinguishes_modules() { + let (mut ctx, mem, mut state) = fresh(); + state.image_base = 0x8200_0000; + let out_ptr = SCRATCH_BASE + 0x700; + let scratch_str = SCRATCH_BASE + 0x780; + + let mut call = |name: Option<&str>, + st: &mut KernelState, + mem: &GuestMemory, + ctx: &mut PpcContext| + -> (u64, u32) { + match name { + Some(s) => { + for (i, b) in s.as_bytes().iter().enumerate() { + mem.write_u8(scratch_str + i as u32, *b); + } + mem.write_u8(scratch_str + s.len() as u32, 0); + ctx.gpr[3] = scratch_str as u64; + } + None => ctx.gpr[3] = 0, + } + ctx.gpr[4] = out_ptr as u64; + mem.write_u32(out_ptr, 0xDEAD_BEEF); + xex_get_module_handle(ctx, mem, st); + (ctx.gpr[3], mem.read_u32(out_ptr)) + }; + + let (s_main, h_main) = call(Some(""), &mut state, &mem, &mut ctx); + let (s_krnl, h_krnl) = call(Some("xboxkrnl.exe"), &mut state, &mem, &mut ctx); + let (s_xam, h_xam) = call(Some("xam.xex"), &mut state, &mem, &mut ctx); + let (s_bad, h_bad) = call(Some("nope.xex"), &mut state, &mem, &mut ctx); + + assert_eq!(s_main, 0); + assert_eq!(h_main, 0x8200_0000); + assert_eq!(s_krnl, 0); + assert_eq!(h_krnl, crate::state::HMODULE_XBOXKRNL); + assert_eq!(s_xam, 0); + assert_eq!(h_xam, crate::state::HMODULE_XAM); + assert_eq!(s_bad, 0x0000_048B); + assert_eq!(h_bad, 0, "out cleared on miss"); + assert_ne!(h_main, h_krnl, "main module distinct from xboxkrnl"); + assert_ne!(h_krnl, h_xam, "xboxkrnl distinct from xam"); + } +} diff --git a/crates/xenia-kernel/src/interrupts.rs b/crates/xenia-kernel/src/interrupts.rs new file mode 100644 index 0000000..f7d9477 --- /dev/null +++ b/crates/xenia-kernel/src/interrupts.rs @@ -0,0 +1,424 @@ +//! Graphics interrupt + synthetic v-sync bookkeeping (P6). +//! +//! The Xbox 360 graphics driver calls `VdSetGraphicsInterruptCallback` to +//! register a single per-process callback that the OS invokes on: +//! +//! 1. **V-sync** — at 60 Hz; source code 0 (`INTERRUPT_SOURCE_VSYNC`). +//! 2. 
**Command-processor interrupt** — when `PM4_INTERRUPT` fires from the +//! guest-issued command stream; source code 1 (`INTERRUPT_SOURCE_CP`). +//! +//! Canary's [xboxkrnl_video.cc:303-310](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc#L303-L310) +//! dispatches the callback on HW thread 0. We follow the same convention. +//! +//! The delivery model is cooperative: we inject the callback entry into HW +//! thread 0 at the top of a scheduler round when it's safe (not mid-export, +//! not already inside another interrupt). When the callback returns to +//! [`LR_HALT_SENTINEL`] the main loop restores the saved [`PpcContext`] +//! fields and the HW thread picks up where it left off. + +use std::collections::VecDeque; + +use xenia_cpu::context::{CrField, PpcContext}; +use xenia_cpu::ThreadRef; + +pub const INTERRUPT_SOURCE_VSYNC: u32 = 0; +pub const INTERRUPT_SOURCE_CP: u32 = 1; + +/// Guest-registered V-sync / graphics-interrupt callback (from +/// `VdSetGraphicsInterruptCallback`). +#[derive(Debug, Clone, Copy)] +pub struct GraphicsInterruptCallback { + pub callback_pc: u32, + pub user_data: u32, +} + +/// Snapshot of the fields we mutate when diverting a HW thread into an +/// interrupt callback. Restored when the callback returns to +/// `LR_HALT_SENTINEL`. +/// +/// We save **all PPC volatile registers** (r0, r2–r12) plus `r1` (SP), +/// `pc`, `lr`, `ctr`, and `cr`. Non-volatile regs (r13–r31) are preserved +/// by the callback's own `__savegprlr_N` prologue/epilogue per the PPC +/// ELF ABI, so they don't need stashing here. +/// +/// **SP (`gpr[1]`) is included because the injector decrements it by +/// [`CALLBACK_STACK_PAD`] before the callback runs** — see that constant's +/// docs for why. 
Without this, the callback's `__savegprlr_N` prologue +/// overwrites the interrupted function's own stack-saved LR (which lives +/// at `[r1 - 8]`), and when the interrupted function later tries to +/// return, `bclr` jumps to `LR_HALT_SENTINEL` and the thread exits +/// prematurely. +#[derive(Debug, Clone, Copy)] +pub struct SavedCallbackCtx { + pub pc: u32, + pub lr: u64, + pub ctr: u64, + /// All PPC volatile GPRs (r0, r2–r12) plus r1 (SP) in index order. + /// Index 0 = r0, 1 = r1, 2 = r2, …, 12 = r12. Index 13..32 unused. + pub gprs: [u64; 13], + pub cr: [CrField; 8], + pub source: u32, +} + +/// Bytes the injector reserves below the interrupted thread's SP before +/// running the ISR callback. Matches Canary's +/// [`Processor::Execute`](../../../../xenia-canary/src/xenia/cpu/processor.cc#L383) +/// which decrements `r[1]` by `64 + 112 = 176` before +/// `function->Call(...)` and restores afterwards. The pad must be larger +/// than any plausible sum of `__savegprlr_N`'s save-area (up to 64 B for +/// r25-r31 + 8 B for LR) plus the callback's own `stwu r1,-N(r1)` frame +/// (the Sylpheed vsync ISR uses 128 B). +/// +/// Pre-fix: the ISR's `__savegprlr_25` stored the callback's saved LR +/// (= `LR_HALT_SENTINEL`, from injection) at `[r1 - 8]` — exactly where +/// the interrupted thread's current `bl`-saved LR lived. The +/// interrupted function's return site got stomped with `SENTINEL`, so +/// `__restgprlr_N -> bclr` jumped to the halt sentinel and the thread +/// exited through the wrong path. Manifested in Sylpheed as tid=5 +/// (producer for the render queue) terminating at cycle 7.5M, starving +/// both `0x10fc` (main's completion wait) and the PKEVENT that tid=6 +/// polls — no second `VdSwap`, no first pixel. 
+pub const CALLBACK_STACK_PAD: u32 = 64 + 112; + +impl SavedCallbackCtx { + pub fn capture(ctx: &PpcContext, source: u32) -> Self { + let mut gprs = [0u64; 13]; + for i in 0..13 { + gprs[i] = ctx.gpr[i]; + } + Self { + pc: ctx.pc, + lr: ctx.lr, + ctr: ctx.ctr, + gprs, + cr: ctx.cr, + source, + } + } + + pub fn restore(self, ctx: &mut PpcContext) { + ctx.pc = self.pc; + ctx.lr = self.lr; + ctx.ctr = self.ctr; + for i in 0..13 { + ctx.gpr[i] = self.gprs[i]; + } + ctx.cr = self.cr; + } +} + +/// Maximum pending sources held in the FIFO queue before new ones are +/// dropped. Four is enough to absorb a short burst (a few v-syncs arriving +/// while HW 0 is mid-callback from a prior one) without letting runaway +/// delivery swamp the guest. +pub const INTERRUPT_QUEUE_CAP: usize = 4; + +/// All interrupt bookkeeping — single field on `KernelState`. +/// +/// **First-Pixels M2 (2026-04-20)** — changed from a single-slot +/// `pending_source: Option` coalesce to a bounded FIFO so bursts +/// don't drop silently, and dropped `VSYNC_INSTR_PERIOD` from 500k to +/// 150k so cadence approximates 60 Hz at the current ~10 MIPS interpreter +/// throughput. Combined with the `HwState::ServicingIrq` variant added to +/// `xenia-cpu::scheduler`, interrupts can now be delivered even when HW 0 +/// is `Blocked(WaitAny)` — the injector stashes the block into the new +/// variant and the restore path re-blocks when the callback returns, +/// unless a `wake()` during the callback resolved the wait. +/// M2.5 — per-slot pending-IRQ bitmask. Each `AtomicU8` holds one bit per +/// interrupt source (currently 2 sources: VSYNC=bit 0, CP=bit 1) destined +/// for that specific HW slot. Used by the M3 parallel path: T_main (or +/// the GPU thread) sets a bit Release on the target slot's atomic; the +/// target T_cpu_i checks the bit Acquire at its quantum boundary and +/// self-injects without taking another thread's slot lock. 
+/// +/// The 6-element fixed-size array mirrors `xenia_cpu::scheduler::HW_THREAD_COUNT`. +pub type PendingLocalIrq = [std::sync::atomic::AtomicU8; + xenia_cpu::scheduler::HW_THREAD_COUNT]; + +#[derive(Debug, Default)] +pub struct InterruptState { + /// Registered callback (set by `VdSetGraphicsInterruptCallback`). + pub callback: Option, + /// Bounded FIFO of pending interrupt sources awaiting injection. + /// Push-back on queue, pop-front on inject. Over-cap pushes drop. + pub pending: VecDeque, + /// When `Some`, some HW thread is currently running a callback; on + /// return-to-sentinel we restore this and clear the flag. + pub saved: Option, + /// Which guest thread the current callback was injected into. + /// Required because we no longer anchor delivery to HW 0 — any + /// non-Exited thread is a valid target. Meaningful only while + /// `saved.is_some()`. Stored as a `ThreadRef` so per-slot + /// runqueues don't get ambiguous addressing. + pub injected_ref: Option, + /// Monotonic count of delivered interrupts. + pub delivered: u64, + /// Dropped interrupts (callback unset, queue full, or thread + /// exited/idle at inject time). + pub dropped: u64, + /// Instruction-count accumulator for the synthetic v-sync ticker. At + /// `VSYNC_INSTR_PERIOD` the main loop pushes an `INTERRUPT_SOURCE_VSYNC` + /// onto `pending` and resets. + pub vsync_accumulator: u64, + /// Last observed instruction count — `tick_vsync` diffs against + /// this to advance `vsync_accumulator`. + pub last_instr_count: u64, + /// M2.5 — per-slot pending-IRQ bits. Set by the producer (M3's + /// IRQ-routing logic on `T_main`) with `Release`; consumed by the + /// target T_cpu_i with `Acquire` at quantum boundary. Unused under + /// the lockstep path (M2's single-host-thread model still uses + /// `pending` + `try_inject_graphics_interrupt`); the field is wired + /// here so M3's per-HW-thread path is a flag flip, not a refactor. 
+ pub pending_local_irq: PendingLocalIrq, +} + +/// How many guest instructions correspond to one synthetic v-sync. +/// +/// Targets **~60 Hz at the post-Tier-3 interpreter throughput (~10 MIPS)**: +/// 10e6 instr/s ÷ 60 Hz ≈ 167k — we use 150k to give a small cushion. +/// Before M2 this was 500k (~20 Hz), which was enough for games that +/// don't gate anything on v-sync but not enough for titles like Sylpheed +/// whose main loop waits on the v-sync callback to signal an event every +/// frame. +pub const VSYNC_INSTR_PERIOD: u64 = 150_000; + +impl InterruptState { + /// Record a new callback registration. + pub fn set_callback(&mut self, callback_pc: u32, user_data: u32) { + self.callback = Some(GraphicsInterruptCallback { + callback_pc, + user_data, + }); + } + + /// Queue an interrupt for the next safe injection point. + pub fn queue_interrupt(&mut self, source: u32) { + if self.callback.is_none() { + self.dropped += 1; + return; + } + if self.pending.len() >= INTERRUPT_QUEUE_CAP { + self.dropped += 1; + return; + } + self.pending.push_back(source); + } + + /// Peek at the next pending source without removing it. + pub fn peek_next(&self) -> Option { + self.pending.front().copied() + } + + /// Pop the next pending source (called by the injector after it has + /// committed to dispatching it). + pub fn take_next(&mut self) -> Option { + self.pending.pop_front() + } + + /// Advance the v-sync accumulator by the delta since the last call. + /// Returns `true` if a new v-sync was queued. + pub fn tick_vsync(&mut self, current_instr_count: u64) -> bool { + let delta = current_instr_count.saturating_sub(self.last_instr_count); + self.last_instr_count = current_instr_count; + self.vsync_accumulator = self.vsync_accumulator.saturating_add(delta); + if self.vsync_accumulator < VSYNC_INSTR_PERIOD { + return false; + } + // Multiple periods may have elapsed in a single tick call if a + // large instruction delta went by (e.g. a long export). 
Drain + // the accumulator fully so we don't lag behind. + let periods = self.vsync_accumulator / VSYNC_INSTR_PERIOD; + self.vsync_accumulator %= VSYNC_INSTR_PERIOD; + for _ in 0..periods { + self.queue_interrupt(INTERRUPT_SOURCE_VSYNC); + } + true + } + + /// Is HW thread 0 currently in a callback? + pub fn is_in_callback(&self) -> bool { + self.saved.is_some() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn queue_interrupt_drops_without_callback() { + let mut s = InterruptState::default(); + s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); + assert_eq!(s.dropped, 1); + assert!(s.pending.is_empty()); + } + + #[test] + fn queue_interrupt_fifo_preserves_order() { + let mut s = InterruptState::default(); + s.set_callback(0x1000, 0xAB); + s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); + s.queue_interrupt(INTERRUPT_SOURCE_CP); + s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); + assert_eq!(s.dropped, 0); + // FIFO: take_next hands them out in push order. + assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC)); + assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_CP)); + assert_eq!(s.take_next(), Some(INTERRUPT_SOURCE_VSYNC)); + assert_eq!(s.take_next(), None); + } + + #[test] + fn queue_interrupt_caps_at_queue_size() { + let mut s = InterruptState::default(); + s.set_callback(0x1000, 0xAB); + for _ in 0..INTERRUPT_QUEUE_CAP { + s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); + } + // Over-cap: drops rather than evicting the oldest. 
+ s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); + s.queue_interrupt(INTERRUPT_SOURCE_VSYNC); + assert_eq!(s.dropped, 2); + assert_eq!(s.pending.len(), INTERRUPT_QUEUE_CAP); + } + + #[test] + fn tick_vsync_fires_at_new_150k_threshold() { + let mut s = InterruptState::default(); + s.set_callback(0x1000, 0xAB); + assert_eq!(VSYNC_INSTR_PERIOD, 150_000); + assert!(!s.tick_vsync(VSYNC_INSTR_PERIOD - 1)); + assert!(s.pending.is_empty()); + assert!(s.tick_vsync(VSYNC_INSTR_PERIOD)); + assert_eq!(s.peek_next(), Some(INTERRUPT_SOURCE_VSYNC)); + } + + #[test] + fn tick_vsync_drains_multiple_periods_in_one_call() { + // Long kernel export → big instr delta → multiple v-syncs must + // be delivered, not lost. + let mut s = InterruptState::default(); + s.set_callback(0x1000, 0xAB); + assert!(s.tick_vsync(VSYNC_INSTR_PERIOD * 3 + 10)); + assert_eq!(s.pending.len(), 3); + } + + /// Simulates what the main loop does: inject, execute guest code up + /// to the sentinel, restore. Uses a single-instruction `bclr` callback + /// — the interpreter sees `pc == callback_pc`, steps, and the blr + /// instruction writes `lr` into `pc`, which equals `LR_HALT_SENTINEL` + /// → main loop detects and triggers restore. + #[test] + fn inject_restore_roundtrip_smoke() { + let mut ctx = PpcContext::new(); + ctx.pc = 0x1000_0000; + ctx.lr = 0xCAFE_BABE; + ctx.gpr[3] = 0x1234; + ctx.gpr[4] = 0x5678; + + let mut s = InterruptState::default(); + s.set_callback(0x2000_0000, 0xDEAD); + + // Simulate main loop inject: save ctx fields, divert pc/lr/r3/r4. + let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); + s.saved = Some(saved); + ctx.pc = 0x2000_0000; + ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL; + ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64; + ctx.gpr[4] = 0xDEAD; + assert!(s.is_in_callback()); + + // Guest callback "runs" to the sentinel — simulate by writing + // pc = lr (what `blr` would do). 
+ ctx.pc = ctx.lr as u32; + + // Main loop detects pc == LR_HALT_SENTINEL while in_callback: + let saved = s.saved.take().unwrap(); + saved.restore(&mut ctx); + s.delivered += 1; + + assert_eq!(ctx.pc, 0x1000_0000); + assert_eq!(ctx.lr, 0xCAFE_BABE); + assert_eq!(ctx.gpr[3], 0x1234); + assert_eq!(ctx.gpr[4], 0x5678); + assert!(!s.is_in_callback()); + assert_eq!(s.delivered, 1); + } + + #[test] + fn saved_ctx_roundtrip() { + let mut ctx = PpcContext::new(); + ctx.pc = 0x11223344; + ctx.lr = 0xDEADBEEF; + ctx.gpr[3] = 0xAAAA; + ctx.gpr[4] = 0xBBBB; + let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); + ctx.pc = 0; + ctx.lr = 0; + ctx.gpr[3] = 0; + ctx.gpr[4] = 0; + saved.restore(&mut ctx); + assert_eq!(ctx.pc, 0x11223344); + assert_eq!(ctx.lr, 0xDEADBEEF); + assert_eq!(ctx.gpr[3], 0xAAAA); + assert_eq!(ctx.gpr[4], 0xBBBB); + } + + /// Full volatile-GPR + SP roundtrip. Regression test for the + /// 2026-04-24 IRQ-injection fix: the ISR callback's prologue clobbers + /// `[r1 - 8]` on the interrupted thread's stack unless the injector + /// pre-decrements SP by [`CALLBACK_STACK_PAD`] and the saved ctx puts + /// SP (and the rest of the PPC volatile set) back on return. + #[test] + fn saved_ctx_covers_sp_and_all_volatile_gprs() { + let mut ctx = PpcContext::new(); + ctx.pc = 0xAAAA_BBBB; + ctx.lr = 0x1111_2222; + ctx.ctr = 0x3333_4444; + for i in 0..13 { + ctx.gpr[i] = 0x1000 + i as u64; + } + // r13..r31 are non-volatile and should survive the callback's own + // save/restore — the saved ctx deliberately does NOT cover them. + for i in 13..32 { + ctx.gpr[i] = 0xDEAD_0000 + i as u64; + } + + let saved = SavedCallbackCtx::capture(&ctx, INTERRUPT_SOURCE_VSYNC); + + // Simulate injector: flip pc/lr/r1/r3/r4 (what the real injector + // actually does — see try_inject_graphics_interrupt in main.rs). 
+ ctx.pc = 0xCAFE; + ctx.lr = xenia_cpu::context::LR_HALT_SENTINEL; + ctx.gpr[1] = ctx.gpr[1].wrapping_sub(CALLBACK_STACK_PAD as u64); + ctx.gpr[3] = INTERRUPT_SOURCE_VSYNC as u64; + ctx.gpr[4] = 0xBEEF; + // Simulate callback clobbering a few volatile regs that aren't + // part of the "obviously diverted" set. + ctx.gpr[0] = 0xFEED_FACE; + ctx.gpr[7] = 0x9999; + ctx.gpr[12] = 0xABCD; + + saved.restore(&mut ctx); + + // All volatile GPRs restored to pre-injection. + for i in 0..13 { + assert_eq!( + ctx.gpr[i], + 0x1000 + i as u64, + "volatile r{} clobbered by callback was not restored", + i + ); + } + // SP specifically back to the pre-pad value. + assert_eq!(ctx.gpr[1], 0x1001, "SP must be restored to pre-injection"); + // Non-volatile regs were never captured; they stay as the callback + // left them (here, untouched because we didn't modify 13..32). + for i in 13..32 { + assert_eq!(ctx.gpr[i], 0xDEAD_0000 + i as u64); + } + assert_eq!(ctx.pc, 0xAAAA_BBBB); + assert_eq!(ctx.lr, 0x1111_2222); + assert_eq!(ctx.ctr, 0x3333_4444); + } +} diff --git a/crates/xenia-kernel/src/lib.rs b/crates/xenia-kernel/src/lib.rs index 457b815..9cdceb9 100644 --- a/crates/xenia-kernel/src/lib.rs +++ b/crates/xenia-kernel/src/lib.rs @@ -1,6 +1,17 @@ +pub mod audit; pub mod exports; +pub mod interrupts; pub mod objects; +pub mod path; pub mod state; +pub mod thread; +pub mod ui_bridge; pub mod xam; +pub use interrupts::{ + GraphicsInterruptCallback, InterruptState, SavedCallbackCtx, INTERRUPT_SOURCE_CP, + INTERRUPT_SOURCE_VSYNC, VSYNC_INSTR_PERIOD, +}; pub use state::{KernelState, ModuleId}; +pub use thread::{allocate_thread_image, ThreadImage}; +pub use ui_bridge::{SwapInfo, UiBridge}; diff --git a/crates/xenia-kernel/src/objects.rs b/crates/xenia-kernel/src/objects.rs index 117399b..434ba34 100644 --- a/crates/xenia-kernel/src/objects.rs +++ b/crates/xenia-kernel/src/objects.rs @@ -1,12 +1,94 @@ //! Kernel object tracking for HLE. 
+use std::sync::Arc; + +use xenia_cpu::ThreadRef; + /// Kernel object types tracked by handle. +/// +/// Sync variants (`Event`, `Semaphore`, `Mutex`, `Thread`) carry an in-place +/// waiter list so wait/set/release sites keep invariants local — dropping the +/// object implicitly drops its waiters. Waiters are stored as `ThreadRef` +/// (post-Axis-1) — a bare `hw_id: u8` would have been ambiguous under per-slot +/// runqueues where multiple guest threads share one HW slot. #[derive(Debug)] pub enum KernelObject { - Event { manual_reset: bool, signaled: bool }, - Semaphore { count: i32, max: i32 }, - File { path: String }, - Thread { id: u32 }, - Timer, - Mutex, + Event { + manual_reset: bool, + signaled: bool, + /// Guest threads parked on this event. + waiters: Vec, + }, + Semaphore { + count: i32, + max: i32, + waiters: Vec, + }, + File { + /// Normalized VFS path (e.g. "default.xex", "media/shared/foo.pkg"). + path: String, + /// Full file size in bytes. + size: u64, + /// Current read/write cursor. + position: u64, + /// Whole-file buffer — VFS reads the entire file up front so + /// subsequent NtReadFile calls are O(1) slice copies. + /// `Arc>` so duplicate handles could share backing storage. + data: Arc>, + /// Directory-enumeration cursor consumed by `NtQueryDirectoryFile`. + /// `None` before the first call; `Some(N)` = next VFS entry index + /// to emit. Reset to `Some(0)` when the guest passes + /// `restart_scan=1`. Unused on non-directory files. + dir_enum_pos: Option, + }, + Thread { + id: u32, + /// HW thread slot currently running this guest thread (None once exited + /// — `exit_code` becomes Some). + hw_id: Option, + /// None while the thread is running; populated on ExTerminateThread + /// or halt-sentinel return. + exit_code: Option, + /// Guest threads parked in KeWaitForSingleObject on this thread handle. 
+ waiters: Vec, + }, + Timer { + /// Xbox 360 timer_type 0 = NotificationTimer (manual-reset), + /// 1 = SynchronizationTimer (auto-reset). Same shape as Event. + manual_reset: bool, + signaled: bool, + /// Absolute tick-space deadline; None when disarmed. + deadline: Option, + /// Period in ticks (same units as `deadline`); 0 = one-shot. + period_ticks: u64, + /// Original ms value (canary's SetTimer keeps it for diagnostics). + period_ms: u32, + /// APC routine (deferred — see `timer_apc` warn in nt_set_timer_ex). + callback_routine: u32, + callback_arg: u32, + waiters: Vec, + }, + Mutex { + /// HW thread id currently holding the mutex; None when free. + owner: Option, + recursion: u32, + waiters: Vec, + }, +} + +impl KernelObject { + /// Returns the per-object waiter list for the 5 sync variants (Event, + /// Semaphore, Thread, Timer, Mutex) and `None` for `File`. Used by + /// deadline-expiry scrub in `KernelState::handle_timeout_wake` so a + /// timed-out waiter isn't left stranded in a handle's waiters list. + pub fn waiters_mut(&mut self) -> Option<&mut Vec> { + match self { + KernelObject::Event { waiters, .. } + | KernelObject::Semaphore { waiters, .. } + | KernelObject::Thread { waiters, .. } + | KernelObject::Timer { waiters, .. } + | KernelObject::Mutex { waiters, .. } => Some(waiters), + KernelObject::File { .. } => None, + } + } } diff --git a/crates/xenia-kernel/src/path.rs b/crates/xenia-kernel/src/path.rs new file mode 100644 index 0000000..edb7e7d --- /dev/null +++ b/crates/xenia-kernel/src/path.rs @@ -0,0 +1,139 @@ +//! Path normalization for kernel file I/O. +//! +//! Guests pass file paths inside an `OBJECT_ATTRIBUTES` struct that points at +//! an `ANSI_STRING` descriptor. Those paths come in several Xbox-flavored +//! forms — NT device paths (`\Device\Cdrom0\...`), drive letters (`D:\...`, +//! `d:\...`), or symbolic link prefixes (`game:\...`). We strip whichever +//! 
prefix applies and return a plain slash-separated path relative to the +//! mounted VFS root, so `VfsDevice::read_file` can look it up directly. + +use xenia_memory::{GuestMemory, MemoryAccess}; + +/// Xbox `ANSI_STRING`: +/// u16 Length +/// u16 MaximumLength +/// u32 Buffer (guest pointer) +fn read_ansi_string(mem: &GuestMemory, ptr: u32) -> Option { + if ptr == 0 { + return None; + } + let length = mem.read_u16(ptr) as u32; + let buffer = mem.read_u32(ptr + 4); + if buffer == 0 || length == 0 { + return Some(String::new()); + } + let mut out = String::with_capacity(length as usize); + for i in 0..length { + let c = mem.read_u8(buffer + i); + if c == 0 { + break; + } + out.push(c as char); + } + Some(out) +} + +/// Xbox `OBJECT_ATTRIBUTES`: +/// u32 RootDirectory (handle) +/// u32 Name (pointer to ANSI_STRING) +/// u32 Attributes +fn read_object_attributes_name(mem: &GuestMemory, obj_attrs_ptr: u32) -> Option { + if obj_attrs_ptr == 0 { + return None; + } + let name_ptr = mem.read_u32(obj_attrs_ptr + 4); + read_ansi_string(mem, name_ptr) +} + +/// Known Xbox device prefixes that need to be stripped before looking a path +/// up in the VFS. The list mirrors the symbolic links xenia-canary sets up +/// at boot (see `xboxkrnl_io.cc`). Case-insensitive matching. +const DEVICE_PREFIXES: &[&str] = &[ + "\\Device\\Cdrom0\\", + "\\Device\\Harddisk0\\Partition1\\", + "\\Device\\Harddisk0\\Partition0\\", + "\\Device\\Harddisk0\\", + "\\Device\\Mu0\\", + "\\Device\\Mu1\\", + "\\Device\\Mass0\\", + "\\Device\\Mass1\\", + "\\Device\\Mass2\\", + "\\SystemRoot\\", + "\\??\\", + "game:\\", + "d:\\", + "D:\\", +]; + +/// Strip any Xbox device prefix and normalize backslashes to forward slashes. +/// Returns the path relative to the VFS root. +pub fn normalize_path(raw: &str) -> String { + let mut s = raw.trim().to_string(); + + // Case-insensitive prefix strip. 
+ let lowered = s.to_ascii_lowercase(); + for prefix in DEVICE_PREFIXES { + let pl = prefix.to_ascii_lowercase(); + if lowered.starts_with(&pl) { + s = s[pl.len()..].to_string(); + break; + } + } + + // Drop any leading slash/backslash that survived prefix stripping. + while s.starts_with('\\') || s.starts_with('/') { + s.remove(0); + } + + // Canonical form: forward slashes. + s.replace('\\', "/") +} + +/// Convenience: read the OBJECT_ATTRIBUTES struct at `obj_attrs_ptr` and +/// return a normalized VFS path. Returns `None` if the struct pointer or its +/// inner name pointer is null. +pub fn object_attributes_to_vfs_path(mem: &GuestMemory, obj_attrs_ptr: u32) -> Option { + let raw = read_object_attributes_name(mem, obj_attrs_ptr)?; + if raw.is_empty() { + return None; + } + Some(normalize_path(&raw)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn strips_device_cdrom() { + assert_eq!(normalize_path("\\Device\\Cdrom0\\default.xex"), "default.xex"); + } + + #[test] + fn strips_drive_letter_lowercase() { + assert_eq!(normalize_path("d:\\media\\shared\\foo.pkg"), "media/shared/foo.pkg"); + } + + #[test] + fn strips_drive_letter_uppercase() { + assert_eq!(normalize_path("D:\\default.xex"), "default.xex"); + } + + #[test] + fn strips_game_prefix() { + assert_eq!(normalize_path("game:\\data\\whatever.bin"), "data/whatever.bin"); + } + + #[test] + fn preserves_relative_path() { + assert_eq!(normalize_path("scripts/init.lua"), "scripts/init.lua"); + } + + #[test] + fn handles_partition1() { + assert_eq!( + normalize_path("\\Device\\Harddisk0\\Partition1\\content\\abc.sav"), + "content/abc.sav" + ); + } +} diff --git a/crates/xenia-kernel/src/state.rs b/crates/xenia-kernel/src/state.rs index cc47e4e..c09dc63 100644 --- a/crates/xenia-kernel/src/state.rs +++ b/crates/xenia-kernel/src/state.rs @@ -1,11 +1,35 @@ use std::collections::HashMap; -use xenia_cpu::PpcContext; -use xenia_memory::GuestMemory; +use xenia_cpu::scheduler::{PcrWriter, Scheduler}; +use 
xenia_cpu::{PpcContext, ThreadRef}; +use xenia_memory::{GuestMemory, MemoryAccess}; +use xenia_vfs::VfsDevice; +use crate::audit::{HandleAudit, HandleAuditEntry}; use crate::objects::KernelObject; +use crate::ui_bridge::UiBridge; + +/// Adapter: write PCR+0x2C on guest memory. Lets `Scheduler::spawn` and +/// Axis 4's migration call through without `xenia-cpu` depending on the +/// memory crate. +pub struct GuestMemoryPcr<'a>(pub &'a GuestMemory); +impl PcrWriter for GuestMemoryPcr<'_> { + fn write_pcr_id(&mut self, pcr_base: u32, hw_id: u8) { + // `GuestMemory::write_u32` takes `&self` post-M2 trait flip; the + // wrapping `&'a GuestMemory` is sufficient. + self.0.write_u32(pcr_base + 0x2C, hw_id as u32); + } +} /// Function signature for HLE kernel exports. -pub type KernelExportFn = fn(&mut PpcContext, &mut GuestMemory, &mut KernelState); +/// +/// The first argument is the **currently running** HW thread's `PpcContext`, +/// which the caller has temporarily moved out of the scheduler slot to avoid +/// aliasing. Exports that only touch register/GPR state use `ctx` directly; +/// exports that need scheduler state (spawn/park/wake/tls/etc.) reach +/// through `state.scheduler` — note that `state.scheduler.hw_threads[current]` +/// holds a placeholder `PpcContext` for the duration of the call, not the +/// live one passed as `ctx`. +pub type KernelExportFn = fn(&mut PpcContext, &GuestMemory, &mut KernelState); /// Module identifier for kernel exports. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -15,45 +39,174 @@ pub enum ModuleId { Xbdm, } +/// Pseudo-`HMODULE` values returned by `XexGetModuleHandle` and accepted by +/// `XexGetProcedureAddress`. Distinct from real loaded-image bases +/// (>=0x82000000) and from kernel handles (0x1000+, allocated by +/// `alloc_handle`). The 0xFFFE_xxxx prefix is unused by both guest segments +/// and our handle allocator. 
+pub const HMODULE_XBOXKRNL: u32 = 0xFFFE_0001; +pub const HMODULE_XAM: u32 = 0xFFFE_0002; + /// Central kernel state tracking all guest OS state. pub struct KernelState { exports: HashMap<(ModuleId, u32), (&'static str, KernelExportFn)>, - next_handle: u32, - pub tls_slots: HashMap, - next_tls_index: u32, + /// M2.4: bump allocator for kernel handles. `AtomicU32` so concurrent + /// HLE calls under M3 can `fetch_add` without a lock. `Relaxed` is + /// fine — the allocated value is a fresh ID with no prior payload to + /// publish; observers (the kernel object table) are guarded by + /// their own synchronization. + next_handle: std::sync::atomic::AtomicU32, + /// Scheduler managing all emulated HW threads + their per-slot + /// runqueues. Starts empty — the app installs the initial guest thread + /// on slot 0 via `KernelState::install_initial_thread` once it has the + /// entry address. + pub scheduler: Scheduler, + /// TLS slot allocator — index counter only. Per-thread *values* live on + /// `GuestThread::tls_values` (see scheduler). M2.4: `AtomicU32`. + pub next_tls_index: std::sync::atomic::AtomicU32, + /// Critical-section waiter map: guest `cs_ptr` → guest threads parked + /// on it. Critical sections are in guest memory (not kernel objects), + /// so their waiter list lives here rather than on an object. + pub cs_waiters: HashMap>, /// Kernel object table: handle → object pub objects: HashMap, - /// Bump allocator for guest heap (NtAllocateVirtualMemory etc.) - pub heap_cursor: u32, - /// Stack allocator cursor for MmCreateKernelStack - pub stack_cursor: u32, + /// Bump allocator for guest heap (NtAllocateVirtualMemory etc.). + /// M2.4: `AtomicU32` for lock-free concurrent allocation. + pub heap_cursor: std::sync::atomic::AtomicU32, + /// Stack allocator cursor for MmCreateKernelStack. M2.4: atomic. 
+ pub stack_cursor: std::sync::atomic::AtomicU32, /// GPU command buffer address (set by VdGetSystemCommandBuffer) pub gpu_command_buffer: u32, + /// GPU backend. M1.4: was `xenia_gpu::GpuSystem` directly, now a + /// [`xenia_gpu::GpuBackend`] enum so the kernel can hold either an + /// inline `GpuSystem` (synchronous, default) or a `GpuHandle` proxy + /// pointing at a worker thread (`--gpu-thread`). Forwarding methods + /// on the enum keep call sites in [`crate::exports`] terse. + pub gpu: xenia_gpu::GpuBackend, + /// Monotonic packet number returned by `XamInputGetState`. Games detect + /// input changes by watching this increment. + pub input_packet_number: u32, + /// Previous gamepad snapshot; `input_packet_number` only advances when + /// the state bytes actually change, matching host XInput semantics. + pub last_input_bytes: u128, /// Image base of the loaded XEX (for XexExecutableModuleHandle etc.) pub image_base: u32, - /// Next thread ID - pub next_thread_id: u32, + /// Next thread ID. M2.4: atomic. + pub next_thread_id: std::sync::atomic::AtomicU32, + /// Virtual file system for NtCreateFile/NtReadFile/etc. The app mounts + /// the disc image or host directory into this slot; file I/O handlers + /// route all reads through it. + pub vfs: Option>, + /// Bridge to the host UI. `None` when running headless. Installed by + /// `cmd_exec` when the user passes `--ui`. + pub ui: Option, + /// P6 — graphics interrupt + synthetic v-sync bookkeeping. Registers + /// the callback set by `VdSetGraphicsInterruptCallback` and tracks + /// the paused-context snapshot while HW thread 0 is running it. + pub interrupts: crate::interrupts::InterruptState, + /// Per-handle refcount. Since `NtDuplicateObject` aliases (returns the + /// source handle value as the "new" handle rather than minting a fresh + /// id), a single handle commonly has multiple logical references. 
This + /// map tracks that count so a stray `NtClose` on one reference doesn't + /// destroy the object while another reference is still live. Canary's + /// `ObjectTable::ReleaseHandle` (object_table.cc:189) is the parity + /// reference. Initialized to 1 in `alloc_handle_for`; incremented in + /// `nt_duplicate_object` when `DUPLICATE_CLOSE_SOURCE` is absent; + /// decremented in `nt_close` which drops the underlying object only + /// when the count reaches zero. + pub handle_refcount: HashMap, + /// Pending timer expirations — `(deadline, handle)` sorted ascending by + /// deadline. Pushed by `arm_timer`, popped by `fire_due_timers`. Kept in + /// lockstep with the per-`Timer` object's `deadline` field via the + /// `arm_timer`/`disarm_timer` helpers. See the plan's step 3/6 for the + /// design rationale — timer deadlines coexist with + /// `Scheduler::timed_waits` but track a different class (signaled object + /// fires, not thread wake-ups). + pub pending_timer_fires: Vec<(u64, u32)>, + /// Per-handle signal/wait/wake audit trail. Default `enabled=false` → + /// every record method is a no-op. Flip via `--trace-handles`/ + /// `XENIA_TRACE_HANDLES` to diagnose missing-signal deadlocks (handles + /// 0x10FC / 0x1014 / 0x1104 / 0x10DC / 0x10F0 specifically). See + /// [`crate::audit`] for layout. + pub audit: HandleAudit, + /// M2.2 — banked reservation table for `lwarx`/`stwcx.` under M3's + /// per-HW-thread parallelism. Always allocated. Consulted by the + /// interpreter when `reservations.is_enabled()` is true; otherwise + /// the legacy per-`PpcContext` fields drive observable behavior. + /// Settable via `--reservations-table` / `XENIA_RESERVATIONS_TABLE=1` + /// for golden verification, or implicitly under `--parallel`. + /// See [`xenia_cpu::ReservationTable`] for the concurrency model. + pub reservations: std::sync::Arc, + /// Map from `(module, ordinal)` to the guest-side import-thunk address + /// resolved at load time. 
Reverse of `xenia-app/src/main.rs`'s + /// `thunk_map`. Populated from xenia-app's Phase 1 (record_type==1 + /// only). Used by `xex_get_procedure_address` to resolve ordinals back + /// to callable thunks. + thunks_by_ordinal: HashMap<(ModuleId, u16), u32>, + /// First-Pixels diagnostic latch. Set the first time + /// `RtlRaiseException` fires with code `0xE06D7363` (MSVC C++ throw) + /// so the deep stack-walk + `runtime_error` decode in + /// `rtl_raise_exception` only emits once per run, regardless of how + /// many subsequent throws fire. Reset on each fresh process start. + pub cxx_throw_logged: bool, } impl KernelState { - pub fn new() -> Self { + /// Construct a kernel with the supplied GPU backend. + /// + /// The caller (typically `cmd_exec_inner`) decides whether to install + /// an inline backend (default) or a threaded one (`--gpu-thread`). + /// Most existing call sites build via [`Self::new`], which defaults to + /// an inline backend; the threaded constructor lives at + /// [`Self::with_gpu`]. + pub fn with_gpu(gpu: xenia_gpu::GpuBackend) -> Self { + // Scheduler starts empty; the app installs the initial thread on + // slot 0 via `install_initial_thread` right after construction. + let mut scheduler = Scheduler::new(); + use std::sync::atomic::AtomicU32; + let reservations = std::sync::Arc::new(xenia_cpu::ReservationTable::new()); + // M3.7 — wire the reservation table to the scheduler so + // `spawn`/`install_initial_thread` populate every PpcContext's + // `reservation_table` clone. The table is `disabled` by + // default; `--reservations-table` / `XENIA_RESERVATIONS_TABLE` + // / M3 spawn flip it on. 
+ scheduler.set_reservation_table(Some(reservations.clone())); let mut state = Self { exports: HashMap::new(), - next_handle: 0x1000, - tls_slots: HashMap::new(), - next_tls_index: 0, + next_handle: AtomicU32::new(0x1000), + scheduler, + next_tls_index: AtomicU32::new(0), + cs_waiters: HashMap::new(), objects: HashMap::new(), - heap_cursor: 0x4000_0000, // Start of user heap region - stack_cursor: 0x7100_0000, // Above main stack + heap_cursor: AtomicU32::new(0x4000_0000), // Start of user heap region + stack_cursor: AtomicU32::new(0x7100_0000), // Above main stack gpu_command_buffer: 0, + gpu, + input_packet_number: 0, + last_input_bytes: 0, image_base: 0, - next_thread_id: 1, + next_thread_id: AtomicU32::new(1), + vfs: None, + ui: None, + interrupts: crate::interrupts::InterruptState::default(), + handle_refcount: HashMap::new(), + pending_timer_fires: Vec::new(), + audit: HandleAudit::default(), + reservations, + thunks_by_ordinal: HashMap::new(), + cxx_throw_logged: false, }; crate::exports::register_exports(&mut state); crate::xam::register_exports(&mut state); state } + /// Default constructor — installs an inline `GpuSystem`. Kept for + /// callers that don't (yet) thread a `GpuBackend` choice through. + pub fn new() -> Self { + Self::with_gpu(xenia_gpu::GpuBackend::Inline(xenia_gpu::GpuSystem::new())) + } + pub fn register_export( &mut self, module: ModuleId, @@ -64,31 +217,159 @@ impl KernelState { self.exports.insert((module, ordinal), (name, func)); } + /// Record an import-thunk address resolved at load time. Called once + /// per `record_type==1` import in xenia-app's Phase 1. Idempotent: a + /// duplicate ordinal overwrites (later wins; in practice the loader + /// emits each ordinal once per module). + pub fn register_thunk(&mut self, module: ModuleId, ordinal: u16, address: u32) { + self.thunks_by_ordinal.insert((module, ordinal), address); + } + + /// Resolve a `(module, ordinal)` to its registered thunk address. 
+ pub fn resolve_thunk(&self, module: ModuleId, ordinal: u16) -> Option { + self.thunks_by_ordinal.get(&(module, ordinal)).copied() + } + + /// Map a pseudo-`HMODULE` (as returned by `XexGetModuleHandle`) back + /// to its `ModuleId`. Returns `None` for unknown handles, including + /// the loaded XEX's `image_base` (which is *not* a kernel module). + pub fn module_id_from_hmodule(&self, handle: u32) -> Option { + match handle { + HMODULE_XBOXKRNL => Some(ModuleId::Xboxkrnl), + HMODULE_XAM => Some(ModuleId::Xam), + _ => None, + } + } + + /// Dispatch a kernel export on the current HW thread. Uses `mem::replace` + /// to temporarily move the active `PpcContext` out of its scheduler slot, + /// so the export function can receive `&mut ctx` while also getting + /// `&mut self` (which contains the scheduler). Without this, the export + /// signature would have to avoid aliasing via a bundle struct — see the + /// approved plan's ExportCtx section for the alternative we rejected. + /// + /// While the export runs, `scheduler.hw_threads[current_hw_id].ctx` holds + /// a freshly-constructed placeholder. Exports that reach through + /// `state.scheduler` must not touch the current slot's `ctx` field. + /// + /// **Perf note (First-Pixels M1):** this function fires ~250K/s on + /// Sylpheed (1 import per 40 guest instructions). A former + /// `#[tracing::instrument]` attribute + two `tracing::info!` call + /// sites made up ~28% of `run_execution` wall time on a post-Tier-3 + /// profile — most of it in `tracing::span::Span::new` + + /// `Layered::new_span` + `ErrorLayer::on_new_span`. The span was at + /// `level = "debug"` but the span **construction** happened + /// unconditionally; only the emit was level-gated. 
Removing the + /// attribute + the two `info!` lines recovers the overhead without + /// losing any observability — the `metrics::counter!("kernel.calls", + /// "name" => name)` below still tracks per-export counts, and + /// unimplemented lookups still emit a `warn!`. pub fn call_export( &mut self, module: ModuleId, ordinal: u32, - ctx: &mut PpcContext, - mem: &mut GuestMemory, + mem: &GuestMemory, ) -> bool { - if let Some(&(name, func)) = self.exports.get(&(module, ordinal)) { - tracing::info!( - "Kernel call: {:?}:{:#x} ({}) args=[{:#x}, {:#x}, {:#x}, {:#x}]", - module, ordinal, name, - ctx.gpr[3], ctx.gpr[4], ctx.gpr[5], ctx.gpr[6] - ); - func(ctx, mem, self); - tracing::info!(" -> returned {:#x}", ctx.gpr[3]); + // The thread whose ctx we're swapping out must be addressed by + // `ThreadRef`, not `hw_id` — under per-slot runqueues a bare + // `hw_id` alone can't distinguish multiple threads on the same + // slot, and Axis 4 migration can change the slot underneath us. + let r = self + .scheduler + .current + .expect("call_export: no current thread"); + let mut ctx = std::mem::replace( + self.scheduler.ctx_mut_ref(r), + PpcContext::new(), + ); + + let result = if let Some(&(name, func)) = self.exports.get(&(module, ordinal)) { + metrics::counter!("kernel.calls", "name" => name).increment(1); + tracing::trace!(target: "probe_calls", "hw={} call={} r3={:#x} r4={:#x} r5={:#x} lr={:#x}", + r.hw_id, name, ctx.gpr[3], ctx.gpr[4], ctx.gpr[5], ctx.lr); + func(&mut ctx, mem, self); true } else { + metrics::counter!("kernel.unimplemented").increment(1); tracing::warn!( - "Unimplemented kernel export: {:?}:{:#x}", - module, ordinal + module = ?module, + ordinal = format_args!("{:#x}", ordinal), + "unimplemented kernel export" ); - // Return 0 (STATUS_SUCCESS) by default for unimplemented calls ctx.gpr[3] = 0; false + }; + + // Restore the (possibly mutated) ctx by ThreadRef. 
Axis 4 + // self-migration (KeSetAffinityThread(NtCurrentThread, ...)) + // updates `scheduler.current` in place; re-read here so we + // restore onto the thread's new slot, not its old one. + let final_ref = self.scheduler.current.unwrap_or(r); + *self.scheduler.ctx_mut_ref(final_ref) = ctx; + result + } + + /// Axis 4: `KeSetAffinityThread` orchestration. Drives the scheduler's + /// migration and fixes up every `ThreadRef` held outside the + /// scheduler (kernel object waiter lists, critical-section waiters, + /// `interrupts.injected_ref`). Returns the previous mask. + pub fn set_affinity(&mut self, handle: u32, new_mask: u8, mem: &GuestMemory) -> u8 { + let Some(r) = self.scheduler.find_by_handle(handle) else { + return 0; + }; + let (old_mask, _new_ref, fixup) = self.scheduler.set_affinity_ref( + r, + new_mask, + &mut GuestMemoryPcr(mem), + ); + if let Some(fx) = fixup { + use crate::objects::KernelObject; + for obj in self.objects.values_mut() { + match obj { + KernelObject::Event { waiters, .. } + | KernelObject::Semaphore { waiters, .. } + | KernelObject::Thread { waiters, .. } + | KernelObject::Mutex { waiters, .. } => { + for w in waiters.iter_mut() { + fx.apply(w); + } + } + _ => {} + } + } + for list in self.cs_waiters.values_mut() { + for w in list.iter_mut() { + fx.apply(w); + } + } + if let Some(ref mut ir) = self.interrupts.injected_ref { + fx.apply(ir); + } } + old_mask + } + + /// Install the initial (main) guest thread on HW slot 0. Called once at + /// startup after the app allocates the main stack/PCR/TLS blocks. 
+ pub fn install_initial_thread( + &mut self, + ctx: PpcContext, + stack_base: u32, + stack_size: u32, + pcr_base: u32, + tls_base: u32, + thread_handle: u32, + mem: &GuestMemory, + ) { + self.scheduler.install_initial_thread( + ctx, + stack_base, + stack_size, + pcr_base, + tls_base, + thread_handle, + &mut GuestMemoryPcr(mem), + ); } pub fn export_name(&self, module: ModuleId, ordinal: u32) -> Option<&'static str> { @@ -96,60 +377,261 @@ impl KernelState { } pub fn alloc_handle(&mut self) -> u32 { - let h = self.next_handle; - self.next_handle += 4; - h + // M2.4: lock-free fetch_add. Relaxed is sufficient — IDs are + // opaque tokens; no payload is sequenced against the counter. + self.next_handle + .fetch_add(4, std::sync::atomic::Ordering::Relaxed) } pub fn alloc_handle_for(&mut self, obj: KernelObject) -> u32 { let h = self.alloc_handle(); self.objects.insert(h, obj); + // Each fresh handle starts with one logical reference (the creator). + // `NtDuplicateObject` bumps this; `NtClose` decrements; the object is + // only dropped when the count reaches zero. See `nt_close` for the + // aliased-handle rationale. + self.handle_refcount.insert(h, 1); h } + // ===== Handle audit hooks ===== + // + // These are no-ops when `audit.enabled == false`, so call sites can + // unconditionally invoke them without a hot-path branch in release builds + // (the `inline` `if !enabled return` short-circuits before any work). + + /// Build a [`HandleAuditEntry`] describing the *current* call-site — + /// captures cycle (slot-0 timebase), current `tid`, and `lr` from the + /// passed `PpcContext`. + fn audit_entry(&self, lr: u32, source: &'static str, aux: u64) -> HandleAuditEntry { + let hw_id = self.scheduler.current_hw_id().unwrap_or(0); + let cycle = self.scheduler.ctx(hw_id).timebase; + let tid = self.scheduler.tid(hw_id).unwrap_or(0); + HandleAuditEntry { cycle, tid, lr, source, aux } + } + + /// Record the creation of a fresh handle. 
`kind` is one of the stable + /// labels documented on [`crate::audit::HandleAuditTrail::kind`]. + pub fn audit_create(&mut self, handle: u32, kind: &'static str, lr: u32, source: &'static str) { + if !self.audit.enabled { + return; + } + let entry = self.audit_entry(lr, source, 0); + self.audit.record_create(handle, kind, entry); + } + + /// Record a Set/Pulse/Release/etc. call against a handle. `aux` is the + /// previous signal state (or per-export-specific data). + pub fn audit_signal(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) { + if !self.audit.enabled { + return; + } + let entry = self.audit_entry(lr, source, aux); + self.audit.record_signal(handle, entry); + } + + /// Record a `Wait*` call against a handle. `aux` packs `(alertable as u64) + /// | (timeout_kind << 8)` etc. — schema is informal; the dump just prints + /// it. + pub fn audit_wait(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) { + if !self.audit.enabled { + return; + } + let entry = self.audit_entry(lr, source, aux); + self.audit.record_wait(handle, entry); + } + + /// Record a wake event (called from `wake_eligible_waiters`). `aux` + /// is the status code stamped into the woken thread's `gpr[3]`. + pub fn audit_wake(&mut self, handle: u32, lr: u32, source: &'static str, aux: u64) { + if !self.audit.enabled { + return; + } + let entry = self.audit_entry(lr, source, aux); + self.audit.record_wake(handle, entry); + } + + /// Read a TLS slot for the currently running HW thread. pub fn tls_get(&self, index: u32) -> u64 { - self.tls_slots.get(&index).copied().unwrap_or(0) + self.scheduler.tls_get(index) } + /// Write a TLS slot for the currently running HW thread. pub fn tls_set(&mut self, index: u32, value: u64) { - self.tls_slots.insert(index, value); + self.scheduler.tls_set(index, value); } + /// Allocate a new global TLS slot index. Grows every HW thread's + /// `tls_values` array to match. 
pub fn tls_alloc(&mut self) -> u32 { - let idx = self.next_tls_index; - self.next_tls_index += 1; + use std::sync::atomic::Ordering; + // M2.4: atomic bump. The Scheduler::tls_grow_to call still needs + // a coherent post-bump value, so we read the new size from the + // fetch_add return. + let idx = self.next_tls_index.fetch_add(1, Ordering::Relaxed); + let new_size = idx + 1; + self.scheduler.tls_grow_to(new_size as usize); idx } /// Allocate guest memory from the heap bump allocator. /// Returns the base address of the allocated region. - pub fn heap_alloc(&mut self, size: u32, mem: &mut GuestMemory) -> Option { + pub fn heap_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option { + use std::sync::atomic::Ordering; let aligned_size = (size + 0xFFF) & !0xFFF; // Page-align - let base = self.heap_cursor; - if base.checked_add(aligned_size).is_none() || base + aligned_size > 0x6FFF_FFFF { + // M2.4: atomic bump, then verify post-bump invariants. If the + // bump pushed us past the heap-region ceiling, the cursor stays + // advanced — subsequent allocations also fail, matching the + // pre-M2 sequential semantics. We don't try to "undo" the bump + // because that opens a CAS-loop race for marginal benefit (a + // failing alloc near the limit is already game-over). + let base = self.heap_cursor.fetch_add(aligned_size, Ordering::Relaxed); + let new_top = base.checked_add(aligned_size)?; + if new_top > 0x6FFF_FFFF { return None; } let protect = xenia_memory::page_table::MemoryProtect::READ | xenia_memory::page_table::MemoryProtect::WRITE; - if mem.alloc(base, aligned_size, protect).is_err() { - return None; - } - self.heap_cursor += aligned_size; + mem.alloc(base, aligned_size, protect).ok()?; Some(base) } /// Allocate a kernel stack. 
- pub fn stack_alloc(&mut self, size: u32, mem: &mut GuestMemory) -> Option { + pub fn stack_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option { + use std::sync::atomic::Ordering; let aligned_size = (size + 0xFFF) & !0xFFF; - let base = self.stack_cursor; + let base = self.stack_cursor.fetch_add(aligned_size, Ordering::Relaxed); let protect = xenia_memory::page_table::MemoryProtect::READ | xenia_memory::page_table::MemoryProtect::WRITE; - if mem.alloc(base, aligned_size, protect).is_err() { - return None; - } - self.stack_cursor += aligned_size; + mem.alloc(base, aligned_size, protect).ok()?; Some(base + aligned_size) // Return top of stack } + + // ===== Timer subsystem ===== + + /// Idempotent arm — removes any prior entry for `handle`, then inserts + /// the new `(deadline, handle)` pair and re-sorts by deadline ascending. + /// The per-`Timer` object's `deadline` field must be set separately by + /// the caller (see `NtSetTimerEx` in exports.rs) — this helper only + /// manages the central pending-fires list so `fire_due_timers` has a + /// sorted head to peek. + pub fn arm_timer(&mut self, handle: u32, deadline: u64) { + self.pending_timer_fires.retain(|&(_, h)| h != handle); + self.pending_timer_fires.push((deadline, handle)); + self.pending_timer_fires.sort_by_key(|&(d, _)| d); + } + + /// Idempotent disarm — strip any entry for `handle`. Safe to call + /// regardless of prior state; `NtClose`, `NtCancelTimer`, and the + /// periodic-rearm guard all invoke this. + pub fn disarm_timer(&mut self, handle: u32) { + self.pending_timer_fires.retain(|&(_, h)| h != handle); + } + + /// Peek the earliest pending timer deadline. Paired with + /// `Scheduler::earliest_wait_deadline` by the main loop's "advance to + /// next event" coordination — the earlier of the two drives + /// `advance_all_timebases_to`. 
+ pub fn earliest_timer_deadline(&self) -> Option { + self.pending_timer_fires.first().map(|&(d, _)| d) + } + + /// Fire every timer whose deadline is `<= now` (derived from slot 0's + /// timebase, matching `parse_timeout`'s "current thread" fallback). + /// For each fire: mark the timer `signaled=true`, clear its + /// `deadline`, rearm if periodic, then wake eligible waiters via + /// `exports::wake_eligible_waiters`. Returns `true` iff any timer + /// fired — the caller uses this to decide whether the scheduler round + /// needs a follow-up `advance_to_next_wake_if_due` step. + pub fn fire_due_timers(&mut self) -> bool { + let now = self.scheduler.ctx(0).timebase; + let mut fired = false; + loop { + let Some(&(deadline, handle)) = self.pending_timer_fires.first() else { + break; + }; + if deadline > now { + break; + } + self.pending_timer_fires.remove(0); + // Mark signaled + capture period before any rearm so we don't + // double-borrow the object while calling wake_eligible_waiters. + let periodic_next = + if let Some(KernelObject::Timer { + signaled, + deadline: obj_deadline, + period_ticks, + .. + }) = self.objects.get_mut(&handle) + { + *signaled = true; + *obj_deadline = None; + if *period_ticks > 0 { + Some(now + *period_ticks) + } else { + None + } + } else { + // Closed handle — its entry lingered because disarm on + // NtClose was missed, OR fire_due_timers picked up a + // race. Skip silently; nothing to wake. + None + }; + if let Some(next) = periodic_next { + if let Some(KernelObject::Timer { deadline, .. }) = + self.objects.get_mut(&handle) + { + *deadline = Some(next); + } + self.arm_timer(handle, next); + } + crate::exports::wake_eligible_waiters(self, handle); + fired = true; + } + fired + } + + /// Handle deadline-expiry cleanup for a thread whose wait timed out. + /// Called by the main loop right after `Scheduler::advance_to_next_wake` + /// returns a `Some((ref, reason))`. 
Stamps `STATUS_TIMEOUT` into the + /// woken thread's `gpr[3]` and scrubs its `ThreadRef` out of any + /// handle's waiter list so a later signal can't consume the + /// auto-reset slot into a stale waiter. + /// + /// `BlockReason::DelayUntil` is a pure sleep and expects + /// `STATUS_SUCCESS` — the default pre-populated value in + /// `ke_delay_execution_thread` — so we leave `gpr[3]` alone for it. + pub fn handle_timeout_wake( + &mut self, + r: ThreadRef, + reason: xenia_cpu::scheduler::BlockReason, + ) { + use xenia_cpu::scheduler::BlockReason; + const STATUS_TIMEOUT: u64 = 0x0000_0102; + match reason { + BlockReason::WaitAny { handles, .. } | BlockReason::WaitAll { handles, .. } => { + self.scheduler.ctx_mut_ref(r).gpr[3] = STATUS_TIMEOUT; + for h in handles { + if let Some(obj) = self.objects.get_mut(&h) { + if let Some(waiters) = obj.waiters_mut() { + waiters.retain(|&w| w != r); + } + } + } + } + BlockReason::DelayUntil(_) => { + // Pure sleep → default STATUS_SUCCESS is correct; no handles + // to scrub. + } + BlockReason::CriticalSection(cs_ptr) => { + self.scheduler.ctx_mut_ref(r).gpr[3] = STATUS_TIMEOUT; + if let Some(list) = self.cs_waiters.get_mut(&cs_ptr) { + list.retain(|&w| w != r); + } + } + BlockReason::Suspended => {} + } + } } impl Default for KernelState { @@ -157,3 +639,89 @@ impl Default for KernelState { Self::new() } } + +#[cfg(test)] +mod tests { + use super::*; + use xenia_memory::GuestMemory; + + /// Ten consecutive `heap_alloc(0x14)` calls must return distinct + /// page-aligned addresses. A previous bug had kernel exports passing 0 as + /// `size`, causing the bump allocator to return the same address every + /// time — 10 "allocations" that all aliased 0x40105000 and silently + /// corrupted the guest's static-constructor state. 
+ #[test] + fn heap_alloc_advances_for_nonzero_size() { + let mut mem = GuestMemory::new().expect("memory init"); + let mut state = KernelState::new(); + let mut seen = Vec::new(); + for _ in 0..10 { + let addr = state + .heap_alloc(0x14, &mut mem) + .expect("heap must have room for 0x14 bytes"); + assert_eq!(addr & 0xFFF, 0, "heap returns page-aligned addresses"); + assert!(!seen.contains(&addr), "heap returned duplicate address {addr:#x}"); + seen.push(addr); + } + } + + /// `heap_alloc(0)` must not advance the cursor (it has nothing to do). + /// The kernel exports that previously hit this path did so because they + /// read the wrong argument register; guarded at the export boundary now. + #[test] + fn heap_alloc_zero_is_noop_in_cursor() { + use std::sync::atomic::Ordering; + let mem = GuestMemory::new().expect("memory init"); + let mut state = KernelState::new(); + let before = state.heap_cursor.load(Ordering::Relaxed); + let _ = state.heap_alloc(0, &mem); + let after = state.heap_cursor.load(Ordering::Relaxed); + assert_eq!(before, after, "zero-size alloc must not advance heap cursor"); + } + + /// M2.4: concurrent handle allocations must produce distinct values. + /// Ten threads each allocate 100 handles via `alloc_handle`; the union + /// must contain exactly 1000 distinct values, and the maximum equals + /// `0x1000 + 4 * (1000 - 1)` (ascending step is 4 per the kernel + /// allocator's policy). + #[test] + fn concurrent_alloc_handle_distinct() { + use std::collections::HashSet; + use std::sync::Mutex; + use std::sync::atomic::{AtomicU32, Ordering}; + + // Use a free-standing AtomicU32 mirroring `next_handle`'s semantics; + // we can't easily share `&mut KernelState` across threads. The + // production code uses the same `fetch_add(4, Relaxed)` recipe. 
+ let counter = std::sync::Arc::new(AtomicU32::new(0x1000)); + let collected: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(HashSet::new())); + + let mut handles = Vec::new(); + for _ in 0..10 { + let c = counter.clone(); + let s = collected.clone(); + handles.push(std::thread::spawn(move || { + let mut local = Vec::with_capacity(100); + for _ in 0..100 { + local.push(c.fetch_add(4, Ordering::Relaxed)); + } + let mut g = s.lock().unwrap(); + for v in local { + g.insert(v); + } + })); + } + for h in handles { + h.join().unwrap(); + } + let set = collected.lock().unwrap(); + assert_eq!( + set.len(), + 1000, + "expected 1000 distinct handles, got {}", + set.len() + ); + assert!(set.iter().all(|h| (h - 0x1000) % 4 == 0)); + } +} diff --git a/crates/xenia-kernel/src/thread.rs b/crates/xenia-kernel/src/thread.rs new file mode 100644 index 0000000..9f4bc53 --- /dev/null +++ b/crates/xenia-kernel/src/thread.rs @@ -0,0 +1,68 @@ +//! Guest-thread image allocation — shared by the initial thread setup in +//! `xenia-app/src/main.rs` and `ExCreateThread`. Stack, PCR, and TLS blocks +//! all come from the existing kernel bump allocators so layout is consistent. + +use xenia_memory::{GuestMemory, MemoryAccess}; + +use crate::state::KernelState; + +/// Addresses the caller passes to `Scheduler::spawn` / the initial-thread +/// setup. Matches xenia-canary's per-thread allocations: a stack, a PCR, and +/// a TLS block. +#[derive(Debug, Clone, Copy)] +pub struct ThreadImage { + pub stack_base: u32, + pub stack_size: u32, + pub pcr_base: u32, + pub tls_base: u32, +} + +/// Allocate stack + PCR + TLS for one guest thread and initialize the PCR +/// fields that games read in their thread prolog. +/// +/// - Stack comes from `KernelState::stack_alloc` (bump allocator at +/// 0x7100_0000 upward). The returned base is the *bottom*; callers +/// compute SP as `base + size`. 
+/// - PCR and TLS are fixed 4 KiB pages allocated via `heap_alloc` so they +/// land in the user heap region together with other kernel metadata. +/// - `hw_thread_id` is written at PCR+0x2C so `KeGetCurrentProcessorNumber`- +/// style reads from r13 resolve correctly even though we never register +/// that export. +pub fn allocate_thread_image( + kernel: &mut KernelState, + mem: &GuestMemory, + stack_size: u32, + hw_thread_id: u8, +) -> Option { + // Round stack size to a page and give games a minimum that matches + // xenia-canary's 16 MiB default when callers request 0 (common for + // ExCreateThread when the caller lets the kernel pick). + let stack_size = if stack_size == 0 { + 0x10_0000 + } else { + (stack_size + 0xFFF) & !0xFFF + }; + // stack_alloc returns top-of-stack; we need the base. + let stack_top = kernel.stack_alloc(stack_size, mem)?; + let stack_base = stack_top - stack_size; + + let pcr_base = kernel.heap_alloc(0x1000, mem)?; + let tls_base = kernel.heap_alloc(0x1000, mem)?; + + // PCR layout (canary xboxkrnl/xboxkrnl_module.cc, simplified): + // +0x000 tls_ptr → TLS block base + // +0x02C current_processor_id → HW thread id (0..5) + // +0x100 current_thread → placeholder non-zero tag + // +0x150 dpc_active → 0 (no DPC queued) + mem.write_u32(pcr_base, tls_base); + mem.write_u32(pcr_base + 0x2C, hw_thread_id as u32); + mem.write_u32(pcr_base + 0x100, 0x1000); + mem.write_u32(pcr_base + 0x150, 0); + + Some(ThreadImage { + stack_base, + stack_size, + pcr_base, + tls_base, + }) +} diff --git a/crates/xenia-kernel/src/ui_bridge.rs b/crates/xenia-kernel/src/ui_bridge.rs new file mode 100644 index 0000000..a4b2289 --- /dev/null +++ b/crates/xenia-kernel/src/ui_bridge.rs @@ -0,0 +1,185 @@ +//! Bridge between the kernel (CPU-thread side) and a host UI (main-thread side). +//! +//! The kernel side needs to: +//! - snapshot the latest host gamepad each time a guest calls +//! `XamInputGetState`, and +//! 
- signal the UI when the guest calls `VdSwap` so the UI can upload the +//! guest's frontbuffer to a wgpu texture and present it. +//! +//! Both directions are expressed as trait-object closures so that `xenia-kernel` +//! does not have to depend on winit/wgpu/gilrs. The [`UiBridge`] is installed +//! on [`KernelState::ui`] by `cmd_exec` when `--ui` is passed. + +use std::collections::HashMap; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, AtomicU64}; + +use xenia_gpu::texture_cache::TextureKey; +use xenia_gpu::xenos_constants::XenosConstantsBlock; +use xenia_hid::GamepadState; +use xenia_memory::MemoryAccess; + +/// Information surfaced to the UI each time the guest presents a frame. +/// +/// Fields mirror the seven "interesting" arguments to `VdSwap` in +/// `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc`: the raw +/// frontbuffer pointer, its dimensions, and the format/color-space enum values +/// the guest passed through. +#[derive(Clone, Copy, Debug)] +pub struct SwapInfo { + /// Guest physical/virtual address of the frontbuffer to present. + pub frontbuffer_addr: u32, + /// Width in pixels as reported by the guest. + pub width: u32, + /// Height in pixels as reported by the guest. + pub height: u32, + /// Xenos texture format enum (the guest passes a pointer; we dereference + /// it here). 0 means "unknown / guest passed a null pointer". + pub texture_format: u32, + /// Color-space enum (sRGB / BT.709 / …). + pub color_space: u32, + /// Monotonically increasing frame counter maintained by the kernel; useful + /// for HUD display and deduping. + pub frame_index: u64, + /// Total PM4 `DRAW_INDX*` packets the GPU has captured since boot. + /// Surfaced so the UI HUD can show progress even before the full + /// uber-shader pipeline is wired in. + pub draws_total: u64, + /// Total PM4 packets executed, across all opcodes — useful signal for + /// "is the GPU actually getting anything at all to consume?". 
+ pub packets_total: u64, + /// Most-recent draw's Xenos primitive-type code (0 = none yet). + pub last_draw_prim: u32, + /// Most-recent draw's vertex count. + pub last_draw_vertex_count: u32, + /// Indirect-buffer jumps so far (useful "is the game driving the ring + /// buffer through IBs?" signal). + pub indirect_buffer_jumps: u64, + /// WAIT_REG_MEM stalls observed on the GPU slot. + pub wait_reg_mem_blocks: u64, + /// Summed CPU instruction count across all 6 HW threads. Mirrors the + /// `cycle_count` field each `PpcContext` maintains; gives the HUD a live + /// "how far has the guest run?" readout. + pub instructions_total: u64, + /// Active VS shader blob key at the most recent DRAW_INDX* (0 = none). + /// P3b: the UI uses this to index into `handles.shader_blobs` so the + /// Xenos uber-shader interpreter can upload the matching microcode. + pub vs_blob_key: u32, + /// Active PS shader blob key at the most recent DRAW_INDX*. + pub ps_blob_key: u32, + /// P4: total EDRAM→memory resolves fired since boot (TILE_FLUSH + /// events). Non-zero means the game is committing pixels. + pub resolves_total: u64, + /// Subset of `resolves_total` whose byte-copy path succeeded and wrote + /// at least one sample into guest memory. + pub resolves_copied_total: u64, + /// Subset of `resolves_total` that were skipped by the byte-copy path + /// due to an unsupported format / MSAA mode / 3D destination. + pub resolves_skipped_total: u64, + /// P4: unique RT keys seen (from the GPU's internal render-target + /// cache). Grows as the game exercises new RT footprints. + pub unique_render_targets: u64, + /// P6: total graphics-interrupt callbacks delivered (v-sync + CP). + /// Non-zero means `VdSetGraphicsInterruptCallback` has been wired end + /// to end and callbacks are actually running. + pub interrupts_delivered: u64, + /// P6: graphics-interrupts queued but dropped (callback unset, + /// thread 0 blocked, or already inside another callback). 
+ pub interrupts_dropped: u64, +} + +/// Handles the kernel uses to talk to a running host UI. +/// +/// None of the closures are allowed to block for long — they are called from +/// the CPU interpreter thread on the hot path. +#[derive(Clone)] +pub struct UiBridge { + /// Snapshot the host gamepad. Called from `XamInputGetState`. + pub gamepad: Arc GamepadState + Send + Sync>, + /// Report that the guest completed a frame. The closure gets the swap + /// metadata plus a borrow of guest memory so it can copy the frontbuffer + /// bytes into a UI-owned staging buffer before returning. Called from + /// `VdSwap` on the CPU thread. + pub post_swap: Arc, + /// Indicates the UI wants the CPU loop to stop. Checked periodically by + /// the interpreter loop. + pub shutdown: Arc, + /// Set to `true` when a gamepad is present. `XamInputGetState` returns + /// `ERROR_DEVICE_NOT_CONNECTED` when this is `false`. + pub gamepad_connected: Arc, + /// Live CPU instruction counter mirror. The app's run loop publishes + /// the sum of `ctx.cycle_count` across HW threads here every ~8k + /// instructions so the HUD can report progress between VdSwap events. + pub instructions_counter: Arc, + /// P3b asset publish: `vd_swap` snapshots the GPU's `shader_blobs` and + /// constants register region and feeds them to the UI so the Xenos + /// uber-shader interpreter has the microcode + constants needed to + /// execute the guest draw. Split from `post_swap` so the asset wire + /// stays optional — if the UI doesn't need them (headless mode) the + /// closure is a no-op. + pub publish_xenos_assets: + Arc>, XenosConstantsBlock) + Send + Sync>, + /// P4 frontbuffer publish: at each `VdSwap`, the kernel CPU-side + /// detiles the guest frontbuffer (k_8_8_8_8 Tiled2D) into a linear + /// RGBA8 buffer and hands it to the UI. The closure receives + /// `(width, height, bytes)` — the UI uploads it as a texture. 
+ pub publish_frontbuffer: + Arc) + Send + Sync>, + /// P5 primary texture publish: at each `VdSwap`, the kernel thread + /// decodes the PS shader's primary-texture fetch constant (slot 0 + /// for now) and hands the decoded linear bytes + key to the UI so + /// the xenos pipeline can bind a real texture at `@group(1)`. + /// Receives `(TextureKey, bytes)`; when `None` is sent the UI + /// reverts to its magenta stub. + pub publish_texture: + Arc)>) + Send + Sync>, +} + +impl UiBridge { + /// Snapshot input state (user 0 only; higher indices are unconnected). + pub fn snapshot_gamepad(&self) -> GamepadState { + (self.gamepad)() + } + + /// True iff a gamepad is connected for user 0. + pub fn is_connected(&self, user_index: u32) -> bool { + user_index == 0 + && self + .gamepad_connected + .load(std::sync::atomic::Ordering::Relaxed) + } + + /// Push a swap event to the UI thread. + pub fn notify_swap(&self, info: SwapInfo, mem: &dyn MemoryAccess) { + (self.post_swap)(info, mem); + } + + /// Snapshot current shader blobs + constants and hand them to the UI. + /// Call from `vd_swap` so the UI has the matching assets for every + /// draw captured in this frame. + pub fn publish_assets( + &self, + blobs: HashMap>, + constants: XenosConstantsBlock, + ) { + (self.publish_xenos_assets)(blobs, constants); + } + + /// True iff the UI asked for shutdown. + pub fn should_shutdown(&self) -> bool { + self.shutdown.load(std::sync::atomic::Ordering::Relaxed) + } + + /// Hand a detiled frontbuffer frame to the UI. Called at most once per + /// `VdSwap`. `bytes` must be `width * height * 4` bytes in + /// `Rgba8Unorm` order (the UI pipeline's expected layout). + pub fn publish_frontbuffer(&self, width: u32, height: u32, bytes: Vec) { + (self.publish_frontbuffer)(width, height, bytes); + } + + /// Hand one decoded guest texture to the UI. `Some` = update the bound + /// slot; `None` = revert to the magenta stub. 
+ pub fn publish_texture(&self, tex: Option<(TextureKey, Vec)>) { + (self.publish_texture)(tex); + } +} diff --git a/crates/xenia-kernel/src/xam.rs b/crates/xenia-kernel/src/xam.rs index 29d210b..0f98adc 100644 --- a/crates/xenia-kernel/src/xam.rs +++ b/crates/xenia-kernel/src/xam.rs @@ -12,10 +12,10 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xam, 0x02, "NetDll_WSACleanup", stub_success); // Input - state.register_export(Xam, 0x0190, "XamInputGetCapabilities", xam_input_not_connected); - state.register_export(Xam, 0x0191, "XamInputGetState", xam_input_not_connected); - state.register_export(Xam, 0x0192, "XamInputSetState", xam_input_not_connected); - state.register_export(Xam, 0x0198, "XamInputGetKeystrokeEx", xam_input_not_connected); + state.register_export(Xam, 0x0190, "XamInputGetCapabilities", xam_input_get_capabilities); + state.register_export(Xam, 0x0191, "XamInputGetState", xam_input_get_state); + state.register_export(Xam, 0x0192, "XamInputSetState", xam_input_set_state); + state.register_export(Xam, 0x0198, "XamInputGetKeystrokeEx", xam_input_get_keystroke); // Inactivity state.register_export(Xam, 0x01A0, "XamEnableInactivityProcessing", stub_success); @@ -94,39 +94,114 @@ pub fn register_exports(state: &mut KernelState) { // ===== Generic stubs ===== -fn stub_success(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn stub_success(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0; } -fn stub_return_zero(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn stub_return_zero(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0; } -fn stub_error_no_more_files(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn stub_error_no_more_files(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0x12; // ERROR_NO_MORE_FILES } // ===== Input ===== -fn 
xam_input_not_connected(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { - ctx.gpr[3] = 0x48F; // ERROR_DEVICE_NOT_CONNECTED +/// Helper: pack a `GamepadState` into a 12-byte key used to detect input +/// changes. Cheap to compare across frames. +fn gamepad_key(state: &xenia_hid::GamepadState) -> u128 { + let mut bytes = [0u8; 16]; + bytes[0..2].copy_from_slice(&state.buttons.to_be_bytes()); + bytes[2] = state.left_trigger; + bytes[3] = state.right_trigger; + bytes[4..6].copy_from_slice(&state.left_stick_x.to_be_bytes()); + bytes[6..8].copy_from_slice(&state.left_stick_y.to_be_bytes()); + bytes[8..10].copy_from_slice(&state.right_stick_x.to_be_bytes()); + bytes[10..12].copy_from_slice(&state.right_stick_y.to_be_bytes()); + u128::from_be_bytes(bytes) +} + +fn xam_input_get_capabilities( + ctx: &mut PpcContext, + mem: &GuestMemory, + state: &mut KernelState, +) { + // r3 = user_index, r4 = flags, r5 = out X_INPUT_CAPABILITIES* + let user = ctx.gpr[3] as u32; + let out_ptr = ctx.gpr[5] as u32; + let connected = state.ui.as_ref().is_some_and(|ui| ui.is_connected(user)); + if !connected { + ctx.gpr[3] = xenia_hid::errors::DEVICE_NOT_CONNECTED as u64; + return; + } + xenia_hid::write_input_capabilities(mem, out_ptr); + ctx.gpr[3] = xenia_hid::errors::SUCCESS as u64; +} + +fn xam_input_get_state(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = user_index, r4 = flags, r5 = out X_INPUT_STATE* + let user = ctx.gpr[3] as u32; + let out_ptr = ctx.gpr[5] as u32; + let Some(ui) = state.ui.as_ref() else { + ctx.gpr[3] = xenia_hid::errors::DEVICE_NOT_CONNECTED as u64; + return; + }; + if !ui.is_connected(user) { + ctx.gpr[3] = xenia_hid::errors::DEVICE_NOT_CONNECTED as u64; + return; + } + let gamepad = ui.snapshot_gamepad(); + let key = gamepad_key(&gamepad); + if key != state.last_input_bytes { + state.input_packet_number = state.input_packet_number.wrapping_add(1); + state.last_input_bytes = key; + } + 
xenia_hid::write_input_state(mem, out_ptr, state.input_packet_number, &gamepad); + ctx.gpr[3] = xenia_hid::errors::SUCCESS as u64; +} + +fn xam_input_set_state(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { + // r3 = user_index, r4 = flags, r5 = X_INPUT_VIBRATION* + // Rumble is out of scope for Phase 1; we accept the call and return + // success so games don't retry in a tight loop, but we never actually + // shake anything. + let user = ctx.gpr[3] as u32; + let connected = state.ui.as_ref().is_some_and(|ui| ui.is_connected(user)); + if !connected { + ctx.gpr[3] = xenia_hid::errors::DEVICE_NOT_CONNECTED as u64; + return; + } + ctx.gpr[3] = xenia_hid::errors::SUCCESS as u64; +} + +fn xam_input_get_keystroke( + ctx: &mut PpcContext, + _mem: &GuestMemory, + _state: &mut KernelState, +) { + // No keyboard input in Phase 1 — always "queue empty". Games that only + // use the gamepad ignore this return code; those that drive text entry + // through the keystroke queue simply get a permanently empty queue, which + // manifests as no virtual-keyboard input — acceptable for minimal UI. 
+ ctx.gpr[3] = xenia_hid::errors::EMPTY as u64; } // ===== Loader ===== -fn xam_loader_launch_title(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xam_loader_launch_title(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { tracing::warn!("XamLoaderLaunchTitle called"); ctx.gpr[3] = 0; } -fn xam_loader_terminate_title(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xam_loader_terminate_title(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { tracing::warn!("XamLoaderTerminateTitle called"); ctx.gpr[3] = 0; } // ===== Task ===== -fn xam_task_schedule(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { +fn xam_task_schedule(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { let handle = state.alloc_handle(); tracing::info!("XamTaskSchedule: handle={:#x}", handle); ctx.gpr[3] = 0; @@ -134,7 +209,7 @@ fn xam_task_schedule(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut K // ===== Alloc ===== -fn xam_alloc(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { +fn xam_alloc(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = flags, r4 = size, r5 = out_ptr_ptr let size = ctx.gpr[4] as u32; let out_ptr = ctx.gpr[5] as u32; @@ -154,7 +229,7 @@ fn xam_alloc(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelStat // ===== User ===== -fn xam_user_get_xuid(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn xam_user_get_xuid(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = user_index, r4 = xuid_ptr let xuid_ptr = ctx.gpr[4] as u32; if xuid_ptr != 0 { @@ -163,7 +238,7 @@ fn xam_user_get_xuid(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut K ctx.gpr[3] = 0; } -fn xam_user_get_name(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn xam_user_get_name(ctx: &mut PpcContext, mem: &GuestMemory, _state: 
&mut KernelState) { // r3 = user_index, r4 = buffer, r5 = buffer_size let buffer = ctx.gpr[4] as u32; if buffer != 0 { @@ -172,14 +247,14 @@ fn xam_user_get_name(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut K ctx.gpr[3] = 0; } -fn xam_user_read_profile_settings(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xam_user_read_profile_settings(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { // Return error — no profile ctx.gpr[3] = 0x0000_048B; // ERROR_NOT_FOUND } // ===== System ===== -fn xam_get_execution_id(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { +fn xam_get_execution_id(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = execution_id_ptr_ptr — write pointer to execution info let ptr_ptr = ctx.gpr[3] as u32; if ptr_ptr != 0 { @@ -197,25 +272,25 @@ fn xam_get_execution_id(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut ctx.gpr[3] = 0; } -fn xam_get_system_version(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xam_get_system_version(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0x2000_0000; // System version } // ===== Notify ===== -fn xam_notify_create_listener(ctx: &mut PpcContext, _mem: &mut GuestMemory, state: &mut KernelState) { +fn xam_notify_create_listener(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { let handle = state.alloc_handle(); ctx.gpr[3] = handle as u64; } -fn xnotify_get_next(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xnotify_get_next(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { // r3 = handle, r4 = id_ptr, r5 = param_ptr ctx.gpr[3] = 0; // FALSE (no notifications) } // ===== Session ===== -fn xam_session_create_handle(ctx: &mut PpcContext, mem: &mut GuestMemory, state: &mut KernelState) { +fn xam_session_create_handle(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut 
KernelState) { // r3 = handle_ptr let handle_ptr = ctx.gpr[3] as u32; let handle = state.alloc_handle(); @@ -227,19 +302,19 @@ fn xam_session_create_handle(ctx: &mut PpcContext, mem: &mut GuestMemory, state: // ===== Locale ===== -fn xget_avpack(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xget_avpack(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0x16; // HDMI } -fn xget_game_region(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xget_game_region(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 0xFF; // All regions } -fn xget_language(ctx: &mut PpcContext, _mem: &mut GuestMemory, _state: &mut KernelState) { +fn xget_language(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { ctx.gpr[3] = 1; // English } -fn xget_video_mode(ctx: &mut PpcContext, mem: &mut GuestMemory, _state: &mut KernelState) { +fn xget_video_mode(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = video_mode_ptr let ptr = ctx.gpr[3] as u32; if ptr != 0 {