diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index a8cf88e..91b0ed4 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -124,6 +124,15 @@ enum Commands { /// `XENIA_TRACE_HANDLES=1`. #[arg(long)] trace_handles: bool, + /// Comma-separated list of handles to capture **signal attempts** + /// for, even when no `record_create` exists. Distinguishes "guest + /// never called Nt/KeSetEvent on this handle" (signal_attempts=0) + /// from "signal landed but waiter wasn't woken" (>0). Implies + /// `--trace-handles`. Examples: + /// `--trace-handles-focus=0x1004,0x100c,0x15e4,0x42450b5c` + /// `--trace-handles-focus=4100` (decimal; 0x1004) + #[arg(long)] + trace_handles_focus: Option, /// Force the threaded GPU backend (default at M1.9). Kept for /// explicit scripted runs; equivalent to omitting both /// `--gpu-inline` and `--ui`. Settable via `XENIA_GPU_THREAD=1`. @@ -301,6 +310,7 @@ fn main() -> Result<()> { ui, halt_on_deadlock, trace_handles, + trace_handles_focus, gpu_thread, gpu_inline, reservations_table, @@ -317,6 +327,7 @@ fn main() -> Result<()> { ui, halt_on_deadlock, trace_handles, + trace_handles_focus.as_deref(), gpu_thread, gpu_inline, reservations_table, @@ -514,6 +525,7 @@ fn cmd_exec( ui: bool, halt_on_deadlock: bool, trace_handles: bool, + trace_handles_focus: Option<&str>, gpu_thread: bool, gpu_inline: bool, reservations_table: bool, @@ -531,6 +543,7 @@ fn cmd_exec( ui, halt_on_deadlock, trace_handles, + trace_handles_focus, gpu_thread, gpu_inline, reservations_table, @@ -566,6 +579,7 @@ fn cmd_check( false, // ui — headless false, // halt_on_deadlock — keep the force-wake default for probe/check runs false, // trace_handles — diagnostic only, never wanted on golden runs + None, // trace_handles_focus — same gpu_thread, gpu_inline, reservations_table, @@ -588,6 +602,7 @@ fn cmd_exec_inner( ui: bool, halt_on_deadlock: bool, trace_handles: bool, + trace_handles_focus: Option<&str>, gpu_thread: bool, gpu_inline: bool, reservations_table: bool, @@ -782,12 +797,50 @@ fn cmd_exec_inner( let audit_via_env = std::env::var("XENIA_TRACE_HANDLES") .map(|v| matches!(v.as_str(), "1" | "true" | "TRUE" | "True")) .unwrap_or(false); - if trace_handles || audit_via_env { + let trace_handles_active = trace_handles || audit_via_env || trace_handles_focus.is_some(); + if trace_handles_active { kernel.audit.enabled = true; if !quiet { tracing::info!("handle audit enabled (--trace-handles)"); } } + // KRNBUG-AUDIT-001: parse `--trace-handles-focus=0x1004,0x100c,...` + // into the audit focus set. When non-empty, signal attempts on these + // handles are captured into ghost trails even if no `record_create` + // exists for them — the data-driven hypothesis fail-fast for + // parked-waiter producer-trace. + if let Some(list) = trace_handles_focus { + for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) { + let parsed = if let Some(hex) = token.strip_prefix("0x").or_else(|| token.strip_prefix("0X")) { + u32::from_str_radix(hex, 16) + } else { + token.parse::() + }; + match parsed { + Ok(h) => { + kernel.audit.focus.insert(h); + } + Err(_) => { + return Err(anyhow::anyhow!( + "invalid handle in --trace-handles-focus: {token:?}" + )); + } + } + } + if !quiet && !kernel.audit.focus.is_empty() { + let focused: Vec = kernel + .audit + .focus + .iter() + .map(|h| format!("{h:#010x}")) + .collect(); + tracing::info!( + "handle audit focus: {} ({})", + kernel.audit.focus.len(), + focused.join(", ") + ); + } + } // Install the GPU register aperture MMIO region on the guest memory so // any `0x7FC8xxxx` access routes to our atomic mailbox. Matches canary's @@ -2936,6 +2989,153 @@ fn dump_thread_diagnostic(kernel: &xenia_kernel::KernelState, quiet: bool) { } } } + + // KRNBUG-AUDIT-001: per-handle FOCUS report. For each handle in + // `--trace-handles-focus`, emit a unified diagnosis: how many + // signal attempts (primary + ghost) we saw, classified by + // producer-class (GuestExport vs KernelInternal vs + // InterruptCallback), so we can deterministically distinguish: + // - "guest never tried" → producer is a missing kernel signal source + // - "signal landed but waiter wasn't woken" → wake-eligibility bug + if !kernel.audit.focus.is_empty() { + println!("\n=== Handle audit (focus) ==="); + let mut focus: Vec = kernel.audit.focus.iter().copied().collect(); + focus.sort(); + for h in focus { + let waiter_count = kernel + .objects + .get(&h) + .and_then(|o| match o { + KernelObject::Event { waiters, .. } + | KernelObject::Semaphore { waiters, .. } + | KernelObject::Thread { waiters, .. } + | KernelObject::Timer { waiters, .. } + | KernelObject::Mutex { waiters, .. } => Some(waiters.len()), + KernelObject::File { .. } => None, + }) + .unwrap_or(0); + let signaled = kernel + .objects + .get(&h) + .and_then(|o| match o { + KernelObject::Event { signaled, .. } => Some(*signaled), + _ => None, + }); + let primary = kernel.audit.trails.get(&h); + let ghost = kernel.audit.ghost_trails.get(&h); + let kind_str = primary.map(|t| t.kind).unwrap_or(""); + let primary_signals = primary.map(|t| t.signals.len()).unwrap_or(0); + let ghost_signals = ghost.map(|g| g.signals.len()).unwrap_or(0); + let total_signals = primary_signals + ghost_signals; + let primary_waits = primary.map(|t| t.waits.len()).unwrap_or(0); + let primary_wakes = primary.map(|t| t.wakes.len()).unwrap_or(0); + let audit_blind = + waiter_count > 0 && primary_waits == 0 && primary_wakes == 0; + println!( + " handle={:#010x} kind={} waiters={} signaled={} signal_attempts={} (primary={}, ghost={}) waits={} wakes={}{}", + h, + kind_str, + waiter_count, + signaled + .map(|s| if s { "true" } else { "false" }) + .unwrap_or("?"), + total_signals, + primary_signals, + ghost_signals, + primary_waits, + primary_wakes, + if audit_blind { " " } else { "" }, + ); + if let Some(t) = primary { + println!( + " created cycle={} tid={} lr={:#010x} src={}", + t.created.cycle, t.created.tid, t.created.lr, t.created.source, + ); + } + // Producer-class classification. Source strings are stable + // labels passed to `record_signal` at the export sites. + // Anything not in the GuestExport set is treated as + // KernelInternal (e.g. signal_io_completion_event). + let guest_export_sources = [ + "KeSetEvent", + "NtSetEvent", + "KePulseEvent", + "NtPulseEvent", + "KeReleaseSemaphore", + "NtReleaseSemaphore", + "NtSignalAndWaitForSingleObjectEx", + ]; + let mut guest_export = 0usize; + let mut kernel_internal = 0usize; + let count_signals = |signals: &std::collections::VecDeque< + xenia_kernel::audit::HandleAuditEntry, + >, + guest_export: &mut usize, + kernel_internal: &mut usize| { + for e in signals { + if guest_export_sources.contains(&e.source) { + *guest_export += 1; + } else { + *kernel_internal += 1; + } + } + }; + if let Some(p) = primary { + count_signals(&p.signals, &mut guest_export, &mut kernel_internal); + } + if let Some(g) = ghost { + count_signals(&g.signals, &mut guest_export, &mut kernel_internal); + } + + // Compact merged timeline (cycle-sorted) of waits + signals. + let mut tl: Vec<(u64, &str, &xenia_kernel::audit::HandleAuditEntry)> = Vec::new(); + if let Some(p) = primary { + for e in &p.waits { + tl.push((e.cycle, "wait", e)); + } + for e in &p.signals { + tl.push((e.cycle, "signal", e)); + } + } + if let Some(g) = ghost { + for e in &g.signals { + tl.push((e.cycle, "signal[ghost]", e)); + } + } + tl.sort_by_key(|(c, _, _)| *c); + if !tl.is_empty() { + println!(" timeline (last 16 of {}):", tl.len()); + let start = tl.len().saturating_sub(16); + for (cycle, label, e) in &tl[start..] { + println!( + " cycle={} tid={} lr={:#010x} src={}[{}] aux={:#x}", + cycle, e.tid, e.lr, e.source, label, e.aux, + ); + } + } + + // -- DIAGNOSIS -- + println!(" -- DIAGNOSIS --"); + println!( + " GuestExport={} KernelInternal={} waits={}", + guest_export, kernel_internal, primary_waits, + ); + let conclusion = if total_signals == 0 && audit_blind { + "waiter parked via non-audited path (CS / spinlock / DPC / sync-primitive helper) and no signals — instrument that wait path or check ObReferenceObjectByHandle plumbing" + } else if total_signals == 0 && primary_waits > 0 { + "producer is a missing kernel signal source (or BST-paradox upstream)" + } else if total_signals == 0 && primary_waits == 0 { + "no waits and no signals — handle may not be reached during this run" + } else if guest_export > 0 && waiter_count > 0 { + "signals landed but waiter still parked — wake-eligibility / mr-bit bug" + } else if kernel_internal > 0 && waiter_count > 0 { + "kernel-internal signal landed but waiter still parked — wake or routing bug" + } else { + "not stuck — signals consumed correctly" + }; + println!(" => {}", conclusion); + } + } } } diff --git a/crates/xenia-kernel/src/audit.rs b/crates/xenia-kernel/src/audit.rs index 204dbe8..db6594b 100644 --- a/crates/xenia-kernel/src/audit.rs +++ b/crates/xenia-kernel/src/audit.rs @@ -16,7 +16,7 @@ //! to identify which kernel API should signal handles //! `0x10FC / 0x1014 / 0x1104 / 0x10DC / 0x10F0` but doesn't. -use std::collections::{HashMap, VecDeque}; +use std::collections::{HashMap, HashSet, VecDeque}; /// Maximum events per category per handle. Bounded so a long-running session /// doesn't OOM if a handle is signaled millions of times. @@ -72,10 +72,34 @@ impl HandleAuditTrail { } /// The audit table itself. Lives on `KernelState`; opt-in via `enabled`. +/// +/// `focus` + `ghost_trails` form the **parked-waiter diagnostic** added for +/// audit-2026-05-fix Phase 2 (KRNBUG-AUDIT-001). When `focus` is non-empty, +/// `record_signal_attempt` keeps a "ghost trail" for handles in the focus +/// set even if no `record_create` ever observed them — i.e. the guest hand- +/// initialized a `KEVENT` (via `KeInitializeEvent` or a raw write) and the +/// existing `record_signal` would silently drop the attempt. Ghost trails +/// are the only way to distinguish "guest never called Nt/KeSetEvent on +/// this handle" from "signal landed but waiter wasn't woken". #[derive(Debug, Default)] pub struct HandleAudit { pub trails: HashMap, pub enabled: bool, + /// Focus set: when non-empty, signals targeting handles in this set are + /// captured even when no `record_create` exists. Populated from + /// `--trace-handles=0x1004,0x100c,...`. Empty = whole-table audit. + pub focus: HashSet, + /// Ghost trails for never-created handles whose signals we still want + /// to see. Keyed by handle. Only populated for handles in `focus`. + pub ghost_trails: HashMap, +} + +/// A ghost trail is a signal-only timeline for a handle that was never +/// `record_create`d. We don't have a `kind` because we never saw a creation; +/// callers rendering the report should label these as ``. +#[derive(Debug, Default)] +pub struct GhostTrail { + pub signals: VecDeque, } impl HandleAudit { @@ -104,7 +128,30 @@ impl HandleAudit { } if let Some(trail) = self.trails.get_mut(&handle) { Self::push_bounded(&mut trail.signals, entry); + return; } + // No primary trail. Fall through to ghost-trail logic so signals + // targeting focus-set handles are not silently dropped. + self.record_signal_attempt_ghost(handle, entry); + } + + /// Record a signal attempt that targeted a focus-set handle but had no + /// primary trail (i.e. the handle was never `record_create`d via one + /// of our audit hook sites). Inserts into `ghost_trails`. Bounded by + /// `AUDIT_RING_CAPACITY` per handle. No-op when `enabled = false` or + /// `handle` is not in `focus`. + /// + /// Public for direct invocation from internal kernel signal sites that + /// don't go through `record_signal` (e.g. `signal_io_completion_event`, + /// IRQ-callback paths) — those callers should both `record_signal` + /// (for the primary-trail case) AND fall through here. + #[inline] + pub fn record_signal_attempt_ghost(&mut self, handle: u32, entry: HandleAuditEntry) { + if !self.enabled || !self.focus.contains(&handle) { + return; + } + let ghost = self.ghost_trails.entry(handle).or_default(); + Self::push_bounded(&mut ghost.signals, entry); } #[inline] @@ -167,9 +214,38 @@ mod tests { #[test] fn signal_for_unknown_handle_is_dropped() { let mut a = HandleAudit { enabled: true, ..HandleAudit::default() }; - // No `record_create` first → handle has no trail. + // No `record_create` first → handle has no trail. Without focus, + // the signal is silently dropped (legacy behavior). a.record_signal(0x9999, entry(1, "NtSetEvent")); assert!(a.trails.is_empty()); + assert!(a.ghost_trails.is_empty()); + } + + #[test] + fn signal_for_focus_handle_lands_in_ghost_trail() { + let mut a = HandleAudit { enabled: true, ..HandleAudit::default() }; + a.focus.insert(0x1004); + // No `record_create` for 0x1004 — but it's in the focus set. + a.record_signal(0x1004, entry(1, "NtSetEvent")); + a.record_signal(0x1004, entry(2, "KeSetEvent")); + // 0x9999 NOT in focus → still dropped. + a.record_signal(0x9999, entry(3, "NtSetEvent")); + + assert!(a.trails.is_empty()); + let ghost = a.ghost_trails.get(&0x1004).expect("ghost trail expected"); + assert_eq!(ghost.signals.len(), 2); + assert!(!a.ghost_trails.contains_key(&0x9999)); + } + + #[test] + fn ghost_trail_does_not_double_record_when_primary_exists() { + let mut a = HandleAudit { enabled: true, ..HandleAudit::default() }; + a.focus.insert(0x1004); + a.record_create(0x1004, "Event/Manual", entry(0, "NtCreateEvent")); + a.record_signal(0x1004, entry(1, "NtSetEvent")); + + assert_eq!(a.trails[&0x1004].signals.len(), 1); + assert!(a.ghost_trails.is_empty()); } #[test]