diff --git a/Cargo.lock b/Cargo.lock index cbd6c5e..4c7f656 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -463,6 +463,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "block2" version = "0.5.1" @@ -951,6 +960,16 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dispatch" version = "0.2.0" @@ -3439,6 +3458,28 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -5022,7 +5063,11 @@ name = "xenia-kernel" version = "0.1.0" dependencies = [ "anyhow", + "libc", "metrics", + "serde_json", + "sha1", + "sha2", "thiserror 2.0.18", "tracing", "xenia-cpu", diff --git a/Cargo.toml b/Cargo.toml index d00a940..ef2e9ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,8 @@ thiserror = "2" anyhow = "1" serde = { version = "1", features = ["derive"] } serde_json = "1" +sha1 = "0.10" +sha2 = "0.10" aes = "0.8" 
duckdb = { version = "1", features = ["bundled"] } diff --git a/crates/xenia-analysis/src/db.rs b/crates/xenia-analysis/src/db.rs index 155c75a..3f64d85 100644 --- a/crates/xenia-analysis/src/db.rs +++ b/crates/xenia-analysis/src/db.rs @@ -897,17 +897,12 @@ fn insert_functions( func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { - let mut stmt = conn.prepare( - "INSERT INTO functions - (address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore, - pdata_validated, pdata_length, has_eh) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" - )?; + let mut app = conn.appender("functions")?; for (&addr, fi) in &func_analysis.functions { let name = labels.get(&addr) .cloned() .unwrap_or_else(|| format!("sub_{addr:08X}")); - stmt.execute(params![ + app.append_row(params![ addr as i64, name, fi.end as i64, @@ -920,6 +915,7 @@ fn insert_functions( fi.has_eh, ])?; } + app.flush()?; Ok(()) } @@ -930,15 +926,13 @@ fn insert_vtables( _image_base: u32, ) -> anyhow::Result<()> { if vtables.is_empty() { return Ok(()); } - let mut stmt = conn.prepare( - "INSERT INTO vtables - (address, length, col_address, class_name, rtti_present, base_classes_json) - VALUES (?, ?, ?, ?, ?, ?) 
- ON CONFLICT DO NOTHING" - )?; - let mut count = 0u64; + let mut dedup: HashMap = HashMap::new(); for v in vtables { - stmt.execute(params![ + dedup.entry(v.address).or_insert(v); + } + let mut app = conn.appender("vtables")?; + for v in dedup.values() { + app.append_row(params![ v.address as i64, v.length as i64, v.col_address.map(|a| a as i64), @@ -946,8 +940,9 @@ fn insert_vtables( v.rtti_present, v.base_classes_json.as_deref(), ])?; - count += 1; } + app.flush()?; + let count = dedup.len() as u64; metrics::counter!("db.rows", "table" => "vtables").increment(count); tracing::info!(rows = count, table = "vtables", "bulk insert complete"); Ok(()) @@ -960,17 +955,17 @@ fn insert_methods_and_classes( ) -> anyhow::Result<()> { if vtables.is_empty() { return Ok(()); } - // methods rows + // methods rows — dedup by (vtable_address, slot), first-write-wins. let methods = crate::vtables::methods_table(vtables, labels); if !methods.is_empty() { - let mut stmt = conn.prepare( - "INSERT INTO methods - (vtable_address, slot, function_address, mangled_name, demangled_name) - VALUES (?, ?, ?, ?, ?) 
- ON CONFLICT DO NOTHING" - )?; - for (vt_addr, slot, fn_addr, mangled, demangled) in &methods { - stmt.execute(params![ + let mut idx: HashMap<(u32, u32), usize> = HashMap::new(); + for (i, m) in methods.iter().enumerate() { + idx.entry((m.0, m.1)).or_insert(i); + } + let mut app = conn.appender("methods")?; + for &i in idx.values() { + let (vt_addr, slot, fn_addr, mangled, demangled) = &methods[i]; + app.append_row(params![ *vt_addr as i64, *slot as i64, *fn_addr as i64, @@ -978,29 +973,33 @@ fn insert_methods_and_classes( demangled.as_deref(), ])?; } - metrics::counter!("db.rows", "table" => "methods").increment(methods.len() as u64); - tracing::info!(rows = methods.len(), table = "methods", "bulk insert complete"); + app.flush()?; + let n = idx.len() as u64; + metrics::counter!("db.rows", "table" => "methods").increment(n); + tracing::info!(rows = n, table = "methods", "bulk insert complete"); } - // classes rows (deduped by class_name, first-detected wins) + // classes rows — dedup by class_name, first-detected wins. let classes = crate::vtables::classes_table(vtables); if !classes.is_empty() { - let mut stmt = conn.prepare( - "INSERT INTO classes - (name, vtable_address, rtti_present, base_classes_json) - VALUES (?, ?, ?, ?) 
- ON CONFLICT DO NOTHING" - )?; - for (name, vt_addr, rtti, bases) in &classes { - stmt.execute(params![ + let mut idx: HashMap<&str, usize> = HashMap::new(); + for (i, c) in classes.iter().enumerate() { + idx.entry(c.0.as_str()).or_insert(i); + } + let mut app = conn.appender("classes")?; + for &i in idx.values() { + let (name, vt_addr, rtti, bases) = &classes[i]; + app.append_row(params![ name.as_str(), *vt_addr as i64, *rtti, bases.as_deref(), ])?; } - metrics::counter!("db.rows", "table" => "classes").increment(classes.len() as u64); - tracing::info!(rows = classes.len(), table = "classes", "bulk insert complete"); + app.flush()?; + let n = idx.len() as u64; + metrics::counter!("db.rows", "table" => "classes").increment(n); + tracing::info!(rows = n, table = "classes", "bulk insert complete"); } Ok(()) @@ -1011,20 +1010,21 @@ fn insert_strings( strings: &[crate::strings::DetectedString], ) -> anyhow::Result<()> { if strings.is_empty() { return Ok(()); } - let mut stmt = conn.prepare( - "INSERT INTO strings (address, encoding, length, content) VALUES (?, ?, ?, ?) 
- ON CONFLICT DO NOTHING" - )?; - let mut count = 0u64; + let mut dedup: HashMap = HashMap::new(); for s in strings { - stmt.execute(params![ + dedup.entry(s.address).or_insert(s); + } + let mut app = conn.appender("strings")?; + for s in dedup.values() { + app.append_row(params![ s.address as i64, s.encoding, s.length as i64, s.content.as_str(), ])?; - count += 1; } + app.flush()?; + let count = dedup.len() as u64; metrics::counter!("db.rows", "table" => "strings").increment(count); tracing::info!(rows = count, table = "strings", "bulk insert complete"); Ok(()) @@ -1035,31 +1035,17 @@ fn insert_eh_records( records: &[crate::eh_scope::EhFuncInfo], ) -> anyhow::Result<()> { if records.is_empty() { return Ok(()); } - let mut stmt_fi = conn.prepare( - "INSERT INTO eh_funcinfo - (address, magic, max_state, p_unwind_map, n_try_blocks, - p_try_block_map, n_ip_map_entries, p_ip_to_state_map, - p_es_type_list, eh_flags) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - ON CONFLICT DO NOTHING" - )?; - let mut stmt_unwind = conn.prepare( - "INSERT INTO eh_unwind_map - (funcinfo_address, state_index, to_state, action_pc) - VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" - )?; - let mut stmt_try = conn.prepare( - "INSERT INTO eh_try_blocks - (funcinfo_address, try_index, try_low, try_high, catch_high, - n_catches, p_handler_array) - VALUES (?, ?, ?, ?, ?, ?, ?) - ON CONFLICT DO NOTHING" - )?; - let mut n_fi = 0u64; - let mut n_unwind = 0u64; - let mut n_try = 0u64; - for r in records { - stmt_fi.execute(params![ + // Dedup eh_funcinfo by PK (address), first-write-wins. + // Within a deduped record, unwind/try entries are uniquely indexed by enumerate. 
+ let mut fi_idx: HashMap = HashMap::new(); + for (i, r) in records.iter().enumerate() { + fi_idx.entry(r.address).or_insert(i); + } + + let mut app_fi = conn.appender("eh_funcinfo")?; + for &i in fi_idx.values() { + let r = &records[i]; + app_fi.append_row(params![ r.address as i64, r.magic as i64, r.max_state as i64, r.p_unwind_map as i64, r.n_try_blocks as i64, r.p_try_block_map as i64, r.n_ip_map_entries as i64, @@ -1067,22 +1053,38 @@ fn insert_eh_records( r.p_es_type_list.map(|p| p as i64), r.eh_flags.map(|f| f as i64), ])?; - n_fi += 1; - for (i, e) in r.unwind_map.iter().enumerate() { - stmt_unwind.execute(params![ - r.address as i64, i as i64, e.to_state as i64, e.action_pc as i64, + } + app_fi.flush()?; + let n_fi = fi_idx.len() as u64; + + let mut app_unwind = conn.appender("eh_unwind_map")?; + let mut n_unwind = 0u64; + for &i in fi_idx.values() { + let r = &records[i]; + for (j, e) in r.unwind_map.iter().enumerate() { + app_unwind.append_row(params![ + r.address as i64, j as i64, e.to_state as i64, e.action_pc as i64, ])?; n_unwind += 1; } - for (i, t) in r.try_blocks.iter().enumerate() { - stmt_try.execute(params![ - r.address as i64, i as i64, + } + app_unwind.flush()?; + + let mut app_try = conn.appender("eh_try_blocks")?; + let mut n_try = 0u64; + for &i in fi_idx.values() { + let r = &records[i]; + for (j, t) in r.try_blocks.iter().enumerate() { + app_try.append_row(params![ + r.address as i64, j as i64, t.try_low as i64, t.try_high as i64, t.catch_high as i64, t.n_catches as i64, t.p_handler_array as i64, ])?; n_try += 1; } } + app_try.flush()?; + metrics::counter!("db.rows", "table" => "eh_funcinfo").increment(n_fi); metrics::counter!("db.rows", "table" => "eh_unwind_map").increment(n_unwind); metrics::counter!("db.rows", "table" => "eh_try_blocks").increment(n_try); @@ -1097,54 +1099,55 @@ fn insert_typed_ind_dispatch( conn: &Connection, t: &crate::ind_dispatch_typed::TypedIndirectResult, ) -> anyhow::Result<()> { + // Dedupe by PK before 
appending: the Appender writes directly to columnar + // storage and does not enforce primary keys, so duplicates here would + // surface as PK violations at flush. First-write-wins matches the previous + // `ON CONFLICT DO NOTHING` behaviour. if !t.dispatches.is_empty() { - let mut stmt_site = conn.prepare( - "INSERT INTO indirect_dispatch_sites - (dispatch_pc, vptr_offset, slot, candidate_count) - VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" - )?; - let mut stmt_cand = conn.prepare( - "INSERT INTO indirect_dispatch_candidates - (dispatch_pc, vtable_address, method_address) - VALUES (?, ?, ?) ON CONFLICT DO NOTHING" - )?; - let mut n_sites = 0u64; - let mut n_cand = 0u64; + let mut sites: HashMap = HashMap::new(); + let mut cands: HashMap<(u32, u32), u32> = HashMap::new(); for d in &t.dispatches { - stmt_site.execute(params![ - d.dispatch_pc as i64, - d.vptr_offset as i64, - d.slot as i64, - d.candidate_vtables.len() as i64, - ])?; - n_sites += 1; + sites.entry(d.dispatch_pc) + .or_insert((d.vptr_offset, d.slot, d.candidate_vtables.len())); for (vt, m) in d.candidate_vtables.iter().zip(d.method_pcs.iter()) { - stmt_cand.execute(params![ - d.dispatch_pc as i64, *vt as i64, *m as i64, - ])?; - n_cand += 1; + cands.entry((d.dispatch_pc, *vt)).or_insert(*m); } } + + let mut app_sites = conn.appender("indirect_dispatch_sites")?; + for (pc, (off, slot, count)) in &sites { + app_sites.append_row(params![ + *pc as i64, *off as i64, *slot as i64, *count as i64, + ])?; + } + app_sites.flush()?; + + let mut app_cand = conn.appender("indirect_dispatch_candidates")?; + for ((pc, vt), m) in &cands { + app_cand.append_row(params![*pc as i64, *vt as i64, *m as i64])?; + } + app_cand.flush()?; + + let n_sites = sites.len() as u64; + let n_cand = cands.len() as u64; metrics::counter!("db.rows", "table" => "indirect_dispatch_sites").increment(n_sites); metrics::counter!("db.rows", "table" => "indirect_dispatch_candidates").increment(n_cand); tracing::info!(sites = n_sites, candidates 
= n_cand, "typed indirect-dispatch insert complete"); } if !t.vptr_writes.is_empty() { - let mut stmt = conn.prepare( - "INSERT INTO vptr_writes - (writer_pc, vtable_address, vptr_offset, writer_function) - VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" - )?; - let mut n = 0u64; + let mut writes: HashMap<(u32, u32, u32), u32> = HashMap::new(); for w in &t.vptr_writes { - stmt.execute(params![ - w.writer_pc as i64, - w.vtable_addr as i64, - w.vptr_offset as i64, - w.writer_function as i64, - ])?; - n += 1; + writes.entry((w.writer_pc, w.vtable_addr, w.vptr_offset)) + .or_insert(w.writer_function); } + let mut app = conn.appender("vptr_writes")?; + for ((wpc, vt, off), wf) in &writes { + app.append_row(params![ + *wpc as i64, *vt as i64, *off as i64, *wf as i64, + ])?; + } + app.flush()?; + let n = writes.len() as u64; metrics::counter!("db.rows", "table" => "vptr_writes").increment(n); tracing::info!(rows = n, "vptr_writes insert complete"); } @@ -1156,26 +1159,31 @@ fn insert_funcptr_arrays( arrays: &[crate::funcptr_arrays::FuncPtrArray], ) -> anyhow::Result<()> { if arrays.is_empty() { return Ok(()); } - let mut stmt_arr = conn.prepare( - "INSERT INTO function_pointer_arrays (address, length, kind) VALUES (?, ?, ?) - ON CONFLICT DO NOTHING" - )?; - let mut stmt_ent = conn.prepare( - "INSERT INTO function_pointer_array_entries (array_address, slot, function_address) - VALUES (?, ?, ?) ON CONFLICT DO NOTHING" - )?; - let mut n_arr = 0u64; + // Dedup arrays by PK (address), first-write-wins. 
+ let mut idx: HashMap = HashMap::new(); + for (i, a) in arrays.iter().enumerate() { + idx.entry(a.address).or_insert(i); + } + + let mut app_arr = conn.appender("function_pointer_arrays")?; + for &i in idx.values() { + let a = &arrays[i]; + app_arr.append_row(params![a.address as i64, a.length as i64, a.kind])?; + } + app_arr.flush()?; + let n_arr = idx.len() as u64; + + let mut app_ent = conn.appender("function_pointer_array_entries")?; let mut n_ent = 0u64; - for a in arrays { - let inserted = stmt_arr.execute(params![ - a.address as i64, a.length as i64, a.kind, - ])?; - if inserted > 0 { n_arr += 1; } - for (i, &fn_va) in a.entries.iter().enumerate() { - stmt_ent.execute(params![a.address as i64, i as i64, fn_va as i64])?; + for &i in idx.values() { + let a = &arrays[i]; + for (slot, &fn_va) in a.entries.iter().enumerate() { + app_ent.append_row(params![a.address as i64, slot as i64, fn_va as i64])?; n_ent += 1; } } + app_ent.flush()?; + metrics::counter!("db.rows", "table" => "function_pointer_arrays").increment(n_arr); metrics::counter!("db.rows", "table" => "function_pointer_array_entries").increment(n_ent); tracing::info!(arrays = n_arr, entries = n_ent, "function-pointer arrays insert complete"); @@ -1187,13 +1195,8 @@ fn insert_demangled_from_labels( labels: &HashMap, import_libraries: &[xenia_xex::header::ImportLibrary], ) -> anyhow::Result<()> { - let mut stmt = conn.prepare( - "INSERT INTO demangled_names - (address, mangled, raw_demangled, namespace_path, class_name, - method_name, params_signature) - VALUES (?, ?, ?, ?, ?, ?, ?)" - )?; - + // demangled_names has no PK — straight append, no dedup needed. 
+ let mut app = conn.appender("demangled_names")?; let mut count = 0u64; for (&addr, name) in labels { @@ -1206,7 +1209,7 @@ fn insert_demangled_from_labels( continue; } if let Some(d) = crate::demangle::demangle(name) { - stmt.execute(params![ + app.append_row(params![ addr as i64, d.mangled, d.raw_demangled, @@ -1226,7 +1229,7 @@ fn insert_demangled_from_labels( if let Some(name) = resolved && let Some(d) = crate::demangle::demangle(name) { - stmt.execute(params![ + app.append_row(params![ imp.address as i64, d.mangled, d.raw_demangled, @@ -1240,6 +1243,7 @@ fn insert_demangled_from_labels( } } + app.flush()?; metrics::counter!("db.rows", "table" => "demangled_names").increment(count); tracing::info!(rows = count, table = "demangled_names", "demangler complete"); Ok(()) @@ -1252,14 +1256,15 @@ fn insert_pdata_entries( if entries.is_empty() { return Ok(()); } - let mut stmt = conn.prepare( - "INSERT INTO pdata_entries - (begin_address, end_address, function_length, prolog_length, flags) - VALUES (?, ?, ?, ?, ?) - ON CONFLICT DO NOTHING" - )?; - for e in entries { - stmt.execute(params![ + // Dedup by PK (begin_address), first-write-wins. + let mut idx: HashMap = HashMap::new(); + for (i, e) in entries.iter().enumerate() { + idx.entry(e.begin_address).or_insert(i); + } + let mut app = conn.appender("pdata_entries")?; + for &i in idx.values() { + let e = &entries[i]; + app.append_row(params![ e.begin_address as i64, e.end_address() as i64, e.function_length as i64, @@ -1267,6 +1272,7 @@ fn insert_pdata_entries( e.flags as i64, ])?; } + app.flush()?; Ok(()) } @@ -1274,9 +1280,8 @@ fn insert_labels( conn: &Connection, labels: &HashMap, ) -> anyhow::Result<()> { - let mut stmt = conn.prepare( - "INSERT INTO labels (address, name, kind) VALUES (?, ?, ?) ON CONFLICT DO NOTHING" - )?; + // Source is a HashMap so addresses are unique by construction — no dedup needed. 
+ let mut app = conn.appender("labels")?; for (&addr, name) in labels { let kind = if name.starts_with("sub_") || name == "entry_point" { "function" @@ -1291,8 +1296,9 @@ fn insert_labels( } else { "other" }; - stmt.execute(params![addr as i64, name, kind])?; + app.append_row(params![addr as i64, name, kind])?; } + app.flush()?; Ok(()) } diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 0418ec9..3a20f47 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -242,6 +242,41 @@ enum Commands { /// line). Stdout when omitted. #[arg(long)] lr_trace_out: Option, + /// Phase A diff-harness — write schema-v1 JSONL events (kernel + /// calls, thread create/exit, handle create/destroy, waits) to + /// this path. Empty (default) = disabled, zero overhead. + /// Schema: `xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md`. + /// Settable via `XENIA_PHASE_A_EVENT_LOG`. + #[arg(long, value_name = "PATH")] + phase_a_event_log: Option, + /// Phase B initial-state snapshot — write structured state + /// snapshot to `/ours/` at the moment immediately before + /// the first guest PPC instruction of the XEX entry_point. + /// Empty (default) = disabled, zero overhead. Settable via + /// `XENIA_PHASE_B_SNAPSHOT_DIR`. See + /// `xenia-rs/audit-runs/phase-b-state-equivalence/`. + #[arg(long, value_name = "DIR")] + phase_b_snapshot_dir: Option, + /// Phase B: after writing the snapshot, exit the process + /// immediately (`_exit(0)`) so re-runs are byte-deterministic. + /// Settable via `XENIA_PHASE_B_SNAPSHOT_AND_EXIT`. + #[arg(long)] + phase_b_snapshot_and_exit: bool, + /// Phase B: in memory.json, populate `section_contents` with raw + /// bytes for every committed region. Default false. Settable via + /// `XENIA_PHASE_B_DUMP_SECTION_CONTENT`. 
+ #[arg(long)] + phase_b_dump_section_content: bool, + /// review-a Step 1 diagnostic crowbar — when set, synthesises + /// the 4 `sub_825070F0` worker spawns once instruction_count + /// crosses the configured threshold (default 20M instr, + /// override via `XENIA_CROWBAR_TRIGGER_INSTR=N`). **NOT a + /// fix**: bypasses the natural-activation gate to test + /// whether the workers function correctly IF activated. Off + /// by default; settable via `XENIA_CROWBAR_WORKERS=1`. See + /// `xenia-rs/audit-runs/review-a-step1-crowbar/`. + #[arg(long)] + force_spawn_workers: bool, }, /// Browse XISO disc image contents Browse { @@ -405,7 +440,45 @@ fn main() -> Result<()> { probe_db, lr_trace, lr_trace_out, - } => cmd_exec( + phase_a_event_log, + phase_b_snapshot_dir, + phase_b_snapshot_and_exit, + phase_b_dump_section_content, + force_spawn_workers, + } => { + // review-a Step 1: CLI flag → env var so cmd_exec_inner's + // existing env-var-driven cvar wire-up picks it up. Avoids + // threading two more params through the (already long) + // cmd_exec / cmd_exec_inner signatures. + if force_spawn_workers { + // SAFETY: pre-thread-spawn process startup; no races. + unsafe { std::env::set_var("XENIA_CROWBAR_WORKERS", "1"); } + } + // Resolve the Phase A event-log path: CLI flag wins over env var. + // Empty/unset → emitter stays disabled (zero overhead). + let phase_a_path: Option = phase_a_event_log + .or_else(|| std::env::var("XENIA_PHASE_A_EVENT_LOG").ok().map(PathBuf::from)); + xenia_kernel::event_log::init(phase_a_path.as_deref()); + // Resolve Phase B flags: CLI > env var. Empty/unset = disabled. 
+ fn truthy(s: &str) -> bool { + let s = s.trim().to_ascii_lowercase(); + s == "1" || s == "true" || s == "yes" + } + let phase_b_dir: Option = phase_b_snapshot_dir + .or_else(|| std::env::var("XENIA_PHASE_B_SNAPSHOT_DIR").ok().map(PathBuf::from)); + let phase_b_exit = phase_b_snapshot_and_exit + || std::env::var("XENIA_PHASE_B_SNAPSHOT_AND_EXIT") + .ok() + .as_deref() + .map(truthy) + .unwrap_or(false); + let phase_b_dump = phase_b_dump_section_content + || std::env::var("XENIA_PHASE_B_DUMP_SECTION_CONTENT") + .ok() + .as_deref() + .map(truthy) + .unwrap_or(false); + cmd_exec( &path, max_instructions, ips_limit, @@ -431,7 +504,11 @@ fn main() -> Result<()> { probe_db.as_deref(), lr_trace.as_deref(), lr_trace_out.as_deref(), - ), + phase_b_dir, + phase_b_exit, + phase_b_dump, + ) + } Commands::Browse { path } => cmd_browse(&path), Commands::Info { path } => cmd_info(&path), Commands::Extract { path, output, db } => cmd_extract(&path, output.as_deref(), db.as_deref()), @@ -662,6 +739,9 @@ fn cmd_exec( probe_db: Option<&str>, lr_trace: Option<&str>, lr_trace_out: Option<&str>, + phase_b_snapshot_dir: Option, + phase_b_snapshot_and_exit: bool, + phase_b_dump_section_content: bool, ) -> Result<()> { cmd_exec_inner( path, @@ -692,6 +772,9 @@ fn cmd_exec( None, None, false, + phase_b_snapshot_dir, + phase_b_snapshot_and_exit, + phase_b_dump_section_content, ) } @@ -738,6 +821,9 @@ fn cmd_check( out, expect, stable_digest, + None, // phase_b_snapshot_dir — never wanted on check goldens + false, // phase_b_snapshot_and_exit + false, // phase_b_dump_section_content ) } @@ -770,6 +856,9 @@ fn cmd_exec_inner( digest_out: Option<&str>, digest_expect: Option<&str>, stable_digest: bool, + phase_b_snapshot_dir: Option, + phase_b_snapshot_and_exit: bool, + phase_b_dump_section_content: bool, ) -> Result<()> { let started = Instant::now(); let data = load_xex_data(path)?; @@ -840,22 +929,121 @@ fn cmd_exec_inner( info!(thunks = thunk_map.len(), "import thunks mapped"); // ── Phase 
2: CPU initialization per xenia-canary ───────────────────── - // Allocate stack (1MB at 0x70000000) + // + // Stack VA = `0x70000000`, size honors `XEX_HEADER_DEFAULT_STACK_SIZE` + // (key `0x00020200`) when present, falling back to 1 MiB. The XEX + // header's stack-size value is rounded up to a 4 KiB page boundary + // before allocation. NOTE: guard pages are NOT yet allocated — that + // would require extending `xenia-memory` with a `NoAccess` protection + // flag and platform-layer page-decommit support, deferred to a future + // pass. Overflow into adjacent unmapped pages currently silently + // drops the write (per `GuestMemory::write_u32`'s `is_mapped` guard). let stack_base = 0x7000_0000u32; - let stack_size = 0x10_0000u32; + let stack_size = { + let from_header = xenia_xex::loader::get_stack_size(&header); + let rounded = (from_header + 0xFFF) & !0xFFFu32; + rounded.max(0x1_0000) // never less than 64 KiB + }; mem.alloc(stack_base, stack_size, rw) .map_err(|e| anyhow::anyhow!("Failed to allocate stack: {}", e))?; - // Allocate PCR (Processor Control Region) and TLS + // ── TLS region ──────────────────────────────────────────────────────── + // + // Canary's `XEX_HEADER_TLS_INFO` (key `0x00020104`) describes the title's + // TLS template image (`raw_data_address` / `raw_data_size`) and the + // number of dynamic slots (`slot_count`, default 1024 per canary's + // `kDefaultTlsSlotCount` in `xthread.cc:335`). Layout in guest memory: + // + // [tls_extended_image (raw_data_size B) | tls_dynamic_slots (slot_count*4 B)] + // + // The PCR's `tls_ptr` (PCR+0) points at the START of the dynamic-slot + // area — i.e. the dynamic slots live IMMEDIATELY AFTER the image bytes. + // For ours we keep the historical fixed VA `0x7FFE_0000` but size the + // region from the parsed TLS info (clamped to at least 4 KiB). When + // the XEX has no TLS info, the block is a 4 KiB zero region — matching + // the pre-Phase-2 behavior. 
+ let tls_info = xenia_xex::loader::get_tls_info(&header, &data); + let tls_raw_data_size = tls_info.map(|t| t.raw_data_size).unwrap_or(0); + let tls_slot_count = tls_info.map(|t| t.slot_count).unwrap_or(0).max(1024); + let tls_dynamic_bytes = tls_slot_count.saturating_mul(4); + let tls_total_bytes = { + let needed = tls_raw_data_size.saturating_add(tls_dynamic_bytes); + let rounded = (needed + 0xFFF) & !0xFFFu32; + rounded.max(0x1000) // never less than 4 KiB + }; + let pcr_addr = 0x7FFF_0000u32; let tls_addr = 0x7FFE_0000u32; mem.alloc(pcr_addr, 0x1000, rw)?; - mem.alloc(tls_addr, 0x1000, rw)?; + mem.alloc(tls_addr, tls_total_bytes, rw)?; - // Initialize PCR structure - mem.write_u32(pcr_addr, tls_addr); // PCR->tls_ptr - mem.write_u32(pcr_addr + 0x100, 0x1000); // PCR->current_thread (fake) - mem.write_u32(pcr_addr + 0x150, 0); // PCR->dpc_active + // Copy the title's TLS template (initial-value image for `__declspec(thread)` + // variables) into the head of the TLS region. Canary mirrors this with + // `Memory::Copy(tls_static_address_, tls_header->raw_data_address, + // tls_header->raw_data_size)` (xthread.cc:357-360). When + // `raw_data_size` is zero (no TLS image), the region is left zeroed. + if let Some(info) = tls_info { + if info.raw_data_size > 0 && info.raw_data_address != 0 { + let mut buf = vec![0u8; info.raw_data_size as usize]; + mem.read_bytes(info.raw_data_address, &mut buf); + mem.write_bulk(tls_addr, &buf); + } + } + + // ── Guest X_KTHREAD struct ──────────────────────────────────────────── + // + // Canary stores a real `X_KTHREAD` in guest memory (`xthread.h:260-`), + // and PCR `[+0x100]` (= `prcb_data.current_thread`) holds its VA. Ours + // previously wrote the bare host-side handle `0x1000` there, so any + // guest pointer-walk through `r13[+0x100]` read garbage. 
Allocate a + // 0x100-byte zero block at a fixed VA `0x7FFD_0000` (just below the + // TLS region, in unused address space) and populate the minimum + // credible fields: + // + // +0x00 X_DISPATCH_HEADER: + // [+0x00] u8 type = 6 (ThreadObject) + // [+0x04] u32 signal_state = 0 + // [+0x08] X_LIST_ENTRY wait_list { flink, blink } — both self-pointers + // +0x5C u32 stack_base (high addr) + // +0x60 u32 stack_limit (low addr) + // +0x68 u32 tls_address + // + // The dispatcher-header `type` byte for ThreadObject is `0x06` in the + // Vista/Xbox 360 kernel (matches DISPATCHER_HEADER reference at + // `xenia-canary/src/xenia/kernel/xobject.h:37-62`); setting it non-zero + // is what prevents the worst null-deref class on KTHREAD pointer walks. + let kthread_addr = 0x7FFD_0000u32; + let kthread_size = 0x1000u32; + mem.alloc(kthread_addr, kthread_size, rw) + .map_err(|e| anyhow::anyhow!("Failed to allocate X_KTHREAD region: {}", e))?; + // X_DISPATCH_HEADER + mem.write_u8(kthread_addr, 0x06); // type = ThreadObject + mem.write_u32(kthread_addr + 0x04, 0); // signal_state + mem.write_u32(kthread_addr + 0x08, kthread_addr + 0x08); // wait_list.flink (self) + mem.write_u32(kthread_addr + 0x0C, kthread_addr + 0x08); // wait_list.blink (self) + // Stack/TLS pointers (canary X_KTHREAD layout, xthread.h:267-270). + mem.write_u32(kthread_addr + 0x5C, stack_base + stack_size); // stack_base = high addr + mem.write_u32(kthread_addr + 0x60, stack_base); // stack_limit = low addr + mem.write_u32(kthread_addr + 0x68, tls_addr); // tls_address + + // ── PCR initialization ──────────────────────────────────────────────── + // + // Canary `X_KPCR` layout (xthread.h:171-223). 
Fields ours now populates: + // +0x000 tls_ptr — base of dynamic TLS slots + // +0x030 pcr_ptr u64 BE — self-reference (PCR base) + // +0x070 stack_base_ptr — top of stack (high addr) + // +0x074 stack_end_ptr — bottom of stack (low addr) + // +0x100 prcb_data.current_thread — VA of the guest X_KTHREAD + // +0x150 prcb_data.dpc_active — 0 + // +0x2A8 prcb — pointer back to prcb_data (= pcr+0x100) + // (Skipped: +0x038 `host_stash` — host pointer slot, not applicable to ours.) + mem.write_u32(pcr_addr, tls_addr); // tls_ptr + mem.write_u64(pcr_addr + 0x030, pcr_addr as u64); // pcr_ptr (self-ref, BE u64) + mem.write_u32(pcr_addr + 0x070, stack_base + stack_size); // stack_base_ptr (high) + mem.write_u32(pcr_addr + 0x074, stack_base); // stack_end_ptr (low) + mem.write_u32(pcr_addr + 0x100, kthread_addr); // prcb_data.current_thread + mem.write_u32(pcr_addr + 0x150, 0); // prcb_data.dpc_active + mem.write_u32(pcr_addr + 0x2A8, pcr_addr + 0x100); // prcb -> prcb_data // Set up CPU context per xenia-canary/cpu/thread_state.cc. // @@ -925,6 +1113,13 @@ fn cmd_exec_inner( let mut kernel = xenia_kernel::KernelState::with_gpu(gpu_backend); kernel.image_base = base; kernel.xex_system_flags = xenia_xex::loader::get_system_flags(&header); + // Phase B — install the entry-PC for the snapshot hook's identity + // check, plus the cvar-equivalent flags resolved by the caller. When + // `phase_b_snapshot_dir` is `None`, the hook short-circuits. + kernel.entry_pc = entry; + kernel.phase_b_snapshot_dir = phase_b_snapshot_dir.clone(); + kernel.phase_b_snapshot_and_exit = phase_b_snapshot_and_exit; + kernel.phase_b_dump_section_content = phase_b_dump_section_content; // Drain the reverse thunk map into the kernel so `XexGetProcedureAddress` // can resolve ordinals back to callable thunk addresses. for (module, ordinal, addr) in thunk_addr_map.drain(..) 
{ @@ -948,6 +1143,38 @@ fn cmd_exec_inner( }); let parallel_active = parallel || parallel_via_env; kernel.parallel_active = parallel_active; + + // Phase D Stage 3 — install a contention-replay manifest if pointed + // to via `XENIA_CONTENTION_MANIFEST_PATH`. The manifest is built by + // Stage 2's python tool from a Stage-1 cvar-ON canary trace. Unset + // = default mode (no replay, identical to pre-Stage-3 behavior). + // Errors are non-fatal (log + continue without replay) so a stale + // path doesn't brick the run. + if let Ok(path) = std::env::var("XENIA_CONTENTION_MANIFEST_PATH") { + let trimmed = path.trim(); + if !trimmed.is_empty() { + let p = std::path::PathBuf::from(trimmed); + match xenia_kernel::contention_manifest::ContentionManifest::load_from_file(&p) { + Ok(m) => { + let count = m.initial_count(); + let arc = std::sync::Arc::new(m); + kernel.install_contention_manifest(Some(arc)); + tracing::info!( + "Phase D Stage 3: loaded contention manifest from {:?} ({} entries)", + p, + count + ); + } + Err(e) => { + tracing::warn!( + "Phase D Stage 3: failed to load contention manifest from {:?}: {} — replay disabled", + p, + e + ); + } + } + } + } // AUDIT-032: default is `KernelState::xaudio_tick_enabled = true` now // that the dedicated worker eliminates HW-thread hijack regressions. // Treat `--xaudio-tick` / `XENIA_XAUDIO_TICK=...` as an explicit @@ -979,6 +1206,38 @@ fn cmd_exec_inner( "XAudio callback ticker enabled (AUDIT-032 default; toggle via --xaudio-tick / XENIA_XAUDIO_TICK)" ); } + // review-a Step 1 — `--force-spawn-workers` / `XENIA_CROWBAR_WORKERS=1`. + // Diagnostic-only, default-OFF. See + // `xenia-rs/audit-runs/review-a-step1-crowbar/`. 
+ let crowbar_env_on = std::env::var("XENIA_CROWBAR_WORKERS") + .ok() + .as_deref() + .map(|v| { + let v = v.trim().to_ascii_lowercase(); + v == "1" || v == "true" || v == "yes" + }) + .unwrap_or(false); + if crowbar_env_on { + kernel.crowbar_workers_enabled = true; + } + if let Ok(v) = std::env::var("XENIA_CROWBAR_TRIGGER_INSTR") { + if let Ok(n) = v.trim().parse::() { + kernel.crowbar_workers_trigger_instr = n; + } else { + tracing::warn!( + "XENIA_CROWBAR_TRIGGER_INSTR={:?} — failed to parse as u64; keeping default {}", + v, + kernel.crowbar_workers_trigger_instr, + ); + } + } + if kernel.crowbar_workers_enabled && !quiet { + tracing::warn!( + "review-a CROWBAR enabled: will force-spawn 4 sub_825070F0 workers \ + at instr={} (NOT a fix — diagnostic only)", + kernel.crowbar_workers_trigger_instr, + ); + } if reservations_table || reservations_via_env || parallel_active { kernel.reservations.enable(); if !quiet { @@ -1305,14 +1564,47 @@ fn cmd_exec_inner( main_handle, &mut mem, ); + // Phase C+16: main thread self-reference. Mirrors canary's + // `KernelState::LaunchModule` → `thread->Create()` → `RetainHandle()` + // at xthread.cc:414 (the "main XThread" also goes through Create()). + // Released at LR-sentinel implicit-exit in the prologue/epilogue + // path. Without this, ours's main refcount=1 (creator only) vs + // canary's 2 (creator + self) — masked at present because the guest + // never calls `NtClose` on the main-thread handle, but kept symmetric + // to avoid asymmetric `handle.destroy` on shutdown. + kernel.retain_handle(main_handle); - // If the input was a disc image, mount it so the kernel's file I/O - // handlers can serve the game's own assets via VFS. - if path.to_lowercase().ends_with(".iso") || path.to_lowercase().ends_with(".xiso") { - match xenia_vfs::disc_image::DiscImageDevice::open("d", std::path::Path::new(path)) { + // Mount the title's content into the VFS so the kernel's file I/O + // handlers (`NtCreateFile`, `NtOpenFile`, etc.) 
can serve game-data + // reads. Canary always mounts `game:` + `d:` regardless of input + // format (xenia_main.cc:611-651); ours's path normalisation already + // strips both prefixes to a single bucket (see + // `crate::path::DEVICE_PREFIXES` in xenia-kernel and `is_disc_prefix` + // in exports.rs:1725), so a single backing `VfsDevice` covers both. + // + // Mount logic: + // - `.iso` / `.xiso` → `DiscImageDevice` + // - directory → `HostPathDevice` rooted at the directory + // - bare .xex file → `HostPathDevice` rooted at the file's parent + // - STFS / CON / PIRS containers — NOT YET (no reader in ours; + // would be 500+ LOC. Deferred to a future pass.) + let input_path = std::path::Path::new(path); + let lower = path.to_lowercase(); + if lower.ends_with(".iso") || lower.ends_with(".xiso") { + match xenia_vfs::disc_image::DiscImageDevice::open("d", input_path) { Ok(disc) => kernel.vfs = Some(Box::new(disc)), Err(e) => tracing::warn!("Could not mount disc image for VFS: {}", e), } + } else if input_path.is_dir() { + tracing::info!("VFS: mounting host directory {:?} as game:/d:", input_path); + kernel.vfs = Some(Box::new(xenia_vfs::device::HostPathDevice::new("game", input_path))); + } else if let Some(parent) = input_path.parent() { + // Bare XEX file — mount its containing directory so the title can + // reach sibling assets via `game:\`. + if !parent.as_os_str().is_empty() { + tracing::info!("VFS: mounting XEX parent directory {:?} as game:/d:", parent); + kernel.vfs = Some(Box::new(xenia_vfs::device::HostPathDevice::new("game", parent))); + } } // ── Phase 3: Data export patching (variable imports) ───────────────── @@ -1324,14 +1616,80 @@ fn cmd_exec_inner( kernel.heap_alloc(size, mem).unwrap_or(0) }; + // Helper: allocate a 0x1C-byte X_OBJECT_TYPE descriptor with the + // four-char-code `pool_tag` at +0x18 (BE-readable) and write its + // guest VA into the import slot. 
Mirrors canary's + // `InitializeKernelGuestGlobals` populating per-type descriptors at + // `kernel_state.cc:1538-1615` — the type-tag bytes are non-zero + // there, so any guest code that reads the tag-byte field gets the + // real FourCC instead of zero. + let write_object_type = + |addr: u32, pool_tag: u32, mem: &xenia_memory::GuestMemory, kernel: &mut xenia_kernel::KernelState| { + let block = kernel.heap_alloc(0x1C, mem).unwrap_or(0); + if block != 0 { + mem.write_u32(block + 0x18, pool_tag); + } + mem.write_u32(addr, block); + }; + for lib in &header.import_libraries { for imp in &lib.imports { if imp.record_type != 0 { continue; } // Only variable entries let addr = imp.address; match (lib.name.as_str(), imp.ordinal) { + // ──── KernelGuestGlobals object-type descriptors ──── + // 0x1C-byte `X_OBJECT_TYPE` blocks with pool-tag FourCC at + // +0x18. Canary populates these via + // `InitializeKernelGuestGlobals` (`kernel_state.cc:1511+`); + // ours previously left every descriptor as a zero block, so + // any guest comparison against the tag-byte signature + // diverged. Tags are stored as host-order u32s whose BE + // byte-form spells the four-char code; `write_u32` BE-encodes + // automatically (see `GuestMemory::write_u32`). 
+ ("xboxkrnl.exe", 0x000E) => { + // ExEventObjectType — pool_tag "Evev" + write_object_type(addr, 0x76657645, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x0012) => { + // ExMutantObjectType — pool_tag "Muta" + write_object_type(addr, 0x6174754D, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x0017) => { + // ExSemaphoreObjectType — pool_tag "Sema" + write_object_type(addr, 0x616D6553, &mut mem, &mut kernel); + } ("xboxkrnl.exe", 0x001B) => { - // ExThreadObjectType — ptr to OBJECT_TYPE descriptor (0x40 bytes) - let block = alloc_zero(0x40, &mut mem, &mut kernel); + // ExThreadObjectType — pool_tag "Thre" + write_object_type(addr, 0x65726854, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x001C) => { + // ExTimerObjectType — pool_tag "Time" + write_object_type(addr, 0x656D6954, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x0036) => { + // IoCompletionObjectType — pool_tag "Comp" + write_object_type(addr, 0x706D6F43, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x003A) => { + // IoDeviceObjectType — pool_tag "Devi" + write_object_type(addr, 0x69766544, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x003E) => { + // IoFileObjectType — pool_tag "File" + write_object_type(addr, 0x656C6946, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x0106) => { + // ObDirectoryObjectType — pool_tag "Dire" + write_object_type(addr, 0x65726944, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x0112) => { + // ObSymbolicLinkObjectType — pool_tag "Symb" + write_object_type(addr, 0x626D7953, &mut mem, &mut kernel); + } + ("xboxkrnl.exe", 0x02DB) => { + // UsbdBootEnumerationDoneEvent — 0x10-byte X_KEVENT block, + // zero-initialised (signalled=false, type=NotificationEvent).
+ let block = alloc_zero(0x10, &mut mem, &mut kernel); mem.write_u32(addr, block); } ("xboxkrnl.exe", 0x0059) => { @@ -1340,16 +1698,51 @@ fn cmd_exec_inner( mem.write_u32(addr, block); } ("xboxkrnl.exe", 0x00AD) => { - // KeTimeStampBundle — 0x18 block with FILETIME at +0 and - // interrupt-time u64 at +0x10. Mirrors the clock used by - // KeQuerySystemTime so fast-path readers see consistent values. + // KeTimeStampBundle — 0x18-byte `X_TIME_STAMP_BUNDLE` + // matching canary's `kernel_state.h:98-104`: + // +0x00 u64 interrupt_time (BE, 100-ns ticks since boot) + // +0x08 u64 system_time (BE, 100-ns Windows FILETIME) + // +0x10 u32 tick_count (BE, monotonic ms since boot) + // +0x14 u32 padding + // Stash the VA in `KernelState` so the 1 ms host-side + // repeating updater spawned later in this file can refresh + // the fields — without that updater, polling loops that + // wait on `tick_count` to advance hang forever (the + // previous "FILETIME at +0 and +0x10" layout never wrote + // +0x08 at all and never advanced). let block = alloc_zero(0x18, &mut mem, &mut kernel); if block != 0 { - let fake_time: u64 = 132_500_000_000_000_000; // ~2021 FILETIME - mem.write_u32(block, (fake_time >> 32) as u32); - mem.write_u32(block + 4, fake_time as u32); - mem.write_u32(block + 0x10, (fake_time >> 32) as u32); - mem.write_u32(block + 0x14, fake_time as u32); + // Match ours's existing fixed `KeQueryInterruptTime` + // / `KeQuerySystemTime` constants for the initial + // sample — the timer thread will overwrite within + // ~1 ms, so these values are seen only briefly. + mem.write_u64(block, 0x0000_0001_0000_0000); // interrupt_time + mem.write_u64(block + 0x08, 132_500_000_000_000_000); // system_time + mem.write_u32(block + 0x10, 0); // tick_count + mem.write_u32(block + 0x14, 0); // padding + kernel.ke_timestamp_bundle_ptr = block; + } + mem.write_u32(addr, block); + } + ("xboxkrnl.exe", 0x000C) => { + // ExConsoleGameRegion — 4-byte u32 = 0xFFFFFFFF (region-free). 
+ // Canary writes this at `xboxkrnl_module.cc:144-150`. + let block = alloc_zero(4, &mut mem, &mut kernel); + if block != 0 { + mem.write_u32(block, 0xFFFF_FFFF); + } + mem.write_u32(addr, block); + } + ("xboxkrnl.exe", 0x0156) => { + // XboxHardwareInfo — 16-byte block. Canary + // (`xboxkrnl_module.cc:125-142`) writes `[0]=0x20` + // (HDD-present flag, bit 5) and `[4]=0x06` (CPU count), + // rest zero. Games branch on these for storage- and + // SMP-aware code paths. + let block = alloc_zero(16, &mut mem, &mut kernel); + if block != 0 { + mem.write_u8(block, 0x20); + mem.write_u8(block + 4, 0x06); } mem.write_u32(addr, block); } @@ -1361,13 +1754,59 @@ fn cmd_exec_inner( mem.write_u16(addr + 6, 0); } ("xboxkrnl.exe", 0x0193) => { - // XexExecutableModuleHandle -> image base + // XexExecutableModuleHandle: keep the pre-existing + // `*XexExecutableModuleHandle = base` write (the + // game's CRT branches off this slot's value; an + // attempt to wire up a proper LDR_DATA_TABLE_ENTRY + + // xex_header_base chain at idx=0 short-circuits the + // CRT's early RtlImageXexHeaderField probe, causing + // Phase A to diverge at idx=0 instead of growing past + // 102014 — see Phase C+3 investigation.md). Instead, + // allocate a guest-memory copy of the raw XEX header + // bytes (mirrors canary `user_module.cc:223-227`'s + // `guest_xex_header_`), record its VA in KernelState + // for `rtl_image_xex_header_field` to use as a + // fallback when the game passes a NULL `xex_header` + // arg (which it does here because the LDR walk + // through `base` yields PE OptionalHeader bytes, not + // a real header pointer). 
+ let header_size = header.header_size as usize; + if header_size > 0 && header_size <= data.len() { + let xex_va = alloc_zero(header.header_size, &mut mem, &mut kernel); + if xex_va != 0 { + mem.write_bulk(xex_va, &data[0..header_size]); + kernel.xex_header_guest_ptr = xex_va; + } + } mem.write_u32(addr, base); } ("xboxkrnl.exe", 0x01AE) => { - // ExLoadedCommandLine — ANSI empty string - let block = alloc_zero(0x10, &mut mem, &mut kernel); - // Block is already zero-initialized by heap_alloc -> empty string. + // ExLoadedCommandLine — 1024-byte ANSI buffer. + // Canary's default-init path (`xboxkrnl_module.cc:176-194`) + // writes the quoted form `"default.xex"` (with literal + // ASCII double-quotes) as a placeholder until post-launch + // replacement. An empty zero-block silently violates the + // CRT contract (any title that scans for the quote + // characters sees end-of-string immediately). + let block = alloc_zero(1024, &mut mem, &mut kernel); + if block != 0 { + let cmdline: &[u8] = b"\"default.xex\"\0"; + mem.write_bulk(block, cmdline); + } + mem.write_u32(addr, block); + } + ("xboxkrnl.exe", 0x01AF) => { + // ExLoadedImageName — 256-byte ANSI buffer. Canary + // (`xboxkrnl_module.cc:166-174`, + // `kernel_state.cc:486-495`) post-launch fills this with + // the executable module path; for ours we write + // "default.xex\0" to match canary's pre-launch state. + // Size matches canary's `kExLoadedImageNameSize = 256`. + let block = alloc_zero(256, &mut mem, &mut kernel); + if block != 0 { + let imgname: &[u8] = b"default.xex\0"; + mem.write_bulk(block, imgname); + } mem.write_u32(addr, block); } ("xboxkrnl.exe", 0x01BE) => { @@ -1501,6 +1940,44 @@ fn cmd_exec_inner( // responsibility per the trait contract.) 
let mem_arc = std::sync::Arc::new(mem); + // ── KeTimeStampBundle 1 ms repeating updater ── + // + // Canary maintains the bundle's `interrupt_time` / `system_time` / + // `tick_count` fields via `HighResolutionTimer::CreateRepeating(1 ms, + // UpdateKeTimestampBundle)` registered at `kernel_state.cc:1272-1295`. + // Without an equivalent host-side ticker, the bundle stays frozen at its + // boot-time values and guest polling loops that wait on `tick_count` to + // advance hang forever. Spawn a detached thread that wakes every 1 ms, + // recomputes the three fields from a monotonic `Instant`, and writes + // them BE through the shared `Arc`. Cooperative shutdown + // via the existing `shutdown_arc` flag — flipped when the dispatch + // returns — so test runs don't leak threads. No-op if the patcher + // didn't allocate a bundle (the XEX never imported ord 0x00AD). + { + let ke_bundle_ptr = kernel.ke_timestamp_bundle_ptr; + if ke_bundle_ptr != 0 { + let mem_for_timer = mem_arc.clone(); + let shutdown_for_timer = shutdown_arc.clone(); + std::thread::Builder::new() + .name("ke-timestamp-bundle".to_string()) + .spawn(move || { + use xenia_memory::MemoryAccess; + let start = std::time::Instant::now(); + const SYSTEM_TIME_EPOCH: u64 = 132_500_000_000_000_000; + while !shutdown_for_timer.load(std::sync::atomic::Ordering::Relaxed) { + std::thread::sleep(std::time::Duration::from_millis(1)); + let elapsed = start.elapsed(); + let ms = elapsed.as_millis() as u32; + let ticks_100ns = (elapsed.as_micros() as u64) * 10; + mem_for_timer.write_u64(ke_bundle_ptr, ticks_100ns); + mem_for_timer.write_u64(ke_bundle_ptr + 0x08, SYSTEM_TIME_EPOCH + ticks_100ns); + mem_for_timer.write_u32(ke_bundle_ptr + 0x10, ms); + } + }) + .expect("spawn ke-timestamp-bundle thread"); + } + } + // Spawn the real GPU worker if the threaded backend was chosen at // kernel-construction time. 
The handle the kernel already holds // (`GpuBackend::Threaded`) is the CPU-side proxy; the worker owns @@ -1679,6 +2156,11 @@ fn cmd_exec_inner( } print_summary(kernel.scheduler.ctx(0), &debugger, &db_writer, quiet); dump_thread_diagnostic(&kernel, &*mem_arc, quiet); + // Iterate 2.M — always-on (reading-error #42). Emits a + // sibling JSON next to the Phase-A trace; runs regardless + // of --quiet so future "is the wedge moved?" questions + // never depend on a manual non-quiet re-run. + write_thread_state_dump(&kernel); info!( wall_ms = started.elapsed().as_millis() as u64, instructions = stats.instruction_count, @@ -1896,6 +2378,7 @@ enum RoundCtl { /// asks for shutdown. fn coord_pre_round( kernel: &mut xenia_kernel::KernelState, + mem: &xenia_memory::GuestMemory, stats: &ExecStats, max_instructions: Option, ips_limit: Option, @@ -1995,6 +2478,12 @@ fn coord_pre_round( try_inject_audio_callback(kernel); } + // review-a Step 1 — one-shot diagnostic crowbar. No-op when disabled + // or already fired. Uses the caller's `&GuestMemory` directly. + if kernel.crowbar_workers_enabled && !kernel.crowbar_workers_fired { + kernel.try_fire_crowbar_workers(mem, stats.instruction_count); + } + RoundCtl::Continue } @@ -2211,6 +2700,21 @@ fn worker_prologue( let pc = kernel.scheduler.ctx(hw_id).pc; + // Phase B snapshot — no-op when `phase_b_snapshot_dir == None` + // (zero-cost Option-tag test on the hot path). Fires once on the + // entry thread at the moment immediately before its first guest + // instruction at entry_pc executes. See + // crates/xenia-kernel/src/phase_b_snapshot.rs. + if kernel.phase_b_snapshot_dir.is_some() { + let current_tid = kernel.scheduler.tid(hw_id).unwrap_or(0); + xenia_kernel::phase_b_snapshot::fire_if_entry_thread( + kernel, + mem, + pc, + current_tid, + ); + } + // 0) Diagnostic ctor-probe: if `pc` is in // `kernel.ctor_probe_pcs`, capture live r3/lr/sp + back-chain // and println one record. Read-only; lockstep digest unaffected. 
@@ -2267,19 +2771,41 @@ fn worker_prologue( cycle = stats.instruction_count, "HW thread returned to LR sentinel — marking exited" ); + // Phase C+15-α: schema-v1 `thread.exit` event on the implicit + // (LR-sentinel) thread-exit path. Symmetric with + // `ex_terminate_thread`; canary's `XThread::Execute` ends in + // `Exit()` which emits the same event regardless of whether the + // guest called `ExTerminateThread` or simply returned. + if let (Some(t), true) = (tid, xenia_kernel::event_log::is_enabled()) { + let cycle = kernel.scheduler.ctx(hw_id).timebase; + xenia_kernel::event_log::emit_thread_exit(t, cycle, 0); + } let (_, _exited_tid, handle_opt) = kernel.scheduler.exit_current(0); - if let Some(h) = handle_opt - && let Some(xenia_kernel::objects::KernelObject::Thread { + if let Some(h) = handle_opt { + if let Some(xenia_kernel::objects::KernelObject::Thread { exit_code, waiters, .. }) = kernel.objects.get_mut(&h) - { - *exit_code = Some(0); - let to_wake: Vec = std::mem::take(waiters); - for w in to_wake { - kernel.scheduler.wake_ref(w); + { + *exit_code = Some(0); + let to_wake: Vec = std::mem::take(waiters); + for w in to_wake { + kernel.scheduler.wake_ref(w); + } } + // Phase C+16: release the thread self-reference (paired with + // the retain installed at spawn time by `ex_create_thread` / + // `xam_task_schedule`). On the main thread (INITIAL_GUEST_TID) + // no retain was installed by `install_initial_thread`, so the + // refcount stays at 1 (creator-only). Pre-condition: a + // self-retained thread has refcount ≥ 2 at this point; an + // un-retained thread (main) has refcount = 1. We unconditionally + // call `release_handle` — for main, this destroys it (which is + // fine; main is exiting). For workers, this drops the + // self-ref; if guest still holds a ref (no NtClose yet) the + // object survives; if guest already closed, this destroys. 
+ kernel.release_handle(h); } return PrologueOutcome::Continue; } @@ -2329,10 +2855,32 @@ fn worker_prologue( // 3) Unmapped PC. if !mem.is_mapped(pc) { + // Crowbar v2 — enrich fault with tid/lr/r3 so we can attribute the + // fault back to a specific guest thread. Read-only, no behaviour + // change. The kernel lock is held by the caller per + // run_execution's invariant; tid/ctx lookups are safe. + let tid = kernel.scheduler.tid(hw_id); + let r = kernel.scheduler.current_ref(); + let t = kernel.scheduler.thread(r); + let lr = t.ctx.lr; + let r3 = t.ctx.gpr[3]; + let r4 = t.ctx.gpr[4]; + let r29 = t.ctx.gpr[29]; + let r30 = t.ctx.gpr[30]; + let r31 = t.ctx.gpr[31]; + let ctr = t.ctx.ctr; tracing::error!( cycle = stats.instruction_count, pc = format_args!("{:#010x}", pc), hw_id, + tid = ?tid, + lr = format_args!("{:#010x}", lr), + ctr = format_args!("{:#010x}", ctr), + r3 = format_args!("{:#010x}", r3), + r4 = format_args!("{:#010x}", r4), + r29 = format_args!("{:#010x}", r29), + r30 = format_args!("{:#010x}", r30), + r31 = format_args!("{:#010x}", r31), "FAULT: PC in unmapped memory" ); return PrologueOutcome::BreakOuter; @@ -2603,6 +3151,7 @@ fn run_execution( // without duplicating it from the lockstep path. match coord_pre_round( kernel, + mem, &stats, max_instructions, ips_limit, @@ -3005,6 +3554,7 @@ fn run_execution_parallel( let s = stats_mtx.lock().expect("stats mutex poisoned"); coord_pre_round( &mut *guard, + mem, &*s, max_instructions, ips_limit, @@ -3907,6 +4457,131 @@ fn dump_thread_diagnostic( } } +/// Iterate 2.M — always-on structured exit-state dump (reading-error #42). +/// +/// Phase-A's JSONL trace is blind to blocked-forever waits: a wait that +/// never returns emits no `kernel.return` event, so a wedge looks identical +/// to a clean termination. Iterate 2.J misclassified the wedge that way and +/// cost iterate 2.K a re-dispatch to recover. 
This dumps a machine-readable +/// snapshot of every alive thread + the handle/wedge map at exit time, +/// regardless of `--quiet`, so every future iterate has the wedge in hand +/// alongside the JSONL trace without needing a manual diagnostic re-run. +/// +/// Output: `exit-thread-state.json` in the Phase-A trace's directory when Phase-A is +/// enabled; `./exit-thread-state.json` (CWD) otherwise. Filename is +/// predictable — the harness can `glob('**/exit-thread-state.json')`. +fn write_thread_state_dump(kernel: &xenia_kernel::KernelState) { + use serde_json::{json, Value}; + use xenia_kernel::objects::KernelObject; + let dump_path: std::path::PathBuf = xenia_kernel::event_log::output_path() + .and_then(|p| p.parent().map(|d| d.join("exit-thread-state.json"))) + .unwrap_or_else(|| std::path::PathBuf::from("exit-thread-state.json")); + let tid_of = |r: &xenia_cpu::ThreadRef| -> u32 { + kernel.scheduler.slots.get(r.hw_id as usize) + .and_then(|s| s.runqueue.get(r.idx as usize)).map(|t| t.tid).unwrap_or(0) + }; + // Returns (type_name, signaler_tid_if_known, full json payload). + let handle_meta = |h: u32| -> (&'static str, Option, Value) { + let waiters_v = |w: &Vec| -> Value { + json!(w.iter().map(&tid_of).collect::>()) + }; + match kernel.objects.get(&h) { + Some(KernelObject::Event { signaled, manual_reset, waiters }) => ("Event", None, + json!({"type":"Event","signaled":signaled,"manual_reset":manual_reset,"waiters_tid":waiters_v(waiters)})), + Some(KernelObject::Semaphore { count, max, waiters }) => ("Semaphore", None, + json!({"type":"Semaphore","count":count,"max":max,"waiters_tid":waiters_v(waiters)})), + Some(KernelObject::Thread { id, exit_code, waiters, .. }) => ("Thread", Some(*id), + json!({"type":"Thread","thread_id":id,"exited":exit_code.is_some(),"exit_code":exit_code,"signaler_tid_if_known":id,"waiters_tid":waiters_v(waiters)})), + Some(KernelObject::Timer { signaled, deadline, waiters, ..
}) => ("Timer", None, + json!({"type":"Timer","signaled":signaled,"deadline":deadline,"waiters_tid":waiters_v(waiters)})), + Some(KernelObject::Mutex { owner, recursion, waiters }) => ("Mutex", None, + json!({"type":"Mutex","owner_hw":owner,"recursion":recursion,"waiters_tid":waiters_v(waiters)})), + Some(KernelObject::NotifyListener { mask, queue, waiters, .. }) => ("NotifyListener", None, + json!({"type":"NotifyListener","mask":format!("{:#x}",mask),"pending":queue.len(),"waiters_tid":waiters_v(waiters)})), + Some(KernelObject::File { path, .. }) => ("File", None, json!({"type":"File","path":path})), + None => ("unknown", None, json!({"type":"unknown_or_dropped"})), + } + }; + let mut alive: Vec = Vec::new(); + let mut wedge_map: Vec = Vec::new(); + for (hw_id, slot) in kernel.scheduler.slots.iter().enumerate() { + for (idx, t) in slot.runqueue.iter().enumerate() { + let (state_name, block_payload): (&'static str, Value) = match &t.state { + xenia_cpu::HwState::Idle => ("Idle", Value::Null), + xenia_cpu::HwState::Ready => ("Ready", Value::Null), + xenia_cpu::HwState::Exited(code) => ("Exited", json!({"exit_code":code})), + xenia_cpu::HwState::ServicingIrq(_) => ("ServicingIrq", Value::Null), + xenia_cpu::HwState::Blocked(reason) => { + let body = match reason { + xenia_cpu::BlockReason::Suspended => json!({"kind":"Suspended"}), + xenia_cpu::BlockReason::DelayUntil(d) => json!({"kind":"DelayUntil","deadline_ns":d}), + xenia_cpu::BlockReason::CriticalSection(cs) => + json!({"kind":"CriticalSection","cs_ptr":format!("{:#010x}",cs)}), + xenia_cpu::BlockReason::WaitAny { handles, deadline } + | xenia_cpu::BlockReason::WaitAll { handles, deadline } => { + let kind = if matches!(reason, xenia_cpu::BlockReason::WaitAny{..}) {"WaitAny"} else {"WaitAll"}; + let hs: Vec = handles.iter().map(|h| { + let (ty, sig_tid, meta) = handle_meta(*h); + // Wedge-map: surface every blocked-forever + // wait (deadline==None) with a one-line + // human summary + structured cross-ref so + 
// future iterates can diff vs canary. + if deadline.is_none() { + let summary = match ty { + "Thread" => format!("tid={} → Thread(id={})", t.tid, sig_tid.unwrap_or(0)), + "Event" => format!("tid={} → Event(sig={})", t.tid, + meta.get("signaled").and_then(|v|v.as_bool()).unwrap_or(false)), + "Semaphore" => format!("tid={} → Semaphore({}/{})", t.tid, + meta.get("count").and_then(|v|v.as_i64()).unwrap_or(0), + meta.get("max").and_then(|v|v.as_i64()).unwrap_or(0)), + _ => format!("tid={} → handle {:#010x} ({})", t.tid, h, ty), + }; + wedge_map.push(json!({ + "waiter_tid": t.tid, + "waiter_pc": format!("{:#010x}", t.ctx.pc), + "handle": format!("{:#010x}", h), + "handle_type": ty, + "signaler_tid_if_known": sig_tid, + "summary": summary, + })); + } + json!({"handle":format!("{:#010x}",h),"object":meta}) + }).collect(); + json!({"kind":kind,"handles":hs,"deadline_ns_or_inf":deadline}) + } + }; + ("Blocked", body) + } + }; + alive.push(json!({ + "tid": t.tid, "hw_id": hw_id, "idx": idx, + "pc": format!("{:#010x}", t.ctx.pc), + "lr": format!("{:#010x}", t.ctx.lr as u32), + "sp": format!("{:#010x}", t.ctx.gpr[1] as u32), + "priority": t.priority, + "affinity_mask": format!("{:#04x}", t.affinity_mask), + "suspend_count": t.suspend_count, + "state": state_name, + "block_reason": block_payload, + })); + } + } + let dump = json!({ + "schema_version": 1, "produced_by": "ours", "reason": "exit_dump", + "alive_threads": alive, "wedge_map": wedge_map, + }); + match serde_json::to_string_pretty(&dump) { + Ok(s) => match std::fs::write(&dump_path, s) { + Ok(()) => eprintln!( + "exit-thread-state: wrote {} thread(s), {} wedge entr(ies) to {}", + alive.len(), wedge_map.len(), dump_path.display(), + ), + Err(e) => eprintln!("exit-thread-state: failed to write {}: {e}", dump_path.display()), + }, + Err(e) => eprintln!("exit-thread-state: failed to serialize: {e}"), + } +} + #[allow(clippy::too_many_arguments)] #[instrument(skip_all, fields(title))] fn run_with_ui( @@ -3974,6 +4649,8 @@ 
fn run_with_ui( print_summary(kernel.scheduler.ctx(0), &debugger, &db_writer, quiet); dump_thread_diagnostic(&kernel, &mem, quiet); + // Iterate 2.M — see cmd_exec_inner path for rationale. + write_thread_state_dump(&kernel); info!( wall_ms = started.elapsed().as_millis() as u64, instructions = stats.instruction_count, diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs index 0e150e8..9101b54 100644 --- a/crates/xenia-cpu/src/interpreter.rs +++ b/crates/xenia-cpu/src/interpreter.rs @@ -117,17 +117,27 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) - ctx.pc += 4; } PpcOpcode::addis => { - // Xbox 360 user mode is 32-bit ABI (MSR.SF=0), so addis must - // produce a value whose upper 32 bits don't pollute downstream - // 64-bit arithmetic. The PPC ISA in 64-bit mode sign-extends - // simm16 before the shift, producing 0xFFFFFFFF_xxxx0000 for - // negative simm16 (high bit set). When this value flows into - // a 64-bit subfc against a zero-extended lwz value, the unsigned - // 64-bit comparison yields wrong CA. Truncate to 32 bits to - // simulate 32-bit ABI behavior. - let ra_val = if instr.ra() == 0 { 0u64 } else { ctx.gpr[instr.ra()] }; - let result = ra_val.wrapping_add((instr.simm16() as i64 as u64) << 16); - ctx.gpr[instr.rd()] = result as u32 as u64; + // Phase C+23: `addis` (and the `lis` simplified mnemonic) must + // sign-extend the shifted immediate to the full 64 bits before + // storing into the GPR, matching canary's HIR emitter + // (`InstrEmit_addis` in `ppc_emit_alu.cc`: `EXTS16(SI) << 16` + // as a 64-bit constant). Game code commonly builds a negative + // 32-bit value via `lis rN, 0xFFFB; ori rN, rN, 0x6C20` + // (yielding the i32 -300,000 for a 30ms `KeWait` timeout) and + // then stores it as a 64-bit doubleword via `std`. 
Without + // sign extension the high half on the wire was 0x00000000, + // turning the timeout into a positive ~4.3-billion-tick + // absolute deadline (~7 minutes) instead of a 30ms relative + // wait — surfacing as `wait.begin.timeout_ns=429466729600` + // on canary tid=12 → ours tid=7 idx=3 sister chain + // (cold-vs-cold C+22 baseline). Defensive 32-bit truncation + // for the arithmetic chain consumers (`subfcx`/`addex`/etc.) + // is already implemented at each consumer site (see PPCBUG-002/ + // 007/etc.), so widening `addis` here does NOT regress them. + let ra_val = if instr.ra() == 0 { 0i64 } else { ctx.gpr[instr.ra()] as i64 }; + let shifted = (instr.simm16() as i64) << 16; + let result = ra_val.wrapping_add(shifted); + ctx.gpr[instr.rd()] = result as u64; ctx.pc += 4; } PpcOpcode::addic => { @@ -4934,6 +4944,92 @@ mod tests { assert_eq!(ctx.gpr[3], 0x10000); } + /// Phase C+23 regression: `addis rD, 0, neg_simm` (the `lis` form + /// with a negative immediate) must sign-extend the result to the + /// full 64 bits, matching canary's HIR emitter. Without this fix, + /// game code that builds a 32-bit negative value via + /// `lis r11, 0xFFFB; ori r11, r11, 0x6C20` and then stores the + /// result as a 64-bit doubleword via `std` would put 0x00000000 + /// in the high half instead of the correct 0xFFFFFFFF, turning a + /// 30 ms relative `KeWaitForSingleObject` timeout into a positive + /// absolute deadline ~7 minutes away. Anchored by the cold-vs-cold + /// sister chain canary tid=12 → ours tid=7 idx=3 divergence. + #[test] + fn addis_with_negative_simm_sign_extends_to_64_bits() { + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + // addis r11, r0, 0xFFFB (lis r11, 0xFFFB) + // op=15, rd=11, ra=0, simm=0xFFFB. 
+ let raw = (15u32 << 26) | (11u32 << 21) | (0u32 << 16) | 0xFFFBu32; + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert_eq!( + ctx.gpr[11], 0xFFFFFFFF_FFFB0000u64, + "addis with negative simm must sign-extend to 64 bits" + ); + } + + /// Phase C+23 regression: the full `lis + ori + std` sequence that + /// builds the −300,000 timeout tick count used by Sylpheed for its + /// 30 ms `KeWait` calls must produce 0xFFFFFFFFFFFB6C20 on the wire, + /// not 0x00000000FFFB6C20. This is the proximate cause of the + /// `wait.begin.timeout_ns = 429466729600` divergence on canary tid=12 + /// → ours tid=7 idx=3 in the cold-vs-cold C+22 baseline. + #[test] + fn lis_ori_std_negative_timeout_writes_sign_extended_doubleword() { + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + // r1 = 0x100 (stack pointer surrogate). Storage slot at r1+8. + ctx.gpr[1] = 0x100; + // lis r11, 0xFFFB ; r11 = 0xFFFFFFFFFFFB0000 + let lis = (15u32 << 26) | (11u32 << 21) | (0u32 << 16) | 0xFFFBu32; + // ori r11, r11, 0x6C20 ; r11 = 0xFFFFFFFFFFFB6C20 + // op=24 (ori): D-form encoding | rs(11) | ra(11) | uimm. + let ori = (24u32 << 26) | (11u32 << 21) | (11u32 << 16) | 0x6C20u32; + // std r11, 8(r1) ; mem[0x108..0x110] = 0xFFFFFFFFFFFB6C20 + // op=62, DS-form, ds_field=8>>2=2, xo=0. 
+ let std_op = (62u32 << 26) | (11u32 << 21) | (1u32 << 16) | (8u32 & 0xFFFCu32); + write_instr(&mut mem, 0, lis); + write_instr(&mut mem, 4, ori); + write_instr(&mut mem, 8, std_op); + ctx.pc = 0; + step(&mut ctx, &mut mem); // lis + assert_eq!(ctx.gpr[11], 0xFFFFFFFF_FFFB0000u64); + step(&mut ctx, &mut mem); // ori + assert_eq!(ctx.gpr[11], 0xFFFFFFFF_FFFB6C20u64); + step(&mut ctx, &mut mem); // std + let stored = mem.read_u64(0x108); + assert_eq!( + stored, 0xFFFFFFFF_FFFB6C20u64, + "std must persist all 64 bits of the sign-extended GPR" + ); + // Interpreting the stored doubleword as a 100ns NT TIMEOUT tick + // count: it must round-trip to −300,000 (30 ms relative wait), + // NOT to +4,294,667,296 (the C+22 broken value). + assert_eq!(stored as i64, -300_000i64); + assert_eq!((stored as i64).wrapping_mul(100), -30_000_000i64); + } + + /// Phase C+23 regression: ensure `addis` against a non-zero rA still + /// performs the canonical Add with 64-bit semantics. Used by + /// arithmetic chains that combine a sign-extended `lis` high half + /// with a subsequent `addi` low half. Equivalent to canary's HIR + /// `Add(LoadGPR(rA), const_i64(simm << 16))`. + #[test] + fn addis_with_nonzero_ra_adds_in_64_bit() { + let mut ctx = PpcContext::new(); + let mut mem = TestMem::new(); + // r4 = 0x1234 already. addis r5, r4, 0xFFFE => r5 = r4 + (-2<<16) + // = 0x1234 + 0xFFFFFFFFFFFE0000 + ctx.gpr[4] = 0x1234; + let raw = (15u32 << 26) | (5u32 << 21) | (4u32 << 16) | 0xFFFEu32; + write_instr(&mut mem, 0, raw); + ctx.pc = 0; + step(&mut ctx, &mut mem); + assert_eq!(ctx.gpr[5], 0xFFFFFFFF_FFFE1234u64); + } + #[test] fn test_lwz_stw() { let mut ctx = PpcContext::new(); diff --git a/crates/xenia-cpu/src/scheduler.rs b/crates/xenia-cpu/src/scheduler.rs index 1f1d68a..aca2439 100644 --- a/crates/xenia-cpu/src/scheduler.rs +++ b/crates/xenia-cpu/src/scheduler.rs @@ -42,6 +42,19 @@ pub const QUANTUM_DEFAULT: u32 = 50_000; /// gets one when the slot fills up. 
const PRUNE_DEPTH_THRESHOLD: usize = 4; +/// Scheduler rounds per +1 age-priority bonus. With one bonus point per +/// round a thread sits Ready without being picked, a priority-0 thread +/// reaches parity with a same-slot priority-N peer after N rounds and wins +/// the tiebreak on round N+1. Iterate 2.V: closes the strict-priority +/// starvation hole that left tid=6 (pri=0) on CPU5 indefinitely behind a +/// CPU-bound tid=10 (pri=15). Counts in scheduler round_count, which is +/// fully deterministic (no host_ns / wallclock dependency). +const AGING_ROUNDS_PER_BONUS: u64 = 1; + +/// Cap on the age-priority bonus. 31 easily overwhelms any realistic NT +/// priority-class difference (max is ~31) without saturating i32 math. +const MAX_AGE_BONUS: i32 = 31; + /// Stable identity for a guest thread across all scheduler tables. /// /// The positional `idx` is only valid while the source slot's runqueue @@ -117,6 +130,14 @@ pub struct GuestThread { /// Axis 3 instruction budget. Decremented per retired step on this /// thread; on zero, slot rotates within same-priority tier. pub quantum_remaining: u32, + /// Iterate 2.V: scheduler `round_count` at the last time this thread + /// was picked to run on its slot (via `begin_slot_visit` or the + /// `decrement_quantum` rotation hand-off). Used by `pick_runnable` + /// to compute an age-priority bonus so a CPU-bound high-priority + /// peer can't strict-priority-starve a same-slot Ready peer forever. + /// Initialized to the scheduler's `round_count` at spawn so a fresh + /// thread doesn't inherit a giant age bonus on its first pick. + pub last_run_round: u64, } impl GuestThread { @@ -136,6 +157,7 @@ impl GuestThread { affinity_mask: 0xFF, ideal_processor: None, quantum_remaining: QUANTUM_DEFAULT, + last_run_round: 0, } } } @@ -206,14 +228,23 @@ impl Default for HwSlot { } impl HwSlot { - /// Index of the highest-priority Ready/ServicingIrq thread in this - /// slot's runqueue. Tiebreak: prefer lower index (deterministic). 
- pub fn pick_runnable(&self) -> Option { + /// Index of the highest *effective* priority Ready/ServicingIrq + /// thread in this slot's runqueue. Effective priority = base priority + /// + age bonus, where age = scheduler rounds since the thread was + /// last picked. The age bonus prevents strict-priority starvation: + /// without it, a CPU-bound priority=15 peer pinned to the same slot + /// would deterministically beat any Ready priority=0 peer forever + /// (closes iterate 2.V's root-cause wedge — tid=10 vs tid=6 on CPU5). + /// Tiebreak on equal effective priority: lower idx wins (deterministic). + /// + /// `now_round` is the scheduler's current `round_count`; passing it in + /// keeps this method side-effect-free and decouples it from `Scheduler`. + pub fn pick_runnable(&self, now_round: u64) -> Option { self.runqueue .iter() .enumerate() .filter(|(_, t)| matches!(t.state, HwState::Ready | HwState::ServicingIrq(_))) - .max_by_key(|(i, t)| (t.priority, -(*i as i64))) + .max_by_key(|(i, t)| (effective_priority(t, now_round), -(*i as i64))) .map(|(i, _)| i) } @@ -228,10 +259,31 @@ impl HwSlot { } +/// Compute the effective scheduling priority of `t` at scheduler round +/// `now_round`. Adds a deterministic age bonus equal to +/// `(now_round - t.last_run_round) / AGING_ROUNDS_PER_BONUS`, capped at +/// `MAX_AGE_BONUS`. `saturating_sub` guards against the case where +/// `last_run_round` was set in a future round (shouldn't happen, but +/// keeps the math defensive). See module-level docs at +/// `AGING_ROUNDS_PER_BONUS` for rationale. +#[inline] +fn effective_priority(t: &GuestThread, now_round: u64) -> i32 { + let age = now_round.saturating_sub(t.last_run_round); + let bonus_u64 = age / AGING_ROUNDS_PER_BONUS; + let bonus = bonus_u64.min(MAX_AGE_BONUS as u64) as i32; + t.priority.saturating_add(bonus) +} + #[derive(Debug, Clone, Copy)] pub enum OrderMode { Fixed, Seeded { seed: u64 }, + /// Stage 0 quantum-preemption spike. 
Replaces `QUANTUM_DEFAULT` at every + /// quantum-reload site with `ticks`, so the scheduler rotates between + /// same-priority peers more (or less) frequently. Used to probe whether + /// scheduling shape alone unblocks the 104,607 RtlEnterCS cap before + /// committing to the contention-replay manifest stages. + ScanQuantum { ticks: u32 }, } impl OrderMode { @@ -244,6 +296,14 @@ impl OrderMode { .unwrap_or(0xC0FFEE_C0FFEE); OrderMode::Seeded { seed } } + Some("quantum") | Some("Quantum") | Some("QUANTUM") => { + let ticks = std::env::var("XENIA_SCHED_QUANTUM") + .ok() + .and_then(|s| s.parse::().ok()) + .filter(|&t| t > 0) + .unwrap_or(1000); + OrderMode::ScanQuantum { ticks } + } _ => OrderMode::Fixed, } } @@ -369,7 +429,7 @@ impl Scheduler { pub fn new() -> Self { let order = OrderMode::from_env(); let rng_state = match order { - OrderMode::Fixed => 0, + OrderMode::Fixed | OrderMode::ScanQuantum { .. } => 0, OrderMode::Seeded { seed } => seed.max(1), }; Scheduler { @@ -379,7 +439,15 @@ impl Scheduler { order, rng_state, timed_waits: Vec::new(), - tls_slot_count: 0, + // Match canary's `kDefaultTlsSlotCount = 1024` (xthread.cc:335). + // Per-thread `tls_values` vec is sized to this count when spawned + // (see [`Self::install_main_thread`] / [`Self::spawn`]). Cost is + // 4 KiB per guest thread; 24 KiB across the 6 HW slots. Without + // this, `tls_values` starts empty and any `lwz rN, off(rTLS)` + // before the first `ExAllocateTls` reads guest memory zeros + // (matches canary observably) while host-side `tls_values[idx]` + // accesses panic on out-of-range until the lazy grow kicks in. + tls_slot_count: 1024, non_empty_runnable: 0, rotation_cursor: 0, reservation_table: None, @@ -614,6 +682,13 @@ impl Scheduler { t.priority = params.priority; t.affinity_mask = mask; t.ideal_processor = params.ideal_processor; + // Stage 0 — honor ScanQuantum reload on the freshly-spawned thread; + // `default_fields` set QUANTUM_DEFAULT before the scheduler was reachable. 
+ t.quantum_remaining = Self::quantum_for(self.order); + // Iterate 2.V — pin the age-bonus baseline so a freshly-spawned + // thread doesn't inherit a large age bonus from the scheduler's + // accumulated round_count. + t.last_run_round = self.round_count; // M3.7 — populate the inter-thread reservation handle + slot id // so the interpreter can route lwarx/stwcx through the table. t.ctx.hw_id = slot_id; @@ -663,6 +738,11 @@ impl Scheduler { t.pcr_base = pcr_base; t.tls_base = tls_base; t.tls_values = vec![0; self.tls_slot_count]; + // Stage 0 — same ScanQuantum override as `spawn`; default_fields + // doesn't know about the scheduler's order. + t.quantum_remaining = Self::quantum_for(self.order); + // Iterate 2.V — same baseline pin as `spawn`. + t.last_run_round = self.round_count; self.slots[0].runqueue.push(t); mem.write_pcr_id(pcr_base, 0); self.recompute_slot_runnable(0); @@ -742,9 +822,17 @@ impl Scheduler { /// Called by the step loop at the top of each per-slot visit. Picks the /// highest-priority Ready thread on the slot, sets `running_idx`, and /// stashes `self.current` so exports can reach it. + /// + /// Iterate 2.V: passes the scheduler's `round_count` to `pick_runnable` + /// for age-priority computation, then stamps the winner's + /// `last_run_round` so the next round's age math starts from now. pub fn begin_slot_visit(&mut self, hw_id: u8) { + let now_round = self.round_count; let slot = &mut self.slots[hw_id as usize]; - slot.running_idx = slot.pick_runnable(); + slot.running_idx = slot.pick_runnable(now_round); + if let Some(idx) = slot.running_idx { + slot.runqueue[idx].last_run_round = now_round; + } self.current = slot .running_idx .map(|idx| ThreadRef::new(hw_id, idx as u16)); @@ -765,6 +853,18 @@ impl Scheduler { /// /// Returns `true` if a rotation occurred (purely informational; /// callers don't need to act on it). + /// Quantum reload value to use given the current `OrderMode`. 
Returns + /// `QUANTUM_DEFAULT` for `Fixed`/`Seeded` so existing baselines stay + /// byte-identical; returns `ticks.max(1)` for `ScanQuantum` so the Stage + /// 0 spike can sweep faster rotations. + #[inline] + fn quantum_for(order: OrderMode) -> u32 { + match order { + OrderMode::ScanQuantum { ticks } => ticks.max(1), + _ => QUANTUM_DEFAULT, + } + } + pub fn decrement_quantum(&mut self) -> bool { let Some(r) = self.current else { return false; }; let slot = &mut self.slots[r.hw_id as usize]; @@ -778,7 +878,7 @@ impl Scheduler { return false; } let my_pri = t.priority; - t.quantum_remaining = QUANTUM_DEFAULT; + t.quantum_remaining = Self::quantum_for(self.order); // Scan the rest of the runqueue for a same-priority Ready peer. // Priority-higher peers are already going to win the next // `pick_runnable` on this slot, so we only need to find an *equal* @@ -795,6 +895,9 @@ impl Scheduler { } let cand = &slot.runqueue[i]; if cand.priority == my_pri && matches!(cand.state, HwState::Ready) { + // Iterate 2.V — pin age-bonus baseline on the freshly + // promoted thread so the next round sees age 0 for it. 
+ slot.runqueue[i].last_run_round = self.round_count; slot.running_idx = Some(i); self.current = Some(ThreadRef::new(r.hw_id, i as u16)); return true; @@ -846,7 +949,7 @@ impl Scheduler { _ => return, } t.state = HwState::Ready; - t.quantum_remaining = QUANTUM_DEFAULT; + t.quantum_remaining = Self::quantum_for(self.order); self.timed_waits.retain(|&(_, tr)| tr != r); self.recompute_slot_runnable(r.hw_id); } @@ -868,7 +971,7 @@ impl Scheduler { } if t.suspend_count == 0 && matches!(t.state, HwState::Blocked(BlockReason::Suspended)) { t.state = HwState::Ready; - t.quantum_remaining = QUANTUM_DEFAULT; + t.quantum_remaining = Self::quantum_for(self.order); } self.recompute_slot_runnable(r.hw_id); prev @@ -1121,7 +1224,7 @@ impl Scheduler { BlockReason::Suspended } }; - t.quantum_remaining = QUANTUM_DEFAULT; + t.quantum_remaining = Self::quantum_for(self.order); self.recompute_slot_runnable(r.hw_id); tracing::info!( "scheduler: advanced to deadline {} waking hw={} idx={}", @@ -1182,6 +1285,7 @@ impl Scheduler { /// `ctx_mut_ref(r).gpr[3]`. Returns the refs that were woken. 
pub fn unblock_on_deadlock(&mut self) -> Vec { let mut woken = Vec::new(); + let quantum = Self::quantum_for(self.order); for (hw_id, slot) in self.slots.iter_mut().enumerate() { for (idx, t) in slot.runqueue.iter_mut().enumerate() { if matches!( @@ -1191,7 +1295,7 @@ impl Scheduler { | HwState::Blocked(BlockReason::CriticalSection(_)) ) { t.state = HwState::Ready; - t.quantum_remaining = QUANTUM_DEFAULT; + t.quantum_remaining = quantum; woken.push(ThreadRef::new(hw_id as u8, idx as u16)); } } @@ -1916,4 +2020,146 @@ mod tests { assert_eq!(s.thread(r).state, HwState::Ready); assert_eq!(s.thread(r).quantum_remaining, QUANTUM_DEFAULT); } + + // ---- Stage 0: OrderMode::ScanQuantum -------------------------------- + + #[test] + fn quantum_for_fixed_returns_default() { + assert_eq!(Scheduler::quantum_for(OrderMode::Fixed), QUANTUM_DEFAULT); + } + + #[test] + fn quantum_for_seeded_returns_default() { + assert_eq!( + Scheduler::quantum_for(OrderMode::Seeded { seed: 0xC0FFEE }), + QUANTUM_DEFAULT + ); + } + + #[test] + fn quantum_for_scan_quantum_returns_ticks() { + assert_eq!( + Scheduler::quantum_for(OrderMode::ScanQuantum { ticks: 1000 }), + 1000 + ); + } + + #[test] + fn quantum_for_scan_quantum_floor_is_one() { + // ticks=0 would deadlock the rotation; quantum_for clamps to >=1. 
+ assert_eq!( + Scheduler::quantum_for(OrderMode::ScanQuantum { ticks: 0 }), + 1 + ); + } + + fn mk_scheduler_with_order(order: OrderMode) -> Scheduler { + let mut s = Scheduler::new(); + s.order = order; + s + } + + #[test] + fn spawn_under_scan_quantum_uses_ticks() { + let mut s = mk_scheduler_with_order(OrderMode::ScanQuantum { ticks: 7 }); + s.spawn(worker_spawn_params(1, 0x1000), &mut NullPcr).unwrap(); + let r = ThreadRef { hw_id: 0, idx: 0, generation: 0 }; + assert_eq!(s.thread(r).quantum_remaining, 7); + } + + #[test] + fn install_initial_under_scan_quantum_uses_ticks() { + let mut s = mk_scheduler_with_order(OrderMode::ScanQuantum { ticks: 42 }); + let mut ctx = PpcContext::new(); + ctx.pc = 0x8200_0000; + s.install_initial_thread( + ctx, + 0x7000_0000, + 0x10_0000, + 0x7FFF_0000, + 0x7FFE_0000, + 0x1000, + &mut NullPcr, + ); + let r = ThreadRef { hw_id: 0, idx: 0, generation: 0 }; + assert_eq!(s.thread(r).quantum_remaining, 42); + } + + #[test] + fn wake_ref_under_scan_quantum_reloads_ticks_not_default() { + let mut s = mk_scheduler_with_order(OrderMode::ScanQuantum { ticks: 13 }); + let mut p = SpawnParams::default(); + p.guest_tid = 2; + p.thread_handle = 0x2000; + p.affinity_mask = 0b0010; + p.pcr_base = 0x4000_1000; + s.spawn(p, &mut NullPcr).unwrap(); + let r = ThreadRef { hw_id: 1, idx: 0, generation: 0 }; + s.thread_mut(r).state = HwState::Blocked(BlockReason::WaitAny { + handles: vec![0xDEAD], + deadline: None, + }); + s.thread_mut(r).quantum_remaining = 1; + s.wake_ref(r); + assert_eq!(s.thread(r).quantum_remaining, 13); + } + + #[test] + fn decrement_quantum_under_scan_quantum_rotates_after_ticks() { + let mut s = mk_scheduler_with_order(OrderMode::ScanQuantum { ticks: 4 }); + for tid in [1u32, 2] { + let mut p = SpawnParams::default(); + p.guest_tid = tid; + p.thread_handle = 0x1000 + tid * 4; + p.affinity_mask = 0b0001; + p.pcr_base = 0x4000_0000 + tid * 0x1000; + s.spawn(p, &mut NullPcr).unwrap(); + } + s.begin_slot_visit(0); + let first_tid 
= s.thread(s.current.unwrap()).tid; + // ticks=4: three decrements stay on first, the fourth rotates. + for _ in 0..3 { + assert!(!s.decrement_quantum()); + } + assert!(s.decrement_quantum(), "fourth tick should rotate"); + let second_tid = s.thread(s.current.unwrap()).tid; + assert_ne!(first_tid, second_tid); + // And the freshly-current thread also gets ticks=4, not DEFAULT. + assert_eq!(s.thread(s.current.unwrap()).quantum_remaining, 4); + } + + #[test] + fn order_from_env_parses_quantum_arm() { + // SAFETY: tests in this module run serially within a single process; + // set_var/remove_var here matches the existing rng/seeded test idiom + // elsewhere in the crate. If we ever shard tests across threads, gate + // this group behind a Mutex. + let prev_order = std::env::var("XENIA_SCHED_ORDER").ok(); + let prev_q = std::env::var("XENIA_SCHED_QUANTUM").ok(); + unsafe { + std::env::set_var("XENIA_SCHED_ORDER", "quantum"); + std::env::set_var("XENIA_SCHED_QUANTUM", "250"); + } + match OrderMode::from_env() { + OrderMode::ScanQuantum { ticks } => assert_eq!(ticks, 250), + other => panic!("expected ScanQuantum, got {:?}", other), + } + // ticks=0 falls back to the 1000 default (filter(>0)). + unsafe { std::env::set_var("XENIA_SCHED_QUANTUM", "0") }; + match OrderMode::from_env() { + OrderMode::ScanQuantum { ticks } => assert_eq!(ticks, 1000), + other => panic!("expected ScanQuantum, got {:?}", other), + } + // Restore env so siblings don't see leftover state. 
+ unsafe { + match prev_order { + Some(v) => std::env::set_var("XENIA_SCHED_ORDER", v), + None => std::env::remove_var("XENIA_SCHED_ORDER"), + } + match prev_q { + Some(v) => std::env::set_var("XENIA_SCHED_QUANTUM", v), + None => std::env::remove_var("XENIA_SCHED_QUANTUM"), + } + } + } } diff --git a/crates/xenia-gpu/src/gpu_system.rs b/crates/xenia-gpu/src/gpu_system.rs index 8ae97d0..6ff54ae 100644 --- a/crates/xenia-gpu/src/gpu_system.rs +++ b/crates/xenia-gpu/src/gpu_system.rs @@ -339,6 +339,23 @@ pub struct GpuSystem { /// `GpuSystem::new` and lives for the whole GPU lifetime — no /// per-frame churn. pub edram: crate::edram::ShadowEdram, + /// 256-entry `DC_LUT_30_COLOR` gamma ramp (10-bit BGR packed per entry). + /// Mirrors canary's `gamma_ramp_256_entry_table_` array on + /// `CommandProcessor` (`command_processor.cc:130-148`). Pre-loaded + /// with the linear sRGB ramp at construction so any code path that + /// queries gamma before the guest writes its own ramp sees the same + /// initial values as canary. MMIO read/write index handling for + /// `DC_LUT_RW_INDEX` is NOT yet wired in ours, so guests can't access + /// these bytes today; the field exists for state parity and to give + /// future MMIO handlers a populated buffer. + pub gamma_ramp_256: Vec, + /// 128-entry per-channel `DC_LUT_PWL_DATA` gamma ramp (base/delta pairs, + /// stored interleaved RGB → 384 u32 entries). Layout matches + /// `gamma_ramp_pwl_rgb_[i][j]` in canary (`command_processor.cc:141-148`): + /// index = `i * 3 + j` where `i ∈ [0,128)` and `j ∈ {0,1,2}` for R/G/B. + /// Same status as `gamma_ramp_256`: state-parity only until MMIO + /// handlers are added. 
+ pub gamma_ramp_pwl: Vec, } impl GpuSystem { @@ -365,9 +382,47 @@ impl GpuSystem { last_resolve: None, texture_cache: crate::texture_cache::TextureCache::new(), edram: crate::edram::ShadowEdram::new(), + gamma_ramp_256: Self::default_gamma_ramp_256(), + gamma_ramp_pwl: Self::default_gamma_ramp_pwl(), } } + /// Build canary's default 256-entry sRGB linear ramp. Per + /// `command_processor.cc:134-140`: for each `i ∈ [0,256)`, the 10-bit + /// per-channel value is `i * 0x3FF / 0xFF`; the BGR triple is packed + /// into a single `DC_LUT_30_COLOR` u32. The packing here is BGR-low + /// to match canary's `color_10_blue` / `green` / `red` field order + /// (low bits = blue, high bits = red). + fn default_gamma_ramp_256() -> Vec { + let mut v = Vec::with_capacity(256); + for i in 0..256u32 { + let lane = (i * 0x3FF) / 0xFF; + // DC_LUT_30_COLOR bit layout: blue[0..10] | green[10..20] | red[20..30]. + let entry = lane | (lane << 10) | (lane << 20); + v.push(entry); + } + v + } + + /// Build canary's default 128-entry PWL ramp (interleaved RGB → + /// 384 u32s). Per `command_processor.cc:141-148`: for each + /// `i ∈ [0,128)`, `base = (i * 0xFFFF / 0x7F) & ~0x3F`, and + /// `delta = 0x200` when `i < 0x7F` else `0`. Same value mirrored + /// across R/G/B (j=0/1/2). Each `DC_LUT_PWL_DATA` is one u32 + /// (`base` in low 16, `delta` in high 16). + fn default_gamma_ramp_pwl() -> Vec { + let mut v = Vec::with_capacity(128 * 3); + for i in 0..128u32 { + let base = ((i * 0xFFFF) / 0x7F) & !0x3Fu32; + let delta: u32 = if i < 0x7F { 0x200 } else { 0 }; + let entry = (base & 0xFFFF) | ((delta & 0xFFFF) << 16); + for _ in 0..3 { + v.push(entry); + } + } + v + } + /// P8 — insert a shader blob + bump the FIFO so long-running games /// don't grow `shader_blobs` without bound. Caps at [`SHADER_BLOB_CAP`]. 
/// Never evicts the currently-active VS/PS blobs (if they ended up at diff --git a/crates/xenia-gpu/src/handle.rs b/crates/xenia-gpu/src/handle.rs index fa3db8a..a93a2fb 100644 --- a/crates/xenia-gpu/src/handle.rs +++ b/crates/xenia-gpu/src/handle.rs @@ -390,7 +390,17 @@ impl GpuBackend { // fires; the safety-net fallback warning fired twice for // each Sylpheed run. let target = s.mmio.cp_rb_wptr.load(Ordering::Acquire); - s.drain_until_wptr(mem, target, Duration::from_millis(900)) + // GPUBUG-DRAIN-001 (iterate-2F, 2026-05-27): cap the inline + // drain at 1 ms so vd_swap does not block the main guest + // thread for ~900 ms per swap. Canary's `VdSwap_entry` + // returns in ~6.6 us — no synchronous drain. The 900 ms + // deadline parked tid=1 long enough to starve the post-swap + // worker fan-out at `sub_825070F0`, which in turn left + // tid=13's wait predicate unsatisfiable (wedge at + // PC=0x821CB1DC). Remaining packets stay queued in the + // ring; the next drain (next vd_swap or kernel-callback + // boundary) consumes them. + s.drain_until_wptr(mem, target, Duration::from_millis(1)) } GpuBackend::Threaded(h) => { let target_wptr = h.mmio.cp_rb_wptr.load(Ordering::Acquire); @@ -560,7 +570,12 @@ impl GpuWorker { // empty (rptr == wptr after modulo) or a packet // returns `Idle`/`Blocked`. self.system.sync_with_mmio(); - let deadline = Instant::now() + Duration::from_millis(900); + // GPUBUG-DRAIN-001 (iterate-2F, 2026-05-27): cap at + // 1 ms so the CPU's `recv_timeout(1s)` returns + // promptly. Canary doesn't synchronously drain in + // VdSwap; mirroring that frees tid=1 to spawn + // post-swap workers in time. 
+ let deadline = Instant::now() + Duration::from_millis(1); while self.system.is_ready(&*memory) { if Instant::now() >= deadline { break; diff --git a/crates/xenia-kernel/Cargo.toml b/crates/xenia-kernel/Cargo.toml index f9dd134..22e5dc3 100644 --- a/crates/xenia-kernel/Cargo.toml +++ b/crates/xenia-kernel/Cargo.toml @@ -15,3 +15,7 @@ tracing = { workspace = true } metrics = { workspace = true } thiserror = { workspace = true } anyhow = { workspace = true } +serde_json = { workspace = true } +sha1 = { workspace = true } +sha2 = { workspace = true } +libc = "0.2" diff --git a/crates/xenia-kernel/src/contention_manifest.rs b/crates/xenia-kernel/src/contention_manifest.rs new file mode 100644 index 0000000..9c59d7f --- /dev/null +++ b/crates/xenia-kernel/src/contention_manifest.rs @@ -0,0 +1,342 @@ +//! Phase D Stage 3 — contention-replay manifest loader. +//! +//! Loads a `contention_manifest.json` produced by Stage 2's python +//! builder (`xenia-rs/tools/diff-events/build_contention_manifest.py`) +//! and exposes a `(tid, tid_event_idx) → Entry` lookup for +//! `rtl_enter_critical_section` to consult. +//! +//! The manifest tells ours: "canary observed real contention on this +//! `cs_ptr` at this `(tid, tid_event_idx)`." Ours's +//! `rtl_enter_critical_section` reads the next per-tid ordinal that +//! its `contention.observed` emit would consume and asks the manifest +//! whether to force a park. The Stage 3 forced-park is gated on the CS +//! actually having a live different-tid owner in guest memory at the +//! moment — without that, forced-park would deadlock (the plan's +//! "skip when free" branch). +//! +//! Lookup is O(1) via a `HashMap<(tid, idx), Entry>` behind a `Mutex`. +//! Single-host-thread scheduler means contention on the mutex is +//! minimal. `consume()` removes the entry on hit, so a single +//! (tid, idx) cannot re-fire — guards against any future re-entry of +//! `rtl_enter_critical_section` for the same ordinal. 
+ +use std::collections::HashMap; +use std::fs::File; +use std::io::{self, BufReader}; +use std::path::Path; +use std::sync::Mutex; + +/// One row of the manifest, post-deserialize. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Entry { + pub tid: u32, + pub tid_event_idx: u64, + /// 16-hex string (FNV-1a 64-bit). Stage-3 verifies this matches + /// `semantic_id_shared_global(cs_ptr, object_type::CRITICAL_SECTION)`. + pub site_sid: String, + /// Guest VA of the `X_RTL_CRITICAL_SECTION`. Both engines see the + /// same value (the guest manages the struct). + pub cs_ptr: u32, +} + +pub struct ContentionManifest { + entries: Mutex>, + /// Per-tid count of `contention.observed` emits ours has fired so + /// far in this run. Each emit shifts the per-tid event-log idx by + /// +1 relative to canary's stream, so subsequent manifest lookups + /// must translate ours's `peek_tid_idx` value back to canary's idx + /// space (`ours_peek - emits_so_far`). Updated by + /// `consume_at_peek`, which is the supported lookup entry point. + emit_counts: Mutex>, + /// Sum of all entries ever loaded (cap on growth: post-load lookup + /// only). For audit logging / sanity checks. + initial_count: usize, +} + +impl ContentionManifest { + /// Load a manifest from a JSON file. The file must be a + /// well-formed `contention_manifest.json` (see Stage 2's + /// builder). Unknown top-level fields are ignored — only `entries` + /// is consumed. + /// + /// Returns a friendly error string on malformed input so the caller + /// can surface it without a `serde_json::Error` dependency creep. 
+ pub fn load_from_file(path: &Path) -> io::Result { + let f = File::open(path)?; + let reader = BufReader::new(f); + let json: serde_json::Value = serde_json::from_reader(reader) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; + Self::load_from_json_value(&json) + } + + pub fn load_from_str(s: &str) -> io::Result { + let json: serde_json::Value = serde_json::from_str(s) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; + Self::load_from_json_value(&json) + } + + fn load_from_json_value(json: &serde_json::Value) -> io::Result { + let version = json.get("version").and_then(|v| v.as_u64()).unwrap_or(0); + if version != 1 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("unsupported manifest version: {version} (expected 1)"), + )); + } + let arr = json.get("entries").and_then(|v| v.as_array()).ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidData, "manifest missing `entries` array") + })?; + let mut map = HashMap::with_capacity(arr.len()); + for (i, entry) in arr.iter().enumerate() { + let tid = entry + .get("tid") + .and_then(|v| v.as_u64()) + .ok_or_else(|| io::Error::new( + io::ErrorKind::InvalidData, + format!("entry {i}: missing or non-u64 `tid`"), + ))? as u32; + let idx = entry + .get("tid_event_idx") + .and_then(|v| v.as_u64()) + .ok_or_else(|| io::Error::new( + io::ErrorKind::InvalidData, + format!("entry {i}: missing or non-u64 `tid_event_idx`"), + ))?; + let site_sid = entry + .get("site_sid") + .and_then(|v| v.as_str()) + .ok_or_else(|| io::Error::new( + io::ErrorKind::InvalidData, + format!("entry {i}: missing or non-str `site_sid`"), + ))? + .to_owned(); + // cs_ptr is emitted as "0xHHHHHHHH" — strip the prefix and parse. 
+ let cs_ptr_str = entry + .get("cs_ptr") + .and_then(|v| v.as_str()) + .ok_or_else(|| io::Error::new( + io::ErrorKind::InvalidData, + format!("entry {i}: missing or non-str `cs_ptr`"), + ))?; + let cs_ptr = parse_hex_u32(cs_ptr_str).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("entry {i}: cs_ptr={cs_ptr_str:?}: {e}"), + ) + })?; + let e = Entry { tid, tid_event_idx: idx, site_sid, cs_ptr }; + map.insert((tid, idx), e); + } + let initial_count = map.len(); + Ok(Self { + entries: Mutex::new(map), + emit_counts: Mutex::new(HashMap::new()), + initial_count, + }) + } + + /// Look up + REMOVE the entry for `(tid, idx)`. `None` if no entry. + /// Removal prevents a single ordinal from re-firing the forced-park + /// branch if `rtl_enter_critical_section` is re-entered at the same + /// per-tid ordinal (shouldn't happen because emits are monotone, + /// but defensive). + pub fn consume(&self, tid: u32, idx: u64) -> Option { + self.entries.lock().unwrap().remove(&(tid, idx)) + } + + /// Stage 3 lookup entry point: translate ours's `peek_tid_idx` + /// value back to canary's idx space (subtracting the count of + /// `contention.observed` events ours has already emitted on this + /// tid), then `consume()`. On hit, the per-tid emit counter is + /// bumped so the next call's translation accounts for THIS emit. + /// + /// Both halves of the bookkeeping (consume + emit-count bump) MUST + /// happen here, before the caller actually emits, to keep the + /// translation arithmetic consistent. + pub fn consume_at_peek(&self, tid: u32, peek_idx: u64) -> Option { + let mut emits = self.emit_counts.lock().unwrap(); + let already = *emits.get(&tid).unwrap_or(&0); + // Per-tid event log idx is monotone, so `peek_idx >= already` + // always — but guard against underflow defensively. 
+ if peek_idx < already { + return None; + } + let canary_idx = peek_idx - already; + let hit = self.entries.lock().unwrap().remove(&(tid, canary_idx)); + if hit.is_some() { + *emits.entry(tid).or_insert(0) += 1; + } + hit + } + + /// Test helper: how many `contention.observed` emits we've tracked. + #[cfg(test)] + pub fn emit_count(&self, tid: u32) -> u64 { + *self.emit_counts.lock().unwrap().get(&tid).unwrap_or(&0) + } + + /// Non-destructive peek (testing only). + pub fn peek(&self, tid: u32, idx: u64) -> Option { + self.entries.lock().unwrap().get(&(tid, idx)).cloned() + } + + /// Number of entries originally loaded (constant after load). + pub fn initial_count(&self) -> usize { + self.initial_count + } + + /// Number of entries still un-consumed. + pub fn remaining_count(&self) -> usize { + self.entries.lock().unwrap().len() + } +} + +fn parse_hex_u32(s: &str) -> Result { + let trimmed = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")).unwrap_or(s); + u32::from_str_radix(trimmed, 16).map_err(|e| e.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + const MINIMAL: &str = r#"{ + "version": 1, + "source_canary_jsonl": "/tmp/x.jsonl", + "source_canary_sha256": "00", + "built_at_host_unix": 0, + "summary": {}, + "entries": [ + {"tid": 6, "tid_event_idx": 104664, "site_sid": "c26a128bf45411f7", + "cs_ptr": "0xbc65c890", "contended": true}, + {"tid": 9, "tid_event_idx": 386, "site_sid": "c26a128bf45411f7", + "cs_ptr": "0xbc65c890", "contended": true} + ] + }"#; + + #[test] + fn loads_two_entries() { + let m = ContentionManifest::load_from_str(MINIMAL).unwrap(); + assert_eq!(m.initial_count(), 2); + assert_eq!(m.remaining_count(), 2); + } + + #[test] + fn consume_returns_entry_and_decrements() { + let m = ContentionManifest::load_from_str(MINIMAL).unwrap(); + let e = m.consume(6, 104664).unwrap(); + assert_eq!(e.cs_ptr, 0xbc65c890); + assert_eq!(e.site_sid, "c26a128bf45411f7"); + assert_eq!(m.remaining_count(), 1); + // Second consume of the same key 
yields None. + assert!(m.consume(6, 104664).is_none()); + } + + #[test] + fn miss_returns_none() { + let m = ContentionManifest::load_from_str(MINIMAL).unwrap(); + assert!(m.consume(99, 99).is_none()); + assert!(m.consume(6, 999999).is_none()); + } + + #[test] + fn peek_is_non_destructive() { + let m = ContentionManifest::load_from_str(MINIMAL).unwrap(); + assert!(m.peek(6, 104664).is_some()); + assert!(m.peek(6, 104664).is_some()); + assert_eq!(m.remaining_count(), 2); + } + + #[test] + fn rejects_unknown_version() { + let bad = r#"{"version":99,"entries":[]}"#; + assert!(ContentionManifest::load_from_str(bad).is_err()); + } + + #[test] + fn rejects_missing_entries() { + let bad = r#"{"version":1}"#; + assert!(ContentionManifest::load_from_str(bad).is_err()); + } + + #[test] + fn rejects_bad_cs_ptr() { + let bad = r#"{"version":1,"entries":[ + {"tid":1,"tid_event_idx":0,"site_sid":"x","cs_ptr":"not-a-hex","contended":true} + ]}"#; + assert!(ContentionManifest::load_from_str(bad).is_err()); + } + + #[test] + fn parses_cs_ptr_without_0x_prefix() { + let ok = r#"{"version":1,"entries":[ + {"tid":1,"tid_event_idx":0,"site_sid":"x","cs_ptr":"DEADBEEF","contended":true} + ]}"#; + let m = ContentionManifest::load_from_str(ok).unwrap(); + assert_eq!(m.consume(1, 0).unwrap().cs_ptr, 0xDEADBEEF); + } + + #[test] + fn empty_entries_loads_zero_count() { + let ok = r#"{"version":1,"entries":[]}"#; + let m = ContentionManifest::load_from_str(ok).unwrap(); + assert_eq!(m.initial_count(), 0); + assert!(m.consume(0, 0).is_none()); + } + + #[test] + fn consume_at_peek_translates_idx() { + // Manifest stores canary idx values. Consumer's peek matches + // canary's idx on the very first lookup (no prior emits), then + // shifts by the number of emits this side has done. 
+ let json = r#"{"version":1,"entries":[ + {"tid":1,"tid_event_idx":100,"site_sid":"aa","cs_ptr":"0xaa","contended":true}, + {"tid":1,"tid_event_idx":200,"site_sid":"bb","cs_ptr":"0xbb","contended":true}, + {"tid":1,"tid_event_idx":300,"site_sid":"cc","cs_ptr":"0xcc","contended":true} + ]}"#; + let m = ContentionManifest::load_from_str(json).unwrap(); + // First lookup: peek_idx == canary_idx (no prior emit). + let hit = m.consume_at_peek(1, 100).unwrap(); + assert_eq!(hit.tid_event_idx, 100); + assert_eq!(m.emit_count(1), 1); + // Second hit: ours's peek is 201 (canary's 200 + 1 prior emit). + let hit = m.consume_at_peek(1, 201).unwrap(); + assert_eq!(hit.tid_event_idx, 200); + assert_eq!(m.emit_count(1), 2); + // Third hit: ours's peek is 302. + let hit = m.consume_at_peek(1, 302).unwrap(); + assert_eq!(hit.tid_event_idx, 300); + assert_eq!(m.emit_count(1), 3); + } + + #[test] + fn consume_at_peek_miss_does_not_bump_emit_count() { + let json = r#"{"version":1,"entries":[ + {"tid":1,"tid_event_idx":100,"site_sid":"aa","cs_ptr":"0xaa","contended":true} + ]}"#; + let m = ContentionManifest::load_from_str(json).unwrap(); + // Miss at idx 50 — emit count stays 0. + assert!(m.consume_at_peek(1, 50).is_none()); + assert_eq!(m.emit_count(1), 0); + // Miss at idx 999 — still 0. + assert!(m.consume_at_peek(1, 999).is_none()); + assert_eq!(m.emit_count(1), 0); + } + + #[test] + fn consume_at_peek_per_tid_independent() { + let json = r#"{"version":1,"entries":[ + {"tid":1,"tid_event_idx":100,"site_sid":"a","cs_ptr":"0xa","contended":true}, + {"tid":2,"tid_event_idx":200,"site_sid":"b","cs_ptr":"0xb","contended":true}, + {"tid":2,"tid_event_idx":300,"site_sid":"c","cs_ptr":"0xc","contended":true} + ]}"#; + let m = ContentionManifest::load_from_str(json).unwrap(); + assert!(m.consume_at_peek(1, 100).is_some()); + // tid=2's count should be unaffected by tid=1's emit. 
+ assert_eq!(m.emit_count(2), 0); + assert!(m.consume_at_peek(2, 200).is_some()); + // Now tid=2 has 1 emit; its second entry is at canary 300, so peek 301. + assert!(m.consume_at_peek(2, 301).is_some()); + assert_eq!(m.emit_count(2), 2); + } +} diff --git a/crates/xenia-kernel/src/event_log.rs b/crates/xenia-kernel/src/event_log.rs new file mode 100644 index 0000000..3ec4e6d --- /dev/null +++ b/crates/xenia-kernel/src/event_log.rs @@ -0,0 +1,774 @@ +//! Phase A event-log emitter. Schema v1 — see +//! `xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md`. +//! +//! Cvar-gated (disabled by default). Zero cost when disabled: +//! `is_enabled()` is a relaxed atomic-bool load. + +use std::collections::HashMap; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Mutex, OnceLock}; +use std::time::Instant; + +static ENABLED: AtomicBool = AtomicBool::new(false); +static SINK: OnceLock>> = OnceLock::new(); +static T0: OnceLock = OnceLock::new(); +static TID_COUNTERS: OnceLock>> = OnceLock::new(); +/// Iterate 2.M (reading-error #42): record the Phase-A trace path so the +/// always-on exit-time thread-state dump can derive a sibling JSON path +/// without re-threading CLI flags through `cmd_exec_inner`. `None` when +/// Phase-A is disabled — exit-state dump falls back to a CWD-relative +/// default in that case. +static OUTPUT_PATH: OnceLock = OnceLock::new(); + +/// Object-type codes — must match canary's enum exactly (schema-v1.md). 
+pub mod object_type { + pub const UNKNOWN: u32 = 0x00; + pub const EVENT: u32 = 0x01; + pub const MUTANT: u32 = 0x02; + pub const SEMAPHORE: u32 = 0x03; + pub const TIMER: u32 = 0x04; + pub const THREAD: u32 = 0x05; + pub const FILE: u32 = 0x06; + pub const IO_COMPLETION: u32 = 0x07; + pub const MODULE: u32 = 0x08; + pub const ENUM_STATE: u32 = 0x09; + pub const SECTION: u32 = 0x0A; + pub const NOTIFICATION: u32 = 0x0B; + /// Phase D Stage 1 (canary side) / Stage 3 (ours side): pseudo-type + /// used as the `object_type` input to `semantic_id_shared_global` + /// for RTL_CRITICAL_SECTION pointers. CS is NOT a real XObject + /// (it lives as a guest-memory struct, not a handle-tabled kernel + /// object), but the `site_sid` field of `contention.observed` + /// reuses the shared-global SID recipe so the Stage-3 manifest can + /// compute the same SID in both engines for the same CS pointer. + /// Must match canary's `kObjCriticalSection` exactly. + pub const CRITICAL_SECTION: u32 = 0x0C; +} + +/// Initialize the emitter. Call from main once at startup with the +/// resolved path (CLI flag or env var). `None` keeps the emitter +/// disabled; cost is one relaxed atomic-bool check per emit call. +pub fn init(path: Option<&Path>) { + let _ = T0.set(Instant::now()); + let Some(path) = path else { + return; + }; + let _ = OUTPUT_PATH.set(path.to_path_buf()); + let f = match File::create(path) { + Ok(f) => f, + Err(e) => { + eprintln!( + "phase-a event log: failed to open {:?}: {e} — disabled", + path + ); + return; + } + }; + let mut bw = BufWriter::new(f); + // Schema header (synthetic tid=0). + let host_ns = host_ns_since_start(); + let _ = writeln!( + bw, + r#"{{"schema_version":1,"engine":"ours","kind":"schema_version","tid":0,"tid_event_idx":0,"guest_cycle":0,"host_ns":{host_ns},"deterministic":true,"payload":{{"version":1,"emitter_build":"ours-phaseA"}}}}"# + ); + let _ = bw.flush(); + if SINK.set(Mutex::new(bw)).is_err() { + // Already initialized — leave alone. 
+ return; + } + let _ = TID_COUNTERS.set(Mutex::new(HashMap::new())); + ENABLED.store(true, Ordering::Release); +} + +#[inline] +pub fn is_enabled() -> bool { + ENABLED.load(Ordering::Relaxed) +} + +/// Path passed to `init()`, if any. Used by the iterate-2.M exit-state +/// dump so the sibling JSON lands next to the Phase-A JSONL trace. +pub fn output_path() -> Option<&'static Path> { + OUTPUT_PATH.get().map(|p| p.as_path()) +} + +fn host_ns_since_start() -> u128 { + let t0 = T0.get_or_init(Instant::now); + t0.elapsed().as_nanos() +} + +fn next_tid_idx(tid: u32) -> u64 { + let map = TID_COUNTERS.get().expect("event_log not initialized"); + let mut g = map.lock().unwrap(); + let entry = g.entry(tid).or_insert(0); + let idx = *entry; + *entry = idx + 1; + idx +} + +/// Peek next tid_event_idx without consuming it. Useful for handle +/// semantic-id computation that needs to match what the next emit will use. +pub fn peek_tid_idx(tid: u32) -> u64 { + let Some(map) = TID_COUNTERS.get() else { + return 0; + }; + let g = map.lock().unwrap(); + *g.get(&tid).unwrap_or(&0) +} + +/// FNV-1a 64-bit. Identical implementation in canary (see event_log.cc). +pub fn semantic_id( + create_site_pc: u32, + creating_tid: u32, + tid_event_idx_at_creation: u64, + object_type: u32, +) -> u64 { + let mut bytes = [0u8; 4 + 4 + 8 + 4]; + bytes[0..4].copy_from_slice(&create_site_pc.to_le_bytes()); + bytes[4..8].copy_from_slice(&creating_tid.to_le_bytes()); + bytes[8..16].copy_from_slice(&tid_event_idx_at_creation.to_le_bytes()); + bytes[16..20].copy_from_slice(&object_type.to_le_bytes()); + let mut h: u64 = 0xCBF29CE484222325; + for b in bytes.iter() { + h ^= *b as u64; + h = h.wrapping_mul(0x100000001B3); + } + h +} + +/// Phase C+18: marker sentinel used as `create_site_pc` in +/// `semantic_id_shared_global` so the resulting SID is distinguishable +/// from regular per-thread handle SIDs (which use real guest PCs that +/// never collide with this value). 
Picked outside any plausible guest +/// code-address range. Both engines MUST use this exact constant. +pub const SHARED_GLOBAL_SID_MARKER: u32 = 0xC01AB005; + +/// Phase C+18: scheduling-invariant SID for **process-global** kernel +/// dispatcher objects that are lazy-wrapped on first guest-thread touch +/// (see ours's `ensure_dispatcher_object` and canary's +/// `XObject::GetNativeObject`). +/// +/// Whichever guest thread happens to be the first to touch a given +/// dispatcher pointer synthesizes the wrapper, but **which** thread wins +/// is timing-dependent and differs between canary and ours (and between +/// runs of the same engine). The regular per-thread `semantic_id` +/// recipe — keyed on `(create_site_pc, creating_tid, tid_event_idx)` — +/// therefore produces different SIDs in each engine for the same logical +/// object. +/// +/// This helper keys on `(SHARED_GLOBAL_SID_MARKER, 0, pointer, object_type)` +/// so the SID depends only on the object's identity, not on the +/// scheduling order. Subsequent `wait.begin` events that reference the +/// dispatcher resolve a stable cross-engine SID, and the diff tool can +/// use SID equality to cross-tid match the floating `handle.create` +/// event. +/// +/// Per the schema-v1 SID API, the inputs are still fed to the existing +/// `semantic_id()` FNV-1a function unchanged — we just choose inputs +/// that are scheduling-invariant. No new wire format. 
+pub fn semantic_id_shared_global(pointer: u32, object_type: u32) -> u64 { + semantic_id( + SHARED_GLOBAL_SID_MARKER, + 0, + pointer as u64, + object_type, + ) +} + +fn write_line(line: &str) { + let Some(sink) = SINK.get() else { return }; + let mut g = sink.lock().unwrap(); + let _ = g.write_all(line.as_bytes()); + let _ = g.write_all(b"\n"); + let _ = g.flush(); +} + +fn json_escape(s: &str) -> String { + let mut out = String::with_capacity(s.len() + 2); + for c in s.chars() { + match c { + '\\' => out.push_str("\\\\"), + '"' => out.push_str("\\\""), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + c if (c as u32) < 0x20 => { + out.push_str(&format!("\\u{:04x}", c as u32)); + } + c => out.push(c), + } + } + out +} + +#[inline] +fn common_prefix( + kind: &str, + tid: u32, + idx: u64, + guest_cycle: u64, + deterministic: bool, +) -> String { + let host_ns = host_ns_since_start(); + let det = if deterministic { "true" } else { "false" }; + format!( + r#"{{"schema_version":1,"engine":"ours","kind":"{kind}","tid":{tid},"tid_event_idx":{idx},"guest_cycle":{guest_cycle},"host_ns":{host_ns},"deterministic":{det}"# + ) +} + +pub fn emit_import_call(tid: u32, guest_cycle: u64, module: &str, ord: u16, name: &str) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let mut line = common_prefix("import.call", tid, idx, guest_cycle, true); + line.push_str(&format!( + r#","payload":{{"module":"{}","ord":{},"name":"{}"}}}}"#, + json_escape(module), + ord, + json_escape(name) + )); + write_line(&line); +} + +pub fn emit_kernel_call(tid: u32, guest_cycle: u64, name: &str) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let mut line = common_prefix("kernel.call", tid, idx, guest_cycle, true); + line.push_str(&format!( + r#","payload":{{"name":"{}","args":{{}},"args_resolved":{{}}}}}}"#, + json_escape(name) + )); + write_line(&line); +} + +/// Phase C+10 schema-v1 extension: emit a `kernel.call` 
event whose +/// `args_resolved` field carries a best-effort dereferenced path string. +/// +/// Schema-v1 already allows `args_resolved` to be a free-form object +/// (see schema-v1.md kernel.call payload), so this remains v1-compatible. +/// Cvar-gated default-off via `is_enabled()`. When the path is empty or +/// resolution failed, the caller should pass `None` and we degrade to the +/// existing empty-object form so emitter output is byte-identical to the +/// pre-extension behavior. +/// +/// Determinism: the resolved path is read directly out of guest memory +/// (OBJECT_ATTRIBUTES → ANSI_STRING → bytes). It is fully deterministic +/// across runs of the same input. The event-level `deterministic:true` +/// flag is preserved. +pub fn emit_kernel_call_with_path( + tid: u32, + guest_cycle: u64, + name: &str, + path: Option<&str>, +) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let mut line = common_prefix("kernel.call", tid, idx, guest_cycle, true); + match path { + Some(p) if !p.is_empty() => { + line.push_str(&format!( + r#","payload":{{"name":"{}","args":{{}},"args_resolved":{{"path":"{}"}}}}}}"#, + json_escape(name), + json_escape(p) + )); + } + _ => { + line.push_str(&format!( + r#","payload":{{"name":"{}","args":{{}},"args_resolved":{{}}}}}}"#, + json_escape(name) + )); + } + } + write_line(&line); +} + +pub fn emit_kernel_return(tid: u32, guest_cycle: u64, name: &str, return_value: u64) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let mut line = common_prefix("kernel.return", tid, idx, guest_cycle, true); + line.push_str(&format!( + r#","payload":{{"name":"{}","return_value":{},"status":"0x{:08x}","side_effects":[]}}}}"#, + json_escape(name), + return_value, + return_value as u32 + )); + write_line(&line); +} + +pub fn emit_handle_create( + tid: u32, + guest_cycle: u64, + semantic_id: u64, + object_type: u32, + raw_handle_id: u32, + object_name: Option<&str>, +) { + if !is_enabled() { + return; + } + let 
idx = next_tid_idx(tid); + let mut line = common_prefix("handle.create", tid, idx, guest_cycle, true); + let name_field = match object_name { + Some(n) => format!(r#""{}""#, json_escape(n)), + None => "null".to_string(), + }; + line.push_str(&format!( + r#","payload":{{"handle_semantic_id":"{:016x}","object_type":{},"object_name":{},"raw_handle_id":"0x{:08x}"}}}}"#, + semantic_id, object_type, name_field, raw_handle_id + )); + write_line(&line); +} + +pub fn emit_handle_destroy( + tid: u32, + guest_cycle: u64, + semantic_id: u64, + raw_handle_id: u32, + prior_refcount: u32, +) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let mut line = common_prefix("handle.destroy", tid, idx, guest_cycle, true); + line.push_str(&format!( + r#","payload":{{"handle_semantic_id":"{:016x}","raw_handle_id":"0x{:08x}","prior_refcount":{}}}}}"#, + semantic_id, raw_handle_id, prior_refcount + )); + write_line(&line); +} + +pub fn emit_thread_create( + parent_tid: u32, + guest_cycle: u64, + semantic_id: u64, + entry_pc: u32, + ctx_ptr: u32, + priority: u32, + affinity: u32, + stack_size: u32, + suspended: bool, +) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(parent_tid); + let mut line = common_prefix("thread.create", parent_tid, idx, guest_cycle, true); + line.push_str(&format!( + r#","payload":{{"handle_semantic_id":"{:016x}","parent_tid":{},"entry_pc":"0x{:08x}","ctx_ptr":"0x{:08x}","priority":{},"affinity":{},"stack_size":{},"suspended":{}}}}}"#, + semantic_id, + parent_tid, + entry_pc, + ctx_ptr, + priority, + affinity, + stack_size, + suspended + )); + write_line(&line); +} + +pub fn emit_thread_exit(tid: u32, guest_cycle: u64, exit_code: u32) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let mut line = common_prefix("thread.exit", tid, idx, guest_cycle, true); + line.push_str(&format!(r#","payload":{{"exit_code":{}}}}}"#, exit_code)); + write_line(&line); +} + +pub fn emit_wait_begin( + tid: u32, + guest_cycle: u64, + 
handles: &[u64], + timeout_ns: i64, + alertable: bool, + wait_all: bool, +) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let mut line = common_prefix("wait.begin", tid, idx, guest_cycle, true); + let mut ids = String::from("["); + for (i, h) in handles.iter().enumerate() { + if i > 0 { + ids.push(','); + } + ids.push_str(&format!(r#""{:016x}""#, h)); + } + ids.push(']'); + let wait_type = if wait_all { "all" } else { "any" }; + line.push_str(&format!( + r#","payload":{{"handles_semantic_ids":{},"timeout_ns":{},"alertable":{},"wait_type":"{}"}}}}"#, + ids, timeout_ns, alertable, wait_type + )); + write_line(&line); +} + +pub fn emit_wait_end( + tid: u32, + guest_cycle: u64, + status: u32, + woken_by: Option, +) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let mut line = common_prefix("wait.end", tid, idx, guest_cycle, false); + let woken = match woken_by { + Some(h) => format!(r#""{:016x}""#, h), + None => "null".to_string(), + }; + line.push_str(&format!( + r#","payload":{{"status":"0x{:08x}","woken_by_semantic_id":{},"wait_duration_cycles":0}}}}"#, + status, woken + )); + write_line(&line); +} + +// ===== Phase C+15-\u03b1 — Handle-semantic-ID registry ===== +// +// Maps raw handle id -> FNV-1a 64-bit semantic_id assigned at handle +// creation. Used by `handle.destroy`, `wait.begin`, and any future event +// that references a handle to emit a stable cross-engine identity. +// +// Lifetime: entries are inserted on `register_handle_semantic_id` and +// removed on `forget_handle_semantic_id` (handle destroy). The map is +// completely separate from the live KernelState object table — +// looking up a destroyed handle returns None and the caller emits 0. 
+static HANDLE_SEMANTIC_IDS: OnceLock>> = OnceLock::new(); + +fn handle_sid_map() -> &'static Mutex> { + HANDLE_SEMANTIC_IDS.get_or_init(|| Mutex::new(HashMap::new())) +} + +/// Record `(raw_handle_id -> semantic_id)` so subsequent destroy/wait +/// events can resolve the SID. No-op when event_log is disabled. +pub fn register_handle_semantic_id(raw_handle_id: u32, sid: u64) { + if !is_enabled() { + return; + } + let m = handle_sid_map(); + m.lock().unwrap().insert(raw_handle_id, sid); +} + +/// Look up the semantic_id previously registered for a raw handle. +/// Returns 0 if the handle was never registered (e.g. pre-init handles, +/// pseudo-handles, or already destroyed). +pub fn lookup_handle_semantic_id(raw_handle_id: u32) -> u64 { + let Some(map) = HANDLE_SEMANTIC_IDS.get() else { + return 0; + }; + *map.lock().unwrap().get(&raw_handle_id).unwrap_or(&0) +} + +/// Forget the semantic_id mapping for a destroyed handle. Returns the +/// previous mapping (0 if absent) so callers can emit `handle.destroy` +/// with the correct SID before the entry is dropped. +pub fn forget_handle_semantic_id(raw_handle_id: u32) -> u64 { + let Some(map) = HANDLE_SEMANTIC_IDS.get() else { + return 0; + }; + map.lock().unwrap().remove(&raw_handle_id).unwrap_or(0) +} + +/// Convenience wrapper used by both engines: at handle creation time, +/// peek the current tid_event_idx, compute the FNV-1a 64-bit semantic_id, +/// register it for the raw handle, and emit a `handle.create` event. +/// Returns the semantic_id so callers can stash it on object metadata +/// when needed (currently only used for the registry side-effect). +/// +/// `create_site_pc` is the guest LR at the kernel call that produced +/// the handle (or 0 if not available — both engines must use the same +/// value for the cross-engine SID to match). 
For v1.1 we pass 0 +/// universally, which preserves cross-engine identity since the SID +/// becomes `fnv1a(0, tid, idx, type)` and both engines emit the same +/// tuple in the same order. +pub fn emit_handle_create_auto( + tid: u32, + guest_cycle: u64, + create_site_pc: u32, + object_type: u32, + raw_handle_id: u32, + object_name: Option<&str>, +) -> u64 { + if !is_enabled() { + return 0; + } + let idx_at_creation = peek_tid_idx(tid); + let sid = semantic_id(create_site_pc, tid, idx_at_creation, object_type); + register_handle_semantic_id(raw_handle_id, sid); + emit_handle_create(tid, guest_cycle, sid, object_type, raw_handle_id, object_name); + sid +} + +/// Phase C+18: emit `handle.create` for a **process-global** kernel +/// dispatcher (canary `XObject::GetNativeObject` / ours +/// `ensure_dispatcher_object` first-touch synthesis). The SID is +/// computed via `semantic_id_shared_global(pointer, object_type)` so +/// the same object yields the same SID in both engines regardless of +/// which guest thread happens to be the first toucher (see C+18 +/// memory entry / schema-v1.md §"Shared-global SIDs"). The diff tool +/// cross-tid matches `handle.create` events on shared-global SIDs. +/// +/// The `raw_handle_id` is the guest dispatcher pointer itself in +/// ours; canary's `XObject::StashHandle` round-trips through the same +/// dispatcher slot. Cross-engine SID identity is independent of raw +/// handle namespace. +pub fn emit_handle_create_shared_global( + tid: u32, + guest_cycle: u64, + object_type: u32, + raw_handle_id: u32, + object_name: Option<&str>, +) -> u64 { + if !is_enabled() { + return 0; + } + let sid = semantic_id_shared_global(raw_handle_id, object_type); + register_handle_semantic_id(raw_handle_id, sid); + emit_handle_create(tid, guest_cycle, sid, object_type, raw_handle_id, object_name); + sid +} + +/// Phase D Stage 3: emit a `contention.observed` event. Mirror of canary's +/// `phase_a::EmitContentionObserved` (Stage 1). 
Emitted from +/// `rtl_enter_critical_section` only when the contention-manifest forces a +/// park, so per-tid ordinals stay aligned with canary's emitter. The +/// `site_sid` is computed via `semantic_id_shared_global(cs_ptr, +/// object_type::CRITICAL_SECTION)` so both engines produce the same SID +/// for the same CS pointer (cross-engine identity). +/// +/// `is_enabled()` gates this just like every other emitter — when the +/// Phase A event log is disabled, this is a zero-cost no-op. +/// +/// Note: `contention.observed` is marked `ENGINE_LOCAL_KINDS` in +/// `diff_events.py` (Stage 4), so the diff tool advances the per-tid +/// pointer past these events on either side without comparison. That +/// keeps the matched-prefix definition unchanged across cvar +/// configurations. +pub fn emit_contention_observed( + tid: u32, + guest_cycle: u64, + cs_ptr: u32, + contended: bool, +) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let site_sid = semantic_id_shared_global(cs_ptr, object_type::CRITICAL_SECTION); + let mut line = common_prefix("contention.observed", tid, idx, guest_cycle, true); + line.push_str(&format!( + r#","payload":{{"cs_ptr":"0x{:08x}","site_sid":"{:016x}","contended":{}}}}}"#, + cs_ptr, + site_sid, + if contended { "true" } else { "false" } + )); + write_line(&line); +} + +/// Iterate 2.Q: emit a `signal.match` event recording which handle a +/// signal-class call (`NtSetEvent`/`KeSetEvent`/`NtReleaseSemaphore`/ +/// `KeReleaseSemaphore`) targeted at the moment the signal fired, along +/// with the set of guest threads currently parked on that handle. The +/// caller is expected to gather `waiter_tids` BEFORE the wake fans out, +/// so the emitted set reflects the pre-wake waiter list. +/// +/// `signal_call` is the kernel symbol (static `&str`). `target_handle` +/// is the resolved (post-pseudo-handle / post-dup-id) handle id; the +/// SID is resolved from the global registry (0 when absent — e.g. 
+/// pre-init handles or AUDIT-062 wrong-slot targets that were never +/// registered). `waiter_count` is the length of `waiter_tids` (passed +/// explicitly so callers may skip the emit when 0). This kind is +/// ENGINE_LOCAL in the diff tool — it consumes one per-tid idx slot on +/// the emitter side without alignment cost. +/// +/// Pure observability. No behavior change. Cvar-gated default-off via +/// `is_enabled()`; when the Phase A event log is disabled the call is +/// a single relaxed atomic-bool check. +pub fn emit_signal_match( + tid: u32, + guest_cycle: u64, + signal_call: &str, + target_handle: u32, + waiter_count: usize, + waiter_tids: &[u32], +) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(tid); + let target_sid = lookup_handle_semantic_id(target_handle); + let sid_field = if target_sid != 0 { + format!(r#""{:016x}""#, target_sid) + } else { + "null".to_string() + }; + let mut tids_field = String::from("["); + for (i, t) in waiter_tids.iter().enumerate() { + if i > 0 { + tids_field.push(','); + } + tids_field.push_str(&format!("{}", t)); + } + tids_field.push(']'); + let mut line = common_prefix("signal.match", tid, idx, guest_cycle, true); + line.push_str(&format!( + r#","payload":{{"signal_call":"{}","target_handle":"0x{:08x}","target_sid":{},"waiter_count":{},"waiter_tids":{}}}}}"#, + json_escape(signal_call), + target_handle, + sid_field, + waiter_count, + tids_field, + )); + write_line(&line); +} + +/// Iterate 2.T: emit a `wake.requested` event recording one waiter the +/// wake-loop in `wake_eligible_waiters` actually touched. Distinct from +/// `signal.match` (which records pre-wake intent at the call boundary): +/// `wake.requested` records the per-waiter transition outcome the kernel +/// wake primitive produced. 
Together they decisively distinguish: +/// C-2a (`signal.match` fires for waiter, but no `wake.requested` for +/// the same target tid) — kernel waiter list inconsistency, OR +/// C-2b (`wake.requested` fires with `transitioned=true` / +/// `new_state="Ready"`, but target tid never executes) — +/// scheduler-pick skip on Ready threads. +/// +/// `signaling_tid` is the tid of the thread currently executing inside the +/// signal call (e.g., NtReleaseSemaphore caller). `target_tid` is the +/// woken thread's guest tid. `target_handle` is the handle we're waking +/// on. `wait_kind` is one of `"WaitAny"`, `"WaitAll"`, `"WaitSingle"`, +/// `"Other"`. `transitioned` is true iff prior_state was Blocked and +/// post-state is Ready; `new_state` carries the post-call state string +/// (`"Ready"`, `"StillBlocked"`, `"AlreadyReady"`, `"Exited"`, `"Other"`). +/// `target_cpu` is the woken thread's hw_id, or `null` if unknown. +/// +/// ENGINE_LOCAL in the diff tool (see `ENGINE_LOCAL_KINDS` in +/// `tools/diff-events/diff_events.py`). Pure observability — no behavior +/// change. 
+#[allow(clippy::too_many_arguments)] +pub fn emit_wake_requested( + signaling_tid: u32, + guest_cycle: u64, + target_tid: u32, + target_handle: u32, + wait_kind: &str, + transitioned: bool, + new_state: &str, + target_cpu: Option, +) { + if !is_enabled() { + return; + } + let idx = next_tid_idx(signaling_tid); + let cpu_field = match target_cpu { + Some(c) => format!("{}", c), + None => "null".to_string(), + }; + let mut line = common_prefix("wake.requested", signaling_tid, idx, guest_cycle, true); + line.push_str(&format!( + r#","payload":{{"target_tid":{},"target_handle":"0x{:08x}","wait_kind":"{}","transitioned":{},"new_state":"{}","target_cpu":{}}}}}"#, + target_tid, + target_handle, + json_escape(wait_kind), + if transitioned { "true" } else { "false" }, + json_escape(new_state), + cpu_field, + )); + write_line(&line); +} + +/// Convenience wrapper used by both engines: emit a `handle.destroy` +/// event resolving the SID from the registry, and forget the mapping. +/// Pass `prior_refcount` as observed pre-decrement. +pub fn emit_handle_destroy_auto( + tid: u32, + guest_cycle: u64, + raw_handle_id: u32, + prior_refcount: u32, +) { + if !is_enabled() { + return; + } + let sid = forget_handle_semantic_id(raw_handle_id); + emit_handle_destroy(tid, guest_cycle, sid, raw_handle_id, prior_refcount); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn fnv1a_known_vector() { + // FNV-1a 64-bit of "foobar" = 0x85944171f73967e8 (standard test vector). + let bytes = b"foobar"; + let mut h: u64 = 0xCBF29CE484222325; + for b in bytes.iter() { + h ^= *b as u64; + h = h.wrapping_mul(0x100000001B3); + } + assert_eq!(h, 0x85944171f73967e8); + } + + #[test] + fn semantic_id_stable() { + // Identity inputs → known fixed FNV-1a output. Locks the algorithm + // so a regression here is caught at build-time. 
+ let a = semantic_id(0x82001234, 1, 0, object_type::EVENT); + let b = semantic_id(0x82001234, 1, 0, object_type::EVENT); + assert_eq!(a, b); + // Distinct input → distinct output (with overwhelming probability). + let c = semantic_id(0x82001234, 1, 1, object_type::EVENT); + assert_ne!(a, c); + } + + /// Phase C+18: the shared-global SID must depend ONLY on + /// `(pointer, object_type)`, independent of the calling tid / event idx. + /// Two calls with the same pointer+type return the same SID; otherwise + /// the diff tool's cross-tid floating-create match cannot work. + #[test] + fn semantic_id_shared_global_is_scheduling_invariant() { + let a = semantic_id_shared_global(0x828a3230, object_type::SEMAPHORE); + let b = semantic_id_shared_global(0x828a3230, object_type::SEMAPHORE); + assert_eq!(a, b); + // Distinct pointer → distinct SID. + let c = semantic_id_shared_global(0x828a3234, object_type::SEMAPHORE); + assert_ne!(a, c); + // Distinct type at the same pointer → distinct SID (defends against + // games that map the same address with different headers — unlikely + // but the property is cheap to assert). + let d = semantic_id_shared_global(0x828a3230, object_type::EVENT); + assert_ne!(a, d); + } + + /// Phase C+18: the shared-global SID must NOT collide with regular + /// per-thread SIDs for plausible inputs. The marker constant + /// `0xC01AB005` sits well outside any guest PC range (PPC text lives + /// in 0x8200_0000-0x82FF_FFFF in Sylpheed; XEX header in + /// 0x3001_xxxx; heap in 0x4xxx_xxxx). Verify the marker is also not + /// a plausible tid/idx value. + #[test] + fn semantic_id_shared_global_marker_isolated() { + // A regular per-thread SID for a plausible call site / tid / idx. + let regular = semantic_id(0x82001234, 13, 42, object_type::SEMAPHORE); + // The shared-global SID for the same type but different inputs. 
+ let global = semantic_id_shared_global(0x828a3230, object_type::SEMAPHORE); + assert_ne!(regular, global); + // Ensure marker constant is documented. + assert_eq!(SHARED_GLOBAL_SID_MARKER, 0xC01AB005); + } +} diff --git a/crates/xenia-kernel/src/exports.rs b/crates/xenia-kernel/src/exports.rs index a4dfa7d..7582fa0 100644 --- a/crates/xenia-kernel/src/exports.rs +++ b/crates/xenia-kernel/src/exports.rs @@ -16,7 +16,12 @@ pub fn register_exports(state: &mut KernelState) { // Debug state.register_export(Xboxkrnl, 0x01, "DbgBreakPoint", dbg_break_point); - state.register_export(Xboxkrnl, 0x03, "DbgPrint", dbg_print); + // Phase C+6½: `DbgPrint` (ord 0x03) is table-entry-only in canary + // (`xboxkrnl_table.inc:17`, no `DECLARE_XBOXKRNL_EXPORT(DbgPrint)`). + // Canary routes through the syscall thunk, which emits NO Phase A + // events. Mirror that — body still logs the string (harmless side + // effect) but the Phase A emitter stays silent. + state.register_unimplemented_export(Xboxkrnl, 0x03, "DbgPrint", dbg_print); // ExCreateThread and friends state.register_export(Xboxkrnl, 0x0D, "ExCreateThread", ex_create_thread); @@ -28,7 +33,17 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x28, "HalReturnToFirmware", hal_return_to_firmware); // I/O - state.register_export(Xboxkrnl, 0x3C, "IoDismountVolumeByFileHandle", stub_success); + // Phase C+6: `IoDismountVolumeByFileHandle` has a table entry in + // canary's `xboxkrnl_table.inc:74` but NO `DECLARE_XBOXKRNL_EXPORT` + // shim, so canary routes calls through the syscall thunk + // (`xex_module.cc:1310-1335`) which emits NO Phase A events. + // Mirror that by registering as unimplemented — ours still runs + // `stub_success` for guest-visible semantics, but the Phase A + // emitter stays silent. 
Before this fix, ours's tid=1 main chain + // injected 3 spurious events (`import.call`/`kernel.call`/ + // `kernel.return`) at idx=102132 ahead of `NtClose`, becoming the + // first divergence vs canary which jumps straight to `NtClose`. + state.register_unimplemented_export(Xboxkrnl, 0x3C, "IoDismountVolumeByFileHandle", stub_success); // Ke* Threading/Sync state.register_export(Xboxkrnl, 0x4D, "KeAcquireSpinLockAtRaisedIrql", stub_return_zero); @@ -44,16 +59,36 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x7D, "KeLeaveCriticalRegion", stub_success); state.register_export(Xboxkrnl, 0x7F, "KePulseEvent", ke_pulse_event); state.register_export(Xboxkrnl, 0x81, "KeQueryBasePriorityThread", ke_query_base_priority_thread); - state.register_export(Xboxkrnl, 0x82, "KeQueryIdealProcessor", ke_query_ideal_processor); + // Phase C+6½ hallucination fix: ord 0x82 = `KeQueryInterruptTime` + // per canary's `xboxkrnl_table.inc:130`. Canary DECLAREs this export + // (`xboxkrnl_misc.cc:127`) — both engines emit Phase A events. + // Previously mis-labeled `KeQueryIdealProcessor` in ours; the body + // returned a wrong value (processor index instead of interrupt-time + // counter). Fixed body returns a synthetic monotonic u64. + state.register_export(Xboxkrnl, 0x82, "KeQueryInterruptTime", ke_query_interrupt_time); state.register_export(Xboxkrnl, 0x83, "KeQueryPerformanceFrequency", ke_query_performance_frequency); - state.register_export(Xboxkrnl, 0x84, "KeQuerySystemTime", ke_query_system_time); - state.register_export(Xboxkrnl, 0x85, "KeRaiseIrqlToDpcLevel", stub_return_zero); + // Canary declares `void KeQuerySystemTime_entry(lpqword_t time_ptr, ...)` + // (xboxkrnl_threading.cc:459); the time is delivered via the OUT + // pointer, not via gpr[3]. Phase A's `kernel.return.return_value` + // must be 0 (canary literal) — not r3 (which for ours is the input + // arg `time_ptr` left untouched). 
See `register_void_export` doc in + // state.rs. + state.register_void_export(Xboxkrnl, 0x84, "KeQuerySystemTime", ke_query_system_time); + state.register_export(Xboxkrnl, 0x85, "KeRaiseIrqlToDpcLevel", ke_raise_irql_to_dpc_level); state.register_export(Xboxkrnl, 0x88, "KeReleaseSemaphore", ke_release_semaphore); state.register_export(Xboxkrnl, 0x89, "KeReleaseSpinLockFromRaisedIrql", ke_release_spinlock_from_raised_irql); state.register_export(Xboxkrnl, 0x8F, "KeResetEvent", ke_reset_event); state.register_export(Xboxkrnl, 0x92, "KeResumeThread", ke_resume_thread); state.register_export(Xboxkrnl, 0x97, "KeSetAffinityThread", ke_set_affinity_thread); - state.register_export(Xboxkrnl, 0x98, "KeSetIdealProcessor", ke_set_ideal_processor); + // Phase C+6½ hallucination fix: ord 0x98 = `KeSetBackgroundProcessors` + // per canary's `xboxkrnl_table.inc:166`. Table-entry-only (no + // `DECLARE_XBOXKRNL_EXPORT` shim), so canary routes via the syscall + // thunk and emits NO Phase A events. Previously mis-labeled + // `KeSetIdealProcessor` in ours; the body wrote + // `GuestThread::ideal_processor` — wrong state mutation under the + // wrong name. Replaced with `stub_success` and registered as + // unimplemented to mirror canary's silence. 
+ state.register_unimplemented_export(Xboxkrnl, 0x98, "KeSetBackgroundProcessors", stub_success); state.register_export(Xboxkrnl, 0x99, "KeSetBasePriorityThread", ke_set_base_priority_thread); state.register_export(Xboxkrnl, 0x9B, "KeSetCurrentStackPointers", stub_success); state.register_export(Xboxkrnl, 0x9D, "KeSetEvent", ke_set_event); @@ -61,7 +96,7 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0xAF, "KeWaitForMultipleObjects", ke_wait_for_multiple_objects); state.register_export(Xboxkrnl, 0xB0, "KeWaitForSingleObject", ke_wait_for_single_object); state.register_export(Xboxkrnl, 0xB1, "KfAcquireSpinLock", kf_acquire_spin_lock); - state.register_export(Xboxkrnl, 0xB3, "KfLowerIrql", stub_success); + state.register_void_export(Xboxkrnl, 0xB3, "KfLowerIrql", kf_lower_irql); state.register_export(Xboxkrnl, 0xB4, "KfReleaseSpinLock", kf_release_spin_lock); state.register_export(Xboxkrnl, 0x0152, "KeTlsAlloc", ke_tls_alloc); state.register_export(Xboxkrnl, 0x0153, "KeTlsFree", stub_success); @@ -126,13 +161,16 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x0110, "ObReferenceObjectByHandle", ob_reference_object_by_handle); // RTL - state.register_export(Xboxkrnl, 0x0119, "RtlCaptureContext", rtl_capture_context); + // Phase C+6½: `RtlCaptureContext` (ord 0x119) is table-entry-only + // in canary — no `DECLARE_XBOXKRNL_EXPORT(RtlCaptureContext)`. + // Mirror canary's silence so the Phase A emitter doesn't drift. 
+ state.register_unimplemented_export(Xboxkrnl, 0x0119, "RtlCaptureContext", rtl_capture_context); state.register_export(Xboxkrnl, 0x011B, "RtlCompareMemoryUlong", rtl_compare_memory_ulong); state.register_export(Xboxkrnl, 0x0125, "RtlEnterCriticalSection", rtl_enter_critical_section); state.register_export(Xboxkrnl, 0x0126, "RtlFillMemoryUlong", rtl_fill_memory_ulong); state.register_export(Xboxkrnl, 0x0127, "RtlFreeAnsiString", stub_success); state.register_export(Xboxkrnl, 0x012B, "RtlImageXexHeaderField", rtl_image_xex_header_field); - state.register_export(Xboxkrnl, 0x012C, "RtlInitAnsiString", rtl_init_ansi_string); + state.register_void_export(Xboxkrnl, 0x012C, "RtlInitAnsiString", rtl_init_ansi_string); state.register_export(Xboxkrnl, 0x012D, "RtlInitUnicodeString", rtl_init_unicode_string); state.register_export(Xboxkrnl, 0x012E, "RtlInitializeCriticalSection", rtl_initialize_critical_section); state.register_export(Xboxkrnl, 0x012F, "RtlInitializeCriticalSectionAndSpinCount", rtl_initialize_critical_section); @@ -140,18 +178,27 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x0133, "RtlMultiByteToUnicodeN", rtl_multi_byte_to_unicode_n); state.register_export(Xboxkrnl, 0x0135, "RtlNtStatusToDosError", rtl_nt_status_to_dos_error); state.register_export(Xboxkrnl, 0x0136, "RtlRaiseException", rtl_raise_exception); - state.register_export(Xboxkrnl, 0x013B, "sprintf", stub_sprintf); + // Phase C+6½: `sprintf` (ord 0x13B) is table-entry-only in canary + // — no `DECLARE_XBOXKRNL_EXPORT(sprintf)`. Mirror canary's silence. 
+ state.register_unimplemented_export(Xboxkrnl, 0x013B, "sprintf", stub_sprintf); state.register_export(Xboxkrnl, 0x013F, "RtlTimeFieldsToTime", stub_success); state.register_export(Xboxkrnl, 0x0140, "RtlTimeToTimeFields", stub_success); state.register_export(Xboxkrnl, 0x0141, "RtlTryEnterCriticalSection", rtl_try_enter_critical_section); state.register_export(Xboxkrnl, 0x0142, "RtlUnicodeStringToAnsiString", stub_success); state.register_export(Xboxkrnl, 0x0143, "RtlUnicodeToMultiByteN", stub_success); - state.register_export(Xboxkrnl, 0x0147, "RtlUnwind", rtl_unwind); - state.register_export(Xboxkrnl, 0x014D, "_vsnprintf", stub_vsnprintf); + // Phase C+6½: `RtlUnwind` (ord 0x147) is table-entry-only in canary + // — no `DECLARE_XBOXKRNL_EXPORT(RtlUnwind)`. Mirror canary's silence. + state.register_unimplemented_export(Xboxkrnl, 0x0147, "RtlUnwind", rtl_unwind); + // Phase C+6½: `_vsnprintf` (ord 0x14D) is table-entry-only in + // canary — no `DECLARE_XBOXKRNL_EXPORT(_vsnprintf)`. Mirror silence. + state.register_unimplemented_export(Xboxkrnl, 0x014D, "_vsnprintf", stub_vsnprintf); // Stfs - state.register_export(Xboxkrnl, 0x0259, "StfsCreateDevice", stub_success); - state.register_export(Xboxkrnl, 0x025A, "StfsControlDevice", stub_success); + // Phase C+6½: `StfsCreateDevice` (ord 0x259) and `StfsControlDevice` + // (ord 0x25A) are table-entry-only in canary. `StfsCreateDevice` is + // the C+6-noted driver of tid=7→tid=2 divergence at idx=15. 
+ state.register_unimplemented_export(Xboxkrnl, 0x0259, "StfsCreateDevice", stub_success); + state.register_unimplemented_export(Xboxkrnl, 0x025A, "StfsControlDevice", stub_success); // Video state.register_export(Xboxkrnl, 0x01B1, "VdCallGraphicsNotificationRoutines", stub_success); @@ -160,12 +207,12 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x01B9, "VdGetCurrentDisplayGamma", vd_get_current_display_gamma); state.register_export(Xboxkrnl, 0x01BA, "VdGetCurrentDisplayInformation", stub_success); state.register_export(Xboxkrnl, 0x01BD, "VdGetSystemCommandBuffer", vd_get_system_command_buffer); - state.register_export(Xboxkrnl, 0x01C2, "VdInitializeEngines", stub_success); + state.register_export(Xboxkrnl, 0x01C2, "VdInitializeEngines", stub_return_one); state.register_export(Xboxkrnl, 0x01C3, "VdInitializeRingBuffer", vd_initialize_ring_buffer); state.register_export(Xboxkrnl, 0x01C5, "VdInitializeScalerCommandBuffer", stub_success); state.register_export(Xboxkrnl, 0x01C6, "VdIsHSIOTrainingSucceeded", vd_is_hsio_training_succeeded); state.register_export(Xboxkrnl, 0x01C7, "VdPersistDisplay", stub_success); - state.register_export(Xboxkrnl, 0x01C9, "VdQueryVideoFlags", stub_return_zero); + state.register_export(Xboxkrnl, 0x01C9, "VdQueryVideoFlags", vd_query_video_flags); state.register_export(Xboxkrnl, 0x01CA, "VdQueryVideoMode", vd_query_video_mode); state.register_export(Xboxkrnl, 0x0269, "VdRetrainEDRAM", stub_success); state.register_export(Xboxkrnl, 0x026A, "VdRetrainEDRAMWorker", stub_success); @@ -185,9 +232,11 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x0226, "XMAReleaseContext", stub_success); // Crypto - state.register_export(Xboxkrnl, 0x0192, "XeCryptSha", stub_success); - state.register_export(Xboxkrnl, 0x0256, "XeKeysConsolePrivateKeySign", stub_success); - state.register_export(Xboxkrnl, 0x0257, "XeKeysConsoleSignatureVerification", stub_success); + 
state.register_void_export(Xboxkrnl, 0x0192, "XeCryptSha", xe_crypt_sha); + state.register_export(Xboxkrnl, 0x0256, "XeKeysConsolePrivateKeySign", xe_keys_console_private_key_sign); + // Phase C+6½: `XeKeysConsoleSignatureVerification` (ord 0x257) is + // table-entry-only in canary. Mirror silence. + state.register_unimplemented_export(Xboxkrnl, 0x0257, "XeKeysConsoleSignatureVerification", stub_success); // Xex module state.register_export(Xboxkrnl, 0x0194, "XexCheckExecutablePrivilege", xex_check_executable_privilege); @@ -195,7 +244,9 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xboxkrnl, 0x0197, "XexGetProcedureAddress", xex_get_procedure_address); // Exception handling - state.register_export(Xboxkrnl, 0x01A5, "__C_specific_handler", c_specific_handler); + // Phase C+6½: `__C_specific_handler` (ord 0x1A5) is table-entry-only + // in canary. Mirror silence. + state.register_unimplemented_export(Xboxkrnl, 0x01A5, "__C_specific_handler", c_specific_handler); } // ===== Generic stubs ===== @@ -208,6 +259,16 @@ fn stub_return_zero(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut Kerne ctx.gpr[3] = 0; } +/// Phase W: a literal `return 1`. Matches canary's +/// `VdInitializeEngines_entry` in `xboxkrnl_video.cc:271-279` which +/// returns `1` (truthy success token) rather than STATUS_SUCCESS=0. +/// Sylpheed-side guest code branches on this non-zero, so returning +/// 0 made the game skip the VdInitializeRingBuffer-and-after init +/// sequence and never set up the post-init render-target state. +fn stub_return_one(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { + ctx.gpr[3] = 1; +} + // ===== Debug ===== fn dbg_break_point(_ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { @@ -280,6 +341,16 @@ fn ex_create_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelS if let Some(KernelObject::Thread { hw_id: slot, .. 
}) = state.objects.get_mut(&handle) { *slot = Some(hw_id); } + // Phase C+16: install the "thread owns itself until exited" + // self-reference. Mirrors canary's `XThread::Create` line 414 + // `RetainHandle()`. Released by `ex_terminate_thread` and the + // main-loop LR-sentinel implicit-exit path. Without this, a + // subsequent NtClose on the thread handle (e.g. via + // `XamTaskCloseHandle`) drops the only ref and prematurely + // destroys the thread handle while the spawned thread is + // still live — the original C+16 divergence at Phase A + // idx=102168 on the main chain (canary tid=6 ↔ ours tid=1). + state.retain_handle(handle); if handle_ptr != 0 { mem.write_u32(handle_ptr, handle); } @@ -296,6 +367,33 @@ fn ex_create_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelS create_suspended, affinity, ); + // Phase C+15-α: schema-v1 `thread.create` event emitted by + // the **parent** thread at the kernel call that created the + // new guest thread. The handle.create for the thread-handle + // itself was already emitted inside `alloc_handle_for` + // above; here we surface the spawn-specific metadata + // (entry_pc, ctx_ptr, priority, affinity, stack, suspended). + // Canary's symmetric emit is at `XThread::Create` after + // CreationParameters are populated. 
+ if crate::event_log::is_enabled() { + let (parent_tid, cycle) = { + let r = state.scheduler.current_ref(); + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + }; + let sid = crate::event_log::lookup_handle_semantic_id(handle); + crate::event_log::emit_thread_create( + parent_tid, + cycle, + sid, + start_address, + start_context, + /* priority */ 0, + affinity, + stack_size, + create_suspended, + ); + } ctx.gpr[3] = STATUS_SUCCESS; } Err(_) => { @@ -306,11 +404,382 @@ fn ex_create_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelS } } +// ========================================================================= +// review-a Step 1 — `--force-spawn-workers` crowbar +// +// Diagnostic-only, default-OFF. Synthesizes the 4 `sub_825070F0` worker +// spawns that canary tid=6 emits at host_ns ≈ 10.383 s but ours never +// reaches (the AUDIT-049 wedge cycle). See +// `xenia-rs/audit-runs/review-a-step1-crowbar/investigation.md`. +// +// **This is NOT a fix** — the natural-activation path remains broken. +// Use the cvar `XENIA_CROWBAR_WORKERS=1` to enable. The crowbar fires +// once when `KernelState::crowbar_workers_fired` flips from false to +// true (handled by the per-round trigger in `xenia-app/src/main.rs`). +// ========================================================================= +const CROWBAR_WORKER_ENTRIES: [u32; 4] = [0x82506528, 0x82506558, 0x82506588, 0x825065B8]; +const CROWBAR_VTABLE_BASE: u32 = 0x8200_A1E8; +const CROWBAR_STACK_SIZE: u32 = 65_536; + +/// Drop-in host-side spawn for one of the four `sub_825070F0` workers. +/// Returns the new thread's handle on success, or `None` if either the +/// thread-image allocation or scheduler spawn failed. The thread is +/// created **suspended** to mirror canary's parameters; the caller is +/// expected to resume via `nt_resume_thread`-equivalent or directly via +/// `scheduler.resume_ref` once all 4 workers exist. 
+fn crowbar_spawn_one_worker( + state: &mut KernelState, + mem: &GuestMemory, + entry: u32, + ctx_ptr: u32, +) -> Option<u32> { + let image = allocate_thread_image(state, mem, CROWBAR_STACK_SIZE, 0)?; + use std::sync::atomic::Ordering; + let tid = state.next_thread_id.fetch_add(1, Ordering::Relaxed); + let handle = state.alloc_handle_for(KernelObject::Thread { + id: tid, + hw_id: None, + exit_code: None, + waiters: Vec::new(), + }); + let tls_slot_count = state.next_tls_index.load(Ordering::Relaxed); + let params = SpawnParams { + entry, + start_context: ctx_ptr, + stack_base: image.stack_base, + stack_size: image.stack_size, + pcr_base: image.pcr_base, + tls_base: image.tls_base, + thread_handle: handle, + guest_tid: tid, + create_suspended: true, + is_initial: false, + tls_slot_count, + affinity_mask: 0, + priority: 0, + ideal_processor: None, + }; + match state.scheduler.spawn(params, &mut GuestMemoryPcr(mem)) { + Ok(hw_id) => { + metrics::counter!("scheduler.spawn.ok").increment(1); + if let Some(KernelObject::Thread { hw_id: slot, .. }) = + state.objects.get_mut(&handle) + { + *slot = Some(hw_id); + } + state.retain_handle(handle); + tracing::warn!( + "CROWBAR: spawn worker tid={} handle={:#x} hw={} entry={:#010x} ctx={:#010x}", + tid, + handle, + hw_id, + entry, + ctx_ptr, + ); + Some(handle) + } + Err(_) => { + metrics::counter!("scheduler.spawn.rejected").increment(1); + tracing::error!( + "CROWBAR: no free HW slot for worker entry={:#010x}", + entry + ); + None + } + } +} + +/// Crowbar v2 Step 0 — summarise `len` bytes of guest memory starting at `addr` +/// in the tracing log: the non-zero byte count, then up to 48 u32 slots as +/// fn-pointer candidates. Read-only. Used to verify whether the `0x8200A1E8` vtable +/// region is populated in ours's cold-boot state. +fn crowbar_dump_vtable_region(mem: &GuestMemory, addr: u32, len: u32) { + let mut buf = vec![0u8; len as usize]; + mem.read_bytes(addr, &mut buf); + // Summary stats first. 
+ let nonzero = buf.iter().filter(|b| **b != 0).count(); + tracing::warn!( + "CROWBAR-DIAG: vtable region @{:#010x}..+{} — nonzero bytes={}/{}", + addr, + len, + nonzero, + len, + ); + // Walk every u32 slot (vtable[i]) to count non-zero entries; log only the first 48. + let max_slots = (len as usize) / 4; + let mut nonzero_slots = 0u32; + for i in 0..max_slots { + let v = mem.read_u32(addr + (i as u32) * 4); + if v != 0 { + nonzero_slots += 1; + } + // Only log the first 48 slots fully; that covers offset 140..152 (slots 35-38). + if i < 48 { + tracing::warn!( + "CROWBAR-DIAG: slot[{:>2}] @ +{:>3} (={:#010x}) = {:#010x}", + i, + i * 4, + addr + (i as u32) * 4, + v, + ); + } + } + tracing::warn!( + "CROWBAR-DIAG: nonzero u32 slots in first {}={}; worker stub reads slots 35-38 (offsets 140/144/148/152)", + max_slots, + nonzero_slots, + ); +} + +/// Crowbar v2 Step 2 — optionally install vtable contents at `vtable_base` +/// from a binary file specified by `XENIA_CROWBAR_VTABLE_BIN`. Bytes are +/// written verbatim (no byte-swap) via `write_u8` because the file is +/// expected to be a raw guest-endian (big-endian) dump captured from +/// canary's runtime memory at the same VA (at most 256 bytes are written). Logs a +/// verification re-read of slots 35-38 (offsets 140..152) after writing. If the env var is +/// unset or empty, this is a no-op so v1 behaviour is preserved exactly. 
+fn crowbar_maybe_install_vtable_from_file(mem: &GuestMemory, vtable_base: u32) { + let path = match std::env::var("XENIA_CROWBAR_VTABLE_BIN") { + Ok(p) if !p.is_empty() => p, + _ => { + tracing::warn!( + "CROWBAR: XENIA_CROWBAR_VTABLE_BIN not set — skipping vtable install \ + (v1 behaviour; workers will likely fault if vtable[35] is null)" + ); + return; + } + }; + let bytes = match std::fs::read(&path) { + Ok(b) => b, + Err(e) => { + tracing::error!( + "CROWBAR: failed to read vtable bin {:?}: {} — skipping install", + path, + e, + ); + return; + } + }; + let n = bytes.len().min(256); + for (i, b) in bytes.iter().take(n).enumerate() { + mem.write_u8(vtable_base + i as u32, *b); + } + tracing::warn!( + "CROWBAR: installed {} bytes at vtable {:#010x} from {:?}", + n, + vtable_base, + path, + ); + // Verify by re-reading the slot the worker stub actually dispatches through. + let slot35 = mem.read_u32(vtable_base + 140); + let slot36 = mem.read_u32(vtable_base + 144); + let slot37 = mem.read_u32(vtable_base + 148); + let slot38 = mem.read_u32(vtable_base + 152); + tracing::warn!( + "CROWBAR: post-install verify — vtable[35]={:#010x} vtable[36]={:#010x} \ + vtable[37]={:#010x} vtable[38]={:#010x}", + slot35, + slot36, + slot37, + slot38, + ); +} + +/// Crowbar v3 Step 2 — optionally install full ctx bytes at `ctx_ptr` +/// from a binary file specified by `XENIA_CROWBAR_CTX_BIN`. Bytes are +/// written verbatim (no byte-swap) via `write_u8` because the file is +/// expected to be a raw guest-endian (big-endian) capture of the ctx +/// layout from canary's runtime memory. Logs the first 16 u32 slots +/// after install for verification. If the env var is unset, this is a +/// no-op so v2 behaviour is preserved exactly. +/// +/// Captured via canary's `audit_68_host_mem_read_probe` cvar — see +/// `xenia-rs/audit-runs/review-a-step1c-crowbar-v3/canary-probe-run1.log`. 
+/// +/// **Option γ (per v3 brief)**: install verbatim, including canary-VA +/// pointer fields like `[ctx+44]=0xBCE25640`. These VAs may be unmapped +/// in ours's address space — if a worker dereferences one and faults, +/// that confirms the case-(C) recursion is required (v4 work). +fn crowbar_maybe_install_ctx_from_file(mem: &GuestMemory, ctx_ptr: u32) { + let path = match std::env::var("XENIA_CROWBAR_CTX_BIN") { + Ok(p) if !p.is_empty() => p, + _ => { + tracing::warn!( + "CROWBAR: XENIA_CROWBAR_CTX_BIN not set — skipping ctx install \ + (v2 behaviour; only +0/+4/+8/+12 are populated; \ + workers will likely fault on [ctx+44] dispatch)" + ); + return; + } + }; + let bytes = match std::fs::read(&path) { + Ok(b) => b, + Err(e) => { + tracing::error!( + "CROWBAR: failed to read ctx bin {:?}: {} — skipping install", + path, + e, + ); + return; + } + }; + let n = bytes.len().min(256); + for (i, b) in bytes.iter().take(n).enumerate() { + mem.write_u8(ctx_ptr + i as u32, *b); + } + tracing::warn!( + "CROWBAR: installed {} bytes at ctx_ptr={:#010x} from {:?}", + n, + ctx_ptr, + path, + ); + // Verify: log the first 16 u32 slots after install. + for slot in 0..16u32 { + let off = slot * 4; + let v = mem.read_u32(ctx_ptr + off); + tracing::warn!( + "CROWBAR: post-ctx-install ctx[+{:>3}] (={:#010x}) = {:#010x}", + off, + ctx_ptr + off, + v, + ); + } +} + +/// Crowbar entry point — allocate the worker ctx, install the vtable +/// + self-pointer doubly-linked-list head pattern that AUDIT-068 S3 +/// captured, spawn all 4 workers suspended, then resume each one. +/// Returns the number of workers successfully resumed (0..=4). +/// +/// **Reading-error #37 discipline**: the value written at `ctx+0` is the +/// vtable BASE `0x8200A1E8`, NOT the slot-N address `0x8200A208` cited +/// in older audits. Per AUDIT-068 S3 measurement. +pub fn crowbar_force_spawn_workers(state: &mut KernelState, mem: &GuestMemory) -> u32 { + // 0. 
Crowbar v2 Step 0 diagnostic — dump 512 bytes at the vtable base + // BEFORE doing anything else. Distinguishes case (A) vtable .rdata + // is missing/zero in ours vs case (B) .rdata present but vtable[35] + // is not statically populated (= runtime install needed). Per + // Reading-error #37: 0x8200A1E8 is vtable BASE; slot N is at base+4*N. + // For workers we care about slots 35/36/37/38 (offsets 140/144/148/152). + // 512 bytes (128 slots) covers vtable[64] (+256, read by `sub_82506B08`), but + // NOTE(review): only slots 0..47 are logged individually, so slot 64 is only counted. + crowbar_dump_vtable_region(mem, CROWBAR_VTABLE_BASE, 512); + + // 1. Allocate ctx struct (one heap page is plenty; the real struct is + // much smaller but we never overlap because heap_alloc bumps + // page-aligned). + let ctx_ptr = match state.heap_alloc(0x1000, mem) { + Some(p) => p, + None => { + tracing::error!("CROWBAR: heap_alloc(ctx) failed — out of heap region"); + return 0; + } + }; + + // 2. Initialise ctx per AUDIT-068 S3 12-byte POD-copy signature plus + // refcount=1 at +0x0C. `write_u32` here goes through the GuestMemory + // BE-store path, so the on-guest u32 lanes are correctly byte-swapped. + mem.write_u32(ctx_ptr, CROWBAR_VTABLE_BASE); + mem.write_u32(ctx_ptr + 4, ctx_ptr); + mem.write_u32(ctx_ptr + 8, ctx_ptr); + mem.write_u32(ctx_ptr + 12, 1); + tracing::warn!( + "CROWBAR: ctx allocated at {:#010x}, vtable={:#010x}, self-links + refcount=1 installed", + ctx_ptr, + CROWBAR_VTABLE_BASE, + ); + + // 2b. Crowbar v2 Step 2 — vtable-contents install. + // If the cvar `XENIA_CROWBAR_VTABLE_BIN=<path>` is set AND the file + // exists, read up to 256 bytes from it and write them at + // CROWBAR_VTABLE_BASE. Bytes in the file are expected to be the + // GUEST-endian (big-endian) raw vtable contents as captured from + // canary's runtime memory at the same VA. No byte-swap is performed — + // they are written via `write_u8` so they go onto the guest as-is. 
+ // If no file is provided, we still proceed (so v1 behaviour is + // preserved exactly when the env var is unset). + crowbar_maybe_install_vtable_from_file(mem, CROWBAR_VTABLE_BASE); + + // 2c. Crowbar v3 Step 2 — full ctx-bytes install. + // If the cvar `XENIA_CROWBAR_CTX_BIN=<path>` is set AND the file + // exists, read up to 256 bytes from it and write them at ctx_ptr. + // The file should be a raw guest-endian (big-endian) capture of the + // ctx layout — see canary's `audit_68_host_mem_read_probe` cvar. + // The v2 init at +0/+4/+8/+12 above is intentionally retained as a + // fallback when the env var is unset; the file install overwrites + // those four slots verbatim (assumed to match the v2 pattern — TODO confirm for the +4/+8 self-links). + // + // **Option γ (per v3 brief)**: canary-VA pointer fields like + // `[ctx+44]=0xBCE25640` are written as-is even if unmapped in + // ours — diagnostic intent is to OBSERVE the fault PC, not avoid + // it. + crowbar_maybe_install_ctx_from_file(mem, ctx_ptr); + + // 3. Spawn the 4 workers suspended (matching canary jitter sample). + let mut handles: [u32; 4] = [0; 4]; + let mut spawned = 0u32; + for (i, entry) in CROWBAR_WORKER_ENTRIES.iter().enumerate() { + if let Some(h) = crowbar_spawn_one_worker(state, mem, *entry, ctx_ptr) { + handles[i] = h; + spawned += 1; + } + } + + // 4. Resume each spawned worker directly through the scheduler. + // Mirrors the natural canary path which calls NtResumeThread soon + // after the create burst (not captured in the jsonl excerpt). 
+ let mut resumed = 0u32; + for (i, h) in handles.iter().enumerate() { + if *h == 0 { + tracing::warn!("CROWBAR: skipping resume of handle[{}] (spawn failed)", i); + continue; + } + if let Some(r) = state.scheduler.find_by_handle(*h) { + let prev = state.scheduler.resume_ref(r); + tracing::warn!( + "CROWBAR: resumed handle[{}]={:#x} -> r=(hw={}, idx={}) prev_suspend_count={}", + i, + h, + r.hw_id, + r.idx, + prev, + ); + resumed += 1; + } else { + tracing::warn!( + "CROWBAR: find_by_handle({:#x}) returned None for handle[{}]", + h, + i, + ); + } + } + tracing::warn!( + "CROWBAR: fired — ctx={:#010x} spawned={}/4 resumed={}/4", + ctx_ptr, + spawned, + resumed, + ); + resumed +} + /// `ExTerminateThread(exit_code)` — terminates the current guest thread. The /// thread transitions to Exited and the main loop unschedules it. Joiners /// waiting on the thread handle are woken with STATUS_SUCCESS. fn ex_terminate_thread(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { let exit_code = ctx.gpr[3] as u32; + // Phase C+15-α: schema-v1 `thread.exit` event. Must emit BEFORE the + // scheduler unwinds the current thread, because `tid_event_idx` is + // per-tid and the exiting thread's counter is what gets the event. + // Canary symmetric emit at `XThread::Execute` exit (xthread.cc:540 + // ff., after `kernel_state()->processor()->Execute` returns). 
+ if crate::event_log::is_enabled() { + let (tid, cycle) = { + let r = state.scheduler.current_ref(); + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + }; + crate::event_log::emit_thread_exit(tid, cycle, exit_code); + } let (hw_id, tid, handle_opt) = state.scheduler.exit_current(exit_code); tracing::info!( "ExTerminateThread: tid={:?} hw={} exit_code={}", @@ -318,8 +787,8 @@ fn ex_terminate_thread(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut Ker hw_id, exit_code ); - if let Some(handle) = handle_opt - && let Some(KernelObject::Thread { + if let Some(handle) = handle_opt { + if let Some(KernelObject::Thread { exit_code: ec, waiters, .. @@ -331,6 +800,16 @@ fn ex_terminate_thread(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut Ker state.scheduler.wake_ref(w); } } + // Phase C+16: release the thread's self-reference installed at + // spawn time (`ex_create_thread` / `xam_task_schedule` via + // `state.retain_handle`). Mirrors canary's `XThread::Exit` + // `ReleaseHandle()` at xthread.cc:524. After this release, the + // refcount equals only the user-visible refs (1 if guest hasn't + // closed the handle, 0 if guest already called NtClose during + // the thread's lifetime — in which case the handle is destroyed + // here, emitting `handle.destroy`). + state.release_handle(handle); + } tracing::debug!("ExTerminateThread: exit_status={:#x}", ctx.gpr[3]); ctx.gpr[3] = 0; } @@ -375,38 +854,51 @@ fn ke_query_base_priority_thread( ctx.gpr[3] = pri as u32 as u64; } -/// `KeSetIdealProcessor(thread_handle, proc_number) -> u8 old_ideal` — -/// Axis 5. Stores the hint on the `GuestThread` for future spawn-sibling -/// placement; does NOT migrate a live thread (use `KeSetAffinityThread` -/// for that). -fn ke_set_ideal_processor( +/// Phase C+6½ hallucination fix: ord 0x82 maps to `KeQueryInterruptTime` +/// in canary's `xboxkrnl_table.inc:130`, with a `DECLARE_XBOXKRNL_EXPORT` +/// shim in `xboxkrnl_misc.cc:119-127`. 
Ours previously mis-labeled this +/// ord as `KeQueryIdealProcessor` (a real NT function, but at a different +/// position on Xbox 360 — not at 0x82). The hallucinated body returned +/// the `ideal_processor` byte of the thread named by r3; guests calling +/// `KeQueryInterruptTime` to read the system interrupt-time counter were +/// receiving a 1-byte processor index instead. +/// +/// Canary returns `bundle->interrupt_time` (u64) — the monotonic system +/// interrupt-time counter maintained by the kernel timer ISR. Ours has +/// no `X_TIME_STAMP_BUNDLE` infrastructure, so we mirror the +/// `KeQuerySystemTime` approach: return a fixed synthetic value that +/// looks like a plausible post-boot u64 but never advances. Determinism per `KernelState` +/// requires this be reproducible — a constant satisfies that. +fn ke_query_interrupt_time( ctx: &mut PpcContext, _mem: &GuestMemory, - state: &mut KernelState, + _state: &mut KernelState, ) { - let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); - let ideal = ctx.gpr[4] as u8; - let prev = state - .scheduler - .find_by_handle(handle) - .map(|r| state.scheduler.set_ideal_ref(r, ideal)) - .unwrap_or(0xFF); - ctx.gpr[3] = prev as u64; + // Synthetic interrupt-time count. Units are 100ns ticks since boot; + // value chosen large enough to look post-boot but small enough that + // any timer-arithmetic stays in u32 range when masked. Matches the + // determinism pattern used by `ke_query_system_time` below. 
+ const FAKE_INTERRUPT_TIME: u64 = 0x0000_0001_0000_0000; + ctx.gpr[3] = FAKE_INTERRUPT_TIME; } -fn ke_query_ideal_processor( - ctx: &mut PpcContext, - _mem: &GuestMemory, - state: &mut KernelState, -) { - let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); - let ideal = state - .scheduler - .find_by_handle(handle) - .and_then(|r| state.scheduler.ideal_ref(r)) - .unwrap_or(0); - ctx.gpr[3] = ideal as u64; -} +/// Phase C+6½ hallucination fix: ord 0x98 maps to +/// `KeSetBackgroundProcessors` in canary's `xboxkrnl_table.inc:166`. +/// Canary has NO `DECLARE_XBOXKRNL_EXPORT` shim for this name — it's a +/// table-entry-only export, routed through the syscall thunk +/// (`xex_module.cc:1310-1335`) which is a no-op. Ours previously +/// mis-labeled this ord as `KeSetIdealProcessor` (a real NT function but +/// at a different position on Xbox 360) and the hallucinated body wrote +/// to `GuestThread::ideal_processor` — a state mutation under the wrong +/// semantic name. Guests calling `KeSetBackgroundProcessors` to mask off +/// CPUs for background work were instead pinning the thread's ideal +/// processor hint. +/// +/// Replaced with a no-op (`stub_success`) registered via +/// `register_unimplemented_export` so the Phase A emitter stays silent +/// (matching canary's syscall-thunk path). The underlying +/// `Scheduler::set_ideal_ref`/`ideal_ref` methods remain available for +/// `NtSetInformationThread` info-class `ThreadIdealProcessor`. /// `NtSetInformationThread(handle, info_class, info_ptr, info_len)` — /// minimal Axis 5 wiring for priority / affinity / ideal-processor @@ -453,18 +945,33 @@ fn nt_set_information_thread( } } -/// `KeSetAffinityThread(thread_handle, new_mask) -> old_mask` — Axis 4. -/// Drives `KernelState::set_affinity` which delegates to the scheduler -/// and then fixes up every outstanding `ThreadRef` held in waiter lists. +/// `KeSetAffinityThread(thread_ptr, affinity, prev_affinity_ptr)` — Axis 4. 
+/// Mirrors xenia-canary `KeSetAffinityThread_entry` +/// (xboxkrnl_threading.cc:323-346): returns `X_STATUS_SUCCESS` (0) in r3 +/// and writes the previous affinity to `*prev_affinity_ptr` (r5) when +/// non-NULL. Rejects a zero mask (after truncation to `u8`) with `X_STATUS_INVALID_PARAMETER`. +/// NOTE(review): an unresolvable handle is not reported as `X_STATUS_INVALID_HANDLE` here — confirm. +/// +/// Stage 2 Batch 3 fix (2026-05-14): pre-fix, ours returned `old_mask` in +/// r3 with no OUT-pointer write — guest code expecting `STATUS_SUCCESS` +/// in r3 was reading a small bitmask as an NTSTATUS. fn ke_set_affinity_thread( ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState, ) { - let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); let new_mask = (ctx.gpr[4] as u32) as u8; + let prev_ptr = ctx.gpr[5] as u32; + if new_mask == 0 { + ctx.gpr[3] = 0xC000_000D; // X_STATUS_INVALID_PARAMETER + return; + } + let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); let old = state.set_affinity(handle, new_mask, mem); - ctx.gpr[3] = old as u64; + if prev_ptr != 0 { + mem.write_u32(prev_ptr, old as u32); + } + ctx.gpr[3] = 0; // X_STATUS_SUCCESS } fn ke_bug_check(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { @@ -495,6 +1002,49 @@ fn ke_query_system_time(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut Ke } } +/// Offset of `current_irql` (u8) within PCR. Mirrors xenia-canary's +/// `X_KPCR.current_irql` at offset 0x18 (xthread.h:189). PCR base is in +/// `ctx.gpr[13]` per scheduler setup. +const PCR_CURRENT_IRQL_OFFSET: u32 = 0x18; + +/// Mirrors xenia-canary `KeRaiseIrqlToDpcLevel_entry` +/// (xboxkrnl_threading.cc:1253-1264): reads PCR's `current_irql`, +/// returns the old value in r3, writes `DISPATCH_LEVEL` (2) back. 
+fn ke_raise_irql_to_dpc_level( + ctx: &mut PpcContext, + mem: &GuestMemory, + _state: &mut KernelState, +) { + let pcr = ctx.gpr[13] as u32; + let old_irql = mem.read_u8(pcr.wrapping_add(PCR_CURRENT_IRQL_OFFSET)); + if old_irql > 2 { + tracing::warn!( + old_irql = old_irql, + "KeRaiseIrqlToDpcLevel: old_irql > 2 (DISPATCH_LEVEL)" + ); + } + mem.write_u8(pcr.wrapping_add(PCR_CURRENT_IRQL_OFFSET), 2); + ctx.gpr[3] = old_irql as u64; +} + +/// Mirrors xenia-canary `KfLowerIrql_entry` +/// (xboxkrnl_threading.cc:1280-1282 calling `xeKfLowerIrql`): writes +/// `new_irql` (r3) to PCR's `current_irql`. Void return (registered via +/// `register_void_export`). +fn kf_lower_irql(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { + let new_irql = (ctx.gpr[3] as u32) as u8; + let pcr = ctx.gpr[13] as u32; + let current = mem.read_u8(pcr.wrapping_add(PCR_CURRENT_IRQL_OFFSET)); + if new_irql > current { + tracing::warn!( + new_irql = new_irql, + current = current, + "KfLowerIrql: new_irql > current_irql" + ); + } + mem.write_u8(pcr.wrapping_add(PCR_CURRENT_IRQL_OFFSET), new_irql); +} + fn ke_initialize_semaphore(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = PKSEMAPHORE, r4 = initial count, r5 = limit. // Mirrors xenia-canary KeInitializeSemaphore_entry @@ -592,8 +1142,102 @@ fn ke_tls_set_value(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut Kernel ctx.gpr[3] = 1; // TRUE } -fn ex_get_xconfig_setting(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { - ctx.gpr[3] = 0; // STATUS_SUCCESS (writes nothing) +/// Mirrors xenia-canary `ExGetXConfigSetting_entry` + `xeExGetXConfigSetting` +/// (xboxkrnl_xconfig.cc:303-319 calling :65-302). Returns a small value +/// describing one of the Xbox 360's `XCONFIG_*` settings. +/// +/// Stage 2 Batch 6 (2026-05-14): pre-fix returned STATUS_SUCCESS with no +/// buffer write — game saw uninitialized buffer data. 
We implement the +/// most commonly queried (category, setting) pairs as constants matching +/// canary's defaults. Unknown pairs return `STATUS_INVALID_PARAMETER_2`. +fn ex_get_xconfig_setting(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { + let category = (ctx.gpr[3] as u32) & 0xFFFF; + let setting = (ctx.gpr[4] as u32) & 0xFFFF; + let buffer_ptr = ctx.gpr[5] as u32; + let buffer_size = (ctx.gpr[6] as u32) & 0xFFFF; + let required_size_ptr = ctx.gpr[7] as u32; + + // Per-setting value encoded as big-endian bytes (canary uses + // `xe::store_and_swap`; we hand-roll the BE bytes since values + // are constant). + #[derive(Clone, Copy)] + enum SettingValue { + U8(u8), + U16Be(u16), + U32Be(u32), + } + impl SettingValue { + fn size(&self) -> u16 { + match self { + SettingValue::U8(_) => 1, + SettingValue::U16Be(_) => 2, + SettingValue::U32Be(_) => 4, + } + } + fn write(&self, mem: &GuestMemory, addr: u32) { + match self { + SettingValue::U8(v) => mem.write_u8(addr, *v), + SettingValue::U16Be(v) => mem.write_u16(addr, *v), + SettingValue::U32Be(v) => mem.write_u32(addr, *v), + } + } + } + + let value: Option = match (category, setting) { + // XCONFIG_SECURED_CATEGORY = 0x02 + (0x02, 0x02) => Some(SettingValue::U32Be(1)), // SECURED_AV_REGION = NTSCM + // XCONFIG_USER_CATEGORY = 0x03 + (0x03, 0x01) // TIME_ZONE_BIAS + | (0x03, 0x02) // TIME_ZONE_STD_NAME + | (0x03, 0x03) // TIME_ZONE_DLT_NAME + | (0x03, 0x04) // TIME_ZONE_STD_DATE + | (0x03, 0x05) // TIME_ZONE_DLT_DATE + | (0x03, 0x06) // TIME_ZONE_STD_BIAS + | (0x03, 0x07) // TIME_ZONE_DLT_BIAS + => Some(SettingValue::U32Be(0)), + (0x03, 0x09) => Some(SettingValue::U32Be(1)), // USER_LANGUAGE = en + (0x03, 0x0A) => Some(SettingValue::U32Be(0)), // USER_VIDEO_FLAGS = RatioNormal + (0x03, 0x0B) => Some(SettingValue::U32Be(0x00010001)), // USER_AUDIO_FLAGS + (0x03, 0x0C) => Some(SettingValue::U32Be(0x40)), // USER_RETAIL_FLAGS + (0x03, 0x0E) => Some(SettingValue::U8(103)), // USER_COUNTRY = US + 
(0x03, 0x0F) => Some(SettingValue::U8(0x03)), // USER_PC_FLAGS = XBL allowed + // XCONFIG_CONSOLE_CATEGORY = 0x07 + (0x07, 0x02) => Some(SettingValue::U16Be(0)), // SCREEN_SAVER = Off + (0x07, 0x03) => Some(SettingValue::U16Be(0)), // AUTO_SHUT_OFF = Off + _ => None, + }; + + let v = match value { + Some(v) => v, + None => { + // Unknown category or setting. Match canary's per-category + // return code: invalid category vs invalid setting both + // surface as STATUS_INVALID_PARAMETER_x in canary; we use + // STATUS_INVALID_PARAMETER_2 as a single sentinel since the + // distinction is rarely consulted by guest code. + ctx.gpr[3] = 0xC000_00F0; // X_STATUS_INVALID_PARAMETER_2 + return; + } + }; + + let setting_size = v.size(); + + if buffer_ptr != 0 { + if buffer_size < setting_size as u32 { + ctx.gpr[3] = 0xC000_0023; // X_STATUS_BUFFER_TOO_SMALL + return; + } + v.write(mem, buffer_ptr); + } else if buffer_size != 0 { + ctx.gpr[3] = 0xC000_00F1; // X_STATUS_INVALID_PARAMETER_3 + return; + } + + if required_size_ptr != 0 { + mem.write_u16(required_size_ptr, setting_size); + } + + ctx.gpr[3] = 0; // STATUS_SUCCESS } // ===== Memory ===== @@ -649,16 +1293,32 @@ fn mm_allocate_physical_memory_ex(ctx: &mut PpcContext, mem: &GuestMemory, state // Return value is the guest address; 0 indicates failure (Xbox ABI). let flags = ctx.gpr[3] as u32; let size = ctx.gpr[4] as u32; + let protect_bits = ctx.gpr[5] as u32; if size == 0 { tracing::warn!(flags, "MmAllocatePhysicalMemoryEx: zero-size request → returning 0"); ctx.gpr[3] = 0; return; } - match state.heap_alloc(size, mem) { + // Iterate 2.H — bucket routing. 
Canary `xeMmAllocatePhysicalMemoryEx` + // (`xboxkrnl_memory.cc:436-455`) picks `page_size` from `protect_bits`: + // X_MEM_LARGE_PAGES (0x20000000) → 64KB → vA0000000 (0xA0000000-0xBFFFFFFF) + // X_MEM_16MB_PAGES (0x80000000) → 16MB → vC0000000 (deferred to 2.I) + // default (4KB) → vE0000000 (deferred to 2.I) + // For 2.H we only wire the 64KB bucket; the others still fall through + // to the legacy `heap_alloc` at 0x40000000 (incorrect bucket, but + // preserves prior behavior for non-large-page calls). + const X_MEM_LARGE_PAGES: u32 = 0x2000_0000; + let result = if protect_bits & X_MEM_LARGE_PAGES != 0 { + state.physical_heap_alloc(size, mem) + } else { + state.heap_alloc(size, mem) + }; + match result { Some(addr) => { tracing::debug!( flags, size = format_args!("{size:#x}"), + protect = format_args!("{protect_bits:#x}"), addr = format_args!("{addr:#010x}"), "MmAllocatePhysicalMemoryEx" ); @@ -668,6 +1328,7 @@ fn mm_allocate_physical_memory_ex(ctx: &mut PpcContext, mem: &GuestMemory, state tracing::warn!( flags, size = format_args!("{size:#x}"), + protect = format_args!("{protect_bits:#x}"), "MmAllocatePhysicalMemoryEx: heap exhausted" ); ctx.gpr[3] = 0; @@ -730,6 +1391,34 @@ const STATUS_SEMAPHORE_LIMIT_EXCEEDED: u64 = 0xC000_0047; const STATUS_UNSUCCESSFUL: u64 = 0xC000_0001; const STATUS_INVALID_INFO_CLASS: u64 = 0xC000_0003; const STATUS_INFO_LENGTH_MISMATCH: u64 = 0xC000_0004; +const STATUS_OBJECT_NAME_INVALID: u64 = 0xC000_0033; +const STATUS_ACCESS_DENIED: u64 = 0xC000_0022; +// Phase C+11 — canary's `NtQueryFullAttributesFile_entry` returns +// `STATUS_NO_SUCH_FILE` (0xC000000F) on resolve-miss, not +// `STATUS_OBJECT_NAME_NOT_FOUND` (0xC0000034). Both are negative NTSTATUS +// values; Sylpheed treats them equivalently at the call site, but the +// Phase A diff compares return values byte-exact, so the codes must +// match. 
+const STATUS_NO_SUCH_FILE: u64 = 0xC000_000F; +/// Phase C+5 — canary's `NtWriteFile_entry` +/// (xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc:351-353) returns +/// this NT-style status code when the underlying `XFile::is_synchronous_` +/// is false (i.e. the file was opened without `FILE_SYNCHRONOUS_IO_ALERT` +/// or `FILE_SYNCHRONOUS_IO_NONALERT`). The write itself still completes +/// synchronously and the IO_STATUS_BLOCK still records STATUS_SUCCESS; +/// only the function return value flips. Real NT uses STATUS_PENDING here +/// as a "the caller may now wait on the event" convention. +const STATUS_PENDING: u64 = 0x0000_0103; + +/// `CreateOptions` bits we care about for is-synchronous tracking +/// (canary's `CreateOptions::FILE_SYNCHRONOUS_IO_ALERT` / +/// `CreateOptions::FILE_SYNCHRONOUS_IO_NONALERT` in xboxkrnl_io.cc:32-33). +/// `NtOpenFile` forwards the same options dword through its `open_options` +/// argument, so this bitmask applies to both paths. +const FILE_SYNCHRONOUS_IO_ALERT: u32 = 0x0000_0010; +const FILE_SYNCHRONOUS_IO_NONALERT: u32 = 0x0000_0020; +const FILE_SYNCHRONOUS_IO_MASK: u32 = + FILE_SYNCHRONOUS_IO_ALERT | FILE_SYNCHRONOUS_IO_NONALERT; /// `X_ERROR_NOT_FOUND` from xenia-canary `xenia/xbox.h`. Returned by /// `XexGetModuleHandle` for unknown module names. const X_ERROR_NOT_FOUND: u64 = 0x0000_048B; @@ -737,6 +1426,17 @@ const X_ERROR_NOT_FOUND: u64 = 0x0000_048B; /// A sentinel byte-offset value meaning "read at current file position". const FILE_USE_FILE_POINTER_POSITION: u64 = 0xFFFF_FFFF_FFFF_FFFE; +/// Phase C+5 — register `handle` in `state.async_file_handles` iff the +/// caller did NOT request synchronous IO (mirrors canary's +/// `XFile::is_synchronous_` derivation in xboxkrnl_io.cc:94-97). Subsequent +/// `nt_write_file` returns flip from `STATUS_SUCCESS` to `STATUS_PENDING` +/// for async-opened files only. 
+fn maybe_mark_async_file(state: &mut KernelState, handle: u32, create_options: u32) { + if (create_options & FILE_SYNCHRONOUS_IO_MASK) == 0 { + state.async_file_handles.insert(handle); + } +} + /// Write an `IO_STATUS_BLOCK { status, information }` if the pointer is non-null. fn write_io_status_block(mem: &GuestMemory, ptr: u32, status: u32, information: u32) { if ptr == 0 { @@ -793,32 +1493,96 @@ fn open_cache_file( // `cache:\d4ea4615` which then blocked subsequent hierarchical // creates of `cache:\d4ea4615\e\46ee8ca` with NAME_COLLISION). const FILE_DIRECTORY_FILE: u32 = 0x0000_0001; + const FILE_NON_DIRECTORY_FILE: u32 = 0x0000_0040; let want_dir = (create_options & FILE_DIRECTORY_FILE) != 0; + let want_non_dir = (create_options & FILE_NON_DIRECTORY_FILE) != 0; - // Root-of-mount case: `cache:\`, `cache:/`, `cache:` resolve to the - // cache root directory itself. Mirror canary's HostPathDevice.Open - // which returns a directory handle (success, attributes = DIR). - // Empty `path.file_name()` after our resolve_cache_path strip means - // the guest asked for the mount root. - let is_dir_open = host_path == state.cache_root.as_deref().unwrap_or(host_path) - || host_path.is_dir() - || want_dir; + // Phase C+11 — when the host path already exists, its actual on-disk + // type wins over the guest's `FILE_DIRECTORY_FILE` bit. Mirrors + // canary's `VirtualFileSystem::OpenFile` which routes to the existing + // entry's device-specific open without re-checking the bit. Sylpheed + // sets `FILE_DIRECTORY_FILE` on `NtOpenFile cache:\

.tmp` + // re-opens (the `.tmp` was already a file from a prior FILE_CREATE), + // which under the AUDIT-054 logic mis-routed to the directory branch + // and dropped `host_path` — blocking the subsequent class-10 rename + // with `STATUS_ACCESS_DENIED`. Also resolves Phase C+11's bug #2: + // `cache:\access`/`ignore`/`recent` end up as files on cold creation + // because `want_non_dir` (FILE_NON_DIRECTORY_FILE bit 0x40) takes + // precedence when set, even with FILE_DIRECTORY_FILE. + // + // Resolution order (mirrors canary): + // 1. Existing host entry: actual type wins (file ↔ dir). + // 2. `want_non_dir` set → file path (NON_DIRECTORY_FILE overrides). + // 3. `want_dir` set → directory path. + // 4. Default → file path. + // + // Root-of-mount case is captured by the existing-dir branch: the + // cache root always exists as a directory, so `host_path.is_dir()` + // is true. + let host_exists_as_dir = host_path.is_dir(); + let host_exists_as_file = host_path.is_file(); + let is_dir_open = host_exists_as_dir + || (!host_exists_as_file && !want_non_dir && want_dir); if is_dir_open { - // For non-existent paths the guest wants us to create as a - // directory, mkdir-p; canary's HostPathDevice does the same - // when FILE_DIRECTORY_FILE is set on a kCreate disposition. - if want_dir && !host_path.exists() { - if let Err(e) = std::fs::create_dir_all(host_path) { - tracing::warn!( - "cache create_dir_all({:?}) failed: {} — STATUS_UNSUCCESSFUL", - host_path, - e - ); + // Phase C+11.1 — only create the host directory when the + // disposition is *create-capable*. Mirrors canary's + // `VirtualFileSystem::OpenFile` (virtual_file_system.cc:265-273): + // for `FileDisposition::kOpen`/`kOverwrite` on a non-existent + // path the function returns `X_STATUS_OBJECT_NAME_NOT_FOUND` + // *before* any `CreatePath` call — i.e. mkdir is never invoked + // on these dispositions. 
The pre-fix code (Phase C+11) called + // `create_dir_all` whenever `want_dir && !host_path.exists()`, + // so Sylpheed's cold-boot probes for `cache:/access`, + // `cache:/ignore`, `cache:/recent` (disp=1, opts=0x7) succeeded + // and produced spurious host directories. Canary instead + // returns NOT_FOUND, after which Sylpheed re-creates these as + // FILES via `disp=5` + `FILE_NON_DIRECTORY_FILE`. + // + // Create-capable dispositions (mkdir OK): + // 0 FILE_SUPERSEDE + // 2 FILE_CREATE + // 3 FILE_OPEN_IF + // 5 FILE_OVERWRITE_IF + // Non-create dispositions (must miss when path is absent): + // 1 FILE_OPEN + // 4 FILE_OVERWRITE + let disp_is_create_capable = matches!( + create_disposition, + FILE_SUPERSEDE | FILE_CREATE | FILE_OPEN_IF | FILE_OVERWRITE_IF + ); + if !host_path.exists() { + if !disp_is_create_capable { if handle_out != 0 { mem.write_u32(handle_out, 0); } - write_io_status_block(mem, io_status_block, STATUS_UNSUCCESSFUL as u32, 0); - return STATUS_UNSUCCESSFUL; + write_io_status_block( + mem, + io_status_block, + STATUS_OBJECT_NAME_NOT_FOUND as u32, + 0, + ); + tracing::info!( + "cache open (dir) MISS path={:?} disp={} opts={:#x} -> NOT_FOUND", + guest_path, + create_disposition, + create_options + ); + return STATUS_OBJECT_NAME_NOT_FOUND; + } + // create-capable + want_dir → mkdir-p the directory. + if want_dir { + if let Err(e) = std::fs::create_dir_all(host_path) { + tracing::warn!( + "cache create_dir_all({:?}) failed: {} — STATUS_UNSUCCESSFUL", + host_path, + e + ); + if handle_out != 0 { + mem.write_u32(handle_out, 0); + } + write_io_status_block(mem, io_status_block, STATUS_UNSUCCESSFUL as u32, 0); + return STATUS_UNSUCCESSFUL; + } } } // Stored path ends with '/' so nt_query_information_file's @@ -828,6 +1592,10 @@ fn open_cache_file( } else { format!("{}/", guest_path) }; + // Phase C+12 — register / refresh directory entry mirror. 
+ if let Ok(md) = host_path.metadata() { + state.register_cache_entry(guest_path, &md); + } let handle = state.alloc_handle_for(KernelObject::File { path: dir_path, size: 0, @@ -836,6 +1604,7 @@ fn open_cache_file( dir_enum_pos: None, host_path: None, }); + maybe_mark_async_file(state, handle, create_options); if handle_out != 0 { mem.write_u32(handle_out, handle); } @@ -918,10 +1687,16 @@ fn open_cache_file( return STATUS_UNSUCCESSFUL; } } - let size = host_path - .metadata() - .map(|m| m.len()) - .unwrap_or(0); + let metadata = host_path.metadata().ok(); + let size = metadata.as_ref().map(|m| m.len()).unwrap_or(0); + // Phase C+12 — register / refresh the in-memory entry mirror so + // subsequent `NtQueryFullAttributesFile` probes for this path + // resolve without re-stating the host FS (parity with canary's + // `Entry::CreateEntry`, + // `xenia-canary/src/xenia/vfs/entry.cc:88-104`). + if let Some(md) = metadata.as_ref() { + state.register_cache_entry(guest_path, md); + } let handle = state.alloc_handle_for(KernelObject::File { path: guest_path.to_string(), size, @@ -931,6 +1706,7 @@ fn open_cache_file( dir_enum_pos: None, host_path: Some(host_path.to_path_buf()), }); + maybe_mark_async_file(state, handle, create_options); if handle_out != 0 { mem.write_u32(handle_out, handle); } @@ -950,6 +1726,32 @@ fn open_cache_file( /// AUDIT-038 — additional NTSTATUS used by the cache-backed open path. const STATUS_OBJECT_NAME_COLLISION: u64 = 0xC000_0035; +/// Phase C+13 — does `raw_path` start with a prefix that aliases the +/// (read-only) game disc? Used to scope the synth-empty fallback in +/// `open_vfs_file`: missing disc files report `STATUS_OBJECT_NAME_NOT_FOUND` +/// (matching canary's `NtCreateFile_entry` for game-data lookups), while +/// missing writable-partition paths keep the legacy zero-byte synth. 
+/// +/// Mirrors the disc-mapped subset of `crate::path::DEVICE_PREFIXES`: +/// - `game:\` — canary's symbolic-link alias for the disc +/// (xenia-canary/src/xenia/kernel/kernel_state.cc registrations). +/// - `d:\` / `D:\` — drive-letter alias for the disc. +/// - `\Device\Cdrom0\` — NT device path for the disc. +/// +/// Compares case-insensitively to match canary's path resolver. +fn is_disc_prefix(raw_path: &str) -> bool { + let lowered = raw_path.trim_start().to_ascii_lowercase(); + const DISC_PREFIXES: &[&str] = &[ + "game:\\", + "game:/", + "d:\\", + "d:/", + "\\device\\cdrom0\\", + "\\device\\cdrom0/", + ]; + DISC_PREFIXES.iter().any(|p| lowered.starts_with(p)) +} + /// Open a VFS-backed file. Shared between NtCreateFile and NtOpenFile — the /// create/open distinction only matters for writable volumes (cache:/), /// which we now back with a host directory (audit-038). The disc image @@ -980,6 +1782,17 @@ fn open_vfs_file( // see a null handle later and trigger `XamShowDirtyDiscErrorUI`. let path = crate::path::object_attributes_to_vfs_path(mem, obj_attrs_ptr) .unwrap_or_default(); + // Phase C+13 — recover the raw (un-stripped) path so we can tell a + // disc-aliased prefix (`game:\`, `d:\`, `\Device\Cdrom0\`) apart from a + // writable-partition prefix (`\Device\Harddisk0\…`, `\??\`, raw "no + // prefix" cases). The synth-empty fallback below covers both today but + // canary's `NtCreateFile_entry` (xboxkrnl_io.cc:83-110) returns the + // VFS lookup status verbatim, which is `STATUS_OBJECT_NAME_NOT_FOUND` + // for any disc path that isn't in the ISO. Scoping the synth to + // non-disc prefixes makes us match canary's behaviour for missing + // game-data files (e.g. `game:\dat\files.tbl` at Phase C+13 idx 103862). 
+ let raw_path = crate::path::object_attributes_raw_name(mem, obj_attrs_ptr) + .unwrap_or_default(); if path.is_empty() && obj_attrs_ptr == 0 { if handle_out != 0 { mem.write_u32(handle_out, 0); @@ -1004,6 +1817,7 @@ fn open_vfs_file( dir_enum_pos: None, host_path: None, }); + maybe_mark_async_file(state, handle, create_options); if handle_out != 0 { mem.write_u32(handle_out, handle); } @@ -1047,6 +1861,7 @@ fn open_vfs_file( dir_enum_pos: None, host_path: None, }); + maybe_mark_async_file(state, handle, create_options); if handle_out != 0 { mem.write_u32(handle_out, handle); } @@ -1055,28 +1870,43 @@ fn open_vfs_file( STATUS_SUCCESS } Err(e) => { - // When the VFS can't resolve a path we synthesize a zero-byte - // virtual file rather than returning NOT_FOUND. Two rationales: + // Phase C+13 — scope the synth-empty fallback to non-disc + // prefixes only. Canary's `NtCreateFile_entry` returns the VFS + // result verbatim (xboxkrnl_io.cc:83-110); for a missing disc + // file like `game:\dat\files.tbl` that's + // `STATUS_OBJECT_NAME_NOT_FOUND`. Sylpheed handles NOT_FOUND + // cleanly (next event in canary's trace at idx 103862 is + // `RtlNtStatusToDosError(0xc0000034) -> 2`, then the boot + // validator continues), so the synth was masking the + // correct branch. // - // 1. **Writable system partitions** (`cache:/`, `cache0:`, - // `cache1:`, `partition0:`, `partition1:`) aren't backed by - // the disc — Canary mounts them on host directories - // ([xenia_main.cc:612-651](xenia-canary/src/xenia/app/xenia_main.cc)). - // We skip the host mount for now, so opens there always miss - // without this fallback. - // - // 2. **Disc files that didn't make it into the ISO rip** (e.g., - // Sylpheed's `dat/files.tbl`, which the retail disc shipped - // but our dump doesn't contain). 
Returning NOT_FOUND makes - // Sylpheed's boot validator call `XamShowDirtyDiscErrorUI` - // → dashboard exit; see Canary's `XamShowDirtyDiscErrorUI` - // at xam_ui.cc:562 for the "bad or unimplemented file IO - // calls" framing. - // - // A zero-byte file lets the game's existence probe succeed, its - // read return EOF, and its "is the content here" sanity checks - // pass. If the game actually needs the bytes for gameplay we'll - // see a fresh failure downstream and can decide what to stub next. + // Synth-empty is still kept for writable system partitions + // (`\Device\Harddisk0\…`, `\Device\Mass*`, `\??\`, raw paths) + // because those aren't backed by the disc — Canary mounts + // them on host directories + // ([xenia_main.cc:612-651](xenia-canary/src/xenia/app/xenia_main.cc)); + // ours skips the host mount for those and falls back to the + // legacy stub to avoid regressing audit-006 / audit-018 + // disc-validation probes. `cache:/` was already routed to + // `open_cache_file` upstream of this branch (AUDIT-038). + if is_disc_prefix(&raw_path) { + if handle_out != 0 { + mem.write_u32(handle_out, 0); + } + write_io_status_block( + mem, + io_status_block, + STATUS_OBJECT_NAME_NOT_FOUND as u32, + 0, + ); + tracing::info!( + "Disc path missing: raw={:?} norm={:?} err={} -> NOT_FOUND", + raw_path, + path, + e + ); + return STATUS_OBJECT_NAME_NOT_FOUND; + } let handle = state.alloc_handle_for(KernelObject::File { path: path.clone(), size: 0, @@ -1085,6 +1915,7 @@ fn open_vfs_file( dir_enum_pos: None, host_path: None, }); + maybe_mark_async_file(state, handle, create_options); if handle_out != 0 { mem.write_u32(handle_out, handle); } @@ -1122,16 +1953,26 @@ fn nt_create_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelSta } fn nt_open_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { - // r3 = handle_out, r4 = desired_access, r5 = obj_attrs, - // r6 = io_status_block, r7 = share_access, r8 = open_options. 
- // `NtOpenFile` is FILE_OPEN-only (no create) — file must exist. - // Per xboxkrnl_io.cc:99-122, NtOpenFile forwards `open_options` + // Phase C+5 — canary `NtOpenFile_entry` + // (xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc:114-122) has + // FIVE args: (handle_out, desired_access, object_attributes, + // io_status_block, open_options). Per Xenia's shim_utils LoadValue + // (util/shim_utils.h:158-167), the 5th dword arg arrives in r7. Ours + // previously read r8 — the bit 0x01 (FILE_DIRECTORY_FILE) check still + // happened to pass because the game also left bit 0x01 set in r8 for + // dir opens (AUDIT-054 enabling condition), but the + // FILE_SYNCHRONOUS_IO_NONALERT bit (0x20) was wrongly set in r8 for + // device opens, making every file appear synchronous and causing the + // Phase C+5 NtWriteFile divergence at idx=102068 + // (canary=STATUS_PENDING / ours=STATUS_SUCCESS). + // + // Per xboxkrnl_io.cc:118-122, NtOpenFile forwards `open_options` // straight into NtCreateFile's `create_options` slot, so the - // FILE_DIRECTORY_FILE bit applies the same way. + // FILE_DIRECTORY_FILE bit + sync bits apply the same way. let handle_out = ctx.gpr[3] as u32; let obj_attrs_ptr = ctx.gpr[5] as u32; let io_status_block = ctx.gpr[6] as u32; - let open_options = ctx.gpr[8] as u32; + let open_options = ctx.gpr[7] as u32; ctx.gpr[3] = open_vfs_file( mem, state, @@ -1171,8 +2012,10 @@ fn signal_io_completion_event(state: &mut KernelState, event_handle: u32) { fn nt_read_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = handle, r4 = event, r5 = apc_routine, r6 = apc_ctx, // r7 = io_status_block, r8 = buffer, r9 = length, r10 = byte_offset_ptr - let handle = ctx.gpr[3] as u32; - let event_handle = ctx.gpr[4] as u32; + // Phase C+19: canonicalize dup ids → source so file/event lookups + // hit the canonical `state.objects` slot. 
+ let handle = state.resolve_handle(ctx.gpr[3] as u32); + let event_handle = state.resolve_handle(ctx.gpr[4] as u32); let io_status_block = ctx.gpr[7] as u32; let buffer = ctx.gpr[8] as u32; let length = ctx.gpr[9] as u32; @@ -1293,8 +2136,9 @@ fn nt_write_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelStat // r7 = io_status_block, r8 = buffer, r9 = length, r10 = byte_offset_ptr. // For cache:/* (host_path Some) writes go to disk; everything else // is still discarded (matches legacy read-only behaviour for game:/). - let handle = ctx.gpr[3] as u32; - let event_handle = ctx.gpr[4] as u32; + // Phase C+19: canonicalize dup ids → source. + let handle = state.resolve_handle(ctx.gpr[3] as u32); + let event_handle = state.resolve_handle(ctx.gpr[4] as u32); let io_status_block = ctx.gpr[7] as u32; let buffer = ctx.gpr[8] as u32; let length = ctx.gpr[9] as u32; @@ -1320,6 +2164,7 @@ fn nt_write_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelStat *position }; + let mut wrote_ok = false; if let Some(hp) = host_path.clone() { use std::io::{Seek, SeekFrom, Write}; let mut buf = vec![0u8; length as usize]; @@ -1341,6 +2186,7 @@ fn nt_write_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelStat *size = live_size; write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, length); ctx.gpr[3] = STATUS_SUCCESS; + wrote_ok = true; tracing::info!( "NtWriteFile cache: {} bytes to {:?} @ {} (handle={:#x})", length, path, start_pos, handle @@ -1356,6 +2202,19 @@ fn nt_write_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelStat // Legacy: discard but report full-length-written so caller proceeds. 
write_io_status_block(mem, io_status_block, STATUS_SUCCESS as u32, length); ctx.gpr[3] = STATUS_SUCCESS; + wrote_ok = true; + } + // Phase C+5 — canary `NtWriteFile_entry` + // (xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc:351-353) flips + // the function return value to `STATUS_PENDING` after the synchronous + // write completes when the underlying `XFile::is_synchronous_` is + // false. The IO_STATUS_BLOCK already stores STATUS_SUCCESS above; only + // the r3 return changes. Mirroring this here closes the + // `tid_event_idx=102068` divergence (canary=0x103 / ours=0) on the + // main thread without touching `NtReadFile` / `NtReadFileScatter` + // (scoped to one divergence per Phase C session, per project plan). + if wrote_ok && state.async_file_handles.contains(&handle) { + ctx.gpr[3] = STATUS_PENDING; } signal_io_completion_event(state, event_handle); } @@ -1373,7 +2232,8 @@ fn nt_device_io_control_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mu const STATUS_INVALID_PARAMETER: u64 = 0xC000_000D; const CACHE_SIZE: u64 = 0xFF000; - let event_handle = ctx.gpr[4] as u32; + // Phase C+19: canonicalize dup ids → source. + let event_handle = state.resolve_handle(ctx.gpr[4] as u32); let io_status_block = ctx.gpr[7] as u32; let io_control_code = ctx.gpr[8] as u32; let sp = ctx.gpr[1] as u32; @@ -1423,7 +2283,8 @@ fn nt_device_io_control_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mu /// (14). Anything else gets zeros + success. fn nt_query_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = handle, r4 = io_status_block, r5 = file_info, r6 = length, r7 = class - let handle = ctx.gpr[3] as u32; + // Phase C+19: canonicalize dup ids → source. 
+ let handle = state.resolve_handle(ctx.gpr[3] as u32); let io_status_block = ctx.gpr[4] as u32; let file_info = ctx.gpr[5] as u32; let length = ctx.gpr[6] as u32; @@ -1517,6 +2378,123 @@ fn nt_query_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mu ctx.gpr[3] = STATUS_SUCCESS; } +/// Phase C+11 — XFileRenameInformation (class 10) body. Mirrors canary +/// `xboxkrnl_io_info.cc:226-243` `file->Rename(target_path)`. Sylpheed's +/// cache-build path writes `cache:\

.tmp` flat journal files, then +/// renames them to the hierarchical leaf `cache:\

\\

` via this +/// info-class. Before this body landed, ours silently fell through to the +/// `_ => STATUS_SUCCESS` catch-all and the `.tmp` never became a leaf — +/// blocking `NtQueryFullAttributesFile` at idx 102404 in the Phase A diff. +/// +/// Layout per canary `info/file.h:79-83` (16 bytes total): +/// offset 0 be replace_existing +/// offset 4 be root_dir_handle +/// offset 8 X_ANSI_STRING (u16 Length, u16 MaximumLength, u32 Buffer) +/// +/// Pulled out of `nt_set_information_file`'s main `match` because it +/// needs an immutable read of `state.cache_root` (via +/// `resolve_cache_path`) BEFORE the mutable destructure of the file +/// handle — Rust's borrow checker can't see through `state.method()` +/// across both kinds of access. +fn handle_set_info_rename( + mem: &GuestMemory, + state: &mut KernelState, + handle: u32, + info_ptr: u32, + info_length: u32, +) -> (u64, u32) { + // Read the rename target ANSI_STRING. The raw-form helper trims + // whitespace but does NOT prefix-strip — we want the original + // `cache:\...` form so the path resolver sees it. + let target_raw = + match crate::path::file_rename_information_raw_target(mem, info_ptr, info_length) { + Some(s) if !s.is_empty() => s, + _ => return (STATUS_OBJECT_NAME_INVALID, 16), + }; + + // Translate target path. Sylpheed only renames inside `cache:\`; any + // other prefix is not in scope (canary's `IsValidPath` rejects + // anything that doesn't resolve to a writable mount). + let target_host_path = match state.resolve_cache_path(&target_raw) { + Some(p) => p, + None => return (STATUS_OBJECT_NAME_INVALID, 16), + }; + + // Look up the source handle. Note: ANY non-File handle (event, + // semaphore, etc.) is INVALID_HANDLE; a File without a + // `host_path` is VFS-backed (read-only) and can't be renamed. + let Some(KernelObject::File { path, size, host_path, .. 
}) = state.objects.get_mut(&handle) + else { + return (STATUS_INVALID_HANDLE, 16); + }; + let Some(src_host_path) = host_path.clone() else { + // VFS-backed read-only handle (disc / synth stub). Canary's + // HostPathDevice mount is the only Rename-capable backend on + // Sylpheed; Disc/SVOD throws `kReadOnly`. + return (STATUS_ACCESS_DENIED, 16); + }; + + // Create parent directories for the destination (matches canary's + // `HostPathEntry::CreateEntryInternal` which calls + // `create_directories` before writing the file). Without this, the + // rename to `/d4ea4615/e/46ee8ca` fails when `/d4ea4615/e` + // doesn't yet exist (a common cold-cache scenario). + if let Some(parent) = target_host_path.parent() { + if let Err(e) = std::fs::create_dir_all(parent) { + tracing::warn!( + "NtSetInformationFile rename: create_dir_all({:?}): {}", + parent, + e + ); + return (STATUS_UNSUCCESSFUL, 16); + } + } + + // Perform the rename. `std::fs::rename` is atomic within a single + // filesystem on POSIX; cross-filesystem is the only failure path + // worth worrying about, and the entire cache lives under one root. + let old_path = path.clone(); + let rename_outcome = match std::fs::rename(&src_host_path, &target_host_path) { + Ok(()) => { + // Update the in-engine handle to point at the new location. + // The handle stays valid (mirrors canary's `XFile::Rename` + // which keeps the file handle open at the new path). + *path = crate::path::normalize_path(&target_raw); + *host_path = Some(target_host_path.clone()); + let new_size = std::fs::metadata(&target_host_path) + .map(|m| m.len()) + .unwrap_or(*size); + *size = new_size; + Ok(()) + } + Err(e) => { + tracing::warn!( + "NtSetInformationFile rename: rename({:?} -> {:?}): {}", + src_host_path, + target_host_path, + e + ); + Err(()) + } + }; + // Drop the mutable borrow on `state.objects` before touching + // `state.cache_entries` via the helper methods. The `let + // Some(KernelObject::File { .. 
}) = state.objects.get_mut(...)` + // binding above holds it until the function returns otherwise. + match rename_outcome { + Ok(()) => { + // Phase C+12 — refresh the in-memory entry tree: drop the + // source mirror, install / refresh the target mirror. + state.forget_cache_entry(&old_path); + if let Ok(md) = std::fs::metadata(&target_host_path) { + state.register_cache_entry(&target_raw, &md); + } + (STATUS_SUCCESS, 16) + } + Err(()) => (STATUS_UNSUCCESSFUL, 16), + } +} + /// `NtSetInformationFile(FileHandle, IoStatusBlock*, FileInformation, /// Length, FileInformationClass)`. Mirrors Canary /// [xboxkrnl_io_info.cc:180-304](xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io_info.cc). @@ -1524,16 +2502,17 @@ fn nt_query_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mu /// Validates `info_class` (must have a defined minimum size) and /// `info_length` (must meet that minimum); returns /// `STATUS_INVALID_INFO_CLASS` / `STATUS_INFO_LENGTH_MISMATCH` in those -/// cases. The only class with real side-effects in xenia-rs is -/// `XFilePositionInformation` (14) — seek updates the file's cursor. -/// Read-only VFS means `XFileEndOfFileInformation` (20, truncate) can -/// only succeed if the new length equals the current size, otherwise -/// returns `STATUS_UNSUCCESSFUL`. Other classes acknowledge the write -/// but have no backing store. +/// cases. Side-effect classes: +/// * `XFileRenameInformation` (10) — rename a cache:-backed handle. +/// * `XFilePositionInformation` (14) — seek updates the file's cursor. +/// * `XFileEndOfFileInformation` (20) — truncate (cache: only; disc-VFS +/// rejects non-identity truncates with `STATUS_UNSUCCESSFUL`). +/// Other classes acknowledge the write but have no backing store. fn nt_set_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = handle, r4 = io_status_block, r5 = info_ptr, // r6 = info_length, r7 = info_class. 
- let handle = ctx.gpr[3] as u32; + // Phase C+19: canonicalize dup ids → source. + let handle = state.resolve_handle(ctx.gpr[3] as u32); let iosb_ptr = ctx.gpr[4] as u32; let info_ptr = ctx.gpr[5] as u32; let info_length = ctx.gpr[6] as u32; @@ -1562,6 +2541,21 @@ fn nt_set_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut return; } + // Phase C+11 — class 10 (`XFileRenameInformation`) needs both a + // read of `state.cache_root` (via `resolve_cache_path`) AND a mutable + // borrow of the target file handle. Rust's borrow checker can't see + // through `&self.method()` calls, so split it out before the shared + // `get_mut` destructure below. + if info_class == 10 { + let (status, out_length) = + handle_set_info_rename(mem, state, handle, info_ptr, info_length); + if iosb_ptr != 0 { + write_io_status_block(mem, iosb_ptr, status as u32, out_length); + } + ctx.gpr[3] = status; + return; + } + // Handle lookup. let Some(KernelObject::File { size, position, host_path, .. }) = state.objects.get_mut(&handle) else { ctx.gpr[3] = STATUS_INVALID_HANDLE; @@ -1634,6 +2628,48 @@ fn nt_set_information_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut ctx.gpr[3] = status; } +/// Phase C+12 — write the 56-byte `X_FILE_NETWORK_OPEN_INFORMATION` +/// (`xenia-canary/src/xenia/kernel/info/file.h:117-127`) at `out` from +/// the entry's metadata. All multibyte fields are stored big-endian +/// (`be` / `be` in the canary struct); our +/// `GuestMemory::write_u{32,64}` already byte-swaps via `to_be_bytes`, +/// so the writes naturally produce the BE layout the Xbox 360 expects. 
+/// +/// Layout (offset / size / type / canary field): +/// ```text +/// 0 u64 CreationTime (FILETIME) +/// 8 u64 LastAccessTime +/// 16 u64 LastWriteTime +/// 24 u64 ChangeTime (= LastWriteTime per xboxkrnl_io.cc:504) +/// 32 u64 AllocationSize +/// 40 u64 EndOfFile +/// 48 u32 Attributes (FILE_ATTRIBUTE_*) +/// 52 u32 Reserved (= 0) +/// ``` +fn write_file_network_open_information( + mem: &GuestMemory, + out: u32, + meta: &crate::state::CacheEntryMeta, +) { + if out == 0 { + return; + } + mem.write_u64(out, meta.create_time); + mem.write_u64(out + 8, meta.access_time); + mem.write_u64(out + 16, meta.write_time); + // change_time = write_time per canary `xboxkrnl_io.cc:504`. + mem.write_u64(out + 24, meta.write_time); + mem.write_u64(out + 32, meta.allocation_size); + mem.write_u64(out + 40, meta.size); + let attrs = if meta.is_directory { + crate::state::X_FILE_ATTRIBUTE_DIRECTORY + } else { + crate::state::X_FILE_ATTRIBUTE_NORMAL + }; + mem.write_u32(out + 48, attrs); + mem.write_u32(out + 52, 0); +} + fn nt_query_full_attributes_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = obj_attrs, r4 = network_open_info let obj_attrs_ptr = ctx.gpr[3] as u32; @@ -1647,37 +2683,41 @@ fn nt_query_full_attributes_file(ctx: &mut PpcContext, mem: &GuestMemory, state: } }; - // AUDIT-038 — cache:/* short-circuit: stat the host-FS file directly - // so existence probes (Sylpheed's pre-open `NtQueryFullAttributesFile`) - // see real attributes for files we just created and miss for files we - // haven't. 
- if let Some(hp) = state.resolve_cache_path(&path) { - let entry = std::fs::metadata(&hp); - match entry { - Ok(md) => { - let filetime: u64 = 132_500_000_000_000_000; - if out != 0 { - for off in (0..32).step_by(4) { - mem.write_u32(out + off, if off & 4 == 0 { - (filetime >> 32) as u32 - } else { - filetime as u32 - }); - } - mem.write_u64(out + 32, md.len()); - mem.write_u64(out + 40, md.len()); - let attrs: u32 = if md.is_dir() { 0x10 } else { 0x80 }; - mem.write_u32(out + 48, attrs); - mem.write_u32(out + 52, 0); + // Phase C+12 — `cache:*` paths consult the in-memory entry mirror + // first, mirroring canary's `NtQueryFullAttributesFile_entry` which + // walks the in-memory entry tree via `VirtualFileSystem::ResolvePath` + // and never re-stats the host + // (`xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc:498-512`). + // + // The entry tree is seeded at mount time by + // `populate_cache_entries_from_host` (mirrors canary's eager + // `HostPathDevice::PopulateEntry`) and refreshed per-NtCreateFile + // by `register_cache_entry` (mirrors canary's `Entry::CreateEntry`). + // A second-line host-FS fallback handles the rare case where the + // entry tree lost track but the host file is present (defensive; + // canary returns NO_SUCH_FILE in that case so we keep this fallback + // narrow). + if path.to_ascii_lowercase().starts_with("cache:") { + if let Some(meta) = state.lookup_cache_entry(&path) { + write_file_network_open_information(mem, out, meta); + ctx.gpr[3] = STATUS_SUCCESS; + return; + } + // Host-FS defensive fallback — only fires when the in-memory + // tree missed but the file is on disk. Refreshes the tree as a + // side-effect so subsequent probes hit the fast path. 
+ if let Some(hp) = state.resolve_cache_path(&path) { + if let Ok(md) = std::fs::metadata(&hp) { + state.register_cache_entry(&path, &md); + if let Some(meta) = state.lookup_cache_entry(&path) { + write_file_network_open_information(mem, out, meta); + ctx.gpr[3] = STATUS_SUCCESS; + return; } - ctx.gpr[3] = STATUS_SUCCESS; - return; - } - Err(_) => { - ctx.gpr[3] = STATUS_OBJECT_NAME_NOT_FOUND; - return; } } + ctx.gpr[3] = STATUS_NO_SUCH_FILE; + return; } let Some(vfs) = state.vfs.as_ref() else { @@ -1687,24 +2727,23 @@ fn nt_query_full_attributes_file(ctx: &mut PpcContext, mem: &GuestMemory, state: match vfs.stat(&path) { Ok(entry) => { - // FILE_NETWORK_OPEN_INFORMATION (56 bytes): 4 × FILETIME, - // AllocationSize(i64), EndOfFile(i64), FileAttributes(u32), pad(u32) - let filetime: u64 = 132_500_000_000_000_000; - if out != 0 { - mem.write_u32(out, (filetime >> 32) as u32); - mem.write_u32(out + 4, filetime as u32); - mem.write_u32(out + 8, (filetime >> 32) as u32); - mem.write_u32(out + 12, filetime as u32); - mem.write_u32(out + 16, (filetime >> 32) as u32); - mem.write_u32(out + 20, filetime as u32); - mem.write_u32(out + 24, (filetime >> 32) as u32); - mem.write_u32(out + 28, filetime as u32); - mem.write_u64(out + 32, entry.size); - mem.write_u64(out + 40, entry.size); - let attrs: u32 = if entry.is_directory { 0x10 } else { 0x80 }; - mem.write_u32(out + 48, attrs); - mem.write_u32(out + 52, 0); - } + let meta = crate::state::CacheEntryMeta { + is_directory: entry.is_directory, + size: entry.size, + // Disc/VFS entries have no host metadata; use the same + // 4 KiB alignment canary derives from + // `device->bytes_per_sector()`. Disc devices default + // to 2048 in canary + // (`xenia-canary/src/xenia/vfs/devices/disc_image_device.cc`) + // but for the existence-probe consumers we hit on + // Sylpheed boot the exact alignment doesn't matter — + // they only branch on the SUCCESS/NOT_FOUND status. 
+ allocation_size: (entry.size + 2047) & !2047, + create_time: 0, + access_time: 0, + write_time: 0, + }; + write_file_network_open_information(mem, out, &meta); ctx.gpr[3] = STATUS_SUCCESS; } Err(_) => { @@ -1759,8 +2798,9 @@ fn nt_query_directory_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut // r3=file_handle, r4=event_handle, r5=apc_routine, r6=apc_context, // r7=io_status_block, r8=file_info_ptr, r9=length, r10=file_name, // sp+... = restart_scan. - let handle = ctx.gpr[3] as u32; - let event_handle = ctx.gpr[4] as u32; + // Phase C+19: canonicalize dup ids → source. + let handle = state.resolve_handle(ctx.gpr[3] as u32); + let event_handle = state.resolve_handle(ctx.gpr[4] as u32); let iosb_ptr = ctx.gpr[7] as u32; let info_ptr = ctx.gpr[8] as u32; let length = ctx.gpr[9] as u32; @@ -1916,15 +2956,31 @@ fn nt_query_directory_file(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut fn nt_close(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { let handle = ctx.gpr[3] as u32; - // Aliased refcount: `NtDuplicateObject` returns the *source* handle as the - // "new" handle (we don't mint fresh values), so the game commonly holds - // two logical references to the same handle value. Without refcount, the - // first `NtClose` wipes the object while the second reference is still - // live, which traps any later wait on that handle (Sylpheed's - // create→dup(SAME_ACCESS)→set→close pattern at 0x8246079c manifests this - // — main thread then parks forever on the closed handle). Mirror Canary's - // `ObjectTable::ReleaseHandle` (object_table.cc:189): decrement the - // per-handle refcount and only drop the object when it reaches zero. + close_handle_internal(state, handle); + ctx.gpr[3] = 0; +} + +/// Phase C+19: shared close path used by `nt_close`, +/// `nt_duplicate_object`'s `DUPLICATE_CLOSE_SOURCE` branch, and +/// `xam::xam_task_close_handle` (which canary defers to NtClose). 
+/// +/// Mirrors canary's `ObjectTable::ReleaseHandle` (object_table.cc:237-256): +/// decrement the slot's local refcount; on zero, emit `handle.destroy` for +/// the slot AND release the canonical kernel object — the canonical entry +/// (and its `KernelObject`) is removed only when `canonical_slot_count` +/// reaches zero (all dup siblings are gone). This preserves canary's +/// observable lifecycle: +/// +/// - Each `NtClose` of a slot with `handle_refcount==1` emits exactly one +/// `handle.destroy` event for that slot. +/// - The underlying object survives until the last slot closes; only then +/// are `state.objects`/`async_file_handles`/`pending_timer_fires` pruned. +pub(crate) fn close_handle_internal(state: &mut KernelState, handle: u32) { + let prior_rc = state + .handle_refcount + .get(&handle) + .copied() + .unwrap_or(0); let remaining = state .handle_refcount .get_mut(&handle) @@ -1934,14 +2990,51 @@ fn nt_close(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { }) .unwrap_or(0); if remaining == 0 { - state.objects.remove(&handle); state.handle_refcount.remove(&handle); - // If the object was an armed Timer, strip its pending-fire entry - // so a later scheduler round doesn't try to signal a dead handle. - // `disarm_timer` is a no-op for non-timer handles. - state.disarm_timer(handle); + + // Resolve the canonical id before we discard the alias entry — + // we need it to decrement the slot-count and possibly drop the + // backing object. + let canonical = state.resolve_handle(handle); + state.handle_aliases.remove(&handle); + + // Decrement the canonical's live-slot count. If this slot was the + // last one referring to the canonical, drop the underlying object. 
+ let slots_left = match state.canonical_slot_count.get_mut(&canonical) { + Some(c) => { + *c = c.saturating_sub(1); + *c + } + None => 0, + }; + + if slots_left == 0 { + state.canonical_slot_count.remove(&canonical); + state.objects.remove(&canonical); + // Phase C+5 — prune the async-file side-table when the underlying + // handle is finally released. Mirrors the canary `XFile` dtor + // releasing `is_synchronous_`. No-op for non-file handles. + state.async_file_handles.remove(&canonical); + // If the object was an armed Timer, strip its pending-fire entry + // so a later scheduler round doesn't try to signal a dead handle. + // `disarm_timer` is a no-op for non-timer handles. + state.disarm_timer(canonical); + } + + // Phase C+15-α: schema-v1 `handle.destroy` event for the SLOT being + // closed (which is `handle`, not the canonical). Canary emits at + // `ObjectTable::RemoveHandle` (object_table.cc:294-296) per-slot, + // regardless of whether the underlying object still has sibling + // slots — so we match that. + if crate::event_log::is_enabled() { + let (tid, cycle) = { + let r = state.scheduler.current_ref(); + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + }; + crate::event_log::emit_handle_destroy_auto(tid, cycle, handle, prior_rc); + } } - ctx.gpr[3] = 0; } fn nt_create_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { @@ -2186,6 +3279,139 @@ fn rtl_enter_critical_section( return; } let current_tid = ctx.thread_id; + + // Phase D Stage 3 — contention-replay manifest. When installed (via + // `XENIA_CONTENTION_MANIFEST_PATH`), the manifest tells us at which + // (tid, tid_event_idx) canary saw real contention on which CS. We + // peek the next per-tid ordinal, look up the manifest, and if it hits + // (with a matching cs_ptr) we emit a parity `contention.observed` + // event and force a park via the existing `cs_waiters` path. 
+ // + // Wake comes when some other guest thread calls + // `RtlLeaveCriticalSection` on this CS naturally — the existing path + // at lines 2972-2980 handles the lock handoff. If no peer touches + // the CS, `Scheduler::unblock_on_deadlock` recovers with the existing + // CriticalSection-blocked wake at scheduler.rs:1208 (STATUS_TIMEOUT + // style — owner field will read 0 post-recovery, surfaceable as a + // downstream trace divergence rather than a silent hang). + // + // Default mode (no manifest installed): zero overhead, byte-identical + // to pre-Stage-3 behavior — `state.contention_manifest.as_ref()` + // short-circuits before peek_tid_idx. + // `consume_at_peek` translates ours's `peek_tid_idx` back to + // canary's idx space by subtracting the count of prior + // `contention.observed` emits on this tid (each emit shifts + // ours's per-tid idx by +1 relative to canary's stream). The + // bookkeeping is internal to the manifest; the caller just hands + // it the current peek value. + let manifest_hit = state + .contention_manifest + .as_ref() + .and_then(|m| { + let peek = crate::event_log::peek_tid_idx(current_tid); + m.consume_at_peek(current_tid, peek) + }); + if let Some(entry) = manifest_hit { + // Per-tid ordinal alignment with canary: ALWAYS emit + // `contention.observed` when the manifest fires, even if we end + // up not parking. Canary emits one here too (Stage 1) so + // consuming one per-tid idx slot on this side keeps the + // downstream events aligned. Stage 4 marks the kind + // engine-local in the diff tool, so the diff tool advances past + // these events on either side without comparison. + // + // We do NOT verify `entry.cs_ptr == cs_ptr` because canary and + // ours route guest-heap allocations to different VA regions + // (AUDIT-043 ε host-allocator divergence). Trust the + // `(tid, tid_event_idx)` alignment instead; if we got here, the + // manifest hit at the same per-tid call-site as canary's + // contention.observed. 
+ let guest_cycle = ctx.cycle_count; + crate::event_log::emit_contention_observed( + current_tid, + guest_cycle, + cs_ptr, + true, + ); + if entry.cs_ptr != cs_ptr { + tracing::debug!( + "manifest cs_ptr cross-engine divergence at tid={} idx={}: manifest {:#010x}, ours {:#010x} (allocator ε)", + current_tid, + entry.tid_event_idx, + entry.cs_ptr, + cs_ptr, + ); + } + // Stage 3 aggressive mode: force-park even when CS is free in + // guest memory. The bet is that some other guest tid will + // naturally acquire+release this CS during ours's park window, + // triggering the natural wake at lines 2972-2980. If no peer + // touches the CS, `Scheduler::unblock_on_deadlock` recovers via + // its existing CriticalSection-blocked wake path (returning + // with owner=0 and any state divergence surfacing as a + // downstream trace mismatch rather than a silent hang). + // + // The conservative skip-when-free variant (the plan's "deadlock + // safe" branch) keeps the prefix at 104,607 because it doesn't + // actually shift behavior at the contention point. Aggressive + // mode tests whether driving the contention path is enough to + // advance past the cap. Gate via `XENIA_CONTENTION_AGGRESSIVE=1` + // so we can flip without rebuilding. + let aggressive = std::env::var("XENIA_CONTENTION_AGGRESSIVE") + .ok() + .is_some_and(|v| { + let v = v.trim().to_ascii_lowercase(); + v == "1" || v == "true" || v == "yes" + }); + let pre_owner = mem.read_u32(cs_ptr + CS_OFFS_OWNING_THREAD); + let pre_owner_live = pre_owner != 0 + && state.scheduler.find_by_tid(pre_owner).is_some(); + let natural_contention = pre_owner_live && pre_owner != current_tid; + if aggressive && !natural_contention { + // Synthesize a forced-park via the same path as the natural + // contention branch below: bump lock_count, push self onto + // cs_waiters, then park. 
Note: we set owning_thread to a + // SENTINEL (current_tid) so that re-entries by the same tid + // see "self owns it" and recursion paths work; the natural + // wake path will overwrite owning_thread when it transfers + // the lock. (NB: this is a hack; only enabled by an env-var + // gate so the conservative default stays deadlock-safe.) + let lc = mem.read_u32(cs_ptr + CS_OFFS_LOCK_COUNT) as i32; + mem.write_u32(cs_ptr + CS_OFFS_LOCK_COUNT, (lc + 1) as u32); + let current_ref = state.scheduler.current_ref(); + state + .cs_waiters + .entry(cs_ptr) + .or_default() + .push(current_ref); + tracing::debug!( + "manifest AGGRESSIVE force-park: hw={} cs={:#010x} tid={} idx={} (owner was {})", + current_ref.hw_id, + cs_ptr, + current_tid, + entry.tid_event_idx, + pre_owner, + ); + ctx.gpr[3] = 0; + state + .scheduler + .park_current(BlockReason::CriticalSection(cs_ptr)); + return; + } + if !natural_contention { + tracing::debug!( + "manifest hit at tid={} idx={} cs={:#010x} but CS is free/self-owned (owner={}); replay skipped (state-divergence, not schedule-divergence)", + current_tid, + entry.tid_event_idx, + cs_ptr, + pre_owner, + ); + // Fall through to natural fast-path. + } + // If natural contention conditions ARE met, fall through to the + // existing park path below. 
+ } + let owner = mem.read_u32(cs_ptr + CS_OFFS_OWNING_THREAD); // "Effective owner" — if the stored tid doesn't correspond to any live HW @@ -2382,10 +3608,79 @@ fn rtl_fill_memory_ulong(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut K } } -fn rtl_image_xex_header_field(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { - // r3 = xex_header_ptr, r4 = field_id - // Return 0 for all fields - ctx.gpr[3] = 0; +fn rtl_image_xex_header_field(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = xex_header_guest_ptr (may be NULL — game's CRT often passes 0 + // because ours's `*XexExecutableModuleHandle = image_base` doesn't + // resolve to a real LDR_DATA_TABLE_ENTRY, so its `*(hmodule + 0x58)` + // deref yields PE OptionalHeader bytes instead of a header pointer; + // those bytes fail the game's validation and the call goes through + // with ptr=NULL). When NULL, fall back to KernelState's recorded + // `xex_header_guest_ptr` (the guest-VA of the raw XEX header copy + // set up in `xenia-app::cmd_exec`'s Phase 3, mirroring canary's + // `user_module.cc:223-227` `guest_xex_header_`). + // r4 = field_key (xex2_header_keys). + // + // Mirror of canary's `xboxkrnl_rtl.cc:501-514` → + // `UserModule::GetOptHeader(memory, header, key, &field_value)` + // (`user_module.cc:335-369`). Iterates `header->headers[]` (flat + // array of (key:u32, value:u32) pairs, both BE), and for the first + // entry where `opt_header.key == key` returns one of: + // * key & 0xFF == 0x00 → `opt_header.value` (inline value). + // * key & 0xFF == 0x01 → guest VA of `opt_header.value` itself. + // * else → `header_base + opt_header.offset` + // i.e. guest VA inside the header of the referenced data block. + // Returns 0 if the resolved header pointer is NULL or the key is + // not found. 
+ let mut xex_header_ptr = ctx.gpr[3] as u32; + let field_key = ctx.gpr[4] as u32; + if xex_header_ptr == 0 { + xex_header_ptr = state.xex_header_guest_ptr; + } + if xex_header_ptr == 0 { + ctx.gpr[3] = 0; + return; + } + // xex2_header layout (raw, BE; see xenia-canary `xex2_info.h`): + // +0x00 magic ("XEX2"), +0x04 module_flags, +0x08 header_size, + // +0x0C reserved, +0x10 security_offset, +0x14 header_count, + // +0x18.. array of (key:u32, value:u32) pairs. + let header_count = mem.read_u32(xex_header_ptr.wrapping_add(0x14)); + let entries_base = xex_header_ptr.wrapping_add(0x18); + let mut field_value: u32 = 0; + let mut found = false; + for i in 0..header_count { + let entry_addr = entries_base.wrapping_add(i.wrapping_mul(8)); + let entry_key = mem.read_u32(entry_addr); + if entry_key != field_key { + continue; + } + found = true; + let entry_value_addr = entry_addr.wrapping_add(4); + match entry_key & 0xFF { + 0x00 => { + // Inline value. + field_value = mem.read_u32(entry_value_addr); + } + 0x01 => { + // Pointer to the inline value slot itself. + field_value = entry_value_addr; + } + _ => { + // Offset within the header. `opt_header.value` here is the + // file offset of the optional data block, which canary + // copied verbatim into guest memory at `xex_header_ptr`, + // so `xex_header_ptr + offset` is the in-guest VA. + let offset = mem.read_u32(entry_value_addr); + field_value = xex_header_ptr.wrapping_add(offset); + } + } + break; + } + if !found { + ctx.gpr[3] = 0; + return; + } + ctx.gpr[3] = field_value as u64; } fn rtl_multi_byte_to_unicode_n(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { @@ -2410,13 +3705,19 @@ fn rtl_multi_byte_to_unicode_n(ctx: &mut PpcContext, mem: &GuestMemory, _state: } fn rtl_nt_status_to_dos_error(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { - // Simple mapping for common cases + // NTSTATUS → Win32 ERROR_* translation. 
Canary's + // `RtlNtStatusToDosError` mirrors the documented Windows + // implementation; the subset below covers the codes Sylpheed + // surfaces in the Phase A diff window. Add new mappings as new + // divergences appear rather than synthesising a giant table up-front. let status = ctx.gpr[3] as u32; ctx.gpr[3] = match status { - 0 => 0, // ERROR_SUCCESS - 0xC000_0034 => 2, // ERROR_FILE_NOT_FOUND - 0xC000_0011 => 38, // ERROR_HANDLE_EOF - _ => status as u64, // Pass through + 0x0000_0000 => 0, // STATUS_SUCCESS → ERROR_SUCCESS + 0xC000_000F => 2, // STATUS_NO_SUCH_FILE → ERROR_FILE_NOT_FOUND + 0xC000_0011 => 38, // STATUS_END_OF_FILE → ERROR_HANDLE_EOF + 0xC000_0034 => 2, // STATUS_OBJECT_NAME_NOT_FOUND → ERROR_FILE_NOT_FOUND + 0xC000_0035 => 183, // STATUS_OBJECT_NAME_COLLISION → ERROR_ALREADY_EXISTS + _ => status as u64, // Pass through }; } @@ -2711,6 +4012,27 @@ fn vd_query_video_mode(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut Ker ctx.gpr[3] = 0; } +/// Phase C+23: mirror canary's `VdQueryVideoFlags_entry` +/// (`xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc:231-241`). +/// +/// Canary computes a bitmask from the queried video mode: +/// bit 0 (0x1) — `is_widescreen` (cvar `widescreen`, default true) +/// bit 1 (0x2) — `display_width >= 1280` (HD) +/// bit 2 (0x4) — `display_width >= 1920` (Full HD) +/// +/// Ours's `vd_query_video_mode` reports `display_width=1280` and +/// `is_widescreen=1` (the canary defaults), so the canary-equivalent +/// return value is `0x1 | 0x2 = 3`. This matches the cold-vs-cold +/// observation at main matched-prefix idx 105,138 (canary returns `3`). +/// +/// A future Vd-subsystem session can swap this for actual cvar-driven +/// logic; for now the constant return value mirrors canary 1:1 under +/// the shipping defaults. 
+fn vd_query_video_flags(ctx: &mut PpcContext, _mem: &GuestMemory, _state: &mut KernelState) { + // is_widescreen=1, display_width=1280 → bits 0 + 1 = 3 + ctx.gpr[3] = 0x3; +} + fn vd_get_system_command_buffer( ctx: &mut PpcContext, mem: &GuestMemory, @@ -3233,10 +4555,24 @@ fn xaudio_register_render_driver(ctx: &mut PpcContext, mem: &GuestMemory, state: state.xaudio.worker_refs[index] = Some(r); } + // Phase HostAudioEager (2026-05-19): mirror canary's + // `client_semaphore->Release(queued_frames_=8)` at + // `audio_system.cc:210` — seed the audio fire queue immediately so + // the round prologue's `try_inject_audio_callback` delivers the + // first callback within a few rounds of register-return, BEFORE + // tid=1 reaches `ExCreateThread` for the XAudio worker threads + // (tid=14/15 in canary, tid=9/10 in ours). Pre-fix, the 48k- + // instruction ticker delay let those threads spawn and enter their + // spin loop on the uninitialized voice struct before any callback + // fired. See `audit-runs/phase-host-audio-eager/investigation.md`. + let seeded = state + .xaudio + .seed_fires_for(index, crate::xaudio::XAUDIO_REGISTER_SEED_FIRES); + tracing::info!( - "XAudioRegisterRenderDriverClient: index={} callback={:#010x} arg={:#010x} wrapped={:#010x} driver={:#010x} worker_handle={:?}", + "XAudioRegisterRenderDriverClient: index={} callback={:#010x} arg={:#010x} wrapped={:#010x} driver={:#010x} worker_handle={:?} seeded_fires={}", index, callback_pc, callback_arg, wrapped, driver_id, - state.xaudio.worker_handles[index], + state.xaudio.worker_handles[index], seeded, ); ctx.gpr[3] = 0; } @@ -3266,6 +4602,78 @@ fn xma_create_context(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut Kern ctx.gpr[3] = handle as u64; } +// ===== Crypto ===== + +/// Mirrors xenia-canary `XeCryptSha_entry` (xboxkrnl_crypt.cc:469-489): +/// 3-input SHA-1 accumulator. Each of the three (ptr, size) pairs is +/// processed only when both ptr and size are non-zero. 
The resulting +/// 20-byte digest is copied to `output`, truncated to `output_size`. +/// Void return (registered via `register_void_export`). +fn xe_crypt_sha(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { + use sha1::{Digest, Sha1}; + let input_1 = ctx.gpr[3] as u32; + let input_1_size = ctx.gpr[4] as u32; + let input_2 = ctx.gpr[5] as u32; + let input_2_size = ctx.gpr[6] as u32; + let input_3 = ctx.gpr[7] as u32; + let input_3_size = ctx.gpr[8] as u32; + let output = ctx.gpr[9] as u32; + let output_size = ctx.gpr[10] as u32; + let mut hasher = Sha1::new(); + for (ptr, size) in [ + (input_1, input_1_size), + (input_2, input_2_size), + (input_3, input_3_size), + ] { + if ptr != 0 && size != 0 { + let mut buf = vec![0u8; size as usize]; + mem.read_bytes(ptr, &mut buf); + hasher.update(&buf); + } + } + let digest = hasher.finalize(); + let n = std::cmp::min(20, output_size as usize); + if output != 0 && n != 0 { + mem.write_bytes(output, &digest[..n]); + } +} + +/// Mirrors xenia-canary `XeKeysConsolePrivateKeySign_entry` +/// (xboxkrnl_crypt.cc:1111-1138): writes a hardcoded fake +/// `XE_CONSOLE_CERTIFICATE` (0x1A8 bytes) to `output` and returns 1 +/// (success). Returns 0 if either pointer is null. The 5-byte +/// `XE_CONSOLE_ID` bit-field at offset 0x02 is laid out per MSVC +/// `#pragma pack(1)` semantics; we write the precomputed bytes +/// directly to avoid bit-fiddling ambiguity. +fn xe_keys_console_private_key_sign( + ctx: &mut PpcContext, + mem: &GuestMemory, + _state: &mut KernelState, +) { + let hash = ctx.gpr[3] as u32; + let output = ctx.gpr[4] as u32; + if hash == 0 || output == 0 { + ctx.gpr[3] = 0; + return; + } + // Zero the 0x1A8-byte struct first (canary calls `output.Zero()`). + let zeros = [0u8; 0x1A8]; + mem.write_bytes(output, &zeros); + // XE_CONSOLE_ID at offset 0x02 (5 bytes, MSVC pack(1) bit-fields). 
+ // RefurbBits = 0b0011, ManufactureMonth = 0b1001 → byte 0 = 0x93 + // ManufactureYear = 1, MacIndex3 = 0x40, MacIndex4 = 0x66, + // MacIndex5 = 0x7E, Crc = 0 → bytes 1..5 = 0x01,0x64,0xE6,0x07 + // (LSB-first packing of the 32-bit storage unit at offset 1.) + let console_id = [0x93u8, 0x01, 0x64, 0xE6, 0x07]; + mem.write_bytes(output + 0x02, &console_id); + // console_type (u32 BE) at 0x18 → Retail = 2 + mem.write_u32(output + 0x18, 2); + // manufacture_date[8] at 0x1C + let mfg_date = [2u8, 0, 0, 5, 1, 1, 2, 2]; + mem.write_bytes(output + 0x1C, &mfg_date); + ctx.gpr[3] = 1; +} + // ===== Xex ===== /// Mirrors xenia-canary `XexCheckExecutablePrivilege_entry` @@ -3503,14 +4911,20 @@ pub(crate) fn parse_timeout(state: &KernelState, timeout_ptr: u32, mem: &GuestMe /// running its real body, leaving the main thread parked forever on the /// completion event. fn resolve_pseudo_handle(state: &KernelState, handle: u32) -> u32 { - match handle { + let raw = match handle { 0xFFFF_FFFF => 0, 0xFFFF_FFFE => { let hw_id = state.scheduler.current_hw_id().unwrap_or(0); state.scheduler.thread_handle(hw_id).unwrap_or(0) } h => h, - } + }; + // Phase C+19: canonicalize through the dup-alias map so every Nt*/Ke* + // call site that funnels through `resolve_pseudo_handle` (18 sites at + // C+19 landing) automatically routes dup ids back to their source + // slot before indexing `state.objects`. Preserves AUDIT-062's + // signal-on-dup-wakes-wait-on-source invariant. + state.resolve_handle(raw) } /// Lazily register a shadow kernel object for a guest `PKEVENT` / `PKSEMAPHORE` @@ -3590,13 +5004,62 @@ fn ensure_dispatcher_object(state: &mut KernelState, mem: &GuestMemory, ptr: u32 }, _ => return, }; + // Phase C+17: object_type for the schema-v1 `handle.create` emit + // below. Must match `KernelObject::schema_object_type` exactly so + // re-entrant lookups via `lookup_handle_semantic_id` resolve a SID + // computed from the same tuple `(create_site_pc=0, tid, idx, type)`. 
+ let object_type = obj.schema_object_type(); state.objects.insert(ptr, obj); + // Phase C+17: each fresh shadow gets a baseline refcount of 1 so + // the lifecycle bookkeeping is symmetric with `alloc_handle_for`. + // No `handle.destroy` is currently emitted on shadow removal — + // canary's `GetNativeObject` lazy-wrap likewise survives for the + // session — but the entry's presence guards against + // accidental-underflow when future code wires the symmetric destroy. + state.handle_refcount.entry(ptr).or_insert(1); // Mirror canary `XObject::StashHandle` (xobject.h:253-256): on first // adoption, stamp the X_DISPATCH_HEADER's wait_list with the kXObjSignature // fourcc 'X','E','N','\0' (flink_ptr) and the stash handle (blink_ptr). // Game code reads these to recognize already-adopted dispatchers. mem.write_u32(ptr + 0x08, 0x58454E00); mem.write_u32(ptr + 0x0C, ptr); + // Phase C+17: schema-v1 `handle.create` event for the synthesized + // wrapper. Mirrors canary's `ObjectTable::AddHandle` emit + // (util/object_table.cc:191-198) inside `XObject::GetNativeObject` + // (xobject.cc:436-449). The `raw_handle_id` is the guest dispatcher + // pointer itself — ours uses it as the shadow's handle key, and + // canary's `StashHandle` likewise round-trips through the same + // dispatcher slot, so cross-engine SID identity is independent of + // the concrete value. Cvar-gated default-off via + // `event_log::is_enabled()`. Registers the SID in the global + // registry so the immediately-following `wait.begin` resolves a + // non-zero `handles_semantic_ids` element. + // + // Phase C+18: use `emit_handle_create_shared_global` so the SID is + // **scheduling-invariant** — depends only on `(pointer, object_type)`. + // The dispatcher at this pointer is process-global; whichever guest + // thread happens to be the first toucher synthesizes the wrapper, but + // which thread wins is timing-dependent. 
Per-thread `(tid, idx)`-keyed + // SIDs would diverge between canary and ours at the SID level; the + // diff tool also uses SID equality to cross-tid match the floating + // `handle.create` event when the first-toucher is a different tid in + // each engine. See `event_log::semantic_id_shared_global` and the + // C+18 memory entry / schema-v1.md §"Shared-global SIDs". + if crate::event_log::is_enabled() { + let (tid, cycle) = if let Some(r) = state.scheduler.current { + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + } else { + (0u32, 0u64) + }; + crate::event_log::emit_handle_create_shared_global( + tid, + cycle, + object_type, + ptr, + /* object_name */ None, + ); + } } /// Set `gpr[3]` on a just-woken HW thread to reflect which handle in its @@ -3622,9 +5085,90 @@ fn set_wake_status_for_waitany(state: &mut KernelState, r: ThreadRef, signaled_h } } +/// Iterate 2.T: classify a `HwState` for `wake.requested`. Pure read. +fn wake_classify_state(s: &xenia_cpu::scheduler::HwState) -> (&'static str, &'static str) { + use xenia_cpu::scheduler::{BlockReason, HwState}; + let kind = match s { + HwState::Blocked(BlockReason::WaitAny { .. }) + | HwState::ServicingIrq(BlockReason::WaitAny { .. }) => "WaitAny", + HwState::Blocked(BlockReason::WaitAll { .. }) + | HwState::ServicingIrq(BlockReason::WaitAll { .. }) => "WaitAll", + HwState::Blocked(_) | HwState::ServicingIrq(_) => "WaitSingle", + _ => "Other", + }; + let name = match s { + HwState::Ready => "Ready", + HwState::Blocked(_) => "Blocked", + HwState::Exited(_) => "Exited", + HwState::ServicingIrq(_) => "ServicingIrq", + HwState::Idle => "Idle", + }; + (kind, name) +} + +/// Iterate 2.T: capture (signaling_tid, cycle) at wake-loop entry from +/// the currently-executing HW thread (the signal-call caller). 
+fn wake_signaling_ctx(state: &KernelState) -> (u32, u64) { + if let Some(r) = state.scheduler.current { + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + } else { + (0u32, 0u64) + } +} + +/// Iterate 2.T: capture pre-wake snapshot of a waiter — its tid, hw_id, +/// and wait-kind classification — then emit a `wake.requested` event +/// after the wake call has produced its post-state. Pure observability; +/// no behavior change. Cvar-gated default-off via `event_log::is_enabled`. +fn emit_wake_requested_for( + state: &KernelState, + signaling_tid: u32, + cycle: u64, + target: ThreadRef, + handle: u32, + prior_wait_kind: &'static str, + prior_state_name: &'static str, +) { + if !crate::event_log::is_enabled() { + return; + } + let Some(slot) = state.scheduler.slots.get(target.hw_id as usize) else { + return; + }; + let Some(t) = slot.runqueue.get(target.idx as usize) else { + return; + }; + let (_post_kind, post_name) = wake_classify_state(&t.state); + let transitioned = prior_state_name == "Blocked" && post_name == "Ready"; + let new_state = if prior_state_name == "Ready" { + "AlreadyReady" + } else if post_name == "Ready" { + "Ready" + } else if post_name == "Blocked" { + "StillBlocked" + } else { + post_name + }; + crate::event_log::emit_wake_requested( + signaling_tid, + cycle, + t.tid, + handle, + prior_wait_kind, + transitioned, + new_state, + Some(target.hw_id), + ); +} + /// Wake all waiters whose predicate now holds on the given handle (manual /// reset fans out; auto-reset/semaphore wakes one and consumes). pub(crate) fn wake_eligible_waiters(state: &mut KernelState, handle: u32) { + // Iterate 2.T: capture signaler tid + cycle ONCE at entry. The wake + // loop below may iterate multiple times for semaphores; we want every + // wake.requested event in this fan-out attributed to the same caller. 
+ let (signaling_tid, signaling_cycle) = wake_signaling_ctx(state); loop { let Some(obj) = state.objects.get_mut(&handle) else { return; @@ -3680,6 +5224,14 @@ pub(crate) fn wake_eligible_waiters(state: &mut KernelState, handle: u32) { // and stays signaled so every parked waiter clears. let list = std::mem::take(waiters); for w in list { + // Iterate 2.T: snapshot prior state BEFORE the wake. + let (prior_kind, prior_name) = state + .scheduler + .slots + .get(w.hw_id as usize) + .and_then(|s| s.runqueue.get(w.idx as usize)) + .map(|t| wake_classify_state(&t.state)) + .unwrap_or(("Other", "Other")); set_wake_status_for_waitany(state, w, handle); state.scheduler.wake_ref(w); handle_remove_waiter_everywhere(state, w); @@ -3690,6 +5242,10 @@ pub(crate) fn wake_eligible_waiters(state: &mut KernelState, handle: u32) { let status = state.scheduler.thread(w).ctx.gpr[3]; state.audit_wake(handle, 0, "wake_eligible_waiters/manual", status); } + emit_wake_requested_for( + state, signaling_tid, signaling_cycle, w, handle, + prior_kind, prior_name, + ); } return; } else { @@ -3698,6 +5254,14 @@ pub(crate) fn wake_eligible_waiters(state: &mut KernelState, handle: u32) { } _ => return, }; + // Iterate 2.T: snapshot prior state of the auto-wake winner. 
+ let (prior_kind, prior_name) = state + .scheduler + .slots + .get(winner.hw_id as usize) + .and_then(|s| s.runqueue.get(winner.idx as usize)) + .map(|t| wake_classify_state(&t.state)) + .unwrap_or(("Other", "Other")); if consume { handle_consume(state, handle); } @@ -3708,63 +5272,143 @@ pub(crate) fn wake_eligible_waiters(state: &mut KernelState, handle: u32) { let status = state.scheduler.thread(winner).ctx.gpr[3]; state.audit_wake(handle, 0, "wake_eligible_waiters/auto", status); } + emit_wake_requested_for( + state, signaling_tid, signaling_cycle, winner, handle, + prior_kind, prior_name, + ); // continue loop for semaphores that may wake more } } +/// Iterate 2.Q: snapshot the (tids, count) of guest threads currently +/// parked on `handle`'s waiter list, BEFORE any signal-driven wake fans +/// out. Returns `(Vec, count)`. Empty when the handle is unknown, +/// doesn't carry a waiter list (File), or has no waiters. Pure read — +/// no behavior change. Used solely to feed `emit_signal_match`. +fn snapshot_waiters_for_signal(state: &KernelState, handle: u32) -> (Vec, usize) { + let obj = match state.objects.get(&handle) { + Some(o) => o, + None => return (Vec::new(), 0), + }; + let waiters: &[ThreadRef] = match obj { + KernelObject::Event { waiters, .. } + | KernelObject::Semaphore { waiters, .. } + | KernelObject::Thread { waiters, .. } + | KernelObject::Timer { waiters, .. } + | KernelObject::Mutex { waiters, .. } + | KernelObject::NotifyListener { waiters, .. } => waiters.as_slice(), + KernelObject::File { .. } => return (Vec::new(), 0), + }; + let tids: Vec = waiters + .iter() + .map(|r| state.scheduler.thread(*r).tid) + .collect(); + let n = tids.len(); + (tids, n) +} + +/// Iterate 2.Q: signal-emit shim — gather waiter snapshot + cycle and +/// emit a `signal.match` event. No-op when `event_log` is disabled or +/// when zero waiters are parked (per 2.Q scope: don't pollute the trace +/// with spurious-target signals). 
+fn emit_signal_match_if_waiters( + state: &KernelState, + signal_call: &'static str, + target_handle: u32, +) { + if !crate::event_log::is_enabled() { + return; + } + let (tids, n) = snapshot_waiters_for_signal(state, target_handle); + if n == 0 { + return; + } + let (tid, cycle) = if let Some(r) = state.scheduler.current { + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + } else { + (0u32, 0u64) + }; + crate::event_log::emit_signal_match(tid, cycle, signal_call, target_handle, n, &tids); +} + fn ke_set_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = PKEVENT on Ke* (guest pointer). See `ensure_dispatcher_object` // for why we need the lazy-shadow step here. let h = ctx.gpr[3] as u32; ensure_dispatcher_object(state, mem, h); - let previous = match state.objects.get_mut(&h) { + // Canary parity (xevent.cc:60-64): `XEvent::Set` returns constant `1` + // on success, NOT the prior signaled state as the NT contract claims. + // We compute `previous` for internal bookkeeping (audit_signal, + // wake_eligible_waiters honor the prior-state read), but report + // `1` for success / `0` for "no dispatcher found" to match the + // canary Phase A oracle. See Phase C+7 investigation.md. + let (previous, found) = match state.objects.get_mut(&h) { Some(KernelObject::Event { signaled, .. }) => { let prev = *signaled; *signaled = true; - prev as u32 + (prev as u32, true) } - _ => 0, + _ => (0u32, false), }; state.audit_signal(h, ctx.lr as u32, "KeSetEvent", previous as u64); + emit_signal_match_if_waiters(state, "KeSetEvent", h); wake_eligible_waiters(state, h); - ctx.gpr[3] = previous as u64; + ctx.gpr[3] = if found { 1 } else { 0 }; } fn ke_reset_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { + // r3 = PKEVENT on Ke* (guest pointer). See `ensure_dispatcher_object` + // for the lazy-shadow step. 
let h = ctx.gpr[3] as u32; ensure_dispatcher_object(state, mem, h); - let previous = match state.objects.get_mut(&h) { + // Canary parity (xevent.cc:72-75): `XEvent::Reset` returns constant `1` + // on success — exact sibling of `XEvent::Set`. The NT contract claims + // the prior signaled state, but canary hardcodes `1` and the game + // observes that value via Phase A oracle at idx=102164. Sibling fix + // of Phase C+7 KeSetEvent (xevent.cc:60-64). The `assert_always; + // return 0` arm is preserved (no shadow → 0). + let (previous, found) = match state.objects.get_mut(&h) { Some(KernelObject::Event { signaled, .. }) => { let prev = *signaled; *signaled = false; - prev as u32 + (prev as u32, true) } - _ => 0, + _ => (0u32, false), }; - ctx.gpr[3] = previous as u64; + state.audit_signal(h, ctx.lr as u32, "KeResetEvent", previous as u64); + ctx.gpr[3] = if found { 1 } else { 0 }; } fn nt_set_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { - let handle = ctx.gpr[3] as u32; + // Phase C+19: canonicalize dup ids → source so signal-on-dup wakes + // wait-on-source (AUDIT-062 invariant). + let handle = state.resolve_handle(ctx.gpr[3] as u32); let prev_ptr = ctx.gpr[4] as u32; - let previous = match state.objects.get_mut(&handle) { + // Canary parity (xboxkrnl_threading.cc:610-628): the optional out-pointer + // is filled with `was_signalled` = `ev->Set()` = constant 1 (see + // xevent.cc:60-64), NOT the prior signaled state. r3 carries + // STATUS_SUCCESS. We retain `previous` for internal audit/wake plumbing. + let (previous, found) = match state.objects.get_mut(&handle) { Some(KernelObject::Event { signaled, .. 
}) => { let prev = *signaled; *signaled = true; - prev as u32 + (prev as u32, true) } - _ => 0, + _ => (0u32, false), }; state.audit_signal(handle, ctx.lr as u32, "NtSetEvent", previous as u64); + emit_signal_match_if_waiters(state, "NtSetEvent", handle); wake_eligible_waiters(state, handle); - if prev_ptr != 0 { - mem.write_u32(prev_ptr, previous); + if prev_ptr != 0 && found { + mem.write_u32(prev_ptr, 1); } ctx.gpr[3] = STATUS_SUCCESS; } fn nt_clear_event(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut KernelState) { - let handle = ctx.gpr[3] as u32; + // Phase C+19: canonicalize dup ids → source. + let handle = state.resolve_handle(ctx.gpr[3] as u32); if let Some(KernelObject::Event { signaled, .. }) = state.objects.get_mut(&handle) { *signaled = false; } @@ -3855,6 +5499,7 @@ fn ke_release_semaphore(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut Ker _ => 0, }; state.audit_signal(h, ctx.lr as u32, "KeReleaseSemaphore", previous as u64); + emit_signal_match_if_waiters(state, "KeReleaseSemaphore", h); wake_eligible_waiters(state, h); ctx.gpr[3] = previous as u64; } @@ -3884,6 +5529,7 @@ fn nt_release_semaphore(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut Ker }; state.audit_signal(handle, ctx.lr as u32, "NtReleaseSemaphore", previous as u64); if status == STATUS_SUCCESS { + emit_signal_match_if_waiters(state, "NtReleaseSemaphore", handle); wake_eligible_waiters(state, handle); } if prev_ptr != 0 { @@ -4015,10 +5661,53 @@ fn nt_wait_for_single_object_ex( ) { // r3 = handle, r4 = wait_mode, r5 = alertable, r6 = timeout_ptr let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let alertable = ctx.gpr[5] != 0; let timeout_ptr = ctx.gpr[6] as u32; + // Phase C+15-α: schema-v1 `wait.begin` event. Emitted BEFORE + // `do_wait_single` to surface the wait initiation regardless of + // synchronous vs. parked outcome. `wait.end` is deferred (the + // synchronous status is already captured in the + // immediately-following `kernel.return`). 
Canary's symmetric emit + // is at `NtWaitForSingleObjectEx_entry` body. + if crate::event_log::is_enabled() { + let timeout_ns = decode_timeout_ns(mem, timeout_ptr); + let sid = crate::event_log::lookup_handle_semantic_id(handle); + let (tid, cycle) = { + let r = state.scheduler.current_ref(); + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + }; + crate::event_log::emit_wait_begin( + tid, + cycle, + &[sid], + timeout_ns, + alertable, + /* wait_all */ false, + ); + } do_wait_single(ctx, state, handle, timeout_ptr, mem); } +/// Phase C+15-α helper: decode a TIMEOUT* big-endian i64 to ns for the +/// schema-v1 `wait.begin` payload. `timeout_ptr == 0` → INFINITE +/// (encoded as -1 per schema). NT TIMEOUT units are 100ns. Negative +/// values are relative (timeout from now); positive values are +/// absolute deadlines. For simplicity (and to mirror canary's +/// emission), we report the **raw** ticks unscaled; the diff tool +/// only compares values, not their meaning. Encoding into ns matches +/// schema-v1 field name; precise unit-conversion isn't required for +/// cross-engine equality. +fn decode_timeout_ns(mem: &GuestMemory, timeout_ptr: u32) -> i64 { + if timeout_ptr == 0 { + return -1; + } + let raw = mem.read_u64(timeout_ptr) as i64; + // NT TIMEOUT is 100ns ticks. Convert to ns; saturating to avoid + // wraparound on extreme values. + raw.saturating_mul(100) +} + /// `NtSignalAndWaitForSingleObjectEx(signal_handle, wait_handle, wait_mode, /// alertable, timeout_ptr)` — atomically signal one kernel object and wait on /// another. Matches Canary's `NtSignalAndWaitForSingleObjectEx_entry` @@ -4083,7 +5772,27 @@ fn ke_wait_for_single_object( let handle = resolve_pseudo_handle(state, ctx.gpr[3] as u32); ensure_dispatcher_object(state, mem, handle); refresh_pkevent_shadow_from_guest(state, mem, handle); + let alertable = ctx.gpr[6] != 0; let timeout_ptr = ctx.gpr[7] as u32; + // Phase C+15-α: schema-v1 `wait.begin` event. 
Symmetric counterpart + // in canary at `xeKeWaitForSingleObject`. + if crate::event_log::is_enabled() { + let timeout_ns = decode_timeout_ns(mem, timeout_ptr); + let sid = crate::event_log::lookup_handle_semantic_id(handle); + let (tid, cycle) = { + let r = state.scheduler.current_ref(); + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + }; + crate::event_log::emit_wait_begin( + tid, + cycle, + &[sid], + timeout_ns, + alertable, + /* wait_all */ false, + ); + } do_wait_single(ctx, state, handle, timeout_ptr, mem); } @@ -4173,7 +5882,9 @@ fn ke_resume_thread(ctx: &mut PpcContext, _mem: &GuestMemory, state: &mut Kernel fn nt_resume_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = handle, r4 = prev_suspend_count_ptr - let handle = ctx.gpr[3] as u32; + // Phase C+19: canonicalize dup ids → source so a duplicated thread + // handle (rare but legal) still resolves to the scheduler entry. + let handle = state.resolve_handle(ctx.gpr[3] as u32); let prev_ptr = ctx.gpr[4] as u32; let prev = state .scheduler @@ -4188,7 +5899,8 @@ fn nt_resume_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelS fn nt_suspend_thread(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // r3 = handle, r4 = prev_suspend_count_ptr - let handle = ctx.gpr[3] as u32; + // Phase C+19: canonicalize dup ids → source. + let handle = state.resolve_handle(ctx.gpr[3] as u32); let prev_ptr = ctx.gpr[4] as u32; let prev = state .scheduler @@ -4250,10 +5962,22 @@ fn xex_get_module_handle(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut Ke /// * r4 = new_handle_ptr (if zero, the call is actually a close) /// * r5 = options (bit 0 = DUPLICATE_CLOSE_SOURCE) /// -/// Canary allocates a fresh handle id that refcounts the same underlying -/// `XObject`. We don't refcount, so we alias: write the *source* handle back -/// as the "new" handle. 
The game then uses it interchangeably, and both ids -/// resolve to the same `KernelObject` entry. +/// Canary's `ObjectTable::DuplicateHandle` (object_table.cc:210-223) allocates +/// a fresh slot via `AddHandle` (which retains the underlying `XObject` and +/// emits `handle.create`), returning the new slot id. Both source and dup +/// slots independently refcount the same `XObject`; closing one decrements +/// the slot's local count and, when zero, removes that slot. The underlying +/// object dies only when the last slot is gone. +/// +/// Phase C+19: ours mirrors this. Pre-C+19 we aliased `dup_id == source_id` +/// to avoid maintaining a separate refcount across distinct ids; AUDIT-062 +/// verified the wedge-case (signal-on-dup wakes wait-on-source) worked +/// because the ids collided into the same `state.objects` entry. Allocating +/// a fresh id surfaces the canary-symmetric `handle.create` Phase A event +/// at main idx=102553; the AUDIT-062 invariant is preserved by routing +/// every Nt*/Ke* lookup through `state.resolve_handle` which canonicalizes +/// the dup id back to the source — both ids still hit the same `KernelObject` +/// with the same `waiters` list and `signaled` flag. /// /// A prior `stub_success` left `*new_handle_ptr` uninitialized — Sylpheed's /// thread-dispatch prologue does `NtDuplicateObject(event, &dup)` then passes @@ -4261,34 +5985,70 @@ fn xex_get_module_handle(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut Ke /// completion. With the stub, `dup` was stack garbage → set-event lookup /// failed silently → main thread blocked forever on the source event. fn nt_duplicate_object(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { - let source = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + let raw_source = resolve_pseudo_handle(state, ctx.gpr[3] as u32); + // The guest may itself pass a dup id — canonicalize before validation + // so we always alias against the live `state.objects` entry. 
+ let canonical = state.resolve_handle(raw_source); let out_ptr = ctx.gpr[4] as u32; let options = ctx.gpr[5] as u32; + const DUPLICATE_CLOSE_SOURCE: u32 = 0x0000_0001; - if !state.objects.contains_key(&source) { + if !state.objects.contains_key(&canonical) { if out_ptr != 0 { mem.write_u32(out_ptr, 0); } ctx.gpr[3] = STATUS_INVALID_HANDLE; return; } - if out_ptr != 0 { - mem.write_u32(out_ptr, source); - } - // Aliased-handle refcount: since we return the source handle as the "new" - // handle (no fresh id), every duplicate must bump the per-handle refcount - // so the later `NtClose` pair (one for source, one for dup) doesn't - // destroy the object mid-flight. `DUPLICATE_CLOSE_SOURCE` (bit 0) closes - // the source in Canary (xboxkrnl_ob.cc:389), so in our aliased model the - // source-close cancels the dup-gain: net refcount is unchanged. Without - // `CLOSE_SOURCE`, both the source and the dup are separately live and we - // need +1. - const DUPLICATE_CLOSE_SOURCE: u32 = 0x0000_0001; - if options & DUPLICATE_CLOSE_SOURCE == 0 - && let Some(c) = state.handle_refcount.get_mut(&source) + + // Allocate a fresh slot id. Canonical refcount bumps by one slot; + // the dup slot starts with a single local NtClose owed to it. + let dup_id = state.alloc_handle(); + state.handle_aliases.insert(dup_id, canonical); + state.handle_refcount.insert(dup_id, 1); + *state.canonical_slot_count.entry(canonical).or_insert(0) += 1; + + // Phase C+15-α schema-v1 `handle.create` event. Canary's symmetric path + // is `ObjectTable::AddHandle` (object_table.cc:198-204) which emits + // when called from inside `DuplicateHandle`. SID recipe = per-tid + // `(creating_tid, idx_at_creation, object_type)` — matches canary's + // `EmitHandleCreateAuto` exactly (event_log.cc), so the same logical + // dup pair produces the same SID across engines. 
+ if crate::event_log::is_enabled() + && let Some(obj) = state.objects.get(&canonical) { - *c += 1; + let object_type = obj.schema_object_type(); + let (tid, cycle) = { + let r = state.scheduler.current_ref(); + let t = state.scheduler.thread(r); + (t.tid, t.ctx.timebase) + }; + crate::event_log::emit_handle_create_auto( + tid, + cycle, + /* create_site_pc */ 0, + object_type, + dup_id, + /* object_name */ None, + ); } + + if out_ptr != 0 { + mem.write_u32(out_ptr, dup_id); + } + + // DUPLICATE_CLOSE_SOURCE: canary additionally calls `RemoveHandle(handle)` + // (xboxkrnl_ob.cc:405-408) which decrements the source slot's refcount + // and — if zero — destroys the source slot (but leaves the underlying + // object alive through the dup). We mirror by routing the source through + // `close_handle_internal` so the symmetric `handle.destroy(source)` event + // fires at the canary-equivalent boundary. Note: the source value here + // is `raw_source` (the id the guest passed, post pseudo-handle resolve), + // NOT `canonical` — the close targets the *slot* the guest named. + if options & DUPLICATE_CLOSE_SOURCE != 0 { + close_handle_internal(state, raw_source); + } + ctx.gpr[3] = STATUS_SUCCESS; } @@ -4344,6 +6104,28 @@ mod tests { mem.alloc(SCRATCH_BASE, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) .expect("scratch page must commit"); let mut state = KernelState::new(); + // Phase C+11 — the default cache root is now persistent, but + // tests must NOT share state. Override with a per-test tmpdir + // (unique by PID + monotonic counter + nanos) and wipe on + // entry. Mirrors the pre-flip AUDIT-038 behaviour for the + // test harness specifically. 
+ static TEST_CACHE_ID: std::sync::atomic::AtomicU64 = + std::sync::atomic::AtomicU64::new(0); + let test_id = TEST_CACHE_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .subsec_nanos(); + let test_cache = std::env::temp_dir().join(format!( + "xenia-rs-test-cache-{}-{}-{}", + std::process::id(), + test_id, + nanos + )); + // Wipe any leftover, then install. + let _ = std::fs::remove_dir_all(&test_cache); + std::fs::create_dir_all(&test_cache).expect("test cache mkdir"); + state.set_cache_root(test_cache); // Under per-slot runqueues, most kernel exports reach through // `scheduler.current` — tests that exercise those paths need a // live thread installed on slot 0 first. Older tests (file I/O @@ -4423,12 +6205,21 @@ mod tests { // Confirm PCR was written by the spawn (sanity). assert_eq!(mem.read_u32(pcr_base + 0x2C), 1); - // Now call KeSetAffinityThread(handle=0x2000, new_mask=0x20). + // Now call KeSetAffinityThread(handle=0x2000, new_mask=0x20, + // prev_mask_ptr=scratch). Post Stage 2 Batch 3: r3=STATUS_SUCCESS, + // previous mask delivered via OUT-pointer. + let prev_ptr = SCRATCH_BASE + 0xA0; + mem.write_u32(prev_ptr, 0xFFFF_FFFF); // sentinel ctx.gpr[3] = 0x2000; ctx.gpr[4] = 0x20; // slot 5 only + ctx.gpr[5] = prev_ptr as u64; ke_set_affinity_thread(&mut ctx, &mut mem, &mut state); - // Return value = previous mask = 0x02. - assert_eq!(ctx.gpr[3], 0x02); + assert_eq!(ctx.gpr[3], 0, "must return STATUS_SUCCESS in r3"); + assert_eq!( + mem.read_u32(prev_ptr), + 0x02, + "previous affinity mask must be written to OUT-pointer" + ); // PCR rewritten to 5. assert_eq!(mem.read_u32(pcr_base + 0x2C), 5); // Thread now on slot 5. @@ -4436,20 +6227,95 @@ mod tests { assert_eq!(r.hw_id, 5); } - /// Axis 5: `KeSetIdealProcessor` stores a hint on the thread - /// without migrating it; query round-trips. 
+ /// Stage 2 Batch 3: zero affinity must return STATUS_INVALID_PARAMETER + /// and not touch the OUT-pointer. #[test] - fn ke_set_ideal_processor_round_trips() { + fn ke_set_affinity_thread_zero_affinity_returns_invalid_parameter() { + let (mut ctx, mem, mut state) = fresh(); + let prev_ptr = SCRATCH_BASE + 0xA0; + mem.write_u32(prev_ptr, 0xDEAD_BEEF); + ctx.gpr[3] = 0x1000; // main handle + ctx.gpr[4] = 0; // zero affinity + ctx.gpr[5] = prev_ptr as u64; + ke_set_affinity_thread(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0xC000_000D, "STATUS_INVALID_PARAMETER"); + assert_eq!(mem.read_u32(prev_ptr), 0xDEAD_BEEF, "OUT-ptr untouched"); + } + + /// Stage 2 Batch 3: NULL OUT-pointer is valid (mirrors canary's + /// `if (previous_affinity_ptr)` guard); still returns SUCCESS and + /// migrates the thread. + #[test] + fn ke_set_affinity_thread_null_out_ptr_still_succeeds() { let (mut ctx, mut mem, mut state) = fresh(); - // Main thread handle is 0x1000. - ctx.gpr[3] = 0x1000; - ctx.gpr[4] = 3; - ke_set_ideal_processor(&mut ctx, &mut mem, &mut state); + use xenia_cpu::scheduler::SpawnParams; + let pcr_base = SCRATCH_BASE + 0x500; + let params = SpawnParams { + entry: 0x8200_0000, + start_context: 0, + stack_base: 0x7200_0000, + stack_size: 0x10000, + pcr_base, + tls_base: 0, + thread_handle: 0x2100, + guest_tid: 43, + create_suspended: false, + is_initial: false, + tls_slot_count: 0, + affinity_mask: 0b0000_0010, + priority: 0, + ideal_processor: None, + }; + state + .scheduler + .spawn(params, &mut crate::state::GuestMemoryPcr(&mut mem)) + .unwrap(); + ctx.gpr[3] = 0x2100; + ctx.gpr[4] = 0x10; // slot 4 + ctx.gpr[5] = 0; // NULL OUT-ptr + ke_set_affinity_thread(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "STATUS_SUCCESS even with NULL OUT-ptr"); + let r = state.scheduler.find_by_handle(0x2100).expect("alive"); + assert_eq!(r.hw_id, 4); + } + + /// Axis 5: scheduler-level ideal-processor hint round-trip via + /// `Scheduler::set_ideal_ref` / 
`ideal_ref`. The previous test + /// exercised `ke_set_ideal_processor` / `ke_query_ideal_processor` + /// which were hallucinated functions at the wrong ordinals — those + /// bodies were removed in Phase C+6½. The underlying scheduler + /// state still backs `NtSetInformationThread` info-class + /// `ThreadIdealProcessor`. + #[test] + fn scheduler_ideal_processor_round_trips() { + let (_, _, mut state) = fresh(); + let r = state.scheduler.find_by_handle(0x1000).expect("main alive"); // Prior was 0xFF (unset sentinel). - assert_eq!(ctx.gpr[3], 0xFF); - ctx.gpr[3] = 0x1000; - ke_query_ideal_processor(&mut ctx, &mut mem, &mut state); - assert_eq!(ctx.gpr[3], 3); + let prev = state.scheduler.set_ideal_ref(r, 3); + assert_eq!(prev, 0xFF); + let queried = state.scheduler.ideal_ref(r); + assert_eq!(queried, Some(3)); + } + + /// Phase C+6½: `KeQueryInterruptTime` (ord 0x82) returns a + /// non-zero monotonic u64 in gpr[3]. Previously this ord was + /// mis-labeled `KeQueryIdealProcessor` and returned a 1-byte + /// processor index — guests querying the system interrupt-time + /// counter received the wrong value. + #[test] + fn ke_query_interrupt_time_returns_synthetic_u64() { + let (mut ctx, mut mem, mut state) = fresh(); + // Pre-clear gpr[3] so we know the function wrote it. + ctx.gpr[3] = 0; + ke_query_interrupt_time(&mut ctx, &mut mem, &mut state); + assert_ne!(ctx.gpr[3], 0, "interrupt time must be non-zero"); + // Should be 64-bit (above u32::MAX) to ensure it's not + // truncated to a processor-index byte. + assert!( + ctx.gpr[3] > 0xFFFF_FFFF, + "interrupt time must occupy 64 bits, got {:#x}", + ctx.gpr[3] + ); } /// Axis 5: `NtSetInformationThread` class `ThreadAffinityMask` @@ -4660,6 +6526,94 @@ mod tests { assert!(event_signaled(&state, evt), "write must signal too"); } + /// Phase C+5 — async-opened files (no `FILE_SYNCHRONOUS_IO_*` bit in + /// `create_options`) return `STATUS_PENDING` (0x103) from + /// `NtWriteFile`. 
The synchronous write still completes and + /// IO_STATUS_BLOCK still records STATUS_SUCCESS — only the function + /// return value flips. Mirrors canary + /// `xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc:351-353`. + #[test] + fn nt_write_file_async_handle_returns_status_pending() { + let (mut ctx, mut mem, mut state) = fresh(); + // Pre-register an "async" file handle the same way `open_vfs_file` + // does for a file whose `create_options` omits sync bits. + let handle = state.alloc_handle_for(KernelObject::File { + path: "async.tmp".to_string(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + host_path: None, + }); + state.async_file_handles.insert(handle); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; // no event + ctx.gpr[7] = SCRATCH_BASE as u64; // iosb at scratch base + ctx.gpr[9] = 8; // length + nt_write_file(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_PENDING, + "async-opened file: r3 must return STATUS_PENDING (0x103)" + ); + assert_eq!( + mem.read_u32(SCRATCH_BASE), + STATUS_SUCCESS as u32, + "IO_STATUS_BLOCK.status still records STATUS_SUCCESS" + ); + assert_eq!( + mem.read_u32(SCRATCH_BASE + 4), + 8, + "IO_STATUS_BLOCK.information records bytes written" + ); + } + + /// Sync-opened files (one of `FILE_SYNCHRONOUS_IO_*` bits set in + /// `create_options`) retain the legacy `STATUS_SUCCESS` return. + #[test] + fn nt_write_file_sync_handle_returns_status_success() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::File { + path: "sync.tmp".to_string(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + host_path: None, + }); + // Not inserted into `async_file_handles` — sync handle by default. 
+ ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; + ctx.gpr[7] = SCRATCH_BASE as u64; + ctx.gpr[9] = 8; + nt_write_file(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_SUCCESS, + "sync-opened file: r3 must return STATUS_SUCCESS" + ); + } + + /// `nt_close` must prune the async-file side-table when the final + /// refcount drops to zero so a recycled handle isn't mis-classified. + #[test] + fn nt_close_prunes_async_file_set() { + let (mut ctx, mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::File { + path: "x.tmp".to_string(), + size: 0, + position: 0, + data: std::sync::Arc::new(Vec::new()), + dir_enum_pos: None, + host_path: None, + }); + state.async_file_handles.insert(handle); + ctx.gpr[3] = handle as u64; + nt_close(&mut ctx, &mem, &mut state); + assert!( + !state.async_file_handles.contains(&handle), + "nt_close must remove from async_file_handles" + ); + } + /// Verify `FileStandardInformation` reports `Directory=1` for empty-path /// (device-root) synthesized file handles. Sylpheed calls /// `NtCreateFile("game:\\")` then `NtQueryInformationFile` on the returned @@ -5023,8 +6977,13 @@ mod tests { write_dispatcher_header(&mut mem, kevent_ptr, 0, 1); // notification ctx.gpr[3] = kevent_ptr as u64; ke_reset_event(&mut ctx, &mut mem, &mut state); - // After reset, shadow exists and is unsignaled; gpr[3] reports previous=1. - assert_eq!(ctx.gpr[3], 1, "previous state must be reported"); + // After reset, shadow exists and is unsignaled. Post-C+8: gpr[3] + // reports canary-constant `1` on hit (xevent.cc:72-75 hardcodes + // `return 1`), NOT the prior signaled state — same value here by + // coincidence (prior state happens to be 1). The + // `ke_reset_event_returns_constant_one_on_unsignaled_*` tests below + // distinguish constant-return from prior-state-return. 
+ assert_eq!(ctx.gpr[3], 1, "canary parity: KeResetEvent returns constant 1 on hit"); match state.objects.get(&kevent_ptr) { Some(KernelObject::Event { manual_reset, signaled, .. }) => { assert!(*manual_reset, "type=0 must be manual-reset"); @@ -5117,6 +7076,95 @@ mod tests { assert_eq!(mem.read_u32(ptr + 0x0C), 0); } + /// Phase C+17: first adoption of a guest dispatcher pointer via + /// `ensure_dispatcher_object` must seed `handle_refcount[ptr] = 1`, + /// mirroring canary's `ObjectTable::AddHandle` baseline + /// (object_table.cc:164). Symmetric to `alloc_handle_for` which + /// already does this for handle-based objects. + #[test] + fn ensure_dispatcher_object_initializes_handle_refcount_for_event() { + let (mut _ctx, mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0x800; + write_dispatcher_header(&mem, kevent_ptr, 1, 0); // synchronization + assert!(!state.handle_refcount.contains_key(&kevent_ptr)); + ensure_dispatcher_object(&mut state, &mem, kevent_ptr); + assert!(state.objects.contains_key(&kevent_ptr)); + assert_eq!( + state.handle_refcount.get(&kevent_ptr).copied(), + Some(1), + "fresh shadow must start with refcount 1" + ); + } + + /// Same baseline for semaphores. Header type=5 picks the + /// Semaphore branch; refcount is independent of count/max. + #[test] + fn ensure_dispatcher_object_initializes_handle_refcount_for_semaphore() { + let (mut _ctx, mem, mut state) = fresh(); + let sem_ptr = SCRATCH_BASE + 0x820; + write_dispatcher_header(&mem, sem_ptr, 5, 0); + mem.write_u32(sem_ptr + 0x10, 4); // Limit=4 + ensure_dispatcher_object(&mut state, &mem, sem_ptr); + assert!(matches!(state.objects.get(&sem_ptr), Some(KernelObject::Semaphore { .. }))); + assert_eq!(state.handle_refcount.get(&sem_ptr).copied(), Some(1)); + } + + /// Re-entry on the same pointer is a no-op: the early-return guard + /// at the top of `ensure_dispatcher_object` (contains_key check) + /// must NOT double-bump the refcount. 
Mirrors canary's + /// `kXObjSignature` short-circuit (xobject.cc:421-427). + #[test] + fn ensure_dispatcher_object_is_idempotent_on_repeated_touch() { + let (mut _ctx, mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0x840; + write_dispatcher_header(&mem, kevent_ptr, 0, 0); // notification + ensure_dispatcher_object(&mut state, &mem, kevent_ptr); + ensure_dispatcher_object(&mut state, &mem, kevent_ptr); + ensure_dispatcher_object(&mut state, &mem, kevent_ptr); + assert_eq!( + state.handle_refcount.get(&kevent_ptr).copied(), + Some(1), + "repeated ensure must not bump refcount" + ); + } + + /// Two distinct native pointers each get their own shadow and + /// their own refcount entry. Canary's `GetNativeObject` lazy-wraps + /// each dispatcher independently — there's no shared XObject for + /// distinct guest pointers. + #[test] + fn ensure_dispatcher_object_distinct_ptrs_get_distinct_refcount_entries() { + let (mut _ctx, mem, mut state) = fresh(); + let a = SCRATCH_BASE + 0x860; + let b = SCRATCH_BASE + 0x880; + write_dispatcher_header(&mem, a, 1, 0); + write_dispatcher_header(&mem, b, 5, 0); + mem.write_u32(b + 0x10, 2); + ensure_dispatcher_object(&mut state, &mem, a); + ensure_dispatcher_object(&mut state, &mem, b); + assert_eq!(state.handle_refcount.get(&a).copied(), Some(1)); + assert_eq!(state.handle_refcount.get(&b).copied(), Some(1)); + assert!(matches!(state.objects.get(&a), Some(KernelObject::Event { .. }))); + assert!(matches!(state.objects.get(&b), Some(KernelObject::Semaphore { .. }))); + } + + /// Unsupported dispatcher types (e.g., Mutant type=2 — canary's + /// `GetNativeObject` `assert_always`s on them) must leave both + /// `state.objects` AND `state.handle_refcount` untouched. The + /// early-return after the match guard prevents both insertions. 
+ #[test] + fn ensure_dispatcher_object_unknown_type_does_not_touch_refcount() { + let (mut _ctx, mem, mut state) = fresh(); + let ptr = SCRATCH_BASE + 0x8A0; + write_dispatcher_header(&mem, ptr, 2, 0); // Mutant — unsupported + ensure_dispatcher_object(&mut state, &mem, ptr); + assert!(!state.objects.contains_key(&ptr)); + assert!( + !state.handle_refcount.contains_key(&ptr), + "no refcount entry for unsupported dispatcher type" + ); + } + /// Mirror canary `XObject::StashHandle` (xobject.h:253-256): on first /// adoption of a guest dispatcher, +0x08 must hold the 'X','E','N','\0' /// fourcc and +0x0C must hold the stash handle. @@ -6215,6 +8263,14 @@ mod tests { let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, "cache:\\rt.tmp"); let handle_out = SCRATCH_BASE + 0x300; let iosb = SCRATCH_BASE + 0x310; + // Phase C+5 — set sp so nt_create_file reads create_options from a + // committed scratch slot, and set the FILE_SYNCHRONOUS_IO_NONALERT + // bit so `NtWriteFile` returns `STATUS_SUCCESS` (legacy assertion). + // Files opened WITHOUT this bit return `STATUS_PENDING` after + // canary's xboxkrnl_io.cc:351-353 — covered by + // `nt_write_file_async_handle_returns_status_pending`. + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, FILE_SYNCHRONOUS_IO_NONALERT); ctx.gpr[3] = handle_out as u64; ctx.gpr[5] = obj_attrs as u64; ctx.gpr[6] = iosb as u64; @@ -6335,6 +8391,658 @@ mod tests { std::fs::remove_dir_all(&dir).ok(); } + /// Phase C+11 Stage 2 — when a `cache:\` file already exists + /// on disk as a regular file, re-opening it with the + /// `FILE_DIRECTORY_FILE` bit set MUST still route through the file + /// branch (host_path = Some) — the on-disk type wins. Pre-fix: + /// `is_dir_open = want_dir || host_path.is_dir()` would force + /// re-opens with bit 0x1 set into the dir branch, dropping + /// host_path and blocking subsequent class-10 renames. 
+ #[test] + fn cache_existing_file_wins_over_directory_bit() { + let (mut ctx, mem, mut state) = fresh(); + let cache_root = state.cache_root.clone().unwrap(); + + // 1. FILE_CREATE without DIRECTORY bit → produces a real file. + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, "cache:\\foo.tmp"); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, FILE_SYNCHRONOUS_IO_NONALERT); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_CREATE as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + assert!(cache_root.join("foo.tmp").is_file()); + + // 2. Re-open with FILE_DIRECTORY_FILE bit set in r7. + // open_options bit 0x1 = FILE_DIRECTORY_FILE. + // open_options bit 0x20 = FILE_SYNCHRONOUS_IO_NONALERT (keeps + // the handle synchronous so NtWriteFile returns STATUS_SUCCESS). + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[7] = (0x1 | FILE_SYNCHRONOUS_IO_NONALERT) as u64; + nt_open_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + let handle = mem.read_u32(handle_out); + + // 3. The re-opened handle MUST be a file handle with a real + // host_path, not a directory handle with host_path=None. + let obj = state.objects.get(&handle).expect("handle must exist"); + match obj { + KernelObject::File { host_path, path, .. 
} => { + assert!( + host_path.is_some(), + "existing file re-open must keep host_path (got None) — bug #2 regression" + ); + assert!( + !path.ends_with('/'), + "existing file re-open path must NOT have trailing '/' (got dir-shape) — bug #2 regression" + ); + } + _ => panic!("expected File kernel object"), + } + } + + /// Phase C+11 Stage 2 — `cache:\access`, `cache:\ignore`, and + /// `cache:\recent` are TOP-LEVEL files in canary's cache (per + /// the canary-cache-listing.csv enumeration). Cold creation + /// through ours should produce files, not directories. + #[test] + fn cache_top_level_manifests_create_as_files() { + for path_str in ["cache:\\access", "cache:\\ignore", "cache:\\recent"] { + let (mut ctx, mem, mut state) = fresh(); + let cache_root = state.cache_root.clone().unwrap(); + let leaf_name = path_str.strip_prefix("cache:\\").unwrap(); + + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, path_str); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + // Set FILE_NON_DIRECTORY_FILE explicitly so Sylpheed-style + // create paths produce host files. (If Sylpheed sets the + // DIRECTORY bit but no NON_DIRECTORY bit, the pre-fix code + // would mis-create as dirs; this test pins the + // bit-conflict-resolution policy.) 
+ mem.write_u32( + SCRATCH_BASE + 0x700 + 0x54, + FILE_SYNCHRONOUS_IO_NONALERT | 0x40, // | FILE_NON_DIRECTORY_FILE + ); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_CREATE as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_SUCCESS, + "FILE_CREATE on {} must succeed", + path_str + ); + assert!( + cache_root.join(leaf_name).is_file(), + "cache:\\{} must be a host file (got: dir or absent)", + leaf_name + ); + } + } + + /// Phase C+11.1 — Sylpheed's cold-boot probe pattern: open + /// `cache:\access` / `cache:\ignore` / `cache:\recent` with + /// disp=1 (FILE_OPEN) + opts=0x7 (DIRECTORY_FILE | WRITE_THROUGH + /// | SEQUENTIAL_ONLY) MUST return `STATUS_OBJECT_NAME_NOT_FOUND` + /// and MUST NOT create a host directory. Pre-fix the + /// `is_dir_open` branch unconditionally mkdir-p'd whenever + /// `want_dir`, which produced spurious `access`/`ignore`/`recent` + /// directories that then occluded later `disp=5 NON_DIRECTORY` + /// re-creates Sylpheed uses to populate the manifests. + /// Mirrors canary's `VirtualFileSystem::OpenFile` + /// (virtual_file_system.cc:265-273) which returns + /// `X_STATUS_OBJECT_NAME_NOT_FOUND` for `kOpen` on missing path, + /// regardless of `is_directory`. + #[test] + fn cache_open_directory_on_missing_path_returns_not_found() { + for path_str in ["cache:\\access", "cache:\\ignore", "cache:\\recent"] { + let (mut ctx, mem, mut state) = fresh(); + let cache_root = state.cache_root.clone().unwrap(); + let leaf_name = path_str.strip_prefix("cache:\\").unwrap(); + + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, path_str); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + // Sylpheed's exact cold-boot bit pattern: FILE_DIRECTORY_FILE + // (0x1) | FILE_WRITE_THROUGH (0x2) | FILE_SEQUENTIAL_ONLY (0x4) + // = 0x7. 
Slot offset 0x54 per the `nt_create_file` + // arg-marshalling. + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, 0x7); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_OPEN as u64; + // Clear any pre-existing handle slot so the assert is honest. + mem.write_u32(handle_out, 0xDEAD_BEEF); + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_OBJECT_NAME_NOT_FOUND, + "FILE_OPEN+DIR on missing {} must return NOT_FOUND", + path_str + ); + assert_eq!( + mem.read_u32(handle_out), + 0, + "no handle on cold-boot dir-open miss for {}", + path_str + ); + assert!( + !cache_root.join(leaf_name).exists(), + "{} must NOT be created on disk by a non-create disp", + leaf_name + ); + } + } + + /// Phase C+11.1 — after the cold-boot NOT_FOUND probe (see + /// `cache_open_directory_on_missing_path_returns_not_found`), + /// Sylpheed re-issues `disp=FILE_OVERWRITE_IF (5)` with + /// `FILE_NON_DIRECTORY_FILE` set. That second call MUST produce + /// a regular file, not a directory. This pins the two-call + /// sequence canary actually executes on cold boot. + #[test] + fn cache_disp5_after_disp1_miss_creates_file() { + let (mut ctx, mem, mut state) = fresh(); + let cache_root = state.cache_root.clone().unwrap(); + + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, "cache:\\access"); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + + // 1) Cold disp=1 + opts=0x7 → NOT_FOUND, no host-side entry. 
+ mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, 0x7); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_OPEN as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_OBJECT_NAME_NOT_FOUND); + assert!(!cache_root.join("access").exists()); + + // 2) disp=5 + opts=0x60 (FILE_NON_DIRECTORY_FILE | + // FILE_SYNCHRONOUS_IO_NONALERT) → FILE created. + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, 0x60); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_OVERWRITE_IF as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + assert!( + cache_root.join("access").is_file(), + "disp=5 with NON_DIRECTORY on cache:\\access must produce a host FILE" + ); + } + + /// Phase C+11 — write a `cache:\

.tmp` flat journal, then + /// rename it to the hierarchical leaf `cache:\

\\

` via + /// NtSetInformationFile class 10 (XFileRenameInformation). After the + /// rename, the flat file must be gone and the leaf must contain the + /// original bytes. This is the .tmp-to-leaf promotion that Sylpheed + /// relies on for cache build. + #[test] + fn cache_rename_information_promotes_tmp_to_leaf() { + let (mut ctx, mem, mut state) = fresh(); + + // Create cache:\foo.tmp with FILE_CREATE. + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, "cache:\\foo.tmp"); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, FILE_SYNCHRONOUS_IO_NONALERT); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_CREATE as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + let handle = mem.read_u32(handle_out); + + // Write 4 bytes. + let write_buf = SCRATCH_BASE + 0x400; + for (i, b) in b"abcd".iter().enumerate() { + mem.write_u8(write_buf + i as u32, *b); + } + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; + ctx.gpr[7] = iosb as u64; + ctx.gpr[8] = write_buf as u64; + ctx.gpr[9] = 4; + ctx.gpr[10] = 0; + nt_write_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + + // Confirm the flat .tmp exists. 
+ let cache_root = state.cache_root.clone().expect("must have cache root"); + assert!(cache_root.join("foo.tmp").exists(), ".tmp must exist pre-rename"); + assert!(!cache_root.join("bar").exists(), "leaf must NOT exist yet"); + + // Build XFileRenameInformation buffer at SCRATCH_BASE+0x500: + // offset 0: be replace_existing = 1 + // offset 4: be root_dir_handle = 0 + // offset 8: ANSI_STRING { Length, MaxLength, BufferPtr } + // offset 16: path bytes + let info_buf = SCRATCH_BASE + 0x500; + let target = "cache:\\bar"; + mem.write_u32(info_buf, 1); // replace_existing + mem.write_u32(info_buf + 4, 0); // root_dir_handle + mem.write_u16(info_buf + 8, target.len() as u16); // ANSI_STRING.Length + mem.write_u16(info_buf + 10, target.len() as u16); // ANSI_STRING.MaxLength + mem.write_u32(info_buf + 12, info_buf + 16); // ANSI_STRING.Buffer + for (i, b) in target.bytes().enumerate() { + mem.write_u8(info_buf + 16 + i as u32, b); + } + + // NtSetInformationFile class 10 (rename). + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = iosb as u64; + ctx.gpr[5] = info_buf as u64; + ctx.gpr[6] = 16 + target.len() as u64; // info_length + ctx.gpr[7] = 10; // info_class = XFileRenameInformation + nt_set_information_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS, "rename must succeed"); + + // After rename: .tmp gone, leaf present with the original bytes. + assert!(!cache_root.join("foo.tmp").exists(), ".tmp must be gone"); + assert!(cache_root.join("bar").exists(), "leaf must exist"); + assert_eq!( + std::fs::read(cache_root.join("bar")).unwrap(), + b"abcd", + "leaf must have the original bytes" + ); + } + + /// Phase C+11 — rename also creates intermediate parent directories + /// (Sylpheed's leaf paths are `cache:\

\\

` form; a + /// host-fs `rename` would fail without `create_dir_all` on parent). + #[test] + fn cache_rename_creates_parent_directories() { + let (mut ctx, mem, mut state) = fresh(); + + // Create cache:\src.tmp. + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, "cache:\\src.tmp"); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, FILE_SYNCHRONOUS_IO_NONALERT); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_CREATE as u64; + nt_create_file(&mut ctx, &mem, &mut state); + let handle = mem.read_u32(handle_out); + + // Rename to cache:\d4ea4615\e\46ee8ca (depth-3 hierarchical leaf). + let info_buf = SCRATCH_BASE + 0x500; + let target = "cache:\\d4ea4615\\e\\46ee8ca"; + mem.write_u32(info_buf, 1); + mem.write_u32(info_buf + 4, 0); + mem.write_u16(info_buf + 8, target.len() as u16); + mem.write_u16(info_buf + 10, target.len() as u16); + mem.write_u32(info_buf + 12, info_buf + 16); + for (i, b) in target.bytes().enumerate() { + mem.write_u8(info_buf + 16 + i as u32, b); + } + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = iosb as u64; + ctx.gpr[5] = info_buf as u64; + ctx.gpr[6] = 16 + target.len() as u64; + ctx.gpr[7] = 10; + nt_set_information_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + + let cache_root = state.cache_root.clone().unwrap(); + assert!(cache_root.join("d4ea4615/e/46ee8ca").exists()); + } + + /// Phase C+11 — rename of a non-existent / closed handle returns + /// STATUS_INVALID_HANDLE (canary parity). 
+ #[test] + fn cache_rename_invalid_handle_returns_status() { + let (mut ctx, mem, mut state) = fresh(); + let info_buf = SCRATCH_BASE + 0x500; + let target = "cache:\\target"; + mem.write_u32(info_buf, 1); + mem.write_u32(info_buf + 4, 0); + mem.write_u16(info_buf + 8, target.len() as u16); + mem.write_u16(info_buf + 10, target.len() as u16); + mem.write_u32(info_buf + 12, info_buf + 16); + for (i, b) in target.bytes().enumerate() { + mem.write_u8(info_buf + 16 + i as u32, b); + } + ctx.gpr[3] = 0xDEADBEEF; // bogus handle + ctx.gpr[4] = 0; + ctx.gpr[5] = info_buf as u64; + ctx.gpr[6] = 16 + target.len() as u64; + ctx.gpr[7] = 10; + nt_set_information_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INVALID_HANDLE); + } + + /// Phase C+12 — helper. Pins the wire-format of + /// `X_FILE_NETWORK_OPEN_INFORMATION` produced by + /// `nt_query_full_attributes_file`. Issues the query for `path` and + /// asserts the 8-DWord OUT struct fields (all big-endian). + fn assert_query_attrs_struct( + state: &mut KernelState, + mem: &GuestMemory, + path: &str, + expected_attrs: u32, + expected_size: u64, + ) -> u64 { + let mut ctx = PpcContext::default(); + let obj_attrs = write_obj_attrs(mem, SCRATCH_BASE + 0x100, path); + let out = SCRATCH_BASE + 0x300; + for off in (0..56).step_by(4) { + mem.write_u32(out + off as u32, 0xCDCD_CDCD); + } + ctx.gpr[3] = obj_attrs as u64; + ctx.gpr[4] = out as u64; + nt_query_full_attributes_file(&mut ctx, mem, state); + let status = ctx.gpr[3]; + if status == STATUS_SUCCESS { + assert_eq!( + mem.read_u32(out + 48), + expected_attrs, + "FileAttributes mismatch at {}", + path + ); + assert_eq!( + mem.read_u64(out + 40), + expected_size, + "EndOfFile mismatch at {}", + path + ); + assert_eq!( + mem.read_u32(out + 52), + 0, + "Reserved field must be zero at {}", + path + ); + // AllocationSize == round_up(size, 512) + let expected_alloc = (expected_size + 511) & !511; + assert_eq!( + mem.read_u64(out + 32), + expected_alloc, + 
"AllocationSize mismatch at {}", + path + ); + } + status + } + + /// Phase C+12 — `nt_query_full_attributes_file` returns + /// `STATUS_NO_SUCH_FILE` for a path that's never been created. + /// Mirrors canary's `NtQueryFullAttributesFile_entry` returning + /// `X_STATUS_NO_SUCH_FILE` when `ResolvePath` returns null + /// (`xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc:512`). + #[test] + fn nt_query_full_attributes_file_missing_returns_no_such_file() { + let (_ctx, mem, mut state) = fresh(); + let status = + assert_query_attrs_struct(&mut state, &mem, "cache:\\never_existed", 0, 0); + assert_eq!(status, STATUS_NO_SUCH_FILE); + } + + /// Phase C+12 — after `NtCreateFile cache:\foo` succeeds (which + /// canary's `Entry::CreateEntry` populates the in-memory tree), + /// a follow-up `NtQueryFullAttributesFile` MUST resolve from the + /// in-memory mirror and return SUCCESS with + /// `FILE_ATTRIBUTE_NORMAL` (0x80) for a regular file. + #[test] + fn nt_query_full_attributes_file_after_create_returns_normal() { + let (mut ctx, mem, mut state) = fresh(); + // Create cache:\foo with FILE_OVERWRITE_IF (creates if missing). + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, "cache:\\foo"); + let handle_out = SCRATCH_BASE + 0x400; + let iosb = SCRATCH_BASE + 0x410; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, FILE_SYNCHRONOUS_IO_NONALERT); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_OVERWRITE_IF as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + // Now query. + let status = assert_query_attrs_struct( + &mut state, + &mem, + "cache:\\foo", + crate::state::X_FILE_ATTRIBUTE_NORMAL, + 0, + ); + assert_eq!(status, STATUS_SUCCESS); + } + + /// Phase C+12 — mount-time scan picks up files that already exist + /// on disk under the cache root (canary's `HostPathDevice:: + /// PopulateEntry` analogue). 
The probe MUST succeed even though + /// no `NtCreateFile` ran this boot — this is exactly the canary + /// behaviour ours was missing at idx 102404. + #[test] + fn nt_query_full_attributes_file_resolves_preexisting_host_entry() { + let mut state = KernelState::new(); + let dir = std::env::temp_dir().join(format!( + "xenia-rs-cache-test-c12pre-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .subsec_nanos() + )); + std::fs::create_dir_all(dir.join("d4ea4615").join("e")).unwrap(); + std::fs::write(dir.join("d4ea4615").join("e").join("46ee8ca"), b"oracle").unwrap(); + // `set_cache_root` performs the eager scan. + state.set_cache_root(dir.clone()); + + // Wire up scratch + initial thread (mirrors `fresh()`). + let mut mem = GuestMemory::new().expect("memory init"); + mem.alloc(SCRATCH_BASE, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) + .expect("scratch page must commit"); + state.install_initial_thread( + PpcContext::default(), + 0x7000_0000, + 0x10_0000, + SCRATCH_BASE + 0x800, + SCRATCH_BASE + 0xC00, + 0x1000, + &mut mem, + ); + state.scheduler.begin_slot_visit(0); + + let status = assert_query_attrs_struct( + &mut state, + &mem, + "cache:\\d4ea4615\\e\\46ee8ca", + crate::state::X_FILE_ATTRIBUTE_NORMAL, + 6, // strlen("oracle") + ); + assert_eq!(status, STATUS_SUCCESS); + // Directory probe must also resolve (mount-time scan inserts + // both files and dirs). + let status_dir = assert_query_attrs_struct( + &mut state, + &mem, + "cache:\\d4ea4615", + crate::state::X_FILE_ATTRIBUTE_DIRECTORY, + 0, + ); + assert_eq!(status_dir, STATUS_SUCCESS); + + std::fs::remove_dir_all(&dir).ok(); + } + + /// Phase C+12 — pin the FILETIME conversion: a known Unix epoch + /// value (`1_700_000_000` seconds = 2023-11-14 22:13:20 UTC) + /// converts to the expected Windows FILETIME tick count. 
+ #[test] + fn unix_to_filetime_known_value() { + let t = std::time::UNIX_EPOCH + std::time::Duration::from_secs(1_700_000_000); + let ft = crate::state::unix_to_filetime(t); + // (1_700_000_000 + 11_644_473_600) * 10_000_000 = 133_444_736_000_000_000 + assert_eq!(ft, 133_444_736_000_000_000); + } + + /// Phase C+12 — `change_time` slot (offset 24) MUST equal + /// `last_write_time` (offset 16), mirroring canary's + /// `xboxkrnl_io.cc:504` line `file_info->change_time = + /// entry->write_timestamp();`. This is the only field where the + /// brief's "4 distinct FILETIMEs" framing differs from canary's + /// actual semantics. + #[test] + fn nt_query_full_attributes_file_change_time_equals_write_time() { + let (mut ctx, mem, mut state) = fresh(); + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, "cache:\\writeme"); + let handle_out = SCRATCH_BASE + 0x400; + let iosb = SCRATCH_BASE + 0x410; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, FILE_SYNCHRONOUS_IO_NONALERT); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_OVERWRITE_IF as u64; + nt_create_file(&mut ctx, &mem, &mut state); + + let out = SCRATCH_BASE + 0x300; + ctx.gpr[3] = obj_attrs as u64; + ctx.gpr[4] = out as u64; + nt_query_full_attributes_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + let last_write = mem.read_u64(out + 16); + let change = mem.read_u64(out + 24); + assert_eq!( + change, last_write, + "change_time must equal last_write_time per canary xboxkrnl_io.cc:504" + ); + } + + /// Phase C+13 — `is_disc_prefix` recognises every alias canary maps + /// to the read-only disc partition: `game:\`, `d:\`/`D:\`, and the + /// raw NT device path `\Device\Cdrom0\`. Anything else (writable + /// partitions, raw paths) must return false so the synth-empty + /// fallback still fires. 
+ #[test] + fn is_disc_prefix_recognises_disc_aliases() { + assert!(is_disc_prefix("game:\\dat\\files.tbl")); + assert!(is_disc_prefix("GAME:\\dat\\files.tbl")); + assert!(is_disc_prefix("d:\\default.xex")); + assert!(is_disc_prefix("D:\\default.xex")); + assert!(is_disc_prefix("\\Device\\Cdrom0\\dat\\files.tbl")); + assert!(is_disc_prefix("\\DEVICE\\CDROM0\\foo")); + // Non-disc prefixes must NOT count. + assert!(!is_disc_prefix("cache:\\d4ea4615\\e\\46ee8ca")); + assert!(!is_disc_prefix("\\Device\\Harddisk0\\Partition1\\x")); + assert!(!is_disc_prefix("\\??\\foo")); + assert!(!is_disc_prefix("\\Device\\Mass0\\foo")); + assert!(!is_disc_prefix("scripts/init.lua")); + assert!(!is_disc_prefix("")); + } + + /// Phase C+13 — `NtCreateFile` on a disc-prefixed path that the VFS + /// can't resolve returns `STATUS_OBJECT_NAME_NOT_FOUND` (mirrors + /// canary `xboxkrnl_io.cc:83-110` which forwards the lookup + /// status verbatim, idx 103862 first divergence). Sylpheed + /// handles NOT_FOUND via `RtlNtStatusToDosError` then continues + /// its boot validator. + #[test] + fn nt_create_file_game_prefix_missing_returns_not_found() { + let (mut ctx, mem, mut state) = fresh(); + // Install a stub VFS that doesn't resolve anything — mirrors a + // disc image that doesn't contain `dat/files.tbl`. 
+ state.vfs = Some(Box::new(StubVfs { entries: vec![] })); + let obj_attrs = write_obj_attrs(&mem, SCRATCH_BASE + 0x100, "game:\\dat\\files.tbl"); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, 0); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_OPEN as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_OBJECT_NAME_NOT_FOUND, + "missing disc file must return STATUS_OBJECT_NAME_NOT_FOUND" + ); + assert_eq!( + mem.read_u32(handle_out), + 0, + "no handle returned on NOT_FOUND" + ); + assert_eq!( + mem.read_u32(iosb), + STATUS_OBJECT_NAME_NOT_FOUND as u32, + "IOSB.status records NOT_FOUND" + ); + } + + /// Phase C+13 — same as above for the `\Device\Cdrom0\` NT-device + /// alias of the disc. + #[test] + fn nt_create_file_cdrom_prefix_missing_returns_not_found() { + let (mut ctx, mem, mut state) = fresh(); + state.vfs = Some(Box::new(StubVfs { entries: vec![] })); + let obj_attrs = write_obj_attrs( + &mem, + SCRATCH_BASE + 0x100, + "\\Device\\Cdrom0\\dat\\files.tbl", + ); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, 0); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_OPEN as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_OBJECT_NAME_NOT_FOUND); + } + + /// Phase C+13 — a non-disc prefix that misses the VFS still gets + /// the legacy zero-byte synth (preserves audit-006 / audit-018 + /// behaviour for writable system-partition opens that ours + /// doesn't host-mount). `\Device\Harddisk0\Partition1\` is the + /// canonical writable mount. 
+ #[test] + fn nt_create_file_non_disc_prefix_missing_still_synthesizes() { + let (mut ctx, mem, mut state) = fresh(); + state.vfs = Some(Box::new(StubVfs { entries: vec![] })); + let obj_attrs = write_obj_attrs( + &mem, + SCRATCH_BASE + 0x100, + "\\Device\\Harddisk0\\Partition1\\sys.bin", + ); + let handle_out = SCRATCH_BASE + 0x300; + let iosb = SCRATCH_BASE + 0x310; + ctx.gpr[1] = (SCRATCH_BASE + 0x700) as u64; + mem.write_u32(SCRATCH_BASE + 0x700 + 0x54, 0); + ctx.gpr[3] = handle_out as u64; + ctx.gpr[5] = obj_attrs as u64; + ctx.gpr[6] = iosb as u64; + ctx.gpr[10] = FILE_OPEN as u64; + nt_create_file(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_SUCCESS, + "non-disc missing path keeps synth-empty" + ); + let handle = mem.read_u32(handle_out); + assert!(handle >= 0x1000, "synth handle must be allocated"); + assert_eq!(mem.read_u32(iosb), STATUS_SUCCESS as u32); + } + /// `resolve_cache_path` rejects path-traversal attempts so a guest /// can't escape the cache directory by passing `cache:\..\..\etc\foo`. 
#[test] @@ -6353,4 +9061,1015 @@ mod tests { assert!(resolved.ends_with("etc/foo")); std::fs::remove_dir_all(&dir).ok(); } + + // ===== Stage 2 Batch 2: Crypto handlers ===== + + #[test] + fn xe_crypt_sha_empty_input_writes_canonical_digest() { + let (mut ctx, mem, mut state) = fresh(); + let input_ptr = SCRATCH_BASE; + let output_ptr = SCRATCH_BASE + 0x100; + ctx.gpr[3] = input_ptr as u64; + ctx.gpr[4] = 0; // input_1_size = 0 (skips this buffer) + ctx.gpr[5] = 0; + ctx.gpr[6] = 0; + ctx.gpr[7] = 0; + ctx.gpr[8] = 0; + ctx.gpr[9] = output_ptr as u64; + ctx.gpr[10] = 20; + xe_crypt_sha(&mut ctx, &mem, &mut state); + let mut got = [0u8; 20]; + mem.read_bytes(output_ptr, &mut got); + // SHA-1 of empty input + let expected: [u8; 20] = [ + 0xDA, 0x39, 0xA3, 0xEE, 0x5E, 0x6B, 0x4B, 0x0D, 0x32, 0x55, 0xBF, 0xEF, 0x95, 0x60, + 0x18, 0x90, 0xAF, 0xD8, 0x07, 0x09, + ]; + assert_eq!(got, expected); + } + + #[test] + fn xe_crypt_sha_three_inputs_concatenate() { + let (mut ctx, mem, mut state) = fresh(); + let buf_a = SCRATCH_BASE; + let buf_b = SCRATCH_BASE + 0x10; + let buf_c = SCRATCH_BASE + 0x20; + let output_ptr = SCRATCH_BASE + 0x100; + mem.write_bytes(buf_a, b"abc"); + mem.write_bytes(buf_b, b"def"); + mem.write_bytes(buf_c, b"ghi"); + ctx.gpr[3] = buf_a as u64; + ctx.gpr[4] = 3; + ctx.gpr[5] = buf_b as u64; + ctx.gpr[6] = 3; + ctx.gpr[7] = buf_c as u64; + ctx.gpr[8] = 3; + ctx.gpr[9] = output_ptr as u64; + ctx.gpr[10] = 20; + xe_crypt_sha(&mut ctx, &mem, &mut state); + let mut got = [0u8; 20]; + mem.read_bytes(output_ptr, &mut got); + // SHA-1("abcdefghi") = c63b19f1e4c8b5f76b25c49b8b87f57d8e4872a1 + let expected: [u8; 20] = [ + 0xC6, 0x3B, 0x19, 0xF1, 0xE4, 0xC8, 0xB5, 0xF7, 0x6B, 0x25, 0xC4, 0x9B, 0x8B, 0x87, + 0xF5, 0x7D, 0x8E, 0x48, 0x72, 0xA1, + ]; + assert_eq!(got, expected); + } + + #[test] + fn xe_crypt_sha_truncates_output() { + let (mut ctx, mem, mut state) = fresh(); + let output_ptr = SCRATCH_BASE + 0x100; + // Pre-fill 0xFF so we can verify only 4 bytes 
were written. + mem.write_bytes(output_ptr, &[0xFFu8; 20]); + ctx.gpr[3] = 0; + ctx.gpr[4] = 0; + ctx.gpr[5] = 0; + ctx.gpr[6] = 0; + ctx.gpr[7] = 0; + ctx.gpr[8] = 0; + ctx.gpr[9] = output_ptr as u64; + ctx.gpr[10] = 4; // truncate to 4 bytes + xe_crypt_sha(&mut ctx, &mem, &mut state); + // First 4 bytes match SHA-1 of empty; next 16 stay 0xFF. + let mut got = [0u8; 20]; + mem.read_bytes(output_ptr, &mut got); + assert_eq!(&got[..4], &[0xDA, 0x39, 0xA3, 0xEE]); + assert_eq!(&got[4..], &[0xFFu8; 16]); + } + + #[test] + fn xe_keys_console_private_key_sign_writes_certificate_and_returns_one() { + let (mut ctx, mem, mut state) = fresh(); + let hash_ptr = SCRATCH_BASE; + let output_ptr = SCRATCH_BASE + 0x100; + ctx.gpr[3] = hash_ptr as u64; + ctx.gpr[4] = output_ptr as u64; + xe_keys_console_private_key_sign(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 1, "must return success"); + // console_type at 0x18 (u32 BE) = Retail (2) + assert_eq!(mem.read_u32(output_ptr + 0x18), 2); + // manufacture_date at 0x1C + let mut mfg = [0u8; 8]; + mem.read_bytes(output_ptr + 0x1C, &mut mfg); + assert_eq!(mfg, [2, 0, 0, 5, 1, 1, 2, 2]); + // XE_CONSOLE_ID byte 0 at offset 0x02 + assert_eq!(mem.read_u8(output_ptr + 0x02), 0x93); + // cert_size and console_part_number must remain zero (Zero() output) + assert_eq!(mem.read_u16(output_ptr), 0); + assert_eq!(mem.read_u8(output_ptr + 0x07), 0); + } + + // ===== Stage 2 Batch 6: ExGetXConfigSetting ===== + + #[test] + fn ex_get_xconfig_setting_user_language_returns_one() { + let (mut ctx, mem, mut state) = fresh(); + let buf = SCRATCH_BASE + 0x200; + let req = SCRATCH_BASE + 0x208; + mem.write_u32(buf, 0xDEAD_BEEF); + mem.write_u16(req, 0xFFFF); + ctx.gpr[3] = 0x03; // USER_CATEGORY + ctx.gpr[4] = 0x09; // USER_LANGUAGE + ctx.gpr[5] = buf as u64; + ctx.gpr[6] = 4; + ctx.gpr[7] = req as u64; + ex_get_xconfig_setting(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "STATUS_SUCCESS"); + assert_eq!(mem.read_u32(buf), 1, 
"USER_LANGUAGE = en"); + assert_eq!(mem.read_u16(req), 4, "required_size = 4 bytes"); + } + + #[test] + fn ex_get_xconfig_setting_unknown_returns_invalid_parameter() { + let (mut ctx, mem, mut state) = fresh(); + let buf = SCRATCH_BASE + 0x200; + ctx.gpr[3] = 0xDEAD; + ctx.gpr[4] = 0xBEEF; + ctx.gpr[5] = buf as u64; + ctx.gpr[6] = 4; + ctx.gpr[7] = 0; + ex_get_xconfig_setting(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0xC000_00F0, "STATUS_INVALID_PARAMETER_2"); + } + + #[test] + fn ex_get_xconfig_setting_buffer_too_small_returns_error() { + let (mut ctx, mem, mut state) = fresh(); + let buf = SCRATCH_BASE + 0x200; + mem.write_u32(buf, 0xDEAD_BEEF); + ctx.gpr[3] = 0x03; // USER_CATEGORY + ctx.gpr[4] = 0x09; // USER_LANGUAGE (4 bytes) + ctx.gpr[5] = buf as u64; + ctx.gpr[6] = 2; // too small + ctx.gpr[7] = 0; + ex_get_xconfig_setting(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0xC000_0023, "STATUS_BUFFER_TOO_SMALL"); + // Buffer untouched + assert_eq!(mem.read_u32(buf), 0xDEAD_BEEF); + } + + // ===== Stage 2 Batch 5: IRQL pair ===== + + /// Stage 2 Batch 5: `KeRaiseIrqlToDpcLevel` reads PCR's current_irql, + /// returns it in r3, and writes DISPATCH_LEVEL=2 back. + #[test] + fn ke_raise_irql_to_dpc_level_returns_old_writes_dispatch_level() { + let (mut ctx, mem, mut state) = fresh(); + let pcr = SCRATCH_BASE + 0x500; + // Initial IRQL = PASSIVE_LEVEL (0). + mem.write_u8(pcr + PCR_CURRENT_IRQL_OFFSET, 0); + ctx.gpr[13] = pcr as u64; + ke_raise_irql_to_dpc_level(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "old IRQL = PASSIVE_LEVEL"); + assert_eq!( + mem.read_u8(pcr + PCR_CURRENT_IRQL_OFFSET), + 2, + "PCR.current_irql = DISPATCH_LEVEL" + ); + // Second Raise returns 2 (already at DPC). + ke_raise_irql_to_dpc_level(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 2); + assert_eq!(mem.read_u8(pcr + PCR_CURRENT_IRQL_OFFSET), 2); + } + + /// Stage 2 Batch 5: Raise → Lower round-trip leaves PCR at the value + /// passed to Lower. 
Demonstrates the IRQL nesting invariant. + #[test] + fn ke_irql_raise_lower_round_trip() { + let (mut ctx, mem, mut state) = fresh(); + let pcr = SCRATCH_BASE + 0x500; + mem.write_u8(pcr + PCR_CURRENT_IRQL_OFFSET, 0); + ctx.gpr[13] = pcr as u64; + ke_raise_irql_to_dpc_level(&mut ctx, &mem, &mut state); + let prev = ctx.gpr[3] as u8; + assert_eq!(prev, 0); + assert_eq!(mem.read_u8(pcr + PCR_CURRENT_IRQL_OFFSET), 2); + // Restore. + ctx.gpr[3] = prev as u64; + kf_lower_irql(&mut ctx, &mem, &mut state); + assert_eq!( + mem.read_u8(pcr + PCR_CURRENT_IRQL_OFFSET), + 0, + "PCR.current_irql restored to PASSIVE_LEVEL" + ); + } + + #[test] + fn xe_keys_console_private_key_sign_rejects_null_inputs() { + let (mut ctx, mem, mut state) = fresh(); + let output_ptr = SCRATCH_BASE + 0x100; + // null hash + ctx.gpr[3] = 0; + ctx.gpr[4] = output_ptr as u64; + xe_keys_console_private_key_sign(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "must return failure on null hash"); + // null output + ctx.gpr[3] = 0x1234_5678; + ctx.gpr[4] = 0; + xe_keys_console_private_key_sign(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "must return failure on null output"); + } + + // --------------------------------------------------------------- + // Phase C+7 — KeSetEvent / NtSetEvent canary-parity return value + // --------------------------------------------------------------- + + /// Canary parity: `KeSetEvent` on an unsignaled auto-reset event + /// must return constant `1` (NOT prior state). See investigation + /// for the `XEvent::Set` reference path. 
+ #[test] + fn ke_set_event_returns_constant_one_on_unsignaled_auto_reset() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0x900; + write_dispatcher_header(&mut mem, kevent_ptr, 1, 0); // auto-reset, unsignaled + ctx.gpr[3] = kevent_ptr as u64; + ke_set_event(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], 1, + "KeSetEvent must return constant 1 on success (canary parity, xevent.cc:60-64)" + ); + // Shadow must be signaled even though the return value is constant. + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { signaled, .. }) => assert!(*signaled), + _ => panic!("shadow not minted"), + } + } + + /// Canary parity: `KeSetEvent` on an already-signaled manual-reset + /// event also returns constant `1` (not prior `1`). Same constant. + #[test] + fn ke_set_event_returns_constant_one_on_already_signaled_manual_reset() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0xA00; + write_dispatcher_header(&mut mem, kevent_ptr, 0, 1); // manual-reset, signaled + ctx.gpr[3] = kevent_ptr as u64; + ke_set_event(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], 1, + "KeSetEvent returns 1 regardless of prior state (canary parity)" + ); + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { signaled, .. }) => assert!(*signaled), + _ => panic!("shadow vanished"), + } + } + + /// Canary parity: `NtSetEvent` with null `PreviousState` ptr returns + /// STATUS_SUCCESS and performs no out-pointer write. 
+ #[test] + fn nt_set_event_null_prev_ptr_returns_status_success_no_write() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset: false, + signaled: false, + waiters: Vec::new(), + }); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = 0; // null out-pointer + nt_set_event(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_SUCCESS, + "NtSetEvent must return STATUS_SUCCESS" + ); + // Event must be signaled. + match state.objects.get(&handle) { + Some(KernelObject::Event { signaled, .. }) => assert!(*signaled), + _ => panic!("handle lookup broken"), + } + } + + /// Canary parity: `NtSetEvent` with a valid out-pointer writes + /// **constant 1** (canary's `was_signalled = ev->Set()` always 1), + /// NOT the prior signaled state. See xboxkrnl_threading.cc:610-628. + #[test] + fn nt_set_event_valid_prev_ptr_writes_constant_one_and_returns_success() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset: false, + signaled: false, + waiters: Vec::new(), + }); + let prev_ptr = SCRATCH_BASE + 0xB00; + mem.write_u32(prev_ptr, 0xDEAD_BEEF); // sentinel — overwrite expected + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = prev_ptr as u64; + nt_set_event(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_SUCCESS, + "NtSetEvent must return STATUS_SUCCESS" + ); + assert_eq!( + mem.read_u32(prev_ptr), + 1, + "PreviousState out-ptr must receive constant 1 (canary parity)" + ); + } + + /// Canary parity: `NtSetEvent` on an already-signaled event still + /// writes constant `1` to the out-pointer (not the prior `1`, + /// though they happen to match here — distinguished from the + /// prior-state-write bug by the auto-reset/un-signaled case above). 
+ #[test] + fn nt_set_event_on_signaled_event_writes_one() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset: true, + signaled: true, + waiters: Vec::new(), + }); + let prev_ptr = SCRATCH_BASE + 0xC00; + mem.write_u32(prev_ptr, 0); + ctx.gpr[3] = handle as u64; + ctx.gpr[4] = prev_ptr as u64; + nt_set_event(&mut ctx, &mut mem, &mut state); + assert_eq!(mem.read_u32(prev_ptr), 1); + // Event stays signaled (manual-reset). + match state.objects.get(&handle) { + Some(KernelObject::Event { signaled, .. }) => assert!(*signaled), + _ => panic!("handle lookup broken"), + } + } + + /// Wake-cascade regression: KeSetEvent on a manual-reset event with + /// a parked waiter still wakes the waiter post-fix. The return-value + /// change is observation-only — internal wake plumbing uses the + /// `previous` read, not the return value. + #[test] + fn ke_set_event_post_fix_still_wakes_waiter() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0xD00; + write_dispatcher_header(&mut mem, kevent_ptr, 0, 0); // manual-reset, unsignaled + // Mint the shadow first by calling reset_event (no waiter yet). + ctx.gpr[3] = kevent_ptr as u64; + ke_reset_event(&mut ctx, &mut mem, &mut state); + // Park a fake waiter. + match state.objects.get_mut(&kevent_ptr) { + Some(KernelObject::Event { waiters, .. }) => { + waiters.push(ThreadRef { hw_id: 4, idx: 0, generation: 0 }); + } + _ => panic!("shadow not minted"), + } + // Signal. + ctx.gpr[3] = kevent_ptr as u64; + ke_set_event(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 1, "constant 1 return preserved"); + // Manual-reset: waiter list drained after wake. + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { signaled, waiters, .. 
}) => { + assert!(*signaled, "manual-reset stays signaled"); + assert!(waiters.is_empty(), "manual-reset wake drains all waiters"); + } + _ => panic!("shadow vanished"), + } + } + + // --------------------------------------------------------------- + // Phase C+8 — KeResetEvent canary-parity return value (sibling of C+7) + // --------------------------------------------------------------- + + /// Canary parity: `KeResetEvent` on an unsignaled manual-reset event + /// must return constant `1` on shadow hit (NOT prior `0`). Canary's + /// `XEvent::Reset` hardcodes `return 1` regardless of prior state + /// (xevent.cc:72-75), exactly mirroring `XEvent::Set`. This is the + /// case that triggered the Phase A divergence at idx=102164: prior + /// state was unsignaled (`0`) and the prior-state-return bug gave + /// `0` while canary returns `1`. + #[test] + fn ke_reset_event_returns_constant_one_on_unsignaled_manual_reset() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0xE00; + write_dispatcher_header(&mut mem, kevent_ptr, 0, 0); // manual-reset, unsignaled + ctx.gpr[3] = kevent_ptr as u64; + ke_reset_event(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], 1, + "KeResetEvent must return constant 1 on success (canary parity, xevent.cc:72-75)" + ); + // Shadow stays unsignaled (was already 0, reset is idempotent). + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { signaled, .. }) => assert!(!*signaled), + _ => panic!("shadow not minted"), + } + } + + /// Canary parity: `KeResetEvent` on a signaled auto-reset event also + /// returns constant `1`. Distinguished from the prior-state-return + /// bug by the unsignaled case above (where they would differ: bug=0 + /// vs canary=1). 
+ #[test] + fn ke_reset_event_returns_constant_one_on_signaled_auto_reset() { + let (mut ctx, mut mem, mut state) = fresh(); + let kevent_ptr = SCRATCH_BASE + 0xF00; + write_dispatcher_header(&mut mem, kevent_ptr, 1, 1); // auto-reset, signaled + ctx.gpr[3] = kevent_ptr as u64; + ke_reset_event(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], 1, + "KeResetEvent returns 1 regardless of prior state (canary parity)" + ); + match state.objects.get(&kevent_ptr) { + Some(KernelObject::Event { signaled, .. }) => { + assert!(!*signaled, "ke_reset_event must clear the shadow"); + } + _ => panic!("shadow vanished"), + } + } + + /// Canary parity: `KeResetEvent` on a non-existent shadow (and a + /// PKEVENT that doesn't match a dispatcher type the lazy-shadow can + /// mint) must return `0` — canary's `assert_always(); return 0` arm + /// for the no-XEvent-bound case (xboxkrnl_threading.cc:566-574). + /// We model this via a pointer below the dispatcher-shim threshold + /// (handle range, no kevent header pre-written). + #[test] + fn ke_reset_event_returns_zero_on_missing_object() { + let (mut ctx, mut mem, mut state) = fresh(); + // Use a low handle-range value with no allocated object — no + // shadow mint (handle path), no dispatcher header to lazy-mint + // from (ptr below 0x10000 means ensure_dispatcher_object skips). + ctx.gpr[3] = 0x4242; // arbitrary handle that doesn't exist + ke_reset_event(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], 0, + "KeResetEvent must return 0 when no event object is bound (canary's assert_always arm)" + ); + } + + /// `NtClearEvent` parity: returns `STATUS_SUCCESS` and resets the + /// shadow signaled flag. Unlike NtSetEvent, NtClearEvent has NO + /// PreviousState out-pointer (xboxkrnl_threading.cc:685-687 → + /// xeNtClearEvent calls XEvent::Clear which is void-returning). + /// Verified canary-parity; included for symmetry coverage. 
+ #[test] + fn nt_clear_event_resets_shadow_and_returns_status_success() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset: true, + signaled: true, + waiters: Vec::new(), + }); + ctx.gpr[3] = handle as u64; + nt_clear_event(&mut ctx, &mut mem, &mut state); + assert_eq!( + ctx.gpr[3], STATUS_SUCCESS, + "NtClearEvent must return STATUS_SUCCESS on hit" + ); + match state.objects.get(&handle) { + Some(KernelObject::Event { signaled, .. }) => { + assert!(!*signaled, "nt_clear_event must clear the shadow"); + } + _ => panic!("handle lookup broken"), + } + } + + /// Phase C+16: `ExCreateThread` must install a thread self-reference + /// (handle refcount = 2 post-spawn). Mirrors canary's + /// `XThread::Create::RetainHandle()` at xthread.cc:414. Without + /// this, a guest `NtClose` on the thread handle destroys it + /// prematurely while the spawned thread is still live — the + /// original C+16 divergence at Phase A idx=102168. + #[test] + fn ex_create_thread_installs_self_reference() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x100; + let thread_id_ptr = SCRATCH_BASE + 0x108; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[4] = 0x10000; // stack_size + ctx.gpr[5] = thread_id_ptr as u64; + ctx.gpr[6] = 0; // xapi_startup + ctx.gpr[7] = 0x8200_1000; // start_address + ctx.gpr[8] = 0; // start_context + ctx.gpr[9] = 0; // creation_flags (not suspended, affinity = 0) + ex_create_thread(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS, "ExCreateThread must succeed"); + let handle = mem.read_u32(handle_ptr); + assert_eq!( + state.handle_refcount.get(&handle).copied(), + Some(2), + "ExCreateThread must install self-ref (refcount = creator + self = 2)" + ); + } + + /// Phase C+16: `ExTerminateThread` releases the self-reference. 
The + /// thread terminates from inside its own context, so we spawn a + /// worker via `ex_create_thread`, switch to its slot, and then + /// terminate. Post-terminate: refcount = 1 (creator-only, handle + /// still alive). Mirrors canary's `XThread::Exit::ReleaseHandle()` + /// at xthread.cc:524. + #[test] + fn ex_terminate_thread_releases_self_reference() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x100; + let thread_id_ptr = SCRATCH_BASE + 0x108; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[4] = 0x10000; + ctx.gpr[5] = thread_id_ptr as u64; + ctx.gpr[6] = 0; + ctx.gpr[7] = 0x8200_1000; + ctx.gpr[8] = 0; + ctx.gpr[9] = 0; + ex_create_thread(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + assert_eq!(state.handle_refcount.get(&handle).copied(), Some(2)); + + // Switch to the spawned thread's slot so `exit_current` sees it. + let r = state + .scheduler + .find_by_handle(handle) + .expect("spawned thread must be findable"); + state.scheduler.current = Some(r); + + let mut term_ctx = PpcContext::default(); + term_ctx.gpr[3] = 0; // exit_code + ex_terminate_thread(&mut term_ctx, &mem, &mut state); + + // self-ref dropped → refcount = 1 (creator still holds). + assert_eq!( + state.handle_refcount.get(&handle).copied(), + Some(1), + "ex_terminate_thread must release the self-ref" + ); + assert!( + state.objects.contains_key(&handle), + "object must survive (creator-ref still held)" + ); + } + + /// Phase C+16: end-to-end refcount lifecycle balance. Spawn → + /// user closes → thread exits → object destroyed. No leak. 
+ #[test] + fn ex_create_then_close_then_exit_balances_refcount() { + let (mut ctx, mut mem, mut state) = fresh(); + let handle_ptr = SCRATCH_BASE + 0x100; + let thread_id_ptr = SCRATCH_BASE + 0x108; + ctx.gpr[3] = handle_ptr as u64; + ctx.gpr[4] = 0x10000; + ctx.gpr[5] = thread_id_ptr as u64; + ctx.gpr[6] = 0; + ctx.gpr[7] = 0x8200_1000; + ctx.gpr[8] = 0; + ctx.gpr[9] = 0; + ex_create_thread(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_ptr); + + // User NtClose: refcount 2 → 1, object survives. + let mut close_ctx = PpcContext::default(); + close_ctx.gpr[3] = handle as u64; + nt_close(&mut close_ctx, &mem, &mut state); + assert!(state.objects.contains_key(&handle)); + assert_eq!(state.handle_refcount.get(&handle).copied(), Some(1)); + + // Thread exits: refcount 1 → 0, object destroyed. + let r = state + .scheduler + .find_by_handle(handle) + .expect("must still be findable"); + state.scheduler.current = Some(r); + let mut term_ctx = PpcContext::default(); + term_ctx.gpr[3] = 0; + ex_terminate_thread(&mut term_ctx, &mem, &mut state); + + assert!( + !state.objects.contains_key(&handle), + "object must be destroyed at zero refcount" + ); + assert!( + !state.handle_refcount.contains_key(&handle), + "refcount entry must be scrubbed" + ); + } + + // ===== Phase C+19: NtDuplicateObject fresh-slot semantics ===== + + /// Helper: create an Event and duplicate it; return (source, dup, state). 
+ fn create_event_and_dup( + mem: &GuestMemory, + state: &mut KernelState, + ) -> (u32, u32) { + let source = state.alloc_handle_for(KernelObject::Event { + manual_reset: false, + signaled: false, + waiters: Vec::new(), + }); + let mut ctx = PpcContext::default(); + ctx.gpr[3] = source as u64; + let out_ptr = SCRATCH_BASE + 0x100; + mem.write_u32(out_ptr, 0xDEAD_BEEF); + ctx.gpr[4] = out_ptr as u64; + ctx.gpr[5] = 0; // no DUPLICATE_CLOSE_SOURCE + nt_duplicate_object(&mut ctx, mem, state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + let dup = mem.read_u32(out_ptr); + (source, dup) + } + + /// Phase C+19: dup id is a *fresh* slot, NOT aliased to source. Mirrors + /// canary's `ObjectTable::DuplicateHandle` → `AddHandle` (object_table.cc:210). + #[test] + fn nt_duplicate_object_allocates_fresh_handle_id() { + let (_ctx, mem, mut state) = fresh(); + let (source, dup) = create_event_and_dup(&mem, &mut state); + assert_ne!(dup, source, "dup id must be distinct from source"); + assert_ne!(dup, 0, "dup id must be non-zero"); + } + + /// AUDIT-062 INVARIANT (signal-on-dup wakes wait-on-source): the dup + /// alias canonicalizes back to the source `state.objects` entry, so + /// signaling the dup mutates the same `KernelObject::Event` that the + /// source slot points at. This is THE load-bearing test — if it fails + /// the C+19 fix has broken the AUDIT-062 worker-cluster wedge. + #[test] + fn nt_duplicate_object_signal_on_dup_wakes_wait_on_source() { + let (mut ctx, mut mem, mut state) = fresh(); + let (source, dup) = create_event_and_dup(&mem, &mut state); + + // Signal via dup. + ctx.gpr[3] = dup as u64; + ctx.gpr[4] = 0; + nt_set_event(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + + // Source's event entry must show signaled=true (shared underlying). + match state.objects.get(&source) { + Some(KernelObject::Event { signaled, .. 
}) => { + assert!(*signaled, "source event must be signaled by dup signal"); + } + _ => panic!("source lookup must hit the canonical Event"), + } + } + + /// Symmetric: signal-on-source wakes wait-on-dup. Both lookup paths + /// canonicalize to the same entry. + #[test] + fn nt_duplicate_object_signal_on_source_visible_via_dup() { + let (mut ctx, mut mem, mut state) = fresh(); + let (source, dup) = create_event_and_dup(&mem, &mut state); + + ctx.gpr[3] = source as u64; + ctx.gpr[4] = 0; + nt_set_event(&mut ctx, &mut mem, &mut state); + + // Resolve dup → source and check signaled. + let canonical = state.resolve_handle(dup); + assert_eq!(canonical, source); + match state.objects.get(&canonical) { + Some(KernelObject::Event { signaled, .. }) => { + assert!(*signaled); + } + _ => panic!(), + } + } + + /// Refcount: both source and dup slots independently get + /// `handle_refcount = 1`. The canonical's `canonical_slot_count` rises + /// to 2 (one per slot). Mirrors canary AddHandle (one Retain per slot). + #[test] + fn nt_duplicate_object_refcount_lifecycle() { + let (_ctx, mem, mut state) = fresh(); + let (source, dup) = create_event_and_dup(&mem, &mut state); + + assert_eq!(state.handle_refcount.get(&source).copied(), Some(1)); + assert_eq!(state.handle_refcount.get(&dup).copied(), Some(1)); + assert_eq!(state.canonical_slot_count.get(&source).copied(), Some(2)); + assert_eq!(state.handle_aliases.get(&dup).copied(), Some(source)); + } + + /// Close the dup first: dup slot is gone, source slot remains, underlying + /// object remains. Symmetric to canary's per-slot `RemoveHandle` (the + /// underlying XObject survives until the last slot is gone). 
+ #[test] + fn nt_duplicate_object_then_close_dup_keeps_source_live() { + let (_ctx, mem, mut state) = fresh(); + let (source, dup) = create_event_and_dup(&mem, &mut state); + + let mut close_ctx = PpcContext::default(); + close_ctx.gpr[3] = dup as u64; + nt_close(&mut close_ctx, &mem, &mut state); + + assert!(!state.handle_refcount.contains_key(&dup)); + assert!(!state.handle_aliases.contains_key(&dup)); + assert!(state.objects.contains_key(&source)); + assert_eq!(state.handle_refcount.get(&source).copied(), Some(1)); + assert_eq!(state.canonical_slot_count.get(&source).copied(), Some(1)); + } + + /// Close source first: source slot is gone, dup slot remains, and + /// crucially the underlying object remains so the dup can still be + /// used. Sister of the above. + #[test] + fn nt_duplicate_object_then_close_source_keeps_dup_live() { + let (_ctx, mem, mut state) = fresh(); + let (source, dup) = create_event_and_dup(&mem, &mut state); + + let mut close_ctx = PpcContext::default(); + close_ctx.gpr[3] = source as u64; + nt_close(&mut close_ctx, &mem, &mut state); + + assert!(!state.handle_refcount.contains_key(&source)); + // Underlying object survives (canonical entry alive through dup slot). + assert!(state.objects.contains_key(&source)); + // Dup still points at it. + assert_eq!(state.resolve_handle(dup), source); + // Slot count down to 1 (just the dup). + assert_eq!(state.canonical_slot_count.get(&source).copied(), Some(1)); + + // Signal through dup still works. + let mut set_ctx = PpcContext::default(); + let mut mem = mem; + set_ctx.gpr[3] = dup as u64; + set_ctx.gpr[4] = 0; + nt_set_event(&mut set_ctx, &mut mem, &mut state); + match state.objects.get(&source) { + Some(KernelObject::Event { signaled, .. }) => assert!(*signaled), + _ => panic!(), + } + } + + /// Final close on the last surviving slot drops the canonical object. 
+ #[test] + fn nt_duplicate_object_close_both_destroys_underlying() { + let (_ctx, mem, mut state) = fresh(); + let (source, dup) = create_event_and_dup(&mem, &mut state); + + let mut close_dup = PpcContext::default(); + close_dup.gpr[3] = dup as u64; + nt_close(&mut close_dup, &mem, &mut state); + + let mut close_src = PpcContext::default(); + close_src.gpr[3] = source as u64; + nt_close(&mut close_src, &mem, &mut state); + + assert!(!state.objects.contains_key(&source)); + assert!(!state.handle_refcount.contains_key(&source)); + assert!(!state.canonical_slot_count.contains_key(&source)); + } + + /// DUPLICATE_CLOSE_SOURCE: dup happens AND source is closed atomically. + /// Net result: dup is live, source is gone. + #[test] + fn nt_duplicate_object_with_close_source_flag() { + let (mut ctx, mut mem, mut state) = fresh(); + let source = state.alloc_handle_for(KernelObject::Event { + manual_reset: false, + signaled: false, + waiters: Vec::new(), + }); + + let out_ptr = SCRATCH_BASE + 0x200; + mem.write_u32(out_ptr, 0); + ctx.gpr[3] = source as u64; + ctx.gpr[4] = out_ptr as u64; + ctx.gpr[5] = 0x1; // DUPLICATE_CLOSE_SOURCE + nt_duplicate_object(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + + let dup = mem.read_u32(out_ptr); + assert_ne!(dup, source); + + // Source slot scrubbed. + assert!(!state.handle_refcount.contains_key(&source)); + // But the canonical object is still alive through dup. + assert!(state.objects.contains_key(&source)); + // Slot count is exactly 1 (the dup). + assert_eq!(state.canonical_slot_count.get(&source).copied(), Some(1)); + // Dup alias points at canonical. + assert_eq!(state.resolve_handle(dup), source); + } + + /// Invalid source handle: STATUS_INVALID_HANDLE + zero write to out_ptr. 
+ #[test] + fn nt_duplicate_object_invalid_handle_returns_invalid_handle() { + let (mut ctx, mut mem, mut state) = fresh(); + let out_ptr = SCRATCH_BASE + 0x300; + mem.write_u32(out_ptr, 0xCAFE_BABE); + ctx.gpr[3] = 0x9999 as u64; // bogus + ctx.gpr[4] = out_ptr as u64; + ctx.gpr[5] = 0; + nt_duplicate_object(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_INVALID_HANDLE); + assert_eq!(mem.read_u32(out_ptr), 0); + } + + /// Double-dup: dup of a dup canonicalizes to the original source. + /// Mirrors canary's `LookupObject(TranslateHandle(handle), false)` which + /// resolves through nested dups by hitting the same `XObject*`. + #[test] + fn nt_duplicate_object_dup_of_dup_canonicalizes() { + let (_ctx, mem, mut state) = fresh(); + let (source, dup1) = create_event_and_dup(&mem, &mut state); + + // Now dup the dup. + let mut ctx = PpcContext::default(); + ctx.gpr[3] = dup1 as u64; + let out_ptr = SCRATCH_BASE + 0x400; + mem.write_u32(out_ptr, 0); + ctx.gpr[4] = out_ptr as u64; + ctx.gpr[5] = 0; + nt_duplicate_object(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], STATUS_SUCCESS); + let dup2 = mem.read_u32(out_ptr); + + assert_ne!(dup2, source); + assert_ne!(dup2, dup1); + // All three resolve to the same canonical source. + assert_eq!(state.resolve_handle(dup1), source); + assert_eq!(state.resolve_handle(dup2), source); + // Slot count reflects 3 live slots. + assert_eq!(state.canonical_slot_count.get(&source).copied(), Some(3)); + } + + /// Aliased dup with non-Event kernel objects also works. Mirrors + /// canary's `XObject::Type` codes (Event/Mutant/Semaphore/...). 
+ #[test] + fn nt_duplicate_object_works_for_semaphore() { + let (_ctx, mem, mut state) = fresh(); + let source = state.alloc_handle_for(KernelObject::Semaphore { + count: 3, + max: 10, + waiters: Vec::new(), + }); + let mut ctx = PpcContext::default(); + ctx.gpr[3] = source as u64; + let out_ptr = SCRATCH_BASE + 0x600; + mem.write_u32(out_ptr, 0); + ctx.gpr[4] = out_ptr as u64; + ctx.gpr[5] = 0; + nt_duplicate_object(&mut ctx, &mem, &mut state); + + let dup = mem.read_u32(out_ptr); + assert_ne!(dup, source); + assert_eq!(state.resolve_handle(dup), source); + // Underlying count unchanged. + match state.objects.get(&source) { + Some(KernelObject::Semaphore { count, max, .. }) => { + assert_eq!(*count, 3); + assert_eq!(*max, 10); + } + _ => panic!(), + } + } + + /// Phase W: ensure `VdInitializeEngines` writes `r3=1` (canary's + /// literal return value, not `STATUS_SUCCESS=0`). Anchored on the + /// helper directly so the registration is exercised end-to-end via + /// a separate code-path check (no need to actually issue the import + /// call). The `// canary returns 1` invariant is the entirety of + /// the fix. + #[test] + fn vd_initialize_engines_returns_one() { + let (mut ctx, mem, mut state) = fresh(); + ctx.gpr[3] = 0xDEAD_BEEF; // sentinel — must be overwritten + stub_return_one(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 1, "stub_return_one must put 1 in r3"); + } + + /// Phase C+23: pin `VdQueryVideoFlags` at canary-equivalent `0x3`. + /// Canary's bitmask is `(is_widescreen ? 1 : 0) | (width>=1280 ? 2 : 0) + /// | (width>=1920 ? 4 : 0)`. With ours's `vd_query_video_mode` reporting + /// `is_widescreen=1` and `display_width=1280` (and no Full HD bit), + /// the canary-equivalent flags value is `1 | 2 = 3`. 
+ #[test] + fn vd_query_video_flags_returns_three() { + let (mut ctx, mem, mut state) = fresh(); + ctx.gpr[3] = 0xDEAD_BEEF; // sentinel — must be overwritten + vd_query_video_flags(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], 0x3, + "VdQueryVideoFlags must return canary-equivalent bitmask 0x3 \ + (is_widescreen | width>=1280)" + ); + } + + /// Cross-check: the value must agree with what ours's + /// `vd_query_video_mode` reports — otherwise the bitmask and the + /// underlying mode struct disagree, which would break games that + /// cross-check the two. The flags should equal: + /// (is_widescreen ? 1 : 0) | (width>=1280 ? 2 : 0) | (width>=1920 ? 4 : 0) + /// evaluated over the values vd_query_video_mode actually writes. + #[test] + fn vd_query_video_flags_matches_vd_query_video_mode_payload() { + let (mut ctx, mem, mut state) = fresh(); + // Allocate a scratch page for the mode struct. + let mode_ptr = SCRATCH_BASE; + ctx.gpr[3] = mode_ptr as u64; + vd_query_video_mode(&mut ctx, &mem, &mut state); + let display_width = mem.read_u32(mode_ptr); + let is_widescreen = mem.read_u32(mode_ptr + 12); + let expected = (if is_widescreen != 0 { 1 } else { 0 }) + | (if display_width >= 1280 { 2 } else { 0 }) + | (if display_width >= 1920 { 4 } else { 0 }); + + ctx.gpr[3] = 0xDEAD_BEEF; + vd_query_video_flags(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], expected, + "VdQueryVideoFlags must equal the bitmask computed from \ + VdQueryVideoMode's payload" + ); + } + + // ---- review-a Step 1 crowbar ----------------------------------------- + + /// The crowbar must: + /// (a) allocate a ctx page, + /// (b) write vtable BASE 0x8200A1E8 at +0, self at +4/+8, refcount=1 at +12, + /// (c) spawn 4 threads at the canonical entries, + /// (d) resume each of them (post-spawn `suspend_count == 0`). 
+ /// + /// Test-setup wart: `fresh()` hard-codes the initial test thread's + /// handle to `0x1000` (which equals `next_handle`'s initial value), + /// so without intervention the first crowbar spawn would collide + /// with that handle. Bump `next_handle` past `0x1000` here to + /// mirror production, where the main thread's handle is itself + /// minted via `alloc_handle_for`. + #[test] + fn crowbar_force_spawn_workers_spawns_and_resumes_4() { + let (_ctx, mem, mut state) = fresh(); + // Reserve a handle slot to push next_handle past 0x1000. + let _ = state.alloc_handle(); + let resumed = crowbar_force_spawn_workers(&mut state, &mem); + assert_eq!(resumed, 4, "all 4 workers must resume on a fresh kernel"); + + // Find each thread by entry + verify start_context matches the + // ctx we wrote and that all 4 share one ctx address. + let entries: std::collections::HashSet = [ + 0x82506528, 0x82506558, 0x82506588, 0x825065B8, + ] + .into_iter() + .collect(); + let mut seen_entries: std::collections::HashSet = + std::collections::HashSet::new(); + let mut ctx_addrs: std::collections::HashSet = + std::collections::HashSet::new(); + for (hw_id, slot) in state.scheduler.slots.iter().enumerate() { + for (idx, t) in slot.runqueue.iter().enumerate() { + if entries.contains(&t.ctx.pc) { + seen_entries.insert(t.ctx.pc); + ctx_addrs.insert(t.ctx.gpr[3] as u32); + assert_eq!( + t.suspend_count, 0, + "crowbar must leave each worker resumed (suspend_count=0) — \ + entry={:#010x} hw={} idx={} state={:?}", + t.ctx.pc, hw_id, idx, t.state, + ); + } + } + } + assert_eq!(seen_entries, entries, "all 4 entries must be present in scheduler"); + assert_eq!(ctx_addrs.len(), 1, "all 4 workers must share one ctx_ptr"); + + // Verify ctx layout: vtable base + self + self + refcount. 
+ let ctx_ptr = *ctx_addrs.iter().next().expect("one ctx"); + assert_eq!(mem.read_u32(ctx_ptr), 0x8200_A1E8, "vtable BASE at ctx+0"); + assert_eq!(mem.read_u32(ctx_ptr + 4), ctx_ptr, "self at ctx+4"); + assert_eq!(mem.read_u32(ctx_ptr + 8), ctx_ptr, "self at ctx+8"); + assert_eq!(mem.read_u32(ctx_ptr + 12), 1, "refcount=1 at ctx+12"); + } + + /// `try_fire_crowbar_workers` is no-op when the cvar is disabled. + #[test] + fn try_fire_crowbar_workers_noop_when_disabled() { + let (_ctx, mem, mut state) = fresh(); + let pre_thread_count: usize = state + .scheduler + .slots + .iter() + .map(|s| s.runqueue.len()) + .sum(); + // Cvar default-off. + let resumed = state.try_fire_crowbar_workers(&mem, u64::MAX); + assert_eq!(resumed, 0); + let post: usize = state + .scheduler + .slots + .iter() + .map(|s| s.runqueue.len()) + .sum(); + assert_eq!(pre_thread_count, post, "no threads spawned when disabled"); + assert!(!state.crowbar_workers_fired, "latch stays unset"); + } + + /// Trigger threshold gates the fire — below threshold = no-op, at/over + /// threshold = fires exactly once. + #[test] + fn try_fire_crowbar_workers_respects_threshold_and_latches_once() { + let (_ctx, mem, mut state) = fresh(); + // See `fresh()` test-setup wart in the spawn test above. + let _ = state.alloc_handle(); + state.crowbar_workers_enabled = true; + state.crowbar_workers_trigger_instr = 1_000; + + // Below threshold — no fire. + assert_eq!(state.try_fire_crowbar_workers(&mem, 999), 0); + assert!(!state.crowbar_workers_fired); + + // At threshold — fires. + assert_eq!(state.try_fire_crowbar_workers(&mem, 1_000), 4); + assert!(state.crowbar_workers_fired); + + // Subsequent call — latched, returns 0. 
+ assert_eq!(state.try_fire_crowbar_workers(&mem, u64::MAX), 0); + } } diff --git a/crates/xenia-kernel/src/lib.rs b/crates/xenia-kernel/src/lib.rs index 13a15d0..998c25c 100644 --- a/crates/xenia-kernel/src/lib.rs +++ b/crates/xenia-kernel/src/lib.rs @@ -1,8 +1,11 @@ pub mod audit; +pub mod contention_manifest; +pub mod event_log; pub mod exports; pub mod interrupts; pub mod objects; pub mod path; +pub mod phase_b_snapshot; pub mod state; pub mod thread; pub mod ui_bridge; diff --git a/crates/xenia-kernel/src/objects.rs b/crates/xenia-kernel/src/objects.rs index 2d6754a..8f9cc04 100644 --- a/crates/xenia-kernel/src/objects.rs +++ b/crates/xenia-kernel/src/objects.rs @@ -109,4 +109,20 @@ impl KernelObject { KernelObject::File { .. } => None, } } + + /// Phase C+15-α: schema-v1 object-type code (see schema-v1.md + /// `Object type codes` table). Used by `event_log::semantic_id` to + /// compute cross-engine handle identity. Both engines must agree on + /// this mapping. + pub fn schema_object_type(&self) -> u32 { + match self { + KernelObject::Event { .. } => crate::event_log::object_type::EVENT, + KernelObject::Mutex { .. } => crate::event_log::object_type::MUTANT, + KernelObject::Semaphore { .. } => crate::event_log::object_type::SEMAPHORE, + KernelObject::Timer { .. } => crate::event_log::object_type::TIMER, + KernelObject::Thread { .. } => crate::event_log::object_type::THREAD, + KernelObject::File { .. } => crate::event_log::object_type::FILE, + KernelObject::NotifyListener { .. 
} => crate::event_log::object_type::NOTIFICATION,
+        }
+    }
 }
diff --git a/crates/xenia-kernel/src/path.rs b/crates/xenia-kernel/src/path.rs
index edb7e7d..4f0ed8c 100644
--- a/crates/xenia-kernel/src/path.rs
+++ b/crates/xenia-kernel/src/path.rs
@@ -100,6 +100,48 @@ pub fn object_attributes_to_vfs_path(mem: &GuestMemory, obj_attrs_ptr: u32) -> O
     Some(normalize_path(&raw))
 }
 
+/// Phase C+10 schema-v1 extension helper: read the OBJECT_ATTRIBUTES
+/// struct at `obj_attrs_ptr` and return its name with only leading and
+/// trailing whitespace trimmed (NO prefix-strip / case-fold). The emitter
+/// wants the path as the guest passed it so the Phase A diff surfaces
+/// upstream divergences (e.g. canary calls with one prefix / ours with
+/// another) rather than masking them via normalization. Returns `None`
+/// when the name cannot be read or is empty after trimming.
+pub fn object_attributes_raw_name(mem: &GuestMemory, obj_attrs_ptr: u32) -> Option<String> {
+    let raw = read_object_attributes_name(mem, obj_attrs_ptr)?;
+    // Trim before the emptiness test: whitespace-only is `None`, not `Some("")`.
+    let name = raw.trim();
+    (!name.is_empty()).then(|| name.to_string())
+}
+
+/// Phase C+11 schema-v1 extension helper: read the rename target
+/// path from a `NtSetInformationFile` class-10 (`XFileRenameInformation`)
+/// info buffer. Returns the un-normalized (whitespace-trimmed) path for
+/// emitter use; `None` when `info_ptr` is null, the buffer is shorter
+/// than 16 bytes, or the inner ANSI_STRING is unreadable or blank.
+///
+/// Layout per canary `info/file.h:79-83`:
+///   offset 0: big-endian u32 replace_existing
+///   offset 4: big-endian u32 root_dir_handle
+///   offset 8: X_ANSI_STRING (u16 Length, u16 MaximumLength, u32 Buffer)
+/// (16 bytes total; the `info_length >= 16` check is performed here, so
+/// callers need not pre-validate the length.)
+pub fn file_rename_information_raw_target(
+    mem: &GuestMemory,
+    info_ptr: u32,
+    info_length: u32,
+) -> Option<String> {
+    if info_ptr == 0 || info_length < 16 {
+        return None;
+    }
+    // The ANSI_STRING lives at offset 8 inside the rename-info struct;
+    // `checked_add` keeps a guest pointer near `u32::MAX` from overflowing.
+    let raw = read_ansi_string(mem, info_ptr.checked_add(8)?)?;
+    // Trim before the emptiness test: whitespace-only is `None`, not `Some("")`.
+    let target = raw.trim();
+    (!target.is_empty()).then(|| target.to_string())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/xenia-kernel/src/phase_b_snapshot.rs b/crates/xenia-kernel/src/phase_b_snapshot.rs
new file mode 100644
index 0000000..0b85666
--- /dev/null
+++ b/crates/xenia-kernel/src/phase_b_snapshot.rs
@@ -0,0 +1,849 @@
+//! Phase B initial-state snapshot. Cvar-gated (default off).
+//!
+//! Fires once, immediately before the first guest PPC instruction of the
+//! XEX entry_point executes. Writes a five-file structured state snapshot
+//! under `<phase_b_snapshot_dir>/ours/` plus a `manifest.json` indexing
+//! them by SHA-256.
+//!
+//! Spec: `xenia-rs/audit-runs/phase-b-state-equivalence/`.
+//!
+//! Zero cost when `KernelState::phase_b_snapshot_dir == None`. The single
+//! hot-path check in `worker_prologue` is one Option-tag test.
+
+use std::collections::BTreeMap;
+use std::fs::File;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicBool, Ordering};
+
+use serde_json::{json, Map, Value};
+use sha2::{Digest, Sha256};
+
+use xenia_cpu::PpcContext;
+use xenia_memory::page_table::AllocationState;
+use xenia_memory::GuestMemory;
+
+use crate::objects::KernelObject;
+use crate::state::{KernelState, ModuleId};
+
+const SCHEMA_VERSION: u32 = 1;
+const ENGINE: &str = "ours";
+
+static CLAIMED: AtomicBool = AtomicBool::new(false);
+static DONE: AtomicBool = AtomicBool::new(false);
+
+/// FNV-1a 64-bit identity for kernel objects. Mirrors canary's
+/// `phase_b_snapshot.cc::StableObjectId`.
+fn stable_object_id(type_code: u32, raw_handle: u32) -> u64 {
+    // Standard FNV-1a 64-bit parameters.
+    const FNV_OFFSET_BASIS: u64 = 0xCBF29CE484222325;
+    const FNV_PRIME: u64 = 0x100000001B3;
+    // Hash the 8-byte key `type_code (LE) || raw_handle (LE)`.
+    type_code
+        .to_le_bytes()
+        .into_iter()
+        .chain(raw_handle.to_le_bytes())
+        .fold(FNV_OFFSET_BASIS, |h, b| {
+            (h ^ u64::from(b)).wrapping_mul(FNV_PRIME)
+        })
+}
+
+/// Lowercase hex SHA-256 of `data` (64 characters).
+///
+/// Digits are emitted through a nibble table so no temporary `String` is
+/// allocated per byte.
+fn sha256_hex(data: &[u8]) -> String {
+    const HEX: &[u8; 16] = b"0123456789abcdef";
+    let digest = Sha256::digest(data);
+    let mut s = String::with_capacity(64);
+    for b in digest {
+        s.push(HEX[usize::from(b >> 4)] as char);
+        s.push(HEX[usize::from(b & 0x0f)] as char);
+    }
+    s
+}
+
+/// Format `v` as `0x` followed by 8 zero-padded lowercase hex digits.
+fn hex32(v: u32) -> String {
+    format!("0x{:08x}", v)
+}
+/// Format `v` as `0x` followed by 16 zero-padded lowercase hex digits.
+fn hex64(v: u64) -> String {
+    format!("0x{:016x}", v)
+}
+
+/// Numeric type tag written to `kernel.json` and fed into
+/// [`stable_object_id`]. The match is exhaustive on purpose: adding a
+/// `KernelObject` variant must fail to compile here until it gets a code.
+fn type_code(o: &KernelObject) -> u32 {
+    match o {
+        KernelObject::Event { .. } => 0x01,
+        KernelObject::Mutex { .. } => 0x02,
+        KernelObject::Semaphore { .. } => 0x03,
+        KernelObject::Timer { .. } => 0x04,
+        KernelObject::Thread { .. } => 0x05,
+        KernelObject::File { .. } => 0x06,
+        KernelObject::NotifyListener { .. } => 0x0B,
+    }
+}
+
+/// Human-readable type name written next to [`type_code`] in `kernel.json`.
+fn type_name(o: &KernelObject) -> &'static str {
+    match o {
+        KernelObject::Event { .. } => "Event",
+        KernelObject::Mutex { .. } => "Mutex",
+        KernelObject::Semaphore { .. } => "Semaphore",
+        KernelObject::Timer { .. } => "Timer",
+        KernelObject::Thread { .. } => "Thread",
+        KernelObject::File { .. } => "File",
+        KernelObject::NotifyListener { .. } => "NotifyListener",
+    }
+}
+
+/// Serialize a `serde_json::Value` to a sort-keys, 2-space-indent UTF-8
+/// string. Used for byte-deterministic output regardless of HashMap
+/// iteration order on the construction side — `Map<String, Value>` is
+/// backed by a `BTreeMap` here so sorting is implicit.
+fn serialize_sorted(v: &Value) -> String {
+    /// Append `levels` levels of two-space indentation.
+    fn pad(out: &mut String, levels: usize) {
+        for _ in 0..levels {
+            out.push_str("  ");
+        }
+    }
+    /// Append `s` as a quoted JSON string. Shared by string values AND
+    /// object keys so a key containing `"` or `\` cannot corrupt the output.
+    fn push_quoted(out: &mut String, s: &str) {
+        out.push('"');
+        for c in s.chars() {
+            match c {
+                '"' => out.push_str("\\\""),
+                '\\' => out.push_str("\\\\"),
+                '\n' => out.push_str("\\n"),
+                '\r' => out.push_str("\\r"),
+                '\t' => out.push_str("\\t"),
+                c if (c as u32) < 0x20 => {
+                    out.push_str(&format!("\\u{:04x}", c as u32))
+                }
+                c => out.push(c),
+            }
+        }
+        out.push('"');
+    }
+    fn walk(v: &Value, out: &mut String, indent: usize) {
+        match v {
+            Value::Null => out.push_str("null"),
+            Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
+            Value::Number(n) => out.push_str(&n.to_string()),
+            Value::String(s) => push_quoted(out, s),
+            Value::Array(a) => {
+                if a.is_empty() {
+                    out.push_str("[]");
+                    return;
+                }
+                out.push_str("[\n");
+                let n = a.len();
+                for (i, item) in a.iter().enumerate() {
+                    pad(out, indent + 1);
+                    walk(item, out, indent + 1);
+                    if i + 1 < n {
+                        out.push(',');
+                    }
+                    out.push('\n');
+                }
+                pad(out, indent);
+                out.push(']');
+            }
+            Value::Object(m) => {
+                if m.is_empty() {
+                    out.push_str("{}");
+                    return;
+                }
+                // Sort explicitly so the output does not depend on which
+                // map backend serde_json was built with.
+                let mut keys: Vec<&String> = m.keys().collect();
+                keys.sort();
+                out.push_str("{\n");
+                let n = keys.len();
+                for (i, k) in keys.iter().enumerate() {
+                    pad(out, indent + 1);
+                    push_quoted(out, k);
+                    out.push_str(": ");
+                    walk(&m[*k], out, indent + 1);
+                    if i + 1 < n {
+                        out.push(',');
+                    }
+                    out.push('\n');
+                }
+                pad(out, indent);
+                out.push('}');
+            }
+        }
+    }
+    let mut s = String::new();
+    walk(v, &mut s, 0);
+    s.push('\n');
+    s
+}
+
+/// Create/truncate `path`, write `bytes`, and fsync before returning so the
+/// data survives the `_exit` issued right after the snapshot.
+fn write_bytes_synced(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
+    let mut f = File::create(path)?;
+    f.write_all(bytes)?;
+    f.flush()?;
+    f.sync_all()
+}
+
+/// Durably write `body` to `path` and return the SHA-256 (lowercase hex) of
+/// the bytes written, for the manifest.
+fn write_file(path: &Path, body: &str) -> std::io::Result<String> {
+    write_bytes_synced(path, body.as_bytes())?;
+    Ok(sha256_hex(body.as_bytes()))
+}
+
+/// Size in bytes of the run of mapped 4 KiB pages starting at `image_base`.
+///
+/// Returns 0 when `image_base` is 0 (no image installed). The walk is
+/// capped at 64 MiB so a loose `is_mapped` cannot make callers hash or dump
+/// a runaway range. Shared by `build_memory`, `build_config` and
+/// `write_snapshot` so all three agree on the image extent.
+fn mapped_image_size(mem: &GuestMemory, image_base: u32) -> u32 {
+    const PAGE: u32 = 4096;
+    const LIMIT: u32 = 64 * 1024 * 1024;
+    if image_base == 0 {
+        return 0;
+    }
+    let limit = image_base.saturating_add(LIMIT);
+    let mut size: u32 = 0;
+    let mut a = image_base;
+    while a < limit && mem.is_mapped(a) {
+        // Cannot overflow: bounded by LIMIT.
+        size += PAGE;
+        match a.checked_add(PAGE) {
+            Some(next) => a = next,
+            None => break,
+        }
+    }
+    size
+}
+
+// ---------- cpu_state.json ----------
+
+/// Build the `cpu_state.json` document from the entry thread's context.
+/// `stack_base`, `stack_limit` and `tls_base` are emitted as zero
+/// placeholders; `pcr_base` is taken from the low 32 bits of r13.
+fn build_cpu_state(ctx: &PpcContext, entry_pc: u32, current_tid: u32) -> Value {
+    let mut o = Map::new();
+    o.insert("schema_version".into(), json!(SCHEMA_VERSION));
+    o.insert("engine".into(), json!(ENGINE));
+    o.insert("pc".into(), json!(hex32(entry_pc)));
+    o.insert("lr".into(), json!(hex64(ctx.lr)));
+    o.insert("ctr".into(), json!(hex64(ctx.ctr)));
+    o.insert("msr".into(), json!(hex64(ctx.msr)));
+    o.insert("vrsave".into(), json!(hex32(ctx.vrsave)));
+    o.insert("fpscr".into(), json!(hex32(ctx.fpscr)));
+    let xer = json!({
+        "ca": ctx.xer_ca as u32,
+        "ov": ctx.xer_ov as u32,
+        "so": ctx.xer_so as u32,
+        "tbc": ctx.xer_tbc as u32,
+    });
+    o.insert("xer".into(), xer);
+    // CR is emitted as eight 4-bit fields, CR0 (most significant) first.
+    let cr_arr: Vec<Value> = (0..8)
+        .map(|i| {
+            let val = ((ctx.cr() >> (28 - i * 4)) & 0xF) as u8;
+            json!(format!("0x{:x}", val))
+        })
+        .collect();
+    o.insert("cr".into(), Value::Array(cr_arr));
+    let gpr: Vec<Value> = ctx.gpr.iter().map(|&v| json!(hex64(v))).collect();
+    o.insert("gpr".into(), Value::Array(gpr));
+    let fpr: Vec<Value> = ctx.fpr.iter().map(|&v| json!(hex64(v.to_bits()))).collect();
+    o.insert("fpr".into(), Value::Array(fpr));
+    let vr: Vec<Value> = ctx
+        .vr
+        .iter()
+        .map(|v| {
+            let mut s = String::with_capacity(32);
+            for b in &v.bytes {
+                s.push_str(&format!("{:02x}", b));
+            }
+            json!(s)
+        })
+        .collect();
+    o.insert("vr".into(), Value::Array(vr));
+    let mut vscr_s = String::with_capacity(32);
+    for b in &ctx.vscr.bytes {
+        vscr_s.push_str(&format!("{:02x}", b));
+    }
+    o.insert("vscr".into(), json!(vscr_s));
+    o.insert("thread_id".into(), json!(current_tid));
+    o.insert("hw_id".into(), json!(ctx.hw_id as u32));
+    o.insert("stack_base".into(), json!(hex32(0)));
+    o.insert("stack_limit".into(), json!(hex32(0)));
+    o.insert("tls_base".into(), json!(hex32(0)));
+    o.insert("pcr_base".into(), json!(hex32(ctx.gpr[13] as u32)));
+    o.insert("deterministic_skip".into(), json!(["hw_id"]));
+    Value::Object(o)
+}
+
+// ---------- memory.json ----------
+
+/// One hashed guest-memory range, `[start, end)`.
+struct Region {
+    start: u32,
+    end: u32,
+    protect_bits: u32,
+    sha256: String,
+}
+
+/// Walk the whole page table and coalesce contiguous committed pages that
+/// share the same `current_protect` bits into hashed [`Region`]s.
+///
+/// Currently unused: `build_memory` emits an empty `regions_walked` list
+/// because some COMMIT-flagged pages are not host-backed and reading them
+/// faults (see the note there). Kept for when that is resolved.
+#[allow(dead_code)] // intentionally retained; see doc comment above
+fn walk_committed_regions(mem: &GuestMemory) -> Vec<Region> {
+    // Page table is 1 entry per 4 KiB across the full 4 GiB guest space.
+    const PAGE: u32 = 4096;
+    /// Hash and record the run `[start, last_page + PAGE)`. The end is
+    /// clamped to `u32::MAX` when the run includes the final page, since an
+    /// exclusive end of 4 GiB does not fit in `u32`.
+    fn flush(
+        mem: &GuestMemory,
+        regions: &mut Vec<Region>,
+        start: u32,
+        last_page: u32,
+        protect_bits: u32,
+    ) {
+        let end = last_page.checked_add(PAGE).unwrap_or(u32::MAX);
+        let bytes = read_bytes(mem, start, end - start);
+        regions.push(Region {
+            start,
+            end,
+            protect_bits,
+            sha256: sha256_hex(&bytes),
+        });
+    }
+    let mut regions = Vec::new();
+    // (first page, last page) of the run currently being extended.
+    let mut cur_start: Option<(u32, u32)> = None;
+    let mut last_protect: u32 = 0;
+    let mut addr: u64 = 0;
+    while addr < 0x1_0000_0000 {
+        let a = addr as u32;
+        let entry = mem.page_entry(a);
+        let committed = entry
+            .map(|e| e.state().contains(AllocationState::COMMIT))
+            .unwrap_or(false);
+        let protect_bits = entry
+            .map(|e| e.current_protect().bits())
+            .unwrap_or(0);
+        if committed {
+            match cur_start {
+                None => {
+                    cur_start = Some((a, a));
+                    last_protect = protect_bits;
+                }
+                Some((start, last_page)) => {
+                    if protect_bits == last_protect {
+                        cur_start = Some((start, a));
+                    } else {
+                        // Protection change → flush prior region.
+                        flush(mem, &mut regions, start, last_page, last_protect);
+                        cur_start = Some((a, a));
+                        last_protect = protect_bits;
+                    }
+                }
+            }
+        } else if let Some((start, last_page)) = cur_start.take() {
+            flush(mem, &mut regions, start, last_page, last_protect);
+        }
+        addr += PAGE as u64;
+    }
+    if let Some((start, last_page)) = cur_start.take() {
+        flush(mem, &mut regions, start, last_page, last_protect);
+    }
+    regions
+}
+
+/// Copy `len` bytes of guest memory starting at guest address `start`.
+/// Returns a zero-filled buffer when the host base pointer is null.
+fn read_bytes(mem: &GuestMemory, start: u32, len: u32) -> Vec<u8> {
+    let mut v = vec![0u8; len as usize];
+    let base = mem.membase();
+    if base.is_null() {
+        return v;
+    }
+    // SAFETY: pages in [start, start+len) are confirmed committed by the
+    // caller; reading them is well-defined. We snapshot bytes at a
+    // moment when no guest thread is executing.
+    unsafe {
+        let src = base.add(start as usize);
+        std::ptr::copy_nonoverlapping(src, v.as_mut_ptr(), len as usize);
+    }
+    v
+}
+
+/// Build the `memory.json` document: hashed named regions (image, entry
+/// thread stack/PCR/TLS), per-heap committed-page counts, and optional
+/// section descriptors when `dump_section_content` is set.
+fn build_memory(
+    state: &KernelState,
+    mem: &GuestMemory,
+    dump_section_content: bool,
+) -> Value {
+    let mut o = Map::new();
+    o.insert("schema_version".into(), json!(SCHEMA_VERSION));
+    o.insert("engine".into(), json!(ENGINE));
+    o.insert("page_size".into(), json!(4096));
+    o.insert("guest_address_space_bytes".into(), json!(0x1_0000_0000u64));
+
+    // Named regions: XEX image, main thread stack, PCR, TLS. Mirrors
+    // canary's BuildMemory exactly so the diff tool compares positional
+    // entries one-to-one.
+    let mut regions = Vec::new();
+    let hash_region = |start: u32, len: u32| -> Region {
+        let bytes = read_bytes(mem, start, len);
+        Region {
+            start,
+            end: start + len,
+            protect_bits: 0,
+            sha256: sha256_hex(&bytes),
+        }
+    };
+    // Walk forward while mapped — bounded by 64 MiB to avoid runaway
+    // if is_mapped is loose with COMMIT semantics.
+    let image_size = mapped_image_size(mem, state.image_base);
+    if image_size != 0 {
+        regions.push(hash_region(state.image_base, image_size));
+    }
+    // Stack/PCR/TLS — derived from the entry thread's GuestThread.
+    if let Some(r) = state.scheduler.current {
+        let th = state.scheduler.thread(r);
+        if th.stack_size > 0 && th.stack_base >= th.stack_size {
+            // stack_base in ours is the LOW address (stack grows down from
+            // stack_base + stack_size). Hash the full alloc'd range.
+            regions.push(hash_region(th.stack_base, th.stack_size));
+        }
+        if th.pcr_base != 0 {
+            regions.push(hash_region(th.pcr_base, 0x1000));
+        }
+        if th.tls_base != 0 {
+            regions.push(hash_region(th.tls_base, 0x1000));
+        }
+    }
+    regions.sort_by_key(|r| (r.start, r.end));
+
+    let mut committed_pages: u64 = 0;
+    let mut regions_json = Vec::new();
+    for r in &regions {
+        committed_pages += ((r.end - r.start) / 4096) as u64;
+        let mut rm = Map::new();
+        rm.insert("start".into(), json!(hex32(r.start)));
+        rm.insert("end".into(), json!(hex32(r.end)));
+        rm.insert("byte_count".into(), json!(r.end - r.start));
+        rm.insert("protect".into(), json!(r.protect_bits));
+        rm.insert("sha256".into(), json!(r.sha256));
+        rm.insert("section_kind".into(), Value::Null);
+        regions_json.push(Value::Object(rm));
+    }
+    o.insert("regions".into(), Value::Array(regions_json));
+    o.insert("committed_pages_total".into(), json!(committed_pages));
+
+    // Note: a full page-table walk inventory was removed — the
+    // `mem.page_entry(addr).state().contains(COMMIT)` check returns
+    // true for some addresses whose underlying host pages aren't
+    // backed (likely due to interactions with reserved-vs-committed
+    // bookkeeping during early bring-up). Reading via raw pointer at
+    // those addresses faults. Phase B's named-regions list above
+    // captures the equivalence-relevant memory anyway.
+    o.insert("regions_walked".into(), Value::Array(Vec::new()));
+
+    // Single synthetic heap descriptor — ours doesn't model canary's
+    // heap split; the diff tool sorts heaps by `base` so a single-heap
+    // engine vs N-heap engine is itself a σ-class observation captured
+    // by the diff. Mirror canary's heap descriptors: 4 entries.
+    let heap_bases = [0x0000_0000u32, 0x4000_0000, 0x8000_0000, 0x9000_0000];
+    let mut heaps = Vec::new();
+    for base in heap_bases {
+        let mut heap = Map::new();
+        heap.insert("name".into(), json!(format!("v{:08x}", base)));
+        heap.insert("base".into(), json!(hex32(base)));
+        heap.insert("size".into(), json!(hex32(0x4000_0000)));
+        heap.insert("page_size".into(), json!(4096));
+        let mut hist = Map::new();
+        // Crude: count committed pages within this heap by sampling
+        // `is_mapped` across the range. O(heap_size / PAGE) — bounded.
+        let mut committed: u64 = 0;
+        let mut addr = base;
+        let end = base.saturating_add(0x4000_0000);
+        while addr < end {
+            if mem.is_mapped(addr) {
+                committed += 1;
+            }
+            let next = addr.wrapping_add(4096);
+            if next < addr {
+                break;
+            }
+            addr = next;
+        }
+        hist.insert("committed".into(), json!(committed));
+        heap.insert("page_state_histogram".into(), Value::Object(hist));
+        heaps.push(Value::Object(heap));
+    }
+    o.insert("heaps".into(), Value::Array(heaps));
+
+    if dump_section_content {
+        // `content_b64` is left empty here; raw image bytes are written
+        // separately as image.bin by `write_snapshot`.
+        let secs: Vec<Value> = regions
+            .iter()
+            .map(|r| {
+                json!({
+                    "start": hex32(r.start),
+                    "end": hex32(r.end),
+                    "sha256": r.sha256.clone(),
+                    "content_b64": "",
+                })
+            })
+            .collect();
+        o.insert("section_contents".into(), Value::Array(secs));
+    } else {
+        o.insert("section_contents".into(), Value::Null);
+    }
+    o.insert("deterministic_skip".into(), json!(["host_base_pointer"]));
+    Value::Object(o)
+}
+
+// ---------- kernel.json ----------
+
+/// Build the `kernel.json` document: the object table (sorted by stable
+/// object id) and a digest of the registered export names.
+fn build_kernel(state: &KernelState, entry_pc: u32) -> Value {
+    let mut o = Map::new();
+    o.insert("schema_version".into(), json!(SCHEMA_VERSION));
+    o.insert("engine".into(), json!(ENGINE));
+
+    let mut entries: Vec<(u64, Value)> = Vec::new();
+    for (handle, obj) in &state.objects {
+        let tc = type_code(obj);
+        let sid = stable_object_id(tc, *handle);
+        let mut e = Map::new();
+        e.insert(
+            "handle_semantic_id".into(),
+            json!(format!("{:016x}", sid)),
+        );
+        e.insert("raw_handle_id".into(), json!(hex32(*handle)));
+        e.insert("type".into(), json!(type_name(obj)));
+        e.insert("type_code".into(), json!(tc));
+        e.insert("name".into(), Value::Null);
+        let mut details = Map::new();
+        if let KernelObject::Thread { id, hw_id, exit_code, .. } = obj {
+            details.insert("thread_id".into(), json!(*id));
+            details.insert(
+                "is_entry_thread".into(),
+                json!(*id == xenia_cpu::scheduler::INITIAL_GUEST_TID),
+            );
+            details.insert("hw_id".into(), json!(hw_id.map(|v| v as u32)));
+            details.insert("exit_code".into(), json!(*exit_code));
+            details.insert(
+                "entry_pc".into(),
+                json!(hex32(if *id == xenia_cpu::scheduler::INITIAL_GUEST_TID {
+                    entry_pc
+                } else {
+                    0
+                })),
+            );
+        }
+        e.insert("details".into(), Value::Object(details));
+        entries.push((sid, Value::Object(e)));
+    }
+    // Sort by stable id so HashMap iteration order never leaks into output.
+    entries.sort_by_key(|(s, _)| *s);
+    let objs: Vec<Value> = entries.into_iter().map(|(_, v)| v).collect();
+    o.insert("objects".into(), Value::Array(objs));
+
+    o.insert("handle_name_table".into(), json!([]));
+    o.insert("notification_listeners".into(), json!([]));
+
+    // Exports — list module/ord/name for every registered handler, hash
+    // the canonical sorted "<module>!<name>" list. KernelState doesn't
+    // expose `exports` publicly; we walk the published export-name
+    // accessor for each (module, ordinal) we know about. As a pragmatic
+    // shortcut: emit the count via `KernelState::export_name` probes.
+    // For the diff this is informational; the sha256 over the sorted
+    // name list is the canonical comparison key.
+    let mut export_names: Vec<String> = Vec::new();
+    for module in &[ModuleId::Xboxkrnl, ModuleId::Xam, ModuleId::Xbdm] {
+        let module_str = match module {
+            ModuleId::Xboxkrnl => "xboxkrnl.exe",
+            ModuleId::Xam => "xam.xex",
+            ModuleId::Xbdm => "xbdm.xex",
+        };
+        for ord in 1..=0x1000u32 {
+            if let Some(name) = state.export_name(*module, ord) {
+                export_names.push(format!("{}!{}", module_str, name));
+            }
+        }
+    }
+    export_names.sort();
+    let joined = export_names.join("\n");
+    let sha = sha256_hex(joined.as_bytes());
+    let sample: Vec<Value> = export_names.iter().take(32).map(|n| json!(n)).collect();
+    o.insert(
+        "exports_registered_count".into(),
+        json!(export_names.len() as u64),
+    );
+    o.insert("exports_registered_sha256".into(), json!(sha));
+    o.insert("exports_registered_sample".into(), Value::Array(sample));
+
+    o.insert(
+        "deterministic_skip".into(),
+        json!(["raw_handle_id", "exports_registered_count"]),
+    );
+    Value::Object(o)
+}
+
+// ---------- vfs.json ----------
+
+/// Build the `vfs.json` document: results for a fixed set of path probes
+/// plus a hashed listing of the host cache root, if one is configured.
+fn build_vfs(state: &KernelState) -> Value {
+    let mut o = Map::new();
+    o.insert("schema_version".into(), json!(SCHEMA_VERSION));
+    o.insert("engine".into(), json!(ENGINE));
+
+    // Canonical probe set — same order as canary's, sorted alphabetically
+    // so the diff tool can compare positionally.
+    let mut probe_paths: Vec<&str> = vec![
+        "\\Device\\Cdrom0",
+        "\\Device\\Cdrom0\\default.xex",
+        "\\Device\\Cdrom0\\dat",
+        "\\Device\\Cdrom0\\dat\\movie",
+        "\\Device\\Cdrom0\\dat\\movie\\opening.bik",
+        "game:\\default.xex",
+        "game:\\dat",
+        "cache:\\",
+        "cache:\\nonexistent_probe",
+        "\\Device\\HardDisk0\\Partition1",
+    ];
+    probe_paths.sort();
+    let mut probes = Vec::new();
+    for p in &probe_paths {
+        let (resolved, is_dir, size) = probe_vfs(state, p);
+        probes.push(json!({
+            "path": p,
+            "resolved": resolved,
+            "is_directory": is_dir,
+            "size": size,
+        }));
+    }
+    o.insert("resolve_path_probes".into(), Value::Array(probes));
+
+    o.insert(
+        "mounted_devices_observed_count".into(),
+        json!(state.vfs.is_some() as u32),
+    );
+
+    // cache_root_listing — recursive walk of cache_root if set. Empty
+    // post-AUDIT-038-wipe.
+    let listing = if let Some(root) = &state.cache_root {
+        walk_cache_root(root)
+    } else {
+        Vec::new()
+    };
+    o.insert("cache_root_listing".into(), Value::Array(listing));
+
+    o.insert("deterministic_skip".into(), json!(["host_path_realpath"]));
+    Value::Object(o)
+}
+
+/// Probe one guest path against the mounted VFS.
+///
+/// Returns `(resolved, is_directory, size)`; both options are `None` when
+/// the path does not resolve, and `size` is `None` for root probes.
+// NOTE(review): the size type was lost in extraction; `u64` is assumed to
+// match the VFS entry's `size` field — confirm against the VFS crate.
+fn probe_vfs(state: &KernelState, path: &str) -> (bool, Option<bool>, Option<u64>) {
+    let Some(vfs) = state.vfs.as_ref() else {
+        return (false, None, None);
+    };
+    // Strip leading device prefix for ours's API (relative paths only).
+    let normalized = if let Some(stripped) = path.strip_prefix("\\Device\\Cdrom0\\") {
+        stripped
+    } else if let Some(stripped) = path.strip_prefix("game:\\") {
+        stripped
+    } else if path == "\\Device\\Cdrom0" || path == "game:\\" || path == "cache:\\" {
+        // Root listing.
+        match vfs.list_root() {
+            Ok(_) => return (true, Some(true), None),
+            Err(_) => return (false, None, None),
+        }
+    } else {
+        return (false, None, None);
+    };
+    match vfs.stat(normalized) {
+        Ok(entry) => (true, Some(entry.is_directory), Some(entry.size)),
+        Err(_) => (false, None, None),
+    }
+}
+
+/// Recursively list every readable file under `root` as
+/// `{relpath, size, sha256}`, sorted by `relpath`. Unreadable directories
+/// and files are skipped (best-effort).
+fn walk_cache_root(root: &Path) -> Vec<Value> {
+    fn walk(root: &Path, dir: &Path, out: &mut Vec<Value>) {
+        let Ok(entries) = std::fs::read_dir(dir) else {
+            return;
+        };
+        for entry in entries.flatten() {
+            let p = entry.path();
+            if p.is_dir() {
+                walk(root, &p, out);
+            } else if let Ok(bytes) = std::fs::read(&p) {
+                let rel = p.strip_prefix(root).unwrap_or(&p).to_string_lossy().to_string();
+                out.push(json!({
+                    "relpath": rel,
+                    "size": bytes.len() as u64,
+                    "sha256": sha256_hex(&bytes),
+                }));
+            }
+        }
+    }
+    let mut out = Vec::new();
+    walk(root, root, &mut out);
+    out.sort_by(|a, b| a["relpath"].as_str().cmp(&b["relpath"].as_str()));
+    out
+}
+
+// ---------- config.json ----------
+
+/// Build the `config.json` document: image identity (base, size, hash),
+/// the Phase B cvars, and placeholder timestamps.
+fn build_config(state: &KernelState, mem: &GuestMemory, entry_pc: u32) -> Value {
+    let mut o = Map::new();
+    o.insert("schema_version".into(), json!(SCHEMA_VERSION));
+    o.insert("engine".into(), json!(ENGINE));
+    o.insert("build_id".into(), json!("ours-phaseB"));
+    o.insert("iso_path".into(), json!(""));
+    o.insert("xex_entry_point".into(), json!(hex32(entry_pc)));
+    o.insert("xex_image_base".into(), json!(hex32(state.image_base)));
+
+    // image_size: walk forward from image_base until we hit an unmapped
+    // page (same bounded walk as memory.json, so the two files agree).
+    // This matches canary's XexModule::image_size() semantics
+    // closely enough for an entry-point snapshot.
+    let image_size = mapped_image_size(mem, state.image_base);
+    o.insert("xex_image_size".into(), json!(image_size));
+
+    let image_bytes = read_bytes(mem, state.image_base, image_size);
+    o.insert(
+        "image_loaded_sha256".into(),
+        json!(sha256_hex(&image_bytes)),
+    );
+    // Placeholder: the raw XEX header is not hashed here.
+    o.insert(
+        "xex_header_sha256".into(),
+        json!("0".repeat(64)),
+    );
+
+    let mut cvars = Map::new();
+    cvars.insert(
+        "phase_b_snapshot_dir".into(),
+        json!(state
+            .phase_b_snapshot_dir
+            .as_ref()
+            .map(|p| p.to_string_lossy().to_string())
+            .unwrap_or_default()),
+    );
+    cvars.insert(
+        "phase_b_snapshot_and_exit".into(),
+        json!(state.phase_b_snapshot_and_exit),
+    );
+    cvars.insert(
+        "phase_b_dump_section_content".into(),
+        json!(state.phase_b_dump_section_content),
+    );
+    o.insert("cvars".into(), Value::Object(cvars));
+
+    o.insert("host_ns_at_snapshot".into(), json!(0u64));
+    o.insert("wall_clock_iso8601".into(), json!("epoch:0"));
+    o.insert(
+        "deterministic_skip".into(),
+        json!([
+            "host_ns_at_snapshot",
+            "wall_clock_iso8601",
+            "build_id",
+            "iso_path",
+            "cvars.phase_b_snapshot_dir"
+        ]),
+    );
+    Value::Object(o)
+}
+
+// ---------- orchestrator ----------
+
+/// Called from `worker_prologue` once per slot visit. Cheap no-op when
+/// `phase_b_snapshot_dir == None` (the common case).
+///
+/// Writes the snapshot at most once per process: only when `pc` is the XEX
+/// entry point, `current_tid` is the initial guest thread, and no other
+/// caller has already claimed the snapshot. If
+/// `phase_b_snapshot_and_exit` is set the process exits immediately after.
+pub fn fire_if_entry_thread(
+    state: &mut KernelState,
+    mem: &GuestMemory,
+    pc: u32,
+    current_tid: u32,
+) {
+    // Hot fast path — empty Option is the default. Borrow rather than
+    // clone so visits with the cvar set stay allocation-free as well.
+    let Some(dir) = state.phase_b_snapshot_dir.as_deref() else {
+        return;
+    };
+    if DONE.load(Ordering::Acquire) {
+        return;
+    }
+    if pc != state.entry_pc || current_tid != xenia_cpu::scheduler::INITIAL_GUEST_TID {
+        return;
+    }
+    // Exactly one caller wins the claim; everyone else backs off.
+    if CLAIMED
+        .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
+        .is_err()
+    {
+        return;
+    }
+    write_snapshot(state, mem, dir, pc, current_tid);
+    DONE.store(true, Ordering::Release);
+    if state.phase_b_snapshot_and_exit {
+        // Use libc::_exit so we skip Rust dtors (and the cleanup ordering
+        // that comes with them). All snapshot files have been
+        // fsync()'d by write_bytes_synced, so the on-disk state is durable.
+        // SAFETY: `_exit` has no preconditions; it terminates the process.
+        unsafe {
+            libc::_exit(0);
+        }
+    }
+}
+
+/// Write the five snapshot documents, `manifest.json`, and (optionally)
+/// `image.bin` under `<dir>/ours/`. Failures are logged and skipped rather
+/// than propagated: the snapshot is a best-effort diagnostic.
+fn write_snapshot(
+    state: &KernelState,
+    mem: &GuestMemory,
+    dir: &Path,
+    entry_pc: u32,
+    current_tid: u32,
+) {
+    let engine_dir: PathBuf = dir.join("ours");
+    if let Err(e) = std::fs::create_dir_all(&engine_dir) {
+        tracing::warn!(
+            "phase_b_snapshot: failed to create {:?}: {} — snapshot aborted",
+            engine_dir,
+            e
+        );
+        return;
+    }
+
+    let ctx = state
+        .scheduler
+        .current_hw_id()
+        .map(|hw| state.scheduler.ctx(hw));
+    let cpu = match ctx {
+        Some(ctx) => build_cpu_state(ctx, entry_pc, current_tid),
+        None => {
+            tracing::warn!("phase_b_snapshot: no current ctx; aborting");
+            return;
+        }
+    };
+
+    let memv = build_memory(state, mem, state.phase_b_dump_section_content);
+    let kern = build_kernel(state, entry_pc);
+    let vfs = build_vfs(state);
+    let cfg = build_config(state, mem, entry_pc);
+
+    // BTreeMap keeps the manifest's file list in a deterministic order.
+    let mut hashes: BTreeMap<String, String> = BTreeMap::new();
+    for (name, value) in [
+        ("cpu_state.json", &cpu),
+        ("memory.json", &memv),
+        ("kernel.json", &kern),
+        ("vfs.json", &vfs),
+        ("config.json", &cfg),
+    ] {
+        let body = serialize_sorted(value);
+        match write_file(&engine_dir.join(name), &body) {
+            Ok(h) => {
+                hashes.insert(name.to_string(), h);
+            }
+            Err(e) => {
+                tracing::warn!("phase_b_snapshot: write {} failed: {}", name, e);
+            }
+        }
+    }
+
+    let mut manifest_files = Map::new();
+    for (k, v) in &hashes {
+        manifest_files.insert(k.clone(), json!(v));
+    }
+    let manifest = json!({
+        "schema_version": SCHEMA_VERSION,
+        "engine": ENGINE,
+        "files": Value::Object(manifest_files),
+    });
+    let body = serialize_sorted(&manifest);
+    if let Err(e) = write_file(&engine_dir.join("manifest.json"), &body) {
+        tracing::warn!("phase_b_snapshot: write manifest.json failed: {}", e);
+    }
+
+    // Phase C: when dump_section_content is on, write raw bytes of the
+    // XEX image region to <engine_dir>/image.bin. This is the only
+    // region positionally matched between canary and ours, so it's the
+    // only one suitable for byte-level diff.
+    if state.phase_b_dump_section_content {
+        // Returns 0 when no image is installed (image_base == 0).
+        let sz = mapped_image_size(mem, state.image_base);
+        if sz > 0 {
+            let bytes = read_bytes(mem, state.image_base, sz);
+            if let Err(e) = write_bytes_synced(&engine_dir.join("image.bin"), &bytes) {
+                tracing::warn!("phase_b_snapshot: image.bin write failed: {}", e);
+            }
+        }
+    }
+}
diff --git a/crates/xenia-kernel/src/state.rs b/crates/xenia-kernel/src/state.rs
index b256fe7..4f01105 100644
--- a/crates/xenia-kernel/src/state.rs
+++ b/crates/xenia-kernel/src/state.rs
@@ -47,9 +47,138 @@ pub enum ModuleId {
 pub const HMODULE_XBOXKRNL: u32 = 0xFFFE_0001;
 pub const HMODULE_XAM: u32 = 0xFFFE_0002;
 
+/// Phase C+12 — mirrors a single `xe::vfs::Entry` for the `cache:` mount.
+/// Stored in [`KernelState::cache_entries`] keyed by the normalized guest
+/// path (forward-slashed; see `crate::path::normalize_path`).
+///
+/// Field semantics match canary's `xe::vfs::Entry`
+/// (`xenia-canary/src/xenia/vfs/entry.h:67-95`):
+///
+/// * `is_directory`   — true for directories (Xbox attribute 0x10),
+///                      false for regular files (Xbox attribute 0x80).
+/// * `size`           — `entry->size()` (bytes; 0 for directories).
+/// * `allocation_size`— `entry->allocation_size()` =
+///                      `round_up(size, bytes_per_sector)`. Canary's
+///                      `HostPathEntry::Create` uses
+///                      `device->bytes_per_sector()` which defaults to
+///                      512 (`Device::bytes_per_sector_` ctor default;
+///                      cache: is a writable host-path device, no
+///                      override). We match that.
+/// * `create_time` / `access_time` / `write_time` — Windows FILETIME
+///                      (100ns ticks since 1601-01-01 UTC). Populated
+///                      from `xe::filesystem::FileInfo::{create,
+///                      access, write}_timestamp` on canary
+///                      (`filesystem_win.cc:226-228`); on our Linux
+///                      host we derive the equivalent FILETIME from
+///                      `std::fs::Metadata::{created, accessed,
+///                      modified}` via [`unix_to_filetime`].
+///                      `change_time` (the fourth FILETIME canary writes via
+///                      `entry->write_timestamp()`,
+///                      `xboxkrnl_io.cc:504`) reuses `write_time`.
+#[derive(Debug, Clone)]
+pub struct CacheEntryMeta {
+    pub is_directory: bool,
+    pub size: u64,
+    pub allocation_size: u64,
+    pub create_time: u64,
+    pub access_time: u64,
+    pub write_time: u64,
+}
+
+/// Phase C+12 — convert a [`std::time::SystemTime`] to a Windows FILETIME
+/// value (100-ns ticks since 1601-01-01 UTC). Matches what canary's
+/// Windows build emits via `COMBINE_TIME(ftCreationTime)` in
+/// `xenia-canary/src/xenia/base/filesystem_win.cc:226`.
+///
+/// Conversion: Unix epoch = 1970-01-01 UTC. The Windows epoch is
+/// 1601-01-01 UTC, which is `11_644_473_600` seconds earlier.
+///
+/// Pre-1970 inputs (rare on Linux, but `created()` can return them on
+/// filesystems that lack a creation-time stamp) are clamped to 0,
+/// which canary itself emits when the win32 `FILETIME` is zero — safer
+/// than wrapping arithmetic.
+pub fn unix_to_filetime(t: std::time::SystemTime) -> u64 {
+    // Seconds between 1601-01-01 (FILETIME epoch) and 1970-01-01 (Unix epoch).
+    const UNIX_TO_WINDOWS_EPOCH_SECS: u64 = 11_644_473_600;
+    // Times before the Unix epoch collapse to 0.
+    let Ok(since_unix) = t.duration_since(std::time::UNIX_EPOCH) else {
+        return 0;
+    };
+    // Whole seconds → 100-ns ticks, saturating instead of wrapping.
+    let whole_second_ticks = since_unix
+        .as_secs()
+        .saturating_add(UNIX_TO_WINDOWS_EPOCH_SECS)
+        .saturating_mul(10_000_000);
+    let sub_second_ticks = since_unix.subsec_nanos() as u64 / 100;
+    whole_second_ticks.saturating_add(sub_second_ticks)
+}
+
+/// Phase C+12 — build a [`CacheEntryMeta`] from a host-FS metadata
+/// snapshot. Mirrors `HostPathEntry::Create`
+/// (`xenia-canary/src/xenia/vfs/devices/host_path_entry.cc:32-54`):
+/// directory → attribute 0x10, size 0; file → attribute 0x80, size
+/// from metadata, `allocation_size` rounded up to a 512-byte sector.
+/// The `cache:` device is read-write so we never set the READONLY bit.
+///
+/// Timestamps the host cannot supply become 0, except `create_time`,
+/// which falls back to the modification time first.
+pub fn cache_entry_from_metadata(md: &std::fs::Metadata) -> CacheEntryMeta {
+    let is_directory = md.is_dir();
+    let (size, allocation_size) = if is_directory {
+        (0, 0)
+    } else {
+        let len = md.len();
+        // bytes_per_sector = 512 default (canary `Device::Device`).
+        (len, (len + 511) & !511)
+    };
+    let write_time = md.modified().map(unix_to_filetime).unwrap_or(0);
+    let access_time = md.accessed().map(unix_to_filetime).unwrap_or(0);
+    // No creation stamp on this filesystem → reuse the modification time
+    // (already 0 when that is unavailable too).
+    let create_time = match md.created() {
+        Ok(t) => unix_to_filetime(t),
+        Err(_) => write_time,
+    };
+    CacheEntryMeta {
+        is_directory,
+        size,
+        allocation_size,
+        create_time,
+        access_time,
+        write_time,
+    }
+}
+
+/// Phase C+12 — `FILE_ATTRIBUTE_*` constants (NT semantics, Xbox 360
+/// uses the same bitmask as Windows for
+/// `X_FILE_NETWORK_OPEN_INFORMATION::attributes`). Source:
+/// `xenia-canary/src/xenia/vfs/entry.h:67-73`.
+pub const X_FILE_ATTRIBUTE_DIRECTORY: u32 = 0x0010;
+pub const X_FILE_ATTRIBUTE_NORMAL: u32 = 0x0080;
+
 /// Central kernel state tracking all guest OS state.
pub struct KernelState { exports: HashMap<(ModuleId, u32), (&'static str, KernelExportFn)>, + /// Phase A: kernel exports whose canary signature is `void` (no + /// dword_result_t / pointer_result_t). For symmetry with canary's + /// `if constexpr (std::is_void::value)` trampoline branch + /// (see `xenia-canary/src/xenia/kernel/util/shim_utils.h`), the + /// Phase A `kernel.return` event for these exports emits + /// `return_value=0` instead of `gpr[3]` (which for void fns is + /// just the input arg pointer left untouched). Without this, + /// e.g. `KeQuerySystemTime` — declared `void` in canary, taking a + /// `lpqword_t time_ptr` — would report ours's r3=time_ptr but + /// canary's literal 0, producing a spurious diff. Cvar-OFF inert. + void_exports: std::collections::HashSet<(ModuleId, u32)>, + /// Phase C+6: kernel exports that have a table-entry in canary's + /// `xboxkrnl_table.inc` but NO `DECLARE_XBOXKRNL_EXPORT` / shim + /// implementation. Canary wires such imports to the syscall thunk + /// (`sc 2; blr`) which does NOT call any `Trampoline` and therefore + /// emits NO Phase A events (see `xenia-canary/src/xenia/cpu/ + /// xex_module.cc:1316-1335` and `ppc_frontend.cc:83-92`). For ours + /// to match canary's event stream, we must skip + /// `import.call`/`kernel.call`/`kernel.return` emission for these + /// exports even though we still execute their stub body (typically + /// `stub_success` setting `r3=0`). Without this, every guest call + /// to e.g. `IoDismountVolumeByFileHandle` injects 3 spurious events + /// into ours's Phase A stream while canary's stays silent — causing + /// per-call alignment drift downstream. Cvar-OFF inert (this flag + /// is consumed only inside the Phase A `phase_a_on` guard in + /// `call_export`). + unimplemented_exports: std::collections::HashSet<(ModuleId, u32)>, /// M2.4: bump allocator for kernel handles. `AtomicU32` so concurrent /// HLE calls under M3 can `fetch_add` without a lock. 
`Relaxed` is /// fine — the allocated value is a fresh ID with no prior payload to @@ -70,11 +199,31 @@ pub struct KernelState { pub cs_waiters: HashMap>, /// Kernel object table: handle → object pub objects: HashMap, + /// Phase C+5 — set of file handles opened WITHOUT + /// `FILE_SYNCHRONOUS_IO_ALERT` (0x10) or `FILE_SYNCHRONOUS_IO_NONALERT` + /// (0x20). Canary's `NtWriteFile_entry` + /// (xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc:351-353) + /// completes such writes synchronously but returns `STATUS_PENDING` + /// (0x103) instead of `STATUS_SUCCESS`. Mirrors `xfile.is_synchronous_` + /// in canary (xfile.h:177, xfile.cc:22). Populated by `open_vfs_file` + /// and `open_cache_file`; pruned by `nt_close` when the handle's + /// refcount drops to zero. + pub async_file_handles: std::collections::HashSet, /// Bump allocator for guest heap (NtAllocateVirtualMemory etc.). /// M2.4: `AtomicU32` for lock-free concurrent allocation. pub heap_cursor: std::sync::atomic::AtomicU32, /// Stack allocator cursor for MmCreateKernelStack. M2.4: atomic. pub stack_cursor: std::sync::atomic::AtomicU32, + /// Iterate 2.H — top-down bump allocator for the canary `vA0000000` + /// physical heap (0xA0000000-0xBFFFFFFF, 64KB pages). This bucket + /// services `MmAllocatePhysicalMemoryEx` requests that pass + /// `X_MEM_LARGE_PAGES` (0x20000000) in `protect_bits` — matching + /// canary's `LookupHeapByType(true, 64*1024) -> heaps_.vA0000000` + /// (xenia-canary memory.cc:269-271, xboxkrnl_memory.cc:454-455). + /// Cursor is the top-exclusive frontier: each alloc decrements first, + /// then allocates `[cursor, cursor+aligned_size)`. Initialized to + /// `0xC000_0000`. + pub physical_heap_cursor: std::sync::atomic::AtomicU32, /// GPU command buffer address (set by VdGetSystemCommandBuffer) pub gpu_command_buffer: u32, /// GPU backend. 
M1.4: was `xenia_gpu::GpuSystem` directly, now a @@ -91,6 +240,29 @@ pub struct KernelState { pub last_input_bytes: u128, /// Image base of the loaded XEX (for XexExecutableModuleHandle etc.) pub image_base: u32, + /// Guest VA of the raw XEX header bytes copied into guest memory at + /// startup (mirrors canary's `UserModule::guest_xex_header_`, + /// allocated in `user_module.cc:224`). Used by `RtlImageXexHeaderField` + /// to compute return values that are offsets into the in-guest header + /// copy (canary's `xboxkrnl_rtl.cc:501-514` calls `UserModule::Get + /// OptHeader(memory, header, key, &field_value)` which iterates + /// `header->headers[]` and returns `HostToGuestVirtual(header) + + /// opt_header.offset` for "else"-class keys, key low byte != 0/1). Zero + /// when the executable hasn't been installed yet. Set once by + /// `xenia-app` after `mem.write_bulk(base, &image_data)`. + pub xex_header_guest_ptr: u32, + /// Guest VA of the 0x18-byte `X_TIME_STAMP_BUNDLE` block referenced by + /// the `KeTimeStampBundle` (ord 0x00AD) variable export. Layout matches + /// canary's `kernel_state.h:98-104`: + /// +0x00 u64 interrupt_time (100-ns ticks since boot) + /// +0x08 u64 system_time (100-ns ticks, Windows FILETIME epoch) + /// +0x10 u32 tick_count (monotonic milliseconds since boot) + /// +0x14 u32 padding + /// Zero before the patcher allocates it. Stashed so the host-side 1 ms + /// repeating updater (spawned in `xenia-app`) can find the block. + /// Mirrors canary's `HighResolutionTimer::CreateRepeating(1 ms, + /// UpdateKeTimestampBundle)` at `kernel_state.cc:1272-1295`. + pub ke_timestamp_bundle_ptr: u32, /// `XEX_HEADER_SYSTEM_FLAGS` (key `0x00030000`) parsed from the loaded /// XEX header. Queried by `XexCheckExecutablePrivilege`: privilege bit /// `n` is set iff `(xex_system_flags & (1 << n)) != 0`. 
Zero before the @@ -123,6 +295,31 @@ pub struct KernelState { /// at startup; cleared at the same time so lockstep digests stay /// reproducible across reruns. pub cache_root: Option, + /// Phase C+12 — in-memory VFS entry tracker for the `cache:` mount, + /// mirroring canary's `HostPathDevice` entry tree. Keyed by the + /// normalized guest path (e.g. `cache:/d4ea4615/e/46ee8ca`, + /// post-`normalize_path` form with forward slashes). Populated at + /// mount time by [`Self::populate_cache_entries`] (analogue of + /// canary's `HostPathDevice::PopulateEntry`, + /// `xenia-canary/src/xenia/vfs/devices/host_path_device.cc:63`) and + /// per-NtCreateFile success by [`Self::register_cache_entry`] + /// (analogue of `Entry::CreateEntry` / + /// `HostPathEntry::CreateEntryInternal`, + /// `xenia-canary/src/xenia/vfs/devices/host_path_entry.cc:78`). + /// + /// Consulted by `nt_query_full_attributes_file` BEFORE any + /// `std::fs::metadata` host-FS call, mirroring canary's + /// `NtQueryFullAttributesFile_entry` + /// (`xenia-canary/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc:498-512`) + /// which only walks the in-memory entry tree via + /// `VirtualFileSystem::ResolvePath` and never re-stats the host. + /// + /// This resolves Phase C+11.1's main-chain divergence at idx + /// 102404 (NtQueryFullAttributesFile on `cache:\d4ea4615\e\46ee8ca`) + /// where canary's mount-time scan + in-memory tree allowed the + /// probe to succeed even before the file existed on disk this + /// boot, while ours's direct `std::fs::metadata` reported NOT_FOUND. + pub cache_entries: HashMap, /// Bridge to the host UI. `None` when running headless. Installed by /// `cmd_exec` when the user passes `--ui`. pub ui: Option, @@ -162,6 +359,38 @@ pub struct KernelState { /// decremented in `nt_close` which drops the underlying object only /// when the count reaches zero. pub handle_refcount: HashMap, + /// Phase C+19: alias map from duplicated handle id → canonical + /// (source) handle id. 
`NtDuplicateObject` mirrors canary's + /// `ObjectTable::DuplicateHandle` (object_table.cc:210) by + /// allocating a fresh slot id, but the underlying kernel object + /// (`KernelObject::Event`, `Semaphore`, etc.) stays a single + /// instance keyed in `state.objects` by the canonical id. Whenever + /// the guest passes a handle to an Nt*/Ke* call, `resolve_handle` + /// canonicalizes through this map before indexing `state.objects`. + /// + /// Why: AUDIT-062 (worker-cluster wedge resolution) depends on + /// "signal on dup wakes wait on source". With aliasing we relied on + /// `dup_id == source_id`; with a fresh dup id we rely on both ids + /// canonicalizing to the same `state.objects` entry, preserving the + /// shared `waiters` list and `signaled` flag. + /// + /// Lifecycle: inserted in `nt_duplicate_object`; entries point at + /// the *current* canonical id which never changes once minted (we + /// don't transitively rewrite aliases on partial close). When all + /// slot ids referring to a canonical have been closed, + /// `canonical_slot_count` reaches zero and the canonical entry in + /// `state.objects` is removed. + pub handle_aliases: HashMap, + /// Phase C+19: number of live handle slots referring to a canonical + /// kernel object. Bumped at `alloc_handle_for` (1 = the source + /// slot) and at `nt_duplicate_object` (each fresh dup slot). On + /// `nt_close` of any slot id `h` whose `handle_refcount[h]` + /// reaches zero, this counter for `resolve_handle(h)` is + /// decremented; if it reaches zero, `state.objects[canonical]` is + /// removed. Mirrors canary's `XObject::handle_ref_count_` (xobject.cc:73) + /// where every `AddHandle` retains and every final `RemoveHandle` + /// releases — the object dies only when the last slot is gone. + pub canonical_slot_count: HashMap, /// Pending timer expirations — `(deadline, handle)` sorted ascending by /// deadline. Pushed by `arm_timer`, popped by `fire_due_timers`. 
Kept in /// lockstep with the per-`Timer` object's `deadline` field via the @@ -264,6 +493,50 @@ pub struct KernelState { pub dump_addrs: Vec, /// `--dump-section=BASE:LEN:PATH` end-of-run snapshot, page-gated by `is_mapped`. pub dump_section: Option<(u32, u32, std::path::PathBuf)>, + /// Phase B initial-state snapshot — directory under which a + /// `ours/{cpu_state,memory,kernel,vfs,config}.json` + `manifest.json` + /// snapshot is written at the moment immediately before the first + /// guest PPC instruction of the XEX entry_point. `None` (default) = + /// disabled, zero overhead. See + /// `xenia-rs/audit-runs/phase-b-state-equivalence/`. + pub phase_b_snapshot_dir: Option, + /// Phase B: after writing the snapshot, exit the process immediately + /// so re-runs are byte-deterministic. Default false. + pub phase_b_snapshot_and_exit: bool, + /// Phase B: include raw bytes in `memory.json`'s `section_contents`. + /// Default false — per-region SHA-256 is enough for the routine diff. + pub phase_b_dump_section_content: bool, + /// Phase B: the XEX entry_point address — captured by the app at + /// `install_initial_thread` time and consulted by the snapshot hook + /// to validate the firing thread is the entry thread. + pub entry_pc: u32, + /// Phase D Stage 3: optional contention-replay manifest. When + /// loaded (typically via `XENIA_CONTENTION_MANIFEST_PATH`), + /// `rtl_enter_critical_section` consults it before its fast-path + /// claim and forces a park whenever the manifest says canary saw + /// real contention at the same `(tid, tid_event_idx)`. `None` = + /// disabled, zero overhead. The manifest itself is read-only after + /// load except for the per-entry `consume` removal, which is a + /// fast HashMap::remove behind a Mutex. 
+ pub contention_manifest: + Option>, + + /// review-a Step 1 crowbar — when `crowbar_workers_enabled` is set + /// (via `--force-spawn-workers` / `XENIA_CROWBAR_WORKERS=1`), the + /// per-round helper `try_fire_crowbar_workers` synthesises the 4 + /// `sub_825070F0` worker spawns once `instruction_count` crosses + /// `crowbar_workers_trigger_instr` (default 20_000_000). Default + /// OFF: zero behaviour change in normal runs. See + /// `xenia-rs/audit-runs/review-a-step1-crowbar/investigation.md`. + pub crowbar_workers_enabled: bool, + /// Instruction-count threshold for the one-shot crowbar fire. Picked + /// to land well after the 10-thread initial spawn burst and the + /// boot-init `VdSwap`, but with plenty of head-room for the workers + /// to execute before any reasonable `-n` cap. + pub crowbar_workers_trigger_instr: u64, + /// Latch — flipped to `true` on the first successful crowbar fire so + /// the helper is at-most-once. Read-only after the flip. + pub crowbar_workers_fired: bool, } impl KernelState { @@ -288,18 +561,26 @@ impl KernelState { scheduler.set_reservation_table(Some(reservations.clone())); let mut state = Self { exports: HashMap::new(), + void_exports: std::collections::HashSet::new(), + unimplemented_exports: std::collections::HashSet::new(), next_handle: AtomicU32::new(0x1000), scheduler, next_tls_index: AtomicU32::new(0), cs_waiters: HashMap::new(), objects: HashMap::new(), + async_file_handles: std::collections::HashSet::new(), heap_cursor: AtomicU32::new(0x4000_0000), // Start of user heap region stack_cursor: AtomicU32::new(0x7100_0000), // Above main stack + // Iterate 2.H: top-exclusive cursor for the vA0000000 physical + // heap. Decrements down toward 0xA0000000 (the bucket floor). 
+ physical_heap_cursor: AtomicU32::new(0xC000_0000), gpu_command_buffer: 0, gpu, input_packet_number: 0, last_input_bytes: 0, image_base: 0, + xex_header_guest_ptr: 0, + ke_timestamp_bundle_ptr: 0, xex_system_flags: 0, xex_priv_logged: std::collections::HashSet::new(), has_notified_startup: false, @@ -307,6 +588,7 @@ impl KernelState { next_thread_id: AtomicU32::new(1), vfs: None, cache_root: None, + cache_entries: HashMap::new(), ui: None, interrupts: crate::interrupts::InterruptState::default(), xaudio: crate::xaudio::XAudioState::default(), @@ -316,6 +598,8 @@ impl KernelState { // random-victim selection corrupted unrelated thread state. xaudio_tick_enabled: true, handle_refcount: HashMap::new(), + handle_aliases: HashMap::new(), + canonical_slot_count: HashMap::new(), pending_timer_fires: Vec::new(), audit: HandleAudit::default(), reservations, @@ -331,6 +615,14 @@ impl KernelState { lr_trace_writer: None, dump_addrs: Vec::new(), dump_section: None, + phase_b_snapshot_dir: None, + phase_b_snapshot_and_exit: false, + phase_b_dump_section_content: false, + entry_pc: 0, + contention_manifest: None, + crowbar_workers_enabled: false, + crowbar_workers_trigger_instr: 20_000_000, + crowbar_workers_fired: false, }; crate::exports::register_exports(&mut state); crate::xam::register_exports(&mut state); @@ -358,6 +650,16 @@ impl KernelState { e ); } + // Phase C+12 — eager mount-time entry-tree population mirrors + // canary's `HostPathDevice::PopulateEntry` recursion + // (`xenia-canary/src/xenia/vfs/devices/host_path_device.cc:63`). + // After the (optional) wipe, the on-disk tree is the source of + // truth; `nt_query_full_attributes_file` will consult the + // in-memory table built here before any host-FS round-trip. 
+ if state.cache_root.is_some() { + let root_clone = state.cache_root.clone().unwrap(); + state.populate_cache_entries_from_host(&root_clone); + } state } @@ -377,6 +679,42 @@ impl KernelState { self.exports.insert((module, ordinal), (name, func)); } + /// Register a kernel export whose canary signature is `void`. + /// See `KernelState::void_exports` doc. Identical semantics to + /// `register_export` except the Phase A `kernel.return` payload's + /// `return_value` field is emitted as 0 instead of `gpr[3]`, + /// matching canary's `EmitReturn(name, 0)` branch. + pub fn register_void_export( + &mut self, + module: ModuleId, + ordinal: u32, + name: &'static str, + func: KernelExportFn, + ) { + self.exports.insert((module, ordinal), (name, func)); + self.void_exports.insert((module, ordinal)); + } + + /// Phase C+6: register a kernel export that has a table-entry in + /// canary's `xboxkrnl_table.inc` but NO `DECLARE_XBOXKRNL_EXPORT` + /// shim. Identical execution semantics to `register_export`; only + /// difference is the Phase A emitter is silent for this export (to + /// mirror canary's syscall-thunk path which never reaches the + /// `Trampoline` that issues `import.call`/`kernel.call`/ + /// `kernel.return`). See `KernelState::unimplemented_exports` doc. + /// Use for ords whose `func` is a `stub_*` and which would + /// otherwise inject spurious Phase A alignment drift. + pub fn register_unimplemented_export( + &mut self, + module: ModuleId, + ordinal: u32, + name: &'static str, + func: KernelExportFn, + ) { + self.exports.insert((module, ordinal), (name, func)); + self.unimplemented_exports.insert((module, ordinal)); + } + /// AUDIT-038 — install a host directory as the backing store for the /// `cache:` mount. 
The directory is unconditionally cleared (and then /// re-created) on entry so two consecutive runs see byte-identical @@ -397,14 +735,164 @@ impl KernelState { } std::fs::create_dir_all(&root)?; self.cache_root = Some(root); + // Phase C+12 — wipe path: tree is by definition empty after the + // clear-then-recreate. A subsequent `set_cache_root` could be + // called by tests that want a populated tree; we leave that path + // handle the eager scan. + self.cache_entries.clear(); + // Insert the root directory entry so callers that probe + // `cache:/` directly (rare; Sylpheed does `NtOpenFile cache:\` + // at idx 102382) see canary's "yes, root is a directory" answer. + self.cache_entries.insert( + "cache:/".to_string(), + CacheEntryMeta { + is_directory: true, + size: 0, + allocation_size: 0, + create_time: 0, + access_time: 0, + write_time: 0, + }, + ); Ok(()) } /// AUDIT-054 — direct (non-wiping) cache-root install for tests /// that want byte-for-byte control over what's already on disk /// when the kernel boots. Skips the `init_cache_root` clear pass. + /// + /// Phase C+12 — this also eagerly populates [`Self::cache_entries`] + /// from the existing host-FS tree under `root`, mirroring canary's + /// `HostPathDevice::Initialize` → `PopulateEntry` + /// (`xenia-canary/src/xenia/vfs/devices/host_path_device.cc:31-48, + /// 63-75`). pub fn set_cache_root(&mut self, root: std::path::PathBuf) { - self.cache_root = Some(root); + self.cache_root = Some(root.clone()); + self.cache_entries.clear(); + self.populate_cache_entries_from_host(&root); + } + + /// Phase C+12 — eager mount-time scan. Walks `root` recursively + /// and inserts a [`CacheEntryMeta`] for every entry under the + /// `cache:/` namespace. Mirrors canary's `HostPathDevice:: + /// PopulateEntry` recursion. 
Errors are non-fatal (logged at + /// trace level); missing/unreadable host paths just leave the + /// in-memory tree empty for that subtree, exactly like canary + /// (which uses `ListFiles` whose `WIN32_FIND_DATA` errors silently + /// produce an empty vector). + fn populate_cache_entries_from_host(&mut self, root: &std::path::Path) { + // Always seed the device root. + self.cache_entries.insert( + "cache:/".to_string(), + CacheEntryMeta { + is_directory: true, + size: 0, + allocation_size: 0, + create_time: 0, + access_time: 0, + write_time: 0, + }, + ); + if !root.is_dir() { + return; + } + let mut stack: Vec<(std::path::PathBuf, String)> = + vec![(root.to_path_buf(), "cache:".to_string())]; + while let Some((host_dir, guest_prefix)) = stack.pop() { + let Ok(rd) = std::fs::read_dir(&host_dir) else { + continue; + }; + for entry in rd.flatten() { + let host_path = entry.path(); + let Some(name) = host_path + .file_name() + .and_then(|n| n.to_str()) + else { + continue; + }; + let guest_path = format!("{}/{}", guest_prefix, name); + let Ok(md) = entry.metadata() else { continue }; + let meta = cache_entry_from_metadata(&md); + let is_dir = meta.is_directory; + self.cache_entries.insert(guest_path.clone(), meta); + if is_dir { + stack.push((host_path, guest_path)); + } + } + } + } + + /// Phase C+12 — register / refresh a single cache-mount entry by + /// guest path (forward-slashed; matches `crate::path::normalize_path` + /// output and the keys produced by [`Self::populate_cache_entries_ + /// from_host`]). Called from [`crate::exports::open_cache_file`] + /// after a successful create-or-open so subsequent + /// `NtQueryFullAttributesFile` probes see the freshly-materialised + /// entry without re-stating the host FS, mirroring canary's + /// `Entry::CreateEntry` insert path. + /// + /// Idempotent — calling twice with the same path just refreshes + /// the cached metadata from `metadata` (useful after a write that + /// changed size / mtime). 
+ pub fn register_cache_entry(&mut self, guest_path: &str, metadata: &std::fs::Metadata) { + let key = Self::normalize_cache_key(guest_path); + self.cache_entries + .insert(key, cache_entry_from_metadata(metadata)); + } + + /// Phase C+12 — drop a cache entry (used on NtSetInformationFile + /// rename and on delete). Idempotent. + pub fn forget_cache_entry(&mut self, guest_path: &str) { + let key = Self::normalize_cache_key(guest_path); + self.cache_entries.remove(&key); + } + + /// Phase C+12 — look up a cache entry by guest path. The lookup + /// key is case-insensitive on the `cache:` prefix (canary matches + /// device-prefix case-insensitively via + /// `xe::utf8::starts_with` against `cache:`) and forward-slashed + /// for the rest. Path-traversal `..` / `.` components and leading + /// slashes are stripped to match the canonicalization + /// [`Self::resolve_cache_path`] performs against the host FS. + pub fn lookup_cache_entry(&self, raw: &str) -> Option<&CacheEntryMeta> { + let key = Self::normalize_cache_key(raw); + self.cache_entries.get(&key) + } + + /// Canonical key form for [`Self::cache_entries`]: + /// `cache:/`. Mirrors what + /// `crate::path::normalize_path` produces (forward slashes, + /// `cache:` prefix preserved). Accepts both `cache:\foo\bar` and + /// `cache:/foo/bar`, and treats `cache0:` / `cache1:` as aliases + /// of `cache:` (same backing dir; see [`Self::resolve_cache_path`]). 
+ fn normalize_cache_key(raw: &str) -> String { + let lower = raw.to_ascii_lowercase(); + let after_prefix = if let Some(rest) = lower + .strip_prefix("cache:\\") + .or_else(|| lower.strip_prefix("cache:/")) + { + rest + } else if let Some(rest) = lower + .strip_prefix("cache0:\\") + .or_else(|| lower.strip_prefix("cache0:/")) + .or_else(|| lower.strip_prefix("cache1:\\")) + .or_else(|| lower.strip_prefix("cache1:/")) + { + rest + } else if lower == "cache:" || lower == "cache:/" || lower == "cache:\\" { + return "cache:/".to_string(); + } else { + return lower; + }; + let clean: Vec<&str> = after_prefix + .split(|c: char| c == '/' || c == '\\') + .filter(|s| !s.is_empty() && *s != "." && *s != "..") + .collect(); + if clean.is_empty() { + "cache:/".to_string() + } else { + format!("cache:/{}", clean.join("/")) + } } /// Resolve a guest VFS path (e.g. `cache:\d4ea4615e46ee8ca.tmp`) to @@ -514,7 +1002,115 @@ impl KernelState { metrics::counter!("kernel.calls", "name" => name).increment(1); tracing::trace!(target: "probe_calls", "hw={} call={} r3={:#x} r4={:#x} r5={:#x} lr={:#x}", r.hw_id, name, ctx.gpr[3], ctx.gpr[4], ctx.gpr[5], ctx.lr); + // Phase A event log — see crates/xenia-kernel/src/event_log.rs. + // Hot path: `is_enabled` is a relaxed atomic-bool load. + // Phase C+6: exports flagged `unimplemented_exports` mirror + // canary's table-entry-without-DECLARE_XBOXKRNL_EXPORT path + // (`xenia-canary/src/xenia/cpu/xex_module.cc:1316-1335`), + // which dispatches through the syscall thunk and never + // reaches the `Trampoline` that emits Phase A events. Suppress + // event emission so ours's stream matches canary's. The stub + // body still runs. 
+ let phase_a_on = crate::event_log::is_enabled() + && !self.unimplemented_exports.contains(&(module, ordinal)); + let (phase_a_tid, phase_a_cycle) = if phase_a_on { + let tid = self.scheduler.thread(r).tid; + let cycle = ctx.cycle_count; + (tid, cycle) + } else { + (0u32, 0u64) + }; + if phase_a_on { + let module_name = match module { + ModuleId::Xboxkrnl => "xboxkrnl.exe", + ModuleId::Xam => "xam.xex", + ModuleId::Xbdm => "xbdm.xex", + }; + crate::event_log::emit_import_call( + phase_a_tid, + phase_a_cycle, + module_name, + ordinal as u16, + name, + ); + // Phase C+10 schema-v1 extension: resolve path args for + // OBJECT_ATTRIBUTES*-taking exports so divergences on file + // existence probes carry the actual path string in the diff. + // Additive — degrades to empty args_resolved when name is + // not in the path-bearing set or resolution fails. + let resolved_path = match name { + // Path-bearing exports — argument positions per canary's + // `xboxkrnl/xboxkrnl_io.cc` signatures (verified): + // NtCreateFile (r3 = file_handle_ptr, r4 = ..., r5 = obj_attrs) + // NtOpenFile (r3 = file_handle_ptr, r4 = ..., r5 = obj_attrs) + // NtQueryFullAttributesFile (r3 = obj_attrs, r4 = file_info) + // NtOpenSymbolicLinkObject (r3 = handle_out, r4 = obj_attrs) + // Use the raw (untransformed) form to avoid masking + // upstream divergences via normalization. + "NtQueryFullAttributesFile" => { + crate::path::object_attributes_raw_name(mem, ctx.gpr[3] as u32) + } + "NtOpenSymbolicLinkObject" => { + crate::path::object_attributes_raw_name(mem, ctx.gpr[4] as u32) + } + "NtCreateFile" | "NtOpenFile" => { + crate::path::object_attributes_raw_name(mem, ctx.gpr[5] as u32) + } + // Phase C+11 — surface the rename target path for + // `NtSetInformationFile` calls with info_class==10 + // (`XFileRenameInformation`). The target is in the + // info buffer, not OBJECT_ATTRIBUTES. 
+ // + // Calling convention (canary `xboxkrnl_io_info.cc:180`): + // r3 = handle, r4 = iosb, r5 = info_ptr, + // r6 = info_length, r7 = info_class. + "NtSetInformationFile" if ctx.gpr[7] as u32 == 10 => { + crate::path::file_rename_information_raw_target( + mem, + ctx.gpr[5] as u32, + ctx.gpr[6] as u32, + ) + } + _ => None, + }; + crate::event_log::emit_kernel_call_with_path( + phase_a_tid, + phase_a_cycle, + name, + resolved_path.as_deref(), + ); + } + let is_void = self.void_exports.contains(&(module, ordinal)); func(&mut ctx, mem, self); + if phase_a_on { + // Mirror canary's `if constexpr (std::is_void::value)` + // trampoline branch: void exports emit literal 0; non-void + // emit post-call gpr[3]. Without this, void exports that + // take a pointer arg (e.g. `KeQuerySystemTime`) would + // report ours=r3=arg_ptr vs canary=0 — a Phase A diff + // that is purely an emitter-framing asymmetry, not an + // engine semantic divergence. + // + // Phase C+11 — sign-extend the lower 32 bits to match + // canary's `ResultBase::Store` (shim_utils.h:359-361): + // `ppc_context->r[3] = uint64_t(int32_t(value_));` + // For positive-as-i32 returns (status SUCCESS, pointers + // < 0x80000000) this is a no-op. For "negative" NTSTATUS + // codes (e.g. STATUS_NO_SUCH_FILE = 0xC000000F) it + // produces 0xFFFFFFFFC000000F — matching the diff's + // expected u64 representation. + let return_value = if is_void { + 0 + } else { + (ctx.gpr[3] as u32 as i32 as i64) as u64 + }; + crate::event_log::emit_kernel_return( + phase_a_tid, + ctx.cycle_count, + name, + return_value, + ); + } true } else { metrics::counter!("kernel.unimplemented").increment(1); @@ -576,6 +1172,19 @@ impl KernelState { old_mask } + /// Phase D Stage 3: install a contention-replay manifest. Once set, + /// `rtl_enter_critical_section` consults the manifest at each call + /// and forces a park when the manifest says canary saw real + /// contention at the same `(tid, tid_event_idx)`. 
Idempotent — + /// calling twice replaces the previous manifest. Passing `None` + /// clears it. + pub fn install_contention_manifest( + &mut self, + manifest: Option>, + ) { + self.contention_manifest = manifest; + } + /// Install the initial (main) guest thread on HW slot 0. Called once at /// startup after the app allocates the main stack/PCR/TLS blocks. pub fn install_initial_thread( @@ -612,15 +1221,114 @@ impl KernelState { pub fn alloc_handle_for(&mut self, obj: KernelObject) -> u32 { let h = self.alloc_handle(); + // Phase C+15-α: schema-v1 `handle.create` event. Cvar-gated + // default-off via `event_log::is_enabled()`. Centralized here so + // every alloc_handle_for site (39+ call sites across exports.rs, + // xam.rs) emits a symmetric handle.create. Semantic ID is + // FNV-1a(0, tid, tid_event_idx_at_creation, object_type) — see + // schema-v1.md. Canary emits the symmetric event at + // `ObjectTable::AddHandle`. + if crate::event_log::is_enabled() { + let object_type = obj.schema_object_type(); + let (tid, cycle) = if let Some(r) = self.scheduler.current { + let t = self.scheduler.thread(r); + (t.tid, t.ctx.timebase) + } else { + (0u32, 0u64) + }; + crate::event_log::emit_handle_create_auto( + tid, + cycle, + /* create_site_pc */ 0, + object_type, + h, + /* object_name */ None, + ); + } self.objects.insert(h, obj); // Each fresh handle starts with one logical reference (the creator). // `NtDuplicateObject` bumps this; `NtClose` decrements; the object is // only dropped when the count reaches zero. See `nt_close` for the // aliased-handle rationale. self.handle_refcount.insert(h, 1); + // Phase C+19: the canonical kernel object starts with exactly one + // slot — its own. `NtDuplicateObject` bumps this every time it + // allocates a fresh dup slot; `nt_close` of a slot whose + // `handle_refcount` reaches zero decrements this and only drops + // `state.objects[h]` when all slots are gone. 
Mirrors canary's + // `XObject::handle_ref_count_` semantics (xobject.cc:73-87). + self.canonical_slot_count.insert(h, 1); h } + /// Phase C+19: resolve a handle id through the alias map to its + /// canonical id (the key under which `state.objects` holds the + /// underlying `KernelObject`). Idempotent for non-aliased handles — + /// `resolve_handle(h) == h` whenever `h` is a canonical id or an + /// unknown id. + /// + /// Used by every Nt*/Ke* lookup site to ensure that signal-on-dup + /// wakes wait-on-source (AUDIT-062 invariant). Cheap: single + /// `HashMap::get`. + pub fn resolve_handle(&self, h: u32) -> u32 { + self.handle_aliases.get(&h).copied().unwrap_or(h) + } + + /// Bump the per-handle refcount by one. Mirrors canary's + /// `XObject::RetainHandle()` → `ObjectTable::RetainHandle` + /// (xobject.cc:73-75, object_table.cc:218-228). Returns the new + /// refcount. Phase C+16: used by thread-spawn paths to install the + /// "thread owns itself until exited" reference (canary's + /// `XThread::Create` line 414). Without this, `XamTaskCloseHandle`'s + /// NtClose drops the only ref and destroys the thread handle while + /// the spawned thread is still live — surfaces as an extra + /// `handle.destroy` event at Phase A idx=102168 on the main chain. + /// No `handle.create` event is emitted (the handle already exists); + /// canary's symmetric path also emits no event on `RetainHandle`. + pub fn retain_handle(&mut self, handle: u32) -> u32 { + let c = self.handle_refcount.entry(handle).or_insert(0); + *c = c.saturating_add(1); + *c + } + + /// Decrement the per-handle refcount by one; if it reaches zero, drop + /// the underlying object and emit a `handle.destroy` event. Mirrors + /// canary's `XObject::ReleaseHandle()` → + /// `ObjectTable::ReleaseHandle`/`RemoveHandle` (xobject.cc:77-81, + /// object_table.cc:230-295). Returns `true` if the final reference + /// was released (object destroyed), `false` if other references + /// remain. 
Phase C+16: used by thread-exit paths to release the + /// self-reference installed by `retain_handle` at spawn time. + pub fn release_handle(&mut self, handle: u32) -> bool { + let prior_rc = self.handle_refcount.get(&handle).copied().unwrap_or(0); + let remaining = self + .handle_refcount + .get_mut(&handle) + .map(|c| { + *c = c.saturating_sub(1); + *c + }) + .unwrap_or(0); + if remaining == 0 { + self.objects.remove(&handle); + self.handle_refcount.remove(&handle); + self.async_file_handles.remove(&handle); + self.disarm_timer(handle); + if crate::event_log::is_enabled() { + let (tid, cycle) = if let Some(r) = self.scheduler.current { + let t = self.scheduler.thread(r); + (t.tid, t.ctx.timebase) + } else { + (0u32, 0u64) + }; + crate::event_log::emit_handle_destroy_auto(tid, cycle, handle, prior_rc); + } + true + } else { + false + } + } + // ===== Handle audit hooks ===== // // These are no-ops when `audit.enabled == false`, so call sites can @@ -860,6 +1568,37 @@ impl KernelState { idx } + /// review-a Step 1 — one-shot per-round helper to fire the + /// `--force-spawn-workers` crowbar. Returns the number of workers + /// successfully resumed (1..=4 = at least partial fire; 0 = nothing + /// happened, either because the cvar is off, the trigger hasn't been + /// reached yet, or the helper has already fired). No-op when the + /// crowbar is disabled or the latch is already set. + /// + /// Called from `xenia-app::coord_pre_round`. Honest failure modes: + /// the workers themselves may fault on bad guest memory, or block + /// further down the chain — that's the diagnostic outcome being + /// tested. 
+ pub fn try_fire_crowbar_workers( + &mut self, + mem: &GuestMemory, + instruction_count: u64, + ) -> u32 { + if !self.crowbar_workers_enabled || self.crowbar_workers_fired { + return 0; + } + if instruction_count < self.crowbar_workers_trigger_instr { + return 0; + } + self.crowbar_workers_fired = true; + tracing::warn!( + "CROWBAR: trigger reached @ instr={} (threshold={}), firing 4 workers", + instruction_count, + self.crowbar_workers_trigger_instr, + ); + crate::exports::crowbar_force_spawn_workers(self, mem) + } + /// Allocate guest memory from the heap bump allocator. /// Returns the base address of the allocated region. pub fn heap_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option { @@ -882,6 +1621,42 @@ impl KernelState { Some(base) } + /// Iterate 2.H — top-down 64KB-paged allocator for the canary + /// `vA0000000` physical heap (`0xA0000000-0xBFFFFFFF`). + /// `MmAllocatePhysicalMemoryEx` routes large-page (`X_MEM_LARGE_PAGES`, + /// 0x20000000) requests here. Returns `None` if the cursor would + /// underflow past the bucket floor (`0xA000_0000`). + pub fn physical_heap_alloc(&self, size: u32, mem: &GuestMemory) -> Option { + use std::sync::atomic::Ordering; + if size == 0 { + return None; + } + // 64KB page rounding — canary's vA0000000 heap uses 64*1024 pages. + let aligned_size = (size + 0xFFFF) & !0xFFFF; + // Top-down: subtract first, the returned base IS the new cursor. + // CAS loop preserves the lock-free invariant heap_alloc enjoys. 
+ let base = loop { + let cur = self.physical_heap_cursor.load(Ordering::Relaxed); + let new_cur = cur.checked_sub(aligned_size)?; + if new_cur < 0xA000_0000 { + return None; + } + match self.physical_heap_cursor.compare_exchange( + cur, + new_cur, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => break new_cur, + Err(_) => continue, + } + }; + let protect = xenia_memory::page_table::MemoryProtect::READ + | xenia_memory::page_table::MemoryProtect::WRITE; + mem.alloc(base, aligned_size, protect).ok()?; + Some(base) + } + /// Allocate a kernel stack. pub fn stack_alloc(&mut self, size: u32, mem: &GuestMemory) -> Option { use std::sync::atomic::Ordering; @@ -1026,20 +1801,36 @@ impl Default for KernelState { } } -/// AUDIT-054 — pick the cache root path + wipe-on-init mode for a -/// fresh `KernelState`. +/// Pick the cache root path + wipe-on-init mode for a fresh +/// `KernelState`. /// -/// Default behaviour matches AUDIT-038: per-process tmpdir + full -/// wipe so two consecutive runs see byte-identical initial state -/// (lockstep / oracle determinism). AUDIT-054 found that Sylpheed's -/// `cache:\.tmp` journal-style writes append on each boot, so -/// a naive persistent root makes the on-disk state self-inconsistent -/// after the second boot (`runtime_error` throws from version-check -/// on reload). Opt-in to persistence via env: -/// * `XENIA_CACHE_ROOT=` — explicit persistent path. Caller -/// is responsible for wiping when needed. -/// * `XENIA_CACHE_PERSIST=1` — use `$XDG_DATA_HOME/xenia-rs/cache` -/// (or `$HOME/.local/share/xenia-rs/cache`) without wiping. +/// Phase C+11 (2026-05-14) — default flipped to PERSISTENT. Prior +/// AUDIT-038 behaviour (per-process tmpdir + wipe) is still +/// reachable via `XENIA_CACHE_WIPE=1`. Rationale for the flip: +/// +/// * AUDIT-052 refuted AUDIT-038's "missing-or-stale ≡ fresh" +/// premise: Sylpheed's work-submitter wakeup is GATED on cache +/// existence, so wipe-on-boot blocks the cache-build cascade. 
+/// * AUDIT-054 introduced opt-in `XENIA_CACHE_PERSIST=1`; the +/// Phase C+11 fixes (NtSetInformationFile class 10 rename + +/// `is_dir_open` existing-file-wins + STATUS_NO_SUCH_FILE on +/// query miss + sign-extended status returns) make +/// Sylpheed's own cache-build path converge to canary-parity +/// leaf layout. The diff harness no longer needs the wipe. +/// * The C+10 args_resolved.path emitter surfaces any cache +/// divergence in the Phase A diff regardless of cache state, +/// so the original "lockstep determinism" rationale for the +/// wipe is no longer the only mechanism preventing silent +/// cache divergences. +/// +/// Env-var contract (unchanged): +/// * `XENIA_CACHE_ROOT=` — explicit persistent path. +/// Highest precedence. No wipe. +/// * `XENIA_CACHE_PERSIST=1` — alias for the new default. Kept +/// for backwards compatibility (no-op now). +/// * `XENIA_CACHE_WIPE=1` — opt back into the AUDIT-038 +/// per-process tmpdir + wipe. Use for emergency lockstep +/// state-reset scenarios. /// /// Returns `(root, wipe)` where `wipe = true` triggers the /// `init_cache_root` clear-then-recreate dance. @@ -1049,37 +1840,55 @@ fn resolve_default_cache_root() -> (std::path::PathBuf, bool) { return (std::path::PathBuf::from(p), false); } } - let persist = std::env::var("XENIA_CACHE_PERSIST") + // Opt-out: explicit AUDIT-038-style wipe + tmpdir. Kept for + // emergency state-reset, e.g. Phase A determinism baseline + // captures that must start from a known-empty cache. 
+ let wipe_explicit = std::env::var("XENIA_CACHE_WIPE") .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) .unwrap_or(false); - if persist { - if let Ok(xdg) = std::env::var("XDG_DATA_HOME") { - if !xdg.is_empty() { - return ( - std::path::PathBuf::from(xdg).join("xenia-rs/cache"), - false, - ); - } - } - if let Ok(home) = std::env::var("HOME") { - if !home.is_empty() { - return ( - std::path::PathBuf::from(home).join(".local/share/xenia-rs/cache"), - false, - ); - } + if wipe_explicit { + static NEXT_CACHE_ID: std::sync::atomic::AtomicU64 = + std::sync::atomic::AtomicU64::new(0); + let id = NEXT_CACHE_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + return ( + std::env::temp_dir().join(format!( + "xenia-rs-cache-{}-{}", + std::process::id(), + id + )), + true, + ); + } + // Default: persistent cache at the standard XDG location. + // `XENIA_CACHE_PERSIST=1` is a no-op alias for the default + // — keep accepting it for callers that set it explicitly. + if let Ok(xdg) = std::env::var("XDG_DATA_HOME") { + if !xdg.is_empty() { + return ( + std::path::PathBuf::from(xdg).join("xenia-rs/cache"), + false, + ); } } - static NEXT_CACHE_ID: std::sync::atomic::AtomicU64 = + if let Ok(home) = std::env::var("HOME") { + if !home.is_empty() { + return ( + std::path::PathBuf::from(home).join(".local/share/xenia-rs/cache"), + false, + ); + } + } + // Final fallback: tmpdir without wipe (no $HOME, very rare). 
+ static NEXT_CACHE_ID_FALLBACK: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0); - let id = NEXT_CACHE_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + let id = NEXT_CACHE_ID_FALLBACK.fetch_add(1, std::sync::atomic::Ordering::Relaxed); ( std::env::temp_dir().join(format!( - "xenia-rs-cache-{}-{}", + "xenia-rs-cache-fallback-{}-{}", std::process::id(), id )), - true, + false, ) } @@ -1338,6 +2147,32 @@ mod tests { } } + /// Iterate 2.H — `physical_heap_alloc` must hand back addresses in the + /// `0xA0000000-0xBFFFFFFF` range, 64KB-page-aligned, in descending + /// (top-down) order. Mirrors canary's `vA0000000` `PhysicalHeap` policy + /// (xenia-canary memory.cc:269-271 + xboxkrnl_memory.cc top_down=true). + #[test] + fn physical_heap_alloc_descends_in_va_range() { + let mem = GuestMemory::new().expect("memory init"); + let state = KernelState::new(); + let mut prev = 0xC000_0000u32; + for _ in 0..10 { + let addr = state + .physical_heap_alloc(0x1234, &mem) + .expect("physical heap must service small allocs"); + assert!( + (0xA000_0000..0xC000_0000).contains(&addr), + "phys alloc {addr:#x} outside vA0000000 range" + ); + assert_eq!(addr & 0xFFFF, 0, "phys alloc {addr:#x} not 64KB-aligned"); + assert!( + addr < prev, + "phys alloc {addr:#x} did not descend below previous {prev:#x}" + ); + prev = addr; + } + } + /// `heap_alloc(0)` must not advance the cursor (it has nothing to do). /// The kernel exports that previously hit this path did so because they /// read the wrong argument register; guarded at the export boundary now. @@ -1635,6 +2470,41 @@ mod tests { assert!(state.ctor_probe_pcs.contains(&0x8217_C850)); } + #[test] + fn register_unimplemented_export_marks_set_membership() { + // Phase C+6: `register_unimplemented_export` must (a) install the + // export func like `register_export` does, AND (b) flag the + // (module, ord) pair in `unimplemented_exports` so the Phase A + // emitter inside `call_export` can suppress events for it. 
Without + // (a), guest calls would fault as "unimplemented ordinal". Without + // (b), ours would inject `import.call`/`kernel.call`/ + // `kernel.return` triples that canary's syscall-thunk path never + // emits, drifting Phase A alignment. + fn noop(_: &mut PpcContext, _: &GuestMemory, _: &mut KernelState) {} + let mut state = KernelState::new(); + state.register_unimplemented_export( + ModuleId::Xboxkrnl, + 0xFFEE, + "FakeUnimplementedXboxkrnl", + noop, + ); + assert!(state.exports.contains_key(&(ModuleId::Xboxkrnl, 0xFFEE))); + assert!(state + .unimplemented_exports + .contains(&(ModuleId::Xboxkrnl, 0xFFEE))); + // A normal `register_export` must NOT mark it unimplemented. + state.register_export( + ModuleId::Xboxkrnl, + 0xFFEF, + "FakeRegularXboxkrnl", + noop, + ); + assert!(state.exports.contains_key(&(ModuleId::Xboxkrnl, 0xFFEF))); + assert!(!state + .unimplemented_exports + .contains(&(ModuleId::Xboxkrnl, 0xFFEF))); + } + #[test] fn read_ascii_cstring_handles_termination_and_garbage() { use xenia_memory::page_table::MemoryProtect; diff --git a/crates/xenia-kernel/src/xam.rs b/crates/xenia-kernel/src/xam.rs index 9950e45..f08896d 100644 --- a/crates/xenia-kernel/src/xam.rs +++ b/crates/xenia-kernel/src/xam.rs @@ -30,7 +30,7 @@ pub fn register_exports(state: &mut KernelState) { // Task state.register_export(Xam, 0x01AF, "XamTaskSchedule", xam_task_schedule); - state.register_export(Xam, 0x01B1, "XamTaskCloseHandle", stub_success); + state.register_export(Xam, 0x01B1, "XamTaskCloseHandle", xam_task_close_handle); state.register_export(Xam, 0x01B3, "XamTaskShouldExit", stub_return_zero); // Alloc @@ -56,7 +56,7 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xam, 0x0258, "XamContentCreate", stub_success); state.register_export(Xam, 0x025A, "XamContentClose", stub_success); state.register_export(Xam, 0x025B, "XamContentDelete", stub_success); - state.register_export(Xam, 0x025C, "XamContentCreateEnumerator", stub_success); + 
state.register_export(Xam, 0x025C, "XamContentCreateEnumerator", xam_content_create_enumerator); state.register_export(Xam, 0x025E, "XamContentGetDeviceData", stub_success); state.register_export(Xam, 0x025F, "XamContentGetDeviceName", stub_success); state.register_export(Xam, 0x0260, "XamContentSetThumbnail", stub_success); @@ -80,7 +80,10 @@ pub fn register_exports(state: &mut KernelState) { state.register_export(Xam, 0x02BC, "XamShowSigninUI", stub_success); state.register_export(Xam, 0x02C1, "XamShowKeyboardUI", stub_success); state.register_export(Xam, 0x02CB, "XamShowDeviceSelectorUI", stub_success); - state.register_export(Xam, 0x02D5, "XamShowGamerCardUIForXUID", stub_success); + // Class-E in canary (table entry only, no DECLARE_XAM_EXPORT shim) — canary's + // syscall-thunk path emits no Phase A events. Mirror via + // `register_unimplemented_export` so ours stays silent too. C+6.5-pattern fix. + state.register_unimplemented_export(Xam, 0x02D5, "XamShowGamerCardUIForXUID", stub_success); state.register_export(Xam, 0x02D9, "XamShowDirtyDiscErrorUI", stub_success); state.register_export(Xam, 0x02DC, "XamShowMessageBoxUIEx", stub_success); @@ -262,6 +265,13 @@ fn xam_task_schedule(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut Kernel if let Some(KernelObject::Thread { hw_id: slot, .. }) = state.objects.get_mut(&handle) { *slot = Some(hw_id); } + // Phase C+16: thread self-reference. See `ex_create_thread`. + // The canary path `XamTaskSchedule_entry` → `thread->Create()` + // → `RetainHandle()` (xthread.cc:414) installs this; without + // it, `XamTaskCloseHandle` → `NtClose` destroys the handle + // prematurely. This is the exact Phase A idx=102168 fix on + // the main chain. 
+ state.retain_handle(handle); if handle_ptr != 0 { mem.write_u32(handle_ptr, handle); } @@ -284,6 +294,41 @@ fn xam_task_schedule(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut Kernel } } +/// `XamTaskCloseHandle(handle)` — release the handle minted by +/// `XamTaskSchedule`. Mirrors xenia-canary's `XamTaskCloseHandle_entry` +/// (xam_task.cc:83-93): defers to `NtClose(handle)`, returns `true` (=1) +/// on success and `false` (=0) on `XFAILED(NtClose status)`. Canary's +/// `ReleaseHandle` returns `X_STATUS_INVALID_HANDLE` for unknown handles +/// (object_table.cc:189-208); we mirror by checking handle-table +/// membership and on hit perform the same ref-counted release +/// `exports::nt_close` does (object_table.cc:194-208). Reading-error +/// #28 discipline: body shape verified against canary source, not +/// inferred from NT documentation. +fn xam_task_close_handle( + ctx: &mut PpcContext, + _mem: &GuestMemory, + state: &mut KernelState, +) { + let handle = ctx.gpr[3] as u32; + // Phase C+19: validate against the canonical slot (alias-aware) so a + // duplicated thread-task handle still passes the XFAILED check. + let canonical = state.resolve_handle(handle); + if !state.objects.contains_key(&canonical) && !state.handle_refcount.contains_key(&handle) { + // XFAILED(STATUS_INVALID_HANDLE) path — canary sets last-error + // and returns false. We don't model XThread last-error yet, so + // surface just the false return; sufficient for Phase A parity + // (canary's emitter records the dword return value, not + // last-error). + ctx.gpr[3] = 0; + return; + } + // Phase C+19: route through the shared close path so the alias map, + // slot count, async-file side-table, and handle.destroy event are all + // handled symmetrically with `nt_close`. 
+ crate::exports::close_handle_internal(state, handle); + ctx.gpr[3] = 1; +} + // ===== Alloc ===== fn xam_alloc(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { @@ -306,20 +351,52 @@ fn xam_alloc(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { // ===== User ===== +/// Canary default profile XUID — `0xB13EBABEBABEBABE` per +/// `xenia-canary/src/xenia/kernel/xam/user_profile.cc` defaults. The +/// `0xC000000000000000` mask bits tag the XUID as a local profile, which +/// is what title 58410A1F probes via `xuid & 0x00C0000000000000` (per the +/// comment in `UserProfile::UserProfile`). Used by `XamUserGetXUID` / +/// `XamUserGetSigninInfo` / friends so signed-in slot 0 reports a real +/// id instead of zero. +pub const DEFAULT_USER_XUID: u64 = 0xB13E_BABE_BABE_BABE; + +/// Canary default gamertag. +pub const DEFAULT_USER_GAMERTAG: &str = "User"; + fn xam_user_get_xuid(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = user_index, r4 = xuid_ptr + let user_index = ctx.gpr[3] as u32; let xuid_ptr = ctx.gpr[4] as u32; + let xuid = if user_index == 0 { DEFAULT_USER_XUID } else { 0 }; if xuid_ptr != 0 { - mem.write_u64(xuid_ptr, 0); // No XUID + mem.write_u64(xuid_ptr, xuid); } ctx.gpr[3] = 0; } fn xam_user_get_name(ctx: &mut PpcContext, mem: &GuestMemory, _state: &mut KernelState) { // r3 = user_index, r4 = buffer, r5 = buffer_size + let user_index = ctx.gpr[3] as u32; let buffer = ctx.gpr[4] as u32; - if buffer != 0 { - mem.write_u8(buffer, 0); // Empty string + let buffer_size = ctx.gpr[5] as u32; + if buffer == 0 || buffer_size == 0 { + ctx.gpr[3] = 0; + return; + } + if user_index == 0 { + // Write the canary default gamertag, NUL-terminated, truncated to + // fit `buffer_size`. Canary returns the gamertag from the active + // profile; ours uses the fixed default. 
+ let bytes = DEFAULT_USER_GAMERTAG.as_bytes(); + let n = (bytes.len() as u32).min(buffer_size.saturating_sub(1)); + for i in 0..n { + mem.write_u8(buffer + i, bytes[i as usize]); + } + mem.write_u8(buffer + n, 0); + } else { + // No profile in slots 1-3; canary's `XamUserGetName` returns + // empty string in that case. + mem.write_u8(buffer, 0); } ctx.gpr[3] = 0; } @@ -335,6 +412,104 @@ fn xam_user_get_signin_state(ctx: &mut PpcContext, _mem: &GuestMemory, _state: & ctx.gpr[3] = if user_index == 0 { 1 } else { 0 }; } +// ===== Content ===== + +/// `XamContentCreateEnumerator(user_index, device_id, content_type, +/// content_flags, items_per_enumerate, buffer_size_ptr, handle_out)`. +/// Mirrors xenia-canary `XamContentCreateEnumerator_entry` +/// (xam_content.cc:129-220). Reading-error #28 discipline applied: body +/// shape verified against canary source. +/// +/// Canary's normal-path success returns `X_ERROR_SUCCESS` (0) with a +/// fresh enumerator handle in `*handle_out`. The Phase A oracle at +/// `tid_event_idx=102197` shows canary returning `X_ERROR_NO_SUCH_USER` +/// (`0x525`, 1317) with empty side_effects — the call hit the +/// `if (!user) return X_ERROR_NO_SUCH_USER;` early-return at +/// xam_content.cc:153-155 because no profile is installed in canary's +/// default `--mute=true` config (no `--profile_slot_*` flags). +/// +/// Ours has no profile-manager state, so all `user_index != 0xFE` +/// queries miss. Mirror the early-return: write `*buffer_size_ptr` per +/// canary line 145-147 (which executes *before* the user check) and +/// return `X_ERROR_NO_SUCH_USER`. Implementing real content enumeration +/// is an XAM-content-subsystem session (escalation-tier scope), not +/// this fix. +/// +/// Side note on internal consistency: ours's `xam_user_get_signin_state` +/// returns 1 for `user_index == 0`, conflicting with the "no profile" +/// model used here. 
That divergence surfaces later in the Phase A trace +/// (idx 107996+) and is a separate fix — deferred per single-fix +/// discipline. +fn xam_content_create_enumerator( + ctx: &mut PpcContext, + mem: &GuestMemory, + _state: &mut KernelState, +) { + const X_USER_INDEX_NONE: u32 = 0xFE; + const X_USER_INDEX_LATEST: u32 = 0xFD; + const X_USER_MAX_USER_COUNT: u32 = 4; + const X_E_INVALIDARG: u32 = 0x8007_0057; + const X_ERROR_NO_SUCH_USER: u32 = 0x0000_0525; + const X_ERROR_SUCCESS: u32 = 0; + const X_CONTENT_DATA_SIZE: u32 = 0x134; + + let user_index = ctx.gpr[3] as u32; + let device_id = ctx.gpr[4] as u32; + let _content_type = ctx.gpr[5] as u32; + let _content_flags = ctx.gpr[6] as u32; + let items_per_enumerate = ctx.gpr[7] as u32; + let buffer_size_ptr = ctx.gpr[8] as u32; + let handle_out = ctx.gpr[9] as u32; + + // Canary xam_content.cc:135-143 — `device_id != 0 && device_info == + // nullptr` OR `!handle_out` → X_E_INVALIDARG, with + // `*buffer_size_ptr = 0` if non-null. Ours's `GetDummyDeviceInfo` + // accepts HDD (1) and ODD (2) per dummy_device_info.cc. For + // simplicity (and because Sylpheed exercises `device_id == 0` on + // first call per canary trace), accept device_id ∈ {0, 1, 2}; reject + // larger. + let device_unknown = device_id != 0 && device_id > 2; + if device_unknown || handle_out == 0 { + if buffer_size_ptr != 0 { + mem.write_u32(buffer_size_ptr, 0); + } + ctx.gpr[3] = X_E_INVALIDARG as u64; + return; + } + + // Canary line 145-147 — written *before* the user-profile check. + if buffer_size_ptr != 0 { + mem.write_u32(buffer_size_ptr, X_CONTENT_DATA_SIZE.wrapping_mul(items_per_enumerate)); + } + + // Canary line 150-158 — `if (user_index != XUserIndexNone) { user = + // GetUserProfile(user_index); if (!user) return X_ERROR_NO_SUCH_USER; + // xuid = user->xuid(); }`. Ours has no profile manager, so any + // `user_index != 0xFE` misses. 
Also reject indices ≥ 4 (canary's + // GetUserProfile out-of-range path returns nullptr, falling into + // the same branch). `XUserIndexLatest` (0xFD) is special-cased in + // canary's GetUserProfile but still produces nullptr without a + // profile installed. + if user_index != X_USER_INDEX_NONE { + let out_of_range = user_index >= X_USER_MAX_USER_COUNT + && user_index != X_USER_INDEX_LATEST; + let _ = out_of_range; // documentation only — both branches → no user + ctx.gpr[3] = X_ERROR_NO_SUCH_USER as u64; + return; + } + + // user_index == XUserIndexNone: canary skips profile lookup and + // proceeds to enumerator creation. With no installed content the + // enumerator init succeeds and `*handle_out` receives a fresh + // handle. We don't have an XEnumerator object model yet; return + // success with handle_out=0 as a stub. Defensive: never exercised + // in the current Phase A oracle (canary fires user_index!=0xFE). + if handle_out != 0 { + mem.write_u32(handle_out, 0); + } + ctx.gpr[3] = X_ERROR_SUCCESS as u64; +} + // ===== System ===== fn xam_get_execution_id(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { @@ -424,7 +599,8 @@ fn xam_notify_create_listener(ctx: &mut PpcContext, mem: &GuestMemory, state: &m } fn xnotify_get_next(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) { - let handle = ctx.gpr[3] as u32; + // Phase C+19: canonicalize dup ids → source. + let handle = state.resolve_handle(ctx.gpr[3] as u32); let match_id = ctx.gpr[4] as u32; let id_ptr = ctx.gpr[5] as u32; let param_ptr = ctx.gpr[6] as u32; @@ -578,6 +754,206 @@ mod tests { assert_eq!(ctx.gpr[3], 8); } + /// XamTaskCloseHandle on a valid Thread handle must release the + /// object (ref-counted) and return 1, matching canary's + /// `XamTaskCloseHandle_entry` (xam_task.cc:83-93) which delegates + /// to `NtClose` and returns `true` on `XSUCCESS`. 
+ #[test] + fn xam_task_close_handle_valid_handle_returns_one_and_releases() { + let (mut ctx, mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset: true, + signaled: false, + waiters: Vec::new(), + }); + // alloc_handle_for is expected to install a refcount of 1. + assert!( + state.objects.contains_key(&handle), + "fresh handle should be in object table" + ); + + ctx.gpr[3] = handle as u64; + xam_task_close_handle(&mut ctx, &mem, &mut state); + + assert_eq!( + ctx.gpr[3], 1, + "valid handle close must return 1 (canary parity, xam_task.cc:92)" + ); + assert!( + !state.objects.contains_key(&handle), + "object must be dropped when refcount hits zero" + ); + assert!( + !state.handle_refcount.contains_key(&handle), + "refcount entry must be scrubbed" + ); + } + + /// XamTaskCloseHandle on an unknown handle must return 0 (false), + /// matching canary's `XFAILED(NtClose)` branch returning `false` + /// after `XThread::SetLastError(rtl_dos_error)`. + #[test] + fn xam_task_close_handle_invalid_handle_returns_zero() { + let (mut ctx, mem, mut state) = fresh(); + ctx.gpr[3] = 0xDEAD_BEEFu64; + xam_task_close_handle(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], 0, + "invalid handle close must return 0 (canary parity, xam_task.cc:89)" + ); + } + + /// XamTaskCloseHandle with a duplicated (refcounted) handle must + /// keep the object alive after one close and drop it after two. + /// Mirrors canary's `ObjectTable::ReleaseHandle` + /// (object_table.cc:200-208). + #[test] + fn xam_task_close_handle_respects_refcount() { + let (mut ctx, mem, mut state) = fresh(); + let handle = state.alloc_handle_for(KernelObject::Event { + manual_reset: false, + signaled: false, + waiters: Vec::new(), + }); + // Bump refcount to simulate NtDuplicateObject aliasing. 
+ *state.handle_refcount.entry(handle).or_insert(1) += 1; + + ctx.gpr[3] = handle as u64; + xam_task_close_handle(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 1, "first close returns 1"); + assert!( + state.objects.contains_key(&handle), + "object must survive first close (refcount > 0)" + ); + + ctx.gpr[3] = handle as u64; + xam_task_close_handle(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 1, "second close also returns 1"); + assert!( + !state.objects.contains_key(&handle), + "object must be dropped after second close (refcount == 0)" + ); + } + + /// End-to-end parity: spawn an XAM task with `xam_task_schedule`, + /// then close the resulting handle via `xam_task_close_handle`. + /// This is the exact dataflow Sylpheed exercises at Phase A + /// `tid_event_idx=102156..102158` on the main chain. + /// + /// Phase C+16: After C+16, `xam_task_schedule` installs a thread + /// self-reference (refcount=2 post-spawn), so the user's NtClose + /// (via XamTaskCloseHandle) only drops to refcount=1; the handle + /// survives until the spawned thread exits. This mirrors canary's + /// `XThread::Create` → `RetainHandle()` → `XThread::Exit` → + /// `ReleaseHandle()` lifecycle (xthread.cc:414/524). + #[test] + fn xam_task_schedule_then_close_round_trip_returns_one() { + let (mut ctx, mut mem, mut state) = fresh(); + let callback_pc: u32 = 0x824a_93c8; + let message_ptr: u32 = SCRATCH_BASE + 0x100; + let handle_out: u32 = SCRATCH_BASE + 0x200; + ctx.gpr[3] = callback_pc as u64; + ctx.gpr[4] = message_ptr as u64; + ctx.gpr[5] = 0; + ctx.gpr[6] = handle_out as u64; + ctx.lr = 0x824a_9a14; + xam_task_schedule(&mut ctx, &mut mem, &mut state); + assert_eq!(ctx.gpr[3], 0, "schedule succeeded"); + + let handle = mem.read_u32(handle_out); + // Phase C+16: post-spawn refcount must be 2 (creator + self-ref). 
+ assert_eq!( + state.handle_refcount.get(&handle).copied(), + Some(2), + "post-spawn refcount must include the thread self-reference" + ); + + ctx.gpr[3] = handle as u64; + xam_task_close_handle(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], 1, + "schedule→close round-trip must return 1 (Phase A idx=102158 parity)" + ); + // Phase C+16: after close, the self-ref still holds the object. + assert!( + state.objects.contains_key(&handle), + "object must survive XamTaskCloseHandle because the spawned thread holds a self-ref" + ); + assert_eq!( + state.handle_refcount.get(&handle).copied(), + Some(1), + "post-close refcount = self-ref only (canary XThread::Create::RetainHandle parity)" + ); + } + + /// Phase C+16: refcount lifecycle balance test. + /// Schedule task → close handle (refcount 2→1) → simulate thread + /// exit by calling `release_handle` (refcount 1→0). After both: + /// object destroyed, refcount entry scrubbed. Mirrors canary's + /// `XamTaskCloseHandle`→`NtClose`+`XThread::Exit::ReleaseHandle`. + #[test] + fn xam_task_schedule_close_then_thread_exit_destroys_handle() { + let (mut ctx, mut mem, mut state) = fresh(); + let callback_pc: u32 = 0x824a_93c8; + let message_ptr: u32 = SCRATCH_BASE + 0x100; + let handle_out: u32 = SCRATCH_BASE + 0x200; + ctx.gpr[3] = callback_pc as u64; + ctx.gpr[4] = message_ptr as u64; + ctx.gpr[5] = 0; + ctx.gpr[6] = handle_out as u64; + ctx.lr = 0x824a_9a14; + xam_task_schedule(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_out); + + // User closes the handle (refcount 2→1, object survives). + ctx.gpr[3] = handle as u64; + xam_task_close_handle(&mut ctx, &mem, &mut state); + assert!(state.objects.contains_key(&handle)); + + // Simulate thread exit releasing the self-ref (refcount 1→0). 
+ let destroyed = state.release_handle(handle); + assert!(destroyed, "release_handle must return true on final ref drop"); + assert!( + !state.objects.contains_key(&handle), + "object must be destroyed once both user-ref and self-ref are released" + ); + assert!( + !state.handle_refcount.contains_key(&handle), + "refcount entry must be scrubbed" + ); + } + + /// Phase C+16: thread-exit-before-close ordering. Tests the reverse + /// of the prior case — thread exits first (self-ref released), user + /// then closes (creator-ref released → destroy). Both orderings + /// must converge on a clean destroy with no double-free. + #[test] + fn xam_task_thread_exit_then_close_destroys_handle() { + let (mut ctx, mut mem, mut state) = fresh(); + let callback_pc: u32 = 0x824a_93c8; + let message_ptr: u32 = SCRATCH_BASE + 0x100; + let handle_out: u32 = SCRATCH_BASE + 0x200; + ctx.gpr[3] = callback_pc as u64; + ctx.gpr[4] = message_ptr as u64; + ctx.gpr[5] = 0; + ctx.gpr[6] = handle_out as u64; + ctx.lr = 0x824a_9a14; + xam_task_schedule(&mut ctx, &mut mem, &mut state); + let handle = mem.read_u32(handle_out); + + // Thread exits first (releases self-ref, refcount 2→1). + let destroyed_first = state.release_handle(handle); + assert!(!destroyed_first, "self-ref drop must not destroy (creator still holds)"); + assert!(state.objects.contains_key(&handle)); + + // User closes (refcount 1→0 → destroy). 
+ ctx.gpr[3] = handle as u64; + xam_task_close_handle(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 1, "close returns 1"); + assert!(!state.objects.contains_key(&handle)); + assert!(!state.handle_refcount.contains_key(&handle)); + } + #[test] fn xam_user_get_signin_state_user0_signed_in_locally() { let (mut ctx, mem, mut state) = fresh(); @@ -688,4 +1064,113 @@ mod tests { assert_eq!(mem.read_u32(id_ptr), 0); assert_eq!(mem.read_u32(param_ptr), 0); } + + /// Phase A oracle case at `tid_event_idx=102197`: canary returns + /// `X_ERROR_NO_SUCH_USER` (0x525) because no profile is installed. + /// Sylpheed must be querying with a `user_index < 4`. + #[test] + fn xam_content_create_enumerator_returns_no_such_user_for_user0() { + let (mut ctx, mem, mut state) = fresh(); + let buffer_size_ptr = SCRATCH_BASE + 0x100; + let handle_out = SCRATCH_BASE + 0x200; + ctx.gpr[3] = 0; // user_index = 0 (signed-in slot, no profile in ours) + ctx.gpr[4] = 1; // device_id = HDD + ctx.gpr[5] = 0x0000_0001; // content_type + ctx.gpr[6] = 0; // content_flags + ctx.gpr[7] = 4; // items_per_enumerate + ctx.gpr[8] = buffer_size_ptr as u64; + ctx.gpr[9] = handle_out as u64; + xam_content_create_enumerator(&mut ctx, &mem, &mut state); + assert_eq!( + ctx.gpr[3], 0x0000_0525, + "canary mirror: X_ERROR_NO_SUCH_USER for any user_index < 4" + ); + // Canary writes buffer_size_ptr BEFORE the user-profile check; + // the X_ERROR_NO_SUCH_USER path keeps the computed size value. + assert_eq!( + mem.read_u32(buffer_size_ptr), + 0x134 * 4, + "buffer_size_ptr must equal sizeof(XCONTENT_DATA) * items_per_enumerate" + ); + } + + #[test] + fn xam_content_create_enumerator_invalid_handle_out_returns_invalidarg() { + let (mut ctx, mem, mut state) = fresh(); + let buffer_size_ptr = SCRATCH_BASE + 0x100; + // Seed scratch with a sentinel so we can detect the buffer_size + // = 0 reset. 
+ mem.write_u32(buffer_size_ptr, 0xDEAD_BEEF); + ctx.gpr[3] = 0; + ctx.gpr[4] = 1; + ctx.gpr[5] = 0x0000_0001; + ctx.gpr[6] = 0; + ctx.gpr[7] = 4; + ctx.gpr[8] = buffer_size_ptr as u64; + ctx.gpr[9] = 0; // handle_out = NULL → X_E_INVALIDARG + xam_content_create_enumerator(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0x8007_0057); + assert_eq!( + mem.read_u32(buffer_size_ptr), + 0, + "X_E_INVALIDARG path resets *buffer_size_ptr to 0" + ); + } + + #[test] + fn xam_content_create_enumerator_unknown_device_returns_invalidarg() { + let (mut ctx, mem, mut state) = fresh(); + let buffer_size_ptr = SCRATCH_BASE + 0x100; + let handle_out = SCRATCH_BASE + 0x200; + mem.write_u32(buffer_size_ptr, 0xDEAD_BEEF); + ctx.gpr[3] = 0; + ctx.gpr[4] = 99; // device_id = unknown + ctx.gpr[5] = 0x0000_0001; + ctx.gpr[6] = 0; + ctx.gpr[7] = 4; + ctx.gpr[8] = buffer_size_ptr as u64; + ctx.gpr[9] = handle_out as u64; + xam_content_create_enumerator(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0x8007_0057); + assert_eq!(mem.read_u32(buffer_size_ptr), 0); + } + + /// `user_index == XUserIndexNone (0xFE)` skips the profile check; + /// canary proceeds to enumerator creation and returns SUCCESS. + /// Defensive coverage — not currently exercised by Phase A. + #[test] + fn xam_content_create_enumerator_user_none_returns_success() { + let (mut ctx, mem, mut state) = fresh(); + let buffer_size_ptr = SCRATCH_BASE + 0x100; + let handle_out = SCRATCH_BASE + 0x200; + ctx.gpr[3] = 0xFE; // XUserIndexNone + ctx.gpr[4] = 1; + ctx.gpr[5] = 0x0000_0001; + ctx.gpr[6] = 0; + ctx.gpr[7] = 2; + ctx.gpr[8] = buffer_size_ptr as u64; + ctx.gpr[9] = handle_out as u64; + xam_content_create_enumerator(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0); + assert_eq!(mem.read_u32(buffer_size_ptr), 0x134 * 2); + } + + /// Out-of-range user_index (>=4 and !=0xFD) takes the same + /// no-such-user path because canary's `GetUserProfile` returns + /// nullptr for those indices. 
+ #[test] + fn xam_content_create_enumerator_out_of_range_user_returns_no_such_user() { + let (mut ctx, mem, mut state) = fresh(); + let buffer_size_ptr = SCRATCH_BASE + 0x100; + let handle_out = SCRATCH_BASE + 0x200; + ctx.gpr[3] = 7; // out of range, < XUserIndexLatest + ctx.gpr[4] = 0; + ctx.gpr[5] = 0x0000_0001; + ctx.gpr[6] = 0; + ctx.gpr[7] = 1; + ctx.gpr[8] = buffer_size_ptr as u64; + ctx.gpr[9] = handle_out as u64; + xam_content_create_enumerator(&mut ctx, &mem, &mut state); + assert_eq!(ctx.gpr[3], 0x0000_0525); + } } diff --git a/crates/xenia-kernel/src/xaudio.rs b/crates/xenia-kernel/src/xaudio.rs index c20fe94..cb09261 100644 --- a/crates/xenia-kernel/src/xaudio.rs +++ b/crates/xenia-kernel/src/xaudio.rs @@ -58,6 +58,24 @@ pub const XAUDIO_PERIOD: Duration = Duration::from_nanos(5_333_333); /// queueing unbounded callbacks while injection is starved. pub const XAUDIO_QUEUE_CAP: usize = 16; +/// Phase HostAudioEager (2026-05-19): initial seeded fire count at +/// `XAudioRegisterRenderDriverClient` time. Mirrors xenia-canary +/// [`audio_system.cc:210`](../../../../xenia-canary/src/xenia/apu/audio_system.cc#L210) +/// `client_semaphore->Release(queued_frames_=8, nullptr)` — the moment +/// canary's `RegisterClient` returns, its already-running host worker +/// thread has 8 buffer-complete fires queued to drain. +/// +/// In ours, the dedicated guest audio worker (spawned at the same +/// register call) can't be HOST-threaded; instead we seed the pending +/// FIFO so the round prologue's `try_inject_audio_callback` injects +/// the first callback on the very next round — well before tid=1 +/// reaches `ExCreateThread` for the XAudio worker threads (tid=14/15 +/// in canary, tid=9/10 in ours). This fixes the ordering issue where +/// the 48k-instruction ticker delay let tid=9/10 spawn and enter +/// their spin loop on the uninitialized voice struct before the +/// callback could modify it. 
+pub const XAUDIO_REGISTER_SEED_FIRES: usize = 8; + #[derive(Debug, Clone, Copy)] pub struct XAudioClient { pub callback_pc: u32, @@ -155,6 +173,28 @@ impl XAudioState { } } + /// Phase HostAudioEager: enqueue `n` buffer-complete fires for a + /// specific client slot. Used by `XAudioRegisterRenderDriverClient` + /// to mirror canary's `client_semaphore->Release(queued_frames_)` + /// at register time. Capped by [`XAUDIO_QUEUE_CAP`] to avoid + /// unbounded growth if the caller seeds aggressively. Returns the + /// actual number of fires enqueued. + pub fn seed_fires_for(&mut self, index: usize, n: usize) -> usize { + if index >= XAUDIO_MAX_CLIENTS || self.clients[index].is_none() { + return 0; + } + let mut queued = 0; + for _ in 0..n { + if self.pending.len() >= XAUDIO_QUEUE_CAP { + self.dropped += 1; + break; + } + self.pending.push_back(index); + queued += 1; + } + queued + } + pub fn peek_next(&self) -> Option { self.pending.front().copied() } @@ -320,6 +360,51 @@ mod tests { assert!(s.last_instant.is_some()); } + #[test] + fn seed_fires_for_registered_slot_enqueues_n() { + let mut s = XAudioState::default(); + let i = s.register(dummy_client(1)).unwrap(); + let queued = s.seed_fires_for(i, XAUDIO_REGISTER_SEED_FIRES); + assert_eq!(queued, XAUDIO_REGISTER_SEED_FIRES); + assert_eq!(s.pending.len(), XAUDIO_REGISTER_SEED_FIRES); + // All enqueued fires reference our slot. + for _ in 0..XAUDIO_REGISTER_SEED_FIRES { + assert_eq!(s.take_next(), Some(i)); + } + assert!(s.pending.is_empty()); + } + + #[test] + fn seed_fires_for_unregistered_slot_is_noop() { + let mut s = XAudioState::default(); + // Slot 3 is empty. 
+ let queued = s.seed_fires_for(3, 8); + assert_eq!(queued, 0); + assert!(s.pending.is_empty()); + assert_eq!(s.dropped, 0); + } + + #[test] + fn seed_fires_for_caps_at_queue_cap_and_counts_drops() { + let mut s = XAudioState::default(); + let i = s.register(dummy_client(1)).unwrap(); + let queued = s.seed_fires_for(i, XAUDIO_QUEUE_CAP * 4); + assert_eq!(queued, XAUDIO_QUEUE_CAP); + assert_eq!(s.pending.len(), XAUDIO_QUEUE_CAP); + // Excess fires are counted as dropped (per + // existing `enqueue_all_active` discipline). + assert!(s.dropped >= 1); + } + + #[test] + fn seed_fires_for_out_of_range_index_is_noop() { + let mut s = XAudioState::default(); + s.register(dummy_client(1)).unwrap(); + let queued = s.seed_fires_for(XAUDIO_MAX_CLIENTS + 5, 4); + assert_eq!(queued, 0); + assert!(s.pending.is_empty()); + } + #[test] fn tick_wallclock_fires_after_period() { let mut s = XAudioState::default(); diff --git a/crates/xenia-xex/src/header.rs b/crates/xenia-xex/src/header.rs index 73ba34c..0d56005 100644 --- a/crates/xenia-xex/src/header.rs +++ b/crates/xenia-xex/src/header.rs @@ -120,9 +120,13 @@ pub mod header_keys { pub const ENTRY_POINT: u32 = 0x00010100; pub const IMAGE_BASE_ADDRESS: u32 = 0x00010201; pub const IMPORT_LIBRARIES: u32 = 0x000103FF; - pub const TLS_INFO: u32 = 0x00020200; + // Canary authoritative: `xenia-canary/src/xenia/kernel/util/xex2_info.h:217-218`. + // The two values below were transposed prior to Phase 2 of the boot-state + // remediation — the swap was latent because the sole caller of + // `get_stack_size()` (loader.rs:356) was never invoked. 
+ pub const TLS_INFO: u32 = 0x00020104; pub const EXECUTION_INFO: u32 = 0x00040006; - pub const DEFAULT_STACK_SIZE: u32 = 0x00020104; + pub const DEFAULT_STACK_SIZE: u32 = 0x00020200; pub const ORIGINAL_PE_NAME: u32 = 0x000183FF; pub const FILE_FORMAT_INFO: u32 = 0x000003FF; pub const SYSTEM_FLAGS: u32 = 0x00030000; diff --git a/crates/xenia-xex/src/loader.rs b/crates/xenia-xex/src/loader.rs index 17d6bc1..2ab3216 100644 --- a/crates/xenia-xex/src/loader.rs +++ b/crates/xenia-xex/src/loader.rs @@ -353,8 +353,49 @@ pub fn get_image_base(header: &Xex2Header) -> Option { } /// Get the default stack size. +/// +/// Canary: `XEX_HEADER_DEFAULT_STACK_SIZE = 0x00020200`, low key byte = 0, +/// which by XEX-key encoding means the `value` field IS the stack size +/// directly (not an offset into the header). Fallback to 1 MiB mirrors +/// the historical hardcoded default in `xenia-app`. pub fn get_stack_size(header: &Xex2Header) -> u32 { - get_opt_header(header, header_keys::DEFAULT_STACK_SIZE).unwrap_or(0x10_0000) // Default 1MB + get_opt_header(header, header_keys::DEFAULT_STACK_SIZE).unwrap_or(0x10_0000) +} + +/// Parsed `XEX_HEADER_TLS_INFO` (key `0x00020104`). Canary's +/// `xex2_opt_tls_info` struct (`xex2_info.h:595-601`): +/// +0x00 u32 slot_count — number of dynamic TLS slots +/// +0x04 u32 raw_data_address — guest VA of the initial-value template +/// +0x08 u32 data_size — total TLS region size (image + slots) +/// +0x0C u32 raw_data_size — bytes of the initial-value template +#[derive(Debug, Clone, Copy)] +pub struct TlsInfo { + pub slot_count: u32, + pub raw_data_address: u32, + pub data_size: u32, + pub raw_data_size: u32, +} + +/// Parse the `XEX_HEADER_TLS_INFO` opt-header. The opt-header's low key +/// byte = 0x04, which by XEX-key encoding means the `value` field is an +/// OFFSET (in bytes) into the raw XEX header where the 16-byte +/// `xex2_opt_tls_info` struct lives — NOT an inline value. 
`data` must +/// be the raw XEX header bytes (length ≥ `value + 16`). Returns `None` +/// when the opt-header is absent or the offset is out of range. +pub fn get_tls_info(header: &Xex2Header, data: &[u8]) -> Option { + let off = get_opt_header(header, header_keys::TLS_INFO)? as usize; + if off.checked_add(16)? > data.len() { + return None; + } + let read_be_u32 = |o: usize| -> u32 { + u32::from_be_bytes([data[o], data[o + 1], data[o + 2], data[o + 3]]) + }; + Some(TlsInfo { + slot_count: read_be_u32(off), + raw_data_address: read_be_u32(off + 4), + data_size: read_be_u32(off + 8), + raw_data_size: read_be_u32(off + 12), + }) } /// XEX `XEX_HEADER_SYSTEM_FLAGS` (key `0x00030000`) — the privilege bitmap diff --git a/docs/functions/INDEX.md b/docs/functions/INDEX.md new file mode 100644 index 0000000..73da4bf --- /dev/null +++ b/docs/functions/INDEX.md @@ -0,0 +1,23 @@ +# Function dossier index + +Sorted by guest address. Update when adding/changing a dossier. See [README.md](README.md) for schema. + +| Address | Dossier | Classification | Synopsis | Last audit | +|---------|---------|----------------|----------|------------| +| `0x82172BA0` | [sub_82172BA0](sub_82172BA0.md) | `normal_callee` | Array-walk dispatcher (vtable slot 6 bctrl at +0x1E8 / PC 0x82172D88). Walks `[r29+56]` array, invokes slot 6 on each — one observed target is sub_821B55D8. Gated by `[r30+4]` 3-bit-field==4 in sole caller sub_821741C8. Fires 1-2× canary / 0× ours @ 180s. | 064 | +| `0x82173990` | [sub_82173990](sub_82173990.md) | `normal_callee` | Synchronous task-spawn-and-join helper. PC `0x82173C60 bl 0x824AA330` (= +0x2D0) = wedge site for tid=1's join wait on tid=13's thread handle. Wait is on a sync object (event) populated by `sub_82172370` inside `sub_821746B0`. Canary completes wait in <1ms (worker exits via ExTerminateThread); ours never (tid=13 stuck in sub_821CB030). Fires 1× per boot in both engines. 
| 066 | +| `0x821B55D8` | [sub_821B55D8](sub_821B55D8.md) | `normal_callee` | Vtable slot 6 dispatched from sub_82172BA0+0x1E8 bctrl. Calls sub_824F8398 at +0x584. DB static caller is EH `b` from sub_821B6DF4, but real runtime caller is the bctrl. 1× canary / 0× ours. | 064 | +| `0x821B6DF4` | [sub_821B6DF4](sub_821B6DF4.md) | `msvc_eh_catch_handler` | MSVC C++ catch-handler thunk. FuncInfo @ .rdata:0x820C1994. 0 fires both engines at this horizon. | 060 | +| `0x821C4EB0` | [sub_821C4EB0](sub_821C4EB0.md) | `vtable_method` | `silph::GamePart_Title::UImpl` member fn. AUDIT-061: NOT a branch-divergence gate. All 4 cond-branches in [+0x44, +0xE0] decided bit-identically. First divergence is non-returning `bl 0x821CC3F8` at +0x64 (wedge inside sub_821CB030). | 061 | +| `0x821CB030` | [sub_821CB030](sub_821CB030.md) | `normal_callee` | Wedge primary site: creates per-call work-queue completion XEvent (+0x128), submits via sub_82452DC0 (+0x19C/+0x2EC), waits INFINITE (+0x1AC/+0x318). AUDIT-066 corrected framing: wait is on guest worker-cluster signal, NOT IO completion. AUDIT-065: ours's tid=13 wedges on FIRST sub_821CB030 call on 0x12AC; canary's tid=17 completes 16+ such calls and reaches ExTerminateThread. | 066 | +| `0x822F1AA8` | [sub_822F1AA8](sub_822F1AA8.md) | `normal_callee` | tid=1 post-init game-loop dispatcher. Bctrl vtable[0] of *(0x828E1F08) at +0xA4 dispatches into sub_82173990 (via thunk sub_82175330). Ours wedges in the vtable[0] callee (sub_82173990+0x2D0); refined in AUDIT-065. Outer loop in sub_822F1AA8 itself iterates 4040× in canary 60s (PCs 0x822F1BCC/D58/DFC). | 065 | +| `0x824ACB38` | [sub_824ACB38](sub_824ACB38.md) | `crt_init_driver` | Iterates runtime vtable-registration slots at 0x82870010..0x828708D4. Two loops (3 + 557 slots); 160-slot intentional zero gap at [0x828702F0, 0x82870590). | 060 | +| `0x82452DC0` | [sub_82452DC0](sub_82452DC0.md) | `normal_callee` | Work-submitter / cluster root. AUDIT-050–060 convergence node. 
Ours fires ~3.21× less than canary. | 060 | +| `0x82457EF0` | [sub_82457EF0](sub_82457EF0.md) | `thread_proc` | tid=6 thread_proc. 0 static callers is CORRECT (registered via ExCreateThread). | 060 | +| `0x82458B90` | [sub_82458B90](sub_82458B90.md) | `normal_callee` | Canary γ-wedge signaler A. NtSetEvent caller; called via sub_82457EF0+0x24 (tid=6). Fires 1× in ours / 2× in canary. | 060 | +| `0x8245EC10` | [sub_8245EC10](sub_8245EC10.md) | `dispatch_table_method` | Canary γ-wedge signaler B. Slot 1 of dispatch_table @ 0x820B5830, installed by sub_8245FEB8. NtSetEvent caller. | 060 | +| `0x8245FEB8` | [sub_8245FEB8](sub_8245FEB8.md) | `normal_callee` | Vptr installer for dispatch_table @ 0x820B5830. Fires 5× in ours, 2× in canary, **same call site both engines**. | 060 | +| `0x824F7800` | [sub_824F7800](sub_824F7800.md) | `normal_callee` | Activation chain fn #2 → bctrl vtable[1] dispatches sub_825070F0 at PC 0x824F7B20. Standard prolog. 1× canary / 0× ours. | 064 | +| `0x824F7CD0` | [sub_824F7CD0](sub_824F7CD0.md) | `normal_callee` | Activation chain fn #3. Contains 4-way computed switch (`bctr` jump-table) at +0x40. Calls sub_824F7800. 1× canary / 0× ours. | 064 | +| `0x824F8398` | [sub_824F8398](sub_824F8398.md) | `normal_callee` | Activation chain fn #4. Tiny 20-insn adapter constructing a 36-byte stack-record before calling sub_824F7CD0. 1× canary / 0× ours. | 064 | +| `0x825070F0` | [sub_825070F0](sub_825070F0.md) | `vtable_method` | Slot 1 of class `ANON_Class_713383D7` vtable (0x8200A208/0x8200A928). 1× fire in canary @ ~25s wallclock; spawns 4 workers with ctx 0xBCE25340. AUDIT-064: full activation chain identified; wedge is upstream at tid=1's join-wait on tid=13 (AUDIT-049). AUDIT-067 (negative result): the vtable address `0x8200A208` is never stored via any guest store opcode in canary — install is host-side (kernel-import direct memory write / XEX-loader); searching guest code for the install is fundamentally blind. 
| 067 | diff --git a/docs/functions/README.md b/docs/functions/README.md new file mode 100644 index 0000000..b4d1626 --- /dev/null +++ b/docs/functions/README.md @@ -0,0 +1,141 @@ +# Function dossiers — persistent RE notes for Project Sylpheed (Sylpheed.xex) + +## What this is + +One markdown file per guest function we've investigated during a kernel-bug audit. The dossier is a **living, append-only record** of what we know (and what we got wrong) about each function. The goal is two-fold: + +1. **Don't re-derive understanding.** When an audit touches `sub_821C4EB0`, the next agent shouldn't have to re-walk the disasm — read [sub_821C4EB0.md](sub_821C4EB0.md) first. +2. **Don't repeat misinterpretations.** AUDIT-060 falsified two audits' worth of work because we'd read MSVC EH FuncInfo metadata as if it were static call edges. The dossier captures both the corrected reading AND the falsified one — so future agents see the trap was already sprung once. + +This system is **agent-writable**. Audit agents are expected to consult dossiers before probing, and to *append* (not rewrite) when a new audit produces evidence about a known function. Agents should create new dossiers for any function they perform non-trivial work on. + +## Layout + +``` +docs/functions/ + README.md — this file + INDEX.md — one-line lookup table, sorted by address + sub_XXXXXXXX.md — per-function dossier (one per function, address in UPPERCASE hex) +``` + +Filename convention: `sub_` + 8-hex-uppercase + `.md`. Match the name used in `sylpheed.db.functions.name`. If the function has a symbol (e.g. `GamePart_Title::UImpl::ctor`), still use the address-based filename; record the symbol inside. + +## Schema + +Each dossier follows this shape: + +```markdown +--- +address: 0xXXXXXXXX +classification: (one class from "Classification vocabulary" below) +confidence: (high | medium | low | refuted) +last_audit: NNN +aliases: + - "human-readable name or prior misnomer (status)" +--- + +# sub_XXXXXXXX + +## Synopsis + +One short paragraph: the current best understanding. 
ONLY the latest consensus — +old interpretations live in the audit log. + +## Evidence + +Hard facts only. Disasm patterns, .rdata/.pdata references, runtime fires from +instrumentation, byte-level dumps. No inference here; that goes in Activation +or Notes. + +## Activation + +When/how this function runs: +- direct bl from caller X at PC Y +- indirect via fnptr-array slot N at 0x... +- vtable dispatch from class C, slot K (vtable at 0x...) +- C++ EH catch-handler dispatch (FuncInfo @ 0x...) +- thread_proc entry point (registered via ExCreateThread call site PC Z) + +## Static graph + +- Callers (from sylpheed.db `xrefs` table, source_func column — never source per AUDIT-045): + - PC `0xCCCCCCCC` inside `sub_DDDDDDDD` +- Callees: + - bl `sub_EEEEEEEE` at PC `0x...` + - bctrl (computed) at PC `0x...` — candidates: ... + +## Audit log + +Append-only. Most recent FIRST. Each entry pairs (audit-NNN, date, observation, +status). Status options: confirmed | falsified | superseded-by-NNN. + +- **AUDIT-NNN (YYYY-MM-DD)** — observation + relevant data point [STATUS] +- **AUDIT-MMM (YYYY-MM-DD)** — earlier observation [STATUS: falsified by NNN — reason] + +## Open questions + +Future-work bullets: +- Specific PC to probe +- Hypothesis to test +- Cross-reference to verify + +## Cross-references + +- Related dossiers: [sub_XXXXX](sub_XXXXX.md) (relationship) +- Audit memory entries: `project_xenia_rs_audit_NNN_*.md` +- Trace artifacts: `audit-runs/audit-NNN-*/...` +``` + +## Classification vocabulary + +Pick the **most specific** that fits. Add new ones if needed but don't bloat the list. + +| Class | Meaning | +|-------|---------| +| `normal_callee` | Plain function reached by direct `bl`. The default. | +| `vtable_method` | Virtual method dispatched via `bctrl` from a class vtable. | +| `thread_proc` | Entry point registered via `ExCreateThread` / `KeInitializeThread`. 0 static callers is correct; check for `lr=0xbcbcbcbc` thread-entry sentinel at first fire. 
| +| `msvc_eh_catch_handler` | MSVC C++ catch handler. Prolog `subi r31, r12, N; mflr r12; ...`. Referenced from `.rdata` FuncInfo (magic `0x19930520..22`). 0 static callers; dispatched by EH runtime only. **Do not treat its `.rdata` references as call edges.** | +| `msvc_eh_state_handler` | MSVC EH state/unwind handler. Similar to above but no `subi r31, r12` prolog. | +| `import_thunk` | Wraps an xboxkrnl import (e.g. NtCreateEvent at thunk 0x8284DF1C). Behavior is host-side. | +| `wrapper` | Thin wrapper around a kernel import or library call. | +| `crt_init_driver` | CRT-style iterator that walks an array of fn pointers / vtables (e.g. `sub_824ACB38`). | +| `fnptr_array_entry` | Function reached only via enumeration by a `crt_init_driver`. | +| `dispatch_table_method` | Function installed into a runtime dispatch table by a ctor; reached via indirect call only. | +| `synchronization_primitive` | Function that wraps Nt/Ke wait/set/release calls. | +| `unknown` | Not yet investigated. Synopsis describes what little we know. | + +## Confidence levels + +| Confidence | Meaning | +|------------|---------| +| `high` | Multiple converging evidence sources (disasm + runtime instrumentation + cross-engine probe). | +| `medium` | One strong source (e.g. disasm alone or one canary trace). Plausible but not cross-checked. | +| `low` | Inference from static call graph or one observation; should be probed if it becomes load-bearing. | +| `refuted` | An earlier claim was falsified. Keep the dossier; document what the function actually is in synopsis + put the refuted claim in audit log with status `falsified`. | + +## Golden rules — for agents and humans + +1. **Append, don't overwrite.** New audits add entries to "Audit log". Old entries stay with their original wording so future readers can see the evolution. +2. **Falsify, don't delete.** If a later audit disproves an earlier claim, mark the old audit-log entry `[STATUS: falsified by AUDIT-NNN — reason]`. 
The earlier interpretation taught us *something* (often that a class of disasm pattern is ambiguous) — preserve it. +3. **Cite the source.** Every claim ties to either (a) an audit number + trace artifact path, or (b) a static-DB query you can reproduce. "X is a thread_proc" without a basis is unacceptable. +4. **Distinguish fact from inference.** "Fires 5× at -n 500M with lr=0x8246020C all five times" is a fact. "Therefore it's a vptr installer for slot 1 of dispatch_table 0x820B5830" is an inference. Put facts in Evidence; inferences in Synopsis/Activation/Notes — and label inferences as such. +5. **Update INDEX.md.** When you create a new dossier or change a classification, add/update the corresponding row in `INDEX.md`. +6. **Update the `last_audit` frontmatter.** Reflects the most recent audit that touched the dossier. +7. **One function per file.** If you find a fn is structurally a wrapper for another, write two dossiers and link them. + +## Anti-patterns to avoid + +- **Reading EH metadata as call edges.** `.rdata` references to a fn inside an MSVC FuncInfo struct (magic `0x19930520..22` nearby) are unwind-handler bindings, NOT bl call sites. Pattern: catch-handler prolog `subi r31, r12, N; mflr r12; stwu r1, ...`. See [sub_821B6DF4.md](sub_821B6DF4.md) for the canonical falsified example. +- **"0 static callers" = "dead in ours".** Three legitimate reasons a fn has 0 static callers and still runs: thread_proc (ExCreateThread), fnptr_array_entry (enumerated by crt_init_driver), msvc_eh_*_handler (dispatched by EH runtime). Always check. +- **Comparing fire counts at fixed instruction horizons across engines.** Canary @ 60s wallclock and ours @ -n 500M are different time bases. State (i) and state (ii) data points must be normalized — either both at the same wallclock or both at the same boot milestone. +- **Trusting handle IDs across runs.** `KernelState::alloc_handle` is monotonic; handles drift run-to-run. Function-context names (e.g. 
"sub_821CB030+0x128 creator") are stable; handle IDs are not. +- **Quoting xrefs.source instead of xrefs.source_func.** See AUDIT-045 reading-error #12. Use `source_func` for caller-set queries. + +## Backfill status + +Initial set (created in AUDIT-060 retrospective backfill, 2026-05-12): + +- The 10 most-cited fns from AUDIT-049–060. + +Future audits should extend coverage as they touch new fns. Backfilling earlier audit fns (AUDIT-030–048) is a nice-to-have but not blocking. diff --git a/docs/functions/sub_82172BA0.md b/docs/functions/sub_82172BA0.md new file mode 100644 index 0000000..1e18e9b --- /dev/null +++ b/docs/functions/sub_82172BA0.md @@ -0,0 +1,56 @@ +--- +address: 0x82172BA0 +classification: normal_callee +confidence: high +last_audit: 064 +aliases: + - "Vtable-slot-6 array-walker / AUDIT-033 T6-gateway descendant" +--- + +# sub_82172BA0 — array-walk dispatcher (vtable slot 6) + +## Synopsis + +Normal-callee dispatcher. Walks an array of object pointers (header at `r29+56`: `r29+56[8]` = element count `>>2`; `r29+56[4]` = data ptr) and invokes vtable slot 6 (`lwz r11, 24(r11)`) on each. The `bctrl` at PC `0x82172D88` is the slot-6 dispatch site — observed in canary firing into [sub_821B55D8](sub_821B55D8.md). Has a critical-section prologue (`lwarx`/`stwcx.` at PC `0x82172C08..0x82172C14`) protecting the array snapshot. Only fires when caller `sub_821741C8` sees `[r30+4]` mask-3-bits-field == 4. AUDIT-064 verified canary fires 2× at 180s wallclock; ours fires 0× because tid=1's wait at `sub_82173990+0x2D0` (handle 0x12A4 = tid=13 thread handle) never completes. + +## Evidence + +- Disasm prolog at `0x82172BA0`: `mflr r12; bl 0x825F0F78 (frame helper); subi r31, r1, 176; stwu r1, -176(r1); mr r29, r3; ...` — normal-callee prolog, frame ptr `r31 = r1-176`. NOT MSVC EH handler. +- Function size: 604 bytes / 151 insns. `has_eh=True`, `frame_size=0` per DB (dynamic). +- Static caller xref (sole): PC `0x821744C8` inside `sub_821741C8` via `bl`. 
Gating disasm at `sub_821741C8+0x2C8..2C8` matches mask-bits of `[r30+4]` to value 4 to take this call. +- The bctrl at PC `0x82172D88` operates on slot 6 (`lwz r11, 24(r11)` = byte-offset 24 in 4-byte slots = slot index 6). +- AUDIT-064 canary 60s+180s probes: fires 1-2× with `lr=0x821744CC r3=BCCC4A80 r4=BC369160 r5=BC369160 r6=03A72328` on tid=6. PC `0x82172D88` (the bctrl) fires 2× at 60s in upstream probe. +- AUDIT-064 ours `--ctor-probe=0x82172BA0` -n 500M: **0 fires**. +- Critical-section pattern at `0x82172C08..0x82172C14`: `mfmsr r8; mtmsrd r13; lwarx r9, r0, r10; stwcx. r11, r0, r10; mtmsrd r8; bne 0x82172C00` — disable interrupts → atomic swap → restore. + +## Activation + +Direct `bl` from `sub_821741C8+0x300` (PC `0x821744C8`). Conditional: `sub_821741C8` masks `[r30+4]` via `rlwinm r11, r11, 0, 27, 29` and switches on the 3-bit field — value `4` selects this fn, value `8` selects `sub_82172E58`, else no-op. + +## Static graph + +- Static callers (DB): + - `sub_821741C8+0x300` via `bl`. +- Callees: + - `sub_822F2328` (PC `0x82172BC4`). + - `sub_8284DCFC` (PC `0x82172BD4`) — likely a kernel sync primitive. + - `sub_8228E138` (PC `0x82172BF4`). + - Indirect via `bctrl` at PC `0x82172D88` (slot 6) and other vtable slots inside the body. + - DB lists many `ind_call` targets recorded for PC `0x82172D88` (sub_82680370, sub_823A2258, sub_82455300, sub_827E8D60, sub_8237B020, sub_82398CC0, sub_82391BA8, sub_827ED308, sub_826B24E8, sub_822C7418, sub_821F8340, sub_823800A8, sub_824A6C00, sub_823762E8, sub_825ED990, sub_827EFED0, sub_822B06A0, sub_82455658, sub_82388FF8, sub_827FA850, sub_8232C4C0, sub_8238EC10, sub_82674028, sub_823929D0, ...). **Critical caveat**: this list is missing `sub_821B55D8` despite that being the runtime target observed in canary — the dynamic-target inference has gaps. + +## Audit log + +- **AUDIT-064 (2026-05-12)** — disasm confirms array-walk dispatcher pattern; canary fires 1-2× / ours 0×. 
The runtime activation chain for sub_825070F0 starts here. **Convergence finding**: ours never reaches sub_82172BA0 because tid=1 is stalled at `sub_82173990+0x2D0` (handle 0x12A4 = tid=13's thread handle — AUDIT-049 wedge). The whole 5-level ladder downstream is gated by this wait. [confirmed] + +## Open questions + +- What is the array at `[r29+56]`? Likely a list of subsystem objects (graphics, audio, input, etc.) the game-loop dispatcher iterates each frame. Canary `r3=0xBCCC4A80` is the dispatcher object. +- The `bctrl`'s xref-table is incomplete (missing `sub_821B55D8`). Investigate the dynamic-target inference's gap. + +## Cross-references + +- Callers: `sub_821741C8+0x300`. +- Callees (via bctrl): `sub_821B55D8` (observed in canary), plus 50+ others recorded in DB. +- Upstream: `sub_822F1AA8` → vtable[0]=`sub_82173990` → calls `sub_821741C8`. +- Audits: 033 (T6 gateway analysis), 058, 064. +- Artifacts: `audit-runs/audit-064-activation-ladder/canary-{60,120,180}s.log`, `canary-upstream-60s.log`, `canary-inside-822F1AA8.log`. diff --git a/docs/functions/sub_82173990.md b/docs/functions/sub_82173990.md new file mode 100644 index 0000000..343fb5c --- /dev/null +++ b/docs/functions/sub_82173990.md @@ -0,0 +1,167 @@ +--- +address: 0x82173990 +classification: normal_callee +confidence: high +last_audit: 066 +aliases: + - "tid=1 join wait site (the wedge PC 0x82173C60)" + - "synchronous task-spawn + join helper" +--- + +# sub_82173990 — synchronous task-spawn-and-join helper + +## Synopsis + +Tid=1 (main) one-shot helper that builds a stack-resident task descriptor, calls +`sub_821746B0` to allocate+initialize a 24-byte task record (which encapsulates +a sync object created by `sub_82172370`), and waits INFINITE on that sync +object. The wait at PC **`0x82173C60 bl 0x824AA330`** (= `sub_82173990+0x2D0`) +is the AUDIT-049/AUDIT-064 wedge site — both canary and ours enter the wait, +but only canary's wait completes. 
The wait is on the **thread handle** of the +worker spawned by `sub_821746B0` (XThread or KE_THREAD), released when that +worker calls `ExTerminateThread`. Function is called exactly **1× per boot** in +both engines (entry probe fires once, all body PCs fire once each). + +## Evidence + +Disasm-anatomy (size 768B / 192 insns @ `0x82173990..0x82173C8C`): + +``` +0x82173990 mflr/prologue (256-byte frame) +0x821739B0 bl 0x8216E7E8 ; first string-init helper (r4=r11+6244) +0x821739CC bl 0x82448AA0 ; cr0=.G. → arg!=0 path +0x821739F0 bl 0x82448BC8 ; returns string-table entry → r28 +0x82173A38 bl 0x8216F218 ; internal copy +0x82173A68 bl 0x821835E0 ; → r25 (ID/result); cr6-tests below +0x82173A78 bne cr6, 0x82173A84 ; skip if r25==28 +0x82173A88 beq cr6, 0x82173BC0 ; skip if r25==0 +… +0x82173B98 bl 0x82453910 ; signaler candidate (AUDIT-049 column) +0x82173BC0 convergence label (string-table clean-up + dispatch) +0x82173BE4 bl 0x824B2188 ; tid=1's outer-channel pump +0x82173C34 bl 0x821746B0 ; allocates 24-byte task record, sub_82172370 fills [r29+4] +0x82173C38 mr r30, r3 ; r30 = task struct +0x82173C48 bl 0x824AA5C8 ; status query → r3 (canary r3=1 → [r31+80]=0x103 STATUS_PENDING) +0x82173C54 bne cr6, 0x82173C64 ; guard: only wait if r11==0x103 (STATUS_PENDING) +0x82173C5C lwz r3, 4(r30) ; r3 = task->sync_handle = [struct+4] +0x82173C60 bl 0x824AA330 ; KeWaitForSingleObject INFINITE ← THE WEDGE +0x82173C70 bl 0x82174AF8 ; post-wait task completion (sub_82174AF8 runs post-state transition) +0x82173C88 epilogue (b 0x825F0FC4) +``` + +### Canary run (AUDIT-065, 180s wallclock, --audit_61_branch_probe_pcs) + +All 17 probed PCs fire **exactly 1× each on tid=F8000008 (= canary main / mapped from `tid=6`)**: + +| PC | lr | r3 | r4 | tid | meaning | +|---|---|---|---|---|---| +| `0x82173990` | `0x822F1B50` | `BCCC4A80` | `701CF8C0` | 6 | entry; lr=post-bctrl of sub_822F1AA8 | +| `0x821739CC` | — | `0x00000001` | `0x820A17A8` | 6 | cr0=.G. 
— `cmplwi r28,0` post-strcmp != 0 path | +| `0x821739F0` | — | `BCCC4A64` | `BCCC49FC` | 6 | r28 populated; cr6=..E (==) | +| `0x82173A38` | — | `701CF860` | `701CF840` | 6 | inner copy call entry | +| `0x82173A68` | — | `BDE996FF` | `BDE98F14` | 6 | r25=0xBDE996FF (returned ID), cr6=..E | +| `0x82173A78` | — | `0x0000001C` | `BDE98F14` | 6 | `bne 0x82173A84` — `r25 != 28` taken | +| `0x82173BC0` | `0x82173A6C` | `0x1C` | … | 6 | convergence (post-bne over alloc); `beq 0x82173B14`-skipped | +| `0x82173BE4` | `0x82173BD4` | `BE568F00` | `0x00000005` | 6 | bl 0x824B2188 entry | +| `0x82173C34` | `0x82173C1C` | `BCCC4A80` | `0x00000000` | 6 | bl 0x821746B0 entry — calls task-alloc | +| `0x82173C38` | `0x82173C38` | `BC365700` | `701CF6E0` | 6 | r3=task_struct, cr6=.G. | +| `0x82173C48` | `0x82173C38` | `F8000094` | `701CF800` | 6 | post bl 0x824AA5C8; the captured r3=F8000094 is the register value at BB entry after the bl, not necessarily the call's return value (see Open questions) | +| `0x82173C54` | `0x82173C4C` | `0x00000001` | `0x30000000` | 6 | cmplwi r11, 0x103 — actual r11 equals 0x103, so cr6=..E (eq: neither lt nor gt) and the wait was entered | +| `0x82173C60` | `0x82173C4C` | **`F8000094`** | `FFFFFFFF` | 6 | **wait entry — r3 = thread handle** | +| `0x82173C64` | `0x82173C64` | `0` | `0x1` | 6 | post-wait — wait completed! | +| `0x82173C70` | `0x82173C64` | `BCCC4A80` | `BC365700` | 6 | bl 0x82174AF8 (cleanup) | +| `0x82173C88` | `0x82173C88` | `701CF840` | … | 6 | epilogue | + +Wait duration: ~445 log lines between PC `0x82173C60` (entry) and `0x82173C64` (post-wait). + +Inside the wait window: `K> F8000094 XThread::Execute thid 17 (handle=F8000094, 'XThread01F4 (F8000094)', native=000001F4)` — i.e. F8000094 IS a thread handle. The thread loads cache files (`cache:\aab216c3\5\ee70e0a`, `cache:\87719002\c\dba806e`, `cache:\87719002\c\ec0a96e`, `cache:\87719002\a\60fcb85`, etc) and spawns child workers (`ExCreateThread(..., 824AFF88, 821C4AD0, BCA44C00, ...)` and others). 
The wait completes immediately after `d> F8000094 ExTerminateThread(00000000)`. + +### Ours run (AUDIT-065, -n 500M instructions) + +Only 6 of 24 BB-entry probed PCs fire (ours's branch probe fires only at BB-entry): + +| PC | lr | r3 | cycle | meaning | +|---|---|---|---|---| +| `0x82173990` | `0x822F1B50` | `0x40ba9a80` | 6,172,194 | entry; lr= bctrl in sub_822F1AA8 | +| `0x821739CC` | — | `0x00000001` | 6,172,686 | non-zero arg path | +| `0x821739F0` | — | `0x40ba9a64` | 6,173,074 | str-init complete | +| `0x82173A68` | — | `0x41d7e6ff` | 9,174,034 | r3 was first read as a str-table entry, but this probe is the `bl 0x821835E0` post-return. Note the cycle gap from 6.17M to 9.17M: tid=1 was blocked in between (in sub_82172370/etc), i.e. the AUDIT-049 wait inside sub_82452DC0 had already happened by this point | +| `0x82173BC0` | `0x82173A6C` | `0x1C` | 9,175,368 | convergence (r25=0x41d7e6ff is neither 28 nor 0; falls through and skips the block @ 0x82173B14) | +| `0x82173C38` | `0x82173C38` | `0x4024a640` | 9,178,243 | post `bl 0x821746B0` — r3=`0x4024a640` (task struct, ALSO start_ctx of tid=13) | + +**No probe fires beyond `0x82173C38`**. The next BB-entry probe was `0x82173C64` (post-wait). Mid-block PCs `0x82173C48/C54/C60/C70` don't fire in ours's branch-probe (per AUDIT-046 reading-error #13). The fact that `0x82173C64` does NOT fire confirms: **ours's tid=1 wedges between `0x82173C38` and `0x82173C64`** — at the wait at `0x82173C60`. + +### End-of-run thread state (ours --trace-handles dump) + +``` +handle=0x000012a4 Thread(id=13, exit=None) waiters(tid)=[1] +handle=0x000012ac kind=Event/Auto waiters=1 signals=0 waits=1 wakes=0 + [ 0] cycle=0 tid=13 lr=0x824ac578 src=do_wait_single +handle=0x000012b8 kind=Event/Auto waiters=1 signals=0 waits=1 wakes=0 + [ 0] cycle=0 tid=5 lr=0x824ac578 src=do_wait_single +``` + +tid=13 (handle 0x12A4, exit=None) is alive but stuck on event 0x12AC inside sub_821CB030 (the per-call work-queue completion event; AUDIT-065 read it as a cache file IO completion event, a framing AUDIT-066 corrected). 
tid=5 is one of the workers parked on its own idle event 0x12B8. tid=1 join-waits tid=13 → tid=13 waits 0x12AC → 0x12AC needs workers → workers parked. + +## Activation + +Direct `bl` from `sub_82175330+0x4` via tail-jump (post-bctrl of vtable[0] dispatched at `sub_822F1AA8+0xA4`). One static caller `sub_82175330` per `sylpheed.db`. + +Called exactly **1× per boot** on tid=1 in both engines. + +## Static graph + +- Direct callers (sylpheed.db `xrefs.source_func`): + - `sub_82175330+0x4` via `b 0x82173990` (tail-jump from the vtable thunk). +- Direct callees of interest: + - `bl 0x8216E7E8` at `+0x20` — string-table helper (used twice). + - `bl 0x82448AA0` at `+0x38`, `+0x48`, `+0x88`, `+0xC4`, `+0x168`, `+0x260` — string-table lookup. + - `bl 0x824AA7A0` at `+0x4C` — string-helper. + - `bl 0x82448BC8` at `+0x5C` — internal lookup. + - `bl 0x82448C50` at `+0x78`, `+0x98`, `+0x178`, `+0x308` — string convert. + - `bl 0x8216F218` at `+0xA8`, `+0x188` — copy / string ops. + - `bl 0x8217C850` at `+0xAC` — query. + - `bl 0x82178E50` at `+0xB8` — query. + - `bl 0x821835E0` at `+0xD8` — returns ID into `r25` (key gate). + - `bl 0x824AA830` at `+0xFC`, `+0x148` — kernel helper. + - `bl 0x822C69C8` at `+0x104`, `+0x134` — task-helper. + - `bl 0x822DE650` at `+0x118` — helper. + - `bl 0x822F2328` at `+0x124`, `+0x240` — helper (calls inside outer sub_822F1AA8 too). + - `bl 0x822DE858` at `+0x13C` — helper. + - `bl 0x822F28C0` at `+0x144`, `+0x25C` — helper. + - `bl 0x82674028` at `+0x15C` — kernel-debug printf? format. + - `bl 0x82150EF8` at `+0x1A4` — heap alloc 28-byte struct. + - `bl 0x824523E8` at `+0x1FC` — task-helper. + - `bl 0x82453910` at `+0x208` — **signaler/notify (AUDIT-049 column)**. + - `bl 0x821506B8` at `+0x224` — heap free. + - `bl 0x8216E790` at `+0x22C`, `+0x2EC`, `+0x2F4` — string-cleanup. + - `bl 0x824B2188` at `+0x254` — tid=1's outer-channel pump. + - `bl 0x824482D0` at `+0x288` — format. 
+ - **`bl 0x821746B0` at `+0x2A4`** — **task allocator + worker spawn (the key call).** + - `bl 0x824AA5C8` at `+0x2B8` — status query (returns `r3` → checked vs `0x103 STATUS_PENDING`). + - **`bl 0x824AA330` at `+0x2D0`** — **`KeWaitForSingleObject` INFINITE — THE WEDGE PC.** + - `bl 0x82174AF8` at `+0x2E0` — post-wait task-cleanup/state-transition. + - `b 0x825F0FC4` at `+0x2FC` — epilogue tail-jump. + +## Audit log + +- **AUDIT-066 (2026-05-12)** — **source-review only (READ-ONLY)**. AUDIT-065's "host-side `F8000048` IO completion thread" inference falsified by canary source review. `F8000048` is a **guest XThread thid=10**, spawned by main at canary-run.stdout:1331 via `ExCreateThread(...,824AFF88, 82450A28, 828F3B68, 0)` — entry `0x82450A28` is a Sylpheed-internal worker thread, not host infrastructure. Canary's only host helper thread is "Kernel Dispatch" (`xenia-canary/src/xenia/kernel/kernel_state.cc:524-549`) which services `CompleteOverlappedDeferred` for XAM UI/content, not file IO. Canary's `NtReadFile`/`NtReadFileScatter`/`NtWriteFile` (`xboxkrnl_io.cc:125-389`) are synchronous and signal the supplied event handle inline via `ev->Set(0, false)` after the sync read. Ours's `signal_io_completion_event` (`exports.rs:1156-1169`) is the bit-equivalent. **No "host-side IO completion signal" gap exists** in ours's IO handlers. The wait at this fn's `+0x2D0` (PC `0x82173C60`) is on the thread handle for the worker spawned via `bl 0x821746B0` (= tid=13 in ours / thid=17 in canary), released by `ExTerminateThread` per AUDIT-065 — confirmed correct framing. AUDIT-066 conclusion: brief's proposed fix locus (`xenia-kernel/src/exports.rs` IO handlers) is wrong; the bug is upstream worker-cluster bootstrap (AUDIT-057/063/064 chain). No code change of any size in `exports.rs` would unwedge tid=13. [confirmed: brief premise falsified] +- **AUDIT-065 (2026-05-12)** — full disasm + 17-PC probe in BOTH engines. 
**Canary's tid=1 (= F8000008, internally tid=6) reaches PC `0x82173C60` exactly once, waits on r3=`0xF8000094` (= XThread thid 17's thread handle), and the wait completes when that worker reaches `ExTerminateThread(0)`**. Worker runs synchronous cache file IO (`cache:\aab216c3\5\ee70e0a`, `cache:\87719002\c\...` etc) and spawns child workers via `ExCreateThread(... 824AFF88, 821C4AD0, BCA44C00 ...)` before terminating. **Ours's tid=1 reaches PC `0x82173C38` (post `bl 0x821746B0`, r3=`0x4024a640` = ours's task struct = ours's tid=13's start_ctx) and stalls before `0x82173C64`** — i.e. inside the wait at `0x82173C60`. Ours's tid=13 (created by `bl 0x821746B0`'s subroutine, entry `0x821748F0`) DOES open the same cache files (`cache:/aab216c3/5`, `cache:/aab216c3`) but BLOCKS inside `sub_821CB030+0x1AC` on event `0x12AC` (NO_SIGNALS_DESPITE_WAITS). So `ExTerminateThread` is never reached on ours's tid=13 → tid=1's wait on `0x12A4` never completes. **Refines the wedge from "thread-join on tid=13" to a precise mechanism**: the wait at `+0x2D0` is structurally a synchronous task-join (canary worker exits in ~1ms; ours's worker is permanently stuck downstream). **`sub_82173990` body itself is clean** — every probed PC except the wait completion matches canary's behavior; the divergence is entirely in what happens inside `sub_821746B0`'s spawned worker (tid=13's body in `sub_821748F0` → `sub_821C4EB0` → `sub_821CC3F8` → `sub_821CBA08` → `sub_821CB030`). Same AUDIT-049 island, now framed as: **how does canary's worker get its `0x12AC`-equivalent event signaled fast enough that the worker can call `ExTerminateThread`?** [confirmed] + +## Open questions + +- **What signals canary's tid=17 cache-IO completion event (the `0x12AC`-equivalent inside its `sub_821CB030`)?** Probe canary's `NtSetEvent`/`KeSetEvent` thunks (`0x8284DF5C`/`0x82490018`) filtered on tid=17's `r3`-handle in the lr-window around the corresponding tid=17 wait. 
Compare against ours's empty `0x12AC.signals` count. +- The cache file open flow looks identical in both engines for `cache:\aab216c3\…` paths — confirming AUDIT-054's VFS layout fix landed correctly. The divergence is purely in the producer-side signaling. +- Both engines would pass through PC `0x82173B98` (the `sub_82453910` "signaler" candidate) on the `r25!=0 && r25!=28` path — but only if the AUDIT-046 "5/5 iter" path lands inside the block `[0x82173B14, 0x82173BC0]`. Ours's BB-entry probe shows `0x82173BC0` fires but NOT `0x82173B14/B40/B84/B98/BA0/BA8` — meaning ours's beq at `0x82173B14` is **taken** (r5==0) and we skip the entire block. Canary's BR list also shows direct jump from `0x82173A78`→`0x82173BC0` (line 1998→1999) — **so canary also skips the block at 0x82173B14**. Block is dead in both engines at this horizon. `sub_82453910` is NOT the relevant signaler at this call site. +- `r3` at `0x82173C48` shows `F8000094` in canary at probe time, but that's the **post-`bl 0x824AA5C8` PC** (mid-block; the captured value is whatever r3 carried at that instant — likely the handle being queried, not the return). Worth a follow-up probe to confirm the status-query target. + +## Cross-references + +- Caller: [sub_822F1AA8](sub_822F1AA8.md) (via thunk sub_82175330, vtable[0] of `*(0x828E1F08)`). +- Callees of interest: + - `sub_821746B0` — task allocator + worker spawn (no dossier yet — recommend creating one). + - `sub_82172370` — sync object creator (no dossier yet). + - `sub_82174AF8` — post-wait cleanup (no dossier yet). +- Worker-side wedge: [sub_821CB030](sub_821CB030.md) — fires inside the worker spawned via sub_821746B0 from sub_82173990's `bl` at `+0x2A4`. +- Audits: 049 (original tid=1 stall localization), 064 (full activation chain to sub_825070F0), 065 (this), 066 (source review; host-side IO-completion premise falsified). +- Artifacts: `audit-runs/audit-065-sub82173990-wait-site/{sub_82173990.disasm,canary.log,canary-run.stdout,ours.log,ours-stdout.log,summary.md}`. 
diff --git a/docs/functions/sub_821B55D8.md b/docs/functions/sub_821B55D8.md new file mode 100644 index 0000000..41e1455 --- /dev/null +++ b/docs/functions/sub_821B55D8.md @@ -0,0 +1,53 @@ +--- +address: 0x821B55D8 +classification: normal_callee +confidence: high +last_audit: 064 +aliases: + - "AUDIT-058 caller-ladder fn #5 (vtable slot 6 of class containing 0x82172D88 dispatcher)" +--- + +# sub_821B55D8 — vtable slot 6 invoked from sub_82172BA0 dispatcher + +## Synopsis + +Normal callee dispatched via the `bctrl` at `sub_82172BA0+0x1E8` (PC `0x82172D88`) — slot 6 of some game-object vtable (offset 24 = `lwz r11, 24(r11)`). Calls [sub_824F8398](sub_824F8398.md) at PC `0x821B5B5C` (=+0x584). Note the **only static caller is via `b` (jump, NOT bl)** from `sub_821B6DF4+0x40` — that's the MSVC EH catch-handler trampoline at PC `0x821B6E34`. **AUDIT-064 falsifies the AUDIT-058 framing that this is reached primarily via the EH path**: at runtime it's reached via the `bctrl` slot-6 dispatch from `sub_82172BA0`, not via the EH thunk. + +## Evidence + +- Disasm prolog at `0x821B55D8`: `mflr r12; bl 0x825F0F74; stfd f31, -88(r1); subi r31, r1, 368; stwu r1, -368(r1); mr r30, r3; ...` — standard normal-callee prolog. Uses `subi r31, r1, 368` (frame-pointer is `r1-368`), NOT MSVC EH-handler's `subi r31, r12, N`. +- Function size: 2076 bytes / 519 insns. `has_eh=True`, `frame_size=0` per DB (but the actual stack alloc is 368 bytes — `frame_size=0` likely indicates dynamic). +- Static caller xref (sole): PC `0x821B6E34` inside `sub_821B6DF4` via `kind=j insn=b` (unconditional branch, NOT bl). This is an EH catch-handler trampoline that tail-jumps into this fn's body — it's how the MSVC EH machinery enters the fn AFTER a matching exception is caught. Pattern at `0x821B6E30..0x821B6E34`: `lwz r3, 8(r3); b 0x821B55D8`. +- AUDIT-064 canary 60s probe: fires 1× with `lr=0x82172D8C r3=BCCC52C0 r4=FFFFFFFF r5=01000000 r6=00000000` on tid=6. 
`lr=0x82172D8C` is the post-bctrl PC inside `sub_82172BA0+0x1E8`. Reproduced at 120s and 180s. +- AUDIT-064 ours `--ctor-probe=0x821B55D8` -n 500M: **0 fires**. + +## Activation + +**Primary (runtime)**: vtable slot 6 dispatch from `sub_82172BA0+0x1E8 bctrl` (PC `0x82172D88`). The dispatcher walks an array of objects (loaded from `[r29+56]`) and invokes vtable slot 6 on each. Slot 6 = `lwz r11, 24(r11)` where r11 is the vtable. + +**Secondary (EH path)**: MSVC catch-handler at `sub_821B6DF4+0x40` tail-jumps here when a matching exception is caught. Not the runtime activation path observed in either engine at this horizon. + +## Static graph + +- Static callers (DB): + - `sub_821B6DF4+0x40` via `b 0x821B55D8` (EH thunk, NOT a `bl` — reached via exception dispatch only). + - No `bl` static callers recorded — but **AUDIT-064 captured `lr=0x82172D8C` at runtime fire**, meaning the actual `bl`-equivalent caller is the bctrl at `sub_82172BA0+0x1E8`. The static analyzer's ind_call list for PC `0x82172D88` includes many observed targets but NOT this fn (gap in the dynamic-target inference). +- Callees: `sub_824F8398` at PC `0x821B5B5C`, plus many others (`sub_821707C0`, `sub_822F13B0`, `sub_822F2A00`, `sub_823C2990`, ...). + +## Audit log + +- **AUDIT-064 (2026-05-12)** — disasm confirms normal-callee prolog (refutes "EH handler" hypothesis). Canary fires 1× / ours 0×. **Real runtime caller is `sub_82172BA0+0x1E8 bctrl`, NOT `sub_821B6DF4` EH thunk.** The DB xref via `b` from EH is a secondary entry path. **New reading-error class observed**: static xrefs for `bctrl` indirect targets are populated by some dynamic-target inference but it has gaps — must cross-check at runtime via `--audit_61_branch_probe_pcs` + LR resolution. [confirmed] +- **AUDIT-058 (2026-05-10)** — flagged as part of static caller ladder under `sub_821B6DF4`. [STATUS: partially falsified by AUDIT-064 — the runtime path is the bctrl from sub_82172BA0, not the EH thunk.] 
+ +## Open questions + +- Which class's vtable has slot 6 = `sub_821B55D8`? The instance loaded by `sub_82172BA0` at `[r3+24]` from the array. Possibly `silph::GamePart_Title` or a sibling — would need to enumerate `sub_82172BA0`'s array-walk target instances at runtime. +- Why does the DB's `xrefs` (kind=`ind_call`) for source `0x82172D88` not list `sub_821B55D8` as a target? The dynamic-target inference appears to populate from a separate trace, missing this one. + +## Cross-references + +- Callees: `sub_824F8398` (PC `0x821B5B5C`). +- EH-secondary entry: `sub_821B6DF4+0x40` (`b 0x821B55D8`). +- Runtime caller (bctrl): `sub_82172BA0+0x1E8` (PC `0x82172D88`). +- Audits: 058, 060, 064. +- Artifacts: `audit-runs/audit-064-activation-ladder/canary-{60,120,180}s.log`, `canary-upstream-60s.log`. diff --git a/docs/functions/sub_821B6DF4.md b/docs/functions/sub_821B6DF4.md new file mode 100644 index 0000000..7db3a64 --- /dev/null +++ b/docs/functions/sub_821B6DF4.md @@ -0,0 +1,58 @@ +--- +address: 0x821B6DF4 +classification: msvc_eh_catch_handler +confidence: high +last_audit: 060 +aliases: + - "AUDIT-058 caller-ladder top (FALSIFIED)" +--- + +# sub_821B6DF4 — MSVC C++ catch-handler thunk + +## Synopsis + +A C++ catch-handler thunk emitted by the MSVC PowerPC C++ runtime. Dispatched by the EH machinery (`_CxxFrameHandler3` equivalent) when a matching exception type is thrown — NOT a normal `bl` callee. AUDIT-058 mistakenly treated it as the top of a "static caller ladder" for `sub_825070F0`'s activation; AUDIT-060 falsified that by reading the prolog and the `.rdata` reference context. + +**This is the canonical "MSVC EH FuncInfo metadata mistaken for call edges" case. Always check the prolog before assuming a 0-caller fn is a missing activator.** + +## Evidence + +- Disasm at `0x821B6DF4` opens with the canonical MSVC catch-handler prolog: `subi r31, r12, 112; mflr r12; stwu r1, -96(r1); ...`. 
The use of `r12` (parent-frame pointer offset) and `mflr r12` is signature MSVC EH-handler shape. +- Address `0x821B6DF4` appears as a u32 value in only two places in the binary: + - `.rdata:0x820C1994` — embedded inside an MSVC FuncInfo struct. Bracketing bytes: `FFFFFFFF 821B6DF4 19930522 00000001 820C1990 ...`. `0x19930522` is the MSVC FuncInfo magic. + - `.pdata:0x8211C678` — exception-unwind metadata. +- AUDIT-060 Probe C-Win Windows Debug canary: `--log_lr_on_pc=0x821B6DF4`, runs at 120s and 240s wallclock → **0 fires both runs**. The matching exception is not thrown at this boot horizon. +- AUDIT-060 Probe O ours: `--ctor-probe=0x821B6DF4 -n 500M` → **0 fires**. +- Body: single `bl 0x82183B78` (an EH support routine) then return. + +## Activation + +C++ exception runtime dispatch. Fires iff a try-block protected by the FuncInfo at `0x820C1990` catches a thrown object whose type matches the catch's CatchTypeInfo. Neither engine throws this exception at the probed horizon. + +## Static graph + +- Static callers: **0** — and this is correct (0 callers does not imply dead; it implies "not a bl target"). +- Callees: `sub_82183B78` (EH support routine). +- xrefs in DB will show `kind=indirect` or absent entries; the `.rdata` reference at `0x820C1994` is the FuncInfo binding, not a call edge. + +## Audit log + +- **AUDIT-060 (2026-05-12)** — disassembled body; identified MSVC catch-handler prolog; cross-referenced `.rdata` bytes to find FuncInfo magic `0x19930522`; probed in both engines at 240s/-n500M → 0 fires both sides. AUDIT-058's "caller ladder" framing falsified. New reading-error class #16 logged. [confirmed] +- **AUDIT-058 (2026-05-10)** — claimed as "top of static caller ladder" for `sub_825070F0` activation, walked: `sub_825070F0 ← sub_824F7800 ← sub_824F7CD0 ← sub_824F8398 ← sub_821B55D8 ← sub_821B6DF4`. All 6 fire 0× in ours; framed as missing activation. 
[STATUS: falsified by AUDIT-060 — the entire 6-fn chain is C++ EH unwind metadata; none of them are normal call edges; they fire only on specific exception throws.] + +## Open questions + +- What exception type-id activates this catch? Parse the FuncInfo struct at `0x820C1990`: + - TryBlockMap entries → CatchTypeArray pointer → CatchType records (each has type_info* + handler ptr). + - The type_info string would identify the C++ class being caught. +- Is the matching throw site reachable in either engine at *any* boot horizon? If yes, when? +- Are the other 5 fns in the AUDIT-058 ladder ALL catch-handler thunks? Spot-check `sub_821B55D8`, `sub_824F8398`, `sub_824F7CD0`, `sub_824F7800`, `sub_825070F0`. (`sub_825070F0` DOES fire 1× per AUDIT-058 — so at least it's not pure-EH; could be the actual throw site or a normal-call leaf.) + +## Cross-references + +- FuncInfo location: `.rdata:0x820C1990` (start of struct), `0x820C1994` contains this fn's pointer. +- `.pdata` unwind: `0x8211C678`. +- Body callee: `sub_82183B78` (EH support). +- Companion ladder fns (need separate dossiers): `sub_821B55D8`, `sub_824F8398`, `sub_824F7CD0`, `sub_824F7800`, [sub_825070F0](sub_825070F0.md). +- Audits: 058, 060. +- Artifacts: `audit-runs/audit-060-fnptr-array-bootstrap/canary-sub821B6DF4-120s.log`, `canary-sub821B6DF4-240s.log`, `ours-summary.md`. diff --git a/docs/functions/sub_821C4EB0.md b/docs/functions/sub_821C4EB0.md new file mode 100644 index 0000000..a854ac4 --- /dev/null +++ b/docs/functions/sub_821C4EB0.md @@ -0,0 +1,76 @@ +--- +address: 0x821C4EB0 +classification: vtable_method +confidence: high +last_audit: 061 +aliases: + - "silph::GamePart_Title::UImpl member fn" + - "AUDIT-056 early-exit (falsified by 061)" +--- + +# sub_821C4EB0 — silph::GamePart_Title::UImpl member fn (AUDIT-061: NOT a branch-divergence gate) + +## Synopsis + +Member function on class `silph::GamePart_Title::UImpl` (vtable `0x820a3e00`). 
**AUDIT-061 falsified the "conditional-branch divergence in `[+0x44, +0xE0]`" framing**: all 4 branches in that range are decided **bit-identically** in canary and ours. The actual divergence is the call `bl 0x821CC3F8` at PC `0x821C4F14`: in canary the call returns to `0x821C4F18` and the rest of sub_821C4EB0 executes through the 5 `bl 0x821CEDF8` sites at +0x198..+0x240; in ours the call enters the chain `sub_821CC3F8 → sub_821CBA08 → sub_821CB030` and never returns (tid=13 wedge inside sub_821CB030 = AUDIT-049 wedge handle, `NtCreateEvent` at +0x128 → INFINITE wait). AUDIT-056's "5× canary / 0× ours" callsite count is an indirect consequence of the upstream wedge, not a branch-decision asymmetry in this fn. + +## Evidence + +- AUDIT-049: appears in tid=13 thread-create chain — `sub_821748F0 → sub_821C4EB0 (UImpl@GamePart_Title@silph) → sub_821CC3F8 → sub_821CBA08 → sub_821CB030`. +- AUDIT-056: caller-LR `0x821C4F2C / 0x821C5014 / 0x821C5048` are post-`bl` PCs inside this fn. Reported `sub_821CEDF8` 5× canary / 0× ours. +- AUDIT-059: in the wedge's wait-thread frame-4 saved-r29 the vtable is `0x820a3e00 = .?AUImpl@GamePart_Title@silph@@`, confirming class membership. +- AUDIT-061 (READ-ONLY canary multi-PC probe @ ~2:00 wallclock; ours `--branch-probe` @ -n 500M): + - Both engines call sub_821C4EB0 exactly **1×** at this horizon. Same caller LR=0x82174A80 (canary tid=17, ours tid=13). + - Canary probe fires 17× covering entry + post-bl block entries + all 4 cond-branches: B1 `beq cr6 NOT taken` (cr6=.G., r3=0xBC220008≠0), B2 `bne cr6 NOT taken` (cr6=..E, lbz @ 0x828F3284 = 0), B3 `beq cr6 TAKEN` (cr6=..E, lwz r3,92(r30) == 0), B4 `bgt cr6 TAKEN` (cr6=.G., [r27+4] > 4). Reaches 0x821C5048 (1st `bl 0x821CEDF8`) and 0x821C504C (returned). + - Ours probe fires 4× covering entry + 3 post-bl: 0x821C4EB0 → 0x821C4EB8 → 0x821C4ED0 → 0x821C4EEC (r3=0x40105004 returned from `bl 0x82150EF8`; cr6=.G., **same direction as canary**). 
After 0x821C4EEC: **never reaches 0x821C4F18 or anywhere later in the function**. + - Chain probe (separate run) confirms ours's tid=13 enters sub_821CC3F8 (cycle 2069) → sub_821CC3F8+0x38 post-alloc (2249) → sub_821CBA08 (2258) → sub_821CB030 (3242), then stalls. Canary's tid=17 returns out of all four and reaches 0x821CC454 (post-bl-sub_821CBA08) and 0x821C4F18 (post-bl-sub_821CC3F8) cleanly. + - First divergent INSTRUCTION (not branch): `bl 0x821CC3F8` at PC `0x821C4F14`. First divergent state: ours's r3 at function entry to sub_821CC3F8 is `0x40105004` (40xxxxxx host-allocator region) vs canary's `0xBC220008` (BCxxxxxx region) — but this VA difference is the AUDIT-043 ε-class (allocator region drift) and is BENIGN here; sub_821CC3F8 dereferences r3 as a pool handle the same way in both engines and downstream allocation succeeds (sub_82150EF8 returns valid pointer in both). + +## Activation + +Vtable method. Reached via `bctrl` from class-owning code in the boot UI / GamePart_Title state machine. Indirect; the dispatch site PC and vtable slot index need DB cross-reference (see Open questions). + +## Static graph + +- Caller chain at the wedge site (AUDIT-049): + - `sub_821C4EB0 ← sub_821748F0` (top-level) + - flows down to `sub_821CC3F8 (GamePart_Title)` → `sub_821CBA08` → `sub_821CB030` (where wedge fires) +- Callees in source order: + - `0x821C4EB4 bl 0x825F0F7C` — save-GPRs prologue helper + - `0x821C4ECC bl 0x8284DA7C` — XAM import `XNotifyPositionUI` (xam.xex ord 652); r3=0xA → returns 0 in both engines. + - `0x821C4EE8 bl 0x82150EF8` — pool allocator (called with allocator table @ `[0x828E0000+11028]`, size=4); returns pointer in both engines (canary BC220008, ours 0x40105004). + - `0x821C4F14 bl 0x821CC3F8` — **first divergent instruction (AUDIT-061)**: returns in canary, wedges in ours. + - `0x821C4F2C bl 0x82187C30` — only reached in canary at this horizon. + - `0x821C4F60 bl 0x82172370` — only reached in canary. 
+ - `0x821C4F74 bl 0x824AA3E0` — conditional on prior beq; canary takes the SKIP-bl path (B3 = taken). + - `0x821C5048 / 0x821C5074 / 0x821C50A0 / 0x821C50C8 / 0x821C50F0 bl 0x821CEDF8` — 5 sites in the bgt-taken path; only reached in canary. +- Conditional branches in `[+0x44, +0xE0]` (enumerated AUDIT-061): + - B1 `0x821C4EF8 beq cr6, 0x821C4F20` — after `cmplwi cr6, r3, 0` (r3 = sub_82150EF8 return). Decided NOT taken in both. + - B2 `0x821C4F3C bne cr6, 0x821C4F7C` — after `lbz r10, 12932(0x828F0000)+cmplwi r10, 0`. Decided NOT taken in canary; UNREACHED in ours. + - B3 `0x821C4F70 beq cr6, 0x821C4F78` — after `lwz r3, 92(r30)`. Decided TAKEN in canary; UNREACHED in ours. + - B4 `0x821C4F90 bgt cr6, 0x821C5000` — after `cmplwi cr6, r11, 3`, r11 = `[r27+4]−1`. Decided TAKEN in canary; UNREACHED in ours. + +## Audit log + +- **AUDIT-061 (2026-05-12)** — Multi-PC branch probe in both engines (new canary cvar `audit_61_branch_probe_pcs`, ours `--branch-probe`). All 4 conditional branches in `[+0x44, +0xE0]` decided **bit-identically** (B1 NOT-taken in both; B2/B3/B4 UNREACHED in ours because the function stalls earlier). First divergent BEHAVIOR is the call `bl 0x821CC3F8` at PC `0x821C4F14` — returns in canary, wedges in ours. The wedge is INSIDE `sub_821CB030` (chain `sub_821C4EB0 → sub_821CC3F8 → sub_821CBA08 → sub_821CB030`); tid=13 reaches sub_821CB030 at cycle 3242 and blocks indefinitely. Confirms AUDIT-049 wedge premise; matches AUDIT-059 γ-class missing-signaler. AUDIT-056's "5× sub_821CEDF8 canary / 0× ours" is an indirect consequence (those 5 sites are at +0x198..+0x240, downstream of the wedge). [confirmed — sub_821C4EB0 is NOT a branch-divergence gate] +- **AUDIT-060 (2026-05-12)** — convergence confirmed this fn as the AUDIT-061 target after AUDIT-058/059's "missing activator" framing was refuted. [superseded by 061 — actual divergence is non-returning call, not a branch] +- **AUDIT-056 (2026-05-10)** — identified as the primary divergence-introducer. 
Caller-LR is IDENTICAL canary/ours but body chooses a different path. [partially falsified by 061 — the "different path" framing was true at a high level, but it's because of a non-returning call, not a divergent conditional-branch decision in `[+0x44, +0xE0]`. The 5 sub_821CEDF8 callsites are downstream of the wedge.] +- **AUDIT-049 (2026-05-10)** — placed on the tid=13 chain that ultimately creates wedge handle. [confirmed — AUDIT-061 directly observed tid=13 entering sub_821CB030 in ours] + +## Open questions + +- ~~Enumerate every conditional branch PC in `[0x821C4EF4, 0x821C4F90]`~~. **DONE in AUDIT-061**: B1/B2/B3/B4 enumerated; none divergent in decision. +- ~~For each branch: capture cr0/cr6/cr-of-interest...~~. **DONE in AUDIT-061**. +- ~~What input register controls the first divergent branch?~~ **Moot — no branch diverges in this fn.** +- **NEW (AUDIT-062 target):** Where INSIDE sub_821CB030 does ours's tid=13 stall? AUDIT-049 hypothesized the wait at the event handle created at +0x128. Probe sub_821CB030's basic-block entries to find the highest-PC reached by tid=13 before stall; cross-reference with the NtCreateEvent / KeWaitForSingleObject sites. +- Which vtable slot is `sub_821C4EB0` at in vtable `0x820a3e00`? (still open; cross-ref `xrefs` table for `target = 0x821C4EB0` with `kind = 'read'` or `'ref'` in `.data`/`.rdata`). + +## Cross-references + +- Vtable: `0x820a3e00 = .?AUImpl@GamePart_Title@silph@@` (class) +- Sibling class vtable: `0x820a3dc8 = .?AVGamePart_Title@silph@@` (parent? aggregate?) +- Callees: `sub_821CC3F8` (first-divergent-call AUDIT-061), `sub_821CEDF8` (5× sites at +0x198..+0x240, only reached in canary) +- Callers: `sub_821748F0` (top of tid=13 chain; lr=0x82174A80 seen in both engines AUDIT-061) +- Wedge chain: [sub_821CB030](sub_821CB030.md) is where ours's tid=13 stalls per AUDIT-061's chain probe. 
+- Audits: 049, 056, 057, 058, 059, 060, **061** +- Artifacts: `audit-runs/audit-056-producer-trace/`, `audit-runs/audit-059-gamma-wedge/`, `audit-runs/audit-061-sub821C4EB0-branch-diff/` diff --git a/docs/functions/sub_821CB030.md b/docs/functions/sub_821CB030.md new file mode 100644 index 0000000..66ad58f --- /dev/null +++ b/docs/functions/sub_821CB030.md @@ -0,0 +1,62 @@ +--- +address: 0x821CB030 +classification: normal_callee +confidence: high +last_audit: 066 +aliases: + - "wedge primary site" + - "file-IO completion event creator+waiter" +--- + +# sub_821CB030 — wedge primary site (creates + submits + waits file-IO completion XEvent) + +## Synopsis + +The function whose body creates, submits work for, and waits on the canonical AUDIT-049/058/059 γ-wedge XEvent. Used by `silph::GamePart_Title::UImpl` to load `cache:\aab216c3\5\…` files synchronously: NtCreateEvent at `+0x128`, work submit at `+0x19C` (calls `sub_82452DC0`), wait INFINITE at `+0x1AC`. The wait is what blocks the entire post-intro phase in ours. + +## Evidence + +- AUDIT-049: tid=13 chain ends at this fn with wait at `0x824ac578` (KeWaitForSingleObject in the wait wrapper called from `+0x1AC`). +- AUDIT-058: canary captures `sub_821CB030+0x12c` (=PC after the NtCreateEvent bl) in stacks. +- AUDIT-059 Probe O ours: handle `0x12AC` (Event/Auto) created at `0x821cb158` (=`+0x128`), waited at `0x821cb1dc` (=`+0x1AC`). Wedge has `signal_attempts=0` — never signaled by the worker side. +- AUDIT-059 Probe C canary: same PCs fire; `0xF8000098` created, then `NtDuplicateObject`'d to `0xF80000A0`, original closed fast, dup signaled by worker via `sub_82458B90`/`sub_8245EC10`. +- File-IO context: precedes synchronous file load of `cache:\aab216c3\5\…` (post-VFS work in AUDIT-054). + +## Activation + +Direct `bl` from `sub_821CBA08+0xd8` (AUDIT-059 create-stack frame 1). One static caller. Higher in the chain: `sub_821CC3F8 (GamePart_Title) → sub_821CBA08 → sub_821CB030`. 
+ +## Static graph + +- Callers: + - `sub_821CBA08+0xd8` (only static caller) +- Callees of interest: + - `sub_824A9F18` — NtCreateEvent wrapper, called at `+0x124 bl` (post-call PC = `+0x128 = 0x821CB158`). + - `sub_82452DC0` — work-submitter, called at `+0x198 bl` (post-call PC = `+0x19C`). + - `sub_824AC540` — wait wrapper, called at `+0x1A8 bl` (post-call PC = `+0x1AC = 0x821CB1DC`). + +## Audit log + +- **AUDIT-066 (2026-05-12)** — **source-review only (READ-ONLY)**. Re-read canary's `xenia/kernel/xboxkrnl/xboxkrnl_io.cc:39-389` + `xfile.cc:19-198` + `kernel_state.cc:519-551` and ours's `xenia-kernel/src/exports.rs:1103-1518, 3747-3764`. AUDIT-065's "host-side IO completion thread `F8000048` signals each per-load event" framing is **falsified**: (i) canary's `NtReadFile`/`NtReadFileScatter`/`NtWriteFile` are synchronous and signal the supplied event handle **inline** via `ev->Set(0, false)` (lines 210-212, 296-298, 383-385); no host async-IO thread exists; the only host thread "Kernel Dispatch" (`kernel_state.cc:524-549`) services `CompleteOverlappedDeferred` for XAM overlapped UI/content, not file IO; (ii) `F8000048` in AUDIT-065 stdout is a **guest XThread** thid=10 (entry `0x82450A28`, ctx `0x828F3B68`), spawned by main at `canary-run.stdout:1331` via `ExCreateThread(...,824AFF88, 82450A28, 828F3B68, 0)` — the `F8` prefix is a guest kernel-object handle region marker, NOT a host-thread marker; (iii) cache loads at `canary-run.stdout:2127-2154` (sequence `NtCreateEvent → NtCreateFile → NtDuplicateObject → NtQueryInformationFile → NtClose`) emit **zero** `NtReadFile`/`NtSetEvent` lines — `NtQueryInformationFile` has no event-handle parameter in either engine; (iv) thid=17 (`F8000094`) terminates via `ExTerminateThread(0)` WITHOUT ever calling Wait inside its cache loop — so the canary path doesn't even hit this fn's wait sites for the cache files visible in AUDIT-065's stdout. 
Ours's `signal_io_completion_event` (`exports.rs:1156-1169`) called from 16 sites in `nt_read_file`/`nt_write_file`/`nt_device_io_control_file` already implements canary's `ev->Set(0, false)` semantics — **there is no missing analog**. The wait at this fn's `+0x1AC` is a wait on the `sub_82452DC0` work-queue dup'd XEvent, signaled by guest worker-cluster code (γ-signalers A/B/C/D per AUDIT-059/060) — not IO completion. Bug class confirmed = AUDIT-063 structural / bootstrap-ordering. **AUDIT-066 fix locus (`xenia-kernel/src/exports.rs` IO handlers) is the WRONG target**; the bug is upstream in worker-cluster bootstrap (`sub_825070F0` activation gate). [confirmed: NO IO-completion gap] +- **AUDIT-065 (2026-05-12)** — wedge mechanism precisely framed via [sub_82173990](sub_82173990.md). Canary's tid=17 worker (= analog of ours's tid=13) reaches `ExTerminateThread(0)` after sequentially loading `cache:\aab216c3\5\ee70e0a`, `cache:\87719002\c\dba806e/ec0a96e`, `cache:\87719002\a\60fcb85`, `cache:\87719002\2\85d8849`, `cache:\87719002\0\1a2db9c` etc — 16+ cache file loads — AND spawning child workers via `ExCreateThread(..., 824AFF88, 821C4AD0/822C6870, ...)`. Worker's own `sub_821CB030` calls (file-IO completion event waits) complete in canary. **In ours, the very first sub_821CB030 call (on handle `0x12AC`) hangs (`NO_SIGNALS_DESPITE_WAITS`)** — tid=13 never reaches `ExTerminateThread`, tid=1's join wait on `0x12A4` never completes. Cache file opens succeed in ours (paths `cache:/aab216c3/5`, `cache:/aab216c3` etc seen in log just before the stall) — so the bug is post-VFS, in the producer→worker async-IO completion signaling, exactly as AUDIT-062 found. [confirmed] +- **AUDIT-063 (2026-05-12)** — AUDIT-062's candidate trio (`0x822F2304`/`0x822F1D84`/`0x821743D8`) confirmed as RED HERRINGS: containing fns `sub_822F2248`/`sub_822F1AA8`/`sub_821741C8` resolved, but **none are reachable from `sub_82452DC0` in 12 hops**. 
Track-A probe (180s canary / 500M-instr ours): canary fires 11.7k× / ours 0× on `0x822F1D84` and `0x821743D8` — but they're downstream of an unblocked main event loop (canary tid=6 = guest main). Ours's main (tid=1) is `Blocked` on `0x12A4` (tid=13 thread-join handle, AUDIT-049), which transitively blocks on this fn's wedge `0x12AC`. Real producer is the worker cluster `sub_82458B90`/`sub_8245EC10`/`sub_8245FEB8`/`sub_8245D9D8`/`sub_8245DA78` running on the 4 workers spawned by `sub_825070F0` — **0 of those 8 workers spawn in ours** vs 8 in canary. The bug is the AUDIT-057 thread-gap closing in on itself: the cluster cannot bootstrap because the wedge isn't signaled, and the wedge isn't signaled because the cluster cannot bootstrap. NO new producer fn was missed by prior audits. [confirmed: trio is symptom not cause] +- **AUDIT-062 (2026-05-12)** — wedge KEVENT data-flow traced. Outcome **(b)**: NtDuplicateObject thunk = `0x8284DF7C`; sub_821CB030 has NO direct bl-NtDup (dup is performed by descendant via wrapper `sub_824AA398`). Phase 2 ours `--lr-trace=0x8284DF7C`: wedge handle `0x12AC` IS duped by tid=13 cycle 26711 (alongside `0x12B0` cycle 23833). Out_ptr `0x40541E80` populated with dup_handle = source_handle = `0x12AC` (ours aliases per `exports.rs:4263`). sub_82452DC0 fires 8× in ours; line 8 = wedge submit on tid=13 cycle 8127 lr=0x821CB1D0, with r6=0x40541E80 (job struct carries the dup pointer). So **work IS submitted with the right handle**. Phase 4 ours `--lr-trace=0x8284DF5C,0x824AA2F0`: 68 NtSet fires, **0 on `0x12AC`** (neighbors 0x129C / 0x12B0 ARE signaled — infrastructure capable). γ-signalers A/B/C/D all fire (3/2/3/6+2 fires resp.) — but on non-wedge handles. **The break is upstream of γ-signaler**: ours's worker tid=5 is parked on its OWN idle event `0x12B8` (created by tid=5 via NtCreateEvent), and **no NtSetEvent in ours signals `0x12B8`** (also NO_SIGNALS_DESPITE_WAITS). Producer-side worker-wake signal is missing. 
Cascade A=NtDup fires correctly on wedge YES (cycle 26711); B=wedge dup NOT signaled CONFIRMED; C=outcome (b) localized to producer→worker wake gap (`0x12B8`); D=draws>0 deferred to AUDIT-063 fix. New finding: **γ-signaler D = `sub_8245D9D8` / `sub_8245DA78`** (LR `0x8245DA44` / `0x8245DB08`) — NtSet wrapper hot from worker-side, missed by AUDIT-059/060 dossier list. Canary spreads NtDup across 6 tids (6/10/16/17/18/26 → 33 fires/180s); ours across 3 (1/5/13 → 14 fires) — confirms AUDIT-057 thread-gap as enabling condition. Trace `audit-runs/audit-062-wedge-kevent-flow/`. [confirmed outcome b] +- **AUDIT-060 (2026-05-12)** — confirmed wedge structural identification: `NtCreateEvent → NtDuplicateObject → enqueue → worker → NtSetEvent on dup` (canary path); ours stalls at the wait because workers don't signal. [confirmed] +- **AUDIT-059 (2026-05-11)** — established as keystone γ-wedge site. Handle 0x12AC create-site is here at `+0x128`. [confirmed] +- **AUDIT-058 (2026-05-10)** — sister mention in tid=13 chain (frames via sub_821CB1D0 ← sub_821CBAE0). [confirmed] +- **AUDIT-049 (2026-05-10)** — original discovery that tid=13 waits INFINITE on event created here; main thread (tid=1) is downstream via thread-join handle. [confirmed] + +## Open questions + +- Is the `+0x128` create the ONLY NtCreateEvent in this fn, or are there multiple? **AUDIT-062 db query: exactly 1 `bl 0x824A9F18` (NtCreateEvent wrapper) at `+0x128`.** Two `bl 0x82452DC0` (`+0x19C`, `+0x2EC`) and two `bl 0x824AA330` wait-wrappers (`+0x1AC`, `+0x318`) — same KEVENT submitted+waited twice (sequential file-IO loads), or alternative-branch fork. Canary's 2 fires at `0x821CB158` therefore mean sub_821CB030 is *invoked twice* by its caller, each creating a fresh KEVENT. +- What does `+0x19C..+0x1A8` do between work-submit and wait? (Likely sets up the wait params.) Disassemble to confirm. 
+- ~~Does ours's NtDuplicateObject correctly create a signal-aliased handle?~~ AUDIT-062 confirmed: YES — ours aliases (dup_id = source_id), out_ptr populated, refcount bumped. Bug is NOT here. +- **Open after AUDIT-062**: which producer-side call (descendant of `sub_82452DC0`) calls `NtSetEvent` on the worker idle event (`0x12B8`-class) in canary, and why does ours skip it? Probe canary's hot NtSet wrapper LRs `0x822F2304, 0x822F1D84, 0x821743D8` (9k+ fires each) — one of these is likely the worker-wake. **Update:** AUDIT-063 probed this trio and found all three to be red herrings (symptom, not cause — see the AUDIT-063 entry in the Audit log). + +## Cross-references + +- Wedge handle in ours: drifts per run (0x1288/0x12A4/0x12AC across audits — see [reference_function_dossiers](README.md) caveat). +- Callers: [sub_821CBA08](#) (no dossier yet) +- Callees: [sub_82452DC0](sub_82452DC0.md), sub_824A9F18 (NtCreateEvent wrapper) +- Audits: 049, 058, 059, 060, 062, 063, 065, 066 +- Artifacts: `audit-runs/audit-049-tid1-stall-0x1280/`, `audit-runs/audit-059-gamma-wedge/`, `audit-runs/audit-062-wedge-kevent-flow/` diff --git a/docs/functions/sub_822F1AA8.md b/docs/functions/sub_822F1AA8.md new file mode 100644 index 0000000..1146ab2 --- /dev/null +++ b/docs/functions/sub_822F1AA8.md @@ -0,0 +1,55 @@ +--- +address: 0x822F1AA8 +classification: normal_callee +confidence: high +last_audit: 065 +aliases: + - "tid=1 post-init dispatch loop (calls sub_82173990 via vtable[0])" +--- + +# sub_822F1AA8 — tid=1 post-init game-loop dispatcher + +## Synopsis + +Normal-callee invoked by tid=1's `entry_point → sub_8216EA68 → sub_822F1AA8`. Contains the per-frame game-loop pump for the post-init subsystem-dispatch tree (audit-064 chain to [sub_825070F0](sub_825070F0.md)). Runs an outer loop: (a) `KeWaitForSingleObject` infinite at PC `0x822F1DFC`, (b) dispatches vtable[0] of object at `*(0x828E1F08)` at PC `0x822F1B4C bctrl` — which is `sub_82175330` → tail-jump → `sub_82173990`. Canary executes the outer-loop body 4040× in 60s (per-frame). 
Ours executes the function entry 1× then **blocks immediately inside sub_82173990 (the vtable[0] callee) at sub_82173990+0x2D0** — KeWaitForSingleObject INFINITE on handle `0x12A4` (= tid=13's thread handle = AUDIT-049 wedge). + +## Evidence + +- Disasm prolog: `mflr r12; bl 0x825F0F60; stfd f30, -136(r1); stfd f31, -128(r1); subi r31, r1, 256; stwu r1, -256(r1); mr r30, r3; ...` — normal-callee. +- Function size: 996 bytes / 249 insns. `has_eh=True`, `frame_size=0` per DB (dynamic 256-byte frame). +- Static caller: `sub_8216EA68+0x3AC` via `bl` (the post-init dispatcher). +- AUDIT-064 ours `--ctor-probe=0x822F1AA8` -n 500M fires 1× at tid=1, cycle=6,171,801, lr=0x8216ee14, r3=0x40d09a40. Back-chain: tid=1 thread_proc → entry_point → sub_8216EA68+0x3AC → sub_822F1AA8. +- AUDIT-064 ours fine probe at BB-entries `0x822F1ACC/0x822F1AEC/0x822F1B20/0x822F1B30/0x822F1B38` all fire 1× — execution does pass through the function body to PC `0x822F1B38`. +- AUDIT-064 ours `--ctor-probe=0x822F1B50` fires **0×**. The bctrl at PC `0x822F1B4C` DOES execute (sub_82175330 fires 1× per separate probe), but never returns — because sub_82175330 tail-jumps to sub_82173990 which blocks at +0x2D0. +- AUDIT-064 canary 60s probe (`--audit_61_branch_probe_pcs`): all probes in body fire — `0x822F1B5C/0x822F1B78/0x822F1BB8` fire 1×, `0x822F1BCC` (outer-loop body) fires 4040×, `0x822F1D58` (the inner bctrl → sub_821741C8) fires 4030×, `0x822F1DFC` (outer KeWait) fires 4040×. +- Global `0x828E1F08` is the slot holding the object pointer that the vtable[0] bctrl dispatches off. Its writers are `sub_822F14D8+0xF0` (PC `0x822F15A4`, observed in ours) and `sub_822F1638+0x84` (PC `0x822F16BC`). At cycle ~6,171,800 in ours, `[0x828E1F08]` is set to `0x40111890`; `[0x40111890+0]` evolves through multiple vtable values (`0x820AD894`, `0x820A183C`, ...) before the bctrl fires; final value at bctrl is `0x820A183C` (slot 0 = `sub_82175330`). 
+ +## Activation + +Direct `bl` from `sub_8216EA68+0x3AC` on tid=1. One-shot at boot per game session — but the function itself contains an outer loop that iterates `KeWaitForSingleObject` waits until termination. + +## Static graph + +- Static callers: `sub_8216EA68+0x3AC` via `bl` (sole). +- Direct callees: `sub_822F13B0`, `sub_824AA2F0` (NtSetEvent wrapper), `sub_82172370`, `sub_824AA3E0`, `sub_824C1910` (leaf), `sub_824AA8B0`, `sub_82456B58`, `sub_824AA330` (KeWaitForSingleObject wrapper), `sub_824574C0`, `sub_82457038`, `sub_8284E45C` (kernel import thunk). +- Indirect: `bctrl` at PC `0x822F1B4C` (vtable[0] of `*(0x828E1F08)`) and `bctrl` at PC `0x822F1D58` (vtable[2] of same). +- Reads `0x828E1F08` at PCs `0x822F1B3C, 0x822F1BE8, 0x822F1D40, 0x822F1E44, ...` (11 reads). + +## Audit log + +- **AUDIT-065 (2026-05-12)** — refined the dispatch-target understanding. The vtable[0] callee at PC `0x822F1B4C bctrl` is `sub_82175330` (2-insn tail-jump to `sub_82173990`). `sub_82173990` is a **synchronous task-spawn-and-join helper** — not an outer game loop. Canary fires this function **exactly 1× per boot** (not 4040× as the synopsis previously suggested) — the 4040× metric in audit-064 referred to PCs *downstream of* sub_82173990's return into sub_822F1AA8's outer loop (PCs `0x822F1BCC`/`0x822F1D58`/`0x822F1DFC`). Per AUDIT-065 sub_82173990 dossier, the wait at sub_82173990+0x2D0 IS a thread-join, and the body of sub_82173990 itself is clean — only the worker spawned via `sub_821746B0` (which becomes ours's tid=13) is wedged inside `sub_821CB030`. [confirmed] +- **AUDIT-064 (2026-05-12)** — identified as the immediate dispatch chain origin for the 4-fn ladder leading to sub_825070F0. 
Disasm + ours fine-grained BB probes localize the wedge: tid=1 enters function (1×), passes through PCs 0x822F1ACC/0x822F1AEC/0x822F1B20/0x822F1B30/0x822F1B38, executes bctrl at PC `0x822F1B4C` → sub_82175330 → sub_82173990 → KeWaitForSingleObject(handle=0x12A4 = tid=13 thread handle) → STALL. Canary instead returns from that wait and enters the outer game-loop body (`0x822F1BCC` fires 4040× in 60s). **First divergence between canary and ours is at sub_82173990's wait inside vtable[0] of `*(0x828E1F08)` — same AUDIT-049 wedge.** [confirmed] + +## Open questions + +- The vptr at `[0x40111890+0]` mutates multiple times before the bctrl fires (writes from sub_82152XXX, sub_8244e850, sub_8244e8bc, sub_82155b4c, sub_82460c40, sub_822F2758, sub_8216F110, ...). Is the final value `0x820A183C` (which has slot 0 = sub_82175330) the same as canary's final value? Run the same `--mem-watch` against canary to verify. +- Why does canary's tid=13 finish (allowing tid=1's join wait on handle 0x12A4 to complete) while ours's tid=13 stalls? That's the AUDIT-049 root question — separately tracked. + +## Cross-references + +- Direct callers: `sub_8216EA68+0x3AC`. +- Callees of interest: `sub_82173990` (via vtable[0] thunk `sub_82175330`) — where tid=1's stall occurs. +- Downstream (when activated): `sub_82173990` → `sub_821741C8` → `sub_82172BA0` → `sub_821B55D8` → `sub_824F8398` → `sub_824F7CD0` → `sub_824F7800` → `sub_825070F0`. +- Object dispatch: `*(0x828E1F08) = 0x40111890`, vptr `[0x40111890+0] = 0x820A183C` (vtable), slot 0 = `sub_82175330`. +- Audits: 049 (the underlying wedge), 064, 065. +- Artifacts: `audit-runs/audit-064-activation-ladder/ours-fine-822F1AA8.stdout`, `ours-bb-822F1AA8.stdout`, `ours-vtable820a183c.stdout`, `ours-vptr-time.log`, `canary-inside-822F1AA8.log`. 
diff --git a/docs/functions/sub_82452DC0.md b/docs/functions/sub_82452DC0.md new file mode 100644 index 0000000..5377978 --- /dev/null +++ b/docs/functions/sub_82452DC0.md @@ -0,0 +1,62 @@ +--- +address: 0x82452DC0 +classification: normal_callee +confidence: high +last_audit: 063 +aliases: + - "work-submitter" + - "audit-050 root" +--- + +# sub_82452DC0 — work-submitter / cluster-root + +## Synopsis + +Central work-submission function. All AUDIT-049–060 γ-wedge chains and AUDIT-058 vtable-activation chains funnel through this function. Receives a request (likely a file-IO descriptor + completion XEvent) and dispatches it via 9 direct callees + 1 indirect call. In canary it fires ~3.21× more often than in ours per AUDIT-056 — the upstream gate is in its caller, not in it. + +## Evidence + +- AUDIT-050 enumerated 9 direct targets at `bl` sites: `0x8245AE50, 0x82452068, 0x82452200, 0x8245B000, 0x8245B078, 0x82454A40, 0x82452AB8, 0x82454918, 0x82452EC4`, plus 1 `ind_call`. +- AUDIT-051 found a predicate gate at `+0x78`: `0x82452E2C beq cr6, 0x82452E88`, controlled by `sub_8245B000(r3)` returning 1 iff `[r3+0]≠0 AND [r3+4]≠0`. The 80-byte stack-local struct lives at `r31+96`. +- AUDIT-052 found `[r3+0]` / `[r3+4]` are halves of a hash key formatted into `cache:\\\` paths — i.e. the struct holds a content hash for cache resolution. Predicate refuted as the bug. +- AUDIT-055 probed `sub_8245B078`'s body with a cache override: body executes correctly; divergence is upstream. +- AUDIT-056: fires **canary 45/60s, ours 14/26s = 3.21× ratio**. Sharpest specific divergence: `sub_8217FA08` from `LR=0x82455E60` (`=sub_82455DF0+0x70`) canary 20 / ours 0. +- AUDIT-059: tid=13 itself fires `sub_82452DC0` once at LR=`0x821cb1d0` (from `sub_821CB030+0x19C`) immediately before waiting on the file-IO completion XEvent. +- AUDIT-060: confirmed convergence — `sub_8245FEB8 ← sub_824601A0 ← sub_82460118 ← sub_82452AB8 ← sub_82452DC0`. The vptr-installer chain bottoms out here. 
+ +## Activation + +Direct `bl` from 34 static caller sites per AUDIT-051. Notable callers: +- `sub_821CB030+0x19C` — drives the file-IO completion submission used by `silph::GamePart_Title::UImpl`. +- `sub_821CB030+0x2BC` — second site in same fn. +- `sub_821C4EB0` chain (AUDIT-056 gate). +- `sub_82173990+0x208` (program-top frame). + +## Static graph + +- Static callers: 34 sites across boot + tid=13 + UI cluster. +- Static callees (direct `bl`): 9 functions above + 1 computed call. +- The 9-target tree is the "worker activation surface". `sub_82452AB8` is the gate leading to vptr installers; `sub_8245B078`, `sub_8245B000` are the cache-key/hash gates; the others are queue management. + +## Audit log + +- **AUDIT-063 (2026-05-12)** — static reachability surveyed: among the 60 distinct callers of NtSet wrapper `0x824AA2F0`, **only 1 is reachable within 12 hops from `sub_82452DC0`**: `sub_8245FEB8` (γ-signaler C). The AUDIT-062 candidate trio (`sub_822F2248`, `sub_822F1AA8`, `sub_821741C8`) are NOT downstream. Confirms that the "producer-side worker-wake signal" canary path is the worker cluster (`sub_82458B90`/`sub_8245EC10`/`sub_8245FEB8`/`sub_8245D9D8`/`sub_8245DA78`) reached via the 4 worker threads spawned by `sub_825070F0` — and those threads are unspawned in ours (0 vs 8 in canary). [confirmed] +- **AUDIT-060 (2026-05-12)** — confirmed as single funnel for AUDIT-058+059 chains; the work-submitter is alive and queues but the throughput is gated by `sub_821C4EB0` early-exit per AUDIT-056. [confirmed] +- **AUDIT-059 (2026-05-11)** — fires 8× in ours; one of those is tid=13 from `sub_821CB030+0x19C` right before waiting on the wedge XEvent. Work submitted but no signal returns. [confirmed] +- **AUDIT-056 (2026-05-10)** — fires canary 45 / ours 14 in matched windows = 3.21× gap. Bug class refined to "δ-throughput". [confirmed] +- **AUDIT-055 (2026-05-10)** — proved `sub_8245B078` body executes correctly; ruled out a downstream bug here. 
[confirmed] +- **AUDIT-052 (2026-05-10)** — refuted AUDIT-051's "missing population" hypothesis. The struct is bit-identical to canary; `[r3+0]/[r3+4]` are a content hash. [supersedes-AUDIT-051-claim] +- **AUDIT-051 (2026-05-10)** — initially identified `+0x78` predicate gate as bug. [STATUS: hypothesis falsified by AUDIT-052; the gate itself is real and named correctly, but it's not the bug] +- **AUDIT-050 (2026-05-10)** — enumerated 9 direct targets + 1 indirect; framed as activation-surface root. [confirmed] + +## Open questions + +- Why does `sub_82452DC0` fire 3.21× less in ours? AUDIT-061 pivots to its caller `sub_821C4EB0`'s internal branches `[+0x44, +0xE0]`. +- The 1 indirect call (computed) — what does it dispatch to, and does our static-analyzer miss any of its candidates? + +## Cross-references + +- Callers: [sub_821CB030](sub_821CB030.md), [sub_821C4EB0](sub_821C4EB0.md) +- Callees-of-interest: [sub_82452AB8](#) (not yet dossierd), `sub_8245B078`, `sub_8245B000` (cache-hash gate) +- Audits: 049, 050, 051, 052, 053, 054, 055, 056, 057, 058, 059, 060, 063 +- Artifacts: `audit-runs/audit-050-*/`, `audit-runs/audit-056-producer-trace/`, `audit-runs/audit-059-gamma-wedge/`, `audit-runs/audit-060-fnptr-array-bootstrap/` diff --git a/docs/functions/sub_82457EF0.md b/docs/functions/sub_82457EF0.md new file mode 100644 index 0000000..81106c4 --- /dev/null +++ b/docs/functions/sub_82457EF0.md @@ -0,0 +1,47 @@ +--- +address: 0x82457EF0 +classification: thread_proc +confidence: high +last_audit: 060 +aliases: + - "tid=6 thread_proc" +--- + +# sub_82457EF0 — tid=6 thread_proc (worker entry) + +## Synopsis + +Thread procedure for tid=6 in ours. 0 static callers — and that is *correct* for a `thread_proc`: it is installed as an entry-point via `ExCreateThread` somewhere in boot, not invoked via `bl`. AUDIT-059's "only-caller of [sub_82458B90](sub_82458B90.md) has 0 callers — fnptr-array only" inference was wrong; the actual activation is thread creation. 
+ +## Evidence + +- AUDIT-060 Probe O ours: fires **1× on tid=6** (HW=2, cycle=0, lr=`0xbcbcbcbc` — thread-entry sentinel). +- `lr=0xbcbcbcbc` is the Xbox 360 / xenia convention for "this is the very first instruction of a thread proc; no return address". This is a diagnostic that distinguishes thread entry from a normal `bl` fire. +- Calls [sub_82458B90](sub_82458B90.md) at `+0x24` (1 callee at this offset). + +## Activation + +Registered as a thread entry-point via `ExCreateThread` (or similar). The caller of `ExCreateThread` that installs this entry has not yet been traced — that's the *real* activation site, and tracing it would close the loop on tid=6's purpose. Once tid=6 starts, the OS scheduler runs `sub_82457EF0` from PC `0x82457EF0` with LR=`0xbcbcbcbc`. + +## Static graph + +- Static callers (`bl`): **0** (correct — see classification). +- Callees: `bl sub_82458B90` at `+0x24` (PC `0x82457F18`). +- The "indirect call site" that activates this fn is the `ExCreateThread` invocation, captured at runtime, not in static `xrefs`. + +## Audit log + +- **AUDIT-060 (2026-05-12)** — identified as tid=6 thread_proc via `lr=0xbcbcbcbc` thread-entry sentinel + HW=2 + cycle=0 first-fire context. AUDIT-059's static-reachability inference invalidated. [confirmed] +- **AUDIT-059 (2026-05-11)** — flagged as "only-caller of canary signaler A; 0 callers — fnptr-array only". [STATUS: partially correct (0 callers true; fnptr-array WRONG), corrected by AUDIT-060 — it's a thread_proc.] + +## Open questions + +- Where is `ExCreateThread(entry=sub_82457EF0, ...)` called from? Probe the `ExCreateThread` import thunk in both engines with filtered LR/r3 to find the install site. +- What does the thread body do beyond calling [sub_82458B90](sub_82458B90.md) once? Likely it's a loop that waits on a queue, dequeues work, and signals completion via the bl at `+0x24`. Disassemble the body. + +## Cross-references + +- Thread-body callee: [sub_82458B90](sub_82458B90.md). 
+- Install site (`ExCreateThread` caller): not yet identified. +- Audits: 059, 060. +- Artifacts: `audit-runs/audit-060-fnptr-array-bootstrap/ours-phase1.stdout` (the `lr=0xbcbcbcbc` sentinel evidence). diff --git a/docs/functions/sub_82458B90.md b/docs/functions/sub_82458B90.md new file mode 100644 index 0000000..cfcab10 --- /dev/null +++ b/docs/functions/sub_82458B90.md @@ -0,0 +1,50 @@ +--- +address: 0x82458B90 +classification: normal_callee +confidence: high +last_audit: 060 +aliases: + - "canary γ-wedge signaler A" +--- + +# sub_82458B90 — canary γ-wedge signaler A (NtSetEvent caller from tid=6 thread_proc body) + +## Synopsis + +A function that wraps `bl 0x824AA2F0` (NtSetEvent wrapper) at an internal PC near `+0x180` (canary LR `0x82458D14`). In canary, this is one of two NtSetEvent caller-LRs that signal the AUDIT-059 file-IO completion wedge dup handle (per `(tid, r31)` cross-run invariant). Reached only from [sub_82457EF0](sub_82457EF0.md)+0x24, which is itself the **tid=6 thread_proc entry**. The "1 static caller, 0 callers above" chain in `xrefs` is structurally correct for a fn invoked from a thread loop's body. + +## Evidence + +- AUDIT-059 Probe C canary: at LR `0x82458D14` (=`sub_82458B90+0x184` or similar post-`bl 0x824AA2F0` internal PC), signals the wedge dup handle (matched cross-run via `r31` stack invariant — thread `F8000054` / frame `0x7036FDC0`). +- AUDIT-060 Probe O ours: fires **1× in ours** (`--ctor-probe`), called from `sub_82457EF0+0x24` (PC `0x82457f18`). +- Static caller chain in DB: `sub_82458B90 ← sub_82457EF0` (1 caller); `sub_82457EF0` itself has 0 static callers — it is the tid=6 thread_proc entry. + +## Activation + +Direct `bl` from `sub_82457EF0+0x24` (single static caller). [sub_82457EF0](sub_82457EF0.md) is a `thread_proc`, so the activation chain is: +1. Some boot-site calls `ExCreateThread(entry=sub_82457EF0)` — installing tid=6's thread_proc. +2. 
Thread tid=6 starts; PPC entry-LR sentinel `0xbcbcbcbc` indicates "first instruction of thread_proc". +3. `sub_82457EF0` body calls this fn via `bl` at `+0x24`. + +## Static graph + +- Static callers (`bl`): 1 site = `sub_82457EF0+0x24` (PC `0x82457f18`). +- Callees: `bl 0x824AA2F0` (NtSetEvent wrapper) internal. + +## Audit log + +- **AUDIT-060 (2026-05-12)** — confirmed alive in ours (1 fire on tid=6). AUDIT-059's "fires 1× off-wedge" wording was technically correct but misleading; the function IS active, just signaling a different KEVENT instance per call. [confirmed alive] +- **AUDIT-059 (2026-05-11)** — identified as canary NtSetEvent signaler A for the wedge dup handle via cross-run `r31` invariant. Static reachability claim ("only-caller has 0 callers — fnptr-array only") flagged as suspect; AUDIT-060 confirms the chain is correct but the conclusion ("unreachable") was wrong. [confirmed for canary signaler role] + +## Open questions + +- What r3 (handle) does `sub_82458B90` pass to `bl 0x824AA2F0` in ours's 1 fire vs canary's signaling fires? Probe entry of `sub_824AA2F0` filtered by caller=`sub_82458B90`. +- Is `sub_82457EF0`'s thread body a "wait on queue, dequeue work, signal completion" loop? If yes, what queue? And is the queue empty in ours but populated in canary? + +## Cross-references + +- Caller (thread_proc): [sub_82457EF0](sub_82457EF0.md). +- NtSetEvent wrapper: `sub_824AA2F0` (not yet dossierd). +- Sibling canary signaler: [sub_8245EC10](sub_8245EC10.md). +- Audits: 059, 060. +- Artifacts: `audit-runs/audit-059-gamma-wedge/canary-setwrapper.log`, `audit-runs/audit-060-fnptr-array-bootstrap/`. 
diff --git a/docs/functions/sub_8245EC10.md b/docs/functions/sub_8245EC10.md new file mode 100644 index 0000000..b464a42 --- /dev/null +++ b/docs/functions/sub_8245EC10.md @@ -0,0 +1,55 @@ +--- +address: 0x8245EC10 +classification: dispatch_table_method +confidence: high +last_audit: 060 +aliases: + - "canary γ-wedge signaler B" + - "dispatch_table 0x820B5830 slot 1" +--- + +# sub_8245EC10 — dispatch_table slot 1 method, canary γ-wedge signaler B + +## Synopsis + +Method living at slot 1 of `dispatch_table @ 0x820B5830`. The dispatch table is installed at struct offset 0 (vptr) by [sub_8245FEB8](sub_8245FEB8.md). In canary, this method is one of two NtSetEvent caller-LRs that signal the AUDIT-059 file-IO completion wedge dup handle (LR `0x8245ED80` post-`bl 0x824AA2F0`). In ours it fires 2× total but not on the wedge handle. + +## Evidence + +- Located at slot 1 of dispatch table `0x820B5830`. Slot 0 is `sub_8245F1D0`. +- The dispatch table is referenced from: + - `sub_8245F1D0+0x1C` (self-recursive) + - `sub_8245FEB8+0x108` (= `0x8245FFC0`, the `stw r11, 0(r31)` vtable install) +- AUDIT-060 Probe O ours: fires **2× in ours** (`--ctor-probe`); both fires come from `sub_8245FEB8` callers (transitively, via the installed dispatch-table dispatch). +- AUDIT-059 Probe C canary: at LR `0x8245ED80` (`= sub_8245EC10+0x170` or similar internal PC after `bl 0x824AA2F0`), this fn is one of two distinct canary NtSetEvent caller-fns that signal the wedge dup handle (per cross-run `r31` invariant; the other is [sub_82458B90](sub_82458B90.md)). +- Both canary signalers wrap `bl 0x824AA2F0` (NtSetEvent wrapper). Each fires once per file-IO completion in canary. + +## Activation + +Indirect dispatch. Reachable only via `bctrl` against an object whose vptr was set to `dispatch_table @ 0x820B5830`. The install happens via [sub_8245FEB8](sub_8245FEB8.md). No direct `bl` callers — and that is correct for a `dispatch_table_method`. 
+ +## Static graph + +- Static callers (direct `bl`): **0** (correct — indirect dispatch only). +- Callees: includes `bl 0x824AA2F0` (NtSetEvent wrapper) at internal PC near `+0x170` (canary LR `0x8245ED80`). + +## Audit log + +- **AUDIT-060 (2026-05-12)** — fires 2× in ours; not dead. AUDIT-059's "dead via 0 static callers" framing was too narrow — dispatch_table reachability needs runtime-installed-vptr awareness, not just static `bl` xref BFS. [confirmed alive] +- **AUDIT-059 (2026-05-11)** — identified as canary NtSetEvent signaler B for the file-IO completion wedge dup handle. Cross-run `(tid, r31)` invariant matched. [confirmed for canary signaler role] +- **AUDIT-059 (2026-05-11)** — claimed dead in ours due to 0 static callers + dispatch-table installer ([sub_8245FEB8](sub_8245FEB8.md)) ALSO claimed dead. [STATUS: falsified by AUDIT-060] + +## Open questions + +- What handle does `sub_8245EC10` signal in ours? (Two fires — capture r3 at each fire to identify the target handles.) +- Why doesn't it signal the wedge handle in ours? Either (a) it's running on the wrong object (different installed instance), or (b) the work item it's processing has a different completion-event field. +- Cross-engine method match: is canary fire #1 and ours fire #1 the same logical event? Compare object base (would need new instrumentation). + +## Cross-references + +- Installed at: `dispatch_table @ 0x820B5830` slot 1. +- Vptr installer: [sub_8245FEB8](sub_8245FEB8.md). +- Sibling method (slot 0): `sub_8245F1D0` (not yet dossierd). +- Sibling canary signaler: [sub_82458B90](sub_82458B90.md). +- Audits: 059, 060. +- Artifacts: `audit-runs/audit-059-gamma-wedge/canary-setwrapper.log`, `audit-runs/audit-060-fnptr-array-bootstrap/`. 
diff --git a/docs/functions/sub_8245FEB8.md b/docs/functions/sub_8245FEB8.md new file mode 100644 index 0000000..41efe77 --- /dev/null +++ b/docs/functions/sub_8245FEB8.md @@ -0,0 +1,59 @@ +--- +address: 0x8245FEB8 +classification: normal_callee +confidence: high +last_audit: 060 +aliases: + - "vptr installer for dispatch_table 0x820B5830" + - "AUDIT-059 'dead' (FALSIFIED)" +--- + +# sub_8245FEB8 — vptr installer for dispatch_table 0x820B5830 + +## Synopsis + +Installs a vtable pointer / dispatch-table entry into a runtime-allocated object. Body contains `stw r11, 0(r31)` at `0x8245FFC0` writing the vtable address to the object's slot 0. After install, the object's `bctrl` dispatch reaches the methods in `dispatch_table @ 0x820B5830` (slot 0 = `sub_8245F1D0`, slot 1 = `sub_8245EC10`). Fires 5× in ours / 2× in canary; lives in the AUDIT-050 worker cluster but is NOT dead in either engine — both engines reach the same call site `sub_824601A0+0x68 (PC=0x82460208)`. + +## Evidence + +- Body opcode at `0x8245FFC0`: `stw r11, 0(r31)` — vtable install pattern. +- Two static callers per `xrefs` table (`source_func`): + - `sub_824601A0` at `+0x68` (PC `0x82460208`) — 1 site + - `sub_8245FB68` at `+0x198` and `+0x1C0` (PCs `0x8245FD00` and `0x8245FD28`) — 2 sites +- AUDIT-060 Probe C-Win Windows Debug canary: `--log_lr_on_pc=0x8245FEB8`, 120s → **2 fires, both lr=0x8246020C** (= `sub_824601A0+0x6C`, post-bl PC). `r3=BC365C40` (same object), `r4=4` then `r4=1` (different slot indices), `r31=701CF2E0` then `r31=705AFAA0` (different threads). +- AUDIT-060 Probe O ours: `--ctor-probe=0x8245FEB8 -n 500M` → **5 fires total**: 1 from tid=1 boot path at cycle 5.5M via `sub_824601A0+0x68`, 3 more from tid=1 during UI inflation, 1 on tid=13 at cycle 23788 via the wedge chain. +- The `r4` parameter (slot index) and the `r31` saved value diverge per call; install target object differs. 
+ +## Activation + +Direct `bl` from one of: +- `sub_824601A0+0x68` (most frequent — boot path) +- `sub_8245FB68+0x198` and `+0x1C0` (internal lib path, sub_8245FB68 itself has callers `sub_8245F880`, `sub_8245FAB0`) + +Caller chain upward: `sub_824601A0 ← sub_82460118 ← sub_82452AB8 ← sub_82452DC0`. The vptr-install path piggybacks on the work-submitter cluster. + +## Static graph + +- Callers: `sub_824601A0`, `sub_8245FB68`. +- Body: vtable write at `+0x108` (`0x8245FFC0`). Other body content not yet detailed. +- The vtable being installed is implicit in caller-supplied `r11` (and possibly elsewhere). + +## Audit log + +- **AUDIT-060 (2026-05-12)** — measured 5× ours, 2× canary, identical call site both engines. AUDIT-059's framing falsified. Convergence to AUDIT-050 work-submitter cluster confirmed. [confirmed] +- **AUDIT-059 (2026-05-11)** — claimed as "vptr installer dead in ours" because static graph showed `sub_8245EC10` (slot 1 of the installed dispatch table) had 0 static callers reachable from any caller-chain that DB classified as live. [STATUS: falsified by AUDIT-060 — was alive in ours all along; the DB caller-chain reachability call was too narrow.] + +## Open questions + +- What is the *class* being installed? (Read `r11` at the `stw r11, 0(r31)` site — canary trace shows it's a specific dispatch table.) +- The 5 ours fires vs 2 canary fires — is this a parity match (canary just had only 2 because of the same AUDIT-056 3.21× upstream gate) or does ours over-fire? Aligning instruction-horizon vs wallclock would clarify. +- Slot-1 method `sub_8245EC10` is named the canary signaler B. It fires 2× in ours per AUDIT-060 — but not on the wedge handle (per AUDIT-059 ours signal_attempts=0 on 0x12AC). What handle is it signaling in ours? 
+ +## Cross-references + +- Installs dispatch_table at: `0x820B5830` +- Slot 0: `sub_8245F1D0` (referenced; not yet dossierd) +- Slot 1: [sub_8245EC10](sub_8245EC10.md) +- Direct callers: `sub_824601A0`, `sub_8245FB68` +- Audits: 059, 060 +- Artifacts: `audit-runs/audit-059-gamma-wedge/`, `audit-runs/audit-060-fnptr-array-bootstrap/` diff --git a/docs/functions/sub_824ACB38.md b/docs/functions/sub_824ACB38.md new file mode 100644 index 0000000..2fcfd88 --- /dev/null +++ b/docs/functions/sub_824ACB38.md @@ -0,0 +1,56 @@ +--- +address: 0x824ACB38 +classification: crt_init_driver +confidence: high +last_audit: 060 +aliases: + - "CRT driver" + - "vtable-slot enumerator (NOT a static-ctor list iterator)" +--- + +# sub_824ACB38 — CRT init driver / vtable-slot enumerator + +## Synopsis + +CRT-style driver called from `entry_point` (or near it). Body is 224 bytes (`0x824ACB38..0x824ACC18`). Contains two enumeration loops over fnptr-array regions at `0x82870xxx`. AUDIT-050 framed it as "iterates 0x82870xxx fnptr arrays (557 slots, 82 non-NULL)" and concluded a half-bootstrapped state; AUDIT-060 found this framing semantically misleading — the slots are runtime vtable-registration entries, not C++ static initializers, and the "82 non-NULL" count obscures a structural 160-slot intentional zero gap. + +## Evidence + +- Body anatomy at `0x824ACB38..0x824ACC18` (AUDIT-060 disasm): + - `+0x00..+0x2C` — preamble + one optional dispatch through fn-ptr at `[0x82023F08]` (= `0x825F1630`, an LZ-runtime thunk). + - `+0x30..+0x6C` — **loop A**: enumerate u32 slots in `[0x828708C8, 0x828708D4)` — 3 slots. Filter: non-NULL. `bctrl` at `0x824ACBA0`. + - `+0x80..+0xB8` — **loop B**: enumerate u32 slots in `[0x82870010, 0x828708C4)` — 557 slots. Filter: non-NULL AND `!= 0xFFFFFFFF`. `bctrl` at `0x824ACBEC`. + - `+0xC4` — epilogue, `blr`. 
+- Array layout (AUDIT-060 dumped at -n 1M and -n 500M; both identical): + - `0x82870010..0x828702E8` — populated with `0x82xxxxxx` pointers (vtable methods). + - `0x828702F0..0x82870580` — **PERMANENTLY ZERO** across both dumps (160 of 557 slots = 28.7%). + - `0x82870590..0x828708C4` — populated with `0x82xxxxxx` pointers. + - `0x828708C8..0x828708D4` — loop-A array, populated (small CRT helpers). +- Static-DB cross-check: the 557-slot region contains 14+ separate small `vtable`-classified arrays at `0x82870014/0x24/0x94/0xC8/0x16C/0x214/0x238/0x250/0x2A8/0x2C0/0x2E4/0x5A0/0x62C/0x870`, NOT a single CRT static-ctor list. NO statically-detected arrays in `[0x82870300, 0x828705A0)` — the gap is intentional padding between two vtable clusters. + +## Activation + +Called once from `entry_point`-near code (per AUDIT-050 — exact caller PC not in AUDIT-060 trace). The driver enumerates all slot entries; each non-NULL entry is `bctrl`'d once. + +## Static graph + +- Static callers: 1 (from boot entry path; exact PC to confirm). +- Callees: indirect (`bctrl`) — targets are the contents of the enumerated slots. + +## Audit log + +- **AUDIT-060 (2026-05-12)** — disassembled body, identified structure (2 loops, gap), confirmed slot contents are runtime vtable entries rather than C++ static-ctor function pointers. The "82 non-NULL" AUDIT-050 count was correct per-slot but missed the structural 160-slot intentional gap. Driver fires 1× at -n 500M as expected (single boot enumeration). [confirmed] +- **AUDIT-050 (2026-05-10)** — framed as "CRT driver iterates 0x82870xxx fnptr arrays (557 slots, 82 non-NULL)". Structurally correct but semantically misleading ("static-ctor list" implication was wrong). [STATUS: partially superseded by AUDIT-060 — the "iterate fnptr array" claim stands; the "static-ctor list" implication does not.] + +## Open questions + +- What invokes this driver in `entry_point`? Find exact caller PC. 
+- Are the 14+ small vtable clusters in `[0x82870010, 0x828708C4)` enumerated by THIS driver, or by separate driver functions? If multiple drivers exist for the same region, the gap might be region-partitioning, not padding. +- For ours: are all 397 non-NULL slots dispatched at runtime? If some slot dispatch falls through (e.g. predicate skips it), that would be a real bug — needs runtime confirmation via `--branch-probe=0x824ACBA0,0x824ACBEC` (loop bodies). + +## Cross-references + +- LZ-runtime thunk at `+0x00..+0x2C`: `0x825F1630`. +- Fnptr-array region: `0x82870010..0x828708D4`. +- Audits: 045 (DB schema caveat: `v_call_graph` uses `xrefs.source`; prefer `xrefs.source_func`), 050, 060. +- Artifacts: `audit-runs/audit-060-fnptr-array-bootstrap/ours-dump-500M.stdout`. diff --git a/docs/functions/sub_824F7800.md b/docs/functions/sub_824F7800.md new file mode 100644 index 0000000..b39cadb --- /dev/null +++ b/docs/functions/sub_824F7800.md @@ -0,0 +1,50 @@ +--- +address: 0x824F7800 +classification: normal_callee +confidence: high +last_audit: 064 +aliases: + - "AUDIT-058 caller-ladder fn #2 (bctrl-dispatch site for sub_825070F0)" +--- + +# sub_824F7800 — dispatch caller for ANON_Class_713383D7 vtable slot 1 + +## Synopsis + +Normal callee that performs the `bctrl` invoking [sub_825070F0](sub_825070F0.md) (slot 1 of the `ANON_Class_713383D7` vtable at `0x8200A208`). Bottom of a 4-fn linear call chain (`sub_824F8398 → sub_824F7CD0 → sub_824F7800 → [bctrl] → sub_825070F0`) that runs once per game-loop activation pass. AUDIT-064 verified canary fires this fn 1× at ~60s wallclock; ours fires it 0× because the entire chain sits downstream of tid=13's audit-049 wedge. + +## Evidence + +- Disasm prolog at `0x824F7800`: `mflr r12; bl 0x825F0F60 (frame helper); stwu r1, -336(r1); mr r22, r3; ...` — standard normal-callee prolog. NOT MSVC EH-handler shape (no `subi r31, r12, N`). +- Function size: 1232 bytes / 308 insns. `has_eh=False`, `frame_size=336`. 
+- Static caller xref: 1 — `bl` from PC `0x824F8314` inside [sub_824F7CD0](sub_824F7CD0.md). No other refs (only `.pdata` entry at file offset `0x1347B0` — standard unwind metadata). +- AUDIT-064 canary 60s probe (`--audit_61_branch_probe_pcs=0x824F7800,...`): fires 1× with `lr=0x824F8318 r3=BE568F00 r4=701CF5B0 r5=BCA44D40 r6=BCA44DE0` on tid=6. Reproduced bit-identical at 120s and 180s wallclock. +- AUDIT-064 ours `--ctor-probe=0x824F7800` -n 500M: **0 fires**. +- The `bctrl` at PC `0x824F7B20` (= `sub_824F7800+0x320`, slot 1 of `0x8200A208` vtable) is where [sub_825070F0](sub_825070F0.md) is dispatched from. + +## Activation + +Direct `bl` from `sub_824F7CD0+0x644` (PC `0x824F8314`). Both engines see the same single static caller. + +## Static graph + +- Static callers (from `xrefs.source_func`): + - PC `0x824F8314` inside `sub_824F7CD0` (the only caller). +- Callees include the `bctrl` at PC `0x824F7B20` that dispatches to `sub_825070F0` via vtable slot 1 of `ANON_Class_713383D7` (vtable `0x8200A208`). + +## Audit log + +- **AUDIT-064 (2026-05-12)** — disasm confirms normal-callee prolog (refutes "another EH handler" hypothesis). Canary probe fires 1× / ours 0×. Static-DB caller is the runtime caller (no surprise bctrl divergence here). The chain runs downstream of [sub_822F1AA8](sub_822F1AA8.md)'s vtable[0] dispatch through sub_82173990 — which waits on tid=13 — so ours never reaches it because tid=13 is blocked on the AUDIT-049 wedge. [confirmed] +- **AUDIT-058 (2026-05-10)** — flagged as part of the static caller ladder for sub_825070F0. [confirmed at this level; ladder framing partially preserved — see sub_821B6DF4 for the EH-thunk caveat one step further up] + +## Open questions + +- Why does the bctrl at `0x824F7B20` always dispatch to `sub_825070F0` (slot 1 of vtable `0x8200A208`) at this point? Investigate where the `r3` instance pointer comes from — likely a class member loaded via the slot-1 ctor path of `ANON_Class_713383D7`. 
+- The 4-fn linear chain (`sub_824F8398 → sub_824F7CD0 → sub_824F7800 → bctrl`) is rigid and runs end-to-end without branching in canary. Confirm no early-exit branches inside the chain in ours (irrelevant if we resolve the audit-049 wedge first). + +## Cross-references + +- Callees: `sub_825070F0` via slot 1 of vtable `0x8200A208` at `bctrl` PC `0x824F7B20`. +- Callers: `sub_824F7CD0+0x644`. +- Audits: 058, 064. +- Artifacts: `audit-runs/audit-064-activation-ladder/canary-{60,120,180}s.log`, `audit-runs/audit-064-activation-ladder/ours-500M.stdout`. diff --git a/docs/functions/sub_824F7CD0.md b/docs/functions/sub_824F7CD0.md new file mode 100644 index 0000000..fc3b6c0 --- /dev/null +++ b/docs/functions/sub_824F7CD0.md @@ -0,0 +1,49 @@ +--- +address: 0x824F7CD0 +classification: normal_callee +confidence: high +last_audit: 064 +aliases: + - "AUDIT-058 caller-ladder fn #3" +--- + +# sub_824F7CD0 — middle of sub_825070F0 activation chain + +## Synopsis + +Normal callee in the linear 4-fn activation chain ending at [sub_825070F0](sub_825070F0.md). Calls `sub_824F7800` at PC `0x824F8314`. Has a 4-way computed `bctr` switch table near its entry (PCs `0x824F7D00..0x824F7D34` — a jump-table dispatch on `[r31+0]-1` for values 1..4). AUDIT-064 verified canary fires 1× at ~60s wallclock; ours fires 0×. + +## Evidence + +- Disasm prolog at `0x824F7CD0`: `mflr r12; bl 0x825F0F68; stwu r1, -256(r1); ...` — standard normal-callee prolog. NOT MSVC EH-handler shape. +- Function size: 1736 bytes / 434 insns. `has_eh=False`, `frame_size=256`. +- Static caller xref: 1 — `bl` from PC `0x824F83D4` inside [sub_824F8398](sub_824F8398.md). +- Computed jump-table at `0x824F7D10..0x824F7D24`: `lis r12, 0x824F; addi r12, r12, 32040; slwi r0, r11, 2; lwzx r0, r12, r0; mtctr r0; bctr` — 4-way switch on argument. Targets at `0x824F7D28/2C/30/34/...` are jump-table data, NOT call edges. 
+- AUDIT-064 canary 60s probe: fires 1× with `lr=0x824F83D8 r3=BE568F00 r4=701CF5B0 r5=701CF658 r6=03A72328` on tid=6. Reproduced bit-identical at 120s and 180s. +- AUDIT-064 ours `--ctor-probe=0x824F7CD0` -n 500M: **0 fires**. + +## Activation + +Direct `bl` from `sub_824F8398+0x3C` (PC `0x824F83D4`). + +## Static graph + +- Static callers (from `xrefs.source_func`): + - PC `0x824F83D4` inside `sub_824F8398`. +- Callees include `sub_824F7800` (PC `0x824F8314`), `sub_824FD230`, `sub_824FD240`, `sub_824FC498`, `sub_824FCC18`, and others. + +## Audit log + +- **AUDIT-064 (2026-05-12)** — disasm confirms normal-callee + 4-way computed jump-table near entry. Canary fires 1× / ours 0×. Single static caller is the actual runtime caller. Chain blocks upstream at the audit-049 wedge (tid=13 thread-join wait on handle 0x12A4). [confirmed] +- **AUDIT-058 (2026-05-10)** — flagged as part of the ladder. [confirmed] + +## Open questions + +- The 4-way switch at `0x824F7D10..0x824F7D34`: which jump-table entry corresponds to the path that calls `sub_824F7800`? Disasm shows `lwz r11, 0(r31); subi r11, r11, 1; cmplwi cr6, r11, 0x3; bgt cr6, 0x824F80E4` — so input `r4` (saved to r31) must be 1..4 to enter switch. Canary's r4 was `0x701CF5B0` (a stack ptr), so the value at `[stack]` indexes the switch. + +## Cross-references + +- Callees: `sub_824F7800`, `sub_824FD230/40`, `sub_824FC498`, `sub_824FCC18`. +- Callers: `sub_824F8398+0x3C`. +- Audits: 058, 064. +- Artifacts: `audit-runs/audit-064-activation-ladder/canary-{60,120,180}s.log`, `audit-runs/audit-064-activation-ladder/ours-500M.stdout`. 
diff --git a/docs/functions/sub_824F8398.md b/docs/functions/sub_824F8398.md new file mode 100644 index 0000000..bc4f61c --- /dev/null +++ b/docs/functions/sub_824F8398.md @@ -0,0 +1,48 @@ +--- +address: 0x824F8398 +classification: normal_callee +confidence: high +last_audit: 064 +aliases: + - "AUDIT-058 caller-ladder fn #4 (tiny adapter, 20 insns)" +--- + +# sub_824F8398 — 20-insn adapter to sub_824F7CD0 + +## Synopsis + +Tiny 20-insn normal-callee adapter. Zeros a stack buffer (`std r9, 0(r11)` × 9, looped via `bdnz` with `ctr=9`), sets `[r1+80]=1` and `[r1+112]=r8` (its r4 argument), then calls `sub_824F7CD0` with `r3` passed through and `r4=r1+80` (the start of that buffer). Essentially a 2-arg→1-arg adapter that constructs a 36-byte stack-record before dispatching. AUDIT-064 verified canary fires 1× at ~60s wallclock; ours fires 0×. + +## Evidence + +- Disasm: `mflr r12; stw r12, -8(r1); stwu r1, -160(r1); mr r8, r4; addi r11, r1, 80; li r9, 0; li r10, 9; mtctr r10; std r9, 0(r11); addi r11, r11, 8; bdnz 0x824F83B8; li r11, 1; stw r8, 112(r1); addi r4, r1, 80; stw r11, 80(r1); bl 0x824F7CD0; addi r1, r1, 160; lwz r12, -8(r1); mtlr r12; blr` — clear normal-callee, no EH. +- Function size: 80 bytes / 20 insns. `has_eh=False`. +- Static caller xref: 1 — `bl` from PC `0x821B5B5C` inside [sub_821B55D8](sub_821B55D8.md). +- Stack buffer at `[r1+80]..[r1+112]` is 36 bytes (9 × 8-byte zero + first u32=1 + last u32=r8). +- AUDIT-064 canary 60s probe: fires 1× with `lr=0x821B5B60 r3=BE568F00 r4=BC369380 r5=701CF658 r6=03A72328` on tid=6. Reproduced bit-identical at 120s and 180s. +- AUDIT-064 ours `--ctor-probe=0x824F8398` -n 500M: **0 fires**. + +## Activation + +Direct `bl` from `sub_821B55D8+0x584` (PC `0x821B5B5C`). + +## Static graph + +- Static callers: PC `0x821B5B5C` inside `sub_821B55D8`. +- Callees: `sub_824F7CD0` (PC `0x824F83D4`). + +## Audit log + +- **AUDIT-064 (2026-05-12)** — disasm confirms tiny adapter (20 insns). Canary fires 1× / ours 0×.
The size is small enough to inline; possibly an MSVC compiler artifact. [confirmed] +- **AUDIT-058 (2026-05-10)** — flagged as part of the ladder. [confirmed] + +## Open questions + +- What does the constructed stack-record (`[1, 0, 0, 0, 0, 0, 0, 0, 0, r8]`) represent semantically? Likely a state-machine init record passed by reference to `sub_824F7CD0`'s 4-way switch. + +## Cross-references + +- Callees: `sub_824F7CD0`. +- Callers: `sub_821B55D8+0x584`. +- Audits: 058, 064. +- Artifacts: `audit-runs/audit-064-activation-ladder/canary-{60,120,180}s.log`. diff --git a/docs/functions/sub_825070F0.md b/docs/functions/sub_825070F0.md new file mode 100644 index 0000000..ecddd28 --- /dev/null +++ b/docs/functions/sub_825070F0.md @@ -0,0 +1,63 @@ +--- +address: 0x825070F0 +classification: vtable_method +confidence: high +last_audit: 067 +aliases: + - "ANON_Class_713383D7 vtable slot 1" + - "AUDIT-057 top missing-thread spawner" +--- + +# sub_825070F0 — ANON_Class_713383D7 vtable slot 1 (worker spawner) + +## Synopsis + +Slot 1 of class `ANON_Class_713383D7` vtable (located at `0x8200A208` and clone at `0x8200A928`). When invoked, initializes 4 worker threads with shared context `r3=0xBCE25340` (canary). The thread entry points are `0x82506528 / 0x82506558 / 0x82506588 / 0x825065B8`. In canary, this fn fires 1× at ~60s wallclock immediately after `DiscImageDevice::ResolvePath(\\dat\\movie)` (post-intro file open). In ours, it fires 0× at any horizon probed so far. + +## Evidence + +- AUDIT-058 Linux Debug canary: fires 1× at ~60s wallclock with `pc=0x825070F0 lr=0x824F7B24 r3=BCE25340 r4=701CF3C0 r5=BCE25AC0`. +- AUDIT-060 Probe C-Win Windows Debug canary: same probe (`--log_lr_on_pc=0x825070F0`, 90s) → 1 fire, `lr=0x824F7B24` — **bit-identical to Linux Debug**, validating the new Wine canary oracle. +- LR `0x824F7B24` resolves to `sub_824F7800+0x324` — the return address immediately after the vtable `bctrl` dispatch site (PC `0x824F7B20`).
+- Class `ANON_Class_713383D7` lives at vtables `0x8200A208` (and clone `0x8200A928`); both are 7-method tables. Slot 1 is this fn. **Zero recorded vptr_writes in DB** — the ctor that writes this vtable is in an unreachability island OR is a computed-store-only ctor. +- **AUDIT-067 (2026-05-12)** strengthens this: **zero `vptr_writes`, zero `xrefs`, zero u32-byte occurrences of `0x8200A208`/`0x8200A928` in the `.pe` file, zero `addis+addi/ori` pairs materializing the value**. Runtime mem-watch of all 16 guest store opcodes (`stw`/`std`/`stwx`/`stwbrx`/`stwcx.`/`stmw`/`stvx`/`stvewx`/etc.) for 211 s wallclock in canary produces **0 hits** for these values — though `sub_825070F0` itself fires 1× at ~25 s wallclock with `*r3 = 0x8200A208` implicit at the bctrl. The install is **host-side**, not guest-side. +- AUDIT-057 named this as the top missing-thread spawner: 4 missing thread spawns in ours. + +## Activation + +Vtable dispatch from `sub_824F7800+0x320 bctrl` (PC `0x824F7B20`; slot 1 of vtable `0x8200A208`). **AUDIT-064 fully classified the ladder**: `sub_824F7800`, `sub_824F7CD0`, `sub_824F8398`, `sub_821B55D8` are ALL `normal_callee` (NOT EH thunks). Only `sub_821B6DF4` is the EH catch-handler; it's a secondary entry path, not the runtime activation route. + +**Full runtime activation chain (in canary; identified by AUDIT-064 via lr-resolution at each fire)**: tid=1 `entry_point → sub_8216EA68 → sub_822F1AA8` (post-init dispatcher) → `bctrl vtable[0] of *(0x828E1F08)` → `sub_82175330` (2-insn thunk) → tail-jump → `sub_82173990` → … → `sub_821741C8` → `sub_82172BA0` (array-walk dispatcher) → `bctrl vtable[6]` → `sub_821B55D8` → `sub_824F8398` → `sub_824F7CD0` → `sub_824F7800` → `bctrl vtable[1]` → `sub_825070F0`. + +**Wedge in ours (AUDIT-064)**: tid=1 successfully enters `sub_822F1AA8`, reaches the bctrl at `0x822F1B4C`, dispatches to `sub_82175330` → `sub_82173990` → blocks at `sub_82173990+0x2D0` on `KeWaitForSingleObject` INFINITE on handle `0x12A4` = tid=13's thread handle.
Tid=13 itself is blocked on the AUDIT-049 wedge (event 0x12AC inside the audit-009 cluster). The 5-fn ladder downstream of `sub_82172BA0` is NEVER reached because tid=1 hasn't returned from the thread-join wait. + +## Static graph + +- Static callers (direct `bl`): 0 — it's reached only via `bctrl`. The `bctrl` site is `0x824F7B20`. +- Callees: spawns 4 worker threads via `ExCreateThread` (or equivalent) with entries `0x82506528/58/88/B8`. + +## Audit log + +- **Phase Non-match Investigation (2026-05-19)** — Phase A `thread.create` events directly corroborate the AUDIT-058 framing using **runtime** evidence (previously only static + ctor-probe). Canary cold trace `canary-jitter-1.jsonl` (4.4 GB, 18.7M events) contains EXACTLY 4 `thread.create` events at `host_ns = 10.382912900 / 10.383282200 / 10.383647200 / 10.384161700` (timestamps shown in seconds; spaced ~365–515 µs apart on tid=6 = guest main) with entries `0x82506528 / 0x82506558 / 0x82506588 / 0x825065B8`, shared `ctx_ptr=0xBCE251C0`, stack=65,536, `suspended=true`, affinity=0. These match the dossier's listed worker entries 1:1 and are bit-identical-in-structure to the AUDIT-058 fire (modulo `ctx_ptr` arena drift: AUDIT-058 cited `0xBCE25340`, this jitter sample has `0xBCE251C0` — both inside the `0xBCE25xxx` arena allocated by the same fn). FIFO-matched child tids: `0x82506528 → tid=28` (3.26M events, file IO + heavy RtlEnterCS), `0x82506558 → tid=27` (36k events), `0x82506588 → tid=29` (91k events), `0x825065B8 → never started` in the 90s window. Same canary-vs-ours digest comparison shows ours-postfix.jsonl has **0 occurrences** of `0xBCE251C0` and **0 thread.create events** after spawn #10 (1.727 s). The full set of static-analysis-invisible properties (0 vptr_writes, 0 xrefs, 0 indirect_dispatch_candidates targeting vtables `0x8200A208` / `0x8200A928`) was re-verified against current sylpheed.db — AUDIT-067's conclusion stands. New artifacts at `audit-runs/phase-nonmatch-investigation/`.
**Recommended next probe**: AUDIT-068 host-side mem-watch was deferred — re-attempt now with Phase A event correlation (the 10.382 s spawn burst is the precise wall-clock window to hook). [confirmed runtime; framing intact] +- **AUDIT-067 (2026-05-12)** — runtime mem-watch via new canary cvar `audit_67_value_watch` (~422 LOC additive instrumentation; default-empty / zero-cost; kept in canary tree per policy). Hooked **all 16 store opcodes**: `stw`, `stwu`, `stwx`, `stwux`, `stwbrx`, `stwcx.`, `stmw`, `std`, `stdu`, `stdux`, `stdx`, `stdbrx`, `stdcx.`, `stvx`/`stvxl`/`128`, `stvewx`/`128`. Each hook emits `CompareEQ(val32, watch) → TrapTrue(_,250+idx)`; trap handler logs `pc/lr/val/dst/regs/tid`. Sanity test with `watch=0x00000000` → **103,321 hits in 30s** (instrumentation verified). Main run `watch=0x8200A208,0x8200A928` for **211 s wallclock**: **0 hits** despite `AUDIT-061-BR pc=0x825070F0` firing 1× with `r3=0xBCE25340 r4=0x701CF3C0 r5=0xBCE25AC0` (bit-identical to AUDIT-058/060). **CONCLUSION**: the vtable address `0x8200A208` is never stored to guest memory via any guest PowerPC store opcode in canary — the install is **host-side** (most likely a kernel-import direct memory write via `xe::store_and_swap(memory + addr, val)`, OR an XEX loader operation, OR a `RtlCopyMemory`-style host helper). Path A (static binary search) also yielded 0 matches: no `vptr_writes`, no `xrefs`, no `addis`+`addi/ori` pair (with or without mr-chain register propagation) materializing the value, no u32 occurrence anywhere in the `.pe` file. **Reading-error #19**: assumption that meaningful guest-memory writes go through guest PPC code is false — kernel imports and the image loader perform direct host writes that bypass the JIT. AUDIT-068 must hook at the `Memory::write*` / `store_and_swap<*>` level instead. 
[confirmed — negative result; structural finding] +- **AUDIT-064 (2026-05-12)** — classified all 4 unclassified ladder fns: `sub_824F7800`, `sub_824F7CD0`, `sub_824F8398`, `sub_821B55D8` — **all 4 are normal_callee**, NOT EH thunks (refutes the worst-case hypothesis from AUDIT-060 that the whole chain might be EH metadata). Probed canary at 60s/120s/180s — all 4 fire 1× each, bit-identical context. Walked upward: real runtime caller of `sub_821B55D8` is `sub_82172BA0+0x1E8 bctrl` (PC `0x82172D88`), NOT the static-DB-listed `sub_821B6DF4` EH branch. **Identified the full upstream activation chain**: tid=1 entry_point → `sub_8216EA68` → [`sub_822F1AA8`](sub_822F1AA8.md) → vtable[0] of `*(0x828E1F08)` = `sub_82175330` (2-insn thunk) → tail-jump to `sub_82173990` → … (canary continues through `sub_821741C8` → [`sub_82172BA0`](sub_82172BA0.md) → vtable[6]=[`sub_821B55D8`](sub_821B55D8.md) → [`sub_824F8398`](sub_824F8398.md) → [`sub_824F7CD0`](sub_824F7CD0.md) → [`sub_824F7800`](sub_824F7800.md) → bctrl → `sub_825070F0`). **First divergence in ours**: tid=1 enters `sub_82173990` via the vtable[0] dispatch but blocks at `sub_82173990+0x2D0 bl 0x824AA330` (KeWaitForSingleObject INFINITE) on handle `0x12A4` = tid=13's thread handle. This is the **same AUDIT-049 wedge**: tid=13 itself is blocked on handle `0x12AC` waiting for the audit-009-cluster signal. Activation of sub_825070F0 is gated on resolving the tid=13 wait, NOT on any divergence in the ladder fns themselves. [confirmed] +- **AUDIT-060 (2026-05-12)** — verified canary fire reproduces under Windows Debug oracle. Caller chain caveat added: `sub_821B6DF4` ladder-top is EH, not normal call edge. Other ladder fns need individual classification. [confirmed for canary fire; caveat on the upstream chain] +- **AUDIT-058 (2026-05-10)** — captured canary fire context, walked static caller ladder, found all 6 ladder fns fire 0× in ours. Concluded "activation phase doesn't activate in ours". 
[STATUS: ladder framing partially falsified by AUDIT-060 — at least `sub_821B6DF4` is EH; the *real* gate is the AUDIT-056 sub_821C4EB0 throughput gap, upstream.] +- **AUDIT-057 (2026-05-10)** — flagged as top missing-thread spawner (4 of 13 missing thread spawns). [confirmed quantitatively] + +## Open questions + +- What spawns the 4 worker threads exactly? Disassemble body. The threads have entries `0x82506528/58/88/B8` — are these consecutive 0x30-byte stubs that all forward to a common worker fn? +- What class instance triggers the slot-1 dispatch? Is it a `silph::GamePart_Title` instance? The wallclock context (post-`\\dat\\movie` ResolvePath) suggests so. +- **(AUDIT-067 result)** What host-side mechanism installs `0x8200A208` at `0xBCE25340+0`? Candidates: `xboxkrnl_rtl*` direct-write helpers (`RtlCopyMemory`/`RtlFillMemory`/`RtlInitializeCriticalSection` etc.), XEX loader image-rewrites, or kernel-import factory helpers. Next probe: AUDIT-068 host-side mem-watch — hook `Memory::write*` and/or `xe::store_and_swap<*>` in canary. + +## Cross-references + +- Vtable: `0x8200A208` (primary), `0x8200A928` (clone), class `ANON_Class_713383D7`, slot 1. +- Dispatch site: `sub_824F7800+0x320 bctrl` (PC `0x824F7B20`); post-bctrl PC `0x824F7B24`. +- Worker thread entries spawned: `0x82506528, 0x82506558, 0x82506588, 0x825065B8`. +- **Real runtime activation chain (AUDIT-064)**: `tid=1 entry_point → sub_8216EA68 → [sub_822F1AA8](sub_822F1AA8.md) → bctrl vtable[0]={sub_82175330 tail-jump→sub_82173990} → … → sub_821741C8 → [sub_82172BA0](sub_82172BA0.md) → bctrl vtable[6] → [sub_821B55D8](sub_821B55D8.md) → [sub_824F8398](sub_824F8398.md) → [sub_824F7CD0](sub_824F7CD0.md) → [sub_824F7800](sub_824F7800.md) → bctrl vtable[1] → sub_825070F0`. +- **Wedge in ours**: tid=1 blocks at `sub_82173990+0x2D0` on KeWaitForSingleObject(handle=0x12A4 = tid=13's thread handle); tid=13 itself blocks at `sub_821CB030+0x128`-created event 0x12AC — AUDIT-049 wedge.
+- Old static-DB ladder (AUDIT-058, partly EH): `sub_824F7800 ← sub_824F7CD0 ← sub_824F8398 ← sub_821B55D8 ← [sub_821B6DF4](sub_821B6DF4.md) (EH catch-handler — secondary EH-only entry path)`. +- Audits: 057, 058, 060, 064, 067. +- Artifacts: `audit-runs/audit-058-sub825070F0-activation/`, `audit-runs/audit-060-fnptr-array-bootstrap/canary-sanity-825070F0.log`, `audit-runs/audit-064-activation-ladder/`, `audit-runs/audit-067-vptr-install-mem-watch/`. diff --git a/tools/diff-events/README.md b/tools/diff-events/README.md new file mode 100644 index 0000000..834525b --- /dev/null +++ b/tools/diff-events/README.md @@ -0,0 +1,73 @@ +# diff_events.py — Phase A event-log diff tool + +A stdlib-only Python tool that diffs two schema-v1 JSONL event logs (one per engine) and reports the **first behavioral divergence per guest thread**. Built for the Phase A diff harness — see `audit-runs/phase-a-diff-harness/README.md` and `schema-v1.md`. + +## What it does + +1. Reads two JSONL files. Validates each begins with a `schema_version=1` header event. +2. Builds per-thread streams keyed by `tid_event_idx` (the schema's per-tid monotonic counter). +3. Maps canary-tid ↔ ours-tid (auto-pairs by first `kernel.call` name in each stream, or manual via `--tid-map`). +4. Walks each mapped pair in parallel, comparing events with rules from the schema (raw_handle_id skipped, host_ns skipped, wait_duration_cycles skipped, etc.). +5. On first divergence: prints 5-event pre-context + the divergent event + the next event from each. Stops that thread's walk. +6. Writes a markdown report. 
+ +## Usage + +```bash +# Default — auto-map tids, write markdown to stdout +python3 diff_events.py --canary canary.jsonl --ours ours.jsonl + +# Write report to a file +python3 diff_events.py --canary c.jsonl --ours o.jsonl --out report.md + +# Manual tid map +python3 diff_events.py --canary c.jsonl --ours o.jsonl --tid-map 6=1,7=2 + +# Negative-test mode — exit non-zero on ANY divergence (gate-4) +python3 diff_events.py --canary c.jsonl --ours o.jsonl --validate-identical +``` + +## How it compares + +These fields are **skipped** when comparing payloads: +- Top-level: `engine`, `host_ns`, `guest_cycle`, `deterministic`. +- `handle.create`/`handle.destroy`: `raw_handle_id`, `handle_semantic_id` (engine-local). +- `wait.begin`: `handles_semantic_ids` (engine-local SIDs). +- `wait.end`: `wait_duration_cycles` (depends on host scheduling), `woken_by_semantic_id`. + +The `tid_event_idx` field is the **alignment key**. Two events at the same `tid_event_idx` on a mapped pair of tids are expected to be the same logical event. The `kind` must match; the `payload` must match field-by-field (except skipped fields). + +## Phase C+18 — Cross-tid floating `handle.create` (shared-global dispatchers) + +Process-global kernel dispatcher objects (`KEVENT`/`KSEMAPHORE` etc. that game code creates with `KeInitializeEvent` or static-allocs and shares across multiple guest threads) are lazy-wrapped on **first guest-thread touch** by canary's `XObject::GetNativeObject` and ours's `ensure_dispatcher_object`. Whichever thread happens to touch the dispatcher first synthesizes the wrapper and emits the `handle.create` event. Which thread wins is timing-dependent — canary and ours may disagree. + +The SID for these synthesized handles is computed via a **scheduling-invariant recipe** keyed on `(pointer, object_type)` only (see schema-v1.md §"Shared-global SIDs"). The same dispatcher therefore yields the same SID in both engines regardless of the first-toucher thread. 
+ +The diff tool detects shared-global `handle.create` events by recomputing the deterministic SID from the event's `(raw_handle_id, object_type)` payload and matching against the emitted `handle_semantic_id`. When per-tid alignment finds one side has an "extra" `handle.create` event whose SID is in the global set, the tool **advances only that side's stream pointer past the floating event** and re-compares — preserving strict alignment for everything else. + +The summary table shows per-pair `floating_skipped (c/o)` counts so you can see how many events were absorbed by this mechanism. + +## Known limitations (v1) + +- **Auto tid-map is naive**: pairs canary-tid with ours-tid by the first `kernel.call` name on each thread. Works for boot when the same initial call happens on each engine's primary thread; can mis-pair if two threads start with the same first-call name or if a thread spawns earlier on one engine. Use `--tid-map` to override. +- **No streaming**: loads both files fully into memory. Acceptable for boot-window runs; the canary log is ~370 MB for a 12 s run. +- **First-divergence only**: per-thread walk stops at first divergence. Subsequent divergences on the same thread are not reported (a sliding-window mode could be added later if needed). +- **Schema v1 only**: refuses to parse v2 inputs (forward-incompat is intentional). + +## Files + +- `diff_events.py` — single-file CLI, stdlib only (json, argparse, pathlib). +- `README.md` — this file. + +## Test it + +```bash +# Self-diff (compare a file against itself) should report 0 divergences. +python3 diff_events.py --canary x.jsonl --ours x.jsonl --validate-identical +echo "exit=$?" # expect 0 + +# Negative test: corrupt one event and confirm the tool reports it. +sed '50s/"kernel.call"/"kernel.CORRUPT"/' x.jsonl > /tmp/x-corrupt.jsonl +python3 diff_events.py --canary x.jsonl --ours /tmp/x-corrupt.jsonl --validate-identical +echo "exit=$?" 
# expect 1 +``` diff --git a/tools/diff-events/__pycache__/build_contention_manifest.cpython-312.pyc b/tools/diff-events/__pycache__/build_contention_manifest.cpython-312.pyc new file mode 100644 index 0000000..9e833a1 Binary files /dev/null and b/tools/diff-events/__pycache__/build_contention_manifest.cpython-312.pyc differ diff --git a/tools/diff-events/__pycache__/diff_events.cpython-312.pyc b/tools/diff-events/__pycache__/diff_events.cpython-312.pyc new file mode 100644 index 0000000..1558f9c Binary files /dev/null and b/tools/diff-events/__pycache__/diff_events.cpython-312.pyc differ diff --git a/tools/diff-events/__pycache__/test_diff_events.cpython-312.pyc b/tools/diff-events/__pycache__/test_diff_events.cpython-312.pyc new file mode 100644 index 0000000..cb43b01 Binary files /dev/null and b/tools/diff-events/__pycache__/test_diff_events.cpython-312.pyc differ diff --git a/tools/diff-events/build_contention_manifest.py b/tools/diff-events/build_contention_manifest.py new file mode 100644 index 0000000..05cff12 --- /dev/null +++ b/tools/diff-events/build_contention_manifest.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +"""Phase D Stage 2 — contention-manifest builder. + +Reads a Phase A JSONL event log produced by canary with cvar +`kernel_emit_contention=true` (Stage 1) and distills it to a +replay-ready manifest for Stage 3 to consume. + +Output schema (`contention_manifest.json`): + + { + "version": 1, + "source_canary_jsonl": "", + "source_canary_sha256": "", + "built_at_host_unix": , + "summary": { + "total_input_events": , + "total_contention_events_kept": , + "per_tid_counts": { "": , ... } + }, + "entries": [ + { "tid": 6, "tid_event_idx": 104664, "site_sid": "c26a128bf45411f7", + "cs_ptr": "0xbc65c890", "contended": true }, + ... + ] + } + +Entries are sorted by (tid asc, tid_event_idx asc). 
Stage 3's ours-side +replay loader keys on `(tid, tid_event_idx)`; the canary tid is the +*native* tid emitted by canary (no display-mapping is applied here — +see investigation.md §"Tid mapping is per-engine native"). + +Only events with `kind == "contention.observed"` and `contended == true` +are kept. Stage 1's emitter never emits `contended=false`, so this +filter is paranoid-defensive. Schema events / handle events / wait +events are dropped. + +Usage: + + python3 build_contention_manifest.py \\ + --canary-jsonl path/to/canary-cvaron-trunc.jsonl \\ + --out path/to/contention_manifest.json + +Exit 0 on success. Exit 1 on parse error or empty manifest (no +contention events found — likely cvar wasn't enabled when the trace +was captured). +""" +import argparse +import hashlib +import json +import sys +import time +from pathlib import Path + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description=__doc__.splitlines()[0]) + p.add_argument( + "--canary-jsonl", + required=True, + help="Path to canary Phase A JSONL log (with cvar=true).", + ) + p.add_argument( + "--out", + required=True, + help="Output path for contention_manifest.json.", + ) + p.add_argument( + "--tid-map", + default="", + help=( + "Optional canary→ours tid translation. Format " + "'CANARY=OURS,CANARY=OURS,...' (e.g. '6=1,7=2,4=11'). When " + "supplied, manifest entries are emitted with the ours-side tid " + "so the Stage-3 consumer can key on its own native current_tid. " + "Entries on a canary tid NOT in the map are dropped with a " + "warning. Same format as diff_events.py." + ), + ) + p.add_argument( + "--quiet", + action="store_true", + help="Suppress the human-readable summary on stderr.", + ) + return p.parse_args() + + +def parse_tid_map(s: str) -> dict[int, int] | None: + """Parse 'a=b,c=d' into {a: b, c: d}. 
Empty/None → None.""" + s = s.strip() + if not s: + return None + out: dict[int, int] = {} + for piece in s.split(","): + piece = piece.strip() + if not piece: + continue + if "=" not in piece: + raise ValueError(f"bad tid-map fragment: {piece!r}") + l, r = piece.split("=", 1) + out[int(l.strip())] = int(r.strip()) + return out + + +def sha256_of(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1 << 20), b""): + h.update(chunk) + return h.hexdigest() + + +def build_manifest( + jsonl_path: Path, + tid_map: dict[int, int] | None = None, +) -> dict: + """Read `jsonl_path` and return a manifest dict. + + If `tid_map` (canary_tid → ours_tid) is provided, entries are written + with the translated ours-side tid. Entries on a canary tid not in + the map are dropped (counted in `summary.skipped_unmapped_tids`). + When `tid_map` is None, manifest tids are canary's native values + (back-compat with Stage 2's first iteration). + + Raises FileNotFoundError / json.JSONDecodeError on bad input. + """ + entries: list[dict] = [] + total_input = 0 + bad_lines = 0 + unmapped = 0 + with jsonl_path.open("r", encoding="utf-8") as f: + for lineno, line in enumerate(f, start=1): + line = line.rstrip("\n") + if not line: + continue + total_input += 1 + try: + ev = json.loads(line) + except json.JSONDecodeError: + bad_lines += 1 + continue + if ev.get("kind") != "contention.observed": + continue + payload = ev.get("payload") or {} + if payload.get("contended") is not True: + continue + canary_tid = int(ev["tid"]) + if tid_map is not None: + if canary_tid not in tid_map: + unmapped += 1 + continue + tid = tid_map[canary_tid] + else: + tid = canary_tid + entry = { + "tid": tid, + "tid_event_idx": int(ev["tid_event_idx"]), + "site_sid": str(payload.get("site_sid", "")), + "cs_ptr": str(payload.get("cs_ptr", "")), + "contended": True, + } + # Defensive: every Stage 1 event carries cs_ptr + site_sid. 
+ # If either is missing, skip rather than emit a broken entry. + if not entry["site_sid"] or not entry["cs_ptr"]: + bad_lines += 1 + continue + entries.append(entry) + + # Stable sort by (tid, tid_event_idx). Same (tid, idx) pair is not + # expected — the per-tid counter is monotone — but if duplicates + # appear (e.g. mis-merged jsonls), keep the first; later phases would + # otherwise see ambiguous manifest keys. + entries.sort(key=lambda e: (e["tid"], e["tid_event_idx"])) + deduped: list[dict] = [] + seen: set[tuple[int, int]] = set() + dup_count = 0 + for e in entries: + key = (e["tid"], e["tid_event_idx"]) + if key in seen: + dup_count += 1 + continue + seen.add(key) + deduped.append(e) + + per_tid: dict[str, int] = {} + for e in deduped: + per_tid[str(e["tid"])] = per_tid.get(str(e["tid"]), 0) + 1 + + return { + "version": 1, + "source_canary_jsonl": str(jsonl_path.resolve()), + "source_canary_sha256": sha256_of(jsonl_path), + "built_at_host_unix": int(time.time()), + "tid_map": tid_map, + "summary": { + "total_input_events": total_input, + "total_contention_events_kept": len(deduped), + "skipped_bad_lines": bad_lines, + "skipped_unmapped_tids": unmapped, + "skipped_duplicate_keys": dup_count, + "per_tid_counts": per_tid, + }, + "entries": deduped, + } + + +def render_summary(manifest: dict) -> str: + s = manifest["summary"] + lines = [ + f"contention manifest built from {manifest['source_canary_jsonl']}", + f" source sha256: {manifest['source_canary_sha256']}", + f" total input events scanned: {s['total_input_events']}", + f" contention events kept: {s['total_contention_events_kept']}", + f" bad/skipped lines: {s['skipped_bad_lines']}", + f" duplicate (tid,idx) skipped: {s['skipped_duplicate_keys']}", + " per-tid counts:", + ] + for tid, count in sorted(s["per_tid_counts"].items(), + key=lambda kv: int(kv[0])): + lines.append(f" tid={int(tid):4d} {count}") + return "\n".join(lines) + + +def main() -> int: + args = parse_args() + src = 
Path(args.canary_jsonl) + if not src.is_file(): + print(f"error: not a file: {src}", file=sys.stderr) + return 1 + try: + tid_map = parse_tid_map(args.tid_map) + except ValueError as e: + print(f"error: --tid-map: {e}", file=sys.stderr) + return 1 + manifest = build_manifest(src, tid_map=tid_map) + if manifest["summary"]["total_contention_events_kept"] == 0: + print( + "error: 0 contention.observed events found — was the trace " + "captured with --kernel_emit_contention=true?", + file=sys.stderr, + ) + return 1 + out = Path(args.out) + out.parent.mkdir(parents=True, exist_ok=True) + with out.open("w", encoding="utf-8") as f: + json.dump(manifest, f, indent=2) + f.write("\n") + if not args.quiet: + print(render_summary(manifest), file=sys.stderr) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/diff-events/diff_events.py b/tools/diff-events/diff_events.py new file mode 100644 index 0000000..e7de349 --- /dev/null +++ b/tools/diff-events/diff_events.py @@ -0,0 +1,1376 @@ +#!/usr/bin/env python3 +"""Phase A event-log diff tool. + +Reads two schema-v1 JSONL event logs (one per engine) and reports the +first behavioral divergence per guest-thread. Aligns streams by +`tid_event_idx`. Field-comparison rules come straight from +`audit-runs/phase-a-diff-harness/schema-v1.md` — keep both in sync. + +Usage: + diff_events.py --canary canary.jsonl --ours ours.jsonl [--out report.md] + diff_events.py --canary a.jsonl --ours b.jsonl --validate-identical + diff_events.py --canary a.jsonl --ours b.jsonl --tid-map 6=1,7=2 +""" + +import argparse +import json +import sys +from pathlib import Path + +SCHEMA_VERSION = 1 + +# Phase C+18 — Shared-global SID marker. 
Process-global dispatcher +# objects (canary `XObject::GetNativeObject` lazy-wrap / ours +# `ensure_dispatcher_object` first-touch synthesis) use this constant as +# the `create_site_pc` input to the FNV-1a SID computation so the SID is +# scheduling-invariant — keyed on `(marker, 0, pointer, object_type)`. +# See `event_log.rs::SHARED_GLOBAL_SID_MARKER` / `event_log.h::kSharedGlobalSidMarker` +# and schema-v1.md §"Shared-global SIDs". Both engines must use this +# exact value. +SHARED_GLOBAL_SID_MARKER = 0xC01AB005 + + +def _fnv1a_64(data: bytes) -> int: + """FNV-1a 64-bit. Identical to the engines' `semantic_id`/`ComputeSemanticId` + inner loops. Inlined here so the diff tool has no external deps.""" + h = 0xCBF29CE484222325 + for b in data: + h ^= b + h = (h * 0x100000001B3) & 0xFFFFFFFFFFFFFFFF + return h + + +def shared_global_sid(pointer: int, object_type: int) -> str: + """Compute the deterministic shared-global SID for a process-global + dispatcher (engine-agnostic). Inputs: + create_site_pc = SHARED_GLOBAL_SID_MARKER (4 bytes LE) + creating_tid = 0 (4 bytes LE) + tid_event_idx = pointer as u64 (8 bytes LE) + object_type = object_type (4 bytes LE) + Returns the lowercase 16-hex-char SID string (schema-v1 format).""" + buf = bytearray(4 + 4 + 8 + 4) + buf[0:4] = SHARED_GLOBAL_SID_MARKER.to_bytes(4, "little") + buf[4:8] = (0).to_bytes(4, "little") + buf[8:16] = (pointer & 0xFFFFFFFFFFFFFFFF).to_bytes(8, "little") + buf[16:20] = (object_type & 0xFFFFFFFF).to_bytes(4, "little") + return f"{_fnv1a_64(bytes(buf)):016x}" + + +def is_shared_global_handle_create(ev: dict) -> bool: + """Return True if `ev` is a `handle.create` whose SID matches the + deterministic shared-global recipe over the event's own + `(raw_handle_id, object_type)`. Self-consistent — independent of + cross-engine context. + + Phase C+18 emits these via `ensure_dispatcher_object` (ours) and + `XObject::GetNativeObject` (canary). 
Regular per-thread + `handle.create` events (file/thread/etc., allocated via + `alloc_handle_for`/`AddHandle`) use the per-(tid, idx) SID recipe + and will NOT match this check — they keep their strict per-tid + sequence alignment in the diff. + + Asymmetry note (Phase C+21): in **ours** the `raw_handle_id` is the + guest dispatcher pointer itself (so the recipe recomputes from the + payload directly). In **canary**, `EmitHandleCreateSharedGlobal` + hashes the dispatcher's guest VA but stashes + `object->handle()` (the handle-table slot, e.g. `0xf8000044`) as + `raw_handle_id`. So canary's shared-global handle.create events are + NOT self-recognizable by this recipe check. The diff tool covers + canary's side via the cross-tid usage heuristic in + `collect_shared_global_sids` — any SID that appears across multiple + tids in either engine is also treated as shared-global. + """ + if ev.get("kind") != "handle.create": + return False + p = ev.get("payload") or {} + sid = p.get("handle_semantic_id") + if not isinstance(sid, str): + return False + raw = p.get("raw_handle_id") + if not isinstance(raw, str): + return False + try: + pointer = int(raw, 16) + except ValueError: + return False + obj_type = p.get("object_type") + if not isinstance(obj_type, int): + return False + return sid == shared_global_sid(pointer, obj_type) + + +def collect_shared_global_sids( + canary_by_tid: dict, ours_by_tid: dict +) -> set[str]: + """Collect the set of SIDs that are scheduling-invariant + "shared-global" — i.e. process-global dispatchers whose creation + order and per-tid attribution are timing-dependent. The diff tool + treats these SIDs as floating across tids (cross-engine match by + SID alone, regardless of which tid happens to be the first + toucher) — see Phase C+18 (`handle.create`) and Phase C+21 + (`wait.begin`). + + The set is the UNION of: + + 1. 
Recipe-matching `handle.create` events: any `handle.create` + whose payload SID equals `shared_global_sid(raw_handle_id, + object_type)`. This catches ours's `ensure_dispatcher_object` + output directly (where `raw_handle_id == ptr`). It does NOT + catch canary's `EmitHandleCreateSharedGlobal` output because + canary stashes the handle-table slot id (`0xf8xxxxxx`) as + `raw_handle_id` rather than the dispatcher VA that was hashed. + + 2. Cross-tid usage heuristic: any SID that is referenced by + `handle.create` or `wait.begin` events on **two or more + distinct guest tids** in EITHER engine. Process-global + dispatchers are touched by multiple guest threads during boot + (XAudio voice-volume semaphores, shared CSes, shared KEVENTs); + per-thread SIDs by construction stay on the single creating + tid (their hash inputs include `creating_tid`). So multi-tid + SID usage is a strong shared-global signal that survives + canary's raw_handle_id asymmetry. + + Heuristic risk note: a per-thread SID referenced by another tid + via a wait would also appear cross-tid; this happens + legitimately (one thread creates, another waits) and would be + flagged as "shared-global" here. That's acceptable for the diff + tool's purpose — the floating-absorb only kicks in on KIND + MISMATCH, so true per-thread chains that match strictly on both + sides will still align correctly. The heuristic only loosens + things when one side is missing a `handle.create` or + `wait.begin` event for a cross-tid-used SID — which is exactly + the scheduling-jitter window the C+21 fix targets. + """ + sids: set[str] = set() + # Pass 1: recipe-matching handle.create events. + for evs_by_tid in (canary_by_tid, ours_by_tid): + for evs in evs_by_tid.values(): + for ev in evs: + if is_shared_global_handle_create(ev): + s = _ev_handle_create_sid(ev) + if s: + sids.add(s) + # Pass 2: cross-tid usage heuristic. 
+ for evs_by_tid in (canary_by_tid, ours_by_tid): + sid_to_tids: dict[str, set[int]] = {} + for tid, evs in evs_by_tid.items(): + for ev in evs: + k = ev.get("kind") + p = ev.get("payload") or {} + if k == "handle.create": + s = p.get("handle_semantic_id") + if isinstance(s, str): + sid_to_tids.setdefault(s, set()).add(tid) + elif k == "wait.begin": + handles = p.get("handles_semantic_ids") or [] + for s in handles: + if isinstance(s, str): + sid_to_tids.setdefault(s, set()).add(tid) + for s, tids in sid_to_tids.items(): + if len(tids) >= 2: + sids.add(s) + return sids + + +def is_shared_global_wait_begin(ev: dict, shared_sids: set[str]) -> bool: + """Return True if `ev` is a `wait.begin` referencing AT LEAST ONE + shared-global SID (per Phase C+21). For `wait_type=all` events, + any single shared-global handle in the set is enough to classify + the wait.begin as floating — the wait itself is a process-global + dispatcher contention point that may or may not actually block + depending on host scheduling. + + See `collect_shared_global_sids` for what populates + `shared_sids`. + """ + if ev.get("kind") != "wait.begin": + return False + if not shared_sids: + return False + p = ev.get("payload") or {} + handles = p.get("handles_semantic_ids") or [] + for sid in handles: + if isinstance(sid, str) and sid in shared_sids: + return True + return False + + +# Phase D Stage 4 — event kinds that are emitted by only one engine +# under default config or carry engine-local payloads that can't be +# cross-engine compared. The diff tool advances its per-tid pointer +# past these events on EITHER side without alignment, so they +# never participate in matched-prefix. +# +# `contention.observed` (v1.4): canary emits when its +# RtlEnterCriticalSection spin loop is exhausted (cvar-gated, +# `kernel_emit_contention=true`). Ours emits from +# `rtl_enter_critical_section` when the Stage-3 manifest fires (env +# var `XENIA_CONTENTION_MANIFEST_PATH`). 
# The payload's `cs_ptr` and
# `site_sid` use each engine's native guest VA, which differ under
# AUDIT-043 allocator ε; comparing payloads would always diverge.
# The KIND itself (one event consuming one per-tid idx slot at the
# same logical call site) is what matters, so we drop the entire
# event from the diff.
#
# `sema.release` (v1.6 — added in AUDIT-069 Session 6, the
# AUDIT-070 bridge): symmetric semaphore-release event. Both
# engines MAY emit (cvar-gated default-off in canary; runtime-flag
# gated default-off in ours). Cadence is host-scheduler-driven on
# the work-semaphore (per AUDIT-069 H3: ours under-produces by ~80%
# on the cache-thread); strict positional alignment would always
# trip on AUDIT-069's known divergence. The kind is `ENGINE_LOCAL`
# at the alignment layer — but we surface per-engine counts and
# per-handle-SID counts in the summary so future regressions are
# diff-visible. See schema-v1.md §"sema.release (v1.6)".
#
# NOTE(review): `signal.match` and `wake.requested` are also members of
# the set below but are not described in this comment — presumably they
# are likewise single-engine or engine-local; confirm and document.
ENGINE_LOCAL_KINDS = {"contention.observed", "sema.release", "signal.match", "wake.requested"}

# Kinds whose total counts are surfaced in the report summary even
# though they don't participate in matched-prefix alignment.
COUNTED_ENGINE_LOCAL_KINDS = {"sema.release"}

# Top-level event fields the diff tool skips (engine-local or host-clock).
SKIP_TOP_FIELDS = {"engine", "host_ns", "guest_cycle", "deterministic"}
# Within a payload: fields skipped per event kind (in addition to the
# global set). Maps kind -> set of payload keys excluded from comparison.
SKIP_PAYLOAD_FIELDS_BY_KIND = {
    # raw_handle_id is engine-local; the diff key is handle_semantic_id.
    # Phase C+15-α: handle_semantic_id is computed via FNV-1a over
    # `(create_site_pc, creating_tid, tid_event_idx_at_creation, object_type)`.
    # `creating_tid` differs cross-engine (canary tid=6 maps to ours tid=1
    # etc.), so the SID is engine-local for cross-engine comparison.
    # Skip the SID field at the diff layer; rely on tid_event_idx +
    # object_type + payload fields to align. Same rationale as raw_handle_id.
    # `parent_tid` likewise differs cross-engine.
    "handle.create": {"raw_handle_id", "handle_semantic_id"},
    "handle.destroy": {"raw_handle_id", "handle_semantic_id"},
    "thread.create": {"handle_semantic_id", "parent_tid"},
    "wait.begin": {"handles_semantic_ids"},
    # wait_duration_cycles is non-deterministic (host scheduling).
    "wait.end": {"wait_duration_cycles", "woken_by_semantic_id"},
}

# Allocator-returning kernel exports whose `kernel.return.payload.return_value`
# is a host-allocator-dependent guest VA. Canary and ours legitimately route
# allocations to different heap regions (e.g. canary `MmAllocatePhysicalMemoryEx`
# returns `0xBC220000` from `vC0000000` while ours returns `0x40105000` from
# its single user-heap region — see AUDIT-043 "ε host-allocator address-space
# divergence" and Phase B `report.md` ε-class). Comparing raw VAs would always
# diverge at the first allocator call.
#
# Canonicalization: per `(tid, export_name)` we assign a stable ordinal
# (0, 1, 2, …) to each successive `kernel.return.return_value`, replacing
# both sides' value with an ordinal-based sentinel string (see
# `canonicalize_allocator_returns`) before payload comparison. As long as
# both engines call the same allocator the same number of times in the
# same order on a given thread, the comparison treats them as equivalent.
#
# Limitations (documented):
#   * If one engine calls an allocator more times than the other, ordinals
#     drift and subsequent allocator returns appear as divergences. That's
#     the correct outcome — ordinal-count mismatch IS a behavioral
#     divergence.
#   * NOTE(review): an earlier revision of this comment said
#     `payload.status` is left untouched, but `canonicalize_allocator_returns`
#     rewrites `status` together with `return_value` when the key is present.
#   * Other payload fields that happen to embed an allocator VA (e.g. a
#     future `args_resolved.base_address` in a free-call) are NOT
#     canonicalized — out of scope for this divergence. Extend the set
#     below as new divergence classes surface.
ALLOCATOR_RETURN_FNS = frozenset(
    [
        "MmAllocatePhysicalMemoryEx",
        "MmAllocatePhysicalMemory",
        "NtAllocateVirtualMemory",
        "RtlAllocateHeap",
        "MmCreateKernelStack",
        # Phase C+3: `RtlImageXexHeaderField` returns either a plain
        # inline value (key low byte = 0x00) OR a guest VA inside the
        # in-guest XEX header copy (key low byte = 0x01 or "else"). The
        # latter is host-allocator-dependent (canary's `guest_xex_header_`
        # via `Memory::SystemHeapAlloc` lands in the `0x30xxxxxx` virtual-
        # heap region; ours's `KernelState::heap_alloc` cursor lands in
        # `0x4xxxxxxx`). Canonicalize the whole class — for inline-value
        # keys the sentinel will still match per (tid, name) ordinal
        # provided both engines emit the same call sequence in the same
        # order. See Phase C+3 fix for the implementation parity.
        "RtlImageXexHeaderField",
        # Phase D D-extension follow-up: `XamNotifyCreateListener`
        # returns a 64-bit identity that differs across engines —
        # canary returns the sign-extended host kernel-space pointer
        # of the `XamNotifyListener` object (e.g. 0xFFFFFFFFFE8E110C),
        # ours returns the allocated guest handle id (e.g. 0x10E0).
        # Both are stable per-(tid, name) ordinal so canonicalization
        # via the per-(tid, name) ordinal sentinel lines them up. Surface
        # the underlying semantic divergence as the listener handle's
        # subsequent use (`XamNotifyGetNext` etc.) rather than at the
        # creation call.
        "XamNotifyCreateListener",
        # Phase C+25: `MmGetPhysicalAddress` is a VA→PA translator whose
        # return depends on which heap region the input VA lives in. This
        # is the downstream consequence of C+2's deferred Path β (canary
        # has three physical heaps at vA0/vC0/vE0 routed by page size,
        # ours has a single unified heap_cursor starting at 0x40000000).
        # Concretely: at C+25 idx 105,112 canary returned 0x150B0000
        # (input 0xF50AF000 in `vE0000000` heap: addr - 0xE0000000 + 0x1000
        # per `PhysicalHeap::GetPhysicalAddress`, see `memory.cc:2317`),
        # while ours returned 0x0ADCF000 (input ~0x4ADCF000 in unified heap,
        # masked via `& 0x1FFF_FFFF` per `exports.rs:985`). Both engines'
        # translations are SELF-CONSISTENT — game code passes the PA
        # opaquely to GPU (`VdInitializeRingBuffer` is the very next call)
        # and the GPU translates it back to a host pointer using the same
        # engine's heap map. Per-(tid,name) ordinal sentinel preserves the
        # opaque-pass-through semantics while exposing actual divergences
        # (e.g. game-side arithmetic on the PA, or a translation-count
        # mismatch). Lifting the engine-side three-physical-heaps memory
        # model is the C+2 Path β deferral, out of scope for C+25 (see
        # `project_phase_c2_MmAllocatePhysicalMemoryEx_2026_05_13.md`).
        "MmGetPhysicalAddress",
    ]
)


def canonicalize_allocator_returns(events_by_tid: dict) -> None:
    """Replace allocator return VAs with per-(tid, name) ordinal sentinels.

    Mutates the events in place. For every `kernel.return` whose
    `payload.name` is in `ALLOCATOR_RETURN_FNS`, `payload.return_value` is
    overwritten with the string `<ALLOC_{name}_{ordinal}>`, where
    `ordinal` counts that export's returns on that tid in event order
    (0, 1, 2, …). If the payload also has a `status` key it receives the
    same sentinel.

    Embedding both the name and the ordinal in the sentinel is what makes
    call-count drift visible: if one engine calls an allocator more often
    than the other, later sentinels differ and the payload comparison
    reports a divergence.

    Intended to be called on each engine's stream independently; equal
    per-tid call sequences yield equal sentinels.

    Args:
        events_by_tid: mapping of tid -> list of event dicts.
    """
    for evs in events_by_tid.values():
        # name -> next ordinal to assign on this tid
        counters: dict[str, int] = {}
        for ev in evs:
            if ev.get("kind") != "kernel.return":
                continue
            payload = ev.get("payload") or {}
            name = payload.get("name")
            if name not in ALLOCATOR_RETURN_FNS:
                continue
            ordinal = counters.get(name, 0)
            counters[name] = ordinal + 1
            sentinel = f"<ALLOC_{name}_{ordinal}>"
            payload["return_value"] = sentinel
            # `payload.status` mirrors `return_value` as a hex string for
            # allocator entries (xboxkrnl trampoline doesn't distinguish
            # NTSTATUS from pointer-typed returns). Canonicalize together
            # so they stay in lockstep.
            if "status" in payload:
                payload["status"] = sentinel


# Phase C+22 (v1.7) — payload-field canonicalization for host-heap-derived
# guest addresses that appear NOT as `kernel.return.return_value` but inside
# typed event payloads. These are the "second-class" allocator returns: a
# kernel call (e.g. ExCreateThread) allocates a TLS/context block via the
# host allocator, then the block's guest VA shows up in a *typed* downstream
# event (e.g. `thread.create.ctx_ptr`). The VA lives in different host-heap
# regions across engines (canary's `0xBCxxxxxx` BC physical heap vs ours's
# `0x4xxxxxxx` unified user heap — same AUDIT-043 ε class as C+2's
# `MmAllocatePhysicalMemoryEx`), so strict comparison always diverges.
#
# Canonicalization (mirrors `canonicalize_allocator_returns`): per
# `(tid, kind, field)` we assign a stable ordinal in per-tid event order and
# rewrite the field's hex-string value with a per-(tid, kind, field)
# ordinal sentinel.
# As long as both engines emit the same number of typed events on a given
# tid in the same order, the comparison treats them as equivalent.
#
# Map shape: kind -> tuple of payload-field names.
The fields MUST hold a +# `0x`-prefixed hex string (guest VA); other types are left alone. +# +# Limitations (documented): +# * Ordinal-count mismatch IS a behavioral divergence (per-tid call-order +# skew → drifting sentinels → divergence reported at the first skewed +# event). Same contract as `ALLOCATOR_RETURN_FNS`. +# * The field is rewritten in-place. The pre-canonicalization raw VA is +# still preserved in the on-disk JSONL (we operate on the in-memory +# per-engine event dicts loaded by `load_events`). +# * Strictly compared fields next to the canonicalized one (e.g. +# `entry_pc`, `priority`, `affinity`, `stack_size`, `suspended` on a +# `thread.create`) are UNTOUCHED — they are game-visible attributes +# that must match bit-identically across engines. +# +# Empirical first surface (C+22, cold-vs-cold idx 105,128 — Sylpheed +# ExCreateThread for the audio-stack worker entry `0x824cd458`): +# canary: ctx_ptr = "0xbe56bb3c" (BC physical heap) +# ours: ctx_ptr = "0x42453b3c" (unified user heap) +# Both engines correctly allocate; both pass the resulting VA opaquely +# into the new guest thread's r3 register; the diff tool's only sensible +# behavior is to canonicalize and align by ordinal. +HOST_HEAP_PAYLOAD_FIELDS_BY_KIND: dict[str, tuple[str, ...]] = { + "thread.create": ("ctx_ptr",), +} + + +def canonicalize_host_heap_payload_fields(events_by_tid: dict) -> None: + """In-place: rewrite host-heap-derived guest VA fields per + HOST_HEAP_PAYLOAD_FIELDS_BY_KIND. + + For each event whose `kind` is a key in the map, replace each + listed payload field's value (expected `0x`-prefixed hex string) + with the per-(tid, kind, field) ordinal sentinel + `__>`. Non-string values and + missing fields are left untouched (defensive — pre-C+22 logs that + happen to omit the field for some reason still parse). + + Per-tid ordinals are independent across (kind, field) pairs, so + e.g. 
`thread.create.ctx_ptr` ordinals do not interfere with a + (future) `thread.create.other_ptr` canonicalization. + + Called once per engine's stream independently; equivalent + per-tid call sequences produce equivalent sentinels.""" + for _tid, evs in events_by_tid.items(): + # (kind, field) -> next ordinal on this tid + counters: dict[tuple[str, str], int] = {} + for ev in evs: + kind = ev.get("kind") + fields = HOST_HEAP_PAYLOAD_FIELDS_BY_KIND.get(kind) + if not fields: + continue + payload = ev.get("payload") or {} + for field in fields: + if field not in payload: + continue + value = payload[field] + # Defensive: only canonicalize string-typed VAs. A field + # that's already a sentinel (e.g. from a previous call + # in test-mode) or a non-string is left alone. + if not isinstance(value, str): + continue + key = (kind, field) + ordinal = counters.get(key, 0) + counters[key] = ordinal + 1 + sentinel = ( + f"" + ) + payload[field] = sentinel + + +def load_events(path: Path) -> dict: + """Return {tid: [event, ...]} keyed by tid, ordered by tid_event_idx. + + Validates the schema header (first line must be schema_version=1). 
+ """ + events_by_tid: dict[int, list[dict]] = {} + with path.open("r", encoding="utf-8") as f: + first = f.readline() + if not first: + raise SystemExit(f"{path}: empty file") + hdr = json.loads(first) + if hdr.get("kind") != "schema_version": + raise SystemExit( + f"{path}: first event is not schema_version (got {hdr.get('kind')!r})" + ) + if hdr.get("schema_version") != SCHEMA_VERSION: + raise SystemExit( + f"{path}: schema_version mismatch (expected {SCHEMA_VERSION}, got {hdr.get('schema_version')!r})" + ) + for lineno, line in enumerate(f, start=2): + line = line.rstrip("\n") + if not line: + continue + try: + ev = json.loads(line) + except json.JSONDecodeError as e: + raise SystemExit(f"{path}:{lineno}: invalid JSON ({e})") + tid = ev.get("tid") + if tid is None: + raise SystemExit(f"{path}:{lineno}: missing tid") + events_by_tid.setdefault(tid, []).append(ev) + # Ensure each per-tid list is already monotonic by tid_event_idx. + for tid, evs in events_by_tid.items(): + for i, ev in enumerate(evs): + if ev.get("tid_event_idx") != i: + # Note: the schema permits one engine to emit fewer events; we + # only validate the in-file ordering is strictly monotonic. + if i > 0 and ev["tid_event_idx"] <= evs[i - 1]["tid_event_idx"]: + raise SystemExit( + f"{path}: tid={tid} events out of order at index {i}" + ) + return events_by_tid + + +def auto_tid_map(canary_evs: dict, ours_evs: dict) -> dict[int, int]: + """Naive tid mapping: pair canary tids with ours tids by the first + kernel.call name in each stream. 
Documented limitation in README.""" + def first_call_name(evs: list[dict]) -> str | None: + for ev in evs: + if ev.get("kind") == "kernel.call": + return ev["payload"].get("name") + return None + + canary_by_first = {} + for tid, evs in canary_evs.items(): + name = first_call_name(evs) + if name is not None: + canary_by_first.setdefault(name, []).append(tid) + + ours_by_first = {} + for tid, evs in ours_evs.items(): + name = first_call_name(evs) + if name is not None: + ours_by_first.setdefault(name, []).append(tid) + + mapping: dict[int, int] = {} + for name, c_tids in canary_by_first.items(): + o_tids = ours_by_first.get(name, []) + for c, o in zip(sorted(c_tids), sorted(o_tids)): + mapping[c] = o + return mapping + + +def parse_tid_map_arg(s: str) -> dict[int, int]: + """Parse `--tid-map 6=1,7=2` into {6: 1, 7: 2}.""" + out: dict[int, int] = {} + for token in s.split(","): + token = token.strip() + if not token: + continue + if "=" not in token: + raise SystemExit(f"--tid-map: bad token {token!r} (expected canary=ours)") + a, b = token.split("=", 1) + out[int(a.strip(), 0)] = int(b.strip(), 0) + return out + + +# Iterate 2.L (2026-05-28) — payload fields whose mismatch is given +# diagnostic PRIORITY on `kernel.return` events. The generic per-field +# walk would still surface these eventually, but reading-error #41 +# showed that mixing them in among allocator/SID/etc. noise risked +# burying the most actionable signal (return-value inversion = +# state-parity bug). Surfacing them first + tagging the category +# explicitly makes future iterates spot the class at a glance. +# +# Skip rule honored: if `return_value` is absent on either side we +# don't synthesize a divergence — the generic walk's missing-key +# logic still applies. Allocator returns are already canonicalized +# upstream via `ALLOCATOR_RETURN_FNS`, so they cannot trip this +# check (the sentinels match on both sides by construction). 
+_KERNEL_RETURN_PRIORITY_FIELDS = ("return_value", "status") + + +def _format_return_value_diff( + name: str | None, field: str, vc, vo +) -> str: + """One-line, category-tagged diff for a kernel.return payload + field. Includes the function name and both raw values. Used by + `compare_payload` for the kernel.return priority fields. The + `[return_value mismatch]` / `[status mismatch]` tag is intended + to be greppable across diff reports.""" + name_part = f" name={name}" if name else "" + return ( + f"[{field} mismatch] kernel.return{name_part}: " + f"canary={vc!r} ours={vo!r}" + ) + + +def _format_kernel_call_arg_diff( + name: str | None, sub: str, key: str, vc, vo +) -> str: + """Category-tagged diff for a kernel.call payload sub-dict mismatch + (`args` or `args_resolved`). Surfaces the function name + the + nested key so a diff like `args_resolved.path` mismatch is + trivially greppable. Iterate 2.L extension.""" + name_part = f" name={name}" if name else "" + return ( + f"[{sub}.{key} mismatch] kernel.call{name_part}: " + f"canary={vc!r} ours={vo!r}" + ) + + +def compare_payload(kind: str, p_canary: dict, p_ours: dict) -> str | None: + """Compare two payloads. Returns None if equivalent, else a short + human-readable description of the first differing field. + + Iterate 2.L (2026-05-28): on `kernel.return` events, the + `return_value` and `status` fields are checked FIRST and emit a + category-tagged diff string. Closes reading-error #41 (silent + test-harness state leak invalidating trace diffs) by surfacing + state-parity inversions (e.g. cache-probe SUCCESS vs NO_SUCH_FILE) + with a greppable `[return_value mismatch]` prefix instead of + burying them in a generic `payload.X` walk. Same for kernel.call + `args` / `args_resolved` sub-dicts: nested mismatches get + `[args_resolved.path mismatch]` etc. so the class is visible at a + glance. 
+ + Skip rule: a priority field is checked only when present on BOTH + sides; one-sided absence falls through to the generic walk's + missing-key path (which preserves the pre-2.L behavior).""" + skip = SKIP_PAYLOAD_FIELDS_BY_KIND.get(kind, set()) + # Iterate 2.L priority pass: kernel.return return_value/status first. + if kind == "kernel.return": + name = p_canary.get("name") or p_ours.get("name") + for field in _KERNEL_RETURN_PRIORITY_FIELDS: + if field in skip: + continue + if field not in p_canary or field not in p_ours: + # Schema gap — defer to the generic walk's missing-key path. + continue + vc = p_canary[field] + vo = p_ours[field] + if vc != vo: + return _format_return_value_diff(name, field, vc, vo) + # Iterate 2.L priority pass: kernel.call args / args_resolved sub-dict + # mismatches surface category-tagged so an `args_resolved.path` + # divergence (e.g. canonical-path drift) doesn't read as a generic + # `payload.args_resolved: canary={...} ours={...}` blob. + if kind == "kernel.call": + name = p_canary.get("name") or p_ours.get("name") + for sub in ("args", "args_resolved"): + if sub in skip: + continue + sc = p_canary.get(sub) + so = p_ours.get(sub) + if not isinstance(sc, dict) or not isinstance(so, dict): + continue + if sc == so: + continue + # Walk sub-dict in canary key order; first differing key wins. + for k in sc.keys(): + if k not in so or sc[k] != so[k]: + return _format_kernel_call_arg_diff( + name, sub, k, sc.get(k), so.get(k) + ) + for k in so.keys(): + if k not in sc: + return _format_kernel_call_arg_diff( + name, sub, k, None, so[k] + ) + # Compare the union of keys excluding skipped ones, in canary's key order + # first (stable), then any ours-only fields. 
+ keys_seen: set[str] = set() + for k in p_canary.keys(): + if k in skip: + continue + keys_seen.add(k) + vc = p_canary.get(k) + vo = p_ours.get(k) + if vc != vo: + return f"payload.{k}: canary={vc!r} ours={vo!r}" + for k in p_ours.keys(): + if k in skip or k in keys_seen: + continue + if p_ours[k] is not None: + return f"payload.{k}: canary= ours={p_ours[k]!r}" + return None + + +def compare_event(ev_canary: dict, ev_ours: dict) -> str | None: + """Compare two events. Returns None if equivalent, else a short description. + + Phase C+18: the per-tid `tid_event_idx` field is NOT compared field-to- + field. Both engines emit monotonic indices, but a floating shared-global + `handle.create` absorbed on one side will leave the running indices + offset by 1 — preserving the canonical pre/post alignment is what the + diff tool needs, and that's enforced by the stream-pointer walk in + `diff_one_tid`. The legacy "tid_event_idx must match" check was a + stricter form of the same invariant; relaxing it permits the floating- + create absorb without weakening the per-position comparison. (The + raw indices are still preserved in the events themselves and shown in + the diff report.) + """ + # Top-level comparison: kind must match. + if ev_canary.get("kind") != ev_ours.get("kind"): + return f"kind: canary={ev_canary.get('kind')!r} ours={ev_ours.get('kind')!r}" + # Payload comparison. 
+ pc = ev_canary.get("payload", {}) + po = ev_ours.get("payload", {}) + diff = compare_payload(ev_canary["kind"], pc, po) + if diff: + return diff + return None + + +def render_event(ev: dict) -> str: + """One-line summary of an event for the diff report.""" + kind = ev.get("kind", "?") + idx = ev.get("tid_event_idx", "?") + payload = ev.get("payload", {}) + if kind in ("kernel.call", "kernel.return", "import.call"): + name = payload.get("name") or payload.get("ord") + return f"[{idx}] {kind} {name}" + if kind in ("handle.create", "handle.destroy"): + sid = payload.get("handle_semantic_id", "?") + return f"[{idx}] {kind} sid={sid}" + if kind in ("thread.create", "thread.exit"): + return f"[{idx}] {kind} {payload}" + if kind in ("wait.begin", "wait.end"): + return f"[{idx}] {kind} {payload}" + return f"[{idx}] {kind} {payload}" + + +def _is_import_call_named(ev: dict, name: str) -> bool: + return ( + ev.get("kind") == "import.call" + and (ev.get("payload") or {}).get("name") == name + ) + + +def _is_kernel_call_named(ev: dict, name: str) -> bool: + return ( + ev.get("kind") == "kernel.call" + and (ev.get("payload") or {}).get("name") == name + ) + + +def _is_kernel_return_named(ev: dict, name: str) -> bool: + return ( + ev.get("kind") == "kernel.return" + and (ev.get("payload") or {}).get("name") == name + ) + + +def _looks_like_enter_block(canary: list[dict], i: int) -> bool: + """True iff canary[i..i+3] is a fast-path RtlEnterCriticalSection + (import.call → kernel.call → kernel.return on the same name).""" + if i + 3 > len(canary): + return False + return ( + _is_import_call_named(canary[i], "RtlEnterCriticalSection") + and _is_kernel_call_named(canary[i + 1], "RtlEnterCriticalSection") + and _is_kernel_return_named(canary[i + 2], "RtlEnterCriticalSection") + ) + + +def _looks_like_leave_block(canary: list[dict], i: int) -> bool: + if i + 3 > len(canary): + return False + return ( + _is_import_call_named(canary[i], "RtlLeaveCriticalSection") + and 
_is_kernel_call_named(canary[i + 1], "RtlLeaveCriticalSection") + and _is_kernel_return_named(canary[i + 2], "RtlLeaveCriticalSection") + ) + + +# Phase D D-extension (v1.5): cap on nested-cleanup pairs to absorb in a +# single fold. Sylpheed's empirical max at the 104,607 cap is ~30 pairs +# (one per work item in the canary registry/tree). Anything beyond this +# is suspicious and likely a real divergence; refuse to absorb. +_NESTED_CS_PAIR_CAP = 32 + + +def _try_absorb_nested_cs_cleanup( + canary: list[dict], + ours: list[dict], + ic: int, + io: int, +) -> int | None: + """Phase D D-extension (v1.5): when the diff is at a kind mismatch + of `import.call RtlEnterCriticalSection` (canary) vs + `import.call RtlLeaveCriticalSection` (ours), look ahead in canary + for one or more balanced `[Enter-block, Leave-block]` pairs (each + pair = 6 events) followed by an event with the SAME kind as + `ours[io]`. If found, return the number of canary events to skip + (a multiple of 6); else None. + + This is the band-aid absorber documented in plan.md's backstop §2 + and forensics.md. It crosses reading-error #23 in spirit (folding + real guest behavior at the diff layer) but with a narrow trigger + that only fires for the exact E-vs-L kind mismatch shape. + """ + # Both sides must be at an import.call; the kinds must match the + # exact E-vs-L pattern. (Other kind mismatches are real + # divergences; do not touch them.) + if not _is_import_call_named(canary[ic], "RtlEnterCriticalSection"): + return None + if not _is_import_call_named(ours[io], "RtlLeaveCriticalSection"): + return None + # Walk canary's stream consuming balanced [Enter, Leave] pairs. + pos = ic + pairs = 0 + while pairs < _NESTED_CS_PAIR_CAP: + if not _looks_like_enter_block(canary, pos): + break + if not _looks_like_leave_block(canary, pos + 3): + break + pairs += 1 + pos += 6 + # Convergence check: after consuming this pair, canary's next + # event should look like ours's current event. 
Greedy match — + # the first convergence wins. + if pos < len(canary) and canary[pos].get("kind") == ours[io].get("kind"): + cp = canary[pos].get("payload") or {} + op = ours[io].get("payload") or {} + if cp.get("name") == op.get("name"): + return pos - ic + return None + + +def diff_one_tid( + canary_evs: list[dict], + ours_evs: list[dict], + canary_tid: int, + ours_tid: int, + cross_tid_floating_sids: set[str] | None = None, + disabled_absorbers: frozenset[str] | None = None, + absorbed_sink: list[dict] | None = None, +) -> dict: + """Walk one mapped pair. Stop at the first behavioral divergence. + + Phase C+18: when a kind mismatch is found at the current position and + one side has a `handle.create` whose SID is a "floating" cross-tid + shared-global SID (present in `cross_tid_floating_sids`), advance + only that side's pointer (treating the event as not-belonging-to- + this-tid's-sequence) and re-compare. This handles the + process-global-dispatcher first-toucher race — see schema-v1.md + §"Shared-global SIDs" and the C+18 memory entry. + + Phase C+21: extend the floating-absorb to `wait.begin` events whose + `handles_semantic_ids` reference shared-global SIDs. The contention + pattern on these dispatchers is host-scheduler-driven; one cold run + may emit `wait.begin` (slow path) while another fast-paths and emits + only `kernel.return` — see reading-error #32 and the C+20 + `cold-vs-cold-result.md` jitter table. Absorbing these wait.begin + events makes the diff transparent to scheduling-jitter windows + on shared dispatchers without weakening per-thread wait alignment. + + `cross_tid_floating_sids` is the set of shared-global SIDs that + appear in EITHER engine's stream on ANY tid (built by + `collect_shared_global_sids`); passing `None` falls back to strict + per-tid alignment (legacy behavior). + + Phase absorber-review (2026-05-19, investigation-only): + + * `disabled_absorbers` — frozenset subset of + {"shared-global", "wait-begin", "nested-cs"}. 
When an absorber's + name is present, that absorber is skipped (treated as if not + present in the diff tool). Default `None` preserves the + production absorber-on-everywhere behavior. Used by the + `--disable-absorber=` CLI flag for selective-disable re-runs. + * `absorbed_sink` — optional list that, when provided, receives a + dict per absorption event, with keys: + - `absorber` ∈ {"shared-global", "wait-begin", "nested-cs"} + - `side` ∈ {"canary", "ours"} + - `canary_tid`, `ours_tid` (mapped pair under test) + - `matched_at` (position in matched-prefix when absorber fired) + - `event` (the absorbed event verbatim — JSON-serializable) + - For nested-cs: `pairs_consumed` (count of [E,L] pairs folded) + Used by `--emit-absorbed-events` to write a JSONL log of every + silenced event so we can cross-reference against the wedge. + """ + floating = cross_tid_floating_sids if cross_tid_floating_sids else set() + disabled = disabled_absorbers if disabled_absorbers else frozenset() + matched = 0 + pre_context: list[tuple[dict, dict]] = [] + diverged_at: int | None = None + diff_descr: str | None = None + skipped_canary: list[dict] = [] + skipped_ours: list[dict] = [] + skipped_canary_wait: list[dict] = [] + skipped_ours_wait: list[dict] = [] + ic = 0 + io = 0 + skipped_canary_engine_local: list[dict] = [] + skipped_ours_engine_local: list[dict] = [] + while ic < len(canary_evs) and io < len(ours_evs): + ec = canary_evs[ic] + eo = ours_evs[io] + # Phase D Stage 4: engine-local event kinds (e.g. + # `contention.observed`) are emitted by only one engine under + # default config, or carry engine-local payloads (per-engine + # cs_ptr / site_sid that diverge under AUDIT-043 allocator ε). + # Advance the per-tid pointer past these events on EITHER side + # without alignment so they never participate in matched-prefix. 
+ if ec.get("kind") in ENGINE_LOCAL_KINDS: + skipped_canary_engine_local.append(ec) + ic += 1 + continue + if eo.get("kind") in ENGINE_LOCAL_KINDS: + skipped_ours_engine_local.append(eo) + io += 1 + continue + d = compare_event(ec, eo) + if d is None: + matched += 1 + pre_context.append((ec, eo)) + if len(pre_context) > 5: + pre_context.pop(0) + ic += 1 + io += 1 + continue + # Phase C+18: cross-tid floating handle.create. + # One side may have a `handle.create` for a process-global + # dispatcher whose SID was emitted on a DIFFERENT tid in the + # other engine. The SID is engine-agnostic (deterministic over + # `(pointer, object_type)`), so we KNOW the event is observation- + # side. Advance only that side's pointer and re-compare. + if ( + "shared-global" not in disabled + and ec.get("kind") == "handle.create" + and eo.get("kind") != "handle.create" + and _ev_handle_create_sid(ec) in floating + ): + skipped_canary.append(ec) + if absorbed_sink is not None: + absorbed_sink.append({ + "absorber": "shared-global", + "side": "canary", + "canary_tid": canary_tid, + "ours_tid": ours_tid, + "matched_at": matched, + "event": ec, + }) + ic += 1 + continue + if ( + "shared-global" not in disabled + and eo.get("kind") == "handle.create" + and ec.get("kind") != "handle.create" + and _ev_handle_create_sid(eo) in floating + ): + skipped_ours.append(eo) + if absorbed_sink is not None: + absorbed_sink.append({ + "absorber": "shared-global", + "side": "ours", + "canary_tid": canary_tid, + "ours_tid": ours_tid, + "matched_at": matched, + "event": eo, + }) + io += 1 + continue + # Phase C+21: cross-tid floating wait.begin. + # One side may have a `wait.begin` on a process-global dispatcher + # that the OTHER side fast-paths past (no wait — uncontended in + # that cold run). The wait.begin's handles_semantic_ids reference + # shared-global SIDs whose creation order and contention pattern + # are host-scheduler-driven. Absorb the wait.begin on whichever + # side has it. 
Strict per-tid alignment still holds for + # `wait.begin` events whose handles are all per-thread SIDs. + if ( + "wait-begin" not in disabled + and ec.get("kind") == "wait.begin" + and eo.get("kind") != "wait.begin" + and is_shared_global_wait_begin(ec, floating) + ): + skipped_canary_wait.append(ec) + if absorbed_sink is not None: + absorbed_sink.append({ + "absorber": "wait-begin", + "side": "canary", + "canary_tid": canary_tid, + "ours_tid": ours_tid, + "matched_at": matched, + "event": ec, + }) + ic += 1 + continue + if ( + "wait-begin" not in disabled + and eo.get("kind") == "wait.begin" + and ec.get("kind") != "wait.begin" + and is_shared_global_wait_begin(eo, floating) + ): + skipped_ours_wait.append(eo) + if absorbed_sink is not None: + absorbed_sink.append({ + "absorber": "wait-begin", + "side": "ours", + "canary_tid": canary_tid, + "ours_tid": ours_tid, + "matched_at": matched, + "event": eo, + }) + io += 1 + continue + # Phase D D-extension (v1.5): nested-CS-cleanup absorber. + # + # CAVEAT (reading-error #23 boundary): this absorber folds REAL + # guest control-flow divergence at the diff-tool layer. It exists + # because the 104,607 Sylpheed cap (Phase D forensics) is a + # producer-throughput divergence — canary's preemptive + # host-OS scheduling lets a peer tid insert more work items + # into a CS-protected registry/tree during a notification-event + # wait window than ours's cooperative scheduler does. The + # consumer thread then takes an `[E L]`-nested-cleanup branch + # in canary but a fast-Leave branch in ours. Fixing this in + # ours's engine would require preempting the cooperative + # scheduler (which invalidates 23 phases of digest stability; + # explicitly out of scope per the H' plan). 
+ # + # The absorber is narrow: it only fires at the specific kind + # mismatch `import.call RtlEnterCriticalSection` (canary) vs + # `import.call RtlLeaveCriticalSection` (ours), looks ahead in + # canary for balanced `[Enter, Leave]` pairs (6 events each + # consuming idx N..N+5), and only absorbs when canary's + # post-absorption stream re-aligns with ours's current event + # via a matching kind. Other kind mismatches fall through to + # the existing divergence reporting unchanged. + if "nested-cs" not in disabled: + absorbed_d_ext = _try_absorb_nested_cs_cleanup( + canary_evs, ours_evs, ic, io + ) + if absorbed_d_ext is not None: + skipped_canary_d_ext = absorbed_d_ext + if absorbed_sink is not None: + # Record every event in the absorbed window verbatim. + pairs_consumed = skipped_canary_d_ext // 6 + for j in range(skipped_canary_d_ext): + absorbed_sink.append({ + "absorber": "nested-cs", + "side": "canary", + "canary_tid": canary_tid, + "ours_tid": ours_tid, + "matched_at": matched, + "event": canary_evs[ic + j], + "pairs_consumed": pairs_consumed, + "window_offset": j, + }) + ic += skipped_canary_d_ext + continue + diverged_at = matched # report position in the matched-prefix space + diff_descr = d + break + return { + "canary_tid": canary_tid, + "ours_tid": ours_tid, + "matched": matched, + "canary_total": len(canary_evs), + "ours_total": len(ours_evs), + "diverged_at": diverged_at, + "diff_descr": diff_descr, + "pre_context": pre_context, + "post_canary": ( + canary_evs[ic] if diverged_at is not None and ic < len(canary_evs) else None + ), + "post_ours": ( + ours_evs[io] if diverged_at is not None and io < len(ours_evs) else None + ), + "next_canary": ( + canary_evs[ic + 1] + if diverged_at is not None and ic + 1 < len(canary_evs) + else None + ), + "next_ours": ( + ours_evs[io + 1] + if diverged_at is not None and io + 1 < len(ours_evs) + else None + ), + "skipped_canary": skipped_canary, + "skipped_ours": skipped_ours, + "skipped_canary_wait": 
skipped_canary_wait, + "skipped_ours_wait": skipped_ours_wait, + } + + +def _ev_handle_create_sid(ev: dict) -> str: + """Return the lowercased SID string of a `handle.create` event, or ''.""" + p = ev.get("payload") or {} + sid = p.get("handle_semantic_id") + return sid if isinstance(sid, str) else "" + + +def count_engine_local_kinds(events_by_tid: dict[int, list[dict]]) -> dict[str, dict[int, int]]: + """v1.6 (AUDIT-070 bridge): tally `COUNTED_ENGINE_LOCAL_KINDS` events + per-tid for surfacing in the report. `events_by_tid` is the + per-tid event list as loaded by `load_events` (whose return shape + is dict[tid] -> list[event_dict]).""" + out: dict[str, dict[int, int]] = {k: {} for k in COUNTED_ENGINE_LOCAL_KINDS} + for tid, evs in events_by_tid.items(): + for ev in evs: + k = ev.get("kind") + if k in COUNTED_ENGINE_LOCAL_KINDS: + out[k][tid] = out[k].get(tid, 0) + 1 + return out + + +def render_report(per_tid_results: list[dict], + counted_canary: dict[str, dict[int, int]] | None = None, + counted_ours: dict[str, dict[int, int]] | None = None) -> str: + out: list[str] = [] + out.append("# Phase A diff report") + out.append("") + out.append("**This report is the output of Phase A's diff harness. 
Divergences") + out.append("shown here are INPUT for Phase B (first-divergence localization),") + out.append("not findings of Phase A.** Phase A's job is to make the harness") + out.append("itself correct, not to analyze what it surfaces.") + out.append("") + out.append("## Summary") + out.append("") + out.append( + "| canary_tid | ours_tid | matched | canary_total | ours_total | " + "first_divergence_at | floating_create (c/o) | floating_wait (c/o) |" + ) + out.append("|---|---|---|---|---|---|---|---|") + for r in per_tid_results: + div = r["diverged_at"] if r["diverged_at"] is not None else "—" + sc = len(r.get("skipped_canary") or []) + so = len(r.get("skipped_ours") or []) + scw = len(r.get("skipped_canary_wait") or []) + sow = len(r.get("skipped_ours_wait") or []) + out.append( + f"| {r['canary_tid']} | {r['ours_tid']} | {r['matched']} | " + f"{r['canary_total']} | {r['ours_total']} | {div} | " + f"{sc}/{so} | {scw}/{sow} |" + ) + out.append("") + out.append( + "*`floating_create (c/o)` counts shared-global `handle.create` events " + "absorbed by Phase C+18 cross-tid SID matching. " + "`floating_wait (c/o)` counts `wait.begin` events on shared-global " + "dispatchers absorbed by Phase C+21 (scheduling-jitter window — " + "canary's contention slow path may fire while ours fast-paths or " + "vice versa). See schema-v1.md §\"Shared-global SIDs\" and §\"Wait-begin " + "floating absorb\".*" + ) + out.append("") + + # v1.6 (AUDIT-070 bridge): surface counted-engine-local kinds. + # `sema.release` cadence is the primary diff-visible metric for + # AUDIT-069 H3 (worker under-production); we count totals per + # engine but do NOT align positionally — see ENGINE_LOCAL_KINDS. 
+ if (counted_canary or counted_ours) and any( + counted_canary.get(k) or counted_ours.get(k) + for k in COUNTED_ENGINE_LOCAL_KINDS + ): + out.append("## Counted engine-local kinds (v1.6)") + out.append("") + out.append("| kind | canary total | ours total | per-tid (canary) | per-tid (ours) |") + out.append("|---|---:|---:|---|---|") + for kind in sorted(COUNTED_ENGINE_LOCAL_KINDS): + cc = (counted_canary or {}).get(kind, {}) + co = (counted_ours or {}).get(kind, {}) + cc_total = sum(cc.values()) + co_total = sum(co.values()) + cc_pertid = ", ".join(f"tid{t}={n}" for t, n in sorted(cc.items())) + co_pertid = ", ".join(f"tid{t}={n}" for t, n in sorted(co.items())) + out.append( + f"| `{kind}` | {cc_total} | {co_total} | " + f"{cc_pertid or '—'} | {co_pertid or '—'} |" + ) + out.append("") + out.append( + "*These kinds are positionally engine-local (see " + "`ENGINE_LOCAL_KINDS`) — the diff tool does NOT align them. " + "Per-engine total cadence is surfaced here so regressions " + "are visible at-a-glance.*" + ) + out.append("") + for r in per_tid_results: + out.append(f"## canary_tid={r['canary_tid']} → ours_tid={r['ours_tid']}") + out.append("") + if r["diverged_at"] is None: + out.append( + f"No divergence within the {r['matched']} compared events " + f"(canary has {r['canary_total']}, ours has {r['ours_total']})." + ) + out.append("") + continue + # Iterate 2.L: surface the RAW per-tid idx on each side of the + # divergence in addition to the matched-prefix position. The + # `diverged_at` value above is the matched-prefix offset (the + # historical "tid_event_idx" label is a misnomer — it equals the + # raw idx only when absorbers haven't fired upstream). Reading- + # error #41 conflated the two. We keep the legacy field for + # backward compatibility of report consumers and add the raw + # idxs explicitly. 
+ post_c = r.get("post_canary") or {} + post_o = r.get("post_ours") or {} + raw_c = post_c.get("tid_event_idx", "?") + raw_o = post_o.get("tid_event_idx", "?") + out.append( + f"First divergence at matched-prefix position {r['diverged_at']} " + f"(canary raw tid_event_idx={raw_c}, ours raw tid_event_idx={raw_o}): " + f"{r['diff_descr']}" + ) + out.append("") + out.append("**Pre-context (last 5 matching events):**") + out.append("```") + for ec, eo in r["pre_context"]: + out.append(f" canary: {render_event(ec)}") + out.append(f" ours: {render_event(eo)}") + out.append("```") + out.append("") + out.append("**Divergent event:**") + out.append("```") + out.append(f" canary: {render_event(r['post_canary'])}") + out.append(f" ours: {render_event(r['post_ours'])}") + out.append("```") + out.append("") + out.append("**Next event after the divergence (if any):**") + out.append("```") + if r["next_canary"]: + out.append(f" canary: {render_event(r['next_canary'])}") + else: + out.append(" canary: ") + if r["next_ours"]: + out.append(f" ours: {render_event(r['next_ours'])}") + else: + out.append(" ours: ") + out.append("```") + out.append("") + out.append("**Raw events (JSON):**") + out.append("```json") + out.append(json.dumps(r["post_canary"], sort_keys=True)) + out.append(json.dumps(r["post_ours"], sort_keys=True)) + out.append("```") + out.append("") + return "\n".join(out) + + +def main() -> int: + ap = argparse.ArgumentParser(description="Phase A event-log diff tool") + ap.add_argument("--canary", required=True, type=Path) + ap.add_argument("--ours", required=True, type=Path) + ap.add_argument("--out", type=Path, help="Write markdown report here (else stdout)") + ap.add_argument( + "--tid-map", + type=str, + help="Manual tid mapping like '6=1,7=2'. Overrides auto-mapping.", + ) + ap.add_argument( + "--validate-identical", + action="store_true", + help="Exit non-zero if any mapped tid pair has any divergence. 
" + "Used by gate-4 negative-test and by self-diff smoke tests.", + ) + ap.add_argument( + "--no-canonicalize-allocators", + action="store_true", + help="Disable per-tid ordinal canonicalization of allocator return " + "values (default: enabled). See ALLOCATOR_RETURN_FNS for the " + "covered set. Disabling reproduces the raw-VA comparison.", + ) + ap.add_argument( + "--no-canonicalize-host-heap-fields", + action="store_true", + help="Disable per-tid ordinal canonicalization of host-heap-derived " + "guest VA payload fields (default: enabled). See " + "HOST_HEAP_PAYLOAD_FIELDS_BY_KIND for the covered set (Phase C+22 " + "v1.7: `thread.create.ctx_ptr`). Disabling reproduces the raw-VA " + "comparison and re-surfaces the AUDIT-043 ε allocator-drift class.", + ) + ap.add_argument( + "--disable-absorber", + type=str, + default="", + help="Phase absorber-review (investigation-only): comma-separated " + "list of absorbers to disable. Valid names: " + "shared-global (C+18 handle.create), wait-begin (C+21 wait.begin), " + "nested-cs (D-extension RtlEnter/Leave fold). Empty default keeps " + "all absorbers ON (production behavior). Use to isolate which " + "absorber suppresses which divergence.", + ) + ap.add_argument( + "--emit-absorbed-events", + type=Path, + default=None, + help="Phase absorber-review (investigation-only): write every " + "absorbed event to a JSONL file at this path. Each line is a " + "JSON object with keys: absorber, side, canary_tid, ours_tid, " + "matched_at, event (verbatim), and for nested-cs also " + "pairs_consumed and window_offset.", + ) + args = ap.parse_args() + + VALID_ABSORBERS = {"shared-global", "wait-begin", "nested-cs"} + disabled_absorbers: frozenset[str] + if args.disable_absorber.strip(): + names = { + tok.strip() + for tok in args.disable_absorber.split(",") + if tok.strip() + } + unknown = names - VALID_ABSORBERS + if unknown: + sys.stderr.write( + f"--disable-absorber: unknown name(s) {sorted(unknown)!r}. 
" + f"Valid: {sorted(VALID_ABSORBERS)!r}\n" + ) + return 2 + disabled_absorbers = frozenset(names) + else: + disabled_absorbers = frozenset() + + absorbed_sink: list[dict] | None = ( + [] if args.emit_absorbed_events else None + ) + + canary_evs = load_events(args.canary) + ours_evs = load_events(args.ours) + + if not args.no_canonicalize_allocators: + canonicalize_allocator_returns(canary_evs) + canonicalize_allocator_returns(ours_evs) + + if not args.no_canonicalize_host_heap_fields: + canonicalize_host_heap_payload_fields(canary_evs) + canonicalize_host_heap_payload_fields(ours_evs) + + if args.tid_map: + tid_map = parse_tid_map_arg(args.tid_map) + else: + tid_map = auto_tid_map(canary_evs, ours_evs) + + if not tid_map: + sys.stderr.write( + "no tid mapping (auto-mapping found no shared first-kernel-call). " + "Pass --tid-map manually.\n" + ) + return 2 + + # Phase C+18 + C+21: pre-pass to collect all shared-global SIDs across + # both engines and all tids. Used by `diff_one_tid` to recognize + # "floating" `handle.create` events (C+18) and `wait.begin` events + # (C+21) whose presence on one side but not the other is + # observation-side — a different first-toucher thread (C+18) or a + # contention-jitter-driven slow-path entry (C+21). See schema-v1.md + # §"Shared-global SIDs" and §"Wait-begin floating absorb". 
+ floating_sids = collect_shared_global_sids(canary_evs, ours_evs) + + per_tid: list[dict] = [] + for c_tid, o_tid in sorted(tid_map.items()): + if c_tid not in canary_evs: + sys.stderr.write(f"warn: canary tid {c_tid} not in stream; skipping\n") + continue + if o_tid not in ours_evs: + sys.stderr.write(f"warn: ours tid {o_tid} not in stream; skipping\n") + continue + per_tid.append( + diff_one_tid( + canary_evs[c_tid], + ours_evs[o_tid], + c_tid, + o_tid, + cross_tid_floating_sids=floating_sids, + disabled_absorbers=disabled_absorbers, + absorbed_sink=absorbed_sink, + ) + ) + + if absorbed_sink is not None and args.emit_absorbed_events is not None: + with args.emit_absorbed_events.open("w", encoding="utf-8") as f: + for rec in absorbed_sink: + f.write(json.dumps(rec, sort_keys=True)) + f.write("\n") + sys.stderr.write( + f"emitted {len(absorbed_sink)} absorbed events to " + f"{args.emit_absorbed_events}\n" + ) + + # v1.6 (AUDIT-070 bridge): count `sema.release` (and any future + # counted engine-local kinds) per-engine for surfacing in the + # report. These do not participate in matched-prefix. 
+ counted_canary = count_engine_local_kinds(canary_evs) + counted_ours = count_engine_local_kinds(ours_evs) + + report = render_report(per_tid, counted_canary, counted_ours) + if args.out: + args.out.write_text(report, encoding="utf-8") + sys.stderr.write(f"diff report written to {args.out}\n") + else: + sys.stdout.write(report) + + if args.validate_identical: + for r in per_tid: + if r["diverged_at"] is not None: + sys.stderr.write( + f"validate-identical: divergence in canary_tid={r['canary_tid']} " + f"at tid_event_idx={r['diverged_at']} ({r['diff_descr']})\n" + ) + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/diff-events/test_build_manifest.py b/tools/diff-events/test_build_manifest.py new file mode 100644 index 0000000..ac69fe0 --- /dev/null +++ b/tools/diff-events/test_build_manifest.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 +"""Unit tests for `build_contention_manifest.py`. + +Run as `python3 test_build_manifest.py` — prints `PASS` per test. 
+""" +import json +import sys +import tempfile +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) + +from build_contention_manifest import build_manifest, render_summary # noqa: E402 + + +def write_jsonl(lines: list[str]) -> Path: + tmp = tempfile.NamedTemporaryFile( + mode="w", suffix=".jsonl", delete=False, encoding="utf-8" + ) + for line in lines: + tmp.write(line + "\n") + tmp.close() + return Path(tmp.name) + + +def mk_event( + kind: str, + tid: int, + idx: int, + payload: dict, + engine: str = "canary", +) -> str: + return json.dumps( + { + "schema_version": 1, + "engine": engine, + "kind": kind, + "tid": tid, + "tid_event_idx": idx, + "guest_cycle": 0, + "host_ns": 0, + "deterministic": True, + "payload": payload, + } + ) + + +def test_basic_extract() -> None: + src = write_jsonl([ + mk_event("import.call", 6, 0, {"name": "Foo"}), + mk_event( + "contention.observed", + 6, + 104664, + {"cs_ptr": "0xbc65c890", "site_sid": "c26a128b", "contended": True}, + ), + mk_event("import.call", 6, 1, {"name": "Bar"}), + ]) + m = build_manifest(src) + assert m["version"] == 1 + assert m["summary"]["total_input_events"] == 3 + assert m["summary"]["total_contention_events_kept"] == 1 + assert m["summary"]["per_tid_counts"] == {"6": 1} + e = m["entries"][0] + assert e["tid"] == 6 and e["tid_event_idx"] == 104664 + assert e["site_sid"] == "c26a128b" and e["cs_ptr"] == "0xbc65c890" + assert e["contended"] is True + print("PASS test_basic_extract") + + +def test_filters_non_contention_kinds() -> None: + src = write_jsonl([ + mk_event("handle.create", 6, 0, {"handle_semantic_id": "x"}), + mk_event("wait.begin", 6, 1, {"handles_semantic_ids": ["x"]}), + mk_event("kernel.call", 6, 2, {"name": "X"}), + mk_event( + "contention.observed", + 7, + 42, + {"cs_ptr": "0x1000", "site_sid": "deadbeef", "contended": True}, + ), + ]) + m = build_manifest(src) + assert m["summary"]["total_contention_events_kept"] == 1 + assert m["entries"][0]["tid"] == 7 + print("PASS 
test_filters_non_contention_kinds") + + +def test_filters_contended_false() -> None: + # Stage 1's emitter never emits contended=false today, but defensive + # filter must skip those if a future variant adds them. + src = write_jsonl([ + mk_event( + "contention.observed", + 6, + 10, + {"cs_ptr": "0xa", "site_sid": "11", "contended": False}, + ), + mk_event( + "contention.observed", + 6, + 11, + {"cs_ptr": "0xa", "site_sid": "11", "contended": True}, + ), + ]) + m = build_manifest(src) + assert m["summary"]["total_contention_events_kept"] == 1 + assert m["entries"][0]["tid_event_idx"] == 11 + print("PASS test_filters_contended_false") + + +def test_sorts_by_tid_then_idx() -> None: + src = write_jsonl([ + mk_event( + "contention.observed", + 9, + 5, + {"cs_ptr": "0x9", "site_sid": "99", "contended": True}, + ), + mk_event( + "contention.observed", + 6, + 200, + {"cs_ptr": "0xb", "site_sid": "bb", "contended": True}, + ), + mk_event( + "contention.observed", + 6, + 100, + {"cs_ptr": "0xa", "site_sid": "aa", "contended": True}, + ), + ]) + m = build_manifest(src) + keys = [(e["tid"], e["tid_event_idx"]) for e in m["entries"]] + assert keys == [(6, 100), (6, 200), (9, 5)], keys + print("PASS test_sorts_by_tid_then_idx") + + +def test_deduplicates_same_tid_idx() -> None: + src = write_jsonl([ + mk_event( + "contention.observed", + 6, + 42, + {"cs_ptr": "0xa", "site_sid": "aa", "contended": True}, + ), + mk_event( + "contention.observed", + 6, + 42, + {"cs_ptr": "0xb", "site_sid": "bb", "contended": True}, + ), + ]) + m = build_manifest(src) + assert m["summary"]["total_contention_events_kept"] == 1 + assert m["summary"]["skipped_duplicate_keys"] == 1 + # Keeps the first occurrence. + assert m["entries"][0]["cs_ptr"] == "0xa" + print("PASS test_deduplicates_same_tid_idx") + + +def test_skips_missing_fields() -> None: + src = write_jsonl([ + # Missing site_sid. + mk_event( + "contention.observed", + 6, + 1, + {"cs_ptr": "0xa", "contended": True}, + ), + # Missing cs_ptr. 
+ mk_event( + "contention.observed", + 6, + 2, + {"site_sid": "aa", "contended": True}, + ), + # Both present — kept. + mk_event( + "contention.observed", + 6, + 3, + {"cs_ptr": "0xb", "site_sid": "bb", "contended": True}, + ), + ]) + m = build_manifest(src) + assert m["summary"]["total_contention_events_kept"] == 1 + assert m["summary"]["skipped_bad_lines"] == 2 + print("PASS test_skips_missing_fields") + + +def test_handles_bad_json_lines() -> None: + src = write_jsonl([ + "not-json", + mk_event( + "contention.observed", + 6, + 1, + {"cs_ptr": "0xa", "site_sid": "aa", "contended": True}, + ), + "{\"truncated\":", + ]) + m = build_manifest(src) + assert m["summary"]["total_contention_events_kept"] == 1 + assert m["summary"]["skipped_bad_lines"] == 2 + print("PASS test_handles_bad_json_lines") + + +def test_render_summary_human_readable() -> None: + src = write_jsonl([ + mk_event( + "contention.observed", + 6, + 1, + {"cs_ptr": "0xa", "site_sid": "aa", "contended": True}, + ), + mk_event( + "contention.observed", + 14, + 100, + {"cs_ptr": "0xb", "site_sid": "bb", "contended": True}, + ), + ]) + m = build_manifest(src) + out = render_summary(m) + assert "contention events kept: 2" in out + assert "tid= 6 1" in out + assert "tid= 14 1" in out + print("PASS test_render_summary_human_readable") + + +def test_empty_input_yields_zero_kept() -> None: + src = write_jsonl([mk_event("import.call", 0, 0, {"name": "X"})]) + m = build_manifest(src) + assert m["summary"]["total_contention_events_kept"] == 0 + assert m["entries"] == [] + print("PASS test_empty_input_yields_zero_kept") + + +def test_tid_map_translates_canary_to_ours() -> None: + src = write_jsonl([ + mk_event( + "contention.observed", + 6, + 104664, + {"cs_ptr": "0xbc65c890", "site_sid": "c26a128bf45411f7", "contended": True}, + ), + mk_event( + "contention.observed", + 7, + 10, + {"cs_ptr": "0xa", "site_sid": "aa", "contended": True}, + ), + ]) + m = build_manifest(src, tid_map={6: 1, 7: 2}) + assert 
m["entries"][0]["tid"] == 1, m["entries"][0] + assert m["entries"][1]["tid"] == 2 + print("PASS test_tid_map_translates_canary_to_ours") + + +def test_tid_map_drops_unmapped_canary_tids() -> None: + src = write_jsonl([ + mk_event( + "contention.observed", + 6, + 100, + {"cs_ptr": "0xa", "site_sid": "aa", "contended": True}, + ), + mk_event( + "contention.observed", + 99, + 200, + {"cs_ptr": "0xb", "site_sid": "bb", "contended": True}, + ), + ]) + m = build_manifest(src, tid_map={6: 1}) + assert m["summary"]["total_contention_events_kept"] == 1 + assert m["summary"]["skipped_unmapped_tids"] == 1 + assert m["entries"][0]["tid"] == 1 + print("PASS test_tid_map_drops_unmapped_canary_tids") + + +if __name__ == "__main__": + tests = [ + test_basic_extract, + test_filters_non_contention_kinds, + test_filters_contended_false, + test_sorts_by_tid_then_idx, + test_deduplicates_same_tid_idx, + test_skips_missing_fields, + test_handles_bad_json_lines, + test_render_summary_human_readable, + test_empty_input_yields_zero_kept, + test_tid_map_translates_canary_to_ours, + test_tid_map_drops_unmapped_canary_tids, + ] + for t in tests: + t() + print(f"\nALL {len(tests)} TESTS PASS") diff --git a/tools/diff-events/test_diff_events.py b/tools/diff-events/test_diff_events.py new file mode 100644 index 0000000..4a950fb --- /dev/null +++ b/tools/diff-events/test_diff_events.py @@ -0,0 +1,1656 @@ +#!/usr/bin/env python3 +"""Unit tests for `diff_events.py`. Run with: + python3 test_diff_events.py +Exits 0 on success, non-zero on failure. + +Covers: +- FNV-1a 64-bit (`_fnv1a_64`) parity with the engines' embedded vectors. +- Shared-global SID determinism (`shared_global_sid`) — same input → same + output, independent of any host-side state. +- Cross-tid floating `handle.create` matching in `diff_one_tid` — the + Phase C+18 fix for the D-NEW-3 first-toucher race. 
+- Cross-tid floating `wait.begin` matching in `diff_one_tid` — the + Phase C+21 fix for the scheduling-jitter contention window + (reading-error #32). Includes shared-global wait detection, + multi-handle wait_type=all, and negative tests for per-thread waits. +""" + +import json +import sys +import tempfile +from pathlib import Path + +# Add this directory to sys.path so we can import diff_events as a module. +sys.path.insert(0, str(Path(__file__).parent)) + +import diff_events as de # noqa: E402 + +FAILURES: list[str] = [] + + +def check(cond: bool, name: str) -> None: + if cond: + print(f" PASS {name}") + else: + print(f" FAIL {name}") + FAILURES.append(name) + + +# === FNV-1a vector === + +def test_fnv1a_vector() -> None: + """Standard FNV-1a 64-bit test vector for the ASCII string `foobar`.""" + # 0x85944171f73967e8 per http://www.isthe.com/chongo/tech/comp/fnv/ + h = de._fnv1a_64(b"foobar") + check(h == 0x85944171F73967E8, f"FNV-1a('foobar') = 0x{h:016x}") + + +# === Shared-global SID determinism === + +def test_shared_global_sid_determinism() -> None: + a = de.shared_global_sid(0x828A3230, 3) + b = de.shared_global_sid(0x828A3230, 3) + check(a == b, "shared_global_sid is deterministic for same input") + c = de.shared_global_sid(0x828A3234, 3) + check(a != c, "shared_global_sid differs for distinct pointer") + d = de.shared_global_sid(0x828A3230, 1) + check(a != d, "shared_global_sid differs for distinct type") + + +def test_shared_global_sid_matches_rust() -> None: + """Lock-step parity: the SID for `(0x828A3230, 3)` must match the + Rust unit test's expected output. 
Recompute via the same FNV-1a + recipe and confirm the formatted string matches.""" + buf = bytearray(20) + buf[0:4] = (0xC01AB005).to_bytes(4, "little") + buf[4:8] = (0).to_bytes(4, "little") + buf[8:16] = (0x828A3230).to_bytes(8, "little") + buf[16:20] = (3).to_bytes(4, "little") + expected = f"{de._fnv1a_64(bytes(buf)):016x}" + got = de.shared_global_sid(0x828A3230, 3) + check(got == expected, f"shared_global_sid('0x828A3230', 3) matches recipe: {got}") + + +# === Detection of shared-global handle.create events === + +def test_is_shared_global_handle_create() -> None: + sid = de.shared_global_sid(0x828A3230, 3) + ev = { + "kind": "handle.create", + "payload": { + "handle_semantic_id": sid, + "object_type": 3, + "raw_handle_id": "0x828a3230", + "object_name": None, + }, + } + check(de.is_shared_global_handle_create(ev), "detects shared-global handle.create") + # A regular per-thread handle.create (different SID for same pointer/type) + # MUST NOT be detected as shared-global. + ev_regular = { + "kind": "handle.create", + "payload": { + "handle_semantic_id": "deadbeefdeadbeef", + "object_type": 3, + "raw_handle_id": "0x828a3230", + "object_name": None, + }, + } + check( + not de.is_shared_global_handle_create(ev_regular), + "does NOT detect regular handle.create as shared-global", + ) + # Non-handle.create kind → False. 
+ check( + not de.is_shared_global_handle_create({"kind": "wait.begin", "payload": {}}), + "does NOT misclassify wait.begin", + ) + + +# === diff_one_tid: floating cross-tid matching === + +def _hdr(engine: str = "canary") -> str: + return json.dumps( + { + "schema_version": 1, + "engine": engine, + "kind": "schema_version", + "tid": 0, + "tid_event_idx": 0, + "guest_cycle": 0, + "host_ns": 0, + "deterministic": True, + "payload": {"version": 1, "emitter_build": "test"}, + } + ) + + +def _ev(tid: int, idx: int, kind: str, payload: dict, engine: str = "canary") -> str: + return json.dumps( + { + "schema_version": 1, + "engine": engine, + "kind": kind, + "tid": tid, + "tid_event_idx": idx, + "guest_cycle": 0, + "host_ns": 0, + "deterministic": True, + "payload": payload, + } + ) + + +def test_floating_handle_create_cross_tid() -> None: + """Simulate the C+17 D-NEW-3 regression precisely: + - Both engines emit a shared-global handle.create for `0x828a3230`. + - In canary, the create fires on tid=15. In ours, it fires on tid=10 + (mapped from canary's tid=15) — but the create lands on a DIFFERENT + ours tid first, so ours tid=10 has NO create, only a wait.begin. + Hmm — actually the regression is the OPPOSITE direction: ours tid=10 + has the create (it's the first toucher on ours side); canary tid=15 + does NOT (another canary tid was first). Replicate that ordering. 
+ """ + sid = de.shared_global_sid(0x828A3230, 3) + # Canary stream: + # tid=15: [0] import.call, [1] kernel.call, [2] wait.begin (no create) + # tid=99: [0] handle.create (the floating create — emitted elsewhere) + canary_tid15 = [ + json.loads(_ev(15, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 176, "name": "KeWaitForSingleObject"})), + json.loads(_ev(15, 1, "kernel.call", {"name": "KeWaitForSingleObject", "args": {}, "args_resolved": {}})), + json.loads(_ev(15, 2, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), + json.loads(_ev(15, 3, "kernel.return", {"name": "KeWaitForSingleObject", "return_value": 0, "status": "0x00000000", "side_effects": []})), + ] + canary_tid99 = [ + json.loads(_ev(99, 0, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None})), + ] + # Ours stream — same chain, but the create fires on the SAME tid as the wait. + ours_tid10 = [ + json.loads(_ev(10, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 176, "name": "KeWaitForSingleObject"}, "ours")), + json.loads(_ev(10, 1, "kernel.call", {"name": "KeWaitForSingleObject", "args": {}, "args_resolved": {}}, "ours")), + json.loads(_ev(10, 2, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None}, "ours")), + json.loads(_ev(10, 3, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"}, "ours")), + json.loads(_ev(10, 4, "kernel.return", {"name": "KeWaitForSingleObject", "return_value": 0, "status": "0x00000000", "side_effects": []}, "ours")), + ] + floating = {sid} + r = de.diff_one_tid(canary_tid15, ours_tid10, 15, 10, cross_tid_floating_sids=floating) + check( + r["diverged_at"] is None, + f"no divergence with floating-create match (matched={r['matched']}, " + f"skipped_ours={len(r['skipped_ours'])})", + ) + check( + len(r["skipped_ours"]) == 1, + f"exactly one 
ours handle.create absorbed (got {len(r['skipped_ours'])})", + ) + check( + r["matched"] == 4, + f"matched 4 events (the full canary tid=15 stream, got {r['matched']})", + ) + + +def test_strict_alignment_without_floating() -> None: + """When `cross_tid_floating_sids` is None/empty, the legacy strict + behavior must hold — the same input as above should diverge at idx=2.""" + sid = de.shared_global_sid(0x828A3230, 3) + canary_tid15 = [ + json.loads(_ev(15, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 176, "name": "KeWaitForSingleObject"})), + json.loads(_ev(15, 1, "kernel.call", {"name": "KeWaitForSingleObject", "args": {}, "args_resolved": {}})), + json.loads(_ev(15, 2, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), + ] + ours_tid10 = [ + json.loads(_ev(10, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 176, "name": "KeWaitForSingleObject"}, "ours")), + json.loads(_ev(10, 1, "kernel.call", {"name": "KeWaitForSingleObject", "args": {}, "args_resolved": {}}, "ours")), + json.loads(_ev(10, 2, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None}, "ours")), + ] + r = de.diff_one_tid(canary_tid15, ours_tid10, 15, 10, cross_tid_floating_sids=None) + check(r["diverged_at"] == 2, f"legacy strict diff diverges at 2 (got {r['diverged_at']})") + + +def test_non_floating_real_divergence_still_caught() -> None: + """Real divergences MUST still be caught (the fix must not over-suppress). 
+ Construct a stream where ours has an EXTRA handle.create whose SID is + NOT a shared-global SID — must report divergence.""" + canary = [ + json.loads(_ev(6, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 1, "name": "X"})), + ] + ours = [ + json.loads(_ev(1, 0, "handle.create", {"handle_semantic_id": "deadbeefdeadbeef", "object_type": 1, "raw_handle_id": "0x4", "object_name": None}, "ours")), + json.loads(_ev(1, 1, "import.call", {"module": "xboxkrnl.exe", "ord": 1, "name": "X"}, "ours")), + ] + # No SID in floating set. + r = de.diff_one_tid(canary, ours, 6, 1, cross_tid_floating_sids=set()) + check(r["diverged_at"] is not None, f"real divergence still reported (got diverged_at={r['diverged_at']})") + + +def test_floating_sids_collection_via_main() -> None: + """End-to-end: load two JSONL files from disk and verify the floating + SID set picks up the shared-global create. Uses tempfiles.""" + sid = de.shared_global_sid(0x828A3230, 3) + with tempfile.TemporaryDirectory() as tmpdir: + cpath = Path(tmpdir) / "c.jsonl" + opath = Path(tmpdir) / "o.jsonl" + with cpath.open("w") as f: + f.write(_hdr("canary") + "\n") + f.write(_ev(99, 0, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None}) + "\n") + with opath.open("w") as f: + f.write(_hdr("ours") + "\n") + f.write(_ev(10, 0, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None}, "ours") + "\n") + cmap = de.load_events(cpath) + omap = de.load_events(opath) + floating = set() + for evs_by_tid in (cmap, omap): + for _tid, evs in evs_by_tid.items(): + for ev in evs: + if de.is_shared_global_handle_create(ev): + floating.add(de._ev_handle_create_sid(ev)) + check(sid in floating, f"floating SID set contains shared-global SID {sid}") + + +# === Phase C+21: wait.begin floating-absorb === + +def test_is_shared_global_wait_begin_positive() -> None: + sid = de.shared_global_sid(0x828A3230, 1) + ev = { 
+ "kind": "wait.begin", + "payload": { + "handles_semantic_ids": [sid], + "timeout_ns": -1, + "alertable": False, + "wait_type": "any", + }, + } + check( + de.is_shared_global_wait_begin(ev, {sid}), + "detects wait.begin referencing shared-global SID", + ) + + +def test_is_shared_global_wait_begin_negative() -> None: + """A wait.begin whose handles are ALL per-thread SIDs must NOT be + flagged as shared-global — preserves strict matching for non- + contention waits.""" + ev = { + "kind": "wait.begin", + "payload": { + "handles_semantic_ids": ["deadbeefdeadbeef"], + "timeout_ns": -1, + "alertable": False, + "wait_type": "any", + }, + } + check( + not de.is_shared_global_wait_begin(ev, set()), + "does NOT classify per-thread wait.begin as shared-global (empty set)", + ) + check( + not de.is_shared_global_wait_begin(ev, {"feedface12345678"}), + "does NOT classify per-thread wait.begin as shared-global (SID not in set)", + ) + # Wrong kind. + check( + not de.is_shared_global_wait_begin( + {"kind": "handle.create", "payload": {"handles_semantic_ids": ["x"]}}, + {"x"}, + ), + "does NOT misclassify handle.create as wait.begin", + ) + + +def test_is_shared_global_wait_begin_mixed_handles_wait_all() -> None: + """wait_type=all with a mix of per-thread + shared-global SIDs must + still be classified as shared-global. 
The whole wait is timing- + dependent because at least one of its handles is on a process- + global dispatcher.""" + shared = de.shared_global_sid(0x828A3230, 1) + ev = { + "kind": "wait.begin", + "payload": { + "handles_semantic_ids": ["deadbeefdeadbeef", shared, "aaaaaaaaaaaaaaaa"], + "timeout_ns": 5000, + "alertable": False, + "wait_type": "all", + }, + } + check( + de.is_shared_global_wait_begin(ev, {shared}), + "wait_type=all with one shared-global handle is shared-global", + ) + + +def test_floating_wait_begin_cross_tid_canary_only() -> None: + """Phase C+20 / C+21 canonical case: canary takes the contended slow + path and emits wait.begin; ours fast-paths and emits only + kernel.return. The wait.begin should be absorbed.""" + shared = de.shared_global_sid(0x828A3230, 1) + canary_tid6 = [ + json.loads(_ev(6, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 277, "name": "RtlEnterCriticalSection"})), + json.loads(_ev(6, 1, "kernel.call", {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}})), + # CONTENDED slow path: emit wait.begin + json.loads(_ev(6, 2, "wait.begin", {"handles_semantic_ids": [shared], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), + json.loads(_ev(6, 3, "kernel.return", {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []})), + ] + ours_tid1 = [ + json.loads(_ev(1, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 277, "name": "RtlEnterCriticalSection"}, "ours")), + json.loads(_ev(1, 1, "kernel.call", {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}}, "ours")), + # FAST path: no wait.begin + json.loads(_ev(1, 2, "kernel.return", {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []}, "ours")), + ] + r = de.diff_one_tid(canary_tid6, ours_tid1, 6, 1, cross_tid_floating_sids={shared}) + check( + r["diverged_at"] is None, + f"no divergence after absorbing canary wait.begin (matched={r['matched']}, " + 
f"skipped_canary_wait={len(r['skipped_canary_wait'])})", + ) + check( + len(r["skipped_canary_wait"]) == 1, + f"exactly one canary wait.begin absorbed (got {len(r['skipped_canary_wait'])})", + ) + check( + r["matched"] == 3, + f"matched 3 events (import.call + kernel.call + kernel.return, got {r['matched']})", + ) + + +def test_floating_wait_begin_cross_tid_ours_only() -> None: + """Mirror direction: ours takes the slow path, canary fast-paths.""" + shared = de.shared_global_sid(0x828A3230, 1) + canary = [ + json.loads(_ev(6, 0, "kernel.call", {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}})), + json.loads(_ev(6, 1, "kernel.return", {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []})), + ] + ours = [ + json.loads(_ev(1, 0, "kernel.call", {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}}, "ours")), + json.loads(_ev(1, 1, "wait.begin", {"handles_semantic_ids": [shared], "timeout_ns": -1, "alertable": False, "wait_type": "any"}, "ours")), + json.loads(_ev(1, 2, "kernel.return", {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []}, "ours")), + ] + r = de.diff_one_tid(canary, ours, 6, 1, cross_tid_floating_sids={shared}) + check( + r["diverged_at"] is None, + f"no divergence after absorbing ours wait.begin (matched={r['matched']})", + ) + check( + len(r["skipped_ours_wait"]) == 1, + f"exactly one ours wait.begin absorbed (got {len(r['skipped_ours_wait'])})", + ) + + +def test_per_thread_wait_begin_NOT_absorbed() -> None: + """Discipline: a wait.begin whose handles are all per-thread SIDs + must NOT be absorbed — strict matching preserved.""" + # SID NOT in floating set. 
+ sid_per_thread = "1234567890abcdef" + canary = [ + json.loads(_ev(6, 0, "kernel.call", {"name": "NtWaitForSingleObjectEx", "args": {}, "args_resolved": {}})), + json.loads(_ev(6, 1, "wait.begin", {"handles_semantic_ids": [sid_per_thread], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), + json.loads(_ev(6, 2, "kernel.return", {"name": "NtWaitForSingleObjectEx", "return_value": 0, "status": "0x00000000", "side_effects": []})), + ] + ours = [ + json.loads(_ev(1, 0, "kernel.call", {"name": "NtWaitForSingleObjectEx", "args": {}, "args_resolved": {}}, "ours")), + # Missing wait.begin — but the SID is per-thread, so must NOT be absorbed. + json.loads(_ev(1, 1, "kernel.return", {"name": "NtWaitForSingleObjectEx", "return_value": 0, "status": "0x00000000", "side_effects": []}, "ours")), + ] + # Floating set is non-empty (some OTHER shared-global SID), but the + # wait's SID is not in it. + r = de.diff_one_tid(canary, ours, 6, 1, cross_tid_floating_sids={"aaaaaaaaaaaaaaaa"}) + check( + r["diverged_at"] is not None, + f"per-thread wait.begin divergence still caught (got diverged_at={r['diverged_at']})", + ) + check( + r["diff_descr"] is not None and "kind" in r["diff_descr"], + f"divergence is a kind mismatch (got descr={r['diff_descr']!r})", + ) + + +def test_wait_begin_strict_match_unchanged() -> None: + """When both sides have a matching wait.begin on a shared-global SID, + NO absorption occurs — strict positional match advances both + pointers.""" + shared = de.shared_global_sid(0x828A3230, 1) + canary = [ + json.loads(_ev(6, 0, "wait.begin", {"handles_semantic_ids": [shared], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), + ] + ours = [ + json.loads(_ev(1, 0, "wait.begin", {"handles_semantic_ids": [shared], "timeout_ns": -1, "alertable": False, "wait_type": "any"}, "ours")), + ] + r = de.diff_one_tid(canary, ours, 6, 1, cross_tid_floating_sids={shared}) + check( + r["diverged_at"] is None and r["matched"] == 1, + f"strict match preserved when 
both sides have wait.begin (matched={r['matched']})", + ) + check( + len(r["skipped_canary_wait"]) == 0 and len(r["skipped_ours_wait"]) == 0, + f"no spurious absorption when both match (canary_wait={len(r['skipped_canary_wait'])}, ours_wait={len(r['skipped_ours_wait'])})", + ) + + +def test_collect_shared_global_sids_cross_tid_heuristic() -> None: + """Phase C+21 heuristic: any SID referenced (handle.create OR + wait.begin) by 2+ distinct tids in either engine is shared-global, + even if the SID isn't recipe-matching (covers canary's + `EmitHandleCreateSharedGlobal` raw_handle_id asymmetry). + """ + # Construct canary-like stream where a SID is referenced on 2 tids + # by wait.begin (no recipe-matching handle.create needed). + sid = "feedface12345678" # NOT recipe-matching + canary_evs = { + 6: [ + json.loads(_ev(6, 0, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), + ], + 9: [ + json.loads(_ev(9, 0, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), + ], + } + ours_evs = {1: []} + sids = de.collect_shared_global_sids(canary_evs, ours_evs) + check( + sid in sids, + f"cross-tid heuristic detects shared-global SID {sid} from multi-tid wait.begin", + ) + + +def test_engine_local_contention_observed_skipped_both_sides() -> None: + """Phase D Stage 4: `contention.observed` is engine-local — diff + tool advances past it on EITHER side without alignment, so the + matched-prefix is unaffected by presence/absence/divergence in + that kind.""" + # Canary has contention.observed at idx 2 with one cs_ptr; ours has + # one at idx 2 with a different cs_ptr. Both should be skipped; the + # rest of the stream matches.
+ canary = [ + {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": 1, "payload": {"name": "Foo"}}, + {"kind": "contention.observed", "tid": 6, "tid_event_idx": 2, + "payload": {"cs_ptr": "0xbc65c890", "site_sid": "aaaa", "contended": True}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": 3, + "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, + ] + ours = [ + {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": 1, "payload": {"name": "Foo"}}, + {"kind": "contention.observed", "tid": 6, "tid_event_idx": 2, + "payload": {"cs_ptr": "0x40544890", "site_sid": "bbbb", "contended": True}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": 3, + "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, + ] + result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) + check( + result["diverged_at"] is None, + f"contention.observed engine-local — no divergence (got diverged_at={result['diverged_at']})", + ) + check( + result["matched"] == 3, + f"matched = 3 (import.call + kernel.call + kernel.return; contention.observed skipped) got {result['matched']}", + ) + + +def test_engine_local_one_sided_contention_observed() -> None: + """When only canary emits contention.observed (cvar-ON canary vs + default ours, no Stage-3 manifest), the diff tool must still advance + past canary's event without divergence.""" + canary = [ + {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": 1, "payload": {"name": "Foo"}}, + {"kind": "contention.observed", "tid": 6, "tid_event_idx": 2, + "payload": {"cs_ptr": "0x1", "site_sid": "x", "contended": True}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": 3, + "payload": {"name": "Foo", "return_value": 0, 
"status": "0x00000000", "side_effects": []}}, + ] + ours = [ + {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": 1, "payload": {"name": "Foo"}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": 2, + "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, + ] + result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) + check( + result["diverged_at"] is None, + f"one-sided contention.observed — no divergence (got diverged_at={result['diverged_at']})", + ) + check( + result["matched"] == 3, + f"one-sided: matched = 3 (got {result['matched']})", + ) + + +def _enter_block(start_idx: int) -> list[dict]: + """Three events for a fast-path RtlEnterCriticalSection call.""" + return [ + {"kind": "import.call", "tid": 6, "tid_event_idx": start_idx, + "payload": {"module": "xboxkrnl.exe", "ord": 293, + "name": "RtlEnterCriticalSection"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": start_idx + 1, + "payload": {"name": "RtlEnterCriticalSection", "args": {}, + "args_resolved": {}}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": start_idx + 2, + "payload": {"name": "RtlEnterCriticalSection", "return_value": 0, + "status": "0x00000000", "side_effects": []}}, + ] + + +def _leave_block(start_idx: int) -> list[dict]: + """Three events for a fast-path RtlLeaveCriticalSection call.""" + return [ + {"kind": "import.call", "tid": 6, "tid_event_idx": start_idx, + "payload": {"module": "xboxkrnl.exe", "ord": 304, + "name": "RtlLeaveCriticalSection"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": start_idx + 1, + "payload": {"name": "RtlLeaveCriticalSection", "args": {}, + "args_resolved": {}}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": start_idx + 2, + "payload": {"name": "RtlLeaveCriticalSection", "return_value": 0, + "status": "0x00000000", "side_effects": []}}, + ] + + +def _ntclose_block(start_idx: int, sid: str) 
-> list[dict]: + """Four events for an NtClose call ending in handle.destroy.""" + return [ + {"kind": "import.call", "tid": 6, "tid_event_idx": start_idx, + "payload": {"module": "xboxkrnl.exe", "ord": 207, "name": "NtClose"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": start_idx + 1, + "payload": {"name": "NtClose", "args": {}, "args_resolved": {}}}, + {"kind": "handle.destroy", "tid": 6, "tid_event_idx": start_idx + 2, + "payload": {"handle_semantic_id": sid, + "raw_handle_id": "0x00001068", + "prior_refcount": 1}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": start_idx + 3, + "payload": {"name": "NtClose", "return_value": 0, + "status": "0x00000000", "side_effects": []}}, + ] + + +def test_nested_cs_cleanup_block_absorbed_when_convergent() -> None: + """Phase D D-extension (v1.5): canary has an extra nested + RtlEnter+RtlLeave block between the outer Enter and the outer Leave; + ours does not. Both sides converge on the same Outer Leave + NtClose + immediately after. Absorber folds canary's nested block so the + matched-prefix continues past the cap. + + Mirrors the Phase D 104,607 cap shape exactly: + canary: E ... E L L NtClose + ours: E L NtClose + where the matched-on-SID handle.destroy in NtClose pins both sides + to the same logical operation. + """ + sid = "f02c5bda6f21992e" + # Canary: Enter, nested Enter+Leave, outer Leave, NtClose. + canary = ( + _enter_block(0) # 0..2 outer Enter + + _enter_block(3) # 3..5 NESTED Enter (the divergent extra) + + _leave_block(6) # 6..8 inner Leave + + _leave_block(9) # 9..11 outer Leave + + _ntclose_block(12, sid) # 12..15 + ) + # Ours: Enter, outer Leave, NtClose. No nested block. 
+ ours = ( + _enter_block(0) # 0..2 + + _leave_block(3) # 3..5 + + _ntclose_block(6, sid) # 6..9 + ) + result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) + check( + result["diverged_at"] is None, + f"nested cleanup absorbed — no divergence (got diverged_at={result['diverged_at']})", + ) + # Canary has 16 events; the 6 nested-block events should be absorbed + # and the remaining 10 events match ours's 10 events. + check( + result["matched"] == 10, + f"matched = 10 (E + L + NtClose 4 events, after absorbing 6 nested) got {result['matched']}", + ) + + +def test_nested_cs_cleanup_NOT_absorbed_when_followup_diverges() -> None: + """Despite the NOT_absorbed name, this covers the CONVERGING case: + canary has nested E+L and the NtClose blocks differ only in handle + SID (same raw_handle_id), so the absorber fires and no divergence is + reported. Reading-error #23: don't fold REAL guest divergences. + """ + sid_canary = "deadbeef00000000" + sid_ours = "1234567812345678" + canary = ( + _enter_block(0) + + _enter_block(3) # nested + + _leave_block(6) # nested + + _leave_block(9) # outer + + _ntclose_block(12, sid_canary) # closes DIFFERENT handle + ) + ours = ( + _enter_block(0) + + _leave_block(3) + + _ntclose_block(6, sid_ours) # closes DIFFERENT handle + ) + result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) + # Note: handle.destroy SID is in SKIP_PAYLOAD_FIELDS_BY_KIND, so + # different SIDs alone don't trigger a divergence. The absorber + # uses raw_handle_id (NOT skipped at the diff layer in this test + # because the absorber inspects raw payload). With the same raw_handle_id + # the absorber DOES converge — this test exercises the "matching + # follow-up" path. The negative case (truly divergent follow-up) is + # covered when there's NO NtClose after the canary nested block; see + # next test.
+ check( + result["diverged_at"] is None, + f"nested cleanup absorbed when follow-up converges via handle.destroy alignment (got diverged_at={result['diverged_at']})", + ) + + +def test_nested_cs_cleanup_NOT_absorbed_when_canary_has_no_followup() -> None: + """Negative: canary's nested block is followed by something + completely different from ours's next event. Absorber must not fire. + """ + canary = ( + _enter_block(0) + + _enter_block(3) # nested + + _leave_block(6) # nested + + [{"kind": "import.call", "tid": 6, "tid_event_idx": 9, + "payload": {"module": "xboxkrnl.exe", "ord": 999, + "name": "NtCreateMutant"}}] # unrelated next call + ) + ours = ( + _enter_block(0) + + _leave_block(3) + + _ntclose_block(6, "abc") + ) + result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) + # canary's outer Enter (event 0) matches ours's outer Enter. + # Then canary's nested-Enter (event 3) vs ours's outer-Leave (event 3) — + # absorber lookahead doesn't find ours's import.call RtlLeave + # pattern past the nested pair, so absorption is skipped and the + # divergence is reported. + check( + result["diverged_at"] is not None, + f"unconvergent nested block NOT absorbed — divergence expected (got diverged_at={result['diverged_at']})", + ) + + +def test_collect_shared_global_sids_single_tid_excluded() -> None: + """Negative: a SID referenced on only ONE tid is NOT classified + shared-global by the cross-tid heuristic. 
Prevents over-absorption + of per-thread SIDs that happen to be created and waited on by the + same tid.""" + sid = "abcdef0123456789" # NOT recipe-matching + canary_evs = { + 6: [ + json.loads(_ev(6, 0, "handle.create", {"handle_semantic_id": sid, "object_type": 1, "raw_handle_id": "0x40", "object_name": None})), + json.loads(_ev(6, 1, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), + ], + } + ours_evs = {1: []} + sids = de.collect_shared_global_sids(canary_evs, ours_evs) + check( + sid not in sids, + f"single-tid SID {sid} is NOT classified shared-global", + ) + + +# === Phase C+25 — MmGetPhysicalAddress canonicalization === +# +# `MmGetPhysicalAddress` is a VA→PA translator. Canary and ours route the +# input VA through different heap-region layouts (canary has three physical +# heaps at vA0/vC0/vE0 routed by page size; ours has a single unified +# heap_cursor at 0x40000000 masked with `& 0x1FFF_FFFF`). Both translations +# are internally self-consistent — the game passes the PA opaquely to GPU +# subsystems (`VdInitializeRingBuffer` etc.) and the GPU re-translates +# using the same engine's map. Per-(tid,name) ordinal canonicalization +# (the same mechanism used for `MmAllocatePhysicalMemoryEx`) preserves +# the opaque-pass-through semantics while still surfacing real +# divergences (e.g. translation-count mismatch, game-side PA arithmetic). +def test_mm_get_physical_address_in_allocator_set() -> None: + check( + "MmGetPhysicalAddress" in de.ALLOCATOR_RETURN_FNS, + "MmGetPhysicalAddress is registered in ALLOCATOR_RETURN_FNS", + ) + + +def test_mm_get_physical_address_canonicalization() -> None: + """Per-tid ordinal sentinels rewrite differing PAs to a shared symbol. + + Mirrors the C+25 cold-vs-cold scenario at idx 105,112: + canary returns 0x150B0000 (from vE0 region), ours returns 0x0ADCF000 + (from unified heap masked). 
Both engines have called the translator + in the same per-tid order, so ordinal 0 lines up to the same sentinel. + """ + events_by_tid = { + 6: [ + { + "kind": "kernel.return", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "name": "MmGetPhysicalAddress", + "return_value": 0x150B0000, + "status": "0x150b0000", + }, + }, + { + "kind": "kernel.return", + "tid": 6, + "tid_event_idx": 1, + "payload": { + "name": "MmGetPhysicalAddress", + "return_value": 0x150C0000, + "status": "0x150c0000", + }, + }, + ], + } + de.canonicalize_allocator_returns(events_by_tid) + rv0 = events_by_tid[6][0]["payload"]["return_value"] + rv1 = events_by_tid[6][1]["payload"]["return_value"] + st0 = events_by_tid[6][0]["payload"]["status"] + check( + rv0 == "", + f"first call canonicalized to ordinal 0 (got {rv0!r})", + ) + check( + rv1 == "", + f"second call canonicalized to ordinal 1 (got {rv1!r})", + ) + check( + st0 == "", + f"status mirrors return_value canonicalization (got {st0!r})", + ) + + +def test_mm_get_physical_address_cross_engine_alignment() -> None: + """End-to-end: divergent raw PAs (canary 0x150B0000 vs ours 0x0ADCF000) + canonicalize to identical sentinels and produce no divergence.""" + canary_evs = { + 6: [ + { + "kind": "kernel.return", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "name": "MmGetPhysicalAddress", + "return_value": 0x150B0000, + "status": "0x150b0000", + "side_effects": [], + }, + }, + ], + } + ours_evs = { + 6: [ + { + "kind": "kernel.return", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "name": "MmGetPhysicalAddress", + "return_value": 0x0ADCF000, + "status": "0x0adcf000", + "side_effects": [], + }, + }, + ], + } + de.canonicalize_allocator_returns(canary_evs) + de.canonicalize_allocator_returns(ours_evs) + c0 = canary_evs[6][0]["payload"]["return_value"] + o0 = ours_evs[6][0]["payload"]["return_value"] + check( + c0 == o0, + f"both engines canonicalize to same sentinel (canary={c0!r} ours={o0!r})", + ) + + +def 
test_mm_get_physical_address_count_mismatch_still_diverges() -> None: + """If one engine calls the translator MORE times than the other on a + given tid, ordinals drift and the next call surfaces a divergence + against whatever event the other side emits at that position. + Ordinal-count mismatch IS a behavioral divergence (per the existing + `canonicalize_allocator_returns` docstring contract).""" + canary_evs = { + 6: [ + { + "kind": "kernel.return", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "name": "MmGetPhysicalAddress", + "return_value": 0x10000000, + }, + }, + { + "kind": "kernel.return", + "tid": 6, + "tid_event_idx": 1, + "payload": { + "name": "MmGetPhysicalAddress", + "return_value": 0x20000000, + }, + }, + ], + } + de.canonicalize_allocator_returns(canary_evs) + rv0 = canary_evs[6][0]["payload"]["return_value"] + rv1 = canary_evs[6][1]["payload"]["return_value"] + check( + rv0 != rv1, + f"successive calls get distinct ordinals (rv0={rv0!r} rv1={rv1!r})", + ) + + +# === Phase absorber-review: --disable-absorber + --emit-absorbed-events === + + +def _floating_sharedglob_ev_setup() -> tuple[list[dict], list[dict], set[str]]: + """Construct a minimal canary/ours pair with one floating + shared-global handle.create on canary's side that the + Phase C+18 absorber would normally absorb. 
Returns (canary_evs, + ours_evs, floating_sids).""" + sid = de.shared_global_sid(0x828A3230, 3) + canary_evs = [ + { + "kind": "handle.create", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "handle_semantic_id": sid, + "object_type": 3, + "raw_handle_id": "0x828A3230", + }, + }, + { + "kind": "kernel.return", + "tid": 6, + "tid_event_idx": 1, + "payload": {"name": "KeWaitForSingleObject", "return_value": 0}, + }, + ] + ours_evs = [ + { + "kind": "kernel.return", + "tid": 1, + "tid_event_idx": 0, + "payload": {"name": "KeWaitForSingleObject", "return_value": 0}, + }, + ] + floating = {sid} + return canary_evs, ours_evs, floating + + +def test_disable_shared_global_absorber() -> None: + """When `shared-global` is in disabled_absorbers, the C+18 absorber + does NOT fire and the floating handle.create surfaces as a real + divergence.""" + canary_evs, ours_evs, floating = _floating_sharedglob_ev_setup() + # Sanity: with absorber ON (default), no divergence. + res_on = de.diff_one_tid( + canary_evs, ours_evs, 6, 1, cross_tid_floating_sids=floating + ) + check( + res_on["diverged_at"] is None and len(res_on["skipped_canary"]) == 1, + f"absorber ON: no divergence + 1 absorbed (got div={res_on['diverged_at']}, " + f"skipped={len(res_on['skipped_canary'])})", + ) + # With absorber DISABLED, the kind mismatch surfaces immediately. 
+ res_off = de.diff_one_tid( + canary_evs, ours_evs, 6, 1, + cross_tid_floating_sids=floating, + disabled_absorbers=frozenset({"shared-global"}), + ) + check( + res_off["diverged_at"] == 0, + f"absorber OFF: divergence at 0 (got {res_off['diverged_at']})", + ) + check( + len(res_off["skipped_canary"]) == 0, + f"absorber OFF: no absorption (got {len(res_off['skipped_canary'])})", + ) + + +def test_absorbed_sink_records_event() -> None: + """When `absorbed_sink` is supplied, the absorbed event is appended + with absorber/side/canary_tid/ours_tid/matched_at/event metadata.""" + canary_evs, ours_evs, floating = _floating_sharedglob_ev_setup() + sink: list[dict] = [] + res = de.diff_one_tid( + canary_evs, ours_evs, 6, 1, + cross_tid_floating_sids=floating, + absorbed_sink=sink, + ) + check(res["diverged_at"] is None, "sink does not interfere with diff") + check(len(sink) == 1, f"sink has 1 record (got {len(sink)})") + rec = sink[0] + check(rec["absorber"] == "shared-global", f"absorber=shared-global (got {rec['absorber']!r})") + check(rec["side"] == "canary", f"side=canary (got {rec['side']!r})") + check(rec["canary_tid"] == 6, f"canary_tid=6 (got {rec['canary_tid']})") + check(rec["ours_tid"] == 1, f"ours_tid=1 (got {rec['ours_tid']})") + check(rec["matched_at"] == 0, f"matched_at=0 (got {rec['matched_at']})") + check(rec["event"] is canary_evs[0], "event is verbatim absorbed event") + + +def test_disable_wait_begin_absorber_and_records() -> None: + """Disabling wait-begin makes a floating wait.begin surface; with + absorber ON the event is recorded in the sink.""" + sid = de.shared_global_sid(0xBC65C890, 3) + canary_evs = [ + { + "kind": "wait.begin", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "handles_semantic_ids": [sid], + "wait_type": "any", + "alertable": False, + "wait_mode": "kernel", + "timeout_ns": None, + }, + }, + { + "kind": "kernel.return", + "tid": 6, + "tid_event_idx": 1, + "payload": {"name": "KeWaitForSingleObject", "return_value": 0}, + }, + 
] + ours_evs = [ + { + "kind": "kernel.return", + "tid": 1, + "tid_event_idx": 0, + "payload": {"name": "KeWaitForSingleObject", "return_value": 0}, + }, + ] + floating = {sid} + sink: list[dict] = [] + res_on = de.diff_one_tid( + canary_evs, ours_evs, 6, 1, + cross_tid_floating_sids=floating, + absorbed_sink=sink, + ) + check(res_on["diverged_at"] is None, "wait-begin absorber ON: no divergence") + check(len(sink) == 1 and sink[0]["absorber"] == "wait-begin", "sink records wait-begin") + res_off = de.diff_one_tid( + canary_evs, ours_evs, 6, 1, + cross_tid_floating_sids=floating, + disabled_absorbers=frozenset({"wait-begin"}), + ) + check( + res_off["diverged_at"] == 0, + f"wait-begin absorber OFF: divergence at 0 (got {res_off['diverged_at']})", + ) + + +def test_disable_nested_cs_absorber() -> None: + """Disabling nested-cs makes the E-vs-L mismatch surface immediately.""" + def enter_block(idx: int) -> list[dict]: + return [ + {"kind": "import.call", "tid": 6, "tid_event_idx": idx, + "payload": {"name": "RtlEnterCriticalSection"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": idx + 1, + "payload": {"name": "RtlEnterCriticalSection"}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": idx + 2, + "payload": {"name": "RtlEnterCriticalSection", "return_value": 0}}, + ] + def leave_block(idx: int) -> list[dict]: + return [ + {"kind": "import.call", "tid": 6, "tid_event_idx": idx, + "payload": {"name": "RtlLeaveCriticalSection"}}, + {"kind": "kernel.call", "tid": 6, "tid_event_idx": idx + 1, + "payload": {"name": "RtlLeaveCriticalSection"}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": idx + 2, + "payload": {"name": "RtlLeaveCriticalSection", "return_value": 0}}, + ] + # Canary: 1 nested E+L pair (6 events) then a Leave block that aligns + # with ours's current Leave. + canary_evs = ( + enter_block(0) + leave_block(3) + + leave_block(6) + ) + # Ours: just a single Leave block. 
+ ours_evs = leave_block(0) + sink: list[dict] = [] + res_on = de.diff_one_tid( + canary_evs, ours_evs, 6, 1, + absorbed_sink=sink, + ) + check(res_on["diverged_at"] is None, "nested-cs absorber ON: no divergence") + # Sink should contain 6 records (one per absorbed event in the pair). + nc = [r for r in sink if r["absorber"] == "nested-cs"] + check(len(nc) == 6, f"sink has 6 nested-cs records (got {len(nc)})") + check( + nc[0]["pairs_consumed"] == 1, + f"pairs_consumed=1 (got {nc[0]['pairs_consumed']})", + ) + res_off = de.diff_one_tid( + canary_evs, ours_evs, 6, 1, + disabled_absorbers=frozenset({"nested-cs"}), + ) + check( + res_off["diverged_at"] == 0, + f"nested-cs absorber OFF: divergence at 0 (got {res_off['diverged_at']})", + ) + + +# === v1.6 (AUDIT-070 bridge): sema.release engine-local kind === + +def test_sema_release_engine_local_skipped_both_sides() -> None: + """`sema.release` is engine-local — diff tool advances past it on + EITHER side without alignment. Cadence divergence does NOT show + up as positional divergence.""" + canary = [ + {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, + {"kind": "sema.release", "tid": 6, "tid_event_idx": 1, + "payload": {"handle_semantic_id": "abc", "release_count": 1, + "previous_count": 0, "caller_pc": "0x82450ce0"}}, + {"kind": "sema.release", "tid": 6, "tid_event_idx": 2, + "payload": {"handle_semantic_id": "abc", "release_count": 1, + "previous_count": 1, "caller_pc": "0x82450ce0"}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": 3, + "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, + ] + ours = [ + {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, + {"kind": "sema.release", "tid": 6, "tid_event_idx": 1, + "payload": {"handle_semantic_id": "abc", "release_count": 1, + "previous_count": 0, "caller_pc": "0x82450ce0"}}, + {"kind": "kernel.return", "tid": 6, "tid_event_idx": 2, + "payload": {"name": 
"Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, + ] + result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) + check( + result["diverged_at"] is None, + f"sema.release engine-local — no divergence (got diverged_at={result['diverged_at']})", + ) + check( + result["matched"] == 2, + f"matched = 2 (import.call + kernel.return; 1 canary + 1 ours sema.release skipped) got {result['matched']}", + ) + + +def test_sema_release_counted() -> None: + """`sema.release` is in COUNTED_ENGINE_LOCAL_KINDS — the + count_engine_local_kinds helper tallies per-tid totals.""" + evs_by_tid = { + 6: [ + {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {}}, + {"kind": "sema.release", "tid": 6, "tid_event_idx": 1, "payload": {}}, + {"kind": "sema.release", "tid": 6, "tid_event_idx": 2, "payload": {}}, + ], + 10: [ + {"kind": "sema.release", "tid": 10, "tid_event_idx": 0, "payload": {}}, + {"kind": "sema.release", "tid": 10, "tid_event_idx": 1, "payload": {}}, + {"kind": "sema.release", "tid": 10, "tid_event_idx": 2, "payload": {}}, + ], + } + counts = de.count_engine_local_kinds(evs_by_tid) + check("sema.release" in counts, "sema.release present in counter output") + check( + counts["sema.release"].get(6) == 2, + f"tid=6 has 2 sema.release (got {counts['sema.release'].get(6)})", + ) + check( + counts["sema.release"].get(10) == 3, + f"tid=10 has 3 sema.release (got {counts['sema.release'].get(10)})", + ) + + +def test_sema_release_in_engine_local_kinds() -> None: + """Regression: `sema.release` must be in ENGINE_LOCAL_KINDS so it + does not participate in matched-prefix alignment.""" + check( + "sema.release" in de.ENGINE_LOCAL_KINDS, + "sema.release listed in ENGINE_LOCAL_KINDS", + ) + check( + "sema.release" in de.COUNTED_ENGINE_LOCAL_KINDS, + "sema.release listed in COUNTED_ENGINE_LOCAL_KINDS", + ) + + +# === Phase C+22 (v1.7): host-heap payload-field canonicalization === +# +# `thread.create.ctx_ptr` is a host-heap-derived guest VA: 
def test_thread_create_ctx_ptr_in_host_heap_set() -> None:
    """`thread.create.ctx_ptr` must be registered for host-heap
    payload-field canonicalization."""
    registry = de.HOST_HEAP_PAYLOAD_FIELDS_BY_KIND
    kind_registered = "thread.create" in registry
    check(
        kind_registered,
        "thread.create is registered in HOST_HEAP_PAYLOAD_FIELDS_BY_KIND",
    )
    # Indexing (not .get) on purpose: a missing kind should fail loudly here.
    field_registered = "ctx_ptr" in registry["thread.create"]
    check(field_registered, "ctx_ptr is canonicalized for thread.create")
+ """ + events_by_tid = { + 1: [ + { + "kind": "thread.create", + "tid": 1, + "tid_event_idx": 0, + "payload": { + "handle_semantic_id": "aaaa", + "parent_tid": 1, + "entry_pc": "0x824cd458", + "ctx_ptr": "0x42453b3c", + "priority": 0, + "affinity": 4, + "stack_size": 32768, + "suspended": False, + }, + }, + { + "kind": "thread.create", + "tid": 1, + "tid_event_idx": 1, + "payload": { + "handle_semantic_id": "bbbb", + "parent_tid": 1, + "entry_pc": "0x82181830", + "ctx_ptr": "0x42453c40", + "priority": 0, + "affinity": 0, + "stack_size": 131072, + "suspended": False, + }, + }, + ], + } + de.canonicalize_host_heap_payload_fields(events_by_tid) + p0 = events_by_tid[1][0]["payload"]["ctx_ptr"] + p1 = events_by_tid[1][1]["payload"]["ctx_ptr"] + check( + p0 == "", + f"first thread.create canonicalized to ordinal 0 (got {p0!r})", + ) + check( + p1 == "", + f"second thread.create canonicalized to ordinal 1 (got {p1!r})", + ) + # Strict fields must be UNTOUCHED. + check( + events_by_tid[1][0]["payload"]["entry_pc"] == "0x824cd458", + "entry_pc strict (untouched)", + ) + check( + events_by_tid[1][0]["payload"]["stack_size"] == 32768, + "stack_size strict (untouched)", + ) + check( + events_by_tid[1][1]["payload"]["affinity"] == 0, + "affinity strict (untouched)", + ) + + +def test_host_heap_field_cross_engine_alignment() -> None: + """End-to-end: divergent raw VAs (canary 0xbe56bb3c vs ours 0x42453b3c) + canonicalize to identical sentinels and produce no divergence at the + diff-event layer.""" + canary_evs = { + 6: [ + { + "kind": "thread.create", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "handle_semantic_id": "17d8b2ba9dd4ba13", + "parent_tid": 6, + "entry_pc": "0x824cd458", + "ctx_ptr": "0xbe56bb3c", + "priority": 0, + "affinity": 4, + "stack_size": 32768, + "suspended": False, + }, + }, + ], + } + ours_evs = { + 1: [ + { + "kind": "thread.create", + "tid": 1, + "tid_event_idx": 0, + "payload": { + "handle_semantic_id": "3562d07db6ff161d", + "parent_tid": 1, + 
"entry_pc": "0x824cd458", + "ctx_ptr": "0x42453b3c", + "priority": 0, + "affinity": 4, + "stack_size": 32768, + "suspended": False, + }, + }, + ], + } + de.canonicalize_host_heap_payload_fields(canary_evs) + de.canonicalize_host_heap_payload_fields(ours_evs) + c0 = canary_evs[6][0]["payload"]["ctx_ptr"] + o0 = ours_evs[1][0]["payload"]["ctx_ptr"] + check( + c0 == o0, + f"both engines canonicalize ctx_ptr to same sentinel " + f"(canary={c0!r} ours={o0!r})", + ) + # Run compare_event to confirm no divergence: parent_tid is in + # SKIP_PAYLOAD_FIELDS_BY_KIND[thread.create]; handle_semantic_id too. + # ctx_ptr is now canonicalized. Everything else (entry_pc, priority, + # affinity, stack_size, suspended) matches strictly. + diff = de.compare_event(canary_evs[6][0], ours_evs[1][0]) + check( + diff is None, + f"compare_event reports no divergence (got {diff!r})", + ) + + +def test_host_heap_field_real_divergence_still_caught() -> None: + """Real divergences MUST still be caught (the canonicalization must + NOT over-suppress strict fields). Mutate `entry_pc` / `priority` / + `affinity` / `stack_size` / `suspended` each in turn and confirm + each surfaces a divergence.""" + def mk(payload: dict) -> dict: + return { + "kind": "thread.create", + "tid": 6, + "tid_event_idx": 0, + "payload": payload, + } + base_canary = { + "handle_semantic_id": "aa", + "parent_tid": 6, + "entry_pc": "0x824cd458", + "ctx_ptr": "0xbe56bb3c", + "priority": 0, + "affinity": 4, + "stack_size": 32768, + "suspended": False, + } + # Vary each strict field one-at-a-time on ours. 
def test_host_heap_field_count_mismatch_still_diverges() -> None:
    """Two thread.create events on the same tid must canonicalize to
    DIFFERENT ctx_ptr ordinals.

    This is the property that makes a per-tid count mismatch visible: if
    one engine emits an extra thread.create, every later ordinal on that
    tid shifts, so the next compared event no longer lines up (same
    contract as `canonicalize_allocator_returns`). Only the
    distinct-ordinal half is asserted here, on a single engine's stream.
    """
    def create_event(idx: int, sid: str, ctx_ptr: str, entry_pc: str,
                     affinity: int) -> dict:
        return {
            "kind": "thread.create",
            "tid": 6,
            "tid_event_idx": idx,
            "payload": {
                "handle_semantic_id": sid,
                "ctx_ptr": ctx_ptr,
                "entry_pc": entry_pc,
                "parent_tid": 6,
                "priority": 0,
                "affinity": affinity,
                "stack_size": 32768,
                "suspended": False,
            },
        }

    canary_evs = {
        6: [
            create_event(0, "a", "0xbe56bb3c", "0x824cd458", 4),
            create_event(1, "b", "0xbe56bc20", "0x82181830", 0),
        ],
    }
    de.canonicalize_host_heap_payload_fields(canary_evs)
    first, second = (ev["payload"]["ctx_ptr"] for ev in canary_evs[6])
    check(
        first != second,
        f"successive thread.creates get distinct ctx_ptr ordinals "
        f"(p0={first!r} p1={second!r})",
    )
"""Defensive: if `ctx_ptr` is for any reason not a string (None, + int, missing), the canonicalizer leaves it untouched. Pre-C+22 + event logs whose emitter omits the field still parse without + crashing.""" + events_by_tid = { + 6: [ + { + "kind": "thread.create", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "ctx_ptr": None, + "entry_pc": "0x824cd458", + }, + }, + { + "kind": "thread.create", + "tid": 6, + "tid_event_idx": 1, + "payload": { + # No ctx_ptr at all. + "entry_pc": "0x82181830", + }, + }, + { + "kind": "thread.create", + "tid": 6, + "tid_event_idx": 2, + "payload": { + "ctx_ptr": "0xbe56bb3c", # First STRING value. + "entry_pc": "0x82000000", + }, + }, + ], + } + de.canonicalize_host_heap_payload_fields(events_by_tid) + check( + events_by_tid[6][0]["payload"]["ctx_ptr"] is None, + "None ctx_ptr left as None", + ) + check( + "ctx_ptr" not in events_by_tid[6][1]["payload"], + "missing ctx_ptr stays missing", + ) + # Ordinal counter advances only on actual canonicalization, so the + # first string-typed ctx_ptr gets ordinal 0. + rewritten = events_by_tid[6][2]["payload"]["ctx_ptr"] + check( + rewritten == "", + f"first string ctx_ptr gets ordinal 0 (got {rewritten!r})", + ) + + +def test_parent_tid_already_skipped() -> None: + """Phase C+22 v1.7 audit: `parent_tid` is engine-local (canary's + guest tid != ours's guest tid for the same chain), so it MUST be in + the skip set. This test pins the existing behavior (already in + place since C+15-α) so we don't accidentally regress.""" + skip = de.SKIP_PAYLOAD_FIELDS_BY_KIND.get("thread.create", set()) + check( + "parent_tid" in skip, + f"thread.create.parent_tid is in SKIP_PAYLOAD_FIELDS_BY_KIND " + f"(got {sorted(skip)!r})", + ) + # End-to-end: compare two thread.create events that differ ONLY in + # parent_tid. Must report no divergence (parent_tid skipped). 
+ ev_c = { + "kind": "thread.create", + "tid": 6, + "tid_event_idx": 0, + "payload": { + "handle_semantic_id": "ignored", + "parent_tid": 6, + "entry_pc": "0x824cd458", + "ctx_ptr": "", + "priority": 0, + "affinity": 4, + "stack_size": 32768, + "suspended": False, + }, + } + ev_o = { + "kind": "thread.create", + "tid": 1, + "tid_event_idx": 0, + "payload": { + "handle_semantic_id": "ignored2", + "parent_tid": 1, # different + "entry_pc": "0x824cd458", + "ctx_ptr": "", + "priority": 0, + "affinity": 4, + "stack_size": 32768, + "suspended": False, + }, + } + diff = de.compare_event(ev_c, ev_o) + check( + diff is None, + f"parent_tid difference does NOT surface as divergence (got {diff!r})", + ) + + +# === Iterate 2.L (2026-05-28) — return_value / args_resolved diff tagging === + +def _kr(name: str, rv, status: str) -> dict: + """Helper: minimal kernel.return event.""" + return { + "kind": "kernel.return", + "tid": 1, + "tid_event_idx": 100, + "payload": { + "name": name, + "return_value": rv, + "status": status, + "side_effects": [], + }, + } + + +def test_kernel_return_value_mismatch_categorized() -> None: + """Iterate 2.L: a kernel.return whose `return_value` differs must + surface a category-tagged `[return_value mismatch]` diff string + including the function name and both values. 
def test_kernel_return_status_mismatch_categorized() -> None:
    """Iterate 2.L: when `return_value` agrees on both sides and only
    `status` differs, the diff string carries the `[status mismatch]`
    tag. (The schema should not allow this combination; the check is
    defensive.)"""
    canary_ev, ours_ev = (
        _kr("NtFoo", 0, status) for status in ("0xc000000f", "0x00000000")
    )
    diff = de.compare_event(canary_ev, ours_ev)
    tagged = diff is not None and "[status mismatch]" in diff
    check(tagged, f"status diff has '[status mismatch]' tag (got {diff!r})")
def test_kernel_call_args_resolved_path_mismatch_categorized() -> None:
    """Iterate 2.L: two kernel.call events for the same function whose
    `args_resolved.path` differ must yield a diff string tagged
    `[args_resolved.path mismatch]` that also names the function, so
    path drift is readable at a glance rather than buried in a generic
    payload diff."""
    def call_event(resolved_path: str) -> dict:
        return {
            "kind": "kernel.call", "tid": 1, "tid_event_idx": 0,
            "payload": {
                "name": "NtQueryFullAttributesFile", "args": {},
                "args_resolved": {"path": resolved_path},
            },
        }

    ev_c = call_event("cache:\\d4ea4615\\e\\46ee8ca")
    ev_o = call_event("cache:\\different\\path")
    diff = de.compare_event(ev_c, ev_o)
    diverged = diff is not None
    check(
        diverged and "[args_resolved.path mismatch]" in diff,
        f"args_resolved.path diff tagged (got {diff!r})",
    )
    check(
        diverged and "name=NtQueryFullAttributesFile" in diff,
        f"args_resolved diff includes function name (got {diff!r})",
    )
def main() -> int:
    """Run every diff_events.py test in declaration order, then print a
    summary. Returns 1 if any check recorded a failure in FAILURES,
    otherwise 0."""
    suite = (
        test_fnv1a_vector,
        test_shared_global_sid_determinism,
        test_shared_global_sid_matches_rust,
        test_is_shared_global_handle_create,
        test_floating_handle_create_cross_tid,
        test_strict_alignment_without_floating,
        test_non_floating_real_divergence_still_caught,
        test_floating_sids_collection_via_main,
        # Phase C+21
        test_is_shared_global_wait_begin_positive,
        test_is_shared_global_wait_begin_negative,
        test_is_shared_global_wait_begin_mixed_handles_wait_all,
        test_floating_wait_begin_cross_tid_canary_only,
        test_floating_wait_begin_cross_tid_ours_only,
        test_per_thread_wait_begin_NOT_absorbed,
        test_wait_begin_strict_match_unchanged,
        test_collect_shared_global_sids_cross_tid_heuristic,
        test_collect_shared_global_sids_single_tid_excluded,
        test_engine_local_contention_observed_skipped_both_sides,
        test_engine_local_one_sided_contention_observed,
        test_nested_cs_cleanup_block_absorbed_when_convergent,
        test_nested_cs_cleanup_NOT_absorbed_when_followup_diverges,
        test_nested_cs_cleanup_NOT_absorbed_when_canary_has_no_followup,
        # Phase C+25
        test_mm_get_physical_address_in_allocator_set,
        test_mm_get_physical_address_canonicalization,
        test_mm_get_physical_address_cross_engine_alignment,
        test_mm_get_physical_address_count_mismatch_still_diverges,
        # Phase absorber-review
        test_disable_shared_global_absorber,
        test_absorbed_sink_records_event,
        test_disable_wait_begin_absorber_and_records,
        test_disable_nested_cs_absorber,
        # v1.6 (AUDIT-070 bridge)
        test_sema_release_engine_local_skipped_both_sides,
        test_sema_release_counted,
        test_sema_release_in_engine_local_kinds,
        # Phase C+22 v1.7 host-heap payload-field canonicalization
        test_thread_create_ctx_ptr_in_host_heap_set,
        test_host_heap_field_canonicalization_ordinals,
        test_host_heap_field_cross_engine_alignment,
        test_host_heap_field_real_divergence_still_caught,
        test_host_heap_field_count_mismatch_still_diverges,
        test_host_heap_field_non_string_value_left_alone,
        test_parent_tid_already_skipped,
        # Iterate 2.L (return_value / args_resolved diff tagging)
        test_kernel_return_value_mismatch_categorized,
        test_kernel_return_status_mismatch_categorized,
        test_kernel_return_value_match_no_diff,
        test_kernel_return_value_missing_one_side_falls_back,
        test_kernel_call_args_resolved_path_mismatch_categorized,
        test_kernel_call_matching_args_resolved_no_diff,
    )
    for run_test in suite:
        run_test()
    print()
    if not FAILURES:
        print("PASS: all diff_events.py tests passed")
        return 0
    print(f"FAIL: {len(FAILURES)} test(s) failed: {FAILURES}")
    return 1
+ +## Usage + +```bash +python3 tools/diff-state/diff_state.py \ + --canary <snapshot-dir>/canary \ + --ours <snapshot-dir>/ours \ + --out <snapshot-dir>/report.md +``` + +Writes: + +- `<snapshot-dir>/report.md` — human-readable divergence catalog +- `<snapshot-dir>/report.json` — machine-readable sibling (same content) + +## Exit codes + +| code | meaning | +|---|---| +| 0 | no divergence (or `--validate-identical` succeeded) | +| 1 | divergences found | +| 2 | STOP triggered (`image_loaded_sha256` / `xex_entry_point` / `iso_sha256` mismatch) | + +## Field-comparison rules + +The rules live at the top of `diff_state.py` as Python constants — read those for +the authoritative spec. Summary: + +- `engine`, `schema_version`, `deterministic_skip` are always skipped. +- `cpu_state.json`: skip `hw_id`. +- `kernel.json`: skip `raw_handle_id`, `exports_registered_count`. +- `config.json`: skip `build_id`, `iso_path`, `host_ns_at_snapshot`, + `wall_clock_iso8601`, `cli_argv`, `cvars.phase_b_snapshot_dir`. +- Each snapshot's `deterministic_skip` array is honored too. + +## Set vs sequence semantics + +- **Set** (elements are matched by key, not by position): + - `kernel.json::objects` (key=`handle_semantic_id`) + - `kernel.json::handle_name_table` (key=`name`) + - `vfs.json::cache_root_listing` (key=`relpath`) + - `memory.json::heaps` (key=`base`) +- **Sequence** (positional compare): everything else, including + `memory.json::regions` (which both engines emit pre-sorted by + `(start, end)`). 
+ +## Classification + +| class | trigger | priority | +|---|---|---| +| σ-structural | field missing/extra; sequence-length mismatch; set element only in one engine | 1 (always report) | +| δ-content-STOP | `image_loaded_sha256` / `xex_entry_point` / `iso_sha256` mismatch | STOP (exit 2) | +| δ-content | other `*_sha256` field differs | 2 | +| γ-kernel-content | `objects[].details` field differs | 2 — primary Phase C target | +| κ-cache | non-empty `cache_root_listing` either side | re-run after `rm -rf` of caches | +| ε-host-allocator | heap base/region start differs but sha256 agrees | catalog only | +| τ-host-timing | `deterministic_skip`-listed timing field | silent unless verbose | + +## Negative-test recipe + +To verify the tool catches a hand-mutation: + +```bash +cp -r snap-001/ours snap-001/ours-mut +sed -i 's/"thread_id": 1/"thread_id": 999/' snap-001/ours-mut/kernel.json +python3 tools/diff-state/diff_state.py \ + --canary snap-001/ours --ours snap-001/ours-mut --out /tmp/r.md +# exit code 1; report names objects[handle_semantic_id=...] details.thread_id +``` diff --git a/tools/diff-state/diff_state.py b/tools/diff-state/diff_state.py new file mode 100644 index 0000000..a734a49 --- /dev/null +++ b/tools/diff-state/diff_state.py @@ -0,0 +1,545 @@ +#!/usr/bin/env python3 +"""Phase B state-snapshot diff tool. + +Reads two snapshot directories (one per engine, `/canary/` and +`/ours/`) emitted by `phase_b_snapshot` at the moment immediately +before the first guest PPC instruction of the XEX entry_point. Produces +a markdown report (`report.md`) plus a machine-readable JSON sibling +(`report.json`) classifying every observable divergence. + +Field-comparison rules + classification table: + audit-runs/phase-b-state-equivalence/README.md + Both engines' emitter source + this tool read the same rules. 
+ +Usage: + diff_state.py --canary /canary --ours /ours [--out report.md] + diff_state.py --canary --ours --validate-identical + +Exit codes: + 0 — no divergence (or `--validate-identical` succeeded) + 1 — divergences found + 2 — STOP triggered (image_loaded_sha256 / xex_entry_point / iso_sha256 + mismatch — interpretation of downstream files is not valid) +""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + +SCHEMA_VERSION = 1 + +# ---------- field-comparison rules (declared up front) ---------- + +# Per-snapshot-file fields the diff tool always skips at the top level. +SKIP_TOP_FIELDS = {"schema_version", "engine", "deterministic_skip"} + +# Per-file: extra fields skipped. JSON-pointer-style ("a.b.c") matched +# either at top-level keys or within array-of-objects members keyed by +# `handle_semantic_id` etc. +SKIP_BY_FILE: dict[str, set[str]] = { + "cpu_state.json": {"hw_id"}, + "memory.json": set(), + "kernel.json": {"raw_handle_id", "exports_registered_count"}, + "vfs.json": set(), + "config.json": { + "build_id", + "iso_path", + "host_ns_at_snapshot", + "wall_clock_iso8601", + "cli_argv", + "cvars.phase_b_snapshot_dir", + }, +} + +# `objects` etc. are sets (sort then compare); `regions`/`probes`/`gpr`/ +# etc. are sequences (positional compare). Mismatches handled separately. +SET_FIELDS: dict[str, dict[str, str]] = { + # file -> field_name -> sort-key (used as dict key) + "kernel.json": { + "objects": "handle_semantic_id", + "handle_name_table": "name", + }, + "vfs.json": {"cache_root_listing": "relpath"}, + "memory.json": {"heaps": "base"}, +} + +# STOP-trigger fields (δ-content critical equivalence). +# Note: image_loaded_sha256 is reported but NOT a STOP trigger here. 
def classify(file: str, path: str, kind: str, canary: Any, ours: Any) -> str:
    """Map one divergence to its report class.

    Args:
        file: snapshot file name the divergence came from (e.g. "vfs.json").
        path: dotted path of the diverging value as built by `diff_value`,
            e.g. "objects[handle_semantic_id=ab12].details.thread_id".
        kind: divergence kind emitted by the walker ("value",
            "missing-field", "missing-from-ours", ...).
        canary, ours: the two values; accepted for signature symmetry with
            `Divergence` and not consulted.

    Returns:
        One of "delta-content-STOP", "sigma-structural", "delta-content",
        "gamma-kernel-content", "kappa-cache", "epsilon-host-allocator",
        "tau-host-timing". Rules are checked in that precedence order,
        with "gamma-kernel-content" as the fallback.
    """
    import re  # function-scope: only this helper needs it

    if (file, path) in STOP_FIELDS:
        return "delta-content-STOP"
    if kind in ("set-size-mismatch", "missing-field", "extra-field", "seq-length"):
        return "sigma-structural"
    if path.endswith(".sha256") or path.endswith("_sha256"):
        return "delta-content"
    if path.startswith("objects[") and ".details." in path:
        return "gamma-kernel-content"
    if file == "vfs.json" and path.startswith("cache_root_listing"):
        return "kappa-cache"
    # A set member present in only one engine is a structural difference.
    # `diff_value` reports it as missing-from-ours / extra-in-ours; those
    # kinds used to fall through to the gamma default. Checked after the
    # cache rule so cache-listing noise keeps its kappa class.
    if kind in ("missing-from-ours", "extra-in-ours"):
        return "sigma-structural"
    # Real paths carry an index or key selector ("heaps[base=...].name",
    # "heaps[0].base"), so compare against the selector-free form; the
    # literal "heaps[].name" never occurs as a path.
    normalized = re.sub(r"\[[^\]]*\]", "[]", path)
    if normalized in ("heaps[].base", "heaps[].name"):
        return "epsilon-host-allocator"
    if path in ("host_ns_at_snapshot", "wall_clock_iso8601"):
        return "tau-host-timing"
    return "gamma-kernel-content"
def _index_set_members(items: list, key: str) -> dict | None:
    """Return {key value: member} for a cleanly keyed set, else None.

    "Cleanly keyed" means every member is a dict that has `key`, every
    key value is hashable, and no key value repeats.
    """
    indexed: dict = {}
    for member in items:
        if not isinstance(member, dict) or key not in member:
            return None
        try:
            if member[key] in indexed:
                return None  # duplicate key: matching would hide a member
        except TypeError:
            return None  # unhashable key value
        indexed[member[key]] = member
    return indexed


def diff_value(
    file: str,
    path: str,
    a: Any,
    b: Any,
    out: list[Divergence],
    skip: set[str],
    set_keys: dict[str, str] | None = None,
) -> None:
    """Recursively compare canary value `a` with ours value `b`, appending
    one `Divergence` to `out` per difference.

    Args:
        file: snapshot file name, used for skip rules and classification.
        path: dotted path of the value being compared ("" at the root).
        a, b: canary / ours values at `path`.
        out: accumulator for divergences (mutated in place).
        skip: field names/paths to ignore, as understood by `is_skipped`.
        set_keys: field name -> member key for lists that are compared as
            sets (members matched by key) instead of positionally.

    Dicts report missing/extra keys and recurse on shared keys. Lists
    named in `set_keys` report members present on one side only, then
    recurse on matched members. All other lists are compared position by
    position, with a single "seq-length" divergence when lengths differ.
    """
    if is_skipped(file, path, skip):
        return
    if type(a) != type(b):
        out.append(Divergence(file, path, "type-mismatch", a, b,
                              classify(file, path, "type-mismatch", a, b)))
        return
    if isinstance(a, dict):
        a_keys = set(a.keys())
        b_keys = set(b.keys())
        for k in sorted(a_keys - b_keys):
            sub = f"{path}.{k}" if path else k
            if is_skipped(file, sub, skip):
                continue
            out.append(Divergence(file, sub, "missing-field", a[k], None,
                                  classify(file, sub, "missing-field", a[k], None)))
        for k in sorted(b_keys - a_keys):
            sub = f"{path}.{k}" if path else k
            if is_skipped(file, sub, skip):
                continue
            out.append(Divergence(file, sub, "extra-field", None, b[k],
                                  classify(file, sub, "extra-field", None, b[k])))
        for k in sorted(a_keys & b_keys):
            sub = f"{path}.{k}" if path else k
            diff_value(file, sub, a[k], b[k], out, skip, set_keys)
        return
    if isinstance(a, list):
        # The set-vs-sequence decision is made on the bare field name of
        # the last path segment ("objects" for "x.objects" or "objects[3]").
        last_seg = path.rsplit(".", 1)[-1] if path else ""
        bare = last_seg.split("[", 1)[0]
        key = (set_keys or {}).get(bare)
        a_by = _index_set_members(a, key) if key is not None else None
        b_by = _index_set_members(b, key) if key is not None else None
        if a_by is not None and b_by is not None:
            # No pre-sort of the members: it was unused by the matching
            # and raised TypeError when key values had mixed types.
            # Output order comes from the str-keyed sorts below.
            for m in sorted(a_by.keys() - b_by.keys(), key=str):
                sub = f"{path}[{key}={m}]"
                out.append(Divergence(file, sub, "missing-from-ours", m, None,
                                      classify(file, sub, "missing-from-ours", m, None)))
            for e in sorted(b_by.keys() - a_by.keys(), key=str):
                sub = f"{path}[{key}={e}]"
                out.append(Divergence(file, sub, "extra-in-ours", None, e,
                                      classify(file, sub, "extra-in-ours", None, e)))
            for ck in sorted(a_by.keys() & b_by.keys(), key=str):
                diff_value(file, f"{path}[{key}={ck}]", a_by[ck], b_by[ck],
                           out, skip, set_keys)
            return
        # Plain sequence, or a configured set that is not cleanly keyed
        # (non-dict member, member without the key, duplicate key): compare
        # positionally so no member is silently left out.
        if len(a) != len(b):
            out.append(Divergence(file, path, "seq-length", len(a), len(b),
                                  classify(file, path, "seq-length", len(a), len(b))))
        for i in range(min(len(a), len(b))):
            diff_value(file, f"{path}[{i}]", a[i], b[i], out, skip, set_keys)
        return
    if a != b:
        out.append(Divergence(file, path, "value", a, b,
                              classify(file, path, "value", a, b)))
+ ch_actual = hashlib.sha256(cp.read_bytes()).hexdigest() + oh_actual = hashlib.sha256(op.read_bytes()).hexdigest() + if ch_actual == ch and oh_actual == oh: + file_status[name] = "identical" + continue + # Manifest claim does not match disk — fall through to full diff + # and surface the manifest mismatch as a structural divergence. + if ch_actual != ch: + divergences.append(Divergence( + name, "", "manifest-hash-mismatch", ch, ch_actual, + "sigma-structural")) + if oh_actual != oh: + divergences.append(Divergence( + name, "", "manifest-hash-mismatch", oh, oh_actual, + "sigma-structural")) + a = load_json(cp) + b = load_json(op) + skip = collect_skip_set(name, a) | collect_skip_set(name, b) + diff_value(name, "", a, b, divergences, skip, + set_keys=SET_FIELDS.get(name)) + file_status[name] = "diverged" + return divergences, file_status + + +# ---------- invariants ---------- + +def _canonicalize_image(image: bytes, xex_meta: dict, image_base: int) -> bytes: + """Mask XEX import slots to 0xCD. Import patches are legitimate + engine-specific runtime overlays (record_type=0 var slots = 4 bytes, + record_type=1 thunks = 16 bytes); they break a naive byte-equality + invariant even when both engines decoded the XEX identically.""" + ranges = [] + for lib in xex_meta.get("import_libraries", []): + for imp in lib.get("imports", []): + addr = imp["address"] + rt = imp["record_type"] + if rt == 0: + ranges.append((addr, addr + 4)) + elif rt == 1: + ranges.append((addr, addr + 16)) + buf = bytearray(image) + for sva, eva in ranges: + s = sva - image_base + e = eva - image_base + if s < 0 or e > len(buf): + continue + for i in range(s, e): + buf[i] = 0xCD + return bytes(buf) + + +def check_invariants( + canary_dir: Path, ours_dir: Path, xex_json: Path | None = None +) -> tuple[list[tuple[str, str, str, bool]], bool]: + """Returns (rows, stop) where each row is (name, canary_val, ours_val, ok). + `stop` is True iff any STOP-class invariant failed. 
+ + When --xex-json is provided AND both snapshots contain `image.bin`, + the image-load invariant is computed over a canonicalized buffer + (XEX import slots masked). This relaxes the original raw-bytes STOP + to the only meaningful semantic check — both engines decoded the + XEX identically — and avoids tripping on legitimate runtime import + patches (canary's 0xDEADC0DE vs ours's 0x00000000 sentinels).""" + rows = [] + stop = False + try: + c_cfg = load_json(canary_dir / "config.json") + o_cfg = load_json(ours_dir / "config.json") + c_cpu = load_json(canary_dir / "cpu_state.json") + o_cpu = load_json(ours_dir / "cpu_state.json") + except FileNotFoundError as e: + return [(f"file_present:{e.filename}", "", "", False)], True + + c_entry = c_cfg.get("xex_entry_point") + o_entry = o_cfg.get("xex_entry_point") + rows.append(("xex_entry_point", str(c_entry), str(o_entry), c_entry == o_entry)) + if c_entry != o_entry: + stop = True + + c_pc = c_cpu.get("pc") + o_pc = o_cpu.get("pc") + pc_match = c_pc == c_entry and o_pc == o_entry + rows.append(( + "cpu_state.pc == xex_entry_point", + f"{c_pc} == {c_entry}", + f"{o_pc} == {o_entry}", + pc_match, + )) + if not pc_match: + stop = True + + c_img = c_cfg.get("image_loaded_sha256") + o_img = o_cfg.get("image_loaded_sha256") + # Original raw hash — informational. Mismatch is expected when the + # engines patch imports differently. Reported but does NOT STOP. + rows.append(( + "image_loaded_sha256 (raw)", + c_img or "", + o_img or "", + c_img == o_img, + )) + + # Canonical hash — the real equivalence check. Requires both engines + # to have dumped image.bin (--phase-b-dump-section-content) AND a + # caller-supplied --xex-json with the import table. When unavailable + # we fall back to the raw hash as the STOP key for backward compat. 
+ c_img_bin = canary_dir / "image.bin" + o_img_bin = ours_dir / "image.bin" + canonical_available = ( + xex_json is not None + and c_img_bin.exists() + and o_img_bin.exists() + ) + if canonical_available: + xex_meta = json.loads(Path(xex_json).read_text()) + image_base = xex_meta.get("image_base", 0x82000000) + cbytes = c_img_bin.read_bytes() + obytes = o_img_bin.read_bytes() + c_canon = _canonicalize_image(cbytes, xex_meta, image_base) + o_canon = _canonicalize_image(obytes, xex_meta, image_base) + import hashlib as _hl + c_canon_h = _hl.sha256(c_canon).hexdigest() + o_canon_h = _hl.sha256(o_canon).hexdigest() + canon_ok = c_canon_h == o_canon_h + rows.append(( + "image_canonical_sha256", + c_canon_h, + o_canon_h, + canon_ok, + )) + if not canon_ok: + stop = True + else: + # No canonicalization possible — fall back to raw bytes as the + # STOP key. This preserves the original Phase B semantics. + if c_img != o_img: + stop = True + return rows, stop + + +# ---------- report writing ---------- + +def write_report(out_path: Path, canary_dir: Path, ours_dir: Path, + divergences: list[Divergence], file_status: dict, + invariants: list, stop: bool): + lines = [] + lines.append("# Phase B snapshot diff") + lines.append("") + lines.append(f"- canary snapshot: `{canary_dir}`") + lines.append(f"- ours snapshot: `{ours_dir}`") + lines.append("") + lines.append("## Invariants (HARD GATE)") + lines.append("") + lines.append("| invariant | canary | ours | ok? |") + lines.append("|---|---|---|---|") + for name, cval, oval, ok in invariants: + lines.append(f"| {name} | `{cval}` | `{oval}` | {'PASS' if ok else 'FAIL'} |") + lines.append("") + if stop: + lines.append("> **STOP**: a primary equivalence invariant failed. " + "Downstream divergences are not interpretable until this is " + "resolved. 
Re-run with `--phase-b-dump-section-content` on both " + "engines and binary-diff the regions to localize.") + lines.append("") + lines.append("## File-level summary") + lines.append("") + lines.append("| file | status | divergence count by class |") + lines.append("|---|---|---|") + by_file_class: dict[tuple[str, str], int] = {} + for d in divergences: + by_file_class[(d.file, d.klass)] = by_file_class.get((d.file, d.klass), 0) + 1 + for fname, st in file_status.items(): + counts = [] + for klass in ["sigma-structural", "delta-content-STOP", "delta-content", + "gamma-kernel-content", "kappa-cache", + "epsilon-host-allocator", "tau-host-timing"]: + c = by_file_class.get((fname, klass), 0) + if c: + counts.append(f"{klass}={c}") + lines.append(f"| {fname} | {st} | {' '.join(counts) if counts else '—'} |") + lines.append("") + # Per-class sections. + by_class: dict[str, list[Divergence]] = {} + for d in divergences: + by_class.setdefault(d.klass, []).append(d) + priority_order = [ + ("sigma-structural", "σ-structural divergences (priority 1)"), + ("delta-content-STOP", "δ-content STOP divergences"), + ("delta-content", "δ-content divergences (priority 2)"), + ("gamma-kernel-content", "γ-kernel-content divergences (priority 2)"), + ("kappa-cache", "κ-cache divergences (re-run after pre-clean)"), + ("epsilon-host-allocator", "ε-host-allocator (informational)"), + ("tau-host-timing", "τ-host-timing (informational)"), + ] + for klass, title in priority_order: + items = by_class.get(klass, []) + if not items: + continue + lines.append(f"## {title}") + lines.append("") + for d in items[:200]: # cap each section + lines.append(f"- **{d.file}** `{d.path}`: kind=`{d.kind}` " + f"canary=`{d.canary!r}` ours=`{d.ours!r}`") + if len(items) > 200: + lines.append(f"- _… {len(items) - 200} more in this class (see report.json)_") + lines.append("") + lines.append("## Phase C handoff") + lines.append("") + lines.append("Suggested attack order: σ first (structural), then γ ranked by " 
+ "object type (Thread > Event > Semaphore > Mutex > Timer > File > " + "Other), then δ. ε and τ are catalog-only.") + out_path.write_text("\n".join(lines), encoding="utf-8") + + +def write_report_json(out_path: Path, divergences: list[Divergence], + file_status: dict, invariants: list, stop: bool): + obj = { + "schema_version": SCHEMA_VERSION, + "invariants": [ + {"name": n, "canary": c, "ours": o, "ok": ok} + for n, c, o, ok in invariants + ], + "stop": stop, + "file_status": file_status, + "divergences": [d.to_dict() for d in divergences], + } + out_path.write_text(json.dumps(obj, indent=2, sort_keys=True), encoding="utf-8") + + +# ---------- CLI ---------- + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--canary", required=True) + ap.add_argument("--ours", required=True) + ap.add_argument("--out", default=None) + ap.add_argument("--xex-json", default=None, + help="optional xex.json metadata for canonical image-load " + "invariant (requires image.bin in both snapshot dirs)") + ap.add_argument("--validate-identical", action="store_true") + ns = ap.parse_args() + canary_dir = Path(ns.canary) + ours_dir = Path(ns.ours) + if not canary_dir.is_dir() or not ours_dir.is_dir(): + print(f"both snapshot dirs must exist: {canary_dir} {ours_dir}", file=sys.stderr) + sys.exit(2) + + xex_json = Path(ns.xex_json) if ns.xex_json else None + invariants, stop = check_invariants(canary_dir, ours_dir, xex_json) + divergences, file_status = diff_directory(canary_dir, ours_dir) + + if ns.validate_identical: + if divergences or not all(ok for _, _, _, ok in invariants): + print("validate-identical: differences found", file=sys.stderr) + sys.exit(1) + print("validate-identical: OK") + sys.exit(0) + + out_md = Path(ns.out) if ns.out else (canary_dir.parent / "report.md") + out_json = out_md.with_suffix(".json") + write_report(out_md, canary_dir, ours_dir, divergences, file_status, + invariants, stop) + write_report_json(out_json, divergences, file_status, invariants, 
stop) + print(f"wrote {out_md} ({len(divergences)} divergences)") + print(f"wrote {out_json}") + + if stop: + sys.exit(2) + if divergences: + sys.exit(1) + sys.exit(0) + + +if __name__ == "__main__": + main()