diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
index 3ff067e15..1abad3bd2 100644
--- a/src/xenia/cpu/cpu_flags.cc
+++ b/src/xenia/cpu/cpu_flags.cc
@@ -57,3 +57,54 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU");
 
 DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.",
             "CPU");
+
+// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool.
+DEFINE_bool(audit_demo_setup_trace, true,
+            "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.",
+            "Audit");
+
+// AUDIT-061: comma-separated list of guest PCs to log on each fire.
+// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits
+// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N.
+// Default empty (off); no perf cost when empty.
+DEFINE_string(audit_61_branch_probe_pcs, "",
+              "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).",
+              "Audit");
+
+// AUDIT-067: comma-separated list of u32 values to watch. When non-empty,
+// every 4-byte guest store (stw/stwu/stwx/stwux/stmw) emits a runtime
+// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N.
+// Max 4 values. Default empty (off); zero overhead when empty.
+DEFINE_string(audit_67_value_watch, "",
+              "AUDIT-067: CSV of u32 values (max 4) — log every guest "
+              "store whose value matches.",
+              "Audit");
+
+// Phase A — see kernel/event_log.h.
+DEFINE_string(phase_a_event_log_path, "",
+              "Phase A: write schema-v1 JSONL event log to this path. "
+              "Empty (default) = disabled.",
+              "Audit");
+DEFINE_bool(phase_a_event_log_mem_writes, false,
+            "Phase A: include mem.write events in the JSONL log. RESERVED — "
+            "not wired in this phase. Default false.",
+            "Audit");
+
+// Phase B — see kernel/phase_b_snapshot.h.
+DEFINE_string(phase_b_snapshot_dir, "",
+              "Phase B: write 5-file structured state snapshot to "
+              "<dir>/canary/ at the moment immediately before the first "
+              "guest PPC instruction of entry_point. Empty (default) = "
+              "disabled, zero overhead.",
+              "Audit");
+DEFINE_bool(phase_b_snapshot_and_exit, false,
+            "Phase B: after writing the snapshot, exit the process "
+            "immediately (std::_Exit(0)) so re-runs are byte-deterministic.",
+            "Audit");
+DEFINE_bool(phase_b_dump_section_content, false,
+            "Phase B: in memory.json, populate section_contents[].content_b64 "
+            "with raw bytes of every committed XEX-image region. Default "
+            "false — per-region SHA-256 is enough for the routine diff; "
+            "this is the escape hatch for the STOP-and-report condition "
+            "(image_loaded_sha256 mismatch).",
+            "Audit");
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
index 38c4f98ba..5704a25c7 100644
--- a/src/xenia/cpu/cpu_flags.h
+++ b/src/xenia/cpu/cpu_flags.h
@@ -35,4 +35,32 @@ DECLARE_bool(break_condition_truncate);
 
 DECLARE_bool(break_on_debugbreak);
 
+// AUDIT-DEMO smoke marker.
+DECLARE_bool(audit_demo_setup_trace);
+
+// AUDIT-061: multi-PC branch probe — emits one log line per fire with
+// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs.
+DECLARE_string(audit_61_branch_probe_pcs);
+
+// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose
+// value-to-be-stored matches any configured value. CSV of u32 values
+// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty.
+DECLARE_string(audit_67_value_watch);
+
+// Phase A: JSONL event-log emitter path. When non-empty, the engine writes
+// schema-v1 JSONL events to this file. Empty (default) = no overhead, no
+// behavior change. Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md
+DECLARE_string(phase_a_event_log_path);
+DECLARE_bool(phase_a_event_log_mem_writes);
+
+// Phase B: initial-state snapshot. When the dir cvar is non-empty, the
+// engine writes a five-file structured state snapshot (cpu_state.json,
+// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to
+// `<dir>/canary/` at the moment immediately before the first guest PPC
+// instruction of the XEX entry_point executes. See
+// `xenia-rs/audit-runs/phase-b-state-equivalence/`.
+DECLARE_string(phase_b_snapshot_dir);
+DECLARE_bool(phase_b_snapshot_and_exit);
+DECLARE_bool(phase_b_dump_section_content);
+
 #endif  // XENIA_CPU_CPU_FLAGS_H_
diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc
index cc7d90c2e..a8325a584 100644
--- a/src/xenia/kernel/xthread.cc
+++ b/src/xenia/kernel/xthread.cc
@@ -22,6 +22,7 @@
 #include "xenia/cpu/processor.h"
 #include "xenia/emulator.h"
 #include "xenia/kernel/kernel_state.h"
+#include "xenia/kernel/phase_b_snapshot.h"
 #include "xenia/kernel/user_module.h"
 #include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h"
 
@@ -575,6 +576,11 @@ void XThread::Execute() {
   // On Windows, setjmp/longjmp is used because MSVC's longjmp performs SEH
   // stack unwinding which already calls destructors.
   uint32_t next_address;
+  // Phase B snapshot. No-op when phase_b_snapshot_dir cvar is empty
+  // (default). When set, fires once on the entry-point thread immediately
+  // before its first guest instruction executes. See
+  // xenia/kernel/phase_b_snapshot.h.
+  ::xe::kernel::phase_b::FireIfEntryThread(this, thread_state_, address);
 #if !XE_PLATFORM_WIN32
   try {
     exit_code = static_cast<int>(kernel_state()->processor()->Execute(
--- a/src/xenia/kernel/phase_b_snapshot.h (NEW FILE)
+++ b/src/xenia/kernel/phase_b_snapshot.h
@@ -0,0 +1,43 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Phase B initial-state snapshot. Cvar-gated (default off).
+ * Spec: xenia-rs/audit-runs/phase-b-state-equivalence/
+ ******************************************************************************
+ */
+
+#ifndef XENIA_KERNEL_PHASE_B_SNAPSHOT_H_
+#define XENIA_KERNEL_PHASE_B_SNAPSHOT_H_
+
+#include <cstdint>
+
+namespace xe {
+namespace cpu {
+class ThreadState;
+}  // namespace cpu
+namespace kernel {
+
+class XThread;
+
+namespace phase_b {
+
+// Called immediately before the JIT executes the first guest PPC
+// instruction of a thread. Returns silently when:
+//   * phase_b_snapshot_dir cvar is empty (zero overhead — default off);
+//   * a snapshot has already been written (one-shot CAS guard);
+//   * `entry_address` does not match the loaded executable module's
+//     entry_point (this thread is not the entry thread — a worker
+//     spawned by an early kernel call could reach its first instruction
+//     before the boot thread does).
+//
+// On a match: writes <dir>/canary/{cpu_state,memory,kernel,vfs,config}.json
+// + manifest.json, optionally `_Exit(0)` per phase_b_snapshot_and_exit.
+void FireIfEntryThread(XThread* xthread, cpu::ThreadState* thread_state,
+                       uint32_t entry_address);
+
+}  // namespace phase_b
+}  // namespace kernel
+}  // namespace xe
+
+#endif  // XENIA_KERNEL_PHASE_B_SNAPSHOT_H_
--- a/src/xenia/kernel/phase_b_snapshot.cc (NEW FILE)
+++ b/src/xenia/kernel/phase_b_snapshot.cc
@@ -0,0 +1,907 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Phase B initial-state snapshot. See phase_b_snapshot.h.
+ ******************************************************************************
+ */
+
+#include "xenia/kernel/phase_b_snapshot.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <filesystem>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "third_party/crypto/sha256.h"
+#include "third_party/fmt/include/fmt/format.h"
+
+#include "xenia/base/cvar.h"
+#include "xenia/cpu/cpu_flags.h"
+#include "xenia/cpu/ppc/ppc_context.h"
+#include "xenia/cpu/thread_state.h"
+#include "xenia/kernel/kernel_state.h"
+#include "xenia/kernel/user_module.h"
+#include "xenia/kernel/util/object_table.h"
+#include "xenia/kernel/xobject.h"
+#include "xenia/kernel/xthread.h"
+#include "xenia/memory.h"
+#include "xenia/vfs/device.h"
+#include "xenia/vfs/entry.h"
+#include "xenia/vfs/virtual_file_system.h"
+
+namespace xe {
+namespace kernel {
+namespace phase_b {
+
+namespace {
+
+constexpr uint32_t kSchemaVersion = 1;
+constexpr const char* kEngineName = "canary";
+
+// One-shot guard. CAS-claim to ensure only the entry thread fires the
+// snapshot; release on guard-fail so a non-entry thread reaching its
+// first instruction first does not steal the shot.
+std::atomic<bool> g_claimed{false};
+std::atomic<bool> g_done{false};
+
+// ---------- string helpers ----------
+
+std::string JsonEscape(const std::string& s) {
+  std::string out;
+  out.reserve(s.size() + 2);
+  for (unsigned char c : s) {
+    if (c == '\\' || c == '"') {
+      out.push_back('\\');
+      out.push_back(static_cast<char>(c));
+    } else if (c == '\n') {
+      out += "\\n";
+    } else if (c == '\r') {
+      out += "\\r";
+    } else if (c == '\t') {
+      out += "\\t";
+    } else if (c < 0x20) {
+      out += fmt::format("\\u{:04x}", c);
+    } else {
+      out.push_back(static_cast<char>(c));
+    }
+  }
+  return out;
+}
+
+std::string Hex32(uint32_t v) { return fmt::format("\"0x{:08x}\"", v); }
+std::string Hex64(uint64_t v) { return fmt::format("\"0x{:016x}\"", v); }
+
+std::string Sha256Hex(const uint8_t* data, size_t len) {
+  ::sha256::SHA256 h;
+  h.add(data, len);
+  return h.getHash();
+}
+
+// Stream-style writer that produces newline-indented JSON with sorted keys.
+// We build a small tree first then serialize, so ordering is deterministic
+// independent of any std::unordered_map iteration order.
+class JsonNode {
+ public:
+  enum class Kind { Null, Bool, Int, UInt, IntStr, Str, Array, Object, Raw };
+  JsonNode() : kind_(Kind::Null) {}
+
+  static JsonNode Null() { JsonNode n; n.kind_ = Kind::Null; return n; }
+  static JsonNode Boolean(bool b) {
+    JsonNode n;
+    n.kind_ = Kind::Bool;
+    n.bool_ = b;
+    return n;
+  }
+  static JsonNode Integer(int64_t i) {
+    JsonNode n;
+    n.kind_ = Kind::Int;
+    n.int_ = i;
+    return n;
+  }
+  static JsonNode Unsigned(uint64_t u) {
+    JsonNode n;
+    n.kind_ = Kind::UInt;
+    n.uint_ = u;
+    return n;
+  }
+  // Pre-formatted JSON literal (e.g. `"0x..."`, raw object/array source).
+  static JsonNode Raw(std::string s) {
+    JsonNode n;
+    n.kind_ = Kind::Raw;
+    n.str_ = std::move(s);
+    return n;
+  }
+  static JsonNode String(std::string s) {
+    JsonNode n;
+    n.kind_ = Kind::Str;
+    n.str_ = std::move(s);
+    return n;
+  }
+  static JsonNode Array(std::vector<JsonNode> v) {
+    JsonNode n;
+    n.kind_ = Kind::Array;
+    n.array_ = std::move(v);
+    return n;
+  }
+  static JsonNode Object() {
+    JsonNode n;
+    n.kind_ = Kind::Object;
+    return n;
+  }
+  // Object that preserves insertion order (used at the top level of files,
+  // where the user-facing key ordering is canonical).
+  static JsonNode OrderedObject() {
+    JsonNode n;
+    n.kind_ = Kind::Object;
+    n.ordered_ = true;
+    return n;
+  }
+
+  void Set(const std::string& key, JsonNode v) {
+    // Record the key only on first insertion so that re-setting a key
+    // overwrites its value without emitting the key twice.
+    const bool inserted = obj_.insert_or_assign(key, std::move(v)).second;
+    if (ordered_ && inserted) ordered_keys_.push_back(key);
+  }
+
+  void Serialize(std::string& out, int indent = 0) const {
+    auto pad = [&](int n) {
+      out.append(static_cast<size_t>(n * 2), ' ');
+    };
+    switch (kind_) {
+      case Kind::Null:
+        out += "null";
+        break;
+      case Kind::Bool:
+        out += bool_ ? "true" : "false";
+        break;
+      case Kind::Int:
+        out += std::to_string(int_);
+        break;
+      case Kind::UInt:
+        out += std::to_string(uint_);
+        break;
+      case Kind::Raw:
+        out += str_;
+        break;
+      case Kind::Str:
+        out.push_back('"');
+        out += JsonEscape(str_);
+        out.push_back('"');
+        break;
+      case Kind::Array: {
+        if (array_.empty()) {
+          out += "[]";
+          break;
+        }
+        out += "[\n";
+        for (size_t i = 0; i < array_.size(); ++i) {
+          pad(indent + 1);
+          array_[i].Serialize(out, indent + 1);
+          if (i + 1 < array_.size()) out += ",";
+          out += "\n";
+        }
+        pad(indent);
+        out += "]";
+        break;
+      }
+      case Kind::Object: {
+        if (obj_.empty()) {
+          out += "{}";
+          break;
+        }
+        out += "{\n";
+        std::vector<std::string> keys;
+        if (ordered_) {
+          keys = ordered_keys_;
+        } else {
+          keys.reserve(obj_.size());
+          for (const auto& [k, _] : obj_) keys.push_back(k);
+          std::sort(keys.begin(), keys.end());
+        }
+        for (size_t i = 0; i < keys.size(); ++i) {
+          pad(indent + 1);
+          out.push_back('"');
+          out += JsonEscape(keys[i]);
+          out += "\": ";
+          obj_.at(keys[i]).Serialize(out, indent + 1);
+          if (i + 1 < keys.size()) out += ",";
+          out += "\n";
+        }
+        pad(indent);
+        out += "}";
+        break;
+      }
+    }
+  }
+
+ private:
+  Kind kind_;
+  bool bool_ = false;
+  int64_t int_ = 0;
+  uint64_t uint_ = 0;
+  std::string str_;
+  std::vector<JsonNode> array_;
+  std::map<std::string, JsonNode> obj_;
+  bool ordered_ = false;
+  std::vector<std::string> ordered_keys_;
+};
+
+// Write-flush-close helper (no fsync is performed). Returns the SHA-256 of
+// `content`, or 64 '0' characters when the file could not be opened or the
+// write, flush or close failed, so the manifest never vouches for a file
+// that is missing or truncated on disk.
+std::string WriteFileAndHash(const std::filesystem::path& path,
+                             const std::string& content) {
+  std::FILE* f = std::fopen(path.string().c_str(), "wb");
+  if (!f) {
+    return std::string(64, '0');
+  }
+  const size_t written = std::fwrite(content.data(), 1, content.size(), f);
+  const bool flushed = std::fflush(f) == 0;
+#if defined(_MSC_VER)
+  // Best effort on Windows — _commit takes a file descriptor.
+  // fmt:omit on cross-build to avoid Win32-only headers in this TU.
+#else
+  // Unix-style fsync would go here; skipped to keep deps minimal in this TU.
+#endif
+  const bool closed = std::fclose(f) == 0;
+  if (written != content.size() || !flushed || !closed) {
+    return std::string(64, '0');
+  }
+  return Sha256Hex(reinterpret_cast<const uint8_t*>(content.data()),
+                   content.size());
+}
+
+// ---------- cpu_state.json ----------
+
+JsonNode BuildCpuState(XThread* xthread, cpu::ThreadState* thread_state,
+                       uint32_t entry_pc) {
+  auto* ctx = thread_state->context();
+  auto root = JsonNode::OrderedObject();
+  root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
+  root.Set("engine", JsonNode::String(kEngineName));
+  // Canary's PPCContext doesn't track PC explicitly — the JIT dispatch
+  // loop owns it. At the snapshot point, the about-to-execute PC equals
+  // the `entry_pc` arg passed to FireIfEntryThread.
+  root.Set("pc", JsonNode::Raw(Hex32(entry_pc)));
+  root.Set("lr", JsonNode::Raw(Hex64(ctx->lr)));
+  root.Set("ctr", JsonNode::Raw(Hex64(ctx->ctr)));
+  root.Set("msr", JsonNode::Raw(Hex64(ctx->msr)));
+  root.Set("vrsave", JsonNode::Raw(Hex32(ctx->vrsave)));
+  root.Set("fpscr", JsonNode::Raw(Hex32(ctx->fpscr.value)));
+
+  auto xer = JsonNode::Object();
+  xer.Set("ca", JsonNode::Unsigned(ctx->xer_ca));
+  xer.Set("ov", JsonNode::Unsigned(ctx->xer_ov));
+  xer.Set("so", JsonNode::Unsigned(ctx->xer_so));
+  // tbc is not modelled per-field in canary's PPCContext; emit 0.
+  xer.Set("tbc", JsonNode::Unsigned(0));
+  root.Set("xer", std::move(xer));
+
+  // CR as 8 nibbles 0xN. Diff tool compares array positionally.
+  std::vector<JsonNode> cr_arr;
+  cr_arr.reserve(8);
+  uint64_t cr = ctx->cr();
+  for (int i = 0; i < 8; ++i) {
+    uint32_t nibble = (cr >> (28 - i * 4)) & 0xF;
+    cr_arr.push_back(JsonNode::Raw(fmt::format("\"0x{:x}\"", nibble)));
+  }
+  root.Set("cr", JsonNode::Array(std::move(cr_arr)));
+
+  std::vector<JsonNode> gpr;
+  gpr.reserve(32);
+  for (int i = 0; i < 32; ++i) {
+    gpr.push_back(JsonNode::Raw(Hex64(ctx->r[i])));
+  }
+  root.Set("gpr", JsonNode::Array(std::move(gpr)));
+
+  std::vector<JsonNode> fpr;
+  fpr.reserve(32);
+  for (int i = 0; i < 32; ++i) {
+    uint64_t bits = 0;
+    std::memcpy(&bits, &ctx->f[i], sizeof(bits));
+    fpr.push_back(JsonNode::Raw(Hex64(bits)));
+  }
+  root.Set("fpr", JsonNode::Array(std::move(fpr)));
+
+  // Emit 32 hex chars of the raw 16 bytes (byte 0 first). Ours uses
+  // big-endian-stored bytes; canary's union exposes u8[16] in the same
+  // host order. Emitting bytes[0]..bytes[15] keeps both engines' VR
+  // serializations directly comparable.
+  std::vector<JsonNode> vr;
+  vr.reserve(128);
+  for (int i = 0; i < 128; ++i) {
+    std::string s;
+    s.reserve(32);
+    for (int j = 0; j < 16; ++j) {
+      s += fmt::format("{:02x}", ctx->v[i].u8[j]);
+    }
+    vr.push_back(JsonNode::String(std::move(s)));
+  }
+  root.Set("vr", JsonNode::Array(std::move(vr)));
+  std::string vscr_s;
+  vscr_s.reserve(32);
+  for (int j = 0; j < 16; ++j) {
+    vscr_s += fmt::format("{:02x}", ctx->vscr_vec.u8[j]);
+  }
+  root.Set("vscr", JsonNode::String(std::move(vscr_s)));
+
+  root.Set("thread_id", JsonNode::Unsigned(xthread ? xthread->thread_id() : 0));
+  root.Set("hw_id", JsonNode::Unsigned(0));
+  root.Set("stack_base",
+           JsonNode::Raw(Hex32(xthread ? xthread->stack_base() : 0)));
+  root.Set("stack_limit",
+           JsonNode::Raw(Hex32(xthread ? xthread->stack_limit() : 0)));
+  root.Set("tls_base",
+           JsonNode::Raw(Hex32(xthread ? xthread->tls_ptr() : 0)));
+  root.Set("pcr_base",
+           JsonNode::Raw(Hex32(xthread ? xthread->pcr_ptr() : 0)));
+
+  std::vector<JsonNode> det_skip;
+  det_skip.push_back(JsonNode::String("hw_id"));
+  root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
+  return root;
+}
+
+// ---------- memory.json ----------
+
+struct CommittedRegion {
+  uint32_t start;
+  uint32_t end;
+  uint32_t protect;
+  std::string sha256;
+};
+
+void WalkHeapRegions(Memory* memory, uint32_t heap_base_addr,
+                     std::vector<CommittedRegion>& out_regions,
+                     std::map<std::string, uint64_t>& out_hist) {
+  auto* heap = memory->LookupHeap(heap_base_addr);
+  if (!heap) return;
+  const uint32_t heap_base = heap->heap_base();
+  const uint32_t heap_size = heap->heap_size();
+  const uint32_t page_size = heap->page_size();
+  // Read bytes via `virtual_membase + guest_address`. This is sound for
+  // the four guest-virtual heaps (0x00/0x40/0x80/0x90); physical heaps
+  // (0xA0/0xC0/0xE0) mirror physical_membase and can include host pages
+  // that are reserved but not backed at boot — reading them faults.
+  // Phase B only walks virtual heaps; the caller filters which bases
+  // to probe.
+  uint8_t* membase = memory->virtual_membase();
+  uint32_t cursor = heap_base;
+  uint32_t end = heap_base + heap_size;
+  while (cursor < end) {
+    HeapAllocationInfo info;
+    if (!heap->QueryRegionInfo(cursor, &info)) break;
+    if (info.region_size == 0) {
+      cursor += page_size;
+      continue;
+    }
+    if (info.state == 0) {
+      out_hist["free"] += info.region_size / page_size;
+    } else if ((info.state & 0x2) != 0) {  // kMemoryAllocationCommit
+      out_hist["committed"] += info.region_size / page_size;
+      // Hash region contents from virtual_membase + cursor.
+      std::string h = membase ? Sha256Hex(membase + cursor, info.region_size)
+                              : std::string(64, '0');
+      CommittedRegion r;
+      r.start = cursor;
+      r.end = cursor + info.region_size;
+      r.protect = info.protect;
+      r.sha256 = h;
+      out_regions.push_back(r);
+    } else {
+      out_hist["reserved"] += info.region_size / page_size;
+    }
+    cursor += info.region_size;
+  }
+}
+
+JsonNode BuildMemory(KernelState* kstate, bool dump_section_content) {
+  Memory* memory = kstate->memory();
+  auto root = JsonNode::OrderedObject();
+  root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
+  root.Set("engine", JsonNode::String(kEngineName));
+  root.Set("page_size", JsonNode::Unsigned(4096));
+  root.Set("guest_address_space_bytes",
+           JsonNode::Unsigned(uint64_t{0x100000000}));
+
+  // Phase B walks a FIXED set of named regions whose host backing is
+  // guaranteed live at entry_point time: the XEX image, the entry
+  // thread's stack, its PCR, its TLS block. A blanket "walk every
+  // committed page across all heaps" approach is unsafe because
+  // canary's `QueryRegionInfo` reports `state=COMMIT` for pages whose
+  // host mapping may still be lazy (Windows reserved-but-not-committed,
+  // physical heap mirrors with unmapped backing). Reading those host
+  // VAs faults — see Wine page-fault during initial bring-up.
+  //
+  // Named regions are sufficient for Phase B's purpose (catalog
+  // divergences at the snapshot point); the diff tool compares the
+  // ordered list, so any region present in one engine and absent in
+  // the other is a σ-structural divergence.
+  uint8_t* membase = memory->virtual_membase();
+  std::vector<CommittedRegion> all_regions;
+  std::map<std::string, uint64_t> global_hist;
+  auto hash_named_region = [&](uint32_t start, uint32_t size) {
+    if (size == 0 || !membase) return;
+    std::string h = Sha256Hex(membase + start, size);
+    CommittedRegion r;
+    r.start = start;
+    r.end = start + size;
+    r.protect = 0;
+    r.sha256 = h;
+    all_regions.push_back(r);
+    global_hist["committed"] += size / 4096;
+  };
+
+  // 1. XEX image.
+  if (auto exec_module = kstate->GetExecutableModule()) {
+    uint32_t image_base = exec_module->xex_module()->base_address();
+    uint32_t image_size = exec_module->xex_module()->image_size();
+    if (image_base && image_size) {
+      hash_named_region(image_base, image_size);
+    }
+  }
+  // 2. Entry thread's stack + PCR + TLS — taken from
+  //    XThread::GetCurrentThread(), i.e. the thread running this snapshot
+  //    code (BuildMemory is not handed the XThread explicitly).
+  if (auto* xthread = XThread::GetCurrentThread()) {
+    uint32_t stack_base = xthread->stack_base();
+    uint32_t stack_limit = xthread->stack_limit();
+    if (stack_base > stack_limit) {
+      hash_named_region(stack_limit, stack_base - stack_limit);
+    }
+    uint32_t pcr = xthread->pcr_ptr();
+    if (pcr) {
+      hash_named_region(pcr, 0x1000);
+    }
+    uint32_t tls = xthread->tls_ptr();
+    if (tls) {
+      hash_named_region(tls, 0x1000);
+    }
+  }
+
+  // Heap descriptors — emit the four virtual heaps' bounds. Histograms
+  // come from QueryRegionInfo (which is safe to call — it doesn't read
+  // backing pages).
+  const uint32_t heap_probes[] = {
+      0x00000000u, 0x40000000u, 0x80000000u, 0x90000000u,
+  };
+  std::vector<JsonNode> heaps_arr;
+  for (uint32_t base : heap_probes) {
+    auto* heap = memory->LookupHeap(base);
+    if (!heap) continue;
+    std::map<std::string, uint64_t> hist;
+    uint32_t cursor = heap->heap_base();
+    uint32_t hend = heap->heap_base() + heap->heap_size();
+    while (cursor < hend) {
+      HeapAllocationInfo info;
+      if (!heap->QueryRegionInfo(cursor, &info)) break;
+      if (info.region_size == 0) {
+        cursor += heap->page_size();
+        continue;
+      }
+      if (info.state == 0) {
+        hist["free"] += info.region_size / heap->page_size();
+      } else if ((info.state & 0x2) != 0) {
+        hist["committed"] += info.region_size / heap->page_size();
+      } else {
+        hist["reserved"] += info.region_size / heap->page_size();
+      }
+      cursor += info.region_size;
+    }
+    for (const auto& [k, v] : hist) global_hist[k] += v;
+    auto heap_obj = JsonNode::Object();
+    heap_obj.Set("name", JsonNode::String(fmt::format("v{:08x}", base)));
+    heap_obj.Set("base", JsonNode::Raw(Hex32(heap->heap_base())));
+    heap_obj.Set("size", JsonNode::Raw(Hex32(heap->heap_size())));
+    heap_obj.Set("page_size", JsonNode::Unsigned(heap->page_size()));
+    auto hist_obj = JsonNode::Object();
+    for (const auto& [k, v] : hist) {
+      hist_obj.Set(k, JsonNode::Unsigned(v));
+    }
+    heap_obj.Set("page_state_histogram", std::move(hist_obj));
+    heaps_arr.push_back(std::move(heap_obj));
+  }
+  root.Set("heaps", JsonNode::Array(std::move(heaps_arr)));
+
+  // Sort regions by (start, end).
+  std::sort(all_regions.begin(), all_regions.end(),
+            [](const CommittedRegion& a, const CommittedRegion& b) {
+              if (a.start != b.start) return a.start < b.start;
+              return a.end < b.end;
+            });
+  uint64_t committed_pages = 0;
+  std::vector<JsonNode> regions_arr;
+  regions_arr.reserve(all_regions.size());
+  for (const auto& r : all_regions) {
+    auto ro = JsonNode::Object();
+    ro.Set("start", JsonNode::Raw(Hex32(r.start)));
+    ro.Set("end", JsonNode::Raw(Hex32(r.end)));
+    ro.Set("byte_count", JsonNode::Unsigned(r.end - r.start));
+    ro.Set("protect", JsonNode::Unsigned(r.protect));
+    ro.Set("sha256", JsonNode::String(r.sha256));
+    ro.Set("section_kind", JsonNode::Null());
+    regions_arr.push_back(std::move(ro));
+    committed_pages += (r.end - r.start) / 4096;
+  }
+  root.Set("regions", JsonNode::Array(std::move(regions_arr)));
+  root.Set("committed_pages_total", JsonNode::Unsigned(committed_pages));
+
+  if (dump_section_content) {
+    std::vector<JsonNode> sec;
+    for (const auto& r : all_regions) {
+      auto so = JsonNode::Object();
+      so.Set("start", JsonNode::Raw(Hex32(r.start)));
+      so.Set("end", JsonNode::Raw(Hex32(r.end)));
+      so.Set("sha256", JsonNode::String(r.sha256));
+      so.Set("content_b64", JsonNode::String(""));  // Stubbed.
+      sec.push_back(std::move(so));
+    }
+    root.Set("section_contents", JsonNode::Array(std::move(sec)));
+  } else {
+    root.Set("section_contents", JsonNode::Null());
+  }
+
+  std::vector<JsonNode> det_skip;
+  det_skip.push_back(JsonNode::String("host_base_pointer"));
+  root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
+  return root;
+}
+
+// ---------- kernel.json ----------
+
+const char* TypeName(XObject::Type t) {
+  switch (t) {
+    case XObject::Type::Event: return "Event";
+    case XObject::Type::Mutant: return "Mutant";
+    case XObject::Type::Semaphore: return "Semaphore";
+    case XObject::Type::Thread: return "Thread";
+    case XObject::Type::Timer: return "Timer";
+    case XObject::Type::File: return "File";
+    case XObject::Type::IOCompletion: return "IOCompletion";
+    case XObject::Type::Module: return "Module";
+    case XObject::Type::Enumerator: return "Enumerator";
+    case XObject::Type::NotifyListener: return "NotifyListener";
+    case XObject::Type::Session: return "Session";
+    case XObject::Type::Socket: return "Socket";
+    case XObject::Type::SymbolicLink: return "SymbolicLink";
+    case XObject::Type::Device: return "Device";
+    case XObject::Type::Undefined: return "Undefined";
+  }
+  return "Undefined";
+}
+
+uint32_t TypeCode(XObject::Type t) {
+  switch (t) {
+    case XObject::Type::Event: return 0x01;
+    case XObject::Type::Mutant: return 0x02;
+    case XObject::Type::Semaphore: return 0x03;
+    case XObject::Type::Timer: return 0x04;
+    case XObject::Type::Thread: return 0x05;
+    case XObject::Type::File: return 0x06;
+    case XObject::Type::IOCompletion: return 0x07;
+    case XObject::Type::Module: return 0x08;
+    case XObject::Type::Enumerator: return 0x09;
+    case XObject::Type::NotifyListener: return 0x0B;
+    default: return 0x00;
+  }
+}
+
+// FNV-1a 64-bit semantic-id, matching event_log.cc::ComputeSemanticId.
+// At snapshot time we don't have a meaningful create_site_pc/create_tid/
+// create_idx tuple for every object (they were minted before Phase B
+// instrumentation existed), so fall back to a stable identity hash over
+// (object_type, primary_handle). This is consistent across runs of the
+// same engine; diff tool compares semantic IDs across engines only when
+// both sides also stamp the same identity inputs. For Phase B's purposes
+// (initial-state snapshot), the object population is tiny (≤ 2 entries
+// at entry-point time: the main thread, plus an executable module ref),
+// so a simple stable hash suffices.
+uint64_t StableObjectId(uint32_t type_code, uint32_t raw_handle) {
+  uint8_t bytes[8];
+  for (int i = 0; i < 4; ++i) bytes[i] = (type_code >> (i * 8)) & 0xFF;
+  for (int i = 0; i < 4; ++i) bytes[4 + i] = (raw_handle >> (i * 8)) & 0xFF;
+  uint64_t h = 0xCBF29CE484222325ULL;
+  for (int i = 0; i < 8; ++i) {
+    h ^= bytes[i];
+    h *= 0x100000001B3ULL;
+  }
+  return h;
+}
+
+JsonNode BuildKernel(KernelState* kstate, uint32_t entry_pc) {
+  auto root = JsonNode::OrderedObject();
+  root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
+  root.Set("engine", JsonNode::String(kEngineName));
+
+  auto objects = kstate->object_table()->GetAllObjects();
+  // Sort by semantic id for set-equivalence.
+  struct OneObj {
+    uint64_t sid;
+    JsonNode node;
+  };
+  std::vector<OneObj> entries;
+  for (auto& o : objects) {
+    uint32_t tc = TypeCode(o->type());
+    uint32_t rh = o->handle();
+    uint64_t sid = StableObjectId(tc, rh);
+    auto n = JsonNode::Object();
+    n.Set("handle_semantic_id", JsonNode::String(fmt::format("{:016x}", sid)));
+    n.Set("raw_handle_id", JsonNode::Raw(Hex32(rh)));
+    n.Set("type", JsonNode::String(TypeName(o->type())));
+    n.Set("type_code", JsonNode::Unsigned(tc));
+    n.Set("name", o->name().empty() ? JsonNode::Null()
+                                    : JsonNode::String(o->name()));
+    auto details = JsonNode::Object();
+    if (o->type() == XObject::Type::Thread) {
+      auto* th = static_cast<XThread*>(o.get());
+      details.Set("thread_id", JsonNode::Unsigned(th->thread_id()));
+      details.Set("is_entry_thread",
+                  JsonNode::Boolean(
+                      th->main_thread() ||
+                      (th->creation_params() &&
+                       th->creation_params()->start_address == entry_pc)));
+      details.Set("priority", JsonNode::Integer(th->priority()));
+      details.Set(
+          "stack_size",
+          JsonNode::Unsigned(th->creation_params()
+                                 ? th->creation_params()->stack_size
+                                 : 0));
+      details.Set("entry_pc",
+                  JsonNode::Raw(Hex32(th->creation_params()
+                                          ? th->creation_params()->start_address
+                                          : 0)));
+      details.Set("ctx_ptr",
+                  JsonNode::Raw(Hex32(th->creation_params()
+                                          ? th->creation_params()->start_context
+                                          : 0)));
+      details.Set("suspended", JsonNode::Boolean(false));
+    }
+    n.Set("details", std::move(details));
+    entries.push_back({sid, std::move(n)});
+  }
+  std::sort(entries.begin(), entries.end(),
+            [](const OneObj& a, const OneObj& b) { return a.sid < b.sid; });
+  std::vector<JsonNode> obj_arr;
+  obj_arr.reserve(entries.size());
+  for (auto& e : entries) obj_arr.push_back(std::move(e.node));
+  root.Set("objects", JsonNode::Array(std::move(obj_arr)));
+
+  // We don't enumerate handle_name_table / notification_listeners /
+  // exports — accessors are not public. Emit empty arrays so the diff
+  // tool's structural check still has the field present.
+  root.Set("handle_name_table", JsonNode::Array({}));
+  root.Set("notification_listeners", JsonNode::Array({}));
+  root.Set("exports_registered_count", JsonNode::Unsigned(0));
+  root.Set("exports_registered_sample", JsonNode::Array({}));
+  root.Set("exports_registered_sha256",
+           JsonNode::String(std::string(64, '0')));
+
+  std::vector<JsonNode> det_skip;
+  det_skip.push_back(JsonNode::String("raw_handle_id"));
+  det_skip.push_back(JsonNode::String("exports_registered_count"));
+  root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
+  return root;
+}
+
+// ---------- vfs.json ----------
+
+JsonNode BuildVfs(KernelState* kstate) {
+  auto root = JsonNode::OrderedObject();
+  root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
+  root.Set("engine", JsonNode::String(kEngineName));
+
+  auto* fs = kstate->file_system();
+  // VirtualFileSystem doesn't expose its `devices_` vector or `symlinks_`
+  // map publicly. To stay additive (no canary-core API surface changes),
+  // we probe a canonical set of paths via ResolvePath and report only
+  // what we can observe. Diff tool sorts mounts_observed by path.
+  std::vector<std::string> probe_paths = {
+      "\\Device\\Cdrom0",
+      "\\Device\\Cdrom0\\default.xex",
+      "\\Device\\Cdrom0\\dat",
+      "\\Device\\Cdrom0\\dat\\movie",
+      "\\Device\\Cdrom0\\dat\\movie\\opening.bik",
+      "game:\\default.xex",
+      "game:\\dat",
+      "cache:\\",
+      "cache:\\nonexistent_probe",
+      "\\Device\\HardDisk0\\Partition1",
+  };
+  std::sort(probe_paths.begin(), probe_paths.end());
+  std::vector<JsonNode> probes;
+  for (const auto& path : probe_paths) {
+    auto entry = fs->ResolvePath(path);
+    auto o = JsonNode::Object();
+    o.Set("path", JsonNode::String(path));
+    o.Set("resolved", JsonNode::Boolean(entry != nullptr));
+    if (entry) {
+      o.Set("is_directory",
+            JsonNode::Boolean((entry->attributes() & 0x10) != 0));  // FILE_ATTR_DIRECTORY
+      o.Set("size", JsonNode::Unsigned(entry->size()));
+    } else {
+      o.Set("is_directory", JsonNode::Null());
+      o.Set("size", JsonNode::Null());
+    }
+    probes.push_back(std::move(o));
+  }
+  root.Set("resolve_path_probes", JsonNode::Array(std::move(probes)));
+
+  // Mounts observed: report only what `ResolvePath` saw against the
+  // device prefixes we know about. The data is derived, not enumerated,
+  // so this is safe under future-canary device additions.
+  root.Set("mounted_devices_observed_count",
+           JsonNode::Unsigned(
+               (fs->ResolvePath("\\Device\\Cdrom0") != nullptr ? 1u : 0u)));
+
+  root.Set("cache_root_listing", JsonNode::Array({}));
+  std::vector<JsonNode> det_skip;
+  det_skip.push_back(JsonNode::String("host_path_realpath"));
+  root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
+  return root;
+}
+
+// ---------- config.json ----------
+
+JsonNode BuildConfig(KernelState* kstate, uint32_t entry_pc) {
+  auto root = JsonNode::OrderedObject();
+  root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
+  root.Set("engine", JsonNode::String(kEngineName));
+  root.Set("build_id", JsonNode::String("canary-phaseB"));
+
+  auto exec_module = kstate->GetExecutableModule();
+  uint32_t image_base = 0;
+  uint32_t image_size = 0;
+  std::string image_loaded_sha = std::string(64, '0');
+  std::string xex_header_sha = std::string(64, '0');
+  std::string iso_path_str;
+  if (exec_module) {
+    image_base = exec_module->xex_module()->base_address();
+    image_size = exec_module->xex_module()->image_size();
+    iso_path_str = exec_module->path();
+    uint8_t* host =
+        kstate->memory()->TranslateVirtual(image_base);
+    if (host && image_size > 0) {
+      image_loaded_sha = Sha256Hex(host, image_size);
+    }
+    if (exec_module->hash()) {
+      xex_header_sha = fmt::format("{:016x}", *exec_module->hash());
+    }
+  }
+  root.Set("iso_path", JsonNode::String(iso_path_str));
+  root.Set("xex_entry_point", JsonNode::Raw(Hex32(entry_pc)));
+  root.Set("xex_image_base", JsonNode::Raw(Hex32(image_base)));
+  root.Set("xex_image_size", JsonNode::Unsigned(image_size));
+  root.Set("image_loaded_sha256", JsonNode::String(image_loaded_sha));
+  root.Set("xex_header_sha256", JsonNode::String(xex_header_sha));
+
+  auto cvars = JsonNode::Object();
+  cvars.Set("phase_b_snapshot_dir",
+            JsonNode::String(cvars::phase_b_snapshot_dir));
+  cvars.Set("phase_b_snapshot_and_exit",
+            JsonNode::Boolean(cvars::phase_b_snapshot_and_exit));
+  cvars.Set("phase_b_dump_section_content",
+            JsonNode::Boolean(cvars::phase_b_dump_section_content));
+  cvars.Set("phase_a_event_log_path",
+            JsonNode::String(cvars::phase_a_event_log_path));
+  root.Set("cvars", std::move(cvars));
+
+  auto now = std::chrono::system_clock::now();
+  auto t = std::chrono::system_clock::to_time_t(now);
+  // wall_clock_iso8601 is non-deterministic; intended for human reading
+  // only. Diff tool skips it.
+  std::string wall = fmt::format("epoch:{}", static_cast<int64_t>(t));
+  root.Set("wall_clock_iso8601", JsonNode::String(wall));
+  root.Set("host_ns_at_snapshot", JsonNode::Unsigned(0));
+
+  std::vector<JsonNode> det_skip;
+  det_skip.push_back(JsonNode::String("host_ns_at_snapshot"));
+  det_skip.push_back(JsonNode::String("wall_clock_iso8601"));
+  det_skip.push_back(JsonNode::String("build_id"));
+  det_skip.push_back(JsonNode::String("iso_path"));
+  det_skip.push_back(JsonNode::String("cvars.phase_b_snapshot_dir"));
+  root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
+  return root;
+}
+
+void EmitFile(const std::filesystem::path& dir, const char* name,
+              const JsonNode& node, std::map<std::string, std::string>& hashes) {
+  std::string body;
+  node.Serialize(body, 0);
+  body.push_back('\n');
+  std::filesystem::path p = dir / name;
+  std::string h = WriteFileAndHash(p, body);
+  hashes[name] = h;
+}
+
+void WriteSnapshot(XThread* xthread, cpu::ThreadState* thread_state,
+                   uint32_t entry_pc) {
+  auto* kstate = xthread->kernel_state();
+  std::filesystem::path base(cvars::phase_b_snapshot_dir);
+  std::filesystem::path engine_dir = base / "canary";
+  std::error_code ec;
+  std::filesystem::create_directories(engine_dir, ec);
+
+  std::map<std::string, std::string> hashes;
+  EmitFile(engine_dir, "cpu_state.json",
+           BuildCpuState(xthread, thread_state, entry_pc), hashes);
+  EmitFile(engine_dir, "memory.json",
+           BuildMemory(kstate, cvars::phase_b_dump_section_content), hashes);
+  EmitFile(engine_dir, "kernel.json", BuildKernel(kstate, entry_pc), hashes);
+  EmitFile(engine_dir, "vfs.json", BuildVfs(kstate), hashes);
+  EmitFile(engine_dir, "config.json", BuildConfig(kstate, entry_pc), hashes);
+
+  auto manifest = JsonNode::OrderedObject();
+  manifest.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
+  manifest.Set("engine", JsonNode::String(kEngineName));
+  // Files object is sorted by key (alphabetic), matching the diff tool's
+  // assumption.
+  auto files = JsonNode::Object();
+  for (const auto& [name, hash] : hashes) {
+    files.Set(name, JsonNode::String(hash));
+  }
+  manifest.Set("files", std::move(files));
+
+  std::string body;
+  manifest.Serialize(body, 0);
+  body.push_back('\n');
+  std::filesystem::path mp = engine_dir / "manifest.json";
+  std::FILE* f = std::fopen(mp.string().c_str(), "wb");
+  if (f) {
+    std::fwrite(body.data(), 1, body.size(), f);
+    std::fflush(f);
+    std::fclose(f);
+  }
+}
+
+}  // namespace
+
+void FireIfEntryThread(XThread* xthread, cpu::ThreadState* thread_state,
+                       uint32_t entry_address) {
+  // Fast path: cvar empty → zero overhead. The .empty() check is a
+  // single read of a std::string's size, no syscall.
+  if (cvars::phase_b_snapshot_dir.empty()) {
+    return;
+  }
+  if (g_done.load(std::memory_order_acquire)) {
+    return;
+  }
+  // Resolve the entry_point of the executable module. If it doesn't
+  // match this thread's first instruction, this isn't the entry thread
+  // — release any claim we may have made and return.
+  auto* kstate = xthread ? xthread->kernel_state() : nullptr;
+  if (!kstate) return;
+  auto exec_module = kstate->GetExecutableModule();
+  if (!exec_module) return;
+  uint32_t entry_pc = exec_module->entry_point();
+  if (entry_address != entry_pc) return;
+
+  // CAS-claim. Releases on guard-fail (above) so a non-entry thread
+  // reaching its first instruction before the boot thread doesn't
+  // steal the shot.
+ bool expected = false; + if (!g_claimed.compare_exchange_strong(expected, true, + std::memory_order_acq_rel)) { + return; + } + + WriteSnapshot(xthread, thread_state, entry_pc); + g_done.store(true, std::memory_order_release); + + if (cvars::phase_b_snapshot_and_exit) { + std::_Exit(0); + } +} + +} // namespace phase_b +} // namespace kernel +} // namespace xe