Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity + related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps (.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1071 lines
40 KiB
Diff
1071 lines
40 KiB
Diff
diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
|
||
index 3ff067e15..1abad3bd2 100644
|
||
--- a/src/xenia/cpu/cpu_flags.cc
|
||
+++ b/src/xenia/cpu/cpu_flags.cc
|
||
@@ -57,3 +57,54 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU");
|
||
|
||
DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.",
|
||
"CPU");
|
||
+
|
||
+// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool.
|
||
+DEFINE_bool(audit_demo_setup_trace, true,
|
||
+ "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.",
|
||
+ "Audit");
|
||
+
|
||
+// AUDIT-061: comma-separated list of guest PCs to log on each fire.
|
||
+// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits
|
||
+// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N.
|
||
+// Default empty (off); no perf cost when empty.
|
||
+DEFINE_string(audit_61_branch_probe_pcs, "",
|
||
+ "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).",
|
||
+ "Audit");
|
||
+
|
||
+// AUDIT-067: comma-separated list of u32 values to watch. When non-empty,
|
||
+// every 4-byte guest store (stw/stwu/stwx/stwux/stmw) emits a runtime
|
||
+// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N.
|
||
+// Max 4 values. Default empty (off); zero overhead when empty.
|
||
+DEFINE_string(audit_67_value_watch, "",
|
||
+ "AUDIT-067: CSV of u32 values (max 4) — log every guest "
|
||
+ "store whose value matches.",
|
||
+ "Audit");
|
||
+
|
||
+// Phase A — see kernel/event_log.h.
|
||
+DEFINE_string(phase_a_event_log_path, "",
|
||
+ "Phase A: write schema-v1 JSONL event log to this path. "
|
||
+ "Empty (default) = disabled.",
|
||
+ "Audit");
|
||
+DEFINE_bool(phase_a_event_log_mem_writes, false,
|
||
+ "Phase A: include mem.write events in the JSONL log. RESERVED — "
|
||
+ "not wired in this phase. Default false.",
|
||
+ "Audit");
|
||
+
|
||
+// Phase B — see kernel/phase_b_snapshot.h.
|
||
+DEFINE_string(phase_b_snapshot_dir, "",
|
||
+ "Phase B: write 5-file structured state snapshot to "
|
||
+ "<dir>/canary/ at the moment immediately before the first "
|
||
+ "guest PPC instruction of entry_point. Empty (default) = "
|
||
+ "disabled, zero overhead.",
|
||
+ "Audit");
|
||
+DEFINE_bool(phase_b_snapshot_and_exit, false,
|
||
+ "Phase B: after writing the snapshot, exit the process "
|
||
+ "immediately (std::_Exit(0)) so re-runs are byte-deterministic.",
|
||
+ "Audit");
|
||
+DEFINE_bool(phase_b_dump_section_content, false,
|
||
+ "Phase B: in memory.json, populate section_contents[].content_b64 "
|
||
+ "with raw bytes of every committed XEX-image region. Default "
|
||
+ "false — per-region SHA-256 is enough for the routine diff; "
|
||
+ "this is the escape hatch for the STOP-and-report condition "
|
||
+ "(image_loaded_sha256 mismatch).",
|
||
+ "Audit");
|
||
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
|
||
index 38c4f98ba..5704a25c7 100644
|
||
--- a/src/xenia/cpu/cpu_flags.h
|
||
+++ b/src/xenia/cpu/cpu_flags.h
|
||
@@ -35,4 +35,32 @@ DECLARE_bool(break_condition_truncate);
|
||
|
||
DECLARE_bool(break_on_debugbreak);
|
||
|
||
+// AUDIT-DEMO smoke marker.
|
||
+DECLARE_bool(audit_demo_setup_trace);
|
||
+
|
||
+// AUDIT-061: multi-PC branch probe — emits one log line per fire with
|
||
+// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs.
|
||
+DECLARE_string(audit_61_branch_probe_pcs);
|
||
+
|
||
+// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose
|
||
+// value-to-be-stored matches any configured value. CSV of u32 values
|
||
+// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty.
|
||
+DECLARE_string(audit_67_value_watch);
|
||
+
|
||
+// Phase A: JSONL event-log emitter path. When non-empty, the engine writes
|
||
+// schema-v1 JSONL events to this file. Empty (default) = no overhead, no
|
||
+// behavior change. Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md
|
||
+DECLARE_string(phase_a_event_log_path);
|
||
+DECLARE_bool(phase_a_event_log_mem_writes);
|
||
+
|
||
+// Phase B: initial-state snapshot. When the dir cvar is non-empty, the
|
||
+// engine writes a five-file structured state snapshot (cpu_state.json,
|
||
+// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to
|
||
+// `<dir>/canary/` at the moment immediately before the first guest PPC
|
||
+// instruction of the XEX entry_point executes. See
|
||
+// `xenia-rs/audit-runs/phase-b-state-equivalence/`.
|
||
+DECLARE_string(phase_b_snapshot_dir);
|
||
+DECLARE_bool(phase_b_snapshot_and_exit);
|
||
+DECLARE_bool(phase_b_dump_section_content);
|
||
+
|
||
#endif // XENIA_CPU_CPU_FLAGS_H_
|
||
diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc
|
||
index cc7d90c2e..a8325a584 100644
|
||
--- a/src/xenia/kernel/xthread.cc
|
||
+++ b/src/xenia/kernel/xthread.cc
|
||
@@ -22,6 +22,7 @@
|
||
#include "xenia/cpu/processor.h"
|
||
#include "xenia/emulator.h"
|
||
#include "xenia/kernel/kernel_state.h"
|
||
+#include "xenia/kernel/phase_b_snapshot.h"
|
||
#include "xenia/kernel/user_module.h"
|
||
#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h"
|
||
|
||
@@ -575,6 +576,11 @@ void XThread::Execute() {
|
||
// On Windows, setjmp/longjmp is used because MSVC's longjmp performs SEH
|
||
// stack unwinding which already calls destructors.
|
||
uint32_t next_address;
|
||
+ // Phase B snapshot. No-op when phase_b_snapshot_dir cvar is empty
|
||
+ // (default). When set, fires once on the entry-point thread immediately
|
||
+ // before its first guest instruction executes. See
|
||
+ // xenia/kernel/phase_b_snapshot.h.
|
||
+ ::xe::kernel::phase_b::FireIfEntryThread(this, thread_state_, address);
|
||
#if !XE_PLATFORM_WIN32
|
||
try {
|
||
exit_code = static_cast<int>(kernel_state()->processor()->Execute(
|
||
|
||
--- a/src/xenia/kernel/phase_b_snapshot.h (NEW FILE)
|
||
+++ b/src/xenia/kernel/phase_b_snapshot.h
|
||
@@ -0,0 +1,43 @@
|
||
+/**
|
||
+ ******************************************************************************
|
||
+ * Xenia : Xbox 360 Emulator Research Project *
|
||
+ ******************************************************************************
|
||
+ * Phase B initial-state snapshot. Cvar-gated (default off).
|
||
+ * Spec: xenia-rs/audit-runs/phase-b-state-equivalence/
|
||
+ ******************************************************************************
|
||
+ */
|
||
+
|
||
+#ifndef XENIA_KERNEL_PHASE_B_SNAPSHOT_H_
|
||
+#define XENIA_KERNEL_PHASE_B_SNAPSHOT_H_
|
||
+
|
||
+#include <cstdint>
|
||
+
|
||
+namespace xe {
|
||
+namespace cpu {
|
||
+class ThreadState;
|
||
+} // namespace cpu
|
||
+namespace kernel {
|
||
+
|
||
+class XThread;
|
||
+
|
||
+namespace phase_b {
|
||
+
|
||
+// Called immediately before the JIT executes the first guest PPC
|
||
+// instruction of a thread. Returns silently when:
|
||
+// * phase_b_snapshot_dir cvar is empty (zero overhead — default off);
|
||
+// * a snapshot has already been written (one-shot CAS guard);
|
||
+// * `entry_address` does not match the loaded executable module's
|
||
+// entry_point (this thread is not the entry thread — a worker
|
||
+// spawned by an early kernel call could reach its first instruction
|
||
+// before the boot thread does).
|
||
+//
|
||
+// On a match: writes <dir>/canary/{cpu_state,memory,kernel,vfs,config}.json
|
||
+// + manifest.json, optionally `_Exit(0)` per phase_b_snapshot_and_exit.
|
||
+void FireIfEntryThread(XThread* xthread, cpu::ThreadState* thread_state,
|
||
+ uint32_t entry_address);
|
||
+
|
||
+} // namespace phase_b
|
||
+} // namespace kernel
|
||
+} // namespace xe
|
||
+
|
||
+#endif // XENIA_KERNEL_PHASE_B_SNAPSHOT_H_
|
||
|
||
--- a/src/xenia/kernel/phase_b_snapshot.cc (NEW FILE)
|
||
+++ b/src/xenia/kernel/phase_b_snapshot.cc
|
||
@@ -0,0 +1,899 @@
|
||
+/**
|
||
+ ******************************************************************************
|
||
+ * Xenia : Xbox 360 Emulator Research Project *
|
||
+ ******************************************************************************
|
||
+ * Phase B initial-state snapshot. See phase_b_snapshot.h.
|
||
+ ******************************************************************************
|
||
+ */
|
||
+
|
||
+#include "xenia/kernel/phase_b_snapshot.h"
|
||
+
|
||
+#include <algorithm>
|
||
+#include <atomic>
|
||
+#include <chrono>
|
||
+#include <cstdint>
|
||
+#include <cstdio>
|
||
+#include <cstdlib>
|
||
+#include <cstring>
|
||
+#include <filesystem>
|
||
+#include <map>
|
||
+#include <string>
|
||
+#include <vector>
|
||
+
|
||
+#include "third_party/crypto/sha256.h"
|
||
+#include "third_party/fmt/include/fmt/format.h"
|
||
+
|
||
+#include "xenia/base/cvar.h"
|
||
+#include "xenia/cpu/cpu_flags.h"
|
||
+#include "xenia/cpu/ppc/ppc_context.h"
|
||
+#include "xenia/cpu/thread_state.h"
|
||
+#include "xenia/kernel/kernel_state.h"
|
||
+#include "xenia/kernel/user_module.h"
|
||
+#include "xenia/kernel/util/object_table.h"
|
||
+#include "xenia/kernel/xobject.h"
|
||
+#include "xenia/kernel/xthread.h"
|
||
+#include "xenia/memory.h"
|
||
+#include "xenia/vfs/device.h"
|
||
+#include "xenia/vfs/entry.h"
|
||
+#include "xenia/vfs/virtual_file_system.h"
|
||
+
|
||
+namespace xe {
|
||
+namespace kernel {
|
||
+namespace phase_b {
|
||
+
|
||
+namespace {
|
||
+
|
||
+constexpr uint32_t kSchemaVersion = 1;
|
||
+constexpr const char* kEngineName = "canary";
|
||
+
|
||
+// One-shot guard. CAS-claim to ensure only the entry thread fires the
|
||
+// snapshot; release on guard-fail so a non-entry thread reaching its
|
||
+// first instruction first does not steal the shot.
|
||
+std::atomic<bool> g_claimed{false};
|
||
+std::atomic<bool> g_done{false};
|
||
+
|
||
+// ---------- string helpers ----------
|
||
+
|
||
+std::string JsonEscape(const std::string& s) {
|
||
+ std::string out;
|
||
+ out.reserve(s.size() + 2);
|
||
+ for (unsigned char c : s) {
|
||
+ if (c == '\\' || c == '"') {
|
||
+ out.push_back('\\');
|
||
+ out.push_back(static_cast<char>(c));
|
||
+ } else if (c == '\n') {
|
||
+ out += "\\n";
|
||
+ } else if (c == '\r') {
|
||
+ out += "\\r";
|
||
+ } else if (c == '\t') {
|
||
+ out += "\\t";
|
||
+ } else if (c < 0x20) {
|
||
+ out += fmt::format("\\u{:04x}", c);
|
||
+ } else {
|
||
+ out.push_back(static_cast<char>(c));
|
||
+ }
|
||
+ }
|
||
+ return out;
|
||
+}
|
||
+
|
||
+std::string Hex32(uint32_t v) { return fmt::format("\"0x{:08x}\"", v); }
|
||
+std::string Hex64(uint64_t v) { return fmt::format("\"0x{:016x}\"", v); }
|
||
+
|
||
+std::string Sha256Hex(const uint8_t* data, size_t len) {
|
||
+ ::sha256::SHA256 h;
|
||
+ h.add(data, len);
|
||
+ return h.getHash();
|
||
+}
|
||
+
|
||
+// JSON tree node serialized as newline-indented JSON. Plain objects emit
|
||
+// their keys sorted; OrderedObject() emits them in insertion order. The tree
|
||
+// is built first and serialized afterwards, so output order is deterministic.
|
||
+class JsonNode {
|
||
+ public:
|
||
+ enum class Kind { Null, Bool, Int, UInt, IntStr, Str, Array, Object, Raw };
|
||
+ JsonNode() : kind_(Kind::Null) {}
|
||
+
|
||
+ static JsonNode Null() { JsonNode n; n.kind_ = Kind::Null; return n; }
|
||
+ static JsonNode Boolean(bool b) {
|
||
+ JsonNode n;
|
||
+ n.kind_ = Kind::Bool;
|
||
+ n.bool_ = b;
|
||
+ return n;
|
||
+ }
|
||
+ static JsonNode Integer(int64_t i) {
|
||
+ JsonNode n;
|
||
+ n.kind_ = Kind::Int;
|
||
+ n.int_ = i;
|
||
+ return n;
|
||
+ }
|
||
+ static JsonNode Unsigned(uint64_t u) {
|
||
+ JsonNode n;
|
||
+ n.kind_ = Kind::UInt;
|
||
+ n.uint_ = u;
|
||
+ return n;
|
||
+ }
|
||
+ // Pre-formatted JSON literal (e.g. `"0x..."`, raw object/array source).
|
||
+ static JsonNode Raw(std::string s) {
|
||
+ JsonNode n;
|
||
+ n.kind_ = Kind::Raw;
|
||
+ n.str_ = std::move(s);
|
||
+ return n;
|
||
+ }
|
||
+ static JsonNode String(std::string s) {
|
||
+ JsonNode n;
|
||
+ n.kind_ = Kind::Str;
|
||
+ n.str_ = std::move(s);
|
||
+ return n;
|
||
+ }
|
||
+ static JsonNode Array(std::vector<JsonNode> v) {
|
||
+ JsonNode n;
|
||
+ n.kind_ = Kind::Array;
|
||
+ n.array_ = std::move(v);
|
||
+ return n;
|
||
+ }
|
||
+ static JsonNode Object() {
|
||
+ JsonNode n;
|
||
+ n.kind_ = Kind::Object;
|
||
+ return n;
|
||
+ }
|
||
+ // Object that preserves insertion order (used at the top level of files,
|
||
+ // where the user-facing key ordering is canonical).
|
||
+ static JsonNode OrderedObject() {
|
||
+ JsonNode n;
|
||
+ n.kind_ = Kind::Object;
|
||
+ n.ordered_ = true;
|
||
+ return n;
|
||
+ }
|
||
+
|
||
+ void Set(const std::string& key, JsonNode v) {
|
||
+ obj_[key] = std::move(v);
|
||
+ if (ordered_) ordered_keys_.push_back(key);
|
||
+ }
|
||
+
|
||
+ void Serialize(std::string& out, int indent = 0) const {
|
||
+ auto pad = [&](int n) {
|
||
+ out.append(static_cast<size_t>(n * 2), ' ');
|
||
+ };
|
||
+ switch (kind_) {
|
||
+ case Kind::Null:
|
||
+ out += "null";
|
||
+ break;
|
||
+ case Kind::Bool:
|
||
+ out += bool_ ? "true" : "false";
|
||
+ break;
|
||
+ case Kind::Int:
|
||
+ out += std::to_string(int_);
|
||
+ break;
|
||
+ case Kind::UInt:
|
||
+ out += std::to_string(uint_);
|
||
+ break;
|
||
+ case Kind::Raw:
|
||
+ out += str_;
|
||
+ break;
|
||
+ case Kind::Str:
|
||
+ out.push_back('"');
|
||
+ out += JsonEscape(str_);
|
||
+ out.push_back('"');
|
||
+ break;
|
||
+ case Kind::Array: {
|
||
+ if (array_.empty()) {
|
||
+ out += "[]";
|
||
+ break;
|
||
+ }
|
||
+ out += "[\n";
|
||
+ for (size_t i = 0; i < array_.size(); ++i) {
|
||
+ pad(indent + 1);
|
||
+ array_[i].Serialize(out, indent + 1);
|
||
+ if (i + 1 < array_.size()) out += ",";
|
||
+ out += "\n";
|
||
+ }
|
||
+ pad(indent);
|
||
+ out += "]";
|
||
+ break;
|
||
+ }
|
||
+ case Kind::Object: {
|
||
+ if (obj_.empty()) {
|
||
+ out += "{}";
|
||
+ break;
|
||
+ }
|
||
+ out += "{\n";
|
||
+ std::vector<std::string> keys;
|
||
+ if (ordered_) {
|
||
+ keys = ordered_keys_;
|
||
+ } else {
|
||
+ keys.reserve(obj_.size());
|
||
+ for (const auto& [k, _] : obj_) keys.push_back(k);
|
||
+ std::sort(keys.begin(), keys.end());
|
||
+ }
|
||
+ for (size_t i = 0; i < keys.size(); ++i) {
|
||
+ pad(indent + 1);
|
||
+ out.push_back('"');
|
||
+ out += JsonEscape(keys[i]);
|
||
+ out += "\": ";
|
||
+ obj_.at(keys[i]).Serialize(out, indent + 1);
|
||
+ if (i + 1 < keys.size()) out += ",";
|
||
+ out += "\n";
|
||
+ }
|
||
+ pad(indent);
|
||
+ out += "}";
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ private:
|
||
+ Kind kind_;
|
||
+ bool bool_ = false;
|
||
+ int64_t int_ = 0;
|
||
+ uint64_t uint_ = 0;
|
||
+ std::string str_;
|
||
+ std::vector<JsonNode> array_;
|
||
+ std::map<std::string, JsonNode> obj_;
|
||
+ bool ordered_ = false;
|
||
+ std::vector<std::string> ordered_keys_;
|
||
+};
|
||
+
|
||
+// Write-flush-close helper (no fsync). Returns SHA-256 of `content`; 64 '0's if fopen fails.
|
||
+std::string WriteFileAndHash(const std::filesystem::path& path,
|
||
+ const std::string& content) {
|
||
+ std::FILE* f = std::fopen(path.string().c_str(), "wb");
|
||
+ if (!f) {
|
||
+ return std::string(64, '0');
|
||
+ }
|
||
+ std::fwrite(content.data(), 1, content.size(), f);
|
||
+ std::fflush(f);
|
||
+#if defined(_MSC_VER)
|
||
+ // A Windows sync would use _commit (which takes a file descriptor); it is
|
||
+ // omitted here to avoid Win32-only headers in this TU.
|
||
+#else
|
||
+ // Unix-style fsync would go here; skipped to keep deps minimal in this TU.
|
||
+#endif
|
||
+ std::fclose(f);
|
||
+ return Sha256Hex(reinterpret_cast<const uint8_t*>(content.data()),
|
||
+ content.size());
|
||
+}
|
||
+
|
||
+// ---------- cpu_state.json ----------
|
||
+
|
||
+JsonNode BuildCpuState(XThread* xthread, cpu::ThreadState* thread_state,
|
||
+ uint32_t entry_pc) {
|
||
+ auto* ctx = thread_state->context();
|
||
+ auto root = JsonNode::OrderedObject();
|
||
+ root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
|
||
+ root.Set("engine", JsonNode::String(kEngineName));
|
||
+ // Canary's PPCContext doesn't track PC explicitly — the JIT dispatch
|
||
+ // loop owns it. At the snapshot point, the about-to-execute PC equals
|
||
+ // the `entry_pc` arg passed to FireIfEntryThread.
|
||
+ root.Set("pc", JsonNode::Raw(Hex32(entry_pc)));
|
||
+ root.Set("lr", JsonNode::Raw(Hex64(ctx->lr)));
|
||
+ root.Set("ctr", JsonNode::Raw(Hex64(ctx->ctr)));
|
||
+ root.Set("msr", JsonNode::Raw(Hex64(ctx->msr)));
|
||
+ root.Set("vrsave", JsonNode::Raw(Hex32(ctx->vrsave)));
|
||
+ root.Set("fpscr", JsonNode::Raw(Hex32(ctx->fpscr.value)));
|
||
+
|
||
+ auto xer = JsonNode::Object();
|
||
+ xer.Set("ca", JsonNode::Unsigned(ctx->xer_ca));
|
||
+ xer.Set("ov", JsonNode::Unsigned(ctx->xer_ov));
|
||
+ xer.Set("so", JsonNode::Unsigned(ctx->xer_so));
|
||
+ // tbc is not modelled per-field in canary's PPCContext; emit 0.
|
||
+ xer.Set("tbc", JsonNode::Unsigned(0));
|
||
+ root.Set("xer", std::move(xer));
|
||
+
|
||
+ // CR as 8 nibbles 0xN. Diff tool compares array positionally.
|
||
+ std::vector<JsonNode> cr_arr;
|
||
+ cr_arr.reserve(8);
|
||
+ uint64_t cr = ctx->cr();
|
||
+ for (int i = 0; i < 8; ++i) {
|
||
+ uint32_t nibble = (cr >> (28 - i * 4)) & 0xF;
|
||
+ cr_arr.push_back(JsonNode::Raw(fmt::format("\"0x{:x}\"", nibble)));
|
||
+ }
|
||
+ root.Set("cr", JsonNode::Array(std::move(cr_arr)));
|
||
+
|
||
+ std::vector<JsonNode> gpr;
|
||
+ gpr.reserve(32);
|
||
+ for (int i = 0; i < 32; ++i) {
|
||
+ gpr.push_back(JsonNode::Raw(Hex64(ctx->r[i])));
|
||
+ }
|
||
+ root.Set("gpr", JsonNode::Array(std::move(gpr)));
|
||
+
|
||
+ std::vector<JsonNode> fpr;
|
||
+ fpr.reserve(32);
|
||
+ for (int i = 0; i < 32; ++i) {
|
||
+ uint64_t bits = 0;
|
||
+ std::memcpy(&bits, &ctx->f[i], sizeof(bits));
|
||
+ fpr.push_back(JsonNode::Raw(Hex64(bits)));
|
||
+ }
|
||
+ root.Set("fpr", JsonNode::Array(std::move(fpr)));
|
||
+
|
||
+ // Emit 32 hex chars of the raw 16 bytes (byte 0 first). Ours uses
|
||
+ // big-endian-stored bytes; canary's union exposes u8[16] in the same
|
||
+ // host order. Emitting bytes[0]..bytes[15] keeps both engines' VR
|
||
+ // serializations directly comparable.
|
||
+ std::vector<JsonNode> vr;
|
||
+ vr.reserve(128);
|
||
+ for (int i = 0; i < 128; ++i) {
|
||
+ std::string s;
|
||
+ s.reserve(32);
|
||
+ for (int j = 0; j < 16; ++j) {
|
||
+ s += fmt::format("{:02x}", ctx->v[i].u8[j]);
|
||
+ }
|
||
+ vr.push_back(JsonNode::String(std::move(s)));
|
||
+ }
|
||
+ root.Set("vr", JsonNode::Array(std::move(vr)));
|
||
+ std::string vscr_s;
|
||
+ vscr_s.reserve(32);
|
||
+ for (int j = 0; j < 16; ++j) {
|
||
+ vscr_s += fmt::format("{:02x}", ctx->vscr_vec.u8[j]);
|
||
+ }
|
||
+ root.Set("vscr", JsonNode::String(std::move(vscr_s)));
|
||
+
|
||
+ root.Set("thread_id", JsonNode::Unsigned(xthread ? xthread->thread_id() : 0));
|
||
+ root.Set("hw_id", JsonNode::Unsigned(0));
|
||
+ root.Set("stack_base",
|
||
+ JsonNode::Raw(Hex32(xthread ? xthread->stack_base() : 0)));
|
||
+ root.Set("stack_limit",
|
||
+ JsonNode::Raw(Hex32(xthread ? xthread->stack_limit() : 0)));
|
||
+ root.Set("tls_base",
|
||
+ JsonNode::Raw(Hex32(xthread ? xthread->tls_ptr() : 0)));
|
||
+ root.Set("pcr_base",
|
||
+ JsonNode::Raw(Hex32(xthread ? xthread->pcr_ptr() : 0)));
|
||
+
|
||
+ std::vector<JsonNode> det_skip;
|
||
+ det_skip.push_back(JsonNode::String("hw_id"));
|
||
+ root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
|
||
+ return root;
|
||
+}
|
||
+
|
||
+// ---------- memory.json ----------
|
||
+
|
||
+struct CommittedRegion {
|
||
+ uint32_t start;
|
||
+ uint32_t end;
|
||
+ uint32_t protect;
|
||
+ std::string sha256;
|
||
+};
|
||
+
|
||
+void WalkHeapRegions(Memory* memory, uint32_t heap_base_addr,
|
||
+ std::vector<CommittedRegion>& out_regions,
|
||
+ std::map<std::string, uint64_t>& out_hist) {
|
||
+ auto* heap = memory->LookupHeap(heap_base_addr);
|
||
+ if (!heap) return;
|
||
+ const uint32_t heap_base = heap->heap_base();
|
||
+ const uint32_t heap_size = heap->heap_size();
|
||
+ const uint32_t page_size = heap->page_size();
|
||
+ // Read bytes via `virtual_membase + guest_address`. This is sound for
|
||
+ // the four guest-virtual heaps (0x00/0x40/0x80/0x90); physical heaps
|
||
+ // (0xA0/0xC0/0xE0) mirror physical_membase and can include host pages
|
||
+ // that are reserved but not backed at boot — reading them faults.
|
||
+ // Phase B only walks virtual heaps; the caller filters which bases
|
||
+ // to probe.
|
||
+ uint8_t* membase = memory->virtual_membase();
|
||
+ uint32_t cursor = heap_base;
|
||
+ uint32_t end = heap_base + heap_size;
|
||
+ while (cursor < end) {
|
||
+ HeapAllocationInfo info;
|
||
+ if (!heap->QueryRegionInfo(cursor, &info)) break;
|
||
+ if (info.region_size == 0) {
|
||
+ cursor += page_size;
|
||
+ continue;
|
||
+ }
|
||
+ if (info.state == 0) {
|
||
+ out_hist["free"] += info.region_size / page_size;
|
||
+ } else if ((info.state & 0x2) != 0) { // kMemoryAllocationCommit
|
||
+ out_hist["committed"] += info.region_size / page_size;
|
||
+ // Hash region contents from virtual_membase + cursor.
|
||
+ std::string h = membase ? Sha256Hex(membase + cursor, info.region_size)
|
||
+ : std::string(64, '0');
|
||
+ CommittedRegion r;
|
||
+ r.start = cursor;
|
||
+ r.end = cursor + info.region_size;
|
||
+ r.protect = info.protect;
|
||
+ r.sha256 = h;
|
||
+ out_regions.push_back(r);
|
||
+ } else {
|
||
+ out_hist["reserved"] += info.region_size / page_size;
|
||
+ }
|
||
+ cursor += info.region_size;
|
||
+ }
|
||
+}
|
||
+
|
||
+JsonNode BuildMemory(KernelState* kstate, bool dump_section_content) {
|
||
+ Memory* memory = kstate->memory();
|
||
+ auto root = JsonNode::OrderedObject();
|
||
+ root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
|
||
+ root.Set("engine", JsonNode::String(kEngineName));
|
||
+ root.Set("page_size", JsonNode::Unsigned(4096));
|
||
+ root.Set("guest_address_space_bytes",
|
||
+ JsonNode::Unsigned(uint64_t{0x100000000}));
|
||
+
|
||
+ // Phase B walks a FIXED set of named regions whose host backing is
|
||
+ // guaranteed live at entry_point time: the XEX image, the entry
|
||
+ // thread's stack, its PCR, its TLS block. A blanket "walk every
|
||
+ // committed page across all heaps" approach is unsafe because
|
||
+ // canary's `QueryRegionInfo` reports `state=COMMIT` for pages whose
|
||
+ // host mapping may still be lazy (Windows reserved-but-not-committed,
|
||
+ // physical heap mirrors with unmapped backing). Reading those host
|
||
+ // VAs faults — see Wine page-fault during initial bring-up.
|
||
+ //
|
||
+ // Named regions are sufficient for Phase B's purpose (catalog
|
||
+ // divergences at the snapshot point); the diff tool compares the
|
||
+ // ordered list, so any region present in one engine and absent in
|
||
+ // the other is a σ-structural divergence.
|
||
+ uint8_t* membase = memory->virtual_membase();
|
||
+ std::vector<CommittedRegion> all_regions;
|
||
+ std::map<std::string, uint64_t> global_hist;
|
||
+ auto hash_named_region = [&](uint32_t start, uint32_t size) {
|
||
+ if (size == 0 || !membase) return;
|
||
+ std::string h = Sha256Hex(membase + start, size);
|
||
+ CommittedRegion r;
|
||
+ r.start = start;
|
||
+ r.end = start + size;
|
||
+ r.protect = 0;
|
||
+ r.sha256 = h;
|
||
+ all_regions.push_back(r);
|
||
+ global_hist["committed"] += size / 4096;
|
||
+ };
|
||
+
|
||
+ // 1. XEX image.
|
||
+ if (auto exec_module = kstate->GetExecutableModule()) {
|
||
+ uint32_t image_base = exec_module->xex_module()->base_address();
|
||
+ uint32_t image_size = exec_module->xex_module()->image_size();
|
||
+ if (image_base && image_size) {
|
||
+ hash_named_region(image_base, image_size);
|
||
+ }
|
||
+ }
|
||
+ // 2. Entry thread's stack + PCR + TLS — read from the current XThread
|
||
+ // (XThread::GetCurrentThread()), which is expected to be the thread
|
||
+ // that is about to execute the entry point.
|
||
+ if (auto* xthread = XThread::GetCurrentThread()) {
|
||
+ uint32_t stack_base = xthread->stack_base();
|
||
+ uint32_t stack_limit = xthread->stack_limit();
|
||
+ if (stack_base > stack_limit) {
|
||
+ hash_named_region(stack_limit, stack_base - stack_limit);
|
||
+ }
|
||
+ uint32_t pcr = xthread->pcr_ptr();
|
||
+ if (pcr) {
|
||
+ hash_named_region(pcr, 0x1000);
|
||
+ }
|
||
+ uint32_t tls = xthread->tls_ptr();
|
||
+ if (tls) {
|
||
+ hash_named_region(tls, 0x1000);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ // Heap descriptors — emit the four virtual heaps' bounds. Histograms
|
||
+ // come from QueryRegionInfo (which is safe to call — it doesn't read
|
||
+ // backing pages).
|
||
+ const uint32_t heap_probes[] = {
|
||
+ 0x00000000u, 0x40000000u, 0x80000000u, 0x90000000u,
|
||
+ };
|
||
+ std::vector<JsonNode> heaps_arr;
|
||
+ for (uint32_t base : heap_probes) {
|
||
+ auto* heap = memory->LookupHeap(base);
|
||
+ if (!heap) continue;
|
||
+ std::map<std::string, uint64_t> hist;
|
||
+ uint32_t cursor = heap->heap_base();
|
||
+ uint32_t hend = heap->heap_base() + heap->heap_size();
|
||
+ while (cursor < hend) {
|
||
+ HeapAllocationInfo info;
|
||
+ if (!heap->QueryRegionInfo(cursor, &info)) break;
|
||
+ if (info.region_size == 0) {
|
||
+ cursor += heap->page_size();
|
||
+ continue;
|
||
+ }
|
||
+ if (info.state == 0) {
|
||
+ hist["free"] += info.region_size / heap->page_size();
|
||
+ } else if ((info.state & 0x2) != 0) {
|
||
+ hist["committed"] += info.region_size / heap->page_size();
|
||
+ } else {
|
||
+ hist["reserved"] += info.region_size / heap->page_size();
|
||
+ }
|
||
+ cursor += info.region_size;
|
||
+ }
|
||
+ for (const auto& [k, v] : hist) global_hist[k] += v;
|
||
+ auto heap_obj = JsonNode::Object();
|
||
+ heap_obj.Set("name", JsonNode::String(fmt::format("v{:08x}", base)));
|
||
+ heap_obj.Set("base", JsonNode::Raw(Hex32(heap->heap_base())));
|
||
+ heap_obj.Set("size", JsonNode::Raw(Hex32(heap->heap_size())));
|
||
+ heap_obj.Set("page_size", JsonNode::Unsigned(heap->page_size()));
|
||
+ auto hist_obj = JsonNode::Object();
|
||
+ for (const auto& [k, v] : hist) {
|
||
+ hist_obj.Set(k, JsonNode::Unsigned(v));
|
||
+ }
|
||
+ heap_obj.Set("page_state_histogram", std::move(hist_obj));
|
||
+ heaps_arr.push_back(std::move(heap_obj));
|
||
+ }
|
||
+ root.Set("heaps", JsonNode::Array(std::move(heaps_arr)));
|
||
+
|
||
+ // Sort regions by (start, end).
|
||
+ std::sort(all_regions.begin(), all_regions.end(),
|
||
+ [](const CommittedRegion& a, const CommittedRegion& b) {
|
||
+ if (a.start != b.start) return a.start < b.start;
|
||
+ return a.end < b.end;
|
||
+ });
|
||
+ uint64_t committed_pages = 0;
|
||
+ std::vector<JsonNode> regions_arr;
|
||
+ regions_arr.reserve(all_regions.size());
|
||
+ for (const auto& r : all_regions) {
|
||
+ auto ro = JsonNode::Object();
|
||
+ ro.Set("start", JsonNode::Raw(Hex32(r.start)));
|
||
+ ro.Set("end", JsonNode::Raw(Hex32(r.end)));
|
||
+ ro.Set("byte_count", JsonNode::Unsigned(r.end - r.start));
|
||
+ ro.Set("protect", JsonNode::Unsigned(r.protect));
|
||
+ ro.Set("sha256", JsonNode::String(r.sha256));
|
||
+ ro.Set("section_kind", JsonNode::Null());
|
||
+ regions_arr.push_back(std::move(ro));
|
||
+ committed_pages += (r.end - r.start) / 4096;
|
||
+ }
|
||
+ root.Set("regions", JsonNode::Array(std::move(regions_arr)));
|
||
+ root.Set("committed_pages_total", JsonNode::Unsigned(committed_pages));
|
||
+
|
||
+ if (dump_section_content) {
|
||
+ std::vector<JsonNode> sec;
|
||
+ for (const auto& r : all_regions) {
|
||
+ auto so = JsonNode::Object();
|
||
+ so.Set("start", JsonNode::Raw(Hex32(r.start)));
|
||
+ so.Set("end", JsonNode::Raw(Hex32(r.end)));
|
||
+ so.Set("sha256", JsonNode::String(r.sha256));
|
||
+ so.Set("content_b64", JsonNode::String("")); // Stubbed.
|
||
+ sec.push_back(std::move(so));
|
||
+ }
|
||
+ root.Set("section_contents", JsonNode::Array(std::move(sec)));
|
||
+ } else {
|
||
+ root.Set("section_contents", JsonNode::Null());
|
||
+ }
|
||
+
|
||
+ std::vector<JsonNode> det_skip;
|
||
+ det_skip.push_back(JsonNode::String("host_base_pointer"));
|
||
+ root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
|
||
+ return root;
|
||
+}
|
||
+
|
||
+// ---------- kernel.json ----------
|
||
+
|
||
+const char* TypeName(XObject::Type t) {
|
||
+ switch (t) {
|
||
+ case XObject::Type::Event: return "Event";
|
||
+ case XObject::Type::Mutant: return "Mutant";
|
||
+ case XObject::Type::Semaphore: return "Semaphore";
|
||
+ case XObject::Type::Thread: return "Thread";
|
||
+ case XObject::Type::Timer: return "Timer";
|
||
+ case XObject::Type::File: return "File";
|
||
+ case XObject::Type::IOCompletion: return "IOCompletion";
|
||
+ case XObject::Type::Module: return "Module";
|
||
+ case XObject::Type::Enumerator: return "Enumerator";
|
||
+ case XObject::Type::NotifyListener: return "NotifyListener";
|
||
+ case XObject::Type::Session: return "Session";
|
||
+ case XObject::Type::Socket: return "Socket";
|
||
+ case XObject::Type::SymbolicLink: return "SymbolicLink";
|
||
+ case XObject::Type::Device: return "Device";
|
||
+ case XObject::Type::Undefined: return "Undefined";
|
||
+ }
|
||
+ return "Undefined";
|
||
+}
|
||
+
|
||
+uint32_t TypeCode(XObject::Type t) {
|
||
+ switch (t) {
|
||
+ case XObject::Type::Event: return 0x01;
|
||
+ case XObject::Type::Mutant: return 0x02;
|
||
+ case XObject::Type::Semaphore: return 0x03;
|
||
+ case XObject::Type::Timer: return 0x04;
|
||
+ case XObject::Type::Thread: return 0x05;
|
||
+ case XObject::Type::File: return 0x06;
|
||
+ case XObject::Type::IOCompletion: return 0x07;
|
||
+ case XObject::Type::Module: return 0x08;
|
||
+ case XObject::Type::Enumerator: return 0x09;
|
||
+ case XObject::Type::NotifyListener: return 0x0B;
|
||
+ default: return 0x00;
|
||
+ }
|
||
+}
|
||
+
|
||
+// FNV-1a 64-bit semantic-id, matching event_log.cc::ComputeSemanticId.
|
||
+// At snapshot time we don't have a meaningful create_site_pc/create_tid/
|
||
+// create_idx tuple for every object (they were minted before Phase B
|
||
+// instrumentation existed), so fall back to a stable identity hash over
|
||
+// (object_type, primary_handle). This is consistent across runs of the
|
||
+// same engine; diff tool compares semantic IDs across engines only when
|
||
+// both sides also stamp the same identity inputs. For Phase B's purposes
|
||
+// (initial-state snapshot), the object population is tiny (≤ 2 entries
|
||
+// at entry-point time: the main thread, plus an executable module ref),
|
||
+// so a simple stable hash suffices.
|
||
+uint64_t StableObjectId(uint32_t type_code, uint32_t raw_handle) {
|
||
+ uint8_t bytes[8];
|
||
+ for (int i = 0; i < 4; ++i) bytes[i] = (type_code >> (i * 8)) & 0xFF;
|
||
+ for (int i = 0; i < 4; ++i) bytes[4 + i] = (raw_handle >> (i * 8)) & 0xFF;
|
||
+ uint64_t h = 0xCBF29CE484222325ULL;
|
||
+ for (int i = 0; i < 8; ++i) {
|
||
+ h ^= bytes[i];
|
||
+ h *= 0x100000001B3ULL;
|
||
+ }
|
||
+ return h;
|
||
+}
|
||
+
|
||
+JsonNode BuildKernel(KernelState* kstate, uint32_t entry_pc) {
|
||
+ auto root = JsonNode::OrderedObject();
|
||
+ root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
|
||
+ root.Set("engine", JsonNode::String(kEngineName));
|
||
+
|
||
+ auto objects = kstate->object_table()->GetAllObjects();
|
||
+ // Sort by semantic id for set-equivalence.
|
||
+ struct OneObj {
|
||
+ uint64_t sid;
|
||
+ JsonNode node;
|
||
+ };
|
||
+ std::vector<OneObj> entries;
|
||
+ for (auto& o : objects) {
|
||
+ uint32_t tc = TypeCode(o->type());
|
||
+ uint32_t rh = o->handle();
|
||
+ uint64_t sid = StableObjectId(tc, rh);
|
||
+ auto n = JsonNode::Object();
|
||
+ n.Set("handle_semantic_id", JsonNode::String(fmt::format("{:016x}", sid)));
|
||
+ n.Set("raw_handle_id", JsonNode::Raw(Hex32(rh)));
|
||
+ n.Set("type", JsonNode::String(TypeName(o->type())));
|
||
+ n.Set("type_code", JsonNode::Unsigned(tc));
|
||
+ n.Set("name", o->name().empty() ? JsonNode::Null()
|
||
+ : JsonNode::String(o->name()));
|
||
+ auto details = JsonNode::Object();
|
||
+ if (o->type() == XObject::Type::Thread) {
|
||
+ auto* th = reinterpret_cast<XThread*>(o.get());
|
||
+ details.Set("thread_id", JsonNode::Unsigned(th->thread_id()));
|
||
+ details.Set("is_entry_thread",
|
||
+ JsonNode::Boolean(
|
||
+ th->main_thread() ||
|
||
+ (th->creation_params() &&
|
||
+ th->creation_params()->start_address == entry_pc)));
|
||
+ details.Set("priority", JsonNode::Integer(th->priority()));
|
||
+ details.Set(
|
||
+ "stack_size",
|
||
+ JsonNode::Unsigned(th->creation_params()
|
||
+ ? th->creation_params()->stack_size
|
||
+ : 0));
|
||
+ details.Set("entry_pc",
|
||
+ JsonNode::Raw(Hex32(th->creation_params()
|
||
+ ? th->creation_params()->start_address
|
||
+ : 0)));
|
||
+ details.Set("ctx_ptr",
|
||
+ JsonNode::Raw(Hex32(th->creation_params()
|
||
+ ? th->creation_params()->start_context
|
||
+ : 0)));
|
||
+ details.Set("suspended", JsonNode::Boolean(false));
|
||
+ }
|
||
+ n.Set("details", std::move(details));
|
||
+ entries.push_back({sid, std::move(n)});
|
||
+ }
|
||
+ std::sort(entries.begin(), entries.end(),
|
||
+ [](const OneObj& a, const OneObj& b) { return a.sid < b.sid; });
|
||
+ std::vector<JsonNode> obj_arr;
|
||
+ obj_arr.reserve(entries.size());
|
||
+ for (auto& e : entries) obj_arr.push_back(std::move(e.node));
|
||
+ root.Set("objects", JsonNode::Array(std::move(obj_arr)));
|
||
+
|
||
+ // We don't enumerate handle_name_table / notification_listeners /
|
||
+ // exports — accessors are not public. Emit empty arrays so the diff
|
||
+ // tool's structural check still has the field present.
|
||
+ root.Set("handle_name_table", JsonNode::Array({}));
|
||
+ root.Set("notification_listeners", JsonNode::Array({}));
|
||
+ root.Set("exports_registered_count", JsonNode::Unsigned(0));
|
||
+ root.Set("exports_registered_sample", JsonNode::Array({}));
|
||
+ root.Set("exports_registered_sha256",
|
||
+ JsonNode::String(std::string(64, '0')));
|
||
+
|
||
+ std::vector<JsonNode> det_skip;
|
||
+ det_skip.push_back(JsonNode::String("raw_handle_id"));
|
||
+ det_skip.push_back(JsonNode::String("exports_registered_count"));
|
||
+ root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
|
||
+ return root;
|
||
+}
|
||
+
|
||
+// ---------- vfs.json ----------
|
||
+
|
||
+JsonNode BuildVfs(KernelState* kstate) {
|
||
+ auto root = JsonNode::OrderedObject();
|
||
+ root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
|
||
+ root.Set("engine", JsonNode::String(kEngineName));
|
||
+
|
||
+ auto* fs = kstate->file_system();
|
||
+ // VirtualFileSystem doesn't expose its `devices_` vector or `symlinks_`
|
||
+ // map publicly. To stay additive (no canary-core API surface changes),
|
||
+ // we probe a canonical set of paths via ResolvePath and report only
|
||
+ // what we can observe. Diff tool sorts mounts_observed by path.
|
||
+ std::vector<std::string> probe_paths = {
|
||
+ "\\Device\\Cdrom0",
|
||
+ "\\Device\\Cdrom0\\default.xex",
|
||
+ "\\Device\\Cdrom0\\dat",
|
||
+ "\\Device\\Cdrom0\\dat\\movie",
|
||
+ "\\Device\\Cdrom0\\dat\\movie\\opening.bik",
|
||
+ "game:\\default.xex",
|
||
+ "game:\\dat",
|
||
+ "cache:\\",
|
||
+ "cache:\\nonexistent_probe",
|
||
+ "\\Device\\HardDisk0\\Partition1",
|
||
+ };
|
||
+ std::sort(probe_paths.begin(), probe_paths.end());
|
||
+ std::vector<JsonNode> probes;
|
||
+ for (const auto& path : probe_paths) {
|
||
+ auto entry = fs->ResolvePath(path);
|
||
+ auto o = JsonNode::Object();
|
||
+ o.Set("path", JsonNode::String(path));
|
||
+ o.Set("resolved", JsonNode::Boolean(entry != nullptr));
|
||
+ if (entry) {
|
||
+ o.Set("is_directory",
|
||
+ JsonNode::Boolean((entry->attributes() & 0x10) != 0)); // FILE_ATTR_DIRECTORY
|
||
+ o.Set("size", JsonNode::Unsigned(entry->size()));
|
||
+ } else {
|
||
+ o.Set("is_directory", JsonNode::Null());
|
||
+ o.Set("size", JsonNode::Null());
|
||
+ }
|
||
+ probes.push_back(std::move(o));
|
||
+ }
|
||
+ root.Set("resolve_path_probes", JsonNode::Array(std::move(probes)));
|
||
+
|
||
+ // Mounts observed: report only what `ResolvePath` saw against the
|
||
+ // device prefixes we know about. The data is derived, not enumerated,
|
||
+ // so this is safe under future-canary device additions.
|
||
+ root.Set("mounted_devices_observed_count",
|
||
+ JsonNode::Unsigned(
|
||
+ (fs->ResolvePath("\\Device\\Cdrom0") != nullptr ? 1u : 0u)));
|
||
+
|
||
+ root.Set("cache_root_listing", JsonNode::Array({}));
|
||
+ std::vector<JsonNode> det_skip;
|
||
+ det_skip.push_back(JsonNode::String("host_path_realpath"));
|
||
+ root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
|
||
+ return root;
|
||
+}
|
||
+
|
||
+// ---------- config.json ----------
|
||
+
|
||
+JsonNode BuildConfig(KernelState* kstate, uint32_t entry_pc) {
|
||
+ auto root = JsonNode::OrderedObject();
|
||
+ root.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
|
||
+ root.Set("engine", JsonNode::String(kEngineName));
|
||
+ root.Set("build_id", JsonNode::String("canary-phaseB"));
|
||
+
|
||
+ auto exec_module = kstate->GetExecutableModule();
|
||
+ uint32_t image_base = 0;
|
||
+ uint32_t image_size = 0;
|
||
+ std::string image_loaded_sha = std::string(64, '0');
|
||
+ std::string xex_header_sha = std::string(64, '0');
|
||
+ std::string iso_path_str;
|
||
+ if (exec_module) {
|
||
+ image_base = exec_module->xex_module()->base_address();
|
||
+ image_size = exec_module->xex_module()->image_size();
|
||
+ iso_path_str = exec_module->path();
|
||
+ uint8_t* host =
|
||
+ kstate->memory()->TranslateVirtual<uint8_t*>(image_base);
|
||
+ if (host && image_size > 0) {
|
||
+ image_loaded_sha = Sha256Hex(host, image_size);
|
||
+ }
|
||
+ if (exec_module->hash()) {
|
||
+ xex_header_sha = fmt::format("{:016x}", *exec_module->hash());
|
||
+ }
|
||
+ }
|
||
+ root.Set("iso_path", JsonNode::String(iso_path_str));
|
||
+ root.Set("xex_entry_point", JsonNode::Raw(Hex32(entry_pc)));
|
||
+ root.Set("xex_image_base", JsonNode::Raw(Hex32(image_base)));
|
||
+ root.Set("xex_image_size", JsonNode::Unsigned(image_size));
|
||
+ root.Set("image_loaded_sha256", JsonNode::String(image_loaded_sha));
|
||
+ root.Set("xex_header_sha256", JsonNode::String(xex_header_sha));
|
||
+
|
||
+ auto cvars = JsonNode::Object();
|
||
+ cvars.Set("phase_b_snapshot_dir",
|
||
+ JsonNode::String(cvars::phase_b_snapshot_dir));
|
||
+ cvars.Set("phase_b_snapshot_and_exit",
|
||
+ JsonNode::Boolean(cvars::phase_b_snapshot_and_exit));
|
||
+ cvars.Set("phase_b_dump_section_content",
|
||
+ JsonNode::Boolean(cvars::phase_b_dump_section_content));
|
||
+ cvars.Set("phase_a_event_log_path",
|
||
+ JsonNode::String(cvars::phase_a_event_log_path));
|
||
+ root.Set("cvars", std::move(cvars));
|
||
+
|
||
+ auto now = std::chrono::system_clock::now();
|
||
+ auto t = std::chrono::system_clock::to_time_t(now);
|
||
+ // wall_clock_iso8601 is non-deterministic; intended for human reading
|
||
+ // only. Diff tool skips it.
|
||
+ std::string wall = fmt::format("epoch:{}", static_cast<int64_t>(t));
|
||
+ root.Set("wall_clock_iso8601", JsonNode::String(wall));
|
||
+ root.Set("host_ns_at_snapshot", JsonNode::Unsigned(0));
|
||
+
|
||
+ std::vector<JsonNode> det_skip;
|
||
+ det_skip.push_back(JsonNode::String("host_ns_at_snapshot"));
|
||
+ det_skip.push_back(JsonNode::String("wall_clock_iso8601"));
|
||
+ det_skip.push_back(JsonNode::String("build_id"));
|
||
+ det_skip.push_back(JsonNode::String("iso_path"));
|
||
+ det_skip.push_back(JsonNode::String("cvars.phase_b_snapshot_dir"));
|
||
+ root.Set("deterministic_skip", JsonNode::Array(std::move(det_skip)));
|
||
+ return root;
|
||
+}
|
||
+
|
||
+void EmitFile(const std::filesystem::path& dir, const char* name,
|
||
+ const JsonNode& node, std::map<std::string, std::string>& hashes) {
|
||
+ std::string body;
|
||
+ node.Serialize(body, 0);
|
||
+ body.push_back('\n');
|
||
+ std::filesystem::path p = dir / name;
|
||
+ std::string h = WriteFileAndHash(p, body);
|
||
+ hashes[name] = h;
|
||
+}
|
||
+
|
||
+void WriteSnapshot(XThread* xthread, cpu::ThreadState* thread_state,
|
||
+ uint32_t entry_pc) {
|
||
+ auto* kstate = xthread->kernel_state();
|
||
+ std::filesystem::path base(cvars::phase_b_snapshot_dir);
|
||
+ std::filesystem::path engine_dir = base / "canary";
|
||
+ std::error_code ec;
|
||
+ std::filesystem::create_directories(engine_dir, ec);
|
||
+
|
||
+ std::map<std::string, std::string> hashes;
|
||
+ EmitFile(engine_dir, "cpu_state.json",
|
||
+ BuildCpuState(xthread, thread_state, entry_pc), hashes);
|
||
+ EmitFile(engine_dir, "memory.json",
|
||
+ BuildMemory(kstate, cvars::phase_b_dump_section_content), hashes);
|
||
+ EmitFile(engine_dir, "kernel.json", BuildKernel(kstate, entry_pc), hashes);
|
||
+ EmitFile(engine_dir, "vfs.json", BuildVfs(kstate), hashes);
|
||
+ EmitFile(engine_dir, "config.json", BuildConfig(kstate, entry_pc), hashes);
|
||
+
|
||
+ auto manifest = JsonNode::OrderedObject();
|
||
+ manifest.Set("schema_version", JsonNode::Unsigned(kSchemaVersion));
|
||
+ manifest.Set("engine", JsonNode::String(kEngineName));
|
||
+ // Files object is sorted by key (alphabetic), matching the diff tool's
|
||
+ // assumption.
|
||
+ auto files = JsonNode::Object();
|
||
+ for (const auto& [name, hash] : hashes) {
|
||
+ files.Set(name, JsonNode::String(hash));
|
||
+ }
|
||
+ manifest.Set("files", std::move(files));
|
||
+
|
||
+ std::string body;
|
||
+ manifest.Serialize(body, 0);
|
||
+ body.push_back('\n');
|
||
+ std::filesystem::path mp = engine_dir / "manifest.json";
|
||
+ std::FILE* f = std::fopen(mp.string().c_str(), "wb");
|
||
+ if (f) {
|
||
+ std::fwrite(body.data(), 1, body.size(), f);
|
||
+ std::fflush(f);
|
||
+ std::fclose(f);
|
||
+ }
|
||
+}
|
||
+
|
||
+} // namespace
|
||
+
|
||
+void FireIfEntryThread(XThread* xthread, cpu::ThreadState* thread_state,
|
||
+ uint32_t entry_address) {
|
||
+ // Fast path: cvar empty → zero overhead. The .empty() check is a
|
||
+ // single read of a std::string's size, no syscall.
|
||
+ if (cvars::phase_b_snapshot_dir.empty()) {
|
||
+ return;
|
||
+ }
|
||
+ if (g_done.load(std::memory_order_acquire)) {
|
||
+ return;
|
||
+ }
|
||
+ // Resolve the entry_point of the executable module. If it doesn't
|
||
+ // match this thread's first instruction, this isn't the entry thread
|
||
+ // — release any claim we may have made and return.
|
||
+ auto* kstate = xthread ? xthread->kernel_state() : nullptr;
|
||
+ if (!kstate) return;
|
||
+ auto exec_module = kstate->GetExecutableModule();
|
||
+ if (!exec_module) return;
|
||
+ uint32_t entry_pc = exec_module->entry_point();
|
||
+ if (entry_address != entry_pc) return;
|
||
+
|
||
+ // CAS-claim. Releases on guard-fail (above) so a non-entry thread
|
||
+ // reaching its first instruction before the boot thread doesn't
|
||
+ // steal the shot.
|
||
+ bool expected = false;
|
||
+ if (!g_claimed.compare_exchange_strong(expected, true,
|
||
+ std::memory_order_acq_rel)) {
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ WriteSnapshot(xthread, thread_state, entry_pc);
|
||
+ g_done.store(true, std::memory_order_release);
|
||
+
|
||
+ if (cvars::phase_b_snapshot_and_exit) {
|
||
+ std::_Exit(0);
|
||
+ }
|
||
+}
|
||
+
|
||
+} // namespace phase_b
|
||
+} // namespace kernel
|
||
+} // namespace xe
|