handoff: VSync/event-wedge fixes + iterate 2.A–2.BC research notes

Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity +
  related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the
iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps
(.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as
regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-06-05 07:19:08 +02:00
parent acd1656753
commit ef93a4fa14
620 changed files with 108303 additions and 1 deletions

View File

@@ -0,0 +1,841 @@
=== AUDIT-068 Session 3 — canary instrumentation v3 diff ===
=== Date: 2026-05-20 ===
=== Cumulative tracked-file diff (cpu_flags.{h,cc}, memory.cc) ===
diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
index 3ff067e15..2298dd3d7 100644
--- a/src/xenia/cpu/cpu_flags.cc
+++ b/src/xenia/cpu/cpu_flags.cc
@@ -57,3 +57,83 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU");
DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.",
"CPU");
+
+// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool.
+DEFINE_bool(audit_demo_setup_trace, true,
+ "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.",
+ "Audit");
+
+// AUDIT-061: comma-separated list of guest PCs to log on each fire.
+// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits
+// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N.
+// Default empty (off); no perf cost when empty.
+DEFINE_string(audit_61_branch_probe_pcs, "",
+ "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).",
+ "Audit");
+
+// AUDIT-067: comma-separated list of u32 values to watch. When non-empty,
+// every 4-byte guest store (stw/stwu/stwx/stwux/stmw) emits a runtime
+// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N.
+// Max 4 values. Default empty (off); zero overhead when empty.
+DEFINE_string(audit_67_value_watch, "",
+ "AUDIT-067: CSV of u32 values (max 4) — log every guest "
+ "store whose value matches.",
+ "Audit");
+
+// AUDIT-068: host-side memory-write watch. See cpu_flags.h header for format.
+// Mirrors AUDIT-067 but covers host-side writes (xe::store_and_swap<T>,
+// Memory::Zero/Fill/Copy). Empty default = zero cost.
+DEFINE_string(audit_68_host_mem_watch_values, "",
+ "AUDIT-068: CSV of u32 values (max 8) — log every host-side "
+ "guest-memory write whose value matches.",
+ "Audit");
+DEFINE_string(audit_68_host_mem_watch_addrs, "",
+ "AUDIT-068: CSV of guest VAs or VA ranges 'START-END' (max 8) "
+ "— log every host-side guest-memory write whose guest VA falls "
+ "within the configured set.",
+ "Audit");
+
+// AUDIT-068 Session 3: read-mode probe. See cpu_flags.h for format.
+DEFINE_string(audit_68_host_mem_read_probe, "",
+ "AUDIT-068 Session 3: CSV of 'VA:SIZE:PERIOD_NS' tuples (max 8) "
+ "— a dedicated poll thread reads the value at each VA every "
+ "PERIOD_NS and emits AUDIT-068-READ-CHANGE on transition.",
+ "Audit");
+
+// Phase A — see kernel/event_log.h.
+DEFINE_string(phase_a_event_log_path, "",
+ "Phase A: write schema-v1 JSONL event log to this path. "
+ "Empty (default) = disabled.",
+ "Audit");
+DEFINE_bool(phase_a_event_log_mem_writes, false,
+ "Phase A: include mem.write events in the JSONL log. RESERVED — "
+ "not wired in this phase. Default false.",
+ "Audit");
+
+// Phase D Stage 1 — see kernel/event_log.h `EmitContentionObserved`.
+DEFINE_bool(kernel_emit_contention, false,
+ "Phase D Stage 1: emit `contention.observed` events when "
+ "RtlEnterCriticalSection's spin loop is exhausted and the call "
+ "falls through to xeKeWaitForSingleObject. Default false (zero "
+ "cost when disabled). Requires --phase_a_event_log_path to be "
+ "set as well.",
+ "Audit");
+
+// Phase B — see kernel/phase_b_snapshot.h.
+DEFINE_string(phase_b_snapshot_dir, "",
+ "Phase B: write 5-file structured state snapshot to "
+ "<dir>/canary/ at the moment immediately before the first "
+ "guest PPC instruction of entry_point. Empty (default) = "
+ "disabled, zero overhead.",
+ "Audit");
+DEFINE_bool(phase_b_snapshot_and_exit, false,
+ "Phase B: after writing the snapshot, exit the process "
+ "immediately (std::_Exit(0)) so re-runs are byte-deterministic.",
+ "Audit");
+DEFINE_bool(phase_b_dump_section_content, false,
+ "Phase B: in memory.json, populate section_contents[].content_b64 "
+ "with raw bytes of every committed XEX-image region. Default "
+ "false — per-region SHA-256 is enough for the routine diff; "
+ "this is the escape hatch for the STOP-and-report condition "
+ "(image_loaded_sha256 mismatch).",
+ "Audit");
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
index 38c4f98ba..9b5ca7a1c 100644
--- a/src/xenia/cpu/cpu_flags.h
+++ b/src/xenia/cpu/cpu_flags.h
@@ -35,4 +35,52 @@ DECLARE_bool(break_condition_truncate);
DECLARE_bool(break_on_debugbreak);
+// AUDIT-DEMO smoke marker.
+DECLARE_bool(audit_demo_setup_trace);
+
+// AUDIT-061: multi-PC branch probe — emits one log line per fire with
+// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs.
+DECLARE_string(audit_61_branch_probe_pcs);
+
+// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose
+// value-to-be-stored matches any configured value. CSV of u32 values
+// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty.
+DECLARE_string(audit_67_value_watch);
+
+// AUDIT-068: host-side memory-write watch — emit a log line for each host-side
+// write to guest memory whose VALUE matches any configured u32 value, or whose
+// guest VA falls within any configured ADDR or ADDR-range. Mirrors AUDIT-067
+// but covers the host-side write paths (xe::store_and_swap<T>, Memory::Zero/
+// Fill/Copy) that AUDIT-067's JIT store-opcode hooks cannot see.
+//
+// VALUES: CSV of u32 values, max 8 entries; e.g. "0x8200A208,0x8200A928".
+// ADDRS: CSV of guest VAs or VA ranges, max 8 entries; range form is
+// "0xSTART-0xEND" (inclusive). e.g. "0x42500000-0x42600000,0xBCE25340".
+// Default empty (off); zero cost on the hot path when both are empty.
+DECLARE_string(audit_68_host_mem_watch_values);
+DECLARE_string(audit_68_host_mem_watch_addrs);
+
+// AUDIT-068 Session 3: read-mode probe. CSV of "VA:SIZE:PERIOD_NS" tuples
+// (max 8). A dedicated low-priority thread polls each VA every PERIOD_NS and
+// emits AUDIT-068-READ-CHANGE when the value transitions. SIZE in {1,2,4,8}.
+// Example: "0xBCE25340:4:1000000" = poll u32 at 0xBCE25340 every 1 ms.
+// Default empty (off); the poll thread is not spawned when empty.
+DECLARE_string(audit_68_host_mem_read_probe);
+
+// Phase A: JSONL event-log emitter path. When non-empty, the engine writes
+// schema-v1 JSONL events to this file. Empty (default) = no overhead, no
+// behavior change. Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md
+DECLARE_string(phase_a_event_log_path);
+DECLARE_bool(phase_a_event_log_mem_writes);
+
+// Phase B: initial-state snapshot. When the dir cvar is non-empty, the
+// engine writes a five-file structured state snapshot (cpu_state.json,
+// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to
+// `<dir>/canary/` at the moment immediately before the first guest PPC
+// instruction of the XEX entry_point executes. See
+// `xenia-rs/audit-runs/phase-b-state-equivalence/`.
+DECLARE_string(phase_b_snapshot_dir);
+DECLARE_bool(phase_b_snapshot_and_exit);
+DECLARE_bool(phase_b_dump_section_content);
+
#endif // XENIA_CPU_CPU_FLAGS_H_
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
index 22ba66aee..f02b11d7f 100644
--- a/src/xenia/memory.cc
+++ b/src/xenia/memory.cc
@@ -14,6 +14,7 @@
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
#include "xenia/base/byte_stream.h"
#include "xenia/base/clock.h"
#include "xenia/base/cvar.h"
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
static Memory* active_memory_ = nullptr;
+// AUDIT-068 — process-global accessor (declared in memory.h).
+Memory* Memory::active() { return active_memory_; }
+
void CrashDump() {
static std::atomic<int> in_crash_dump(0);
if (in_crash_dump.fetch_add(1)) {
@@ -151,11 +155,41 @@ Memory::Memory() {
uint32_t(xe::memory::allocation_granularity());
assert_zero(active_memory_);
active_memory_ = this;
+
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
+ Memory* m = active_memory_;
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
+ };
+
+ // AUDIT-068 Session 3: register guest→host translation thunk and a
+ // page-protect query thunk for the read-mode probe. The probe thread uses
+ // QueryProtect to skip unmapped/uncommitted pages before dereferencing.
+ xe::audit_68::g_guest_to_host_thunk = [](uint32_t va) -> const void* {
+ Memory* m = active_memory_;
+ return m ? reinterpret_cast<const void*>(m->TranslateVirtual(va))
+ : nullptr;
+ };
+ xe::audit_68::g_query_protect_thunk = [](uint32_t va,
+ uint32_t* out_protect) -> bool {
+ Memory* m = active_memory_;
+ if (!m) return false;
+ BaseHeap* heap = m->LookupHeap(va);
+ if (!heap) {
+ if (out_protect) *out_protect = 0;
+ return false;
+ }
+ return heap->QueryProtect(va, out_protect);
+ };
}
Memory::~Memory() {
assert_true(active_memory_ == this);
active_memory_ = nullptr;
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
+ xe::audit_68::g_guest_to_host_thunk = nullptr;
+ xe::audit_68::g_query_protect_thunk = nullptr;
// Uninstall the MMIO handler, as we won't be able to service more
// requests.
@@ -540,16 +574,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
}
void Memory::Zero(uint32_t address, uint32_t size) {
+ // AUDIT-068: log a single span event with value=0; size is capped at 8 for
+ // the value field. Slow path is gated on the atomic flag.
+ xe::audit_68::check_guest_va(address, 0,
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Zero");
std::memset(TranslateVirtual(address), 0, size);
}
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
+ // Replicate the fill byte across all 8 bytes of the value field, so a
+ // configured u32 only matches a >=4-byte fill when all four of its bytes
+ // equal the fill byte (e.g. 0xCDCDCDCD); 0xDEADBEEF never matches one.
+ uint64_t v = static_cast<uint64_t>(value);
+ v |= v << 8;
+ v |= v << 16;
+ v |= v << 32;
+ xe::audit_68::check_guest_va(address, v,
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Fill");
std::memset(TranslateVirtual(address), value, size);
}
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
uint8_t* pdest = TranslateVirtual(dest);
const uint8_t* psrc = TranslateVirtual(src);
+ // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the
+ // source buffer. Catches XEX-loader-style memcpys where a vptr (the target
+ // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost
+ // O(size/4 * N_values) with N_values capped at 8 inside value_matches —
+ // negligible vs the underlying memcpy throughput.
+ //
+ // The scan is gated on active bit 0x1 (values mode). When bit 0x2 is set
+ // (addrs mode, e.g. Run 2 voice-struct), one coarse event is also checked
+ // against the first min(size, 8) destination bytes so addr-watch still fires.
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
+ if (active != 0) [[unlikely]] {
+ if ((active & 0x1) && size >= 4) {
+ // Scan source for any configured u32 value (big-endian, mirrors how
+ // guest sees the bytes). 4-byte aligned offsets only.
+ uint32_t aligned_end = size & ~3u;
+ for (uint32_t i = 0; i < aligned_end; i += 4) {
+ uint32_t be_u32 =
+ (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) |
+ (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]);
+ xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy");
+ }
+ }
+ if (active & 0x2) {
+ // Addr-only mode: emit a single coarse event tagged with the dest base
+ // and first u32 of source for context. The slow-path range check only
+ // sees the first min(size, 8) bytes of the dest span, not the whole copy.
+ uint64_t v = 0;
+ if (size >= 4) {
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
+ } else if (size > 0) {
+ for (uint32_t i = 0; i < size; ++i) {
+ v = (v << 8) | psrc[i];
+ }
+ }
+ xe::audit_68::check_guest_va(
+ dest, v, static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Copy");
+ }
+ }
std::memcpy(pdest, psrc, size);
}
=== Full contents of new file: src/xenia/base/audit_68_host_mem_watch_fwd.h ===
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* AUDIT-068: host-side memory-write watch — forward declarations only.
*
* Declarations here are intentionally minimal so that xenia/base/memory.h can
* include this without pulling in xenia/memory.h (which would create a
* circular dependency: xenia-base → xenia-core → xenia-base). The
* definitions live in xenia/base/audit_68_host_mem_watch_base.cc (xenia-base).
*
* Hot path: callers (the integer specializations of xe::store_and_swap<T>)
* load the atomic flag once. When it is 0 (default), no further work is done
* — a single relaxed atomic load and a predictable branch.
******************************************************************************
*/
#ifndef XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
#define XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
#include <atomic>
#include <cstdint>
namespace xe {
namespace audit_68 {
// Hot-path activation word, loaded relaxed by the inline wrappers below.
//   0xFFFFFFFF = not parsed yet (the initial value). Because it is non-zero,
//                the wrappers take the slow path until the cvars are parsed.
//   0          = parsed, no value/address watch configured; wrappers no-op.
//   bit 0x1    = at least one watch value was parsed.
//   bit 0x2    = at least one watch address/range was parsed.
// The sentinel is replaced by the first slow-path call (either variant) that
// runs after the host→guest thunk has been registered.
//
// Implementation lives in xenia-base (audit_68_host_mem_watch_base.cc) so
// that callers in xenia-base/xenia-cpu/xenia-kernel can resolve the symbol
// without depending on xenia-core link order.
extern std::atomic<uint32_t> g_active;
// Host-pointer → guest-VA translation thunk. xenia/memory.cc::Memory::Memory()
// registers a function pointer here that wraps Memory::HostToGuestVirtual;
// Memory::~Memory() resets it to nullptr. The registered wrapper returns 0
// when no Memory instance is active.
// While this pointer is null, both slow paths return immediately: the cvars
// are not parsed and nothing is logged.
using HostToGuestThunk = uint32_t (*)(const void*);
extern HostToGuestThunk g_host_to_guest_thunk;
// AUDIT-068 Session 3 — read-mode probe support.
//
// Guest-VA → host-pointer translation thunk (wraps Memory::TranslateVirtual).
// Used by the read-probe poll thread to sample bytes at configured guest VAs.
// May return non-null even for unmapped/uncommitted VAs (the underlying
// translation is arithmetic — virtual_membase_ + va) — callers MUST consult
// the QueryProtect thunk before dereferencing.
// Registered by Memory::Memory() and reset to nullptr by Memory::~Memory().
using GuestToHostThunk = const void* (*)(uint32_t);
extern GuestToHostThunk g_guest_to_host_thunk;
// Returns true iff the page containing `guest_va` is committed and readable;
// out_protect receives the raw page protect bits (kProtectRead, etc.). Wraps
// Memory::LookupHeap() + BaseHeap::QueryProtect(). Used as a guard before the
// read-probe samples bytes (early-boot heap-not-yet-mapped path must NOT
// crash).
// The registered wrapper returns false when no Memory is active or no heap
// covers the VA (in the latter case it also writes 0 to out_protect).
// Registered by Memory::Memory() and reset to nullptr by Memory::~Memory().
using QueryProtectThunk = bool (*)(uint32_t, uint32_t* /*out_protect*/);
extern QueryProtectThunk g_query_protect_thunk;
// Slow path. Only invoked when g_active is non-zero (which includes the
// initial "unparsed" sentinel). Implementation in
// xenia/base/audit_68_host_mem_watch_base.cc (xenia-base).
// Returns without doing anything while g_host_to_guest_thunk is null.
//
// host_ptr: the host pointer being written (from store_and_swap's `mem`).
// value: the value being stored (zero-extended to u64).
// size: 1, 2, 4 or 8.
// tag: caller-provided tag string (e.g. "store_and_swap<u32>"). Logged
// verbatim, no formatting. Must be a static string (lifetime
// beyond this call).
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
uint8_t size, const char* tag);
// Same as above, but with a known guest VA (for callers like Memory::Zero/
// Fill/Copy that have the VA but not a single host pointer).
// Those callers pass min(span_size, 8) as `size`, so values outside
// {1, 2, 4, 8} (including 0) can arrive here.
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
const char* tag);
// Inline hot-path wrapper for writes identified by host pointer. Costs one
// relaxed atomic load and a predictable branch while the watch is inactive;
// everything else is delegated to check_host_write_slowpath().
inline void check_host_write(const void* host_ptr, uint64_t value, uint8_t size,
                             const char* tag) {
  const uint32_t watch_state = g_active.load(std::memory_order_relaxed);
  if (watch_state == 0) [[likely]] {
    return;
  }
  check_host_write_slowpath(host_ptr, value, size, tag);
}
// Inline hot-path wrapper for writes identified by guest VA. Costs one
// relaxed atomic load and a predictable branch while the watch is inactive;
// everything else is delegated to check_guest_va_slowpath().
inline void check_guest_va(uint32_t guest_va, uint64_t value, uint8_t size,
                           const char* tag) {
  const uint32_t watch_state = g_active.load(std::memory_order_relaxed);
  if (watch_state == 0) [[likely]] {
    return;
  }
  check_guest_va_slowpath(guest_va, value, size, tag);
}
} // namespace audit_68
} // namespace xe
#endif // XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
=== Full contents of new file: src/xenia/base/audit_68_host_mem_watch_base.cc ===
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* AUDIT-068 host-side memory-write watch — implementation (xenia-base).
*
* Mirrors AUDIT-067 in spirit (value-CSV cvar, lazy parse, atomic-bool
* activation) but observes the HOST-side write paths instead of the JIT'd
* guest store opcodes. Captures writes performed by xe::store_and_swap<T>
* (xenia/base/memory.h) and by Memory::Zero/Fill/Copy (xenia/memory.cc).
*
* Lives in xenia-base so that the slow-path symbols resolve for callers in
* xenia-base / xenia-cpu / xenia-kernel without depending on xenia-core link
* order. The host→guest VA translation is provided by a function-pointer
* thunk that xenia::Memory::Memory() registers at construction.
*
* See xenia/base/audit_68_host_mem_watch_fwd.h for the API.
* See xenia/cpu/cpu_flags.{h,cc} for the cvars.
******************************************************************************
*/
#include "xenia/base/audit_68_host_mem_watch_fwd.h"
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstring>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/threading.h"
// We need the cvars but cpu_flags.h lives in xenia-cpu. To avoid an upward
// dep we re-declare them here with the same macros — cvar.h's DECLARE_*
// macros are header-safe (just `extern` declarations) and resolve against the
// definitions in xenia-cpu/cpu_flags.cc at link time. (xenia-cpu links AFTER
// xenia-base in the executable; symbols in xenia-cpu/cpu_flags.cc are still
// resolvable from xenia-base translation units because the lld pass folds
// all libraries together at the executable level.)
DECLARE_string(audit_68_host_mem_watch_values);
DECLARE_string(audit_68_host_mem_watch_addrs);
DECLARE_string(audit_68_host_mem_read_probe);
namespace xe {
namespace audit_68 {
// Hot-path flag (declared in fwd header). Initial sentinel UINT32_MAX means
// "unparsed". Because the sentinel is non-zero, the inline wrappers route
// every call to the slow path until parsing happens. The first slow-path call
// that runs after the host→guest thunk is registered invokes ensure_parsed(),
// which replaces the sentinel with the actual active bitmask (0 if nothing
// was parsed, otherwise bit 0x1 = values, bit 0x2 = address ranges). After
// that, hot-path calls observe the real value and bail out cheaply when off.
std::atomic<uint32_t> g_active{0xFFFFFFFFu};
// Host→guest VA translation thunk (declared in fwd header). Set by
// xenia::Memory::Memory() at construction; reset to nullptr by ~Memory().
// Plain (non-atomic) function pointer.
HostToGuestThunk g_host_to_guest_thunk{nullptr};
// AUDIT-068 Session 3: guest→host translation + page-protect query thunks.
// Same registration/reset lifecycle as the host→guest thunk above; also plain
// function pointers.
GuestToHostThunk g_guest_to_host_thunk{nullptr};
QueryProtectThunk g_query_protect_thunk{nullptr};
namespace {
// Upper bounds on how many entries the CSV parsers will accept; additional
// tokens are ignored.
constexpr size_t kMaxValues = 8;
constexpr size_t kMaxAddrRanges = 8;
// One watched guest-VA interval; both bounds are inclusive.
struct AddrRange {
uint32_t start; // inclusive
uint32_t end; // inclusive
};
// Parsed watch configuration. Filled by parse_locked(), which ensure_parsed()
// runs at most once via g_parsed_flag; read by value_matches()/addr_matches().
std::vector<uint32_t> g_values;
std::vector<AddrRange> g_addrs;
std::once_flag g_parsed_flag;
// Time origin for host_ns_since_start(); captured on that function's first
// call under g_t0_once.
std::chrono::steady_clock::time_point g_t0;
std::once_flag g_t0_once;
// Returns the steady-clock time, in nanoseconds, elapsed since the first call
// to this function. The origin (g_t0) is captured exactly once.
int64_t host_ns_since_start() {
  using Clock = std::chrono::steady_clock;
  std::call_once(g_t0_once, [] { g_t0 = Clock::now(); });
  const Clock::duration elapsed = Clock::now() - g_t0;
  return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
}
// Strips leading and trailing spaces and tabs from `s` in place. Other
// whitespace characters (e.g. newlines) are left untouched.
void trim(std::string& s) {
  static constexpr char kBlanks[] = " \t";
  const size_t first_kept = s.find_first_not_of(kBlanks);
  if (first_kept == std::string::npos) {
    // Empty, or nothing but blanks.
    s.clear();
    return;
  }
  const size_t last_kept = s.find_last_not_of(kBlanks);
  s.erase(last_kept + 1);
  s.erase(0, first_kept);
}
// Parses `tok` as an unsigned 32-bit integer. The base is auto-detected
// ("0x" prefix = hex, leading "0" = octal, otherwise decimal).
//
// Returns true and writes *out only when the WHOLE token is a number that
// fits in 32 bits. Returns false (leaving *out untouched) for empty input,
// negative numbers, trailing garbage such as "12abc", and values above
// 0xFFFFFFFF. The value is parsed as 64-bit and range-checked explicitly so
// the result does not depend on the platform width of `unsigned long`.
bool parse_u32(const std::string& tok, uint32_t* out) {
  // std::stoull accepts a minus sign and wraps the result; reject it up front.
  if (tok.empty() || tok.find('-') != std::string::npos) return false;
  try {
    size_t consumed = 0;
    const unsigned long long wide = std::stoull(tok, &consumed, 0);
    if (consumed != tok.size()) return false;  // trailing junk
    if (wide > 0xFFFFFFFFull) return false;    // does not fit in u32
    *out = static_cast<uint32_t>(wide);
    return true;
  } catch (...) {
    // std::invalid_argument / std::out_of_range: not a usable number.
    return false;
  }
}
// Splits `csv` on commas and appends every field that parses as a u32 to
// g_values. Blank and unparsable fields are skipped; parsing stops once
// kMaxValues entries have been collected.
void parse_values_csv(const std::string& csv) {
  for (size_t cursor = 0;
       cursor < csv.size() && g_values.size() < kMaxValues;) {
    const size_t comma = csv.find(',', cursor);
    const bool last_field = (comma == std::string::npos);
    std::string field =
        last_field ? csv.substr(cursor) : csv.substr(cursor, comma - cursor);
    trim(field);
    uint32_t parsed = 0;
    if (!field.empty() && parse_u32(field, &parsed)) {
      g_values.push_back(parsed);
    }
    if (last_field) break;
    cursor = comma + 1;
  }
}
// Splits `csv` on commas and appends each parsed entry to g_addrs, stopping
// once kMaxAddrRanges entries have been collected. Two token forms exist:
//   "START-END"  inclusive range; the bounds are stored in ascending order
//                even if written reversed.
//   "ADDR"       single address, stored as the 8-byte window
//                [ADDR, ADDR + 7], clamped at 0xFFFFFFFF.
// Blank tokens and tokens whose numbers do not parse are skipped.
void parse_addrs_csv(const std::string& csv) {
  constexpr uint32_t kVaMax = 0xFFFFFFFFu;
  constexpr uint32_t kSingleAddrSpan = 7;  // single address => 8-byte window
  size_t pos = 0;
  while (pos < csv.size() && g_addrs.size() < kMaxAddrRanges) {
    const size_t end = csv.find(',', pos);
    std::string tok = csv.substr(pos, end - pos);
    trim(tok);
    if (!tok.empty()) {
      // Look for the range separator from index 1. Index 0 can never be a
      // separator, and starting at 2 (to hop over "0x") would miss short
      // decimal ranges such as "5-9".
      const size_t dash = tok.find('-', 1);
      if (dash != std::string::npos) {
        std::string lo_text = tok.substr(0, dash);
        std::string hi_text = tok.substr(dash + 1);
        trim(lo_text);
        trim(hi_text);
        uint32_t lo = 0;
        uint32_t hi = 0;
        if (parse_u32(lo_text, &lo) && parse_u32(hi_text, &hi)) {
          AddrRange r{};
          // A reversed range would otherwise never match anything.
          r.start = std::min(lo, hi);
          r.end = std::max(lo, hi);
          g_addrs.push_back(r);
        }
      } else {
        uint32_t base = 0;
        if (parse_u32(tok, &base)) {
          AddrRange r{};
          r.start = base;
          // Saturate instead of wrapping past the top of the 32-bit space.
          r.end = (base > kVaMax - kSingleAddrSpan) ? kVaMax
                                                    : base + kSingleAddrSpan;
          g_addrs.push_back(r);
        }
      }
    }
    if (end == std::string::npos) break;
    pos = end + 1;
  }
}
void parse_locked() {
parse_values_csv(cvars::audit_68_host_mem_watch_values);
parse_addrs_csv(cvars::audit_68_host_mem_watch_addrs);
uint32_t bits = 0;
if (!g_values.empty()) bits |= 0x1;
if (!g_addrs.empty()) bits |= 0x2;
g_active.store(bits, std::memory_order_release);
XELOGI(
"AUDIT-068-INIT values_csv=\"{}\" addrs_csv=\"{}\" values_parsed={} "
"addr_ranges_parsed={} active=0x{:X}",
cvars::audit_68_host_mem_watch_values,
cvars::audit_68_host_mem_watch_addrs, g_values.size(), g_addrs.size(),
bits);
for (size_t i = 0; i < g_values.size(); ++i) {
XELOGI("AUDIT-068-INIT value[{}] = 0x{:08X}", i, g_values[i]);
}
for (size_t i = 0; i < g_addrs.size(); ++i) {
XELOGI("AUDIT-068-INIT addr_range[{}] = 0x{:08X}-0x{:08X}", i,
g_addrs[i].start, g_addrs[i].end);
}
}
// Returns true when `value` matches any configured watch value for a write of
// `size` bytes:
//   size >= 4 : the low 32 bits equal the watch value;
//   size == 8 : additionally, the high 32 bits may equal it;
//   size == 2 : the low 16 bits of both are equal;
//   size == 1 : the low 8 bits of both are equal.
// Other sizes (0, 3) never match.
bool value_matches(uint64_t value, uint8_t size) {
  const uint32_t low32 = static_cast<uint32_t>(value);
  const uint32_t high32 = static_cast<uint32_t>(value >> 32);
  const uint64_t low16 = value & 0xFFFF;
  const uint64_t low8 = value & 0xFF;
  return std::any_of(
      g_values.begin(), g_values.end(), [&](const uint32_t watched) {
        if (size >= 4 && low32 == watched) return true;
        if (size == 8 && high32 == watched) return true;
        if (size == 2 && (watched & 0xFFFF) == low16) return true;
        return size == 1 && (watched & 0xFF) == low8;
      });
}
// Returns true when the byte span [guest_va, guest_va + size - 1] overlaps
// any configured address range. A size of 0 is treated as a single byte.
// The upper bound saturates at 0xFFFFFFFF: a wrapped bound would end up below
// `lo` and make the overlap test fail for writes at the very top of the
// guest address space.
bool addr_matches(uint32_t guest_va, uint8_t size) {
  constexpr uint32_t kVaMax = 0xFFFFFFFFu;
  const uint32_t lo = guest_va;
  const uint32_t extra = size ? static_cast<uint32_t>(size) - 1u : 0u;
  const uint32_t hi = (extra > kVaMax - lo) ? kVaMax : lo + extra;
  for (const auto& r : g_addrs) {
    if (lo <= r.end && hi >= r.start) return true;
  }
  return false;
}
// Thread id recorded in the tid= field of watch log lines; forwards to
// xe::threading::current_thread_id().
uint32_t current_tid() { return xe::threading::current_thread_id(); }
// Writes one AUDIT-068-HOST-WRITE log line describing a matched write.
//   guest_va : guest address of the write (0 when it could not be resolved).
//   host_ptr : host address, or nullptr for callers that only know the VA.
//   value    : stored value, zero-extended to 64 bits.
//   size     : width of the write as reported by the caller.
//   tag      : caller-supplied label; "<null>" is logged when it is null.
void emit(uint32_t guest_va, const void* host_ptr, uint64_t value,
          uint8_t size, const char* tag) {
  const uintptr_t host_addr = reinterpret_cast<uintptr_t>(host_ptr);
  const uint32_t width = static_cast<uint32_t>(size);
  const char* label = tag ? tag : "<null>";
  const int64_t stamp_ns = host_ns_since_start();
  const uint32_t tid = current_tid();
  XELOGI(
      "AUDIT-068-HOST-WRITE guest_va=0x{:08X} host_ptr=0x{:016X} "
      "val=0x{:016X} sz={} fn={} host_ns={} tid={}",
      guest_va, host_addr, value, width, label, stamp_ns, tid);
}
// ===== AUDIT-068 Session 3 — read-mode probe state =====
// Maximum number of probes accepted from the read-probe cvar.
constexpr size_t kMaxReadProbes = 8;
// One polled guest location plus the change-detection state for it.
struct ReadProbe {
uint32_t guest_va;
uint8_t size; // 1, 2, 4, 8
uint64_t period_ns;
// Most recent successfully sampled value; meaningful only while
// last_was_valid is true.
uint64_t last_value;
// True once a sample has succeeded and until a later sample fails.
bool last_was_valid;
};
// Probe list: filled by parse_read_probes_csv() before the poll thread is
// created, and afterwards read and updated by that thread.
std::vector<ReadProbe> g_read_probes;
// NOTE(review): in the code shown here g_read_probe_thread_running is only
// ever stored as true and g_read_probe_shutdown is never set, so the poll
// loop has no visible stop request — confirm whether another file drives them.
std::atomic<bool> g_read_probe_thread_running{false};
std::atomic<bool> g_read_probe_shutdown{false};
std::thread g_read_probe_thread;
// Guards the one-time parse-and-spawn in
// start_read_probe_thread_if_configured().
std::once_flag g_read_probe_started;
bool parse_read_probe_tok(const std::string& tok, ReadProbe* out) {
// Expected form: "VA:SIZE:PERIOD_NS" — three colon-separated u64.
size_t c1 = tok.find(':');
if (c1 == std::string::npos) return false;
size_t c2 = tok.find(':', c1 + 1);
if (c2 == std::string::npos) return false;
std::string sva = tok.substr(0, c1);
std::string ssz = tok.substr(c1 + 1, c2 - c1 - 1);
std::string sper = tok.substr(c2 + 1);
trim(sva);
trim(ssz);
trim(sper);
try {
out->guest_va = static_cast<uint32_t>(std::stoul(sva, nullptr, 0));
uint32_t sz = static_cast<uint32_t>(std::stoul(ssz, nullptr, 0));
if (sz != 1 && sz != 2 && sz != 4 && sz != 8) return false;
out->size = static_cast<uint8_t>(sz);
out->period_ns = static_cast<uint64_t>(std::stoull(sper, nullptr, 0));
if (out->period_ns < 1000) out->period_ns = 1000; // 1us floor.
out->last_value = 0;
out->last_was_valid = false;
return true;
} catch (...) {
return false;
}
}
void parse_read_probes_csv(const std::string& csv) {
size_t pos = 0;
while (pos < csv.size() && g_read_probes.size() < kMaxReadProbes) {
size_t end = csv.find(',', pos);
std::string tok = csv.substr(pos, end - pos);
trim(tok);
if (!tok.empty()) {
ReadProbe rp{};
if (parse_read_probe_tok(tok, &rp)) {
g_read_probes.push_back(rp);
}
}
if (end == std::string::npos) break;
pos = end + 1;
}
}
uint64_t sample_at(uint32_t guest_va, uint8_t size, bool* out_valid) {
*out_valid = false;
if (!g_guest_to_host_thunk || !g_query_protect_thunk) return 0;
uint32_t prot = 0;
if (!g_query_protect_thunk(guest_va, &prot)) return 0;
// Page must have at least read permission. The protect bits map to
// xe::memory::PageAccess: kReadOnly=1, kReadWrite=2, kExecuteReadOnly=3,
// kExecuteReadWrite=4. kNoAccess=0. Accept anything non-zero — caller
// distinguishes via the second-pass change detector anyway.
if (prot == 0) return 0;
const void* hp = g_guest_to_host_thunk(guest_va);
if (!hp) return 0;
uint64_t v = 0;
// Guest memory is big-endian. We use raw byte loads to avoid alignment
// traps for size>4 on possibly-unaligned VAs. The "value" we log is the
// host-endian interpretation of the BE bytes (matches store_and_swap's
// logging convention: the byte-swapped scalar).
const uint8_t* bp = reinterpret_cast<const uint8_t*>(hp);
switch (size) {
case 1: v = bp[0]; break;
case 2: v = (uint64_t(bp[0]) << 8) | bp[1]; break;
case 4:
v = (uint64_t(bp[0]) << 24) | (uint64_t(bp[1]) << 16) |
(uint64_t(bp[2]) << 8) | bp[3];
break;
case 8:
v = (uint64_t(bp[0]) << 56) | (uint64_t(bp[1]) << 48) |
(uint64_t(bp[2]) << 40) | (uint64_t(bp[3]) << 32) |
(uint64_t(bp[4]) << 24) | (uint64_t(bp[5]) << 16) |
(uint64_t(bp[6]) << 8) | bp[7];
break;
}
*out_valid = true;
return v;
}
// Body of the read-probe poll thread. Logs the probe configuration, then
// loops until g_read_probe_shutdown is observed true: each pass samples every
// probe whose deadline has passed, logs READ-INITIAL / READ-CHANGE /
// READ-UNMAPPED transitions, and then sleeps.
void read_probe_thread_main() {
// Find the smallest configured period; the loop sleeps that long between
// scans. A probe is sampled only once its own next_fire[i] deadline has
// passed, so its effective period is at least period_ns, quantised to the
// scan times.
uint64_t min_period_ns = UINT64_MAX;
for (const auto& p : g_read_probes) {
if (p.period_ns < min_period_ns) min_period_ns = p.period_ns;
}
// Nothing to poll (no probe lowered the minimum): exit the thread.
if (min_period_ns == UINT64_MAX) return;
// Per-probe next-fire times. Starting at 0 makes every probe fire on the
// first pass.
std::vector<uint64_t> next_fire(g_read_probes.size(), 0);
XELOGI(
"AUDIT-068-READ-INIT probe_count={} min_period_ns={} thread spawned",
g_read_probes.size(), min_period_ns);
for (size_t i = 0; i < g_read_probes.size(); ++i) {
XELOGI("AUDIT-068-READ-INIT probe[{}] va=0x{:08X} size={} period_ns={}",
i, g_read_probes[i].guest_va,
static_cast<uint32_t>(g_read_probes[i].size),
g_read_probes[i].period_ns);
}
while (!g_read_probe_shutdown.load(std::memory_order_relaxed)) {
// One timestamp per pass; it is used both for the deadline checks and as
// the host_ns value in this pass's log lines.
int64_t now_ns = host_ns_since_start();
for (size_t i = 0; i < g_read_probes.size(); ++i) {
if (static_cast<uint64_t>(now_ns) < next_fire[i]) continue;
ReadProbe& rp = g_read_probes[i];
bool valid = false;
uint64_t v = sample_at(rp.guest_va, rp.size, &valid);
if (valid) {
if (!rp.last_was_valid) {
// First successful read: emit the initial value, do NOT call it a
// "change" — but log so we know when the VA mapped.
XELOGI(
"AUDIT-068-READ-INITIAL va=0x{:08X} val=0x{:016X} sz={} "
"host_ns={} tid=probe",
rp.guest_va, v, static_cast<uint32_t>(rp.size), now_ns);
rp.last_value = v;
rp.last_was_valid = true;
} else if (v != rp.last_value) {
XELOGI(
"AUDIT-068-READ-CHANGE va=0x{:08X} old=0x{:016X} "
"new=0x{:016X} sz={} host_ns={} tid=probe",
rp.guest_va, rp.last_value, v, static_cast<uint32_t>(rp.size),
now_ns);
rp.last_value = v;
}
} else if (rp.last_was_valid) {
// Was valid, now invalid — page unmapped/reprotected.
XELOGI(
"AUDIT-068-READ-UNMAPPED va=0x{:08X} last=0x{:016X} sz={} "
"host_ns={} tid=probe",
rp.guest_va, rp.last_value, static_cast<uint32_t>(rp.size),
now_ns);
rp.last_was_valid = false;
}
next_fire[i] = static_cast<uint64_t>(now_ns) + rp.period_ns;
}
// Sleep a fixed min_period_ns (never less than 1us) between passes; the
// per-probe deadlines are not consulted when choosing the sleep length.
// The shutdown flag is re-checked once per pass.
uint64_t sleep_ns = min_period_ns;
if (sleep_ns < 1000) sleep_ns = 1000;
std::this_thread::sleep_for(std::chrono::nanoseconds(sleep_ns));
}
XELOGI("AUDIT-068-READ-EXIT thread shutting down");
}
// Parses the read-probe cvar and, if at least one probe was configured and
// both read-side thunks are registered, spawns the detached poll thread. The
// whole body runs at most once (std::call_once); later calls are no-ops.
void start_read_probe_thread_if_configured() {
std::call_once(g_read_probe_started, []() {
parse_read_probes_csv(cvars::audit_68_host_mem_read_probe);
if (g_read_probes.empty()) return;
if (!g_guest_to_host_thunk || !g_query_protect_thunk) {
// NOTE(review): the message says "deferred", but returning normally
// completes the call_once, so this branch is never retried and the probe
// stays off for the rest of the process. Confirm whether a retry is wanted.
XELOGI(
"AUDIT-068-READ-INIT thunks not ready (guest_to_host={} "
"query_protect={}) — read probe deferred",
(void*)g_guest_to_host_thunk, (void*)g_query_protect_thunk);
return;
}
g_read_probe_thread_running.store(true, std::memory_order_release);
g_read_probe_thread = std::thread(&read_probe_thread_main);
g_read_probe_thread.detach(); // best-effort; daemon-style.
});
}
} // namespace
// Runs parse_locked() exactly once per process, guarded by g_parsed_flag;
// concurrent callers block until that single run has finished.
void ensure_parsed() { std::call_once(g_parsed_flag, parse_locked); }
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
uint8_t size, const char* tag) {
// AUDIT-068 Session 2: defer parsing until Memory::Memory() has registered
// the host→guest thunk. This guarantees the cmdline cvar override has been
// applied AND the logging subsystem is alive before we latch g_active.
// Without this gate, a be<T>::set() call during static-init (e.g. from a
// global initializer in another translation unit) would trigger
// parse_locked() before cpu_flags.cc's cvar objects are constructed —
// latching g_active=0 permanently and silencing the watch.
HostToGuestThunk thunk = g_host_to_guest_thunk;
if (!thunk) return;
ensure_parsed();
// AUDIT-068 Session 3: lazy-start the read-probe poll thread. Same gate as
// ensure_parsed() — must come after Memory::Memory() has registered the
// thunks so the probe can read pages safely.
start_read_probe_thread_if_configured();
uint32_t active = g_active.load(std::memory_order_acquire);
if (active == 0) return;
uint32_t guest_va = 0;
if (thunk) {
guest_va = thunk(host_ptr);
}
bool hit = false;
if ((active & 0x1) && value_matches(value, size)) hit = true;
if (!hit && (active & 0x2) && thunk && addr_matches(guest_va, size)) {
hit = true;
}
if (!hit) return;
emit(guest_va, host_ptr, value, size, tag);
}
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
const char* tag) {
// AUDIT-068 Session 2: same static-init gate as check_host_write_slowpath.
// Callers (Memory::Zero/Fill/Copy + xex_module audit68_prescan_memcpy) only
// run after Memory::Memory(), but defensive in case of future expansion.
if (!g_host_to_guest_thunk) return;
ensure_parsed();
uint32_t active = g_active.load(std::memory_order_acquire);
if (active == 0) return;
bool hit = false;
if ((active & 0x1) && value_matches(value, size)) hit = true;
if (!hit && (active & 0x2) && addr_matches(guest_va, size)) hit = true;
if (!hit) return;
emit(guest_va, nullptr, value, size, tag);
}
} // namespace audit_68
} // namespace xe