Source changes (dormant parity infra, retained from iterate 2.AI/2.AO): - xenia-kernel/exports.rs: nt_create_event manual_reset polarity + related event wiring - xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps (.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as regenerable local artifacts — see memory + HANDOFF for the running findings. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
842 lines
33 KiB
Diff
842 lines
33 KiB
Diff
=== AUDIT-068 Session 3 — canary instrumentation v3 diff ===
|
|
=== Date: 2026-05-20 ===
|
|
=== Cumulative tracked-file diff (cpu_flags.{h,cc}, memory.cc) ===
|
|
|
|
diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
|
|
index 3ff067e15..2298dd3d7 100644
|
|
--- a/src/xenia/cpu/cpu_flags.cc
|
|
+++ b/src/xenia/cpu/cpu_flags.cc
|
|
@@ -57,3 +57,83 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU");
|
|
|
|
DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.",
|
|
"CPU");
|
|
+
|
|
+// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool.
|
|
+DEFINE_bool(audit_demo_setup_trace, true,
|
|
+ "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.",
|
|
+ "Audit");
|
|
+
|
|
+// AUDIT-061: comma-separated list of guest PCs to log on each fire.
|
|
+// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits
|
|
+// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N.
|
|
+// Default empty (off); no perf cost when empty.
|
|
+DEFINE_string(audit_61_branch_probe_pcs, "",
|
|
+ "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).",
|
|
+ "Audit");
|
|
+
|
|
+// AUDIT-067: comma-separated list of u32 values to watch. When non-empty,
|
|
+// every 4-byte guest store (stw/stwu/stwx/stwux/stmw) emits a runtime
|
|
+// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N.
|
|
+// Max 4 values. Default empty (off); zero overhead when empty.
|
|
+DEFINE_string(audit_67_value_watch, "",
|
|
+ "AUDIT-067: CSV of u32 values (max 4) — log every guest "
|
|
+ "store whose value matches.",
|
|
+ "Audit");
|
|
+
|
|
+// AUDIT-068: host-side memory-write watch. See cpu_flags.h header for format.
|
|
+// Mirrors AUDIT-067 but covers host-side writes (xe::store_and_swap<T>,
|
|
+// Memory::Zero/Fill/Copy). Empty default = zero cost.
|
|
+DEFINE_string(audit_68_host_mem_watch_values, "",
|
|
+ "AUDIT-068: CSV of u32 values (max 8) — log every host-side "
|
|
+ "guest-memory write whose value matches.",
|
|
+ "Audit");
|
|
+DEFINE_string(audit_68_host_mem_watch_addrs, "",
|
|
+ "AUDIT-068: CSV of guest VAs or VA ranges 'START-END' (max 8) "
|
|
+ "— log every host-side guest-memory write whose guest VA falls "
|
|
+ "within the configured set.",
|
|
+ "Audit");
|
|
+
|
|
+// AUDIT-068 Session 3: read-mode probe. See cpu_flags.h for format.
|
|
+DEFINE_string(audit_68_host_mem_read_probe, "",
|
|
+ "AUDIT-068 Session 3: CSV of 'VA:SIZE:PERIOD_NS' tuples (max 8) "
|
|
+ "— a dedicated poll thread reads the value at each VA every "
|
|
+ "PERIOD_NS and emits AUDIT-068-READ-CHANGE on transition.",
|
|
+ "Audit");
|
|
+
|
|
+// Phase A — see kernel/event_log.h.
|
|
+DEFINE_string(phase_a_event_log_path, "",
|
|
+ "Phase A: write schema-v1 JSONL event log to this path. "
|
|
+ "Empty (default) = disabled.",
|
|
+ "Audit");
|
|
+DEFINE_bool(phase_a_event_log_mem_writes, false,
|
|
+ "Phase A: include mem.write events in the JSONL log. RESERVED — "
|
|
+ "not wired in this phase. Default false.",
|
|
+ "Audit");
|
|
+
|
|
+// Phase D Stage 1 — see kernel/event_log.h `EmitContentionObserved`.
|
|
+DEFINE_bool(kernel_emit_contention, false,
|
|
+ "Phase D Stage 1: emit `contention.observed` events when "
|
|
+ "RtlEnterCriticalSection's spin loop is exhausted and the call "
|
|
+ "falls through to xeKeWaitForSingleObject. Default false (zero "
|
|
+ "cost when disabled). Requires --phase_a_event_log_path to be "
|
|
+ "set as well.",
|
|
+ "Audit");
|
|
+
|
|
+// Phase B — see kernel/phase_b_snapshot.h.
|
|
+DEFINE_string(phase_b_snapshot_dir, "",
|
|
+ "Phase B: write 5-file structured state snapshot to "
|
|
+ "<dir>/canary/ at the moment immediately before the first "
|
|
+ "guest PPC instruction of entry_point. Empty (default) = "
|
|
+ "disabled, zero overhead.",
|
|
+ "Audit");
|
|
+DEFINE_bool(phase_b_snapshot_and_exit, false,
|
|
+ "Phase B: after writing the snapshot, exit the process "
|
|
+ "immediately (std::_Exit(0)) so re-runs are byte-deterministic.",
|
|
+ "Audit");
|
|
+DEFINE_bool(phase_b_dump_section_content, false,
|
|
+ "Phase B: in memory.json, populate section_contents[].content_b64 "
|
|
+ "with raw bytes of every committed XEX-image region. Default "
|
|
+ "false — per-region SHA-256 is enough for the routine diff; "
|
|
+ "this is the escape hatch for the STOP-and-report condition "
|
|
+ "(image_loaded_sha256 mismatch).",
|
|
+ "Audit");
|
|
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
|
|
index 38c4f98ba..9b5ca7a1c 100644
|
|
--- a/src/xenia/cpu/cpu_flags.h
|
|
+++ b/src/xenia/cpu/cpu_flags.h
|
|
@@ -35,4 +35,52 @@ DECLARE_bool(break_condition_truncate);
|
|
|
|
DECLARE_bool(break_on_debugbreak);
|
|
|
|
+// AUDIT-DEMO smoke marker.
|
|
+DECLARE_bool(audit_demo_setup_trace);
|
|
+
|
|
+// AUDIT-061: multi-PC branch probe — emits one log line per fire with
|
|
+// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs.
|
|
+DECLARE_string(audit_61_branch_probe_pcs);
|
|
+
|
|
+// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose
|
|
+// value-to-be-stored matches any configured value. CSV of u32 values
|
|
+// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty.
|
|
+DECLARE_string(audit_67_value_watch);
|
|
+
|
|
+// AUDIT-068: host-side memory-write watch — emit a log line for each host-side
|
|
+// write to guest memory whose VALUE matches any configured u32 value, or whose
|
|
+// guest VA falls within any configured ADDR or ADDR-range. Mirrors AUDIT-067
|
|
+// but covers the host-side write paths (xe::store_and_swap<T>, Memory::Zero/
|
|
+// Fill/Copy) that AUDIT-067's JIT store-opcode hooks cannot see.
|
|
+//
|
|
+// VALUES: CSV of u32 values, max 8 entries; e.g. "0x8200A208,0x8200A928".
|
|
+// ADDRS: CSV of guest VAs or VA ranges, max 8 entries; range form is
|
|
+// "0xSTART-0xEND" (inclusive). e.g. "0x42500000-0x42600000,0xBCE25340".
|
|
+// Default empty (off); zero cost on the hot path when both are empty.
|
|
+DECLARE_string(audit_68_host_mem_watch_values);
|
|
+DECLARE_string(audit_68_host_mem_watch_addrs);
|
|
+
|
|
+// AUDIT-068 Session 3: read-mode probe. CSV of "VA:SIZE:PERIOD_NS" tuples
|
|
+// (max 8). A dedicated low-priority thread polls each VA every PERIOD_NS and
|
|
+// emits AUDIT-068-READ-CHANGE when the value transitions. SIZE in {1,2,4,8}.
|
|
+// Example: "0xBCE25340:4:1000000" = poll u32 at 0xBCE25340 every 1 ms.
|
|
+// Default empty (off); the poll thread is not spawned when empty.
|
|
+DECLARE_string(audit_68_host_mem_read_probe);
|
|
+
|
|
+// Phase A: JSONL event-log emitter path. When non-empty, the engine writes
|
|
+// schema-v1 JSONL events to this file. Empty (default) = no overhead, no
|
|
+// behavior change. Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md
|
|
+DECLARE_string(phase_a_event_log_path);
|
|
+DECLARE_bool(phase_a_event_log_mem_writes);
|
|
+
|
|
+// Phase B: initial-state snapshot. When the dir cvar is non-empty, the
|
|
+// engine writes a five-file structured state snapshot (cpu_state.json,
|
|
+// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to
|
|
+// `<dir>/canary/` at the moment immediately before the first guest PPC
|
|
+// instruction of the XEX entry_point executes. See
|
|
+// `xenia-rs/audit-runs/phase-b-state-equivalence/`.
|
|
+DECLARE_string(phase_b_snapshot_dir);
|
|
+DECLARE_bool(phase_b_snapshot_and_exit);
|
|
+DECLARE_bool(phase_b_dump_section_content);
|
|
+
|
|
#endif // XENIA_CPU_CPU_FLAGS_H_
|
|
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
|
|
index 22ba66aee..f02b11d7f 100644
|
|
--- a/src/xenia/memory.cc
|
|
+++ b/src/xenia/memory.cc
|
|
@@ -14,6 +14,7 @@
|
|
|
|
#include "third_party/fmt/include/fmt/format.h"
|
|
#include "xenia/base/assert.h"
|
|
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
|
#include "xenia/base/byte_stream.h"
|
|
#include "xenia/base/clock.h"
|
|
#include "xenia/base/cvar.h"
|
|
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
|
|
|
|
static Memory* active_memory_ = nullptr;
|
|
|
|
+// AUDIT-068 — process-global accessor (declared in memory.h).
|
|
+Memory* Memory::active() { return active_memory_; }
|
|
+
|
|
void CrashDump() {
|
|
static std::atomic<int> in_crash_dump(0);
|
|
if (in_crash_dump.fetch_add(1)) {
|
|
@@ -151,11 +155,41 @@ Memory::Memory() {
|
|
uint32_t(xe::memory::allocation_granularity());
|
|
assert_zero(active_memory_);
|
|
active_memory_ = this;
|
|
+
|
|
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
|
|
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
|
|
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
|
|
+ Memory* m = active_memory_;
|
|
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
|
|
+ };
|
|
+
|
|
+ // AUDIT-068 Session 3: register guest→host translation thunk and a
|
|
+ // page-protect query thunk for the read-mode probe. The probe thread uses
|
|
+ // QueryProtect to skip unmapped/uncommitted pages before dereferencing.
|
|
+ xe::audit_68::g_guest_to_host_thunk = [](uint32_t va) -> const void* {
|
|
+ Memory* m = active_memory_;
|
|
+ return m ? reinterpret_cast<const void*>(m->TranslateVirtual(va))
|
|
+ : nullptr;
|
|
+ };
|
|
+ xe::audit_68::g_query_protect_thunk = [](uint32_t va,
|
|
+ uint32_t* out_protect) -> bool {
|
|
+ Memory* m = active_memory_;
|
|
+ if (!m) return false;
|
|
+ BaseHeap* heap = m->LookupHeap(va);
|
|
+ if (!heap) {
|
|
+ if (out_protect) *out_protect = 0;
|
|
+ return false;
|
|
+ }
|
|
+ return heap->QueryProtect(va, out_protect);
|
|
+ };
|
|
}
|
|
|
|
Memory::~Memory() {
|
|
assert_true(active_memory_ == this);
|
|
active_memory_ = nullptr;
|
|
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
|
|
+ xe::audit_68::g_guest_to_host_thunk = nullptr;
|
|
+ xe::audit_68::g_query_protect_thunk = nullptr;
|
|
|
|
// Uninstall the MMIO handler, as we won't be able to service more
|
|
// requests.
|
|
@@ -540,16 +574,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
|
|
}
|
|
|
|
void Memory::Zero(uint32_t address, uint32_t size) {
|
|
+ // AUDIT-068: log a single span event with value=0; size is capped at 8 for
|
|
+ // the value field. Slow path is gated on the atomic flag.
|
|
+ xe::audit_68::check_guest_va(address, 0,
|
|
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Zero");
|
|
std::memset(TranslateVirtual(address), 0, size);
|
|
}
|
|
|
|
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
|
|
+ // Replicate the fill byte into all 8 bytes of the value field. For fills of
|
|
+ // 4+ bytes value_matches therefore only hits a watched u32 whose four bytes
|
|
+ // all equal the fill byte (e.g. 0xCDCDCDCD for a fill byte of 0xCD).
|
|
+ uint64_t v = static_cast<uint64_t>(value);
|
|
+ v |= v << 8;
|
|
+ v |= v << 16;
|
|
+ v |= v << 32;
|
|
+ xe::audit_68::check_guest_va(address, v,
|
|
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Fill");
|
|
std::memset(TranslateVirtual(address), value, size);
|
|
}
|
|
|
|
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
|
|
uint8_t* pdest = TranslateVirtual(dest);
|
|
const uint8_t* psrc = TranslateVirtual(src);
|
|
+ // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the
|
|
+ // source buffer. Catches XEX-loader-style memcpys where a vptr (the target
|
|
+ // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost
|
|
+ // O(size/4 * N_values) with N_values capped at 8 inside value_matches —
|
|
+ // negligible vs the underlying memcpy throughput.
|
|
+ //
|
|
+ // Gated on active bit 0x1 (values-mode) AND active != 0. If only addrs are
|
|
+ // configured (Run 2 voice-struct mode), we still emit a single addr-only
|
|
+ // event covering the destination span so addr-watch isn't broken.
|
|
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
|
|
+ if (active != 0) [[unlikely]] {
|
|
+ if ((active & 0x1) && size >= 4) {
|
|
+ // Scan source for any configured u32 value (big-endian, mirrors how
|
|
+ // guest sees the bytes). 4-byte aligned offsets only.
|
|
+ uint32_t aligned_end = size & ~3u;
|
|
+ for (uint32_t i = 0; i < aligned_end; i += 4) {
|
|
+ uint32_t be_u32 =
|
|
+ (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) |
|
|
+ (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]);
|
|
+ xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy");
|
|
+ }
|
|
+ }
|
|
+ if (active & 0x2) {
|
|
+ // Addr-only mode: emit a single coarse event tagged with the dest base
|
|
+ // and first u32 of source for context. NOTE: the size passed below is
|
|
+ // clamped to 8, so only the first 8 dest bytes are range-checked.
|
|
+ uint64_t v = 0;
|
|
+ if (size >= 4) {
|
|
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
|
|
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
|
|
+ } else if (size > 0) {
|
|
+ for (uint32_t i = 0; i < size; ++i) {
|
|
+ v = (v << 8) | psrc[i];
|
|
+ }
|
|
+ }
|
|
+ xe::audit_68::check_guest_va(
|
|
+ dest, v, static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Copy");
|
|
+ }
|
|
+ }
|
|
std::memcpy(pdest, psrc, size);
|
|
}
|
|
|
|
|
|
=== Full contents of new file: src/xenia/base/audit_68_host_mem_watch_fwd.h ===
|
|
|
|
/**
|
|
******************************************************************************
|
|
* Xenia : Xbox 360 Emulator Research Project *
|
|
******************************************************************************
|
|
* AUDIT-068: host-side memory-write watch — forward declarations only.
|
|
*
|
|
* Declarations here are intentionally minimal so that xenia/base/memory.h can
|
|
* include this without pulling in xenia/memory.h (which would create a
|
|
* circular dependency: xenia-base → xenia-core → xenia-base). The full
|
|
* definitions live in xenia/audit_68_host_mem_watch.{h,cc} (xenia-core).
|
|
*
|
|
* Hot path: callers (the integer specializations of xe::store_and_swap<T>)
|
|
* load the atomic flag once. When it is 0 (default), no further work is done
|
|
* — a single relaxed atomic load and a predictable branch.
|
|
******************************************************************************
|
|
*/
|
|
|
|
#ifndef XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
|
#define XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
|
|
|
#include <atomic>
|
|
#include <cstdint>
|
|
|
|
namespace xe {
|
|
namespace audit_68 {
|
|
|
|
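// Watch-activation bitmask, loaded relaxed on the hot path. 0 = inactive;
|
|
// bit 0x1 = value watches configured, bit 0x2 = addr watches configured.
|
|
// The variable starts at the 0xFFFFFFFF "unparsed" sentinel, so early
|
|
// hot-path calls do enter the slow path; the first slow-path call made after
|
|
// Memory::Memory() has registered its thunks parses the cvars and stores the
|
|
// real mask (0 when both watch cvars are empty).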
|
|
//
|
|
// Implementation lives in xenia-base (audit_68_host_mem_watch_base.cc) so
|
|
// that callers in xenia-base/xenia-cpu/xenia-kernel can resolve the symbol
|
|
// without depending on xenia-core link order.
|
|
extern std::atomic<uint32_t> g_active;
|
|
|
|
// Host-pointer → guest-VA translation thunk. xenia/memory.cc::Memory::Memory()
|
|
// registers a function pointer here that wraps Memory::HostToGuestVirtual.
|
|
// Until set, the slow path falls back to logging the raw host pointer.
|
|
using HostToGuestThunk = uint32_t (*)(const void*);
|
|
extern HostToGuestThunk g_host_to_guest_thunk;
|
|
|
|
// AUDIT-068 Session 3 — read-mode probe support.
|
|
//
|
|
// Guest-VA → host-pointer translation thunk (wraps Memory::TranslateVirtual).
|
|
// Used by the read-probe poll thread to sample bytes at configured guest VAs.
|
|
// May return non-null even for unmapped/uncommitted VAs (the underlying
|
|
// translation is arithmetic — virtual_membase_ + va) — callers MUST consult
|
|
// the QueryProtect thunk before dereferencing.
|
|
using GuestToHostThunk = const void* (*)(uint32_t);
|
|
extern GuestToHostThunk g_guest_to_host_thunk;
|
|
|
|
// Returns true iff the page containing `guest_va` is committed and readable;
|
|
// out_protect receives the raw page protect bits (kProtectRead, etc.). Wraps
|
|
// Memory::LookupHeap() + BaseHeap::QueryProtect(). Used as a guard before the
|
|
// read-probe samples bytes (early-boot heap-not-yet-mapped path must NOT
|
|
// crash).
|
|
using QueryProtectThunk = bool (*)(uint32_t, uint32_t* /*out_protect*/);
|
|
extern QueryProtectThunk g_query_protect_thunk;
|
|
|
|
// Slow path. Only invoked when g_active is non-zero. Implementation in
|
|
// xenia/base/audit_68_host_mem_watch_base.cc (xenia-base).
|
|
//
|
|
// host_ptr: the host pointer being written (from store_and_swap's `mem`).
|
|
// value: the value being stored (zero-extended to u64).
|
|
// size: 1, 2, 4 or 8.
|
|
// tag: caller-provided tag string (e.g. "store_and_swap<u32>"). Logged
|
|
// verbatim, no formatting. Must be a static string (lifetime
|
|
// beyond this call).
|
|
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
|
|
uint8_t size, const char* tag);
|
|
|
|
// Same as above, but with a known guest VA (for callers like Memory::Zero/
|
|
// Fill/Copy that have the VA but not a single host pointer).
|
|
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
|
|
const char* tag);
|
|
|
|
// Inline hot-path wrappers. Single relaxed atomic load + branch when inactive.
|
|
inline void check_host_write(const void* host_ptr, uint64_t value, uint8_t size,
|
|
const char* tag) {
|
|
if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
|
check_host_write_slowpath(host_ptr, value, size, tag);
|
|
}
|
|
}
|
|
|
|
inline void check_guest_va(uint32_t guest_va, uint64_t value, uint8_t size,
|
|
const char* tag) {
|
|
if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
|
check_guest_va_slowpath(guest_va, value, size, tag);
|
|
}
|
|
}
|
|
|
|
} // namespace audit_68
|
|
} // namespace xe
|
|
|
|
#endif // XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
|
|
|
=== Full contents of new file: src/xenia/base/audit_68_host_mem_watch_base.cc ===
|
|
|
|
/**
|
|
******************************************************************************
|
|
* Xenia : Xbox 360 Emulator Research Project *
|
|
******************************************************************************
|
|
* AUDIT-068 host-side memory-write watch — implementation (xenia-base).
|
|
*
|
|
* Mirrors AUDIT-067 in spirit (value-CSV cvar, lazy parse, atomic-bool
|
|
* activation) but observes the HOST-side write paths instead of the JIT'd
|
|
* guest store opcodes. Captures writes performed by xe::store_and_swap<T>
|
|
* (xenia/base/memory.h) and by Memory::Zero/Fill/Copy (xenia/memory.cc).
|
|
*
|
|
* Lives in xenia-base so that the slow-path symbols resolve for callers in
|
|
* xenia-base / xenia-cpu / xenia-kernel without depending on xenia-core link
|
|
* order. The host→guest VA translation is provided by a function-pointer
|
|
* thunk that xenia::Memory::Memory() registers at construction.
|
|
*
|
|
* See xenia/base/audit_68_host_mem_watch_fwd.h for the API.
|
|
* See xenia/cpu/cpu_flags.{h,cc} for the cvars.
|
|
******************************************************************************
|
|
*/
|
|
|
|
#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
|
|
|
#include <algorithm>
|
|
#include <atomic>
|
|
#include <chrono>
|
|
#include <cstring>
|
|
#include <mutex>
|
|
#include <string>
|
|
#include <thread>
|
|
#include <vector>
|
|
|
|
#include "xenia/base/cvar.h"
|
|
#include "xenia/base/logging.h"
|
|
#include "xenia/base/threading.h"
|
|
|
|
// We need the cvars but cpu_flags.h lives in xenia-cpu. To avoid an upward
|
|
// dep we re-declare them here with the same macros — cvar.h's DECLARE_*
|
|
// macros are header-safe (just `extern` declarations) and resolve against the
|
|
// definitions in xenia-cpu/cpu_flags.cc at link time. (xenia-cpu links AFTER
|
|
// xenia-base in the executable; symbols in xenia-cpu/cpu_flags.cc are still
|
|
// resolvable from xenia-base translation units because the lld pass folds
|
|
// all libraries together at the executable level.)
|
|
DECLARE_string(audit_68_host_mem_watch_values);
|
|
DECLARE_string(audit_68_host_mem_watch_addrs);
|
|
DECLARE_string(audit_68_host_mem_read_probe);
|
|
|
|
namespace xe {
|
|
namespace audit_68 {
|
|
|
|
// Hot-path flag (declared in fwd header). Initial sentinel UINT32_MAX means
|
|
// "unparsed"; the very first slow-path call invokes ensure_parsed() which
|
|
// replaces the sentinel with the actual active bitmask (0 if both cvars are
|
|
// empty, 1/2/3 otherwise). After that, hot-path calls observe the real value
|
|
// and bail out cheaply when off.
|
|
std::atomic<uint32_t> g_active{0xFFFFFFFFu};
|
|
|
|
// Host→guest VA translation thunk (declared in fwd header). Set by
|
|
// xenia::Memory::Memory() at construction; reset to nullptr by ~Memory().
|
|
HostToGuestThunk g_host_to_guest_thunk{nullptr};
|
|
|
|
// AUDIT-068 Session 3: guest→host translation + page-protect query thunks.
|
|
GuestToHostThunk g_guest_to_host_thunk{nullptr};
|
|
QueryProtectThunk g_query_protect_thunk{nullptr};
|
|
|
|
namespace {
|
|
|
|
constexpr size_t kMaxValues = 8;
|
|
constexpr size_t kMaxAddrRanges = 8;
|
|
|
|
// Closed guest-VA interval [start, end] held in the addr watch list.
struct AddrRange {
  uint32_t start = 0;  // first watched guest VA (inclusive)
  uint32_t end = 0;    // last watched guest VA (inclusive)
};
|
|
|
|
std::vector<uint32_t> g_values;
|
|
std::vector<AddrRange> g_addrs;
|
|
std::once_flag g_parsed_flag;
|
|
|
|
std::chrono::steady_clock::time_point g_t0;
|
|
std::once_flag g_t0_once;
|
|
|
|
// Returns the steady-clock time, in nanoseconds, elapsed since the first call
// to this function. The first call latches the reference point g_t0 through
// g_t0_once.
int64_t host_ns_since_start() {
  using std::chrono::steady_clock;
  std::call_once(g_t0_once, [] { g_t0 = steady_clock::now(); });
  const auto elapsed = steady_clock::now() - g_t0;
  return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
}
|
|
|
|
// Strips leading and trailing spaces and tabs from `s` in place. A string made
// only of blanks becomes empty; other whitespace characters are left alone.
void trim(std::string& s) {
  static constexpr char kBlanks[] = " \t";
  const size_t first_kept = s.find_first_not_of(kBlanks);
  if (first_kept == std::string::npos) {
    s.clear();
    return;
  }
  const size_t last_kept = s.find_last_not_of(kBlanks);
  s.erase(last_kept + 1);
  s.erase(0, first_kept);
}
|
|
|
|
// Parses `tok` as an unsigned 32-bit integer. The base is auto-detected
// ("0x" prefix = hex, leading "0" = octal, otherwise decimal).
//
// Returns true and writes `*out` only when the WHOLE token is a number that
// fits in 32 bits. Rejected (returns false, `*out` untouched):
//   - empty or non-numeric tokens,
//   - tokens with trailing garbage ("0x10zz"),
//   - anything containing '-' (stoull would wrap negatives to huge values),
//   - values above UINT32_MAX (previously truncated silently on LP64 hosts).
bool parse_u32(const std::string& tok, uint32_t* out) {
  if (out == nullptr || tok.empty()) return false;
  if (tok.find('-') != std::string::npos) return false;
  try {
    size_t consumed = 0;
    const unsigned long long parsed = std::stoull(tok, &consumed, 0);
    if (consumed != tok.size()) return false;
    if (parsed > UINT32_MAX) return false;
    *out = static_cast<uint32_t>(parsed);
    return true;
  } catch (...) {
    // std::stoull throws on non-numeric input and on 64-bit overflow; both
    // simply mean "not a valid u32" here.
    return false;
  }
}
|
|
|
|
void parse_values_csv(const std::string& csv) {
|
|
size_t pos = 0;
|
|
while (pos < csv.size() && g_values.size() < kMaxValues) {
|
|
size_t end = csv.find(',', pos);
|
|
std::string tok = csv.substr(pos, end - pos);
|
|
trim(tok);
|
|
if (!tok.empty()) {
|
|
uint32_t v;
|
|
if (parse_u32(tok, &v)) {
|
|
g_values.push_back(v);
|
|
}
|
|
}
|
|
if (end == std::string::npos) break;
|
|
pos = end + 1;
|
|
}
|
|
}
|
|
|
|
void parse_addrs_csv(const std::string& csv) {
|
|
size_t pos = 0;
|
|
while (pos < csv.size() && g_addrs.size() < kMaxAddrRanges) {
|
|
size_t end = csv.find(',', pos);
|
|
std::string tok = csv.substr(pos, end - pos);
|
|
trim(tok);
|
|
if (!tok.empty()) {
|
|
size_t dash = tok.find('-', 2); // skip leading "0x" if present
|
|
AddrRange r{};
|
|
if (dash != std::string::npos) {
|
|
std::string s = tok.substr(0, dash);
|
|
std::string e = tok.substr(dash + 1);
|
|
trim(s);
|
|
trim(e);
|
|
uint32_t a, b;
|
|
if (parse_u32(s, &a) && parse_u32(e, &b)) {
|
|
r.start = a;
|
|
r.end = b;
|
|
g_addrs.push_back(r);
|
|
}
|
|
} else {
|
|
uint32_t a;
|
|
if (parse_u32(tok, &a)) {
|
|
r.start = a;
|
|
r.end = a + 7;
|
|
g_addrs.push_back(r);
|
|
}
|
|
}
|
|
}
|
|
if (end == std::string::npos) break;
|
|
pos = end + 1;
|
|
}
|
|
}
|
|
|
|
void parse_locked() {
|
|
parse_values_csv(cvars::audit_68_host_mem_watch_values);
|
|
parse_addrs_csv(cvars::audit_68_host_mem_watch_addrs);
|
|
|
|
uint32_t bits = 0;
|
|
if (!g_values.empty()) bits |= 0x1;
|
|
if (!g_addrs.empty()) bits |= 0x2;
|
|
g_active.store(bits, std::memory_order_release);
|
|
|
|
XELOGI(
|
|
"AUDIT-068-INIT values_csv=\"{}\" addrs_csv=\"{}\" values_parsed={} "
|
|
"addr_ranges_parsed={} active=0x{:X}",
|
|
cvars::audit_68_host_mem_watch_values,
|
|
cvars::audit_68_host_mem_watch_addrs, g_values.size(), g_addrs.size(),
|
|
bits);
|
|
for (size_t i = 0; i < g_values.size(); ++i) {
|
|
XELOGI("AUDIT-068-INIT value[{}] = 0x{:08X}", i, g_values[i]);
|
|
}
|
|
for (size_t i = 0; i < g_addrs.size(); ++i) {
|
|
XELOGI("AUDIT-068-INIT addr_range[{}] = 0x{:08X}-0x{:08X}", i,
|
|
g_addrs[i].start, g_addrs[i].end);
|
|
}
|
|
}
|
|
|
|
bool value_matches(uint64_t value, uint8_t size) {
|
|
for (uint32_t v : g_values) {
|
|
if (size >= 4 && static_cast<uint32_t>(value) == v) return true;
|
|
if (size == 8 && static_cast<uint32_t>(value >> 32) == v) return true;
|
|
if (size == 2 && (v & 0xFFFF) == (value & 0xFFFF)) return true;
|
|
if (size == 1 && (v & 0xFF) == (value & 0xFF)) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool addr_matches(uint32_t guest_va, uint8_t size) {
|
|
uint32_t lo = guest_va;
|
|
uint32_t hi = guest_va + (size ? size - 1 : 0);
|
|
for (const auto& r : g_addrs) {
|
|
if (lo <= r.end && hi >= r.start) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Thin wrapper: id of the calling thread as reported by xe::threading.
uint32_t current_tid() {
  const uint32_t tid = xe::threading::current_thread_id();
  return tid;
}
|
|
|
|
// Writes one AUDIT-068-HOST-WRITE log line describing a matched write.
//   guest_va : guest address of the write
//   host_ptr : host pointer of the write (logged as an integer; may be null)
//   value    : value written, logged as 16 hex digits
//   size     : width passed by the caller
//   tag      : caller label; "<null>" is logged when it is a null pointer
// The line also carries host_ns_since_start() and the calling thread id.
void emit(uint32_t guest_va, const void* host_ptr, uint64_t value,
          uint8_t size, const char* tag) {
  const uintptr_t host_addr = reinterpret_cast<uintptr_t>(host_ptr);
  const uint32_t width = static_cast<uint32_t>(size);
  const char* label = tag;
  if (label == nullptr) {
    label = "<null>";
  }
  XELOGI(
      "AUDIT-068-HOST-WRITE guest_va=0x{:08X} host_ptr=0x{:016X} "
      "val=0x{:016X} sz={} fn={} host_ns={} tid={}",
      guest_va, host_addr, value, width, label, host_ns_since_start(),
      current_tid());
}
|
|
|
|
// ===== AUDIT-068 Session 3 — read-mode probe state =====
|
|
|
|
constexpr size_t kMaxReadProbes = 8;
|
|
|
|
// One configured read-mode probe plus the state the poll thread keeps for it.
struct ReadProbe {
  uint32_t guest_va = 0;        // guest address that is sampled
  uint8_t size = 0;             // sample width in bytes: 1, 2, 4 or 8
  uint64_t period_ns = 0;       // requested polling period, in nanoseconds
  uint64_t last_value = 0;      // most recent successfully sampled value
  bool last_was_valid = false;  // whether the previous sample succeeded
};
|
|
|
|
std::vector<ReadProbe> g_read_probes;
|
|
std::atomic<bool> g_read_probe_thread_running{false};
|
|
std::atomic<bool> g_read_probe_shutdown{false};
|
|
std::thread g_read_probe_thread;
|
|
std::once_flag g_read_probe_started;
|
|
|
|
bool parse_read_probe_tok(const std::string& tok, ReadProbe* out) {
|
|
// Expected form: "VA:SIZE:PERIOD_NS" — three colon-separated u64.
|
|
size_t c1 = tok.find(':');
|
|
if (c1 == std::string::npos) return false;
|
|
size_t c2 = tok.find(':', c1 + 1);
|
|
if (c2 == std::string::npos) return false;
|
|
std::string sva = tok.substr(0, c1);
|
|
std::string ssz = tok.substr(c1 + 1, c2 - c1 - 1);
|
|
std::string sper = tok.substr(c2 + 1);
|
|
trim(sva);
|
|
trim(ssz);
|
|
trim(sper);
|
|
try {
|
|
out->guest_va = static_cast<uint32_t>(std::stoul(sva, nullptr, 0));
|
|
uint32_t sz = static_cast<uint32_t>(std::stoul(ssz, nullptr, 0));
|
|
if (sz != 1 && sz != 2 && sz != 4 && sz != 8) return false;
|
|
out->size = static_cast<uint8_t>(sz);
|
|
out->period_ns = static_cast<uint64_t>(std::stoull(sper, nullptr, 0));
|
|
if (out->period_ns < 1000) out->period_ns = 1000; // 1us floor.
|
|
out->last_value = 0;
|
|
out->last_was_valid = false;
|
|
return true;
|
|
} catch (...) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
void parse_read_probes_csv(const std::string& csv) {
|
|
size_t pos = 0;
|
|
while (pos < csv.size() && g_read_probes.size() < kMaxReadProbes) {
|
|
size_t end = csv.find(',', pos);
|
|
std::string tok = csv.substr(pos, end - pos);
|
|
trim(tok);
|
|
if (!tok.empty()) {
|
|
ReadProbe rp{};
|
|
if (parse_read_probe_tok(tok, &rp)) {
|
|
g_read_probes.push_back(rp);
|
|
}
|
|
}
|
|
if (end == std::string::npos) break;
|
|
pos = end + 1;
|
|
}
|
|
}
|
|
|
|
uint64_t sample_at(uint32_t guest_va, uint8_t size, bool* out_valid) {
|
|
*out_valid = false;
|
|
if (!g_guest_to_host_thunk || !g_query_protect_thunk) return 0;
|
|
uint32_t prot = 0;
|
|
if (!g_query_protect_thunk(guest_va, &prot)) return 0;
|
|
// Page must have at least read permission. The protect bits map to
|
|
// xe::memory::PageAccess: kReadOnly=1, kReadWrite=2, kExecuteReadOnly=3,
|
|
// kExecuteReadWrite=4. kNoAccess=0. Accept anything non-zero — caller
|
|
// distinguishes via the second-pass change detector anyway.
|
|
if (prot == 0) return 0;
|
|
const void* hp = g_guest_to_host_thunk(guest_va);
|
|
if (!hp) return 0;
|
|
uint64_t v = 0;
|
|
// Guest memory is big-endian. We use raw byte loads to avoid alignment
|
|
// traps for size>4 on possibly-unaligned VAs. The "value" we log is the
|
|
// host-endian interpretation of the BE bytes (matches store_and_swap's
|
|
// logging convention: the byte-swapped scalar).
|
|
const uint8_t* bp = reinterpret_cast<const uint8_t*>(hp);
|
|
switch (size) {
|
|
case 1: v = bp[0]; break;
|
|
case 2: v = (uint64_t(bp[0]) << 8) | bp[1]; break;
|
|
case 4:
|
|
v = (uint64_t(bp[0]) << 24) | (uint64_t(bp[1]) << 16) |
|
|
(uint64_t(bp[2]) << 8) | bp[3];
|
|
break;
|
|
case 8:
|
|
v = (uint64_t(bp[0]) << 56) | (uint64_t(bp[1]) << 48) |
|
|
(uint64_t(bp[2]) << 40) | (uint64_t(bp[3]) << 32) |
|
|
(uint64_t(bp[4]) << 24) | (uint64_t(bp[5]) << 16) |
|
|
(uint64_t(bp[6]) << 8) | bp[7];
|
|
break;
|
|
}
|
|
*out_valid = true;
|
|
return v;
|
|
}
|
|
|
|
void read_probe_thread_main() {
|
|
// Compute the GCD-ish min poll period across all probes; sleep that long
|
|
// between scans. Each probe fires only when its own period_ns has elapsed
|
|
// since the last sample (per-probe `next_fire_ns`).
|
|
uint64_t min_period_ns = UINT64_MAX;
|
|
for (const auto& p : g_read_probes) {
|
|
if (p.period_ns < min_period_ns) min_period_ns = p.period_ns;
|
|
}
|
|
if (min_period_ns == UINT64_MAX) return;
|
|
|
|
// Per-probe next-fire times.
|
|
std::vector<uint64_t> next_fire(g_read_probes.size(), 0);
|
|
|
|
XELOGI(
|
|
"AUDIT-068-READ-INIT probe_count={} min_period_ns={} thread spawned",
|
|
g_read_probes.size(), min_period_ns);
|
|
for (size_t i = 0; i < g_read_probes.size(); ++i) {
|
|
XELOGI("AUDIT-068-READ-INIT probe[{}] va=0x{:08X} size={} period_ns={}",
|
|
i, g_read_probes[i].guest_va,
|
|
static_cast<uint32_t>(g_read_probes[i].size),
|
|
g_read_probes[i].period_ns);
|
|
}
|
|
|
|
while (!g_read_probe_shutdown.load(std::memory_order_relaxed)) {
|
|
int64_t now_ns = host_ns_since_start();
|
|
for (size_t i = 0; i < g_read_probes.size(); ++i) {
|
|
if (static_cast<uint64_t>(now_ns) < next_fire[i]) continue;
|
|
ReadProbe& rp = g_read_probes[i];
|
|
bool valid = false;
|
|
uint64_t v = sample_at(rp.guest_va, rp.size, &valid);
|
|
if (valid) {
|
|
if (!rp.last_was_valid) {
|
|
// First successful read: emit the initial value, do NOT call it a
|
|
// "change" — but log so we know when the VA mapped.
|
|
XELOGI(
|
|
"AUDIT-068-READ-INITIAL va=0x{:08X} val=0x{:016X} sz={} "
|
|
"host_ns={} tid=probe",
|
|
rp.guest_va, v, static_cast<uint32_t>(rp.size), now_ns);
|
|
rp.last_value = v;
|
|
rp.last_was_valid = true;
|
|
} else if (v != rp.last_value) {
|
|
XELOGI(
|
|
"AUDIT-068-READ-CHANGE va=0x{:08X} old=0x{:016X} "
|
|
"new=0x{:016X} sz={} host_ns={} tid=probe",
|
|
rp.guest_va, rp.last_value, v, static_cast<uint32_t>(rp.size),
|
|
now_ns);
|
|
rp.last_value = v;
|
|
}
|
|
} else if (rp.last_was_valid) {
|
|
// Was valid, now invalid — page unmapped/reprotected.
|
|
XELOGI(
|
|
"AUDIT-068-READ-UNMAPPED va=0x{:08X} last=0x{:016X} sz={} "
|
|
"host_ns={} tid=probe",
|
|
rp.guest_va, rp.last_value, static_cast<uint32_t>(rp.size),
|
|
now_ns);
|
|
rp.last_was_valid = false;
|
|
}
|
|
next_fire[i] = static_cast<uint64_t>(now_ns) + rp.period_ns;
|
|
}
|
|
// Sleep until the next earliest fire, but no shorter than 1us and no
|
|
// longer than min_period_ns (to keep shutdown latency bounded).
|
|
uint64_t sleep_ns = min_period_ns;
|
|
if (sleep_ns < 1000) sleep_ns = 1000;
|
|
std::this_thread::sleep_for(std::chrono::nanoseconds(sleep_ns));
|
|
}
|
|
XELOGI("AUDIT-068-READ-EXIT thread shutting down");
|
|
}
|
|
|
|
void start_read_probe_thread_if_configured() {
|
|
std::call_once(g_read_probe_started, []() {
|
|
parse_read_probes_csv(cvars::audit_68_host_mem_read_probe);
|
|
if (g_read_probes.empty()) return;
|
|
if (!g_guest_to_host_thunk || !g_query_protect_thunk) {
|
|
XELOGI(
|
|
"AUDIT-068-READ-INIT thunks not ready (guest_to_host={} "
|
|
"query_protect={}) — read probe deferred",
|
|
(void*)g_guest_to_host_thunk, (void*)g_query_protect_thunk);
|
|
return;
|
|
}
|
|
g_read_probe_thread_running.store(true, std::memory_order_release);
|
|
g_read_probe_thread = std::thread(&read_probe_thread_main);
|
|
g_read_probe_thread.detach(); // best-effort; daemon-style.
|
|
});
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Runs parse_locked() exactly once (guarded by g_parsed_flag); later calls
// return after the once-flag check.
void ensure_parsed() {
  std::call_once(g_parsed_flag, [] { parse_locked(); });
}
|
|
|
|
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
|
|
uint8_t size, const char* tag) {
|
|
// AUDIT-068 Session 2: defer parsing until Memory::Memory() has registered
|
|
// the host→guest thunk. This guarantees the cmdline cvar override has been
|
|
// applied AND the logging subsystem is alive before we latch g_active.
|
|
// Without this gate, a be<T>::set() call during static-init (e.g. from a
|
|
// global initializer in another translation unit) would trigger
|
|
// parse_locked() before cpu_flags.cc's cvar objects are constructed —
|
|
// latching g_active=0 permanently and silencing the watch.
|
|
HostToGuestThunk thunk = g_host_to_guest_thunk;
|
|
if (!thunk) return;
|
|
ensure_parsed();
|
|
// AUDIT-068 Session 3: lazy-start the read-probe poll thread. Same gate as
|
|
// ensure_parsed() — must come after Memory::Memory() has registered the
|
|
// thunks so the probe can read pages safely.
|
|
start_read_probe_thread_if_configured();
|
|
uint32_t active = g_active.load(std::memory_order_acquire);
|
|
if (active == 0) return;
|
|
|
|
uint32_t guest_va = 0;
|
|
if (thunk) {
|
|
guest_va = thunk(host_ptr);
|
|
}
|
|
|
|
bool hit = false;
|
|
if ((active & 0x1) && value_matches(value, size)) hit = true;
|
|
if (!hit && (active & 0x2) && thunk && addr_matches(guest_va, size)) {
|
|
hit = true;
|
|
}
|
|
if (!hit) return;
|
|
|
|
emit(guest_va, host_ptr, value, size, tag);
|
|
}
|
|
|
|
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
|
|
const char* tag) {
|
|
// AUDIT-068 Session 2: same static-init gate as check_host_write_slowpath.
|
|
// Callers (Memory::Zero/Fill/Copy + xex_module audit68_prescan_memcpy) only
|
|
// run after Memory::Memory(), but defensive in case of future expansion.
|
|
if (!g_host_to_guest_thunk) return;
|
|
ensure_parsed();
|
|
uint32_t active = g_active.load(std::memory_order_acquire);
|
|
if (active == 0) return;
|
|
|
|
bool hit = false;
|
|
if ((active & 0x1) && value_matches(value, size)) hit = true;
|
|
if (!hit && (active & 0x2) && addr_matches(guest_va, size)) hit = true;
|
|
if (!hit) return;
|
|
|
|
emit(guest_va, nullptr, value, size, tag);
|
|
}
|
|
|
|
} // namespace audit_68
|
|
} // namespace xe
|