Files
xenia-rs/audit-runs/audit-068-host-mem-watch/fix-canary-v4.diff
MechaCat02 ef93a4fa14 handoff: VSync/event-wedge fixes + iterate 2.A–2.BC research notes
Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity +
  related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the
iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps
(.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as
regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-05 07:19:08 +02:00

1732 lines
68 KiB
Diff

# AUDIT-068 cumulative canary instrumentation diff — Session 4 close
# Session 4 adds 0 LOC (zero new instrumentation needed).
# Diff content equals Session 3's; only this header changes.
diff --git a/src/xenia/base/byte_order.h b/src/xenia/base/byte_order.h
index 5a076f319..c80ee0ffc 100644
--- a/src/xenia/base/byte_order.h
+++ b/src/xenia/base/byte_order.h
@@ -11,6 +11,7 @@
#define XENIA_BASE_BYTE_ORDER_H_
#include <cstdint>
+#include <type_traits>
#if defined __has_include
#if __has_include(<version>)
#include <version>
@@ -21,6 +22,7 @@
#endif
#include "xenia/base/assert.h"
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
#include "xenia/base/platform.h"
#if !__cpp_lib_endian
@@ -88,6 +90,30 @@ struct endian_store {
operator T() const { return get(); }
void set(const T& src) {
+ // AUDIT-068 Session 2: hook the canonical be<T>/le<T> write path. Gated
+ // on the host→guest thunk being installed by Memory::Memory(); without
+ // that there is no Memory and therefore no possible guest-memory write.
+ // This ALSO prevents the slow-path from running during static-init order
+ // (which would race the cvar object construction in cpu_flags.cc and
+ // permanently latch g_active=0 before --audit_68_* cmdline override
+ // applies). See reading-error #35 / Session 2 plan.
+ if constexpr (sizeof(T) <= 8 && std::is_integral_v<T>) {
+ if (xe::audit_68::g_host_to_guest_thunk != nullptr) [[unlikely]] {
+ uint64_t v;
+ if constexpr (sizeof(T) == 8) {
+ v = static_cast<uint64_t>(src);
+ } else if constexpr (sizeof(T) == 4) {
+ v = static_cast<uint64_t>(static_cast<uint32_t>(src));
+ } else if constexpr (sizeof(T) == 2) {
+ v = static_cast<uint64_t>(static_cast<uint16_t>(src));
+ } else {
+ v = static_cast<uint64_t>(static_cast<uint8_t>(src));
+ }
+ xe::audit_68::check_host_write(
+ &value, v, static_cast<uint8_t>(sizeof(T)),
+ E == std::endian::big ? "be<T>::set" : "le<T>::set");
+ }
+ }
if constexpr (std::endian::native == E) {
value = src;
} else {
diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h
index 8ef40bbff..e78c8499c 100644
--- a/src/xenia/base/memory.h
+++ b/src/xenia/base/memory.h
@@ -18,6 +18,7 @@
#include <string_view>
#include <type_traits>
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
#include "xenia/base/byte_order.h"
namespace xe {
@@ -354,34 +355,52 @@ template <typename T>
void store(void* mem, const T& value);
template <>
inline void store<int8_t>(void* mem, const int8_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
+ static_cast<uint8_t>(value)),
+ 1, "store<i8>");
*reinterpret_cast<int8_t*>(mem) = value;
}
template <>
inline void store<uint8_t>(void* mem, const uint8_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 1,
+ "store<u8>");
*reinterpret_cast<uint8_t*>(mem) = value;
}
template <>
inline void store<int16_t>(void* mem, const int16_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
+ static_cast<uint16_t>(value)),
+ 2, "store<i16>");
*reinterpret_cast<int16_t*>(mem) = value;
}
template <>
inline void store<uint16_t>(void* mem, const uint16_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 2,
+ "store<u16>");
*reinterpret_cast<uint16_t*>(mem) = value;
}
template <>
inline void store<int32_t>(void* mem, const int32_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
+ static_cast<uint32_t>(value)),
+ 4, "store<i32>");
*reinterpret_cast<int32_t*>(mem) = value;
}
template <>
inline void store<uint32_t>(void* mem, const uint32_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 4,
+ "store<u32>");
*reinterpret_cast<uint32_t*>(mem) = value;
}
template <>
inline void store<int64_t>(void* mem, const int64_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 8,
+ "store<i64>");
*reinterpret_cast<int64_t*>(mem) = value;
}
template <>
inline void store<uint64_t>(void* mem, const uint64_t& value) {
+ xe::audit_68::check_host_write(mem, value, 8, "store<u64>");
*reinterpret_cast<uint64_t*>(mem) = value;
}
template <>
@@ -411,34 +430,52 @@ template <typename T>
void store_and_swap(void* mem, const T& value);
template <>
inline void store_and_swap<int8_t>(void* mem, const int8_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
+ static_cast<uint8_t>(value)),
+ 1, "store_and_swap<i8>");
*reinterpret_cast<int8_t*>(mem) = value;
}
template <>
inline void store_and_swap<uint8_t>(void* mem, const uint8_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 1,
+ "store_and_swap<u8>");
*reinterpret_cast<uint8_t*>(mem) = value;
}
template <>
inline void store_and_swap<int16_t>(void* mem, const int16_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
+ static_cast<uint16_t>(value)),
+ 2, "store_and_swap<i16>");
*reinterpret_cast<int16_t*>(mem) = byte_swap(value);
}
template <>
inline void store_and_swap<uint16_t>(void* mem, const uint16_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 2,
+ "store_and_swap<u16>");
*reinterpret_cast<uint16_t*>(mem) = byte_swap(value);
}
template <>
inline void store_and_swap<int32_t>(void* mem, const int32_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
+ static_cast<uint32_t>(value)),
+ 4, "store_and_swap<i32>");
*reinterpret_cast<int32_t*>(mem) = byte_swap(value);
}
template <>
inline void store_and_swap<uint32_t>(void* mem, const uint32_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 4,
+ "store_and_swap<u32>");
*reinterpret_cast<uint32_t*>(mem) = byte_swap(value);
}
template <>
inline void store_and_swap<int64_t>(void* mem, const int64_t& value) {
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 8,
+ "store_and_swap<i64>");
*reinterpret_cast<int64_t*>(mem) = byte_swap(value);
}
template <>
inline void store_and_swap<uint64_t>(void* mem, const uint64_t& value) {
+ xe::audit_68::check_host_write(mem, value, 8, "store_and_swap<u64>");
*reinterpret_cast<uint64_t*>(mem) = byte_swap(value);
}
template <>
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc
index 5da8f6adc..cbac9826c 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.cc
+++ b/src/xenia/cpu/backend/x64/x64_emitter.cc
@@ -13,6 +13,8 @@
#include <climits>
#include <cstring>
+#include <string>
+#include <vector>
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
@@ -63,6 +65,76 @@ DEFINE_bool(instrument_call_times, false,
"Compute time taken for functions, for profiling guest code",
"x64");
#endif
+
+// AUDIT-061/067: forward decls of probe/watch tables (defined in
+// ppc_hir_builder.cc).
+namespace xe {
+namespace cpu {
+namespace audit61 {
+const std::vector<uint32_t>& pcs();
+} // namespace audit61
+namespace audit67 {
+const std::vector<uint32_t>& vals();
+} // namespace audit67
+} // namespace cpu
+} // namespace xe
+
+// AUDIT-061: handler for trap codes [200, 232). arg0 carries trap idx
+// (trap_code - 200), mapping to ::xe::cpu::audit61::pcs()[idx]. Emits one
+// log line per fire with cr0/cr6 LGE flags + key GPRs + LR + tid.
+static uint64_t TrapAudit61Branch(void* raw_context, uint64_t idx) {
+ auto* ctx = reinterpret_cast<xe::cpu::ppc::PPCContext_s*>(raw_context);
+ const auto& pcs = ::xe::cpu::audit61::pcs();
+ uint32_t pc = (idx < pcs.size()) ? pcs[static_cast<size_t>(idx)] : 0u;
+ uint32_t tid = 0;
+ if (ctx->thread_state) {
+ tid = ctx->thread_state->thread_id();
+ }
+ auto enc = [](uint8_t lt, uint8_t gt, uint8_t eq) {
+ char buf[4];
+ buf[0] = lt ? 'L' : '.';
+ buf[1] = gt ? 'G' : '.';
+ buf[2] = eq ? 'E' : '.';
+ buf[3] = '\0';
+ return std::string(buf);
+ };
+ XELOGI(
+ "AUDIT-061-BR pc={:08X} lr={:08X} cr0={} cr6={} r3={:08X} r4={:08X} "
+ "r5={:08X} r6={:08X} r31={:08X} tid={}",
+ pc, static_cast<uint32_t>(ctx->lr),
+ enc(ctx->cr0.cr0_lt, ctx->cr0.cr0_gt, ctx->cr0.cr0_eq),
+ enc(ctx->cr6.cr6_all_equal, ctx->cr6.cr6_1, ctx->cr6.cr6_none_equal),
+ static_cast<uint32_t>(ctx->r[3]), static_cast<uint32_t>(ctx->r[4]),
+ static_cast<uint32_t>(ctx->r[5]), static_cast<uint32_t>(ctx->r[6]),
+ static_cast<uint32_t>(ctx->r[31]), tid);
+ return 0;
+}
+
+// AUDIT-067: handler for trap codes [250, 254). arg0 carries trap idx
+// (trap_code - 250), mapping to ::xe::cpu::audit67::vals()[idx]. Fired when a
+// stored 32-bit word (a 64-bit half or vector lane included) equals it. The emit
+// site stashed (pc << 32) | (ea & 0xFFFFFFFF) in ctx->scratch before the trap.
+static uint64_t TrapAudit67ValueWatch(void* raw_context, uint64_t idx) {
+ auto* ctx = reinterpret_cast<xe::cpu::ppc::PPCContext_s*>(raw_context);
+ const auto& vals = ::xe::cpu::audit67::vals();
+ uint32_t val =
+ (idx < vals.size()) ? vals[static_cast<size_t>(idx)] : 0u;
+ uint32_t pc = static_cast<uint32_t>(ctx->scratch >> 32);
+ uint32_t dst = static_cast<uint32_t>(ctx->scratch & 0xFFFFFFFFu);
+ uint32_t tid = 0;
+ if (ctx->thread_state) {
+ tid = ctx->thread_state->thread_id();
+ }
+ XELOGI(
+ "AUDIT-067-VAL pc={:08X} lr={:08X} val={:08X} dst={:08X} "
+ "r3={:08X} r4={:08X} r5={:08X} r6={:08X} r31={:08X} tid={}",
+ pc, static_cast<uint32_t>(ctx->lr), val, dst,
+ static_cast<uint32_t>(ctx->r[3]), static_cast<uint32_t>(ctx->r[4]),
+ static_cast<uint32_t>(ctx->r[5]), static_cast<uint32_t>(ctx->r[6]),
+ static_cast<uint32_t>(ctx->r[31]), tid);
+ return 0;
+}
+
namespace xe {
namespace cpu {
namespace backend {
@@ -455,6 +527,20 @@ void X64Emitter::Trap(uint16_t trap_type) {
// ?
break;
default:
+ // AUDIT-067: trap codes [250, 254) dispatch the value-watch handler.
+ // arg0 = idx into ::xe::cpu::audit67::vals().
+ if (trap_type >= 250 && trap_type < 254) {
+ CallNative(::TrapAudit67ValueWatch,
+ static_cast<uint64_t>(trap_type - 250));
+ break;
+ }
+ // AUDIT-061: trap codes [200, 232) dispatch the branch-probe handler.
+ // arg0 = idx into ::xe::cpu::audit61::pcs().
+ if (trap_type >= 200 && trap_type < 232) {
+ CallNative(::TrapAudit61Branch,
+ static_cast<uint64_t>(trap_type - 200));
+ break;
+ }
XELOGW("Unknown trap type {}", trap_type);
db(0xCC);
break;
diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
index 3ff067e15..2298dd3d7 100644
--- a/src/xenia/cpu/cpu_flags.cc
+++ b/src/xenia/cpu/cpu_flags.cc
@@ -57,3 +57,83 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU");
DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.",
"CPU");
+
+// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool.
+DEFINE_bool(audit_demo_setup_trace, true,
+ "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.",
+ "Audit");
+
+// AUDIT-061: comma-separated list of guest PCs to log on each fire.
+// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits
+// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N.
+// Default empty (off); no perf cost when empty.
+DEFINE_string(audit_61_branch_probe_pcs, "",
+ "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).",
+ "Audit");
+
+// AUDIT-067: comma-separated list of u32 values to watch. When non-empty,
+// every 4-byte guest store (stw/stwu/stwx/stwux/stmw) emits a runtime
+// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N.
+// Max 4 values. Default empty (off); zero overhead when empty.
+DEFINE_string(audit_67_value_watch, "",
+ "AUDIT-067: CSV of u32 values (max 4) — log every guest "
+ "store whose value matches.",
+ "Audit");
+
+// AUDIT-068: host-side memory-write watch. See cpu_flags.h header for format.
+// Mirrors AUDIT-067 but covers host-side writes (xe::store_and_swap<T>,
+// Memory::Zero/Fill/Copy). Empty default = zero cost.
+DEFINE_string(audit_68_host_mem_watch_values, "",
+ "AUDIT-068: CSV of u32 values (max 8) — log every host-side "
+ "guest-memory write whose value matches.",
+ "Audit");
+DEFINE_string(audit_68_host_mem_watch_addrs, "",
+ "AUDIT-068: CSV of guest VAs or VA ranges 'START-END' (max 8) "
+ "— log every host-side guest-memory write whose guest VA falls "
+ "within the configured set.",
+ "Audit");
+
+// AUDIT-068 Session 3: read-mode probe. See cpu_flags.h for format.
+DEFINE_string(audit_68_host_mem_read_probe, "",
+ "AUDIT-068 Session 3: CSV of 'VA:SIZE:PERIOD_NS' tuples (max 8) "
+ "— a dedicated poll thread reads the value at each VA every "
+ "PERIOD_NS and emits AUDIT-068-READ-CHANGE on transition.",
+ "Audit");
+
+// Phase A — see kernel/event_log.h.
+DEFINE_string(phase_a_event_log_path, "",
+ "Phase A: write schema-v1 JSONL event log to this path. "
+ "Empty (default) = disabled.",
+ "Audit");
+DEFINE_bool(phase_a_event_log_mem_writes, false,
+ "Phase A: include mem.write events in the JSONL log. RESERVED — "
+ "not wired in this phase. Default false.",
+ "Audit");
+
+// Phase D Stage 1 — see kernel/event_log.h `EmitContentionObserved`.
+DEFINE_bool(kernel_emit_contention, false,
+ "Phase D Stage 1: emit `contention.observed` events when "
+ "RtlEnterCriticalSection's spin loop is exhausted and the call "
+ "falls through to xeKeWaitForSingleObject. Default false (zero "
+ "cost when disabled). Requires --phase_a_event_log_path to be "
+ "set as well.",
+ "Audit");
+
+// Phase B — see kernel/phase_b_snapshot.h.
+DEFINE_string(phase_b_snapshot_dir, "",
+ "Phase B: write 5-file structured state snapshot to "
+ "<dir>/canary/ at the moment immediately before the first "
+ "guest PPC instruction of entry_point. Empty (default) = "
+ "disabled, zero overhead.",
+ "Audit");
+DEFINE_bool(phase_b_snapshot_and_exit, false,
+ "Phase B: after writing the snapshot, exit the process "
+ "immediately (std::_Exit(0)) so re-runs are byte-deterministic.",
+ "Audit");
+DEFINE_bool(phase_b_dump_section_content, false,
+ "Phase B: in memory.json, populate section_contents[].content_b64 "
+ "with raw bytes of every committed XEX-image region. Default "
+ "false — per-region SHA-256 is enough for the routine diff; "
+ "this is the escape hatch for the STOP-and-report condition "
+ "(image_loaded_sha256 mismatch).",
+ "Audit");
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
index 38c4f98ba..9b5ca7a1c 100644
--- a/src/xenia/cpu/cpu_flags.h
+++ b/src/xenia/cpu/cpu_flags.h
@@ -35,4 +35,52 @@ DECLARE_bool(break_condition_truncate);
DECLARE_bool(break_on_debugbreak);
+// AUDIT-DEMO smoke marker.
+DECLARE_bool(audit_demo_setup_trace);
+
+// AUDIT-061: multi-PC branch probe — emits one log line per fire with
+// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs.
+DECLARE_string(audit_61_branch_probe_pcs);
+
+// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose
+// value-to-be-stored matches any configured value. CSV of u32 values
+// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty.
+DECLARE_string(audit_67_value_watch);
+
+// AUDIT-068: host-side memory-write watch — emit a log line for each host-side
+// write to guest memory whose VALUE matches any configured u32 value, or whose
+// guest VA falls within any configured ADDR or ADDR-range. Mirrors AUDIT-067
+// but covers the host-side write paths (xe::store_and_swap<T>, Memory::Zero/
+// Fill/Copy) that AUDIT-067's JIT store-opcode hooks cannot see.
+//
+// VALUES: CSV of u32 values, max 8 entries; e.g. "0x8200A208,0x8200A928".
+// ADDRS: CSV of guest VAs or VA ranges, max 8 entries; range form is
+// "0xSTART-0xEND" (inclusive). e.g. "0x42500000-0x42600000,0xBCE25340".
+// Default empty (off); zero cost on the hot path when both are empty.
+DECLARE_string(audit_68_host_mem_watch_values);
+DECLARE_string(audit_68_host_mem_watch_addrs);
+
+// AUDIT-068 Session 3: read-mode probe. CSV of "VA:SIZE:PERIOD_NS" tuples
+// (max 8). A dedicated low-priority thread polls each VA every PERIOD_NS and
+// emits AUDIT-068-READ-CHANGE when the value transitions. SIZE in {1,2,4,8}.
+// Example: "0xBCE25340:4:1000000" = poll u32 at 0xBCE25340 every 1 ms.
+// Default empty (off); the poll thread is not spawned when empty.
+DECLARE_string(audit_68_host_mem_read_probe);
+
+// Phase A: JSONL event-log emitter path. When non-empty, the engine writes
+// schema-v1 JSONL events to this file. Empty (default) = no overhead, no
+// behavior change. Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md
+DECLARE_string(phase_a_event_log_path);
+DECLARE_bool(phase_a_event_log_mem_writes);
+
+// Phase B: initial-state snapshot. When the dir cvar is non-empty, the
+// engine writes a five-file structured state snapshot (cpu_state.json,
+// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to
+// `<dir>/canary/` at the moment immediately before the first guest PPC
+// instruction of the XEX entry_point executes. See
+// `xenia-rs/audit-runs/phase-b-state-equivalence/`.
+DECLARE_string(phase_b_snapshot_dir);
+DECLARE_bool(phase_b_snapshot_and_exit);
+DECLARE_bool(phase_b_dump_section_content);
+
#endif // XENIA_CPU_CPU_FLAGS_H_
diff --git a/src/xenia/cpu/ppc/ppc_emit_altivec.cc b/src/xenia/cpu/ppc/ppc_emit_altivec.cc
index 513b21391..c9af025ff 100644
--- a/src/xenia/cpu/ppc/ppc_emit_altivec.cc
+++ b/src/xenia/cpu/ppc/ppc_emit_altivec.cc
@@ -9,12 +9,28 @@
#include "xenia/cpu/ppc/ppc_emit-private.h"
+#include <vector>
#include "xenia/base/assert.h"
+#include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/ppc/ppc_context.h"
#include "xenia/cpu/ppc/ppc_hir_builder.h"
#include <cmath>
+// AUDIT-067: forward-decls. Defined in ppc_emit_memory.cc / ppc_hir_builder.cc.
+namespace xe {
+namespace cpu {
+namespace audit67 {
+const std::vector<uint32_t>& vals();
+}
+namespace ppc {
+void EmitAudit67ValueWatchVec(PPCHIRBuilder& f, uint32_t pc,
+ ::xe::cpu::hir::Value* vec128,
+ ::xe::cpu::hir::Value* ea);
+}
+}
+}
+
namespace xe {
namespace cpu {
namespace ppc {
@@ -175,6 +191,21 @@ int InstrEmit_stvewx_(PPCHIRBuilder& f, const InstrData& i, uint32_t vd,
f.Shr(f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantUint8(0xF)), 2);
Value* v = f.Extract(f.LoadVR(vd), el, INT32_TYPE);
f.Store(ea, f.ByteSwap(v));
+ if (!::xe::cpu::audit67::vals().empty()) {
+ // For stvewx: only one lane is actually stored; piggyback on the scalar
+ // value-watch helper by emitting the equivalent of stw of v at ea.
+ Value* pc_hi64 =
+ f.LoadConstantUint64(static_cast<uint64_t>(i.address) << 32);
+ Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE);
+ Value* packed = f.Or(pc_hi64, ea_lo64);
+ const auto& vals = ::xe::cpu::audit67::vals();
+ for (size_t idx = 0; idx < vals.size(); ++idx) {
+ Value* cmp = f.CompareEQ(v, f.LoadConstantUint32(vals[idx]));
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
+ f.ContextBarrier();
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
+ }
+ }
return 0;
}
int InstrEmit_stvewx(PPCHIRBuilder& f, const InstrData& i) {
@@ -187,7 +218,11 @@ int InstrEmit_stvewx128(PPCHIRBuilder& f, const InstrData& i) {
int InstrEmit_stvx_(PPCHIRBuilder& f, const InstrData& i, uint32_t vd,
uint32_t ra, uint32_t rb) {
Value* ea = f.And(CalculateEA_0(f, ra, rb), f.LoadConstantUint64(~0xFull));
- f.Store(ea, f.ByteSwap(f.LoadVR(vd)));
+ Value* vec = f.LoadVR(vd);
+ f.Store(ea, f.ByteSwap(vec));
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatchVec(f, i.address, vec, ea);
+ }
return 0;
}
int InstrEmit_stvx(PPCHIRBuilder& f, const InstrData& i) {
diff --git a/src/xenia/cpu/ppc/ppc_emit_memory.cc b/src/xenia/cpu/ppc/ppc_emit_memory.cc
index b4bdabb49..a6b44697d 100644
--- a/src/xenia/cpu/ppc/ppc_emit_memory.cc
+++ b/src/xenia/cpu/ppc/ppc_emit_memory.cc
@@ -10,11 +10,22 @@
#include "xenia/cpu/ppc/ppc_emit-private.h"
#include <stddef.h>
+#include <vector>
#include "xenia/base/assert.h"
#include "xenia/base/cvar.h"
+#include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/ppc/ppc_context.h"
#include "xenia/cpu/ppc/ppc_hir_builder.h"
+// AUDIT-067: forward-decl of value-watch table (defined in ppc_hir_builder.cc).
+namespace xe {
+namespace cpu {
+namespace audit67 {
+const std::vector<uint32_t>& vals();
+} // namespace audit67
+} // namespace cpu
+} // namespace xe
+
DEFINE_bool(
disable_prefetch_and_cachecontrol, true,
"Disables translating ppc prefetch/cache flush instructions to host "
@@ -67,6 +78,90 @@ void StoreEA(PPCHIRBuilder& f, uint32_t rt, Value* ea) {
f.StoreGPR(rt, ea);
}
+// AUDIT-067: emit a runtime equality check on the 32-bit value-to-be-stored
+// against each configured watch value. Before each check, (pc, EA) is stored
+// packed into the PPCContext scratch field (unconditionally) so the native
+// trap handler can read them; on match, a trap fires with code (250 + idx).
+// pc is a build-time constant and EA a runtime truncate, packed as
+// (pc << 32) | (ea & 0xFFFFFFFF) so the handler can decompose.
+static void EmitAudit67ValueWatch(PPCHIRBuilder& f, uint32_t pc, Value* val32,
+ Value* ea) {
+ const auto& vals = ::xe::cpu::audit67::vals();
+ if (vals.empty()) return;
+ // pc is known at JIT time → emit as constant; ea is runtime.
+ Value* pc_hi64 = f.LoadConstantUint64(static_cast<uint64_t>(pc) << 32);
+ Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE);
+ Value* packed = f.Or(pc_hi64, ea_lo64);
+ for (size_t idx = 0; idx < vals.size(); ++idx) {
+ Value* cmp = f.CompareEQ(val32, f.LoadConstantUint32(vals[idx]));
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
+ f.ContextBarrier();
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
+ }
+}
+
+// AUDIT-067 128-bit (vector) variant: checks each of the 4 32-bit lanes in a
+// vector store. Used for stvx/stvxl; stvewx checks its one stored lane inline
+// (memcpy-derived installs may use 128-bit vector stores). The matched lane is
+// reflected in the dst by adding (lane * 4) so the handler can see exactly
+// where in memory the value lands. External linkage so altivec.cc can call it.
+void EmitAudit67ValueWatchVec(PPCHIRBuilder& f, uint32_t pc,
+ Value* vec128, Value* ea) {
+ const auto& vals = ::xe::cpu::audit67::vals();
+ if (vals.empty()) return;
+ Value* pc_hi64 = f.LoadConstantUint64(static_cast<uint64_t>(pc) << 32);
+ for (size_t idx = 0; idx < vals.size(); ++idx) {
+ Value* watch = f.LoadConstantUint32(vals[idx]);
+ for (uint8_t lane = 0; lane < 4; ++lane) {
+ Value* lane_val = f.Extract(vec128, lane, INT32_TYPE);
+ Value* cmp = f.CompareEQ(lane_val, watch);
+ Value* lane_off = f.LoadConstantUint32(static_cast<uint32_t>(lane * 4));
+ Value* dst32 = f.Add(f.Truncate(ea, INT32_TYPE), lane_off);
+ Value* packed = f.Or(pc_hi64, f.ZeroExtend(dst32, INT64_TYPE));
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
+ f.ContextBarrier();
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
+ }
+ }
+}
+
+// AUDIT-067 64-bit variant: same as above but checks BOTH halves of a 64-bit
+// stored value. EA points at the start of the 8-byte store; both halves fire
+// trap code (250 + idx), and the matched half is identified by the packed dst:
+// EA+0 for the upper 32 bits, EA+4 for the lower 32 bits.
+static void EmitAudit67ValueWatch64(PPCHIRBuilder& f, uint32_t pc, Value* val64,
+ Value* ea) {
+ const auto& vals = ::xe::cpu::audit67::vals();
+ if (vals.empty()) return;
+ // PowerPC is big-endian: u64 stored at EA places upper-32 bits at EA+0
+ // and lower-32 bits at EA+4. Check both halves against each watch value.
+ Value* upper32 = f.Truncate(f.Shr(val64, int8_t(32)), INT32_TYPE); // bits[63:32]
+ Value* lower32 = f.Truncate(val64, INT32_TYPE); // bits[31:0]
+ Value* pc_hi64 = f.LoadConstantUint64(static_cast<uint64_t>(pc) << 32);
+ for (size_t idx = 0; idx < vals.size(); ++idx) {
+ // Upper half lands at EA+0.
+ {
+ Value* cmp = f.CompareEQ(upper32, f.LoadConstantUint32(vals[idx]));
+ Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE);
+ Value* packed = f.Or(pc_hi64, ea_lo64);
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
+ f.ContextBarrier();
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
+ }
+ // Lower half lands at EA+4.
+ {
+ Value* cmp = f.CompareEQ(lower32, f.LoadConstantUint32(vals[idx]));
+ Value* ea_plus4 =
+ f.Add(f.Truncate(ea, INT32_TYPE), f.LoadConstantUint32(4));
+ Value* ea_lo64 = f.ZeroExtend(ea_plus4, INT64_TYPE);
+ Value* packed = f.Or(pc_hi64, ea_lo64);
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
+ f.ContextBarrier();
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
+ }
+ }
+}
+
// Integer load (A-13)
int InstrEmit_lbz(PPCHIRBuilder& f, const InstrData& i) {
@@ -518,9 +613,11 @@ int InstrEmit_stw(PPCHIRBuilder& f, const InstrData& i) {
b = f.LoadGPR(i.D.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
- f.StoreOffset(b, offset,
- f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE)));
-
+ Value* val32 = f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE);
+ f.StoreOffset(b, offset, f.ByteSwap(val32));
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch(f, i.address, val32, f.Add(b, offset));
+ }
return 0;
}
@@ -532,10 +629,14 @@ int InstrEmit_stmw(PPCHIRBuilder& f, const InstrData& i) {
b = f.LoadGPR(i.D.RA);
}
+ const bool watch_active = !::xe::cpu::audit67::vals().empty();
for (uint32_t j = 0; j < 32 - i.D.RT; ++j) {
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS) + j * 4);
- f.StoreOffset(b, offset,
- f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT + j), INT32_TYPE)));
+ Value* val32 = f.Truncate(f.LoadGPR(i.D.RT + j), INT32_TYPE);
+ f.StoreOffset(b, offset, f.ByteSwap(val32));
+ if (watch_active) {
+ EmitAudit67ValueWatch(f, i.address, val32, f.Add(b, offset));
+ }
}
return 0;
}
@@ -545,8 +646,12 @@ int InstrEmit_stwu(PPCHIRBuilder& f, const InstrData& i) {
// MEM(EA, 4) <- (RS)[32:63]
// RA <- EA
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
- f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE)));
+ Value* val32 = f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE);
+ f.Store(ea, f.ByteSwap(val32));
StoreEA(f, i.D.RA, ea);
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
+ }
return 0;
}
@@ -555,8 +660,12 @@ int InstrEmit_stwux(PPCHIRBuilder& f, const InstrData& i) {
// MEM(EA, 4) <- (RS)[32:63]
// RA <- EA
Value* ea = CalculateEA(f, i.X.RA, i.X.RB);
- f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)));
+ Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
+ f.Store(ea, f.ByteSwap(val32));
StoreEA(f, i.X.RA, ea);
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
+ }
return 0;
}
@@ -568,7 +677,11 @@ int InstrEmit_stwx(PPCHIRBuilder& f, const InstrData& i) {
// EA <- b + (RB)
// MEM(EA, 4) <- (RS)[32:63]
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
- f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)));
+ Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
+ f.Store(ea, f.ByteSwap(val32));
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
+ }
return 0;
}
@@ -587,7 +700,11 @@ int InstrEmit_std(PPCHIRBuilder& f, const InstrData& i) {
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2));
- f.StoreOffset(b, offset, f.ByteSwap(f.LoadGPR(i.DS.RT)));
+ Value* val64 = f.LoadGPR(i.DS.RT);
+ f.StoreOffset(b, offset, f.ByteSwap(val64));
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch64(f, i.address, val64, f.Add(b, offset));
+ }
return 0;
}
@@ -596,8 +713,12 @@ int InstrEmit_stdu(PPCHIRBuilder& f, const InstrData& i) {
// MEM(EA, 8) <- (RS)
// RA <- EA
Value* ea = CalculateEA_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2));
- f.Store(ea, f.ByteSwap(f.LoadGPR(i.DS.RT)));
+ Value* val64 = f.LoadGPR(i.DS.RT);
+ f.Store(ea, f.ByteSwap(val64));
StoreEA(f, i.DS.RA, ea);
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
+ }
return 0;
}
@@ -606,8 +727,12 @@ int InstrEmit_stdux(PPCHIRBuilder& f, const InstrData& i) {
// MEM(EA, 8) <- (RS)
// RA <- EA
Value* ea = CalculateEA(f, i.X.RA, i.X.RB);
- f.Store(ea, f.ByteSwap(f.LoadGPR(i.X.RT)));
+ Value* val64 = f.LoadGPR(i.X.RT);
+ f.Store(ea, f.ByteSwap(val64));
StoreEA(f, i.X.RA, ea);
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
+ }
return 0;
}
@@ -619,7 +744,11 @@ int InstrEmit_stdx(PPCHIRBuilder& f, const InstrData& i) {
// EA <- b + (RB)
// MEM(EA, 8) <- (RS)
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
- f.Store(ea, f.ByteSwap(f.LoadGPR(i.X.RT)));
+ Value* val64 = f.LoadGPR(i.X.RT);
+ f.Store(ea, f.ByteSwap(val64));
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
+ }
return 0;
}
@@ -684,7 +813,11 @@ int InstrEmit_stwbrx(PPCHIRBuilder& f, const InstrData& i) {
// EA <- b + (RB)
// MEM(EA, 4) <- bswap((RS)[32:63])
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
- f.Store(ea, f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
+ Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
+ f.Store(ea, val32);
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
+ }
return 0;
}
@@ -696,7 +829,11 @@ int InstrEmit_stdbrx(PPCHIRBuilder& f, const InstrData& i) {
// EA <- b + (RB)
// MEM(EA, 8) <- bswap(RS)
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
- f.Store(ea, f.LoadGPR(i.X.RT));
+ Value* val64 = f.LoadGPR(i.X.RT);
+ f.Store(ea, val64);
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
+ }
return 0;
}
@@ -843,7 +980,8 @@ int InstrEmit_stdcx(PPCHIRBuilder& f, const InstrData& i) {
// This will always succeed if under the global lock, however.
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
- Value* rt = f.ByteSwap(f.LoadGPR(i.X.RT));
+ Value* val64 = f.LoadGPR(i.X.RT);
+ Value* rt = f.ByteSwap(val64);
if (cvars::no_reserved_ops) {
f.Store(ea, rt);
@@ -862,6 +1000,9 @@ int InstrEmit_stdcx(PPCHIRBuilder& f, const InstrData& i) {
if (!cvars::no_reserved_ops) {
f.MemoryBarrier();
}
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
+ }
return 0;
}
@@ -885,7 +1026,8 @@ int InstrEmit_stwcx(PPCHIRBuilder& f, const InstrData& i) {
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
- Value* rt = f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
+ Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
+ Value* rt = f.ByteSwap(val32);
if (cvars::no_reserved_ops) {
f.Store(ea, rt);
@@ -904,7 +1046,9 @@ int InstrEmit_stwcx(PPCHIRBuilder& f, const InstrData& i) {
if (!cvars::no_reserved_ops) {
f.MemoryBarrier();
}
-
+ if (!::xe::cpu::audit67::vals().empty()) {
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
+ }
return 0;
}
// Floating-point load (A-19)
diff --git a/src/xenia/cpu/ppc/ppc_hir_builder.cc b/src/xenia/cpu/ppc/ppc_hir_builder.cc
index 42d996cba..e2f7a45db 100644
--- a/src/xenia/cpu/ppc/ppc_hir_builder.cc
+++ b/src/xenia/cpu/ppc/ppc_hir_builder.cc
@@ -34,6 +34,97 @@ DEFINE_bool(
"unimplemented PowerPC instruction is encountered.",
"CPU");
+// AUDIT-061 — multi-PC branch probe. Parses cvars::audit_61_branch_probe_pcs
+// once and exposes a (pc -> trap_id) lookup table. trap_id range [200, 231].
+// PCs outside the table are not probed. Native side reads g_audit61_pcs[idx].
+#include <vector>
+#include <string>
+namespace xe {
+namespace cpu {
+namespace audit61 {
+constexpr uint16_t kTrapBase = 200;
+constexpr size_t kMaxPcs = 32;
+static std::vector<uint32_t> g_pcs;
+static bool g_parsed = false;
+
+const std::vector<uint32_t>& pcs() {
+ if (!g_parsed) {
+ g_parsed = true;
+ const std::string& csv = cvars::audit_61_branch_probe_pcs;
+ size_t pos = 0;
+ while (pos < csv.size() && g_pcs.size() < kMaxPcs) {
+ size_t end = csv.find(',', pos);
+ std::string tok = csv.substr(pos, end - pos);
+ // strip whitespace
+ while (!tok.empty() && (tok.front() == ' ' || tok.front() == '\t'))
+ tok.erase(tok.begin());
+ while (!tok.empty() && (tok.back() == ' ' || tok.back() == '\t'))
+ tok.pop_back();
+ if (!tok.empty()) {
+ try {
+ uint32_t v = static_cast<uint32_t>(std::stoul(tok, nullptr, 0));
+ g_pcs.push_back(v);
+ } catch (...) {
+ }
+ }
+ if (end == std::string::npos) break;
+ pos = end + 1;
+ }
+ }
+ return g_pcs;
+}
+
+// Returns trap id for pc, or 0 if pc not in probe set.
+uint16_t trap_id_for(uint32_t pc) {
+ const auto& v = pcs();
+ for (size_t i = 0; i < v.size(); ++i) {
+ if (v[i] == pc) return static_cast<uint16_t>(kTrapBase + i);
+ }
+ return 0;
+}
+} // namespace audit61
+
+// AUDIT-067 — value-watch. Parses cvars::audit_67_value_watch once, exposes
+// values via vals(). Trap codes for matches start at kTrapBase = 250.
+namespace audit67 {
+constexpr uint16_t kTrapBase = 250;
+constexpr size_t kMaxVals = 4;
+static std::vector<uint32_t> g_vals;
+static bool g_parsed = false;
+
+const std::vector<uint32_t>& vals() {
+ if (!g_parsed) {
+ g_parsed = true;
+ const std::string& csv = cvars::audit_67_value_watch;
+ size_t pos = 0;
+ while (pos < csv.size() && g_vals.size() < kMaxVals) {
+ size_t end = csv.find(',', pos);
+ std::string tok = csv.substr(pos, end - pos);
+ while (!tok.empty() && (tok.front() == ' ' || tok.front() == '\t'))
+ tok.erase(tok.begin());
+ while (!tok.empty() && (tok.back() == ' ' || tok.back() == '\t'))
+ tok.pop_back();
+ if (!tok.empty()) {
+ try {
+ uint32_t v = static_cast<uint32_t>(std::stoul(tok, nullptr, 0));
+ g_vals.push_back(v);
+ } catch (...) {
+ }
+ }
+ if (end == std::string::npos) break;
+ pos = end + 1;
+ }
+ XELOGI("AUDIT-067-INIT csv=\"{}\" parsed_count={}", csv, g_vals.size());
+ for (size_t i = 0; i < g_vals.size(); ++i) {
+ XELOGI("AUDIT-067-INIT vals[{}] = 0x{:08X}", i, g_vals[i]);
+ }
+ }
+ return g_vals;
+}
+} // namespace audit67
+} // namespace cpu
+} // namespace xe
+
namespace xe {
namespace cpu {
namespace ppc {
@@ -174,6 +265,20 @@ bool PPCHIRBuilder::Emit(GuestFunction* function, uint32_t flags) {
MaybeBreakOnInstruction(address);
+ // AUDIT-061: emit a trap before this instruction if it's on the probe
+ // list. The trap fires BEFORE the cmp/branch HIR emit so the native
+ // handler observes cr0/cr6 set by the *previous* instruction (the cmp
+ // that controls this conditional branch). ContextBarrier flushes
+ // HIR temporaries to PPCContext so the handler reads consistent state.
+ if (!::xe::cpu::audit61::pcs().empty()) {
+ uint16_t tid = ::xe::cpu::audit61::trap_id_for(address);
+ if (tid != 0) {
+ Comment("--audit_61_branch_probe target");
+ ContextBarrier();
+ Trap(tid);
+ }
+ }
+
InstrData i;
i.address = address;
i.code = code;
diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc
index 1034dcac7..38148010c 100644
--- a/src/xenia/cpu/xex_module.cc
+++ b/src/xenia/cpu/xex_module.cc
@@ -51,6 +51,38 @@ DECLARE_bool(allow_plugins);
DECLARE_bool(disable_context_promotion);
+// AUDIT-068 Session 2: helper that scans a raw byte buffer for 4-byte aligned
+// u32 values that match the configured audit_68 value list, emitting a
+// per-position event. Used to pre-scan XEX-loader memcpys that bypass all
+// other hooked surfaces. Cost when off: a single relaxed atomic load.
+static inline void audit68_prescan_memcpy(uint32_t guest_va_dest,
+ const uint8_t* src, size_t size,
+ const char* tag) {
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
+ if (active == 0) return;
+ if ((active & 0x1) && size >= 4) {
+ size_t aligned_end = size & ~size_t(3);
+ for (size_t i = 0; i < aligned_end; i += 4) {
+ uint32_t be_u32 = (uint32_t(src[i + 0]) << 24) |
+ (uint32_t(src[i + 1]) << 16) |
+ (uint32_t(src[i + 2]) << 8) | uint32_t(src[i + 3]);
+ xe::audit_68::check_guest_va(
+ static_cast<uint32_t>(guest_va_dest + i), be_u32, 4, tag);
+ }
+ }
+ if (active & 0x2) {
+ // Coarse addr-only event: only the first min(size, 8) dest bytes are checked.
+ uint64_t v = 0;
+ if (size >= 4) {
+ v = (uint64_t(src[0]) << 24) | (uint64_t(src[1]) << 16) |
+ (uint64_t(src[2]) << 8) | uint64_t(src[3]);
+ }
+ xe::audit_68::check_guest_va(guest_va_dest, v,
+ static_cast<uint8_t>(std::min<size_t>(size, 8)),
+ tag);
+ }
+}
+
static constexpr uint8_t xe_xex1_retail_key[16] = {
0xA2, 0x6C, 0x10, 0xF7, 0x1F, 0xD9, 0x35, 0xE9,
0x8B, 0x99, 0x92, 0x2C, 0xE9, 0x32, 0x15, 0x72};
@@ -424,6 +456,10 @@ int XexModule::ApplyPatch(XexModule* module) {
// If image_source_offset is set, copy [source_offset:source_size] to
// target_offset
if (patch_header->delta_image_source_offset) {
+ audit68_prescan_memcpy(
+ module->base_address_ + patch_header->delta_image_target_offset,
+ base_exe + patch_header->delta_image_source_offset,
+ patch_header->delta_image_source_size, "xex_memcpy_patch");
memcpy(base_exe + patch_header->delta_image_target_offset,
base_exe + patch_header->delta_image_source_offset,
patch_header->delta_image_source_size);
@@ -589,6 +625,8 @@ int XexModule::ReadImageUncompressed(const void* xex_addr, size_t xex_length) {
if (exe_length > uncompressed_size) {
return 1;
}
+ audit68_prescan_memcpy(base_address_, p, exe_length,
+ "xex_memcpy_uncompressed");
memcpy(buffer, p, exe_length);
return 0;
case XEX_ENCRYPTION_NORMAL:
@@ -665,6 +703,9 @@ int XexModule::ReadImageBasicCompressed(const void* xex_addr,
// Overflow.
return 1;
}
+ audit68_prescan_memcpy(
+ base_address_ + static_cast<uint32_t>(d - buffer), p, data_size,
+ "xex_memcpy_basic_block");
memcpy(d, p, data_size);
break;
case XEX_ENCRYPTION_NORMAL: {
@@ -799,6 +840,17 @@ int XexModule::ReadImageCompressed(const void* xex_addr, size_t xex_length) {
result_code = lzx_decompress(
compress_buffer, d - compress_buffer, buffer, uncompressed_size,
compression_info->normal.window_size, nullptr, 0);
+
+ // AUDIT-068 Session 2: lzx_decompress writes directly into guest
+ // memory via the host pointer `buffer`. There's no host-side hook
+ // covering its internal bulk writes, so post-scan the produced bytes
+ // to recover what the XEX loader actually placed at `base_address_`.
+ // This is THE most likely catch for the vtable install case (vtables
+ // live in the .rdata section that is part of the LZX-compressed image).
+ if (result_code == 0) {
+ audit68_prescan_memcpy(base_address_, buffer, uncompressed_size,
+ "xex_lzx_decompress_output");
+ }
} else {
XELOGE("Unable to allocate XEX memory at {:08X}-{:08X}.", base_address_,
uncompressed_size);
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
index 22ba66aee..f02b11d7f 100644
--- a/src/xenia/memory.cc
+++ b/src/xenia/memory.cc
@@ -14,6 +14,7 @@
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
#include "xenia/base/byte_stream.h"
#include "xenia/base/clock.h"
#include "xenia/base/cvar.h"
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
static Memory* active_memory_ = nullptr;
+// AUDIT-068 — process-global accessor (declared in memory.h).
+Memory* Memory::active() { return active_memory_; }
+
void CrashDump() {
static std::atomic<int> in_crash_dump(0);
if (in_crash_dump.fetch_add(1)) {
@@ -151,11 +155,41 @@ Memory::Memory() {
uint32_t(xe::memory::allocation_granularity());
assert_zero(active_memory_);
active_memory_ = this;
+
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
+ Memory* m = active_memory_;
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
+ };
+
+ // AUDIT-068 Session 3: register guest→host translation thunk and a
+ // page-protect query thunk for the read-mode probe. The probe thread uses
+ // QueryProtect to skip unmapped/uncommitted pages before dereferencing.
+ xe::audit_68::g_guest_to_host_thunk = [](uint32_t va) -> const void* {
+ Memory* m = active_memory_;
+ return m ? reinterpret_cast<const void*>(m->TranslateVirtual(va))
+ : nullptr;
+ };
+ xe::audit_68::g_query_protect_thunk = [](uint32_t va,
+ uint32_t* out_protect) -> bool {
+ Memory* m = active_memory_;
+ if (!m) return false;
+ BaseHeap* heap = m->LookupHeap(va);
+ if (!heap) {
+ if (out_protect) *out_protect = 0;
+ return false;
+ }
+ return heap->QueryProtect(va, out_protect);
+ };
}
Memory::~Memory() {
assert_true(active_memory_ == this);
active_memory_ = nullptr;
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
+ xe::audit_68::g_guest_to_host_thunk = nullptr;
+ xe::audit_68::g_query_protect_thunk = nullptr;
// Uninstall the MMIO handler, as we won't be able to service more
// requests.
@@ -540,16 +574,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
}
void Memory::Zero(uint32_t address, uint32_t size) {
+ // AUDIT-068: log a single span event with value=0; size is capped at 8 for
+ // the value field. Slow path is gated on the atomic flag.
+ xe::audit_68::check_guest_va(address, 0,
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Zero");
std::memset(TranslateVirtual(address), 0, size);
}
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
+ // Replicate the fill byte into all 8 bytes of the value field so that
+ // value_matches compares against the pattern memset actually writes, e.g.
+ // a fill byte of 0xAB is reported as 0xABABABABABABABAB.
+ uint64_t v = static_cast<uint64_t>(value);
+ v |= v << 8;
+ v |= v << 16;
+ v |= v << 32;
+ xe::audit_68::check_guest_va(address, v,
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Fill");
std::memset(TranslateVirtual(address), value, size);
}
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
uint8_t* pdest = TranslateVirtual(dest);
const uint8_t* psrc = TranslateVirtual(src);
+ // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the
+ // source buffer. Catches XEX-loader-style memcpys where a vptr (the target
+ // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost
+ // O(size/4 * N_values) with N_values capped at 8 inside value_matches —
+ // negligible vs the underlying memcpy throughput.
+ //
+ // Gated on active bit 0x1 (values-mode) AND active != 0. If only addrs are
+ // configured (Run 2 voice-struct mode), we still emit a single addr-only
+ // event covering the destination span so addr-watch isn't broken.
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
+ if (active != 0) [[unlikely]] {
+ if ((active & 0x1) && size >= 4) {
+ // Scan source for any configured u32 value (big-endian, mirrors how
+ // guest sees the bytes). 4-byte aligned offsets only.
+ uint32_t aligned_end = size & ~3u;
+ for (uint32_t i = 0; i < aligned_end; i += 4) {
+ uint32_t be_u32 =
+ (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) |
+ (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]);
+ xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy");
+ }
+ }
+ if (active & 0x2) {
+ // Addr-only mode: emit a single coarse event tagged with the dest base
+ // and first u32 of source for context. The slow-path range check only
+ // covers the first min(size, 8) dest bytes, not the whole copied span.
+ uint64_t v = 0;
+ if (size >= 4) {
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
+ } else if (size > 0) {
+ for (uint32_t i = 0; i < size; ++i) {
+ v = (v << 8) | psrc[i];
+ }
+ }
+ xe::audit_68::check_guest_va(
+ dest, v, static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Copy");
+ }
+ }
std::memcpy(pdest, psrc, size);
}
diff --git a/src/xenia/memory.h b/src/xenia/memory.h
index bd9519a40..fa712fe08 100644
--- a/src/xenia/memory.h
+++ b/src/xenia/memory.h
@@ -347,6 +347,13 @@ class Memory {
Memory();
~Memory();
+ // AUDIT-068: process-global Memory singleton accessor. Returns the
+ // currently-constructed Memory instance, or nullptr if none. Set inside
+ // Memory::Memory()/~Memory(); see memory.cc `active_memory_`. Used by
+ // xe::audit_68::check_host_write() to translate a host pointer back to a
+ // guest VA without an explicit Memory* context.
+ static Memory* active();
+
// Initializes the memory system.
// This may fail if the host address space could not be reserved or the
// mapping to the file system fails.
# === new file: src/xenia/base/audit_68_host_mem_watch_fwd.h ===
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* AUDIT-068: host-side memory-write watch — forward declarations only.
*
* Declarations here are intentionally minimal so that xenia/base/memory.h can
* include this without pulling in xenia/memory.h (which would create a
* circular dependency: xenia-base → xenia-core → xenia-base). The full
* definitions live in xenia/audit_68_host_mem_watch.{h,cc} (xenia-core).
*
* Hot path: callers (the integer specializations of xe::store_and_swap<T>)
* load the atomic flag once. When it is 0 (default), no further work is done
* — a single relaxed atomic load and a predictable branch.
******************************************************************************
*/
#ifndef XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
#define XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
#include <atomic>
#include <cstdint>
namespace xe {
namespace audit_68 {
// 0 = inactive. The flag starts at the sentinel 0xFFFFFFFF ("unparsed") so the
// first hot-path call reaches a slow path; once the cvars are parsed it holds
// the real bitmask (bit 0 = values, bit 1 = addrs). Loaded relaxed on the hot path.
//
// Implementation lives in xenia-base (audit_68_host_mem_watch_base.cc) so
// that callers in xenia-base/xenia-cpu/xenia-kernel can resolve the symbol
// without depending on xenia-core link order.
extern std::atomic<uint32_t> g_active;
// Host-pointer → guest-VA translation thunk. xenia/memory.cc::Memory::Memory()
// registers a function pointer here that wraps Memory::HostToGuestVirtual.
// Until set, both slow paths return immediately without parsing or logging.
using HostToGuestThunk = uint32_t (*)(const void*);
extern HostToGuestThunk g_host_to_guest_thunk;
// AUDIT-068 Session 3 — read-mode probe support.
//
// Guest-VA → host-pointer translation thunk (wraps Memory::TranslateVirtual).
// Used by the read-probe poll thread to sample bytes at configured guest VAs.
// May return non-null even for unmapped/uncommitted VAs (the underlying
// translation is arithmetic — virtual_membase_ + va) — callers MUST consult
// the QueryProtect thunk before dereferencing.
using GuestToHostThunk = const void* (*)(uint32_t);
extern GuestToHostThunk g_guest_to_host_thunk;
// Returns true iff the page containing `guest_va` is committed and readable;
// out_protect receives the raw page protect bits (kProtectRead, etc.). Wraps
// Memory::LookupHeap() + BaseHeap::QueryProtect(). Used as a guard before the
// read-probe samples bytes (early-boot heap-not-yet-mapped path must NOT
// crash).
using QueryProtectThunk = bool (*)(uint32_t, uint32_t* /*out_protect*/);
extern QueryProtectThunk g_query_protect_thunk;
// Slow path. Only invoked when g_active is non-zero. Implementation in
// xenia/base/audit_68_host_mem_watch_base.cc (xenia-base).
//
// host_ptr: the host pointer being written (from store_and_swap's `mem`).
// value: the value being stored (zero-extended to u64).
// size: 1, 2, 4 or 8.
// tag: caller-provided tag string (e.g. "store_and_swap<u32>"). Logged
// verbatim, no formatting. Must be a static string (lifetime
// beyond this call).
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
uint8_t size, const char* tag);
// Same as above, but with a known guest VA (for callers like Memory::Zero/
// Fill/Copy that have the VA but not a single host pointer).
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
const char* tag);
// Inline hot-path wrappers. Single relaxed atomic load + branch when inactive.
// Hot-path entry for writes identified by host pointer. When g_active is zero
// this is one relaxed atomic load and a branch; otherwise the arguments are
// forwarded unchanged to check_host_write_slowpath().
inline void check_host_write(const void* host_ptr, uint64_t value, uint8_t size,
                             const char* tag) {
  if (g_active.load(std::memory_order_relaxed) == 0) [[likely]] {
    return;
  }
  check_host_write_slowpath(host_ptr, value, size, tag);
}
// Hot-path entry for writes identified by guest VA. When g_active is zero this
// is one relaxed atomic load and a branch; otherwise the arguments are
// forwarded unchanged to check_guest_va_slowpath().
inline void check_guest_va(uint32_t guest_va, uint64_t value, uint8_t size,
                           const char* tag) {
  if (g_active.load(std::memory_order_relaxed) == 0) [[likely]] {
    return;
  }
  check_guest_va_slowpath(guest_va, value, size, tag);
}
} // namespace audit_68
} // namespace xe
#endif // XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
# === new file: src/xenia/base/audit_68_host_mem_watch_base.cc ===
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* AUDIT-068 host-side memory-write watch — implementation (xenia-base).
*
* Mirrors AUDIT-067 in spirit (value-CSV cvar, lazy parse, atomic-bool
* activation) but observes the HOST-side write paths instead of the JIT'd
* guest store opcodes. Captures writes performed by xe::store_and_swap<T>
* (xenia/base/memory.h) and by Memory::Zero/Fill/Copy (xenia/memory.cc).
*
* Lives in xenia-base so that the slow-path symbols resolve for callers in
* xenia-base / xenia-cpu / xenia-kernel without depending on xenia-core link
* order. The host→guest VA translation is provided by a function-pointer
* thunk that xenia::Memory::Memory() registers at construction.
*
* See xenia/base/audit_68_host_mem_watch_fwd.h for the API.
* See xenia/cpu/cpu_flags.{h,cc} for the cvars.
******************************************************************************
*/
#include "xenia/base/audit_68_host_mem_watch_fwd.h"
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstring>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/threading.h"
// We need the cvars but cpu_flags.h lives in xenia-cpu. To avoid an upward
// dep we re-declare them here with the same macros — cvar.h's DECLARE_*
// macros are header-safe (just `extern` declarations) and resolve against the
// definitions in xenia-cpu/cpu_flags.cc at link time. (xenia-cpu links AFTER
// xenia-base in the executable; symbols in xenia-cpu/cpu_flags.cc are still
// resolvable from xenia-base translation units because the lld pass folds
// all libraries together at the executable level.)
DECLARE_string(audit_68_host_mem_watch_values);
DECLARE_string(audit_68_host_mem_watch_addrs);
DECLARE_string(audit_68_host_mem_read_probe);
namespace xe {
namespace audit_68 {
// Hot-path flag (declared in fwd header). Initial sentinel UINT32_MAX means
// "unparsed"; the very first slow-path call invokes ensure_parsed() which
// replaces the sentinel with the actual active bitmask (0 if both cvars are
// empty, 1/2/3 otherwise). After that, hot-path calls observe the real value
// and bail out cheaply when off.
std::atomic<uint32_t> g_active{0xFFFFFFFFu};
// Host→guest VA translation thunk (declared in fwd header). Set by
// xenia::Memory::Memory() at construction; reset to nullptr by ~Memory().
HostToGuestThunk g_host_to_guest_thunk{nullptr};
// AUDIT-068 Session 3: guest→host translation + page-protect query thunks.
GuestToHostThunk g_guest_to_host_thunk{nullptr};
QueryProtectThunk g_query_protect_thunk{nullptr};
namespace {
constexpr size_t kMaxValues = 8;
constexpr size_t kMaxAddrRanges = 8;
// Closed interval [start, end] of guest virtual addresses to watch. Entries
// are produced by parse_addrs_csv() and tested by addr_matches().
struct AddrRange {
  uint32_t start;  // inclusive
  uint32_t end;    // inclusive
};
std::vector<uint32_t> g_values;
std::vector<AddrRange> g_addrs;
std::once_flag g_parsed_flag;
std::chrono::steady_clock::time_point g_t0;
std::once_flag g_t0_once;
// Nanoseconds elapsed on the steady clock since the first call to this
// function. The first call latches the reference time point g_t0 (guarded by
// g_t0_once), so the first result is close to zero.
int64_t host_ns_since_start() {
  using std::chrono::steady_clock;
  std::call_once(g_t0_once, [] { g_t0 = steady_clock::now(); });
  const steady_clock::duration elapsed = steady_clock::now() - g_t0;
  return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
}
// Strips leading and trailing spaces and tabs from `s` in place. Other
// whitespace (e.g. newlines) is left untouched.
void trim(std::string& s) {
  const size_t first_kept = s.find_first_not_of(" \t");
  if (first_kept == std::string::npos) {
    // Empty, or nothing but blanks.
    s.clear();
    return;
  }
  const size_t last_kept = s.find_last_not_of(" \t");
  s.erase(last_kept + 1);
  s.erase(0, first_kept);
}
// Parses `tok` as an unsigned 32-bit integer. The base is auto-detected
// (decimal, 0x-prefixed hex, 0-prefixed octal).
//
// Returns true and stores the result in `*out` only when the whole token is a
// number that fits in 32 bits. Returns false, leaving `*out` untouched, for
// empty or non-numeric tokens, trailing garbage ("12abc"), negative numbers,
// and values above 0xFFFFFFFF. The previous stoul-plus-cast form silently
// truncated such values where `unsigned long` is 64 bits wide.
bool parse_u32(const std::string& tok, uint32_t* out) {
  // stoull accepts a leading '-' and wraps the result; reject it up front.
  if (tok.find('-') != std::string::npos) {
    return false;
  }
  try {
    size_t consumed = 0;
    const unsigned long long wide = std::stoull(tok, &consumed, 0);
    if (consumed != tok.size()) {
      return false;  // Trailing characters after the number.
    }
    if (wide > 0xFFFFFFFFull) {
      return false;  // Does not fit in 32 bits.
    }
    *out = static_cast<uint32_t>(wide);
    return true;
  } catch (...) {
    // std::invalid_argument / std::out_of_range from stoull.
    return false;
  }
}
// Splits `csv` on commas and appends every field that parse_u32() accepts to
// g_values, stopping once kMaxValues entries have been collected. Blank and
// unparseable fields are skipped.
void parse_values_csv(const std::string& csv) {
  for (size_t cursor = 0;
       cursor < csv.size() && g_values.size() < kMaxValues;) {
    const size_t comma = csv.find(',', cursor);
    // When no comma remains, substr clamps the length to the end of the string.
    std::string field = csv.substr(cursor, comma - cursor);
    trim(field);
    uint32_t parsed = 0;
    if (!field.empty() && parse_u32(field, &parsed)) {
      g_values.push_back(parsed);
    }
    if (comma == std::string::npos) {
      break;
    }
    cursor = comma + 1;
  }
}
// Splits `csv` on commas and appends address ranges to g_addrs, stopping once
// kMaxAddrRanges entries have been collected. Each field is either
//   START-END  an inclusive range (reversed bounds are swapped), or
//   ADDR       a single address, watched as the 8 bytes [ADDR, ADDR + 7].
// Blank and unparseable fields are skipped.
void parse_addrs_csv(const std::string& csv) {
  constexpr uint32_t kLastVa = 0xFFFFFFFFu;
  size_t pos = 0;
  while (pos < csv.size() && g_addrs.size() < kMaxAddrRanges) {
    const size_t end = csv.find(',', pos);
    std::string tok = csv.substr(pos, end - pos);
    trim(tok);
    if (!tok.empty()) {
      // A range separator cannot be the first character, so search from index
      // 1. Searching from index 2 missed the dash in ranges with a one-digit
      // start such as "5-9", which were then read as the single address 5.
      const size_t dash = tok.find('-', 1);
      if (dash != std::string::npos) {
        std::string first_text = tok.substr(0, dash);
        std::string last_text = tok.substr(dash + 1);
        trim(first_text);
        trim(last_text);
        uint32_t first = 0;
        uint32_t last = 0;
        if (parse_u32(first_text, &first) && parse_u32(last_text, &last)) {
          // A reversed range could never match; normalise it instead.
          g_addrs.push_back(
              AddrRange{std::min(first, last), std::max(first, last)});
        }
      } else {
        uint32_t addr = 0;
        if (parse_u32(tok, &addr)) {
          // Saturate so the 8-byte window cannot wrap past the top of the
          // 32-bit address space (a wrapped end < start would never match).
          const uint32_t window_end = addr > kLastVa - 7 ? kLastVa : addr + 7;
          g_addrs.push_back(AddrRange{addr, window_end});
        }
      }
    }
    if (end == std::string::npos) break;
    pos = end + 1;
  }
}
void parse_locked() {
parse_values_csv(cvars::audit_68_host_mem_watch_values);
parse_addrs_csv(cvars::audit_68_host_mem_watch_addrs);
uint32_t bits = 0;
if (!g_values.empty()) bits |= 0x1;
if (!g_addrs.empty()) bits |= 0x2;
g_active.store(bits, std::memory_order_release);
XELOGI(
"AUDIT-068-INIT values_csv=\"{}\" addrs_csv=\"{}\" values_parsed={} "
"addr_ranges_parsed={} active=0x{:X}",
cvars::audit_68_host_mem_watch_values,
cvars::audit_68_host_mem_watch_addrs, g_values.size(), g_addrs.size(),
bits);
for (size_t i = 0; i < g_values.size(); ++i) {
XELOGI("AUDIT-068-INIT value[{}] = 0x{:08X}", i, g_values[i]);
}
for (size_t i = 0; i < g_addrs.size(); ++i) {
XELOGI("AUDIT-068-INIT addr_range[{}] = 0x{:08X}-0x{:08X}", i,
g_addrs[i].start, g_addrs[i].end);
}
}
// Returns true when a write of `size` bytes carrying `value` contains one of
// the watched 32-bit values:
//   size 1 / 2 : the low 8 / 16 bits of the watched value equal those of value
//   size 4..7  : the low 32 bits of value equal the watched value
//   size 8     : either 32-bit half of value equals the watched value
// Any other size never matches.
bool value_matches(uint64_t value, uint8_t size) {
  const uint32_t low_word = static_cast<uint32_t>(value);
  const uint32_t high_word = static_cast<uint32_t>(value >> 32);
  return std::any_of(
      g_values.begin(), g_values.end(), [&](const uint32_t watched) {
        switch (size) {
          case 1:
            return (watched & 0xFF) == (low_word & 0xFF);
          case 2:
            return (watched & 0xFFFF) == (low_word & 0xFFFF);
          case 8:
            return watched == low_word || watched == high_word;
          default:
            return size >= 4 && watched == low_word;
        }
      });
}
// Returns true when the write span [guest_va, guest_va + size - 1] overlaps
// any configured address range. A size of 0 is treated as a one-byte span.
bool addr_matches(uint32_t guest_va, uint8_t size) {
  const uint32_t lo = guest_va;
  // Compute the last byte in 64 bits and clamp it: a 32-bit sum wraps for
  // spans that reach the top of the address space, which made `hi < lo` and
  // hid writes to ranges near 0xFFFFFFFF.
  const uint64_t last_byte = uint64_t{guest_va} + (size ? size - 1u : 0u);
  const uint32_t hi = last_byte > 0xFFFFFFFFull
                          ? 0xFFFFFFFFu
                          : static_cast<uint32_t>(last_byte);
  for (const auto& r : g_addrs) {
    if (lo <= r.end && hi >= r.start) return true;
  }
  return false;
}
// Thread id used in the tid= field of watch log lines; forwards to
// xe::threading::current_thread_id().
uint32_t current_tid() { return xe::threading::current_thread_id(); }
// Writes one AUDIT-068-HOST-WRITE log line describing a matched write.
//   guest_va : guest address of the write
//   host_ptr : host pointer of the write; callers that only know the guest VA
//              pass nullptr, which is logged as 0
//   value    : stored value, zero-extended to 64 bits
//   size     : width reported in the sz= field
//   tag      : caller label; "<null>" is logged when it is nullptr
void emit(uint32_t guest_va, const void* host_ptr, uint64_t value,
          uint8_t size, const char* tag) {
  const uintptr_t host_addr = reinterpret_cast<uintptr_t>(host_ptr);
  const uint32_t byte_count = static_cast<uint32_t>(size);
  const char* const label = tag ? tag : "<null>";
  XELOGI(
      "AUDIT-068-HOST-WRITE guest_va=0x{:08X} host_ptr=0x{:016X} "
      "val=0x{:016X} sz={} fn={} host_ns={} tid={}",
      guest_va, host_addr, value, byte_count, label, host_ns_since_start(),
      current_tid());
}
// ===== AUDIT-068 Session 3 — read-mode probe state =====
constexpr size_t kMaxReadProbes = 8;
// One polled guest location. The configuration fields come from
// parse_read_probe_tok(); the last_* fields are change-detection state
// updated by read_probe_thread_main().
struct ReadProbe {
  uint32_t guest_va;    // guest virtual address to sample
  uint8_t size;  // 1, 2, 4, 8
  uint64_t period_ns;   // minimum interval between samples (floored at 1us)
  uint64_t last_value;  // most recent successfully sampled value
  bool last_was_valid;  // whether the previous sample succeeded
};
std::vector<ReadProbe> g_read_probes;
std::atomic<bool> g_read_probe_thread_running{false};
std::atomic<bool> g_read_probe_shutdown{false};
std::thread g_read_probe_thread;
std::once_flag g_read_probe_started;
// Parses one read-probe token of the form "VA:SIZE:PERIOD_NS".
//   VA        guest virtual address; must fit in 32 bits
//   SIZE      sample width; must be 1, 2, 4 or 8
//   PERIOD_NS poll period, clamped to [1000, INT64_MAX] so it stays a valid
//             non-negative std::chrono::nanoseconds count
// Each field is auto-based (decimal / 0x hex / 0 octal) and may be padded with
// spaces or tabs. Returns true and fills `*out` (with change-detection state
// reset) on success. Returns false and leaves `*out` untouched when a field
// is missing, negative, has trailing garbage, or is out of range. The old
// code silently truncated an oversized VA or SIZE to 32 bits.
bool parse_read_probe_tok(const std::string& tok, ReadProbe* out) {
  const size_t c1 = tok.find(':');
  if (c1 == std::string::npos) return false;
  const size_t c2 = tok.find(':', c1 + 1);
  if (c2 == std::string::npos) return false;

  // Strict unsigned parse of one field; the whole field must be consumed.
  const auto parse_field = [](std::string text, unsigned long long* value) {
    trim(text);
    // stoull accepts a leading '-' and wraps the result; reject it up front.
    if (text.empty() || text.find('-') != std::string::npos) return false;
    try {
      size_t consumed = 0;
      const unsigned long long parsed = std::stoull(text, &consumed, 0);
      if (consumed != text.size()) return false;
      *value = parsed;
      return true;
    } catch (...) {
      return false;
    }
  };

  unsigned long long va = 0;
  unsigned long long sz = 0;
  unsigned long long period = 0;
  if (!parse_field(tok.substr(0, c1), &va) ||
      !parse_field(tok.substr(c1 + 1, c2 - c1 - 1), &sz) ||
      !parse_field(tok.substr(c2 + 1), &period)) {
    return false;
  }
  if (va > 0xFFFFFFFFull) return false;
  if (sz != 1 && sz != 2 && sz != 4 && sz != 8) return false;
  if (period < 1000) period = 1000;  // 1us floor.
  constexpr unsigned long long kMaxPeriodNs =
      static_cast<unsigned long long>(INT64_MAX);
  if (period > kMaxPeriodNs) period = kMaxPeriodNs;

  out->guest_va = static_cast<uint32_t>(va);
  out->size = static_cast<uint8_t>(sz);
  out->period_ns = static_cast<uint64_t>(period);
  out->last_value = 0;
  out->last_was_valid = false;
  return true;
}
void parse_read_probes_csv(const std::string& csv) {
size_t pos = 0;
while (pos < csv.size() && g_read_probes.size() < kMaxReadProbes) {
size_t end = csv.find(',', pos);
std::string tok = csv.substr(pos, end - pos);
trim(tok);
if (!tok.empty()) {
ReadProbe rp{};
if (parse_read_probe_tok(tok, &rp)) {
g_read_probes.push_back(rp);
}
}
if (end == std::string::npos) break;
pos = end + 1;
}
}
uint64_t sample_at(uint32_t guest_va, uint8_t size, bool* out_valid) {
*out_valid = false;
if (!g_guest_to_host_thunk || !g_query_protect_thunk) return 0;
uint32_t prot = 0;
if (!g_query_protect_thunk(guest_va, &prot)) return 0;
// Page must have at least read permission. The protect bits map to
// xe::memory::PageAccess: kReadOnly=1, kReadWrite=2, kExecuteReadOnly=3,
// kExecuteReadWrite=4. kNoAccess=0. Accept anything non-zero — caller
// distinguishes via the second-pass change detector anyway.
if (prot == 0) return 0;
const void* hp = g_guest_to_host_thunk(guest_va);
if (!hp) return 0;
uint64_t v = 0;
// Guest memory is big-endian. We use raw byte loads to avoid alignment
// traps for size>4 on possibly-unaligned VAs. The "value" we log is the
// host-endian interpretation of the BE bytes (matches store_and_swap's
// logging convention: the byte-swapped scalar).
const uint8_t* bp = reinterpret_cast<const uint8_t*>(hp);
switch (size) {
case 1: v = bp[0]; break;
case 2: v = (uint64_t(bp[0]) << 8) | bp[1]; break;
case 4:
v = (uint64_t(bp[0]) << 24) | (uint64_t(bp[1]) << 16) |
(uint64_t(bp[2]) << 8) | bp[3];
break;
case 8:
v = (uint64_t(bp[0]) << 56) | (uint64_t(bp[1]) << 48) |
(uint64_t(bp[2]) << 40) | (uint64_t(bp[3]) << 32) |
(uint64_t(bp[4]) << 24) | (uint64_t(bp[5]) << 16) |
(uint64_t(bp[6]) << 8) | bp[7];
break;
}
*out_valid = true;
return v;
}
// Body of the read-probe poll thread. Repeatedly samples every configured
// probe whose period has elapsed and logs:
//   AUDIT-068-READ-INITIAL   first successful sample (or first after unmap)
//   AUDIT-068-READ-CHANGE    sampled value differs from the previous one
//   AUDIT-068-READ-UNMAPPED  a previously readable location stopped sampling
// Runs until g_read_probe_shutdown is observed true.
// NOTE(review): nothing in this file ever sets g_read_probe_shutdown, so as
// written the loop only ends with the process.
void read_probe_thread_main() {
  // Smallest configured period across all probes; used as the fixed sleep
  // interval between scan passes. Each probe is sampled only once its own
  // period_ns has elapsed since its last sample (tracked in `next_fire`).
  uint64_t min_period_ns = UINT64_MAX;
  for (const auto& p : g_read_probes) {
    if (p.period_ns < min_period_ns) min_period_ns = p.period_ns;
  }
  // No probes configured (or every period is UINT64_MAX): nothing to poll.
  if (min_period_ns == UINT64_MAX) return;
  // Per-probe next-fire times. Starting at 0 makes every probe fire on the
  // first pass.
  std::vector<uint64_t> next_fire(g_read_probes.size(), 0);
  XELOGI(
      "AUDIT-068-READ-INIT probe_count={} min_period_ns={} thread spawned",
      g_read_probes.size(), min_period_ns);
  for (size_t i = 0; i < g_read_probes.size(); ++i) {
    XELOGI("AUDIT-068-READ-INIT probe[{}] va=0x{:08X} size={} period_ns={}",
           i, g_read_probes[i].guest_va,
           static_cast<uint32_t>(g_read_probes[i].size),
           g_read_probes[i].period_ns);
  }
  while (!g_read_probe_shutdown.load(std::memory_order_relaxed)) {
    // One timestamp per pass; all probes sampled in this pass log it.
    int64_t now_ns = host_ns_since_start();
    for (size_t i = 0; i < g_read_probes.size(); ++i) {
      if (static_cast<uint64_t>(now_ns) < next_fire[i]) continue;
      ReadProbe& rp = g_read_probes[i];
      bool valid = false;
      uint64_t v = sample_at(rp.guest_va, rp.size, &valid);
      if (valid) {
        if (!rp.last_was_valid) {
          // First successful read: emit the initial value, do NOT call it a
          // "change" — but log so we know when the VA mapped.
          XELOGI(
              "AUDIT-068-READ-INITIAL va=0x{:08X} val=0x{:016X} sz={} "
              "host_ns={} tid=probe",
              rp.guest_va, v, static_cast<uint32_t>(rp.size), now_ns);
          rp.last_value = v;
          rp.last_was_valid = true;
        } else if (v != rp.last_value) {
          XELOGI(
              "AUDIT-068-READ-CHANGE va=0x{:08X} old=0x{:016X} "
              "new=0x{:016X} sz={} host_ns={} tid=probe",
              rp.guest_va, rp.last_value, v, static_cast<uint32_t>(rp.size),
              now_ns);
          rp.last_value = v;
        }
      } else if (rp.last_was_valid) {
        // Was valid, now invalid — page unmapped/reprotected.
        XELOGI(
            "AUDIT-068-READ-UNMAPPED va=0x{:08X} last=0x{:016X} sz={} "
            "host_ns={} tid=probe",
            rp.guest_va, rp.last_value, static_cast<uint32_t>(rp.size),
            now_ns);
        rp.last_was_valid = false;
      }
      // Re-arm relative to this pass's timestamp, sample or no sample.
      next_fire[i] = static_cast<uint64_t>(now_ns) + rp.period_ns;
    }
    // Sleep a fixed min_period_ns (floored at 1us) between passes. This is
    // not "until the next fire time"; it also bounds how long a shutdown
    // request could go unnoticed.
    uint64_t sleep_ns = min_period_ns;
    if (sleep_ns < 1000) sleep_ns = 1000;
    std::this_thread::sleep_for(std::chrono::nanoseconds(sleep_ns));
  }
  XELOGI("AUDIT-068-READ-EXIT thread shutting down");
}
// One-shot launcher for the read-probe poll thread. On the first call it
// parses the read-probe cvar and, if at least one probe is configured and
// both translation thunks are registered, spawns read_probe_thread_main()
// as a detached thread. Later calls are no-ops.
// NOTE(review): because the whole attempt sits under std::call_once, the
// "deferred" path below is never retried; the probe stays off for the rest
// of the process.
void start_read_probe_thread_if_configured() {
  const auto launch_attempt = []() {
    parse_read_probes_csv(cvars::audit_68_host_mem_read_probe);
    if (g_read_probes.empty()) {
      return;
    }
    const bool thunks_ready = g_guest_to_host_thunk && g_query_protect_thunk;
    if (!thunks_ready) {
      XELOGI(
          "AUDIT-068-READ-INIT thunks not ready (guest_to_host={} "
          "query_protect={}) — read probe deferred",
          (void*)g_guest_to_host_thunk, (void*)g_query_protect_thunk);
      return;
    }
    g_read_probe_thread_running.store(true, std::memory_order_release);
    g_read_probe_thread = std::thread(&read_probe_thread_main);
    // Best-effort, daemon-style: the thread is never joined.
    g_read_probe_thread.detach();
  };
  std::call_once(g_read_probe_started, launch_attempt);
}
} // namespace
// Runs parse_locked() exactly once (guarded by g_parsed_flag); later calls
// return without re-parsing the cvars.
void ensure_parsed() { std::call_once(g_parsed_flag, parse_locked); }
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
uint8_t size, const char* tag) {
// AUDIT-068 Session 2: defer parsing until Memory::Memory() has registered
// the host→guest thunk. This guarantees the cmdline cvar override has been
// applied AND the logging subsystem is alive before we latch g_active.
// Without this gate, a be<T>::set() call during static-init (e.g. from a
// global initializer in another translation unit) would trigger
// parse_locked() before cpu_flags.cc's cvar objects are constructed —
// latching g_active=0 permanently and silencing the watch.
HostToGuestThunk thunk = g_host_to_guest_thunk;
if (!thunk) return;
ensure_parsed();
// AUDIT-068 Session 3: lazy-start the read-probe poll thread. Same gate as
// ensure_parsed() — must come after Memory::Memory() has registered the
// thunks so the probe can read pages safely.
start_read_probe_thread_if_configured();
uint32_t active = g_active.load(std::memory_order_acquire);
if (active == 0) return;
uint32_t guest_va = 0;
if (thunk) {
guest_va = thunk(host_ptr);
}
bool hit = false;
if ((active & 0x1) && value_matches(value, size)) hit = true;
if (!hit && (active & 0x2) && thunk && addr_matches(guest_va, size)) {
hit = true;
}
if (!hit) return;
emit(guest_va, host_ptr, value, size, tag);
}
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
const char* tag) {
// AUDIT-068 Session 2: same static-init gate as check_host_write_slowpath.
// Callers (Memory::Zero/Fill/Copy + xex_module audit68_prescan_memcpy) only
// run after Memory::Memory(), but defensive in case of future expansion.
if (!g_host_to_guest_thunk) return;
ensure_parsed();
uint32_t active = g_active.load(std::memory_order_acquire);
if (active == 0) return;
bool hit = false;
if ((active & 0x1) && value_matches(value, size)) hit = true;
if (!hit && (active & 0x2) && addr_matches(guest_va, size)) hit = true;
if (!hit) return;
emit(guest_va, nullptr, value, size, tag);
}
} // namespace audit_68
} // namespace xe