Source changes (dormant parity infra, retained from iterate 2.AI/2.AO): - xenia-kernel/exports.rs: nt_create_event manual_reset polarity + related event wiring - xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps (.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as regenerable local artifacts — see memory + HANDOFF for the running findings. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1732 lines
68 KiB
Diff
1732 lines
68 KiB
Diff
# AUDIT-068 cumulative canary instrumentation diff — Session 4 close
|
|
# Session 4 adds 0 LOC (zero new instrumentation needed).
|
|
# Diff content equals Session 3's; only this header changes.
|
|
|
|
diff --git a/src/xenia/base/byte_order.h b/src/xenia/base/byte_order.h
|
|
index 5a076f319..c80ee0ffc 100644
|
|
--- a/src/xenia/base/byte_order.h
|
|
+++ b/src/xenia/base/byte_order.h
|
|
@@ -11,6 +11,7 @@
|
|
#define XENIA_BASE_BYTE_ORDER_H_
|
|
|
|
#include <cstdint>
|
|
+#include <type_traits>
|
|
#if defined __has_include
|
|
#if __has_include(<version>)
|
|
#include <version>
|
|
@@ -21,6 +22,7 @@
|
|
#endif
|
|
|
|
#include "xenia/base/assert.h"
|
|
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
|
#include "xenia/base/platform.h"
|
|
|
|
#if !__cpp_lib_endian
|
|
@@ -88,6 +90,30 @@ struct endian_store {
|
|
operator T() const { return get(); }
|
|
|
|
void set(const T& src) {
|
|
+ // AUDIT-068 Session 2: hook the canonical be<T>/le<T> write path. Gated
|
|
+ // on the host→guest thunk being installed by Memory::Memory(); without
|
|
+ // that there is no Memory and therefore no possible guest-memory write.
|
|
+ // This ALSO prevents the slow-path from running during static-init order
|
|
+ // (which would race the cvar object construction in cpu_flags.cc and
|
|
+ // permanently latch g_active=0 before --audit_68_* cmdline override
|
|
+ // applies). See reading-error #35 / Session 2 plan.
|
|
+ if constexpr (sizeof(T) <= 8 && std::is_integral_v<T>) {
|
|
+ if (xe::audit_68::g_host_to_guest_thunk != nullptr) [[unlikely]] {
|
|
+ uint64_t v;
|
|
+ if constexpr (sizeof(T) == 8) {
|
|
+ v = static_cast<uint64_t>(src);
|
|
+ } else if constexpr (sizeof(T) == 4) {
|
|
+ v = static_cast<uint64_t>(static_cast<uint32_t>(src));
|
|
+ } else if constexpr (sizeof(T) == 2) {
|
|
+ v = static_cast<uint64_t>(static_cast<uint16_t>(src));
|
|
+ } else {
|
|
+ v = static_cast<uint64_t>(static_cast<uint8_t>(src));
|
|
+ }
|
|
+ xe::audit_68::check_host_write(
|
|
+ &value, v, static_cast<uint8_t>(sizeof(T)),
|
|
+ E == std::endian::big ? "be<T>::set" : "le<T>::set");
|
|
+ }
|
|
+ }
|
|
if constexpr (std::endian::native == E) {
|
|
value = src;
|
|
} else {
|
|
diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h
|
|
index 8ef40bbff..e78c8499c 100644
|
|
--- a/src/xenia/base/memory.h
|
|
+++ b/src/xenia/base/memory.h
|
|
@@ -18,6 +18,7 @@
|
|
#include <string_view>
|
|
#include <type_traits>
|
|
|
|
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
|
#include "xenia/base/byte_order.h"
|
|
|
|
namespace xe {
|
|
@@ -354,34 +355,52 @@ template <typename T>
|
|
void store(void* mem, const T& value);
|
|
template <>
|
|
inline void store<int8_t>(void* mem, const int8_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
|
+ static_cast<uint8_t>(value)),
|
|
+ 1, "store<i8>");
|
|
*reinterpret_cast<int8_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store<uint8_t>(void* mem, const uint8_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 1,
|
|
+ "store<u8>");
|
|
*reinterpret_cast<uint8_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store<int16_t>(void* mem, const int16_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
|
+ static_cast<uint16_t>(value)),
|
|
+ 2, "store<i16>");
|
|
*reinterpret_cast<int16_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store<uint16_t>(void* mem, const uint16_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 2,
|
|
+ "store<u16>");
|
|
*reinterpret_cast<uint16_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store<int32_t>(void* mem, const int32_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
|
+ static_cast<uint32_t>(value)),
|
|
+ 4, "store<i32>");
|
|
*reinterpret_cast<int32_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store<uint32_t>(void* mem, const uint32_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 4,
|
|
+ "store<u32>");
|
|
*reinterpret_cast<uint32_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store<int64_t>(void* mem, const int64_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 8,
|
|
+ "store<i64>");
|
|
*reinterpret_cast<int64_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store<uint64_t>(void* mem, const uint64_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, value, 8, "store<u64>");
|
|
*reinterpret_cast<uint64_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
@@ -411,34 +430,52 @@ template <typename T>
|
|
void store_and_swap(void* mem, const T& value);
|
|
template <>
|
|
inline void store_and_swap<int8_t>(void* mem, const int8_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
|
+ static_cast<uint8_t>(value)),
|
|
+ 1, "store_and_swap<i8>");
|
|
*reinterpret_cast<int8_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store_and_swap<uint8_t>(void* mem, const uint8_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 1,
|
|
+ "store_and_swap<u8>");
|
|
*reinterpret_cast<uint8_t*>(mem) = value;
|
|
}
|
|
template <>
|
|
inline void store_and_swap<int16_t>(void* mem, const int16_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
|
+ static_cast<uint16_t>(value)),
|
|
+ 2, "store_and_swap<i16>");
|
|
*reinterpret_cast<int16_t*>(mem) = byte_swap(value);
|
|
}
|
|
template <>
|
|
inline void store_and_swap<uint16_t>(void* mem, const uint16_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 2,
|
|
+ "store_and_swap<u16>");
|
|
*reinterpret_cast<uint16_t*>(mem) = byte_swap(value);
|
|
}
|
|
template <>
|
|
inline void store_and_swap<int32_t>(void* mem, const int32_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
|
+ static_cast<uint32_t>(value)),
|
|
+ 4, "store_and_swap<i32>");
|
|
*reinterpret_cast<int32_t*>(mem) = byte_swap(value);
|
|
}
|
|
template <>
|
|
inline void store_and_swap<uint32_t>(void* mem, const uint32_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 4,
|
|
+ "store_and_swap<u32>");
|
|
*reinterpret_cast<uint32_t*>(mem) = byte_swap(value);
|
|
}
|
|
template <>
|
|
inline void store_and_swap<int64_t>(void* mem, const int64_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 8,
|
|
+ "store_and_swap<i64>");
|
|
*reinterpret_cast<int64_t*>(mem) = byte_swap(value);
|
|
}
|
|
template <>
|
|
inline void store_and_swap<uint64_t>(void* mem, const uint64_t& value) {
|
|
+ xe::audit_68::check_host_write(mem, value, 8, "store_and_swap<u64>");
|
|
*reinterpret_cast<uint64_t*>(mem) = byte_swap(value);
|
|
}
|
|
template <>
|
|
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc
|
|
index 5da8f6adc..cbac9826c 100644
|
|
--- a/src/xenia/cpu/backend/x64/x64_emitter.cc
|
|
+++ b/src/xenia/cpu/backend/x64/x64_emitter.cc
|
|
@@ -13,6 +13,8 @@
|
|
|
|
#include <climits>
|
|
#include <cstring>
|
|
+#include <string>
|
|
+#include <vector>
|
|
|
|
#include "third_party/fmt/include/fmt/format.h"
|
|
#include "xenia/base/assert.h"
|
|
@@ -63,6 +65,76 @@ DEFINE_bool(instrument_call_times, false,
|
|
"Compute time taken for functions, for profiling guest code",
|
|
"x64");
|
|
#endif
|
|
+
|
|
+// AUDIT-061/067: forward decls of probe/watch tables (defined in
|
|
+// ppc_hir_builder.cc).
|
|
+namespace xe {
|
|
+namespace cpu {
|
|
+namespace audit61 {
|
|
+const std::vector<uint32_t>& pcs();
|
|
+} // namespace audit61
|
|
+namespace audit67 {
|
|
+const std::vector<uint32_t>& vals();
|
|
+} // namespace audit67
|
|
+} // namespace cpu
|
|
+} // namespace xe
|
|
+
|
|
+// AUDIT-061: handler for trap codes [200, 232). arg0 carries trap idx
|
|
+// (trap_code - 200), mapping to ::xe::cpu::audit61::pcs()[idx]. Emits one
|
|
+// log line per fire with cr0/cr6 LGE flags + key GPRs + LR + tid.
|
|
+static uint64_t TrapAudit61Branch(void* raw_context, uint64_t idx) {
|
|
+ auto* ctx = reinterpret_cast<xe::cpu::ppc::PPCContext_s*>(raw_context);
|
|
+ const auto& pcs = ::xe::cpu::audit61::pcs();
|
|
+ uint32_t pc = (idx < pcs.size()) ? pcs[static_cast<size_t>(idx)] : 0u;
|
|
+ uint32_t tid = 0;
|
|
+ if (ctx->thread_state) {
|
|
+ tid = ctx->thread_state->thread_id();
|
|
+ }
|
|
+ auto enc = [](uint8_t lt, uint8_t gt, uint8_t eq) {
|
|
+ char buf[4];
|
|
+ buf[0] = lt ? 'L' : '.';
|
|
+ buf[1] = gt ? 'G' : '.';
|
|
+ buf[2] = eq ? 'E' : '.';
|
|
+ buf[3] = '\0';
|
|
+ return std::string(buf);
|
|
+ };
|
|
+ XELOGI(
|
|
+ "AUDIT-061-BR pc={:08X} lr={:08X} cr0={} cr6={} r3={:08X} r4={:08X} "
|
|
+ "r5={:08X} r6={:08X} r31={:08X} tid={}",
|
|
+ pc, static_cast<uint32_t>(ctx->lr),
|
|
+ enc(ctx->cr0.cr0_lt, ctx->cr0.cr0_gt, ctx->cr0.cr0_eq),
|
|
+ enc(ctx->cr6.cr6_all_equal, ctx->cr6.cr6_1, ctx->cr6.cr6_none_equal),
|
|
+ static_cast<uint32_t>(ctx->r[3]), static_cast<uint32_t>(ctx->r[4]),
|
|
+ static_cast<uint32_t>(ctx->r[5]), static_cast<uint32_t>(ctx->r[6]),
|
|
+ static_cast<uint32_t>(ctx->r[31]), tid);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+// AUDIT-067: handler for trap codes [250, 254). arg0 carries trap idx
|
|
+// (trap_code - 250), mapping to ::xe::cpu::audit67::vals()[idx]. Fired when
|
|
+// a 4-byte guest store sees the configured value. The store-emit site stashed
|
|
+// (pc << 32) | (ea & 0xFFFFFFFF) into ctx->scratch right before the trap.
|
|
+static uint64_t TrapAudit67ValueWatch(void* raw_context, uint64_t idx) {
|
|
+ auto* ctx = reinterpret_cast<xe::cpu::ppc::PPCContext_s*>(raw_context);
|
|
+ const auto& vals = ::xe::cpu::audit67::vals();
|
|
+ uint32_t val =
|
|
+ (idx < vals.size()) ? vals[static_cast<size_t>(idx)] : 0u;
|
|
+ uint32_t pc = static_cast<uint32_t>(ctx->scratch >> 32);
|
|
+ uint32_t dst = static_cast<uint32_t>(ctx->scratch & 0xFFFFFFFFu);
|
|
+ uint32_t tid = 0;
|
|
+ if (ctx->thread_state) {
|
|
+ tid = ctx->thread_state->thread_id();
|
|
+ }
|
|
+ XELOGI(
|
|
+ "AUDIT-067-VAL pc={:08X} lr={:08X} val={:08X} dst={:08X} "
|
|
+ "r3={:08X} r4={:08X} r5={:08X} r6={:08X} r31={:08X} tid={}",
|
|
+ pc, static_cast<uint32_t>(ctx->lr), val, dst,
|
|
+ static_cast<uint32_t>(ctx->r[3]), static_cast<uint32_t>(ctx->r[4]),
|
|
+ static_cast<uint32_t>(ctx->r[5]), static_cast<uint32_t>(ctx->r[6]),
|
|
+ static_cast<uint32_t>(ctx->r[31]), tid);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
namespace xe {
|
|
namespace cpu {
|
|
namespace backend {
|
|
@@ -455,6 +527,20 @@ void X64Emitter::Trap(uint16_t trap_type) {
|
|
// ?
|
|
break;
|
|
default:
|
|
+ // AUDIT-067: trap codes [250, 254) dispatch the value-watch handler.
|
|
+ // arg0 = idx into ::xe::cpu::audit67::vals().
|
|
+ if (trap_type >= 250 && trap_type < 254) {
|
|
+ CallNative(::TrapAudit67ValueWatch,
|
|
+ static_cast<uint64_t>(trap_type - 250));
|
|
+ break;
|
|
+ }
|
|
+ // AUDIT-061: trap codes [200, 232) dispatch the branch-probe handler.
|
|
+ // arg0 = idx into ::xe::cpu::audit61::pcs().
|
|
+ if (trap_type >= 200 && trap_type < 232) {
|
|
+ CallNative(::TrapAudit61Branch,
|
|
+ static_cast<uint64_t>(trap_type - 200));
|
|
+ break;
|
|
+ }
|
|
XELOGW("Unknown trap type {}", trap_type);
|
|
db(0xCC);
|
|
break;
|
|
diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
|
|
index 3ff067e15..2298dd3d7 100644
|
|
--- a/src/xenia/cpu/cpu_flags.cc
|
|
+++ b/src/xenia/cpu/cpu_flags.cc
|
|
@@ -57,3 +57,83 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU");
|
|
|
|
DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.",
|
|
"CPU");
|
|
+
|
|
+// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool.
|
|
+DEFINE_bool(audit_demo_setup_trace, true,
|
|
+ "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.",
|
|
+ "Audit");
|
|
+
|
|
+// AUDIT-061: comma-separated list of guest PCs to log on each fire.
|
|
+// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits
|
|
+// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N.
|
|
+// Default empty (off); no perf cost when empty.
|
|
+DEFINE_string(audit_61_branch_probe_pcs, "",
|
|
+ "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).",
|
|
+ "Audit");
|
|
+
|
|
+// AUDIT-067: comma-separated list of u32 values to watch. When non-empty,
|
|
+// every 32-bit word of a guest store (stw*/stmw/std*/stvx/stvewx) emits a runtime
|
|
+// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N.
|
|
+// Max 4 values. Default empty (off); zero overhead when empty.
|
|
+DEFINE_string(audit_67_value_watch, "",
|
|
+ "AUDIT-067: CSV of u32 values (max 4) — log every guest "
|
|
+ "store whose value matches.",
|
|
+ "Audit");
|
|
+
|
|
+// AUDIT-068: host-side memory-write watch. See cpu_flags.h header for format.
|
|
+// Mirrors AUDIT-067 but covers host-side writes (xe::store<T>/store_and_swap<T>,
|
|
+// be<T>/le<T>::set, Memory::Zero/Fill/Copy). Empty default = zero cost.
|
|
+DEFINE_string(audit_68_host_mem_watch_values, "",
|
|
+ "AUDIT-068: CSV of u32 values (max 8) — log every host-side "
|
|
+ "guest-memory write whose value matches.",
|
|
+ "Audit");
|
|
+DEFINE_string(audit_68_host_mem_watch_addrs, "",
|
|
+ "AUDIT-068: CSV of guest VAs or VA ranges 'START-END' (max 8) "
|
|
+ "— log every host-side guest-memory write whose guest VA falls "
|
|
+ "within the configured set.",
|
|
+ "Audit");
|
|
+
|
|
+// AUDIT-068 Session 3: read-mode probe. See cpu_flags.h for format.
|
|
+DEFINE_string(audit_68_host_mem_read_probe, "",
|
|
+ "AUDIT-068 Session 3: CSV of 'VA:SIZE:PERIOD_NS' tuples (max 8) "
|
|
+ "— a dedicated poll thread reads the value at each VA every "
|
|
+ "PERIOD_NS and emits AUDIT-068-READ-CHANGE on transition.",
|
|
+ "Audit");
|
|
+
|
|
+// Phase A — see kernel/event_log.h.
|
|
+DEFINE_string(phase_a_event_log_path, "",
|
|
+ "Phase A: write schema-v1 JSONL event log to this path. "
|
|
+ "Empty (default) = disabled.",
|
|
+ "Audit");
|
|
+DEFINE_bool(phase_a_event_log_mem_writes, false,
|
|
+ "Phase A: include mem.write events in the JSONL log. RESERVED — "
|
|
+ "not wired in this phase. Default false.",
|
|
+ "Audit");
|
|
+
|
|
+// Phase D Stage 1 — see kernel/event_log.h `EmitContentionObserved`.
|
|
+DEFINE_bool(kernel_emit_contention, false,
|
|
+ "Phase D Stage 1: emit `contention.observed` events when "
|
|
+ "RtlEnterCriticalSection's spin loop is exhausted and the call "
|
|
+ "falls through to xeKeWaitForSingleObject. Default false (zero "
|
|
+ "cost when disabled). Requires --phase_a_event_log_path to be "
|
|
+ "set as well.",
|
|
+ "Audit");
|
|
+
|
|
+// Phase B — see kernel/phase_b_snapshot.h.
|
|
+DEFINE_string(phase_b_snapshot_dir, "",
|
|
+ "Phase B: write 5-file structured state snapshot to "
|
|
+ "<dir>/canary/ at the moment immediately before the first "
|
|
+ "guest PPC instruction of entry_point. Empty (default) = "
|
|
+ "disabled, zero overhead.",
|
|
+ "Audit");
|
|
+DEFINE_bool(phase_b_snapshot_and_exit, false,
|
|
+ "Phase B: after writing the snapshot, exit the process "
|
|
+ "immediately (std::_Exit(0)) so re-runs are byte-deterministic.",
|
|
+ "Audit");
|
|
+DEFINE_bool(phase_b_dump_section_content, false,
|
|
+ "Phase B: in memory.json, populate section_contents[].content_b64 "
|
|
+ "with raw bytes of every committed XEX-image region. Default "
|
|
+ "false — per-region SHA-256 is enough for the routine diff; "
|
|
+ "this is the escape hatch for the STOP-and-report condition "
|
|
+ "(image_loaded_sha256 mismatch).",
|
|
+ "Audit");
|
|
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
|
|
index 38c4f98ba..9b5ca7a1c 100644
|
|
--- a/src/xenia/cpu/cpu_flags.h
|
|
+++ b/src/xenia/cpu/cpu_flags.h
|
|
@@ -35,4 +35,52 @@ DECLARE_bool(break_condition_truncate);
|
|
|
|
DECLARE_bool(break_on_debugbreak);
|
|
|
|
+// AUDIT-DEMO smoke marker.
|
|
+DECLARE_bool(audit_demo_setup_trace);
|
|
+
|
|
+// AUDIT-061: multi-PC branch probe — emits one log line per fire with
|
|
+// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs.
|
|
+DECLARE_string(audit_61_branch_probe_pcs);
|
|
+
|
|
+// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose
|
|
+// value-to-be-stored matches any configured value. CSV of u32 values
|
|
+// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty.
|
|
+DECLARE_string(audit_67_value_watch);
|
|
+
|
|
+// AUDIT-068: host-side memory-write watch — emit a log line for each host-side
|
|
+// write to guest memory whose VALUE matches any configured u32 value, or whose
|
|
+// guest VA falls within any configured ADDR or ADDR-range. Mirrors AUDIT-067
|
|
+// but covers the host-side write paths (xe::store_and_swap<T>, Memory::Zero/
|
|
+// Fill/Copy) that AUDIT-067's JIT store-opcode hooks cannot see.
|
|
+//
|
|
+// VALUES: CSV of u32 values, max 8 entries; e.g. "0x8200A208,0x8200A928".
|
|
+// ADDRS: CSV of guest VAs or VA ranges, max 8 entries; range form is
|
|
+// "0xSTART-0xEND" (inclusive). e.g. "0x42500000-0x42600000,0xBCE25340".
|
|
+// Default empty (off); zero cost on the hot path when both are empty.
|
|
+DECLARE_string(audit_68_host_mem_watch_values);
|
|
+DECLARE_string(audit_68_host_mem_watch_addrs);
|
|
+
|
|
+// AUDIT-068 Session 3: read-mode probe. CSV of "VA:SIZE:PERIOD_NS" tuples
|
|
+// (max 8). A dedicated low-priority thread polls each VA every PERIOD_NS and
|
|
+// emits AUDIT-068-READ-CHANGE when the value transitions. SIZE in {1,2,4,8}.
|
|
+// Example: "0xBCE25340:4:1000000" = poll u32 at 0xBCE25340 every 1 ms.
|
|
+// Default empty (off); the poll thread is not spawned when empty.
|
|
+DECLARE_string(audit_68_host_mem_read_probe);
|
|
+
|
|
+// Phase A: JSONL event-log emitter path. When non-empty, the engine writes
|
|
+// schema-v1 JSONL events to this file. Empty (default) = no overhead, no
|
|
+// behavior change. Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md
|
|
+DECLARE_string(phase_a_event_log_path);
|
|
+DECLARE_bool(phase_a_event_log_mem_writes);
|
|
+
|
|
+// Phase B: initial-state snapshot. When the dir cvar is non-empty, the
|
|
+// engine writes a five-file structured state snapshot (cpu_state.json,
|
|
+// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to
|
|
+// `<dir>/canary/` at the moment immediately before the first guest PPC
|
|
+// instruction of the XEX entry_point executes. See
|
|
+// `xenia-rs/audit-runs/phase-b-state-equivalence/`.
|
|
+DECLARE_string(phase_b_snapshot_dir);
|
|
+DECLARE_bool(phase_b_snapshot_and_exit);
|
|
+DECLARE_bool(phase_b_dump_section_content);
|
|
+
|
|
#endif // XENIA_CPU_CPU_FLAGS_H_
|
|
diff --git a/src/xenia/cpu/ppc/ppc_emit_altivec.cc b/src/xenia/cpu/ppc/ppc_emit_altivec.cc
|
|
index 513b21391..c9af025ff 100644
|
|
--- a/src/xenia/cpu/ppc/ppc_emit_altivec.cc
|
|
+++ b/src/xenia/cpu/ppc/ppc_emit_altivec.cc
|
|
@@ -9,12 +9,28 @@
|
|
|
|
#include "xenia/cpu/ppc/ppc_emit-private.h"
|
|
|
|
+#include <vector>
|
|
#include "xenia/base/assert.h"
|
|
+#include "xenia/cpu/cpu_flags.h"
|
|
#include "xenia/cpu/ppc/ppc_context.h"
|
|
#include "xenia/cpu/ppc/ppc_hir_builder.h"
|
|
|
|
#include <cmath>
|
|
|
|
+// AUDIT-067: forward-decls. Defined in ppc_emit_memory.cc / ppc_hir_builder.cc.
|
|
+namespace xe {
|
|
+namespace cpu {
|
|
+namespace audit67 {
|
|
+const std::vector<uint32_t>& vals();
|
|
+}
|
|
+namespace ppc {
|
|
+void EmitAudit67ValueWatchVec(PPCHIRBuilder& f, uint32_t pc,
|
|
+ ::xe::cpu::hir::Value* vec128,
|
|
+ ::xe::cpu::hir::Value* ea);
|
|
+}
|
|
+}
|
|
+}
|
|
+
|
|
namespace xe {
|
|
namespace cpu {
|
|
namespace ppc {
|
|
@@ -175,6 +191,21 @@ int InstrEmit_stvewx_(PPCHIRBuilder& f, const InstrData& i, uint32_t vd,
|
|
f.Shr(f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantUint8(0xF)), 2);
|
|
Value* v = f.Extract(f.LoadVR(vd), el, INT32_TYPE);
|
|
f.Store(ea, f.ByteSwap(v));
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ // For stvewx: only one lane is actually stored; piggyback on the scalar
|
|
+ // value-watch helper by emitting the equivalent of stw of v at ea.
|
|
+ Value* pc_hi64 =
|
|
+ f.LoadConstantUint64(static_cast<uint64_t>(i.address) << 32);
|
|
+ Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE);
|
|
+ Value* packed = f.Or(pc_hi64, ea_lo64);
|
|
+ const auto& vals = ::xe::cpu::audit67::vals();
|
|
+ for (size_t idx = 0; idx < vals.size(); ++idx) {
|
|
+ Value* cmp = f.CompareEQ(v, f.LoadConstantUint32(vals[idx]));
|
|
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
|
|
+ f.ContextBarrier();
|
|
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
|
|
+ }
|
|
+ }
|
|
return 0;
|
|
}
|
|
int InstrEmit_stvewx(PPCHIRBuilder& f, const InstrData& i) {
|
|
@@ -187,7 +218,11 @@ int InstrEmit_stvewx128(PPCHIRBuilder& f, const InstrData& i) {
|
|
int InstrEmit_stvx_(PPCHIRBuilder& f, const InstrData& i, uint32_t vd,
|
|
uint32_t ra, uint32_t rb) {
|
|
Value* ea = f.And(CalculateEA_0(f, ra, rb), f.LoadConstantUint64(~0xFull));
|
|
- f.Store(ea, f.ByteSwap(f.LoadVR(vd)));
|
|
+ Value* vec = f.LoadVR(vd);
|
|
+ f.Store(ea, f.ByteSwap(vec));
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatchVec(f, i.address, vec, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
int InstrEmit_stvx(PPCHIRBuilder& f, const InstrData& i) {
|
|
diff --git a/src/xenia/cpu/ppc/ppc_emit_memory.cc b/src/xenia/cpu/ppc/ppc_emit_memory.cc
|
|
index b4bdabb49..a6b44697d 100644
|
|
--- a/src/xenia/cpu/ppc/ppc_emit_memory.cc
|
|
+++ b/src/xenia/cpu/ppc/ppc_emit_memory.cc
|
|
@@ -10,11 +10,22 @@
|
|
#include "xenia/cpu/ppc/ppc_emit-private.h"
|
|
|
|
#include <stddef.h>
|
|
+#include <vector>
|
|
#include "xenia/base/assert.h"
|
|
#include "xenia/base/cvar.h"
|
|
+#include "xenia/cpu/cpu_flags.h"
|
|
#include "xenia/cpu/ppc/ppc_context.h"
|
|
#include "xenia/cpu/ppc/ppc_hir_builder.h"
|
|
|
|
+// AUDIT-067: forward-decl of value-watch table (defined in ppc_hir_builder.cc).
|
|
+namespace xe {
|
|
+namespace cpu {
|
|
+namespace audit67 {
|
|
+const std::vector<uint32_t>& vals();
|
|
+} // namespace audit67
|
|
+} // namespace cpu
|
|
+} // namespace xe
|
|
+
|
|
DEFINE_bool(
|
|
disable_prefetch_and_cachecontrol, true,
|
|
"Disables translating ppc prefetch/cache flush instructions to host "
|
|
@@ -67,6 +78,90 @@ void StoreEA(PPCHIRBuilder& f, uint32_t rt, Value* ea) {
|
|
f.StoreGPR(rt, ea);
|
|
}
|
|
|
|
+// AUDIT-067: emit a runtime equality check on the 32-bit value-to-be-stored
|
|
+// against each configured watch value. On match, store (pc, EA) packed into
|
|
+// the PPCContext scratch field so the native trap handler can read them,
|
|
+// then fire a trap with code (kTrapBase + idx). Done host-side as a
|
|
+// build-time pc constant + a runtime EA truncate, packed as
|
|
+// (pc << 32) | (ea & 0xFFFFFFFF) so the handler can decompose.
|
|
+static void EmitAudit67ValueWatch(PPCHIRBuilder& f, uint32_t pc, Value* val32,
|
|
+ Value* ea) {
|
|
+ const auto& vals = ::xe::cpu::audit67::vals();
|
|
+ if (vals.empty()) return;
|
|
+ // pc is known at JIT time → emit as constant; ea is runtime.
|
|
+ Value* pc_hi64 = f.LoadConstantUint64(static_cast<uint64_t>(pc) << 32);
|
|
+ Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE);
|
|
+ Value* packed = f.Or(pc_hi64, ea_lo64);
|
|
+ for (size_t idx = 0; idx < vals.size(); ++idx) {
|
|
+ Value* cmp = f.CompareEQ(val32, f.LoadConstantUint32(vals[idx]));
|
|
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
|
|
+ f.ContextBarrier();
|
|
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
|
|
+ }
|
|
+}
|
|
+
|
|
+// AUDIT-067 128-bit (vector) variant: checks each of the 4 32-bit lanes in a
|
|
+// vector store. Used for stvx/stvxl/stvewx (memcpy-derived installs may use
|
|
+// 128-bit vector stores). The matched lane is reflected in the dst by
|
|
+// adding (lane * 4) so the handler can see exactly where in memory the
|
|
+// value lands. Declared with external linkage so altivec.cc can call it.
|
|
+void EmitAudit67ValueWatchVec(PPCHIRBuilder& f, uint32_t pc,
|
|
+ Value* vec128, Value* ea) {
|
|
+ const auto& vals = ::xe::cpu::audit67::vals();
|
|
+ if (vals.empty()) return;
|
|
+ Value* pc_hi64 = f.LoadConstantUint64(static_cast<uint64_t>(pc) << 32);
|
|
+ for (size_t idx = 0; idx < vals.size(); ++idx) {
|
|
+ Value* watch = f.LoadConstantUint32(vals[idx]);
|
|
+ for (uint8_t lane = 0; lane < 4; ++lane) {
|
|
+ Value* lane_val = f.Extract(vec128, lane, INT32_TYPE);
|
|
+ Value* cmp = f.CompareEQ(lane_val, watch);
|
|
+ Value* lane_off = f.LoadConstantUint32(static_cast<uint32_t>(lane * 4));
|
|
+ Value* dst32 = f.Add(f.Truncate(ea, INT32_TYPE), lane_off);
|
|
+ Value* packed = f.Or(pc_hi64, f.ZeroExtend(dst32, INT64_TYPE));
|
|
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
|
|
+ f.ContextBarrier();
|
|
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+// AUDIT-067 64-bit variant: same as above but checks BOTH halves of a 64-bit
|
|
+// stored value. EA points at the start of the 8-byte store; the matched half
|
|
+// is reported via dst, not the trap code (250 + idx for both halves): a match
|
|
+// in the upper 32 bits reports dst=EA, in the lower 32 bits dst=EA+4.
|
|
+static void EmitAudit67ValueWatch64(PPCHIRBuilder& f, uint32_t pc, Value* val64,
|
|
+ Value* ea) {
|
|
+ const auto& vals = ::xe::cpu::audit67::vals();
|
|
+ if (vals.empty()) return;
|
|
+ // PowerPC is big-endian: u64 stored at EA places upper-32 bits at EA+0
|
|
+ // and lower-32 bits at EA+4. Check both halves against each watch value.
|
|
+ Value* upper32 = f.Truncate(f.Shr(val64, int8_t(32)), INT32_TYPE); // bits[63:32]
|
|
+ Value* lower32 = f.Truncate(val64, INT32_TYPE); // bits[31:0]
|
|
+ Value* pc_hi64 = f.LoadConstantUint64(static_cast<uint64_t>(pc) << 32);
|
|
+ for (size_t idx = 0; idx < vals.size(); ++idx) {
|
|
+ // Upper half lands at EA+0.
|
|
+ {
|
|
+ Value* cmp = f.CompareEQ(upper32, f.LoadConstantUint32(vals[idx]));
|
|
+ Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE);
|
|
+ Value* packed = f.Or(pc_hi64, ea_lo64);
|
|
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
|
|
+ f.ContextBarrier();
|
|
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
|
|
+ }
|
|
+ // Lower half lands at EA+4.
|
|
+ {
|
|
+ Value* cmp = f.CompareEQ(lower32, f.LoadConstantUint32(vals[idx]));
|
|
+ Value* ea_plus4 =
|
|
+ f.Add(f.Truncate(ea, INT32_TYPE), f.LoadConstantUint32(4));
|
|
+ Value* ea_lo64 = f.ZeroExtend(ea_plus4, INT64_TYPE);
|
|
+ Value* packed = f.Or(pc_hi64, ea_lo64);
|
|
+ f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed);
|
|
+ f.ContextBarrier();
|
|
+ f.TrapTrue(cmp, static_cast<uint16_t>(250 + idx));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
// Integer load (A-13)
|
|
|
|
int InstrEmit_lbz(PPCHIRBuilder& f, const InstrData& i) {
|
|
@@ -518,9 +613,11 @@ int InstrEmit_stw(PPCHIRBuilder& f, const InstrData& i) {
|
|
b = f.LoadGPR(i.D.RA);
|
|
}
|
|
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
|
- f.StoreOffset(b, offset,
|
|
- f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE)));
|
|
-
|
|
+ Value* val32 = f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE);
|
|
+ f.StoreOffset(b, offset, f.ByteSwap(val32));
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch(f, i.address, val32, f.Add(b, offset));
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -532,10 +629,14 @@ int InstrEmit_stmw(PPCHIRBuilder& f, const InstrData& i) {
|
|
b = f.LoadGPR(i.D.RA);
|
|
}
|
|
|
|
+ const bool watch_active = !::xe::cpu::audit67::vals().empty();
|
|
for (uint32_t j = 0; j < 32 - i.D.RT; ++j) {
|
|
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS) + j * 4);
|
|
- f.StoreOffset(b, offset,
|
|
- f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT + j), INT32_TYPE)));
|
|
+ Value* val32 = f.Truncate(f.LoadGPR(i.D.RT + j), INT32_TYPE);
|
|
+ f.StoreOffset(b, offset, f.ByteSwap(val32));
|
|
+ if (watch_active) {
|
|
+ EmitAudit67ValueWatch(f, i.address, val32, f.Add(b, offset));
|
|
+ }
|
|
}
|
|
return 0;
|
|
}
|
|
@@ -545,8 +646,12 @@ int InstrEmit_stwu(PPCHIRBuilder& f, const InstrData& i) {
|
|
// MEM(EA, 4) <- (RS)[32:63]
|
|
// RA <- EA
|
|
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
|
- f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE)));
|
|
+ Value* val32 = f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE);
|
|
+ f.Store(ea, f.ByteSwap(val32));
|
|
StoreEA(f, i.D.RA, ea);
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -555,8 +660,12 @@ int InstrEmit_stwux(PPCHIRBuilder& f, const InstrData& i) {
|
|
// MEM(EA, 4) <- (RS)[32:63]
|
|
// RA <- EA
|
|
Value* ea = CalculateEA(f, i.X.RA, i.X.RB);
|
|
- f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)));
|
|
+ Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
|
|
+ f.Store(ea, f.ByteSwap(val32));
|
|
StoreEA(f, i.X.RA, ea);
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -568,7 +677,11 @@ int InstrEmit_stwx(PPCHIRBuilder& f, const InstrData& i) {
|
|
// EA <- b + (RB)
|
|
// MEM(EA, 4) <- (RS)[32:63]
|
|
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
|
- f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)));
|
|
+ Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
|
|
+ f.Store(ea, f.ByteSwap(val32));
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -587,7 +700,11 @@ int InstrEmit_std(PPCHIRBuilder& f, const InstrData& i) {
|
|
}
|
|
|
|
Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2));
|
|
- f.StoreOffset(b, offset, f.ByteSwap(f.LoadGPR(i.DS.RT)));
|
|
+ Value* val64 = f.LoadGPR(i.DS.RT);
|
|
+ f.StoreOffset(b, offset, f.ByteSwap(val64));
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch64(f, i.address, val64, f.Add(b, offset));
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -596,8 +713,12 @@ int InstrEmit_stdu(PPCHIRBuilder& f, const InstrData& i) {
|
|
// MEM(EA, 8) <- (RS)
|
|
// RA <- EA
|
|
Value* ea = CalculateEA_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2));
|
|
- f.Store(ea, f.ByteSwap(f.LoadGPR(i.DS.RT)));
|
|
+ Value* val64 = f.LoadGPR(i.DS.RT);
|
|
+ f.Store(ea, f.ByteSwap(val64));
|
|
StoreEA(f, i.DS.RA, ea);
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -606,8 +727,12 @@ int InstrEmit_stdux(PPCHIRBuilder& f, const InstrData& i) {
|
|
// MEM(EA, 8) <- (RS)
|
|
// RA <- EA
|
|
Value* ea = CalculateEA(f, i.X.RA, i.X.RB);
|
|
- f.Store(ea, f.ByteSwap(f.LoadGPR(i.X.RT)));
|
|
+ Value* val64 = f.LoadGPR(i.X.RT);
|
|
+ f.Store(ea, f.ByteSwap(val64));
|
|
StoreEA(f, i.X.RA, ea);
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -619,7 +744,11 @@ int InstrEmit_stdx(PPCHIRBuilder& f, const InstrData& i) {
|
|
// EA <- b + (RB)
|
|
// MEM(EA, 8) <- (RS)
|
|
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
|
- f.Store(ea, f.ByteSwap(f.LoadGPR(i.X.RT)));
|
|
+ Value* val64 = f.LoadGPR(i.X.RT);
|
|
+ f.Store(ea, f.ByteSwap(val64));
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -684,7 +813,11 @@ int InstrEmit_stwbrx(PPCHIRBuilder& f, const InstrData& i) {
|
|
// EA <- b + (RB)
|
|
// MEM(EA, 4) <- bswap((RS)[32:63])
|
|
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
|
- f.Store(ea, f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
|
|
+ Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
|
|
+ f.Store(ea, val32);
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -696,7 +829,11 @@ int InstrEmit_stdbrx(PPCHIRBuilder& f, const InstrData& i) {
|
|
// EA <- b + (RB)
|
|
// MEM(EA, 8) <- bswap(RS)
|
|
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
|
- f.Store(ea, f.LoadGPR(i.X.RT));
|
|
+ Value* val64 = f.LoadGPR(i.X.RT);
|
|
+ f.Store(ea, val64);
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -843,7 +980,8 @@ int InstrEmit_stdcx(PPCHIRBuilder& f, const InstrData& i) {
|
|
// This will always succeed if under the global lock, however.
|
|
|
|
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
|
- Value* rt = f.ByteSwap(f.LoadGPR(i.X.RT));
|
|
+ Value* val64 = f.LoadGPR(i.X.RT);
|
|
+ Value* rt = f.ByteSwap(val64);
|
|
|
|
if (cvars::no_reserved_ops) {
|
|
f.Store(ea, rt);
|
|
@@ -862,6 +1000,9 @@ int InstrEmit_stdcx(PPCHIRBuilder& f, const InstrData& i) {
|
|
if (!cvars::no_reserved_ops) {
|
|
f.MemoryBarrier();
|
|
}
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch64(f, i.address, val64, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -885,7 +1026,8 @@ int InstrEmit_stwcx(PPCHIRBuilder& f, const InstrData& i) {
|
|
|
|
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
|
|
|
- Value* rt = f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
|
|
+ Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
|
|
+ Value* rt = f.ByteSwap(val32);
|
|
|
|
if (cvars::no_reserved_ops) {
|
|
f.Store(ea, rt);
|
|
@@ -904,7 +1046,9 @@ int InstrEmit_stwcx(PPCHIRBuilder& f, const InstrData& i) {
|
|
if (!cvars::no_reserved_ops) {
|
|
f.MemoryBarrier();
|
|
}
|
|
-
|
|
+ if (!::xe::cpu::audit67::vals().empty()) {
|
|
+ EmitAudit67ValueWatch(f, i.address, val32, ea);
|
|
+ }
|
|
return 0;
|
|
}
|
|
// Floating-point load (A-19)
|
|
diff --git a/src/xenia/cpu/ppc/ppc_hir_builder.cc b/src/xenia/cpu/ppc/ppc_hir_builder.cc
|
|
index 42d996cba..e2f7a45db 100644
|
|
--- a/src/xenia/cpu/ppc/ppc_hir_builder.cc
|
|
+++ b/src/xenia/cpu/ppc/ppc_hir_builder.cc
|
|
@@ -34,6 +34,97 @@ DEFINE_bool(
|
|
"unimplemented PowerPC instruction is encountered.",
|
|
"CPU");
|
|
|
|
+// AUDIT-061 — multi-PC branch probe. Parses cvars::audit_61_branch_probe_pcs
|
|
+// once and exposes a (pc -> trap_id) lookup table. trap_id range [200, 65535].
|
|
+// PCs outside the table are not probed. Native side reads g_audit61_pcs[idx].
|
|
+#include <vector>
|
|
+#include <string>
|
|
+namespace xe {
|
|
+namespace cpu {
|
|
+namespace audit61 {
|
|
+constexpr uint16_t kTrapBase = 200;
|
|
+constexpr size_t kMaxPcs = 32;
|
|
+static std::vector<uint32_t> g_pcs;
|
|
+static bool g_parsed = false;
|
|
+
|
|
+const std::vector<uint32_t>& pcs() {
|
|
+ if (!g_parsed) {
|
|
+ g_parsed = true;
|
|
+ const std::string& csv = cvars::audit_61_branch_probe_pcs;
|
|
+ size_t pos = 0;
|
|
+ while (pos < csv.size() && g_pcs.size() < kMaxPcs) {
|
|
+ size_t end = csv.find(',', pos);
|
|
+ std::string tok = csv.substr(pos, end - pos);
|
|
+ // strip whitespace
|
|
+ while (!tok.empty() && (tok.front() == ' ' || tok.front() == '\t'))
|
|
+ tok.erase(tok.begin());
|
|
+ while (!tok.empty() && (tok.back() == ' ' || tok.back() == '\t'))
|
|
+ tok.pop_back();
|
|
+ if (!tok.empty()) {
|
|
+ try {
|
|
+ uint32_t v = static_cast<uint32_t>(std::stoul(tok, nullptr, 0));
|
|
+ g_pcs.push_back(v);
|
|
+ } catch (...) {
|
|
+ }
|
|
+ }
|
|
+ if (end == std::string::npos) break;
|
|
+ pos = end + 1;
|
|
+ }
|
|
+ }
|
|
+ return g_pcs;
|
|
+}
|
|
+
|
|
+// Returns trap id for pc, or 0 if pc not in probe set.
|
|
+uint16_t trap_id_for(uint32_t pc) {
|
|
+ const auto& v = pcs();
|
|
+ for (size_t i = 0; i < v.size(); ++i) {
|
|
+ if (v[i] == pc) return static_cast<uint16_t>(kTrapBase + i);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+} // namespace audit61
|
|
+
|
|
+// AUDIT-067 — value-watch. Parses cvars::audit_67_value_watch once, exposes
|
|
+// values via vals(). Trap codes for matches start at kTrapBase = 250.
|
|
+namespace audit67 {
|
|
+constexpr uint16_t kTrapBase = 250;
|
|
+constexpr size_t kMaxVals = 4;
|
|
+static std::vector<uint32_t> g_vals;
|
|
+static bool g_parsed = false;
|
|
+
|
|
+const std::vector<uint32_t>& vals() {
|
|
+ if (!g_parsed) {
|
|
+ g_parsed = true;
|
|
+ const std::string& csv = cvars::audit_67_value_watch;
|
|
+ size_t pos = 0;
|
|
+ while (pos < csv.size() && g_vals.size() < kMaxVals) {
|
|
+ size_t end = csv.find(',', pos);
|
|
+ std::string tok = csv.substr(pos, end - pos);
|
|
+ while (!tok.empty() && (tok.front() == ' ' || tok.front() == '\t'))
|
|
+ tok.erase(tok.begin());
|
|
+ while (!tok.empty() && (tok.back() == ' ' || tok.back() == '\t'))
|
|
+ tok.pop_back();
|
|
+ if (!tok.empty()) {
|
|
+ try {
|
|
+ uint32_t v = static_cast<uint32_t>(std::stoul(tok, nullptr, 0));
|
|
+ g_vals.push_back(v);
|
|
+ } catch (...) {
|
|
+ }
|
|
+ }
|
|
+ if (end == std::string::npos) break;
|
|
+ pos = end + 1;
|
|
+ }
|
|
+ XELOGI("AUDIT-067-INIT csv=\"{}\" parsed_count={}", csv, g_vals.size());
|
|
+ for (size_t i = 0; i < g_vals.size(); ++i) {
|
|
+ XELOGI("AUDIT-067-INIT vals[{}] = 0x{:08X}", i, g_vals[i]);
|
|
+ }
|
|
+ }
|
|
+ return g_vals;
|
|
+}
|
|
+} // namespace audit67
|
|
+} // namespace cpu
|
|
+} // namespace xe
|
|
+
|
|
namespace xe {
|
|
namespace cpu {
|
|
namespace ppc {
|
|
@@ -174,6 +265,20 @@ bool PPCHIRBuilder::Emit(GuestFunction* function, uint32_t flags) {
|
|
|
|
MaybeBreakOnInstruction(address);
|
|
|
|
+ // AUDIT-061: emit a trap before this instruction if it's on the probe
|
|
+ // list. The trap fires BEFORE the cmp/branch HIR emit so the native
|
|
+ // handler observes cr0/cr6 set by the *previous* instruction (the cmp
|
|
+ // that controls this conditional branch). ContextBarrier flushes
|
|
+ // HIR temporaries to PPCContext so the handler reads consistent state.
|
|
+ if (!::xe::cpu::audit61::pcs().empty()) {
|
|
+ uint16_t tid = ::xe::cpu::audit61::trap_id_for(address);
|
|
+ if (tid != 0) {
|
|
+ Comment("--audit_61_branch_probe target");
|
|
+ ContextBarrier();
|
|
+ Trap(tid);
|
|
+ }
|
|
+ }
|
|
+
|
|
InstrData i;
|
|
i.address = address;
|
|
i.code = code;
|
|
diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc
|
|
index 1034dcac7..38148010c 100644
|
|
--- a/src/xenia/cpu/xex_module.cc
|
|
+++ b/src/xenia/cpu/xex_module.cc
|
|
@@ -51,6 +51,38 @@ DECLARE_bool(allow_plugins);
|
|
|
|
DECLARE_bool(disable_context_promotion);
|
|
|
|
+// AUDIT-068 Session 2: helper that scans a raw byte buffer for 4-byte aligned
|
|
+// u32 values that match the configured audit_68 value list, emitting a
|
|
+// per-position event. Used to pre-scan XEX-loader memcpys that bypass all
|
|
+// other hooked surfaces. Cost when off: a single relaxed atomic load.
|
|
+static inline void audit68_prescan_memcpy(uint32_t guest_va_dest,
|
|
+ const uint8_t* src, size_t size,
|
|
+ const char* tag) {
|
|
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
|
|
+ if (active == 0) return;
|
|
+ if ((active & 0x1) && size >= 4) {
|
|
+ size_t aligned_end = size & ~size_t(3);
|
|
+ for (size_t i = 0; i < aligned_end; i += 4) {
|
|
+ uint32_t be_u32 = (uint32_t(src[i + 0]) << 24) |
|
|
+ (uint32_t(src[i + 1]) << 16) |
|
|
+ (uint32_t(src[i + 2]) << 8) | uint32_t(src[i + 3]);
|
|
+ xe::audit_68::check_guest_va(
|
|
+ static_cast<uint32_t>(guest_va_dest + i), be_u32, 4, tag);
|
|
+ }
|
|
+ }
|
|
+ if (active & 0x2) {
|
|
+ // Coarse addr-only event over the full span (dest only).
|
|
+ uint64_t v = 0;
|
|
+ if (size >= 4) {
|
|
+ v = (uint64_t(src[0]) << 24) | (uint64_t(src[1]) << 16) |
|
|
+ (uint64_t(src[2]) << 8) | uint64_t(src[3]);
|
|
+ }
|
|
+ xe::audit_68::check_guest_va(guest_va_dest, v,
|
|
+ static_cast<uint8_t>(std::min<size_t>(size, 8)),
|
|
+ tag);
|
|
+ }
|
|
+}
|
|
+
|
|
static constexpr uint8_t xe_xex1_retail_key[16] = {
|
|
0xA2, 0x6C, 0x10, 0xF7, 0x1F, 0xD9, 0x35, 0xE9,
|
|
0x8B, 0x99, 0x92, 0x2C, 0xE9, 0x32, 0x15, 0x72};
|
|
@@ -424,6 +456,10 @@ int XexModule::ApplyPatch(XexModule* module) {
|
|
// If image_source_offset is set, copy [source_offset:source_size] to
|
|
// target_offset
|
|
if (patch_header->delta_image_source_offset) {
|
|
+ audit68_prescan_memcpy(
|
|
+ module->base_address_ + patch_header->delta_image_target_offset,
|
|
+ base_exe + patch_header->delta_image_source_offset,
|
|
+ patch_header->delta_image_source_size, "xex_memcpy_patch");
|
|
memcpy(base_exe + patch_header->delta_image_target_offset,
|
|
base_exe + patch_header->delta_image_source_offset,
|
|
patch_header->delta_image_source_size);
|
|
@@ -589,6 +625,8 @@ int XexModule::ReadImageUncompressed(const void* xex_addr, size_t xex_length) {
|
|
if (exe_length > uncompressed_size) {
|
|
return 1;
|
|
}
|
|
+ audit68_prescan_memcpy(base_address_, p, exe_length,
|
|
+ "xex_memcpy_uncompressed");
|
|
memcpy(buffer, p, exe_length);
|
|
return 0;
|
|
case XEX_ENCRYPTION_NORMAL:
|
|
@@ -665,6 +703,9 @@ int XexModule::ReadImageBasicCompressed(const void* xex_addr,
|
|
// Overflow.
|
|
return 1;
|
|
}
|
|
+ audit68_prescan_memcpy(
|
|
+ base_address_ + static_cast<uint32_t>(d - buffer), p, data_size,
|
|
+ "xex_memcpy_basic_block");
|
|
memcpy(d, p, data_size);
|
|
break;
|
|
case XEX_ENCRYPTION_NORMAL: {
|
|
@@ -799,6 +840,17 @@ int XexModule::ReadImageCompressed(const void* xex_addr, size_t xex_length) {
|
|
result_code = lzx_decompress(
|
|
compress_buffer, d - compress_buffer, buffer, uncompressed_size,
|
|
compression_info->normal.window_size, nullptr, 0);
|
|
+
|
|
+ // AUDIT-068 Session 2: lzx_decompress writes directly into guest
|
|
+ // memory via the host pointer `buffer`. There's no host-side hook
|
|
+ // covering its internal bulk writes, so post-scan the produced bytes
|
|
+ // to recover what the XEX loader actually placed at `base_address_`.
|
|
+ // This is THE most likely catch for the vtable install case (vtables
|
|
+ // live in the .rdata section that is part of the LZX-compressed image).
|
|
+ if (result_code == 0) {
|
|
+ audit68_prescan_memcpy(base_address_, buffer, uncompressed_size,
|
|
+ "xex_lzx_decompress_output");
|
|
+ }
|
|
} else {
|
|
XELOGE("Unable to allocate XEX memory at {:08X}-{:08X}.", base_address_,
|
|
uncompressed_size);
|
|
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
|
|
index 22ba66aee..f02b11d7f 100644
|
|
--- a/src/xenia/memory.cc
|
|
+++ b/src/xenia/memory.cc
|
|
@@ -14,6 +14,7 @@
|
|
|
|
#include "third_party/fmt/include/fmt/format.h"
|
|
#include "xenia/base/assert.h"
|
|
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
|
#include "xenia/base/byte_stream.h"
|
|
#include "xenia/base/clock.h"
|
|
#include "xenia/base/cvar.h"
|
|
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
|
|
|
|
static Memory* active_memory_ = nullptr;
|
|
|
|
+// AUDIT-068 — process-global accessor (declared in memory.h).
|
|
+Memory* Memory::active() { return active_memory_; }
|
|
+
|
|
void CrashDump() {
|
|
static std::atomic<int> in_crash_dump(0);
|
|
if (in_crash_dump.fetch_add(1)) {
|
|
@@ -151,11 +155,41 @@ Memory::Memory() {
|
|
uint32_t(xe::memory::allocation_granularity());
|
|
assert_zero(active_memory_);
|
|
active_memory_ = this;
|
|
+
|
|
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
|
|
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
|
|
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
|
|
+ Memory* m = active_memory_;
|
|
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
|
|
+ };
|
|
+
|
|
+ // AUDIT-068 Session 3: register guest→host translation thunk and a
|
|
+ // page-protect query thunk for the read-mode probe. The probe thread uses
|
|
+ // QueryProtect to skip unmapped/uncommitted pages before dereferencing.
|
|
+ xe::audit_68::g_guest_to_host_thunk = [](uint32_t va) -> const void* {
|
|
+ Memory* m = active_memory_;
|
|
+ return m ? reinterpret_cast<const void*>(m->TranslateVirtual(va))
|
|
+ : nullptr;
|
|
+ };
|
|
+ xe::audit_68::g_query_protect_thunk = [](uint32_t va,
|
|
+ uint32_t* out_protect) -> bool {
|
|
+ Memory* m = active_memory_;
|
|
+ if (!m) return false;
|
|
+ BaseHeap* heap = m->LookupHeap(va);
|
|
+ if (!heap) {
|
|
+ if (out_protect) *out_protect = 0;
|
|
+ return false;
|
|
+ }
|
|
+ return heap->QueryProtect(va, out_protect);
|
|
+ };
|
|
}
|
|
|
|
Memory::~Memory() {
|
|
assert_true(active_memory_ == this);
|
|
active_memory_ = nullptr;
|
|
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
|
|
+ xe::audit_68::g_guest_to_host_thunk = nullptr;
|
|
+ xe::audit_68::g_query_protect_thunk = nullptr;
|
|
|
|
// Uninstall the MMIO handler, as we won't be able to service more
|
|
// requests.
|
|
@@ -540,16 +574,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
|
|
}
|
|
|
|
void Memory::Zero(uint32_t address, uint32_t size) {
|
|
+ // AUDIT-068: log a single span event with value=0; size is capped at 8 for
|
|
+ // the value field. Slow path is gated on the atomic flag.
|
|
+ xe::audit_68::check_guest_va(address, 0,
|
|
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Zero");
|
|
std::memset(TranslateVirtual(address), 0, size);
|
|
}
|
|
|
|
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
|
|
+ // Replicate the fill byte across the whole value field so value_matches sees
+ // the pattern actually written: a watched u32 matches only if it is that byte
+ // repeated four times (0xCDCDCDCD for 0xCD); short fills use the low bytes.
|
|
+ uint64_t v = static_cast<uint64_t>(value);
|
|
+ v |= v << 8;
|
|
+ v |= v << 16;
|
|
+ v |= v << 32;
|
|
+ xe::audit_68::check_guest_va(address, v,
|
|
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Fill");
|
|
std::memset(TranslateVirtual(address), value, size);
|
|
}
|
|
|
|
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
|
|
uint8_t* pdest = TranslateVirtual(dest);
|
|
const uint8_t* psrc = TranslateVirtual(src);
|
|
+ // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the
|
|
+ // source buffer. Catches XEX-loader-style memcpys where a vptr (the target
|
|
+ // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost
|
|
+ // O(size/4 * N_values) with N_values capped at 8 inside value_matches —
|
|
+ // negligible vs the underlying memcpy throughput.
|
|
+ //
|
|
+ // Gated on active bit 0x1 (values-mode) AND active != 0. If only addrs are
|
|
+ // configured (Run 2 voice-struct mode), we still emit a single addr-only
|
|
+ // event covering the destination span so addr-watch isn't broken.
|
|
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
|
|
+ if (active != 0) [[unlikely]] {
|
|
+ if ((active & 0x1) && size >= 4) {
|
|
+ // Scan source for any configured u32 value (big-endian, mirrors how
|
|
+ // guest sees the bytes). 4-byte aligned offsets only.
|
|
+ uint32_t aligned_end = size & ~3u;
|
|
+ for (uint32_t i = 0; i < aligned_end; i += 4) {
|
|
+ uint32_t be_u32 =
|
|
+ (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) |
|
|
+ (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]);
|
|
+ xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy");
|
|
+ }
|
|
+ }
|
|
+ if (active & 0x2) {
|
|
+ // Addr-only mode: emit a single coarse event tagged with the dest base
|
|
+ // and first u32 of source for context. The slow-path range check will
|
|
+ // log iff the dest span intersects a configured addr range.
|
|
+ uint64_t v = 0;
|
|
+ if (size >= 4) {
|
|
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
|
|
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
|
|
+ } else if (size > 0) {
|
|
+ for (uint32_t i = 0; i < size; ++i) {
|
|
+ v = (v << 8) | psrc[i];
|
|
+ }
|
|
+ }
|
|
+ xe::audit_68::check_guest_va(
|
|
+ dest, v, static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Copy");
|
|
+ }
|
|
+ }
|
|
std::memcpy(pdest, psrc, size);
|
|
}
|
|
|
|
diff --git a/src/xenia/memory.h b/src/xenia/memory.h
|
|
index bd9519a40..fa712fe08 100644
|
|
--- a/src/xenia/memory.h
|
|
+++ b/src/xenia/memory.h
|
|
@@ -347,6 +347,13 @@ class Memory {
|
|
Memory();
|
|
~Memory();
|
|
|
|
+ // AUDIT-068: process-global Memory singleton accessor. Returns the
|
|
+ // currently-constructed Memory instance, or nullptr if none. Set inside
|
|
+ // Memory::Memory()/~Memory(); see memory.cc `active_memory_`. Used by
|
|
+ // xe::audit_68::check_host_write() to translate a host pointer back to a
|
|
+ // guest VA without an explicit Memory* context.
|
|
+ static Memory* active();
|
|
+
|
|
// Initializes the memory system.
|
|
// This may fail if the host address space could not be reserved or the
|
|
// mapping to the file system fails.
|
|
|
|
# === new file: src/xenia/base/audit_68_host_mem_watch_fwd.h ===
|
|
/**
|
|
******************************************************************************
|
|
* Xenia : Xbox 360 Emulator Research Project *
|
|
******************************************************************************
|
|
* AUDIT-068: host-side memory-write watch — forward declarations only.
|
|
*
|
|
* Declarations here are intentionally minimal so that xenia/base/memory.h can
|
|
* include this without pulling in xenia/memory.h (which would create a
|
|
* circular dependency: xenia-base → xenia-core → xenia-base). The full
|
|
* definitions live in xenia/audit_68_host_mem_watch.{h,cc} (xenia-core).
|
|
*
|
|
* Hot path: callers (the integer specializations of xe::store_and_swap<T>)
|
|
* load the atomic flag once. When it is 0 (default), no further work is done
|
|
* — a single relaxed atomic load and a predictable branch.
|
|
******************************************************************************
|
|
*/
|
|
|
|
#ifndef XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
|
#define XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
|
|
|
#include <atomic>
|
|
#include <cstdint>
|
|
|
|
namespace xe {
|
|
namespace audit_68 {
|
|
|
|
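// Watch-mode bitmask. 0 = inactive; bit 0x1 = a value watch is configured,
// bit 0x2 = an address-range watch is configured. The definition initialises
// it to the sentinel 0xFFFFFFFF ("cvars not parsed yet"), so the first
// hot-path call falls through to the slow path, which parses the cvars and
// stores the real mask. Loaded relaxed on the hot path.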
|
|
//
|
|
// Implementation lives in xenia-base (audit_68_host_mem_watch_base.cc) so
|
|
// that callers in xenia-base/xenia-cpu/xenia-kernel can resolve the symbol
|
|
// without depending on xenia-core link order.
|
|
extern std::atomic<uint32_t> g_active;
|
|
|
|
// Host-pointer → guest-VA translation thunk. xenia/memory.cc::Memory::Memory()
|
|
// registers a function pointer here that wraps Memory::HostToGuestVirtual.
|
|
// Until set, the slow path falls back to logging the raw host pointer.
|
|
using HostToGuestThunk = uint32_t (*)(const void*);
|
|
extern HostToGuestThunk g_host_to_guest_thunk;
|
|
|
|
// AUDIT-068 Session 3 — read-mode probe support.
|
|
//
|
|
// Guest-VA → host-pointer translation thunk (wraps Memory::TranslateVirtual).
|
|
// Used by the read-probe poll thread to sample bytes at configured guest VAs.
|
|
// May return non-null even for unmapped/uncommitted VAs (the underlying
|
|
// translation is arithmetic — virtual_membase_ + va) — callers MUST consult
|
|
// the QueryProtect thunk before dereferencing.
|
|
using GuestToHostThunk = const void* (*)(uint32_t);
|
|
extern GuestToHostThunk g_guest_to_host_thunk;
|
|
|
|
// Returns true iff the page containing `guest_va` is committed and readable;
|
|
// out_protect receives the raw page protect bits (kProtectRead, etc.). Wraps
|
|
// Memory::LookupHeap() + BaseHeap::QueryProtect(). Used as a guard before the
|
|
// read-probe samples bytes (early-boot heap-not-yet-mapped path must NOT
|
|
// crash).
|
|
using QueryProtectThunk = bool (*)(uint32_t, uint32_t* /*out_protect*/);
|
|
extern QueryProtectThunk g_query_protect_thunk;
|
|
|
|
// Slow path. Only invoked when g_active is non-zero. Implementation in
|
|
// xenia/base/audit_68_host_mem_watch_base.cc (xenia-base).
|
|
//
|
|
// host_ptr: the host pointer being written (from store_and_swap's `mem`).
|
|
// value: the value being stored (zero-extended to u64).
|
|
// size: 1, 2, 4 or 8.
|
|
// tag: caller-provided tag string (e.g. "store_and_swap<u32>"). Logged
|
|
// verbatim, no formatting. Must be a static string (lifetime
|
|
// beyond this call).
|
|
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
|
|
uint8_t size, const char* tag);
|
|
|
|
// Same as above, but with a known guest VA (for callers like Memory::Zero/
|
|
// Fill/Copy that have the VA but not a single host pointer).
|
|
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
|
|
const char* tag);
|
|
|
|
// Inline hot-path wrappers. Single relaxed atomic load + branch when inactive.
|
|
inline void check_host_write(const void* host_ptr, uint64_t value, uint8_t size,
|
|
const char* tag) {
|
|
if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
|
check_host_write_slowpath(host_ptr, value, size, tag);
|
|
}
|
|
}
|
|
|
|
inline void check_guest_va(uint32_t guest_va, uint64_t value, uint8_t size,
|
|
const char* tag) {
|
|
if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
|
check_guest_va_slowpath(guest_va, value, size, tag);
|
|
}
|
|
}
|
|
|
|
} // namespace audit_68
|
|
} // namespace xe
|
|
|
|
#endif // XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
|
|
|
# === new file: src/xenia/base/audit_68_host_mem_watch_base.cc ===
|
|
/**
|
|
******************************************************************************
|
|
* Xenia : Xbox 360 Emulator Research Project *
|
|
******************************************************************************
|
|
* AUDIT-068 host-side memory-write watch — implementation (xenia-base).
|
|
*
|
|
* Mirrors AUDIT-067 in spirit (value-CSV cvar, lazy parse, atomic-bool
|
|
* activation) but observes the HOST-side write paths instead of the JIT'd
|
|
* guest store opcodes. Captures writes performed by xe::store_and_swap<T>
|
|
* (xenia/base/memory.h) and by Memory::Zero/Fill/Copy (xenia/memory.cc).
|
|
*
|
|
* Lives in xenia-base so that the slow-path symbols resolve for callers in
|
|
* xenia-base / xenia-cpu / xenia-kernel without depending on xenia-core link
|
|
* order. The host→guest VA translation is provided by a function-pointer
|
|
* thunk that xenia::Memory::Memory() registers at construction.
|
|
*
|
|
* See xenia/base/audit_68_host_mem_watch_fwd.h for the API.
|
|
* See xenia/cpu/cpu_flags.{h,cc} for the cvars.
|
|
******************************************************************************
|
|
*/
|
|
|
|
#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
|
|
|
#include <algorithm>
|
|
#include <atomic>
|
|
#include <chrono>
|
|
#include <cstring>
|
|
#include <mutex>
|
|
#include <string>
|
|
#include <thread>
|
|
#include <vector>
|
|
|
|
#include "xenia/base/cvar.h"
|
|
#include "xenia/base/logging.h"
|
|
#include "xenia/base/threading.h"
|
|
|
|
// We need the cvars but cpu_flags.h lives in xenia-cpu. To avoid an upward
|
|
// dep we re-declare them here with the same macros — cvar.h's DECLARE_*
|
|
// macros are header-safe (just `extern` declarations) and resolve against the
|
|
// definitions in xenia-cpu/cpu_flags.cc at link time. (xenia-cpu links AFTER
|
|
// xenia-base in the executable; symbols in xenia-cpu/cpu_flags.cc are still
|
|
// resolvable from xenia-base translation units because the lld pass folds
|
|
// all libraries together at the executable level.)
|
|
DECLARE_string(audit_68_host_mem_watch_values);
|
|
DECLARE_string(audit_68_host_mem_watch_addrs);
|
|
DECLARE_string(audit_68_host_mem_read_probe);
|
|
|
|
namespace xe {
|
|
namespace audit_68 {
|
|
|
|
// Hot-path flag (declared in fwd header). Initial sentinel UINT32_MAX means
|
|
// "unparsed"; the very first slow-path call invokes ensure_parsed() which
|
|
// replaces the sentinel with the actual active bitmask (0 if both cvars are
|
|
// empty, 1/2/3 otherwise). After that, hot-path calls observe the real value
|
|
// and bail out cheaply when off.
|
|
std::atomic<uint32_t> g_active{0xFFFFFFFFu};
|
|
|
|
// Host→guest VA translation thunk (declared in fwd header). Set by
|
|
// xenia::Memory::Memory() at construction; reset to nullptr by ~Memory().
|
|
HostToGuestThunk g_host_to_guest_thunk{nullptr};
|
|
|
|
// AUDIT-068 Session 3: guest→host translation + page-protect query thunks.
|
|
GuestToHostThunk g_guest_to_host_thunk{nullptr};
|
|
QueryProtectThunk g_query_protect_thunk{nullptr};
|
|
|
|
namespace {
|
|
|
|
constexpr size_t kMaxValues = 8;
|
|
constexpr size_t kMaxAddrRanges = 8;
|
|
|
|
// Closed guest-VA interval watched by the address filter. Both bounds are
// inclusive, which is how addr_matches() tests a write span for overlap.
struct AddrRange {
  uint32_t start;  // inclusive
  uint32_t end;    // inclusive
};
|
|
|
|
std::vector<uint32_t> g_values;
|
|
std::vector<AddrRange> g_addrs;
|
|
std::once_flag g_parsed_flag;
|
|
|
|
std::chrono::steady_clock::time_point g_t0;
|
|
std::once_flag g_t0_once;
|
|
|
|
// Nanoseconds elapsed on the steady clock since the first call to this
// function. The reference point g_t0 is captured exactly once via g_t0_once.
int64_t host_ns_since_start() {
  using clock = std::chrono::steady_clock;
  std::call_once(g_t0_once, []() { g_t0 = clock::now(); });
  const auto elapsed = clock::now() - g_t0;
  return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
}
|
|
|
|
// Strips leading and trailing spaces and tabs from `s` in place. Other
// whitespace (newlines, carriage returns) is left untouched.
//
// Uses one search and one erase per side instead of erasing the first
// character in a loop, which was quadratic in the length of the prefix.
void trim(std::string& s) {
  static constexpr const char* kBlank = " \t";
  const size_t last_kept = s.find_last_not_of(kBlank);
  if (last_kept == std::string::npos) {
    // Empty, or nothing but blanks.
    s.clear();
    return;
  }
  s.erase(last_kept + 1);
  // A non-blank character is known to exist, so this is never npos.
  s.erase(0, s.find_first_not_of(kBlank));
}
|
|
|
|
// Parses `tok` as an unsigned 32-bit integer. The base is auto-detected
// (base 0: "0x" prefix = hex, leading "0" = octal, otherwise decimal).
//
// Returns true and writes the result to `*out` on success. Returns false and
// leaves `*out` untouched when the text is not a number or the value does not
// fit in 32 bits. Negative input is rejected as well, because it wraps to a
// value above UINT32_MAX in the 64-bit parse.
//
// The parse is done in 64 bits and range-checked explicitly so that the
// result does not depend on the width of `unsigned long`: previously an
// oversized token such as "0x100000000" was silently truncated to 0 where
// `unsigned long` is 64-bit and rejected where it is 32-bit.
bool parse_u32(const std::string& tok, uint32_t* out) {
  try {
    const unsigned long long wide = std::stoull(tok, nullptr, 0);
    if (wide > UINT32_MAX) {
      return false;
    }
    *out = static_cast<uint32_t>(wide);
    return true;
  } catch (...) {
    // std::invalid_argument (not a number) or std::out_of_range.
    return false;
  }
}
|
|
|
|
void parse_values_csv(const std::string& csv) {
|
|
size_t pos = 0;
|
|
while (pos < csv.size() && g_values.size() < kMaxValues) {
|
|
size_t end = csv.find(',', pos);
|
|
std::string tok = csv.substr(pos, end - pos);
|
|
trim(tok);
|
|
if (!tok.empty()) {
|
|
uint32_t v;
|
|
if (parse_u32(tok, &v)) {
|
|
g_values.push_back(v);
|
|
}
|
|
}
|
|
if (end == std::string::npos) break;
|
|
pos = end + 1;
|
|
}
|
|
}
|
|
|
|
void parse_addrs_csv(const std::string& csv) {
|
|
size_t pos = 0;
|
|
while (pos < csv.size() && g_addrs.size() < kMaxAddrRanges) {
|
|
size_t end = csv.find(',', pos);
|
|
std::string tok = csv.substr(pos, end - pos);
|
|
trim(tok);
|
|
if (!tok.empty()) {
|
|
size_t dash = tok.find('-', 2); // skip leading "0x" if present
|
|
AddrRange r{};
|
|
if (dash != std::string::npos) {
|
|
std::string s = tok.substr(0, dash);
|
|
std::string e = tok.substr(dash + 1);
|
|
trim(s);
|
|
trim(e);
|
|
uint32_t a, b;
|
|
if (parse_u32(s, &a) && parse_u32(e, &b)) {
|
|
r.start = a;
|
|
r.end = b;
|
|
g_addrs.push_back(r);
|
|
}
|
|
} else {
|
|
uint32_t a;
|
|
if (parse_u32(tok, &a)) {
|
|
r.start = a;
|
|
r.end = a + 7;
|
|
g_addrs.push_back(r);
|
|
}
|
|
}
|
|
}
|
|
if (end == std::string::npos) break;
|
|
pos = end + 1;
|
|
}
|
|
}
|
|
|
|
void parse_locked() {
|
|
parse_values_csv(cvars::audit_68_host_mem_watch_values);
|
|
parse_addrs_csv(cvars::audit_68_host_mem_watch_addrs);
|
|
|
|
uint32_t bits = 0;
|
|
if (!g_values.empty()) bits |= 0x1;
|
|
if (!g_addrs.empty()) bits |= 0x2;
|
|
g_active.store(bits, std::memory_order_release);
|
|
|
|
XELOGI(
|
|
"AUDIT-068-INIT values_csv=\"{}\" addrs_csv=\"{}\" values_parsed={} "
|
|
"addr_ranges_parsed={} active=0x{:X}",
|
|
cvars::audit_68_host_mem_watch_values,
|
|
cvars::audit_68_host_mem_watch_addrs, g_values.size(), g_addrs.size(),
|
|
bits);
|
|
for (size_t i = 0; i < g_values.size(); ++i) {
|
|
XELOGI("AUDIT-068-INIT value[{}] = 0x{:08X}", i, g_values[i]);
|
|
}
|
|
for (size_t i = 0; i < g_addrs.size(); ++i) {
|
|
XELOGI("AUDIT-068-INIT addr_range[{}] = 0x{:08X}-0x{:08X}", i,
|
|
g_addrs[i].start, g_addrs[i].end);
|
|
}
|
|
}
|
|
|
|
bool value_matches(uint64_t value, uint8_t size) {
|
|
for (uint32_t v : g_values) {
|
|
if (size >= 4 && static_cast<uint32_t>(value) == v) return true;
|
|
if (size == 8 && static_cast<uint32_t>(value >> 32) == v) return true;
|
|
if (size == 2 && (v & 0xFFFF) == (value & 0xFFFF)) return true;
|
|
if (size == 1 && (v & 0xFF) == (value & 0xFF)) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool addr_matches(uint32_t guest_va, uint8_t size) {
|
|
uint32_t lo = guest_va;
|
|
uint32_t hi = guest_va + (size ? size - 1 : 0);
|
|
for (const auto& r : g_addrs) {
|
|
if (lo <= r.end && hi >= r.start) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Id of the calling thread as reported by xe::threading, as a u32 for logging.
uint32_t current_tid() {
  const uint32_t tid = xe::threading::current_thread_id();
  return tid;
}
|
|
|
|
// Writes one AUDIT-068-HOST-WRITE log line describing a matched write: guest
// VA, host pointer, value, width, caller tag ("<null>" when `tag` is null),
// nanoseconds since the first timestamp was taken, and the calling thread id.
void emit(uint32_t guest_va, const void* host_ptr, uint64_t value,
          uint8_t size, const char* tag) {
  const auto host_addr = reinterpret_cast<uintptr_t>(host_ptr);
  const auto width = static_cast<uint32_t>(size);
  const char* origin = tag ? tag : "<null>";
  XELOGI(
      "AUDIT-068-HOST-WRITE guest_va=0x{:08X} host_ptr=0x{:016X} "
      "val=0x{:016X} sz={} fn={} host_ns={} tid={}",
      guest_va, host_addr, value, width, origin, host_ns_since_start(),
      current_tid());
}
|
|
|
|
// ===== AUDIT-068 Session 3 — read-mode probe state =====
|
|
|
|
constexpr size_t kMaxReadProbes = 8;
|
|
|
|
// One read-probe entry, filled in by parse_read_probe_tok() from a
// "VA:SIZE:PERIOD_NS" token.
struct ReadProbe {
  // Guest virtual address to sample.
  uint32_t guest_va;
  uint8_t size;  // 1, 2, 4, 8
  // Requested sampling period; the parser raises anything below 1000 ns to
  // 1000 ns.
  uint64_t period_ns;
  // Set to 0 by the parser. Presumably the previously sampled value used for
  // change detection — TODO confirm against the poll thread.
  uint64_t last_value;
  // Set to false by the parser. Presumably whether the previous sample was
  // readable — TODO confirm against the poll thread.
  bool last_was_valid;
};
|
|
|
|
std::vector<ReadProbe> g_read_probes;
|
|
std::atomic<bool> g_read_probe_thread_running{false};
|
|
std::atomic<bool> g_read_probe_shutdown{false};
|
|
std::thread g_read_probe_thread;
|
|
std::once_flag g_read_probe_started;
|
|
|
|
bool parse_read_probe_tok(const std::string& tok, ReadProbe* out) {
|
|
// Expected form: "VA:SIZE:PERIOD_NS" — three colon-separated u64.
|
|
size_t c1 = tok.find(':');
|
|
if (c1 == std::string::npos) return false;
|
|
size_t c2 = tok.find(':', c1 + 1);
|
|
if (c2 == std::string::npos) return false;
|
|
std::string sva = tok.substr(0, c1);
|
|
std::string ssz = tok.substr(c1 + 1, c2 - c1 - 1);
|
|
std::string sper = tok.substr(c2 + 1);
|
|
trim(sva);
|
|
trim(ssz);
|
|
trim(sper);
|
|
try {
|
|
out->guest_va = static_cast<uint32_t>(std::stoul(sva, nullptr, 0));
|
|
uint32_t sz = static_cast<uint32_t>(std::stoul(ssz, nullptr, 0));
|
|
if (sz != 1 && sz != 2 && sz != 4 && sz != 8) return false;
|
|
out->size = static_cast<uint8_t>(sz);
|
|
out->period_ns = static_cast<uint64_t>(std::stoull(sper, nullptr, 0));
|
|
if (out->period_ns < 1000) out->period_ns = 1000; // 1us floor.
|
|
out->last_value = 0;
|
|
out->last_was_valid = false;
|
|
return true;
|
|
} catch (...) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
void parse_read_probes_csv(const std::string& csv) {
|
|
size_t pos = 0;
|
|
while (pos < csv.size() && g_read_probes.size() < kMaxReadProbes) {
|
|
size_t end = csv.find(',', pos);
|
|
std::string tok = csv.substr(pos, end - pos);
|
|
trim(tok);
|
|
if (!tok.empty()) {
|
|
ReadProbe rp{};
|
|
if (parse_read_probe_tok(tok, &rp)) {
|
|
g_read_probes.push_back(rp);
|
|
}
|
|
}
|
|
if (end == std::string::npos) break;
|
|
pos = end + 1;
|
|
}
|
|
}
|
|
|
|
uint64_t sample_at(uint32_t guest_va, uint8_t size, bool* out_valid) {
|
|
*out_valid = false;
|
|
if (!g_guest_to_host_thunk || !g_query_protect_thunk) return 0;
|
|
uint32_t prot = 0;
|
|
if (!g_query_protect_thunk(guest_va, &prot)) return 0;
|
|
// Page must have at least read permission. The protect bits map to
|
|
// xe::memory::PageAccess: kReadOnly=1, kReadWrite=2, kExecuteReadOnly=3,
|
|
// kExecuteReadWrite=4. kNoAccess=0. Accept anything non-zero — caller
|
|
// distinguishes via the second-pass change detector anyway.
|
|
if (prot == 0) return 0;
|
|
const void* hp = g_guest_to_host_thunk(guest_va);
|
|
if (!hp) return 0;
|
|
uint64_t v = 0;
|
|
// Guest memory is big-endian. We use raw byte loads to avoid alignment
|
|
// traps for size>4 on possibly-unaligned VAs. The "value" we log is the
|
|
// host-endian interpretation of the BE bytes (matches store_and_swap's
|
|
// logging convention: the byte-swapped scalar).
|
|
const uint8_t* bp = reinterpret_cast<const uint8_t*>(hp);
|
|
switch (size) {
|
|
case 1: v = bp[0]; break;
|
|
case 2: v = (uint64_t(bp[0]) << 8) | bp[1]; break;
|
|
case 4:
|
|
v = (uint64_t(bp[0]) << 24) | (uint64_t(bp[1]) << 16) |
|
|
(uint64_t(bp[2]) << 8) | bp[3];
|
|
break;
|
|
case 8:
|
|
v = (uint64_t(bp[0]) << 56) | (uint64_t(bp[1]) << 48) |
|
|
(uint64_t(bp[2]) << 40) | (uint64_t(bp[3]) << 32) |
|
|
(uint64_t(bp[4]) << 24) | (uint64_t(bp[5]) << 16) |
|
|
(uint64_t(bp[6]) << 8) | bp[7];
|
|
break;
|
|
}
|
|
*out_valid = true;
|
|
return v;
|
|
}
|
|
|
|
void read_probe_thread_main() {
|
|
// Compute the GCD-ish min poll period across all probes; sleep that long
|
|
// between scans. Each probe fires only when its own period_ns has elapsed
|
|
// since the last sample (per-probe `next_fire_ns`).
|
|
uint64_t min_period_ns = UINT64_MAX;
|
|
for (const auto& p : g_read_probes) {
|
|
if (p.period_ns < min_period_ns) min_period_ns = p.period_ns;
|
|
}
|
|
if (min_period_ns == UINT64_MAX) return;
|
|
|
|
// Per-probe next-fire times.
|
|
std::vector<uint64_t> next_fire(g_read_probes.size(), 0);
|
|
|
|
XELOGI(
|
|
"AUDIT-068-READ-INIT probe_count={} min_period_ns={} thread spawned",
|
|
g_read_probes.size(), min_period_ns);
|
|
for (size_t i = 0; i < g_read_probes.size(); ++i) {
|
|
XELOGI("AUDIT-068-READ-INIT probe[{}] va=0x{:08X} size={} period_ns={}",
|
|
i, g_read_probes[i].guest_va,
|
|
static_cast<uint32_t>(g_read_probes[i].size),
|
|
g_read_probes[i].period_ns);
|
|
}
|
|
|
|
while (!g_read_probe_shutdown.load(std::memory_order_relaxed)) {
|
|
int64_t now_ns = host_ns_since_start();
|
|
for (size_t i = 0; i < g_read_probes.size(); ++i) {
|
|
if (static_cast<uint64_t>(now_ns) < next_fire[i]) continue;
|
|
ReadProbe& rp = g_read_probes[i];
|
|
bool valid = false;
|
|
uint64_t v = sample_at(rp.guest_va, rp.size, &valid);
|
|
if (valid) {
|
|
if (!rp.last_was_valid) {
|
|
// First successful read: emit the initial value, do NOT call it a
|
|
// "change" — but log so we know when the VA mapped.
|
|
XELOGI(
|
|
"AUDIT-068-READ-INITIAL va=0x{:08X} val=0x{:016X} sz={} "
|
|
"host_ns={} tid=probe",
|
|
rp.guest_va, v, static_cast<uint32_t>(rp.size), now_ns);
|
|
rp.last_value = v;
|
|
rp.last_was_valid = true;
|
|
} else if (v != rp.last_value) {
|
|
XELOGI(
|
|
"AUDIT-068-READ-CHANGE va=0x{:08X} old=0x{:016X} "
|
|
"new=0x{:016X} sz={} host_ns={} tid=probe",
|
|
rp.guest_va, rp.last_value, v, static_cast<uint32_t>(rp.size),
|
|
now_ns);
|
|
rp.last_value = v;
|
|
}
|
|
} else if (rp.last_was_valid) {
|
|
// Was valid, now invalid — page unmapped/reprotected.
|
|
XELOGI(
|
|
"AUDIT-068-READ-UNMAPPED va=0x{:08X} last=0x{:016X} sz={} "
|
|
"host_ns={} tid=probe",
|
|
rp.guest_va, rp.last_value, static_cast<uint32_t>(rp.size),
|
|
now_ns);
|
|
rp.last_was_valid = false;
|
|
}
|
|
next_fire[i] = static_cast<uint64_t>(now_ns) + rp.period_ns;
|
|
}
|
|
// Sleep until the next earliest fire, but no shorter than 1us and no
|
|
// longer than min_period_ns (to keep shutdown latency bounded).
|
|
uint64_t sleep_ns = min_period_ns;
|
|
if (sleep_ns < 1000) sleep_ns = 1000;
|
|
std::this_thread::sleep_for(std::chrono::nanoseconds(sleep_ns));
|
|
}
|
|
XELOGI("AUDIT-068-READ-EXIT thread shutting down");
|
|
}
|
|
|
|
void start_read_probe_thread_if_configured() {
|
|
std::call_once(g_read_probe_started, []() {
|
|
parse_read_probes_csv(cvars::audit_68_host_mem_read_probe);
|
|
if (g_read_probes.empty()) return;
|
|
if (!g_guest_to_host_thunk || !g_query_protect_thunk) {
|
|
XELOGI(
|
|
"AUDIT-068-READ-INIT thunks not ready (guest_to_host={} "
|
|
"query_protect={}) — read probe deferred",
|
|
(void*)g_guest_to_host_thunk, (void*)g_query_protect_thunk);
|
|
return;
|
|
}
|
|
g_read_probe_thread_running.store(true, std::memory_order_release);
|
|
g_read_probe_thread = std::thread(&read_probe_thread_main);
|
|
g_read_probe_thread.detach(); // best-effort; daemon-style.
|
|
});
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Runs parse_locked() at most once (guarded by g_parsed_flag); every later
// call is a no-op.
void ensure_parsed() {
  std::call_once(g_parsed_flag, [] { parse_locked(); });
}
|
|
|
|
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
|
|
uint8_t size, const char* tag) {
|
|
// AUDIT-068 Session 2: defer parsing until Memory::Memory() has registered
|
|
// the host→guest thunk. This guarantees the cmdline cvar override has been
|
|
// applied AND the logging subsystem is alive before we latch g_active.
|
|
// Without this gate, a be<T>::set() call during static-init (e.g. from a
|
|
// global initializer in another translation unit) would trigger
|
|
// parse_locked() before cpu_flags.cc's cvar objects are constructed —
|
|
// latching g_active=0 permanently and silencing the watch.
|
|
HostToGuestThunk thunk = g_host_to_guest_thunk;
|
|
if (!thunk) return;
|
|
ensure_parsed();
|
|
// AUDIT-068 Session 3: lazy-start the read-probe poll thread. Same gate as
|
|
// ensure_parsed() — must come after Memory::Memory() has registered the
|
|
// thunks so the probe can read pages safely.
|
|
start_read_probe_thread_if_configured();
|
|
uint32_t active = g_active.load(std::memory_order_acquire);
|
|
if (active == 0) return;
|
|
|
|
uint32_t guest_va = 0;
|
|
if (thunk) {
|
|
guest_va = thunk(host_ptr);
|
|
}
|
|
|
|
bool hit = false;
|
|
if ((active & 0x1) && value_matches(value, size)) hit = true;
|
|
if (!hit && (active & 0x2) && thunk && addr_matches(guest_va, size)) {
|
|
hit = true;
|
|
}
|
|
if (!hit) return;
|
|
|
|
emit(guest_va, host_ptr, value, size, tag);
|
|
}
|
|
|
|
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
|
|
const char* tag) {
|
|
// AUDIT-068 Session 2: same static-init gate as check_host_write_slowpath.
|
|
// Callers (Memory::Zero/Fill/Copy + xex_module audit68_prescan_memcpy) only
|
|
// run after Memory::Memory(), but defensive in case of future expansion.
|
|
if (!g_host_to_guest_thunk) return;
|
|
ensure_parsed();
|
|
uint32_t active = g_active.load(std::memory_order_acquire);
|
|
if (active == 0) return;
|
|
|
|
bool hit = false;
|
|
if ((active & 0x1) && value_matches(value, size)) hit = true;
|
|
if (!hit && (active & 0x2) && addr_matches(guest_va, size)) hit = true;
|
|
if (!hit) return;
|
|
|
|
emit(guest_va, nullptr, value, size, tag);
|
|
}
|
|
|
|
} // namespace audit_68
|
|
} // namespace xe
|