# AUDIT-068 cumulative canary instrumentation diff — Session 4 close # Session 4 adds 0 LOC (zero new instrumentation needed). # Diff content equals Session 3's; only this header changes. diff --git a/src/xenia/base/byte_order.h b/src/xenia/base/byte_order.h index 5a076f319..c80ee0ffc 100644 --- a/src/xenia/base/byte_order.h +++ b/src/xenia/base/byte_order.h @@ -11,6 +11,7 @@ #define XENIA_BASE_BYTE_ORDER_H_ #include +#include #if defined __has_include #if __has_include() #include @@ -21,6 +22,7 @@ #endif #include "xenia/base/assert.h" +#include "xenia/base/audit_68_host_mem_watch_fwd.h" #include "xenia/base/platform.h" #if !__cpp_lib_endian @@ -88,6 +90,30 @@ struct endian_store { operator T() const { return get(); } void set(const T& src) { + // AUDIT-068 Session 2: hook the canonical be/le write path. Gated + // on the host→guest thunk being installed by Memory::Memory(); without + // that there is no Memory and therefore no possible guest-memory write. + // This ALSO prevents the slow-path from running during static-init order + // (which would race the cvar object construction in cpu_flags.cc and + // permanently latch g_active=0 before --audit_68_* cmdline override + // applies). See reading-error #35 / Session 2 plan. + if constexpr (sizeof(T) <= 8 && std::is_integral_v) { + if (xe::audit_68::g_host_to_guest_thunk != nullptr) [[unlikely]] { + uint64_t v; + if constexpr (sizeof(T) == 8) { + v = static_cast(src); + } else if constexpr (sizeof(T) == 4) { + v = static_cast(static_cast(src)); + } else if constexpr (sizeof(T) == 2) { + v = static_cast(static_cast(src)); + } else { + v = static_cast(static_cast(src)); + } + xe::audit_68::check_host_write( + &value, v, static_cast(sizeof(T)), + E == std::endian::big ? 
"be::set" : "le::set"); + } + } if constexpr (std::endian::native == E) { value = src; } else { diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index 8ef40bbff..e78c8499c 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -18,6 +18,7 @@ #include #include +#include "xenia/base/audit_68_host_mem_watch_fwd.h" #include "xenia/base/byte_order.h" namespace xe { @@ -354,34 +355,52 @@ template void store(void* mem, const T& value); template <> inline void store(void* mem, const int8_t& value) { + xe::audit_68::check_host_write(mem, static_cast( + static_cast(value)), + 1, "store"); *reinterpret_cast(mem) = value; } template <> inline void store(void* mem, const uint8_t& value) { + xe::audit_68::check_host_write(mem, static_cast(value), 1, + "store"); *reinterpret_cast(mem) = value; } template <> inline void store(void* mem, const int16_t& value) { + xe::audit_68::check_host_write(mem, static_cast( + static_cast(value)), + 2, "store"); *reinterpret_cast(mem) = value; } template <> inline void store(void* mem, const uint16_t& value) { + xe::audit_68::check_host_write(mem, static_cast(value), 2, + "store"); *reinterpret_cast(mem) = value; } template <> inline void store(void* mem, const int32_t& value) { + xe::audit_68::check_host_write(mem, static_cast( + static_cast(value)), + 4, "store"); *reinterpret_cast(mem) = value; } template <> inline void store(void* mem, const uint32_t& value) { + xe::audit_68::check_host_write(mem, static_cast(value), 4, + "store"); *reinterpret_cast(mem) = value; } template <> inline void store(void* mem, const int64_t& value) { + xe::audit_68::check_host_write(mem, static_cast(value), 8, + "store"); *reinterpret_cast(mem) = value; } template <> inline void store(void* mem, const uint64_t& value) { + xe::audit_68::check_host_write(mem, value, 8, "store"); *reinterpret_cast(mem) = value; } template <> @@ -411,34 +430,52 @@ template void store_and_swap(void* mem, const T& value); template <> inline void 
store_and_swap(void* mem, const int8_t& value) { + xe::audit_68::check_host_write(mem, static_cast( + static_cast(value)), + 1, "store_and_swap"); *reinterpret_cast(mem) = value; } template <> inline void store_and_swap(void* mem, const uint8_t& value) { + xe::audit_68::check_host_write(mem, static_cast(value), 1, + "store_and_swap"); *reinterpret_cast(mem) = value; } template <> inline void store_and_swap(void* mem, const int16_t& value) { + xe::audit_68::check_host_write(mem, static_cast( + static_cast(value)), + 2, "store_and_swap"); *reinterpret_cast(mem) = byte_swap(value); } template <> inline void store_and_swap(void* mem, const uint16_t& value) { + xe::audit_68::check_host_write(mem, static_cast(value), 2, + "store_and_swap"); *reinterpret_cast(mem) = byte_swap(value); } template <> inline void store_and_swap(void* mem, const int32_t& value) { + xe::audit_68::check_host_write(mem, static_cast( + static_cast(value)), + 4, "store_and_swap"); *reinterpret_cast(mem) = byte_swap(value); } template <> inline void store_and_swap(void* mem, const uint32_t& value) { + xe::audit_68::check_host_write(mem, static_cast(value), 4, + "store_and_swap"); *reinterpret_cast(mem) = byte_swap(value); } template <> inline void store_and_swap(void* mem, const int64_t& value) { + xe::audit_68::check_host_write(mem, static_cast(value), 8, + "store_and_swap"); *reinterpret_cast(mem) = byte_swap(value); } template <> inline void store_and_swap(void* mem, const uint64_t& value) { + xe::audit_68::check_host_write(mem, value, 8, "store_and_swap"); *reinterpret_cast(mem) = byte_swap(value); } template <> diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 5da8f6adc..cbac9826c 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -13,6 +13,8 @@ #include #include +#include +#include #include "third_party/fmt/include/fmt/format.h" #include "xenia/base/assert.h" @@ -63,6 +65,76 @@ 
DEFINE_bool(instrument_call_times, false, "Compute time taken for functions, for profiling guest code", "x64"); #endif + +// AUDIT-061/067: forward decls of probe/watch tables (defined in +// ppc_hir_builder.cc). +namespace xe { +namespace cpu { +namespace audit61 { +const std::vector& pcs(); +} // namespace audit61 +namespace audit67 { +const std::vector& vals(); +} // namespace audit67 +} // namespace cpu +} // namespace xe + +// AUDIT-061: handler for trap codes [200, 232). arg0 carries trap idx +// (trap_code - 200), mapping to ::xe::cpu::audit61::pcs()[idx]. Emits one +// log line per fire with cr0/cr6 LGE flags + key GPRs + LR + tid. +static uint64_t TrapAudit61Branch(void* raw_context, uint64_t idx) { + auto* ctx = reinterpret_cast(raw_context); + const auto& pcs = ::xe::cpu::audit61::pcs(); + uint32_t pc = (idx < pcs.size()) ? pcs[static_cast(idx)] : 0u; + uint32_t tid = 0; + if (ctx->thread_state) { + tid = ctx->thread_state->thread_id(); + } + auto enc = [](uint8_t lt, uint8_t gt, uint8_t eq) { + char buf[4]; + buf[0] = lt ? 'L' : '.'; + buf[1] = gt ? 'G' : '.'; + buf[2] = eq ? 'E' : '.'; + buf[3] = '\0'; + return std::string(buf); + }; + XELOGI( + "AUDIT-061-BR pc={:08X} lr={:08X} cr0={} cr6={} r3={:08X} r4={:08X} " + "r5={:08X} r6={:08X} r31={:08X} tid={}", + pc, static_cast(ctx->lr), + enc(ctx->cr0.cr0_lt, ctx->cr0.cr0_gt, ctx->cr0.cr0_eq), + enc(ctx->cr6.cr6_all_equal, ctx->cr6.cr6_1, ctx->cr6.cr6_none_equal), + static_cast(ctx->r[3]), static_cast(ctx->r[4]), + static_cast(ctx->r[5]), static_cast(ctx->r[6]), + static_cast(ctx->r[31]), tid); + return 0; +} + +// AUDIT-067: handler for trap codes [250, 254). arg0 carries trap idx +// (trap_code - 250), mapping to ::xe::cpu::audit67::vals()[idx]. Fired when +// a 4-byte guest store sees the configured value. The store-emit site stashed +// (pc << 32) | (ea & 0xFFFFFFFF) into ctx->scratch right before the trap. 
+static uint64_t TrapAudit67ValueWatch(void* raw_context, uint64_t idx) { + auto* ctx = reinterpret_cast(raw_context); + const auto& vals = ::xe::cpu::audit67::vals(); + uint32_t val = + (idx < vals.size()) ? vals[static_cast(idx)] : 0u; + uint32_t pc = static_cast(ctx->scratch >> 32); + uint32_t dst = static_cast(ctx->scratch & 0xFFFFFFFFu); + uint32_t tid = 0; + if (ctx->thread_state) { + tid = ctx->thread_state->thread_id(); + } + XELOGI( + "AUDIT-067-VAL pc={:08X} lr={:08X} val={:08X} dst={:08X} " + "r3={:08X} r4={:08X} r5={:08X} r6={:08X} r31={:08X} tid={}", + pc, static_cast(ctx->lr), val, dst, + static_cast(ctx->r[3]), static_cast(ctx->r[4]), + static_cast(ctx->r[5]), static_cast(ctx->r[6]), + static_cast(ctx->r[31]), tid); + return 0; +} + namespace xe { namespace cpu { namespace backend { @@ -455,6 +527,20 @@ void X64Emitter::Trap(uint16_t trap_type) { // ? break; default: + // AUDIT-067: trap codes [250, 254) dispatch the value-watch handler. + // arg0 = idx into ::xe::cpu::audit67::vals(). + if (trap_type >= 250 && trap_type < 254) { + CallNative(::TrapAudit67ValueWatch, + static_cast(trap_type - 250)); + break; + } + // AUDIT-061: trap codes [200, 232) dispatch the branch-probe handler. + // arg0 = idx into ::xe::cpu::audit61::pcs(). + if (trap_type >= 200 && trap_type < 232) { + CallNative(::TrapAudit61Branch, + static_cast(trap_type - 200)); + break; + } XELOGW("Unknown trap type {}", trap_type); db(0xCC); break; diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc index 3ff067e15..2298dd3d7 100644 --- a/src/xenia/cpu/cpu_flags.cc +++ b/src/xenia/cpu/cpu_flags.cc @@ -57,3 +57,83 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU"); DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.", "CPU"); + +// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool. 
+DEFINE_bool(audit_demo_setup_trace, true, + "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.", + "Audit"); + +// AUDIT-061: comma-separated list of guest PCs to log on each fire. +// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits +// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N. +// Default empty (off); no perf cost when empty. +DEFINE_string(audit_61_branch_probe_pcs, "", + "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).", + "Audit"); + +// AUDIT-067: comma-separated list of u32 values to watch. When non-empty, +// every 4-byte guest store (stw/stwu/stwx/stwux/stmw) emits a runtime +// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N. +// Max 4 values. Default empty (off); zero overhead when empty. +DEFINE_string(audit_67_value_watch, "", + "AUDIT-067: CSV of u32 values (max 4) — log every guest " + "store whose value matches.", + "Audit"); + +// AUDIT-068: host-side memory-write watch. See cpu_flags.h header for format. +// Mirrors AUDIT-067 but covers host-side writes (xe::store_and_swap, +// Memory::Zero/Fill/Copy). Empty default = zero cost. +DEFINE_string(audit_68_host_mem_watch_values, "", + "AUDIT-068: CSV of u32 values (max 8) — log every host-side " + "guest-memory write whose value matches.", + "Audit"); +DEFINE_string(audit_68_host_mem_watch_addrs, "", + "AUDIT-068: CSV of guest VAs or VA ranges 'START-END' (max 8) " + "— log every host-side guest-memory write whose guest VA falls " + "within the configured set.", + "Audit"); + +// AUDIT-068 Session 3: read-mode probe. See cpu_flags.h for format. +DEFINE_string(audit_68_host_mem_read_probe, "", + "AUDIT-068 Session 3: CSV of 'VA:SIZE:PERIOD_NS' tuples (max 8) " + "— a dedicated poll thread reads the value at each VA every " + "PERIOD_NS and emits AUDIT-068-READ-CHANGE on transition.", + "Audit"); + +// Phase A — see kernel/event_log.h. 
+DEFINE_string(phase_a_event_log_path, "", + "Phase A: write schema-v1 JSONL event log to this path. " + "Empty (default) = disabled.", + "Audit"); +DEFINE_bool(phase_a_event_log_mem_writes, false, + "Phase A: include mem.write events in the JSONL log. RESERVED — " + "not wired in this phase. Default false.", + "Audit"); + +// Phase D Stage 1 — see kernel/event_log.h `EmitContentionObserved`. +DEFINE_bool(kernel_emit_contention, false, + "Phase D Stage 1: emit `contention.observed` events when " + "RtlEnterCriticalSection's spin loop is exhausted and the call " + "falls through to xeKeWaitForSingleObject. Default false (zero " + "cost when disabled). Requires --phase_a_event_log_path to be " + "set as well.", + "Audit"); + +// Phase B — see kernel/phase_b_snapshot.h. +DEFINE_string(phase_b_snapshot_dir, "", + "Phase B: write 5-file structured state snapshot to " + "/canary/ at the moment immediately before the first " + "guest PPC instruction of entry_point. Empty (default) = " + "disabled, zero overhead.", + "Audit"); +DEFINE_bool(phase_b_snapshot_and_exit, false, + "Phase B: after writing the snapshot, exit the process " + "immediately (std::_Exit(0)) so re-runs are byte-deterministic.", + "Audit"); +DEFINE_bool(phase_b_dump_section_content, false, + "Phase B: in memory.json, populate section_contents[].content_b64 " + "with raw bytes of every committed XEX-image region. Default " + "false — per-region SHA-256 is enough for the routine diff; " + "this is the escape hatch for the STOP-and-report condition " + "(image_loaded_sha256 mismatch).", + "Audit"); diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h index 38c4f98ba..9b5ca7a1c 100644 --- a/src/xenia/cpu/cpu_flags.h +++ b/src/xenia/cpu/cpu_flags.h @@ -35,4 +35,52 @@ DECLARE_bool(break_condition_truncate); DECLARE_bool(break_on_debugbreak); +// AUDIT-DEMO smoke marker. 
+DECLARE_bool(audit_demo_setup_trace); + +// AUDIT-061: multi-PC branch probe — emits one log line per fire with +// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs. +DECLARE_string(audit_61_branch_probe_pcs); + +// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose +// value-to-be-stored matches any configured value. CSV of u32 values +// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty. +DECLARE_string(audit_67_value_watch); + +// AUDIT-068: host-side memory-write watch — emit a log line for each host-side +// write to guest memory whose VALUE matches any configured u32 value, or whose +// guest VA falls within any configured ADDR or ADDR-range. Mirrors AUDIT-067 +// but covers the host-side write paths (xe::store_and_swap, Memory::Zero/ +// Fill/Copy) that AUDIT-067's JIT store-opcode hooks cannot see. +// +// VALUES: CSV of u32 values, max 8 entries; e.g. "0x8200A208,0x8200A928". +// ADDRS: CSV of guest VAs or VA ranges, max 8 entries; range form is +// "0xSTART-0xEND" (inclusive). e.g. "0x42500000-0x42600000,0xBCE25340". +// Default empty (off); zero cost on the hot path when both are empty. +DECLARE_string(audit_68_host_mem_watch_values); +DECLARE_string(audit_68_host_mem_watch_addrs); + +// AUDIT-068 Session 3: read-mode probe. CSV of "VA:SIZE:PERIOD_NS" tuples +// (max 8). A dedicated low-priority thread polls each VA every PERIOD_NS and +// emits AUDIT-068-READ-CHANGE when the value transitions. SIZE in {1,2,4,8}. +// Example: "0xBCE25340:4:1000000" = poll u32 at 0xBCE25340 every 1 ms. +// Default empty (off); the poll thread is not spawned when empty. +DECLARE_string(audit_68_host_mem_read_probe); + +// Phase A: JSONL event-log emitter path. When non-empty, the engine writes +// schema-v1 JSONL events to this file. Empty (default) = no overhead, no +// behavior change. 
Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md +DECLARE_string(phase_a_event_log_path); +DECLARE_bool(phase_a_event_log_mem_writes); + +// Phase B: initial-state snapshot. When the dir cvar is non-empty, the +// engine writes a five-file structured state snapshot (cpu_state.json, +// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to +// `/canary/` at the moment immediately before the first guest PPC +// instruction of the XEX entry_point executes. See +// `xenia-rs/audit-runs/phase-b-state-equivalence/`. +DECLARE_string(phase_b_snapshot_dir); +DECLARE_bool(phase_b_snapshot_and_exit); +DECLARE_bool(phase_b_dump_section_content); + #endif // XENIA_CPU_CPU_FLAGS_H_ diff --git a/src/xenia/cpu/ppc/ppc_emit_altivec.cc b/src/xenia/cpu/ppc/ppc_emit_altivec.cc index 513b21391..c9af025ff 100644 --- a/src/xenia/cpu/ppc/ppc_emit_altivec.cc +++ b/src/xenia/cpu/ppc/ppc_emit_altivec.cc @@ -9,12 +9,28 @@ #include "xenia/cpu/ppc/ppc_emit-private.h" +#include #include "xenia/base/assert.h" +#include "xenia/cpu/cpu_flags.h" #include "xenia/cpu/ppc/ppc_context.h" #include "xenia/cpu/ppc/ppc_hir_builder.h" #include +// AUDIT-067: forward-decls. Defined in ppc_emit_memory.cc / ppc_hir_builder.cc. +namespace xe { +namespace cpu { +namespace audit67 { +const std::vector& vals(); +} +namespace ppc { +void EmitAudit67ValueWatchVec(PPCHIRBuilder& f, uint32_t pc, + ::xe::cpu::hir::Value* vec128, + ::xe::cpu::hir::Value* ea); +} +} +} + namespace xe { namespace cpu { namespace ppc { @@ -175,6 +191,21 @@ int InstrEmit_stvewx_(PPCHIRBuilder& f, const InstrData& i, uint32_t vd, f.Shr(f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantUint8(0xF)), 2); Value* v = f.Extract(f.LoadVR(vd), el, INT32_TYPE); f.Store(ea, f.ByteSwap(v)); + if (!::xe::cpu::audit67::vals().empty()) { + // For stvewx: only one lane is actually stored; piggyback on the scalar + // value-watch helper by emitting the equivalent of stw of v at ea. 
+ Value* pc_hi64 = + f.LoadConstantUint64(static_cast(i.address) << 32); + Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE); + Value* packed = f.Or(pc_hi64, ea_lo64); + const auto& vals = ::xe::cpu::audit67::vals(); + for (size_t idx = 0; idx < vals.size(); ++idx) { + Value* cmp = f.CompareEQ(v, f.LoadConstantUint32(vals[idx])); + f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed); + f.ContextBarrier(); + f.TrapTrue(cmp, static_cast(250 + idx)); + } + } return 0; } int InstrEmit_stvewx(PPCHIRBuilder& f, const InstrData& i) { @@ -187,7 +218,11 @@ int InstrEmit_stvewx128(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_stvx_(PPCHIRBuilder& f, const InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { Value* ea = f.And(CalculateEA_0(f, ra, rb), f.LoadConstantUint64(~0xFull)); - f.Store(ea, f.ByteSwap(f.LoadVR(vd))); + Value* vec = f.LoadVR(vd); + f.Store(ea, f.ByteSwap(vec)); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatchVec(f, i.address, vec, ea); + } return 0; } int InstrEmit_stvx(PPCHIRBuilder& f, const InstrData& i) { diff --git a/src/xenia/cpu/ppc/ppc_emit_memory.cc b/src/xenia/cpu/ppc/ppc_emit_memory.cc index b4bdabb49..a6b44697d 100644 --- a/src/xenia/cpu/ppc/ppc_emit_memory.cc +++ b/src/xenia/cpu/ppc/ppc_emit_memory.cc @@ -10,11 +10,22 @@ #include "xenia/cpu/ppc/ppc_emit-private.h" #include +#include #include "xenia/base/assert.h" #include "xenia/base/cvar.h" +#include "xenia/cpu/cpu_flags.h" #include "xenia/cpu/ppc/ppc_context.h" #include "xenia/cpu/ppc/ppc_hir_builder.h" +// AUDIT-067: forward-decl of value-watch table (defined in ppc_hir_builder.cc). 
+namespace xe { +namespace cpu { +namespace audit67 { +const std::vector& vals(); +} // namespace audit67 +} // namespace cpu +} // namespace xe + DEFINE_bool( disable_prefetch_and_cachecontrol, true, "Disables translating ppc prefetch/cache flush instructions to host " @@ -67,6 +78,90 @@ void StoreEA(PPCHIRBuilder& f, uint32_t rt, Value* ea) { f.StoreGPR(rt, ea); } +// AUDIT-067: emit a runtime equality check on the 32-bit value-to-be-stored +// against each configured watch value. On match, store (pc, EA) packed into +// the PPCContext scratch field so the native trap handler can read them, +// then fire a trap with code (kTrapBase + idx). Done host-side as a +// build-time pc constant + a runtime EA truncate, packed as +// (pc << 32) | (ea & 0xFFFFFFFF) so the handler can decompose. +static void EmitAudit67ValueWatch(PPCHIRBuilder& f, uint32_t pc, Value* val32, + Value* ea) { + const auto& vals = ::xe::cpu::audit67::vals(); + if (vals.empty()) return; + // pc is known at JIT time → emit as constant; ea is runtime. + Value* pc_hi64 = f.LoadConstantUint64(static_cast(pc) << 32); + Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE); + Value* packed = f.Or(pc_hi64, ea_lo64); + for (size_t idx = 0; idx < vals.size(); ++idx) { + Value* cmp = f.CompareEQ(val32, f.LoadConstantUint32(vals[idx])); + f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed); + f.ContextBarrier(); + f.TrapTrue(cmp, static_cast(250 + idx)); + } +} + +// AUDIT-067 128-bit (vector) variant: checks each of the 4 32-bit lanes in a +// vector store. Used for stvx/stvxl/stvewx (memcpy-derived installs may use +// 128-bit vector stores). The matched lane is reflected in the dst by +// adding (lane * 4) so the handler can see exactly where in memory the +// value lands. Declared with external linkage so altivec.cc can call it. 
+void EmitAudit67ValueWatchVec(PPCHIRBuilder& f, uint32_t pc, + Value* vec128, Value* ea) { + const auto& vals = ::xe::cpu::audit67::vals(); + if (vals.empty()) return; + Value* pc_hi64 = f.LoadConstantUint64(static_cast(pc) << 32); + for (size_t idx = 0; idx < vals.size(); ++idx) { + Value* watch = f.LoadConstantUint32(vals[idx]); + for (uint8_t lane = 0; lane < 4; ++lane) { + Value* lane_val = f.Extract(vec128, lane, INT32_TYPE); + Value* cmp = f.CompareEQ(lane_val, watch); + Value* lane_off = f.LoadConstantUint32(static_cast(lane * 4)); + Value* dst32 = f.Add(f.Truncate(ea, INT32_TYPE), lane_off); + Value* packed = f.Or(pc_hi64, f.ZeroExtend(dst32, INT64_TYPE)); + f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed); + f.ContextBarrier(); + f.TrapTrue(cmp, static_cast(250 + idx)); + } + } +} + +// AUDIT-067 64-bit variant: same as above but checks BOTH halves of a 64-bit +// stored value. EA points at the start of the 8-byte store. Both halves fire +// the same trap code (250 + idx); the matched half is identified by the dst +// packed into scratch: EA+0 for the upper 32 bits, EA+4 for the lower 32 bits. +static void EmitAudit67ValueWatch64(PPCHIRBuilder& f, uint32_t pc, Value* val64, + Value* ea) { + const auto& vals = ::xe::cpu::audit67::vals(); + if (vals.empty()) return; + // PowerPC is big-endian: u64 stored at EA places upper-32 bits at EA+0 + // and lower-32 bits at EA+4. Check both halves against each watch value. + Value* upper32 = f.Truncate(f.Shr(val64, int8_t(32)), INT32_TYPE); // bits[63:32] + Value* lower32 = f.Truncate(val64, INT32_TYPE); // bits[31:0] + Value* pc_hi64 = f.LoadConstantUint64(static_cast(pc) << 32); + for (size_t idx = 0; idx < vals.size(); ++idx) { + // Upper half lands at EA+0. 
+ { + Value* cmp = f.CompareEQ(upper32, f.LoadConstantUint32(vals[idx])); + Value* ea_lo64 = f.ZeroExtend(f.Truncate(ea, INT32_TYPE), INT64_TYPE); + Value* packed = f.Or(pc_hi64, ea_lo64); + f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed); + f.ContextBarrier(); + f.TrapTrue(cmp, static_cast(250 + idx)); + } + // Lower half lands at EA+4. + { + Value* cmp = f.CompareEQ(lower32, f.LoadConstantUint32(vals[idx])); + Value* ea_plus4 = + f.Add(f.Truncate(ea, INT32_TYPE), f.LoadConstantUint32(4)); + Value* ea_lo64 = f.ZeroExtend(ea_plus4, INT64_TYPE); + Value* packed = f.Or(pc_hi64, ea_lo64); + f.StoreContext(offsetof(::xe::cpu::ppc::PPCContext, scratch), packed); + f.ContextBarrier(); + f.TrapTrue(cmp, static_cast(250 + idx)); + } + } +} + // Integer load (A-13) int InstrEmit_lbz(PPCHIRBuilder& f, const InstrData& i) { @@ -518,9 +613,11 @@ int InstrEmit_stw(PPCHIRBuilder& f, const InstrData& i) { b = f.LoadGPR(i.D.RA); } Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS)); - f.StoreOffset(b, offset, - f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE))); - + Value* val32 = f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE); + f.StoreOffset(b, offset, f.ByteSwap(val32)); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch(f, i.address, val32, f.Add(b, offset)); + } return 0; } @@ -532,10 +629,14 @@ int InstrEmit_stmw(PPCHIRBuilder& f, const InstrData& i) { b = f.LoadGPR(i.D.RA); } + const bool watch_active = !::xe::cpu::audit67::vals().empty(); for (uint32_t j = 0; j < 32 - i.D.RT; ++j) { Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS) + j * 4); - f.StoreOffset(b, offset, - f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT + j), INT32_TYPE))); + Value* val32 = f.Truncate(f.LoadGPR(i.D.RT + j), INT32_TYPE); + f.StoreOffset(b, offset, f.ByteSwap(val32)); + if (watch_active) { + EmitAudit67ValueWatch(f, i.address, val32, f.Add(b, offset)); + } } return 0; } @@ -545,8 +646,12 @@ int InstrEmit_stwu(PPCHIRBuilder& f, const InstrData& i) { // MEM(EA, 
4) <- (RS)[32:63] // RA <- EA Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS)); - f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE))); + Value* val32 = f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE); + f.Store(ea, f.ByteSwap(val32)); StoreEA(f, i.D.RA, ea); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch(f, i.address, val32, ea); + } return 0; } @@ -555,8 +660,12 @@ int InstrEmit_stwux(PPCHIRBuilder& f, const InstrData& i) { // MEM(EA, 4) <- (RS)[32:63] // RA <- EA Value* ea = CalculateEA(f, i.X.RA, i.X.RB); - f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE))); + Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE); + f.Store(ea, f.ByteSwap(val32)); StoreEA(f, i.X.RA, ea); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch(f, i.address, val32, ea); + } return 0; } @@ -568,7 +677,11 @@ int InstrEmit_stwx(PPCHIRBuilder& f, const InstrData& i) { // EA <- b + (RB) // MEM(EA, 4) <- (RS)[32:63] Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); - f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE))); + Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE); + f.Store(ea, f.ByteSwap(val32)); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch(f, i.address, val32, ea); + } return 0; } @@ -587,7 +700,11 @@ int InstrEmit_std(PPCHIRBuilder& f, const InstrData& i) { } Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2)); - f.StoreOffset(b, offset, f.ByteSwap(f.LoadGPR(i.DS.RT))); + Value* val64 = f.LoadGPR(i.DS.RT); + f.StoreOffset(b, offset, f.ByteSwap(val64)); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch64(f, i.address, val64, f.Add(b, offset)); + } return 0; } @@ -596,8 +713,12 @@ int InstrEmit_stdu(PPCHIRBuilder& f, const InstrData& i) { // MEM(EA, 8) <- (RS) // RA <- EA Value* ea = CalculateEA_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2)); - f.Store(ea, f.ByteSwap(f.LoadGPR(i.DS.RT))); + Value* val64 = f.LoadGPR(i.DS.RT); + f.Store(ea, 
f.ByteSwap(val64)); StoreEA(f, i.DS.RA, ea); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch64(f, i.address, val64, ea); + } return 0; } @@ -606,8 +727,12 @@ int InstrEmit_stdux(PPCHIRBuilder& f, const InstrData& i) { // MEM(EA, 8) <- (RS) // RA <- EA Value* ea = CalculateEA(f, i.X.RA, i.X.RB); - f.Store(ea, f.ByteSwap(f.LoadGPR(i.X.RT))); + Value* val64 = f.LoadGPR(i.X.RT); + f.Store(ea, f.ByteSwap(val64)); StoreEA(f, i.X.RA, ea); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch64(f, i.address, val64, ea); + } return 0; } @@ -619,7 +744,11 @@ int InstrEmit_stdx(PPCHIRBuilder& f, const InstrData& i) { // EA <- b + (RB) // MEM(EA, 8) <- (RS) Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); - f.Store(ea, f.ByteSwap(f.LoadGPR(i.X.RT))); + Value* val64 = f.LoadGPR(i.X.RT); + f.Store(ea, f.ByteSwap(val64)); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch64(f, i.address, val64, ea); + } return 0; } @@ -684,7 +813,11 @@ int InstrEmit_stwbrx(PPCHIRBuilder& f, const InstrData& i) { // EA <- b + (RB) // MEM(EA, 4) <- bswap((RS)[32:63]) Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); - f.Store(ea, f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)); + Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE); + f.Store(ea, val32); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch(f, i.address, val32, ea); + } return 0; } @@ -696,7 +829,11 @@ int InstrEmit_stdbrx(PPCHIRBuilder& f, const InstrData& i) { // EA <- b + (RB) // MEM(EA, 8) <- bswap(RS) Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); - f.Store(ea, f.LoadGPR(i.X.RT)); + Value* val64 = f.LoadGPR(i.X.RT); + f.Store(ea, val64); + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch64(f, i.address, val64, ea); + } return 0; } @@ -843,7 +980,8 @@ int InstrEmit_stdcx(PPCHIRBuilder& f, const InstrData& i) { // This will always succeed if under the global lock, however. 
+ Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); - Value* rt = f.ByteSwap(f.LoadGPR(i.X.RT)); + Value* val64 = f.LoadGPR(i.X.RT); + Value* rt = f.ByteSwap(val64); if (cvars::no_reserved_ops) { f.Store(ea, rt); @@ -862,6 +1000,9 @@ int InstrEmit_stdcx(PPCHIRBuilder& f, const InstrData& i) { if (!cvars::no_reserved_ops) { f.MemoryBarrier(); } + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch64(f, i.address, val64, ea); + } return 0; } @@ -885,7 +1026,8 @@ int InstrEmit_stwcx(PPCHIRBuilder& f, const InstrData& i) { Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); - Value* rt = f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)); + Value* val32 = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE); + Value* rt = f.ByteSwap(val32); if (cvars::no_reserved_ops) { f.Store(ea, rt); @@ -904,7 +1046,9 @@ int InstrEmit_stwcx(PPCHIRBuilder& f, const InstrData& i) { if (!cvars::no_reserved_ops) { f.MemoryBarrier(); } - + if (!::xe::cpu::audit67::vals().empty()) { + EmitAudit67ValueWatch(f, i.address, val32, ea); + } return 0; } // Floating-point load (A-19) diff --git a/src/xenia/cpu/ppc/ppc_hir_builder.cc b/src/xenia/cpu/ppc/ppc_hir_builder.cc index 42d996cba..e2f7a45db 100644 --- a/src/xenia/cpu/ppc/ppc_hir_builder.cc +++ b/src/xenia/cpu/ppc/ppc_hir_builder.cc @@ -34,6 +34,97 @@ DEFINE_bool( "unimplemented PowerPC instruction is encountered.", "CPU"); +// AUDIT-061 — multi-PC branch probe. Parses cvars::audit_61_branch_probe_pcs +// once and exposes a (pc -> trap_id) lookup table. trap_id range [200, 232). +// PCs outside the table are not probed. Native side reads audit61::pcs()[idx]. 
+#include +#include +namespace xe { +namespace cpu { +namespace audit61 { +constexpr uint16_t kTrapBase = 200; +constexpr size_t kMaxPcs = 32; +static std::vector g_pcs; +static bool g_parsed = false; + +const std::vector& pcs() { + if (!g_parsed) { + g_parsed = true; + const std::string& csv = cvars::audit_61_branch_probe_pcs; + size_t pos = 0; + while (pos < csv.size() && g_pcs.size() < kMaxPcs) { + size_t end = csv.find(',', pos); + std::string tok = csv.substr(pos, end - pos); + // strip whitespace + while (!tok.empty() && (tok.front() == ' ' || tok.front() == '\t')) + tok.erase(tok.begin()); + while (!tok.empty() && (tok.back() == ' ' || tok.back() == '\t')) + tok.pop_back(); + if (!tok.empty()) { + try { + uint32_t v = static_cast(std::stoul(tok, nullptr, 0)); + g_pcs.push_back(v); + } catch (...) { + } + } + if (end == std::string::npos) break; + pos = end + 1; + } + } + return g_pcs; +} + +// Returns trap id for pc, or 0 if pc not in probe set. +uint16_t trap_id_for(uint32_t pc) { + const auto& v = pcs(); + for (size_t i = 0; i < v.size(); ++i) { + if (v[i] == pc) return static_cast(kTrapBase + i); + } + return 0; +} +} // namespace audit61 + +// AUDIT-067 — value-watch. Parses cvars::audit_67_value_watch once, exposes +// values via vals(). Trap codes for matches start at kTrapBase = 250. 
+namespace audit67 { +constexpr uint16_t kTrapBase = 250; +constexpr size_t kMaxVals = 4; +static std::vector g_vals; +static bool g_parsed = false; + +const std::vector& vals() { + if (!g_parsed) { + g_parsed = true; + const std::string& csv = cvars::audit_67_value_watch; + size_t pos = 0; + while (pos < csv.size() && g_vals.size() < kMaxVals) { + size_t end = csv.find(',', pos); + std::string tok = csv.substr(pos, end - pos); + while (!tok.empty() && (tok.front() == ' ' || tok.front() == '\t')) + tok.erase(tok.begin()); + while (!tok.empty() && (tok.back() == ' ' || tok.back() == '\t')) + tok.pop_back(); + if (!tok.empty()) { + try { + uint32_t v = static_cast(std::stoul(tok, nullptr, 0)); + g_vals.push_back(v); + } catch (...) { + } + } + if (end == std::string::npos) break; + pos = end + 1; + } + XELOGI("AUDIT-067-INIT csv=\"{}\" parsed_count={}", csv, g_vals.size()); + for (size_t i = 0; i < g_vals.size(); ++i) { + XELOGI("AUDIT-067-INIT vals[{}] = 0x{:08X}", i, g_vals[i]); + } + } + return g_vals; +} +} // namespace audit67 +} // namespace cpu +} // namespace xe + namespace xe { namespace cpu { namespace ppc { @@ -174,6 +265,20 @@ bool PPCHIRBuilder::Emit(GuestFunction* function, uint32_t flags) { MaybeBreakOnInstruction(address); + // AUDIT-061: emit a trap before this instruction if it's on the probe + // list. The trap fires BEFORE the cmp/branch HIR emit so the native + // handler observes cr0/cr6 set by the *previous* instruction (the cmp + // that controls this conditional branch). ContextBarrier flushes + // HIR temporaries to PPCContext so the handler reads consistent state. 
+ if (!::xe::cpu::audit61::pcs().empty()) { + uint16_t tid = ::xe::cpu::audit61::trap_id_for(address); + if (tid != 0) { + Comment("--audit_61_branch_probe target"); + ContextBarrier(); + Trap(tid); + } + } + InstrData i; i.address = address; i.code = code; diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc index 1034dcac7..38148010c 100644 --- a/src/xenia/cpu/xex_module.cc +++ b/src/xenia/cpu/xex_module.cc @@ -51,6 +51,38 @@ DECLARE_bool(allow_plugins); DECLARE_bool(disable_context_promotion); +// AUDIT-068 Session 2: helper that scans a raw byte buffer for 4-byte aligned +// u32 values that match the configured audit_68 value list, emitting a +// per-position event. Used to pre-scan XEX-loader memcpys that bypass all +// other hooked surfaces. Cost when off: a single relaxed atomic load. +static inline void audit68_prescan_memcpy(uint32_t guest_va_dest, + const uint8_t* src, size_t size, + const char* tag) { + uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed); + if (active == 0) return; + if ((active & 0x1) && size >= 4) { + size_t aligned_end = size & ~size_t(3); + for (size_t i = 0; i < aligned_end; i += 4) { + uint32_t be_u32 = (uint32_t(src[i + 0]) << 24) | + (uint32_t(src[i + 1]) << 16) | + (uint32_t(src[i + 2]) << 8) | uint32_t(src[i + 3]); + xe::audit_68::check_guest_va( + static_cast(guest_va_dest + i), be_u32, 4, tag); + } + } + if (active & 0x2) { + // Coarse addr-only event over the full span (dest only). 
+ uint64_t v = 0; + if (size >= 4) { + v = (uint64_t(src[0]) << 24) | (uint64_t(src[1]) << 16) | + (uint64_t(src[2]) << 8) | uint64_t(src[3]); + } + xe::audit_68::check_guest_va(guest_va_dest, v, + static_cast(std::min(size, 8)), + tag); + } +} + static constexpr uint8_t xe_xex1_retail_key[16] = { 0xA2, 0x6C, 0x10, 0xF7, 0x1F, 0xD9, 0x35, 0xE9, 0x8B, 0x99, 0x92, 0x2C, 0xE9, 0x32, 0x15, 0x72}; @@ -424,6 +456,10 @@ int XexModule::ApplyPatch(XexModule* module) { // If image_source_offset is set, copy [source_offset:source_size] to // target_offset if (patch_header->delta_image_source_offset) { + audit68_prescan_memcpy( + module->base_address_ + patch_header->delta_image_target_offset, + base_exe + patch_header->delta_image_source_offset, + patch_header->delta_image_source_size, "xex_memcpy_patch"); memcpy(base_exe + patch_header->delta_image_target_offset, base_exe + patch_header->delta_image_source_offset, patch_header->delta_image_source_size); @@ -589,6 +625,8 @@ int XexModule::ReadImageUncompressed(const void* xex_addr, size_t xex_length) { if (exe_length > uncompressed_size) { return 1; } + audit68_prescan_memcpy(base_address_, p, exe_length, + "xex_memcpy_uncompressed"); memcpy(buffer, p, exe_length); return 0; case XEX_ENCRYPTION_NORMAL: @@ -665,6 +703,9 @@ int XexModule::ReadImageBasicCompressed(const void* xex_addr, // Overflow. return 1; } + audit68_prescan_memcpy( + base_address_ + static_cast(d - buffer), p, data_size, + "xex_memcpy_basic_block"); memcpy(d, p, data_size); break; case XEX_ENCRYPTION_NORMAL: { @@ -799,6 +840,17 @@ int XexModule::ReadImageCompressed(const void* xex_addr, size_t xex_length) { result_code = lzx_decompress( compress_buffer, d - compress_buffer, buffer, uncompressed_size, compression_info->normal.window_size, nullptr, 0); + + // AUDIT-068 Session 2: lzx_decompress writes directly into guest + // memory via the host pointer `buffer`. 
There's no host-side hook + // covering its internal bulk writes, so post-scan the produced bytes + // to recover what the XEX loader actually placed at `base_address_`. + // This is THE most likely catch for the vtable install case (vtables + // live in the .rdata section that is part of the LZX-compressed image). + if (result_code == 0) { + audit68_prescan_memcpy(base_address_, buffer, uncompressed_size, + "xex_lzx_decompress_output"); + } } else { XELOGE("Unable to allocate XEX memory at {:08X}-{:08X}.", base_address_, uncompressed_size); diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 22ba66aee..f02b11d7f 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -14,6 +14,7 @@ #include "third_party/fmt/include/fmt/format.h" #include "xenia/base/assert.h" +#include "xenia/base/audit_68_host_mem_watch_fwd.h" #include "xenia/base/byte_stream.h" #include "xenia/base/clock.h" #include "xenia/base/cvar.h" @@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) { static Memory* active_memory_ = nullptr; +// AUDIT-068 — process-global accessor (declared in memory.h). +Memory* Memory::active() { return active_memory_; } + void CrashDump() { static std::atomic in_crash_dump(0); if (in_crash_dump.fetch_add(1)) { @@ -151,11 +155,41 @@ Memory::Memory() { uint32_t(xe::memory::allocation_granularity()); assert_zero(active_memory_); active_memory_ = this; + + // AUDIT-068: register host→guest translation thunk so the watch slow path + // in xenia-base can resolve guest VAs without depending on xenia-core. + xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t { + Memory* m = active_memory_; + return m ? m->HostToGuestVirtual(host_ptr) : 0u; + }; + + // AUDIT-068 Session 3: register guest→host translation thunk and a + // page-protect query thunk for the read-mode probe. The probe thread uses + // QueryProtect to skip unmapped/uncommitted pages before dereferencing. 
+ xe::audit_68::g_guest_to_host_thunk = [](uint32_t va) -> const void* { + Memory* m = active_memory_; + return m ? reinterpret_cast(m->TranslateVirtual(va)) + : nullptr; + }; + xe::audit_68::g_query_protect_thunk = [](uint32_t va, + uint32_t* out_protect) -> bool { + Memory* m = active_memory_; + if (!m) return false; + BaseHeap* heap = m->LookupHeap(va); + if (!heap) { + if (out_protect) *out_protect = 0; + return false; + } + return heap->QueryProtect(va, out_protect); + }; } Memory::~Memory() { assert_true(active_memory_ == this); active_memory_ = nullptr; + xe::audit_68::g_host_to_guest_thunk = nullptr; + xe::audit_68::g_guest_to_host_thunk = nullptr; + xe::audit_68::g_query_protect_thunk = nullptr; // Uninstall the MMIO handler, as we won't be able to service more // requests. @@ -540,16 +574,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const { } void Memory::Zero(uint32_t address, uint32_t size) { + // AUDIT-068: log a single span event with value=0; size is capped at 8 for + // the value field. Slow path is gated on the atomic flag. + xe::audit_68::check_guest_va(address, 0, + static_cast(std::min(size, 8)), + "Memory::Zero"); std::memset(TranslateVirtual(address), 0, size); } void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) { + // Replicate the fill byte across the value field so value_matches can + // recognise e.g. 0xDEADBEEF only if the byte is 0xDE/0xAD/0xBE/0xEF — for + // capture purposes the byte itself in the low slot is enough. + uint64_t v = static_cast(value); + v |= v << 8; + v |= v << 16; + v |= v << 32; + xe::audit_68::check_guest_va(address, v, + static_cast(std::min(size, 8)), + "Memory::Fill"); std::memset(TranslateVirtual(address), value, size); } void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) { uint8_t* pdest = TranslateVirtual(dest); const uint8_t* psrc = TranslateVirtual(src); + // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the + // source buffer. 
Catches XEX-loader-style memcpys where a vptr (the target + // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost + // O(size/4 * N_values) with N_values capped at 8 inside value_matches — + // negligible vs the underlying memcpy throughput. + // + // Gated on active bit 0x1 (values-mode) AND active != 0. If only addrs are + // configured (Run 2 voice-struct mode), we still emit a single addr-only + // event covering the destination span so addr-watch isn't broken. + uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed); + if (active != 0) [[unlikely]] { + if ((active & 0x1) && size >= 4) { + // Scan source for any configured u32 value (big-endian, mirrors how + // guest sees the bytes). 4-byte aligned offsets only. + uint32_t aligned_end = size & ~3u; + for (uint32_t i = 0; i < aligned_end; i += 4) { + uint32_t be_u32 = + (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) | + (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]); + xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy"); + } + } + if (active & 0x2) { + // Addr-only mode: emit a single coarse event tagged with the dest base + // and first u32 of source for context. The slow-path range check will + // log iff the dest span intersects a configured addr range. + uint64_t v = 0; + if (size >= 4) { + v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) | + (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]); + } else if (size > 0) { + for (uint32_t i = 0; i < size; ++i) { + v = (v << 8) | psrc[i]; + } + } + xe::audit_68::check_guest_va( + dest, v, static_cast(std::min(size, 8)), + "Memory::Copy"); + } + } std::memcpy(pdest, psrc, size); } diff --git a/src/xenia/memory.h b/src/xenia/memory.h index bd9519a40..fa712fe08 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -347,6 +347,13 @@ class Memory { Memory(); ~Memory(); + // AUDIT-068: process-global Memory singleton accessor. 
Returns the + // currently-constructed Memory instance, or nullptr if none. Set inside + // Memory::Memory()/~Memory(); see memory.cc `active_memory_`. Used by + // xe::audit_68::check_host_write() to translate a host pointer back to a + // guest VA without an explicit Memory* context. + static Memory* active(); + // Initializes the memory system. // This may fail if the host address space could not be reserved or the // mapping to the file system fails. # === new file: src/xenia/base/audit_68_host_mem_watch_fwd.h === /** ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** * AUDIT-068: host-side memory-write watch — forward declarations only. * * Declarations here are intentionally minimal so that xenia/base/memory.h can * include this without pulling in xenia/memory.h (which would create a * circular dependency: xenia-base → xenia-core → xenia-base). The full * definitions live in xenia/audit_68_host_mem_watch.{h,cc} (xenia-core). * * Hot path: callers (the integer specializations of xe::store_and_swap) * load the atomic flag once. When it is 0 (default), no further work is done * — a single relaxed atomic load and a predictable branch. ****************************************************************************** */ #ifndef XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_ #define XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_ #include #include namespace xe { namespace audit_68 { // 0 = inactive (default). Non-zero = the cvars have been parsed and at least // one watch is configured. Set lazily by check_host_write_slowpath() on first // call after cvar parsing. Loaded relaxed on the hot path. // // Implementation lives in xenia-base (audit_68_host_mem_watch_base.cc) so // that callers in xenia-base/xenia-cpu/xenia-kernel can resolve the symbol // without depending on xenia-core link order. 
extern std::atomic g_active; // Host-pointer → guest-VA translation thunk. xenia/memory.cc::Memory::Memory() // registers a function pointer here that wraps Memory::HostToGuestVirtual. // Until set, the slow path falls back to logging the raw host pointer. using HostToGuestThunk = uint32_t (*)(const void*); extern HostToGuestThunk g_host_to_guest_thunk; // AUDIT-068 Session 3 — read-mode probe support. // // Guest-VA → host-pointer translation thunk (wraps Memory::TranslateVirtual). // Used by the read-probe poll thread to sample bytes at configured guest VAs. // May return non-null even for unmapped/uncommitted VAs (the underlying // translation is arithmetic — virtual_membase_ + va) — callers MUST consult // the QueryProtect thunk before dereferencing. using GuestToHostThunk = const void* (*)(uint32_t); extern GuestToHostThunk g_guest_to_host_thunk; // Returns true iff the page containing `guest_va` is committed and readable; // out_protect receives the raw page protect bits (kProtectRead, etc.). Wraps // Memory::LookupHeap() + BaseHeap::QueryProtect(). Used as a guard before the // read-probe samples bytes (early-boot heap-not-yet-mapped path must NOT // crash). using QueryProtectThunk = bool (*)(uint32_t, uint32_t* /*out_protect*/); extern QueryProtectThunk g_query_protect_thunk; // Slow path. Only invoked when g_active is non-zero. Implementation in // xenia/base/audit_68_host_mem_watch_base.cc (xenia-base). // // host_ptr: the host pointer being written (from store_and_swap's `mem`). // value: the value being stored (zero-extended to u64). // size: 1, 2, 4 or 8. // tag: caller-provided tag string (e.g. "store_and_swap"). Logged // verbatim, no formatting. Must be a static string (lifetime // beyond this call). void check_host_write_slowpath(const void* host_ptr, uint64_t value, uint8_t size, const char* tag); // Same as above, but with a known guest VA (for callers like Memory::Zero/ // Fill/Copy that have the VA but not a single host pointer). 
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size, const char* tag); // Inline hot-path wrappers. Single relaxed atomic load + branch when inactive. inline void check_host_write(const void* host_ptr, uint64_t value, uint8_t size, const char* tag) { if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] { check_host_write_slowpath(host_ptr, value, size, tag); } } inline void check_guest_va(uint32_t guest_va, uint64_t value, uint8_t size, const char* tag) { if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] { check_guest_va_slowpath(guest_va, value, size, tag); } } } // namespace audit_68 } // namespace xe #endif // XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_ # === new file: src/xenia/base/audit_68_host_mem_watch_base.cc === /** ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** * AUDIT-068 host-side memory-write watch — implementation (xenia-base). * * Mirrors AUDIT-067 in spirit (value-CSV cvar, lazy parse, atomic-bool * activation) but observes the HOST-side write paths instead of the JIT'd * guest store opcodes. Captures writes performed by xe::store_and_swap * (xenia/base/memory.h) and by Memory::Zero/Fill/Copy (xenia/memory.cc). * * Lives in xenia-base so that the slow-path symbols resolve for callers in * xenia-base / xenia-cpu / xenia-kernel without depending on xenia-core link * order. The host→guest VA translation is provided by a function-pointer * thunk that xenia::Memory::Memory() registers at construction. * * See xenia/base/audit_68_host_mem_watch_fwd.h for the API. * See xenia/cpu/cpu_flags.{h,cc} for the cvars. 
****************************************************************************** */ #include "xenia/base/audit_68_host_mem_watch_fwd.h" #include #include #include #include #include #include #include #include #include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/threading.h" // We need the cvars but cpu_flags.h lives in xenia-cpu. To avoid an upward // dep we re-declare them here with the same macros — cvar.h's DECLARE_* // macros are header-safe (just `extern` declarations) and resolve against the // definitions in xenia-cpu/cpu_flags.cc at link time. (xenia-cpu links AFTER // xenia-base in the executable; symbols in xenia-cpu/cpu_flags.cc are still // resolvable from xenia-base translation units because the lld pass folds // all libraries together at the executable level.) DECLARE_string(audit_68_host_mem_watch_values); DECLARE_string(audit_68_host_mem_watch_addrs); DECLARE_string(audit_68_host_mem_read_probe); namespace xe { namespace audit_68 { // Hot-path flag (declared in fwd header). Initial sentinel UINT32_MAX means // "unparsed"; the very first slow-path call invokes ensure_parsed() which // replaces the sentinel with the actual active bitmask (0 if both cvars are // empty, 1/2/3 otherwise). After that, hot-path calls observe the real value // and bail out cheaply when off. std::atomic g_active{0xFFFFFFFFu}; // Host→guest VA translation thunk (declared in fwd header). Set by // xenia::Memory::Memory() at construction; reset to nullptr by ~Memory(). HostToGuestThunk g_host_to_guest_thunk{nullptr}; // AUDIT-068 Session 3: guest→host translation + page-protect query thunks. 
GuestToHostThunk g_guest_to_host_thunk{nullptr}; QueryProtectThunk g_query_protect_thunk{nullptr}; namespace { constexpr size_t kMaxValues = 8; constexpr size_t kMaxAddrRanges = 8; struct AddrRange { uint32_t start; // inclusive uint32_t end; // inclusive }; std::vector g_values; std::vector g_addrs; std::once_flag g_parsed_flag; std::chrono::steady_clock::time_point g_t0; std::once_flag g_t0_once; int64_t host_ns_since_start() { std::call_once(g_t0_once, []() { g_t0 = std::chrono::steady_clock::now(); }); return std::chrono::duration_cast( std::chrono::steady_clock::now() - g_t0) .count(); } void trim(std::string& s) { while (!s.empty() && (s.front() == ' ' || s.front() == '\t')) { s.erase(s.begin()); } while (!s.empty() && (s.back() == ' ' || s.back() == '\t')) { s.pop_back(); } } bool parse_u32(const std::string& tok, uint32_t* out) { try { *out = static_cast(std::stoul(tok, nullptr, 0)); return true; } catch (...) { return false; } } void parse_values_csv(const std::string& csv) { size_t pos = 0; while (pos < csv.size() && g_values.size() < kMaxValues) { size_t end = csv.find(',', pos); std::string tok = csv.substr(pos, end - pos); trim(tok); if (!tok.empty()) { uint32_t v; if (parse_u32(tok, &v)) { g_values.push_back(v); } } if (end == std::string::npos) break; pos = end + 1; } } void parse_addrs_csv(const std::string& csv) { size_t pos = 0; while (pos < csv.size() && g_addrs.size() < kMaxAddrRanges) { size_t end = csv.find(',', pos); std::string tok = csv.substr(pos, end - pos); trim(tok); if (!tok.empty()) { size_t dash = tok.find('-', 2); // skip leading "0x" if present AddrRange r{}; if (dash != std::string::npos) { std::string s = tok.substr(0, dash); std::string e = tok.substr(dash + 1); trim(s); trim(e); uint32_t a, b; if (parse_u32(s, &a) && parse_u32(e, &b)) { r.start = a; r.end = b; g_addrs.push_back(r); } } else { uint32_t a; if (parse_u32(tok, &a)) { r.start = a; r.end = a + 7; g_addrs.push_back(r); } } } if (end == std::string::npos) break; pos = 
end + 1; } } void parse_locked() { parse_values_csv(cvars::audit_68_host_mem_watch_values); parse_addrs_csv(cvars::audit_68_host_mem_watch_addrs); uint32_t bits = 0; if (!g_values.empty()) bits |= 0x1; if (!g_addrs.empty()) bits |= 0x2; g_active.store(bits, std::memory_order_release); XELOGI( "AUDIT-068-INIT values_csv=\"{}\" addrs_csv=\"{}\" values_parsed={} " "addr_ranges_parsed={} active=0x{:X}", cvars::audit_68_host_mem_watch_values, cvars::audit_68_host_mem_watch_addrs, g_values.size(), g_addrs.size(), bits); for (size_t i = 0; i < g_values.size(); ++i) { XELOGI("AUDIT-068-INIT value[{}] = 0x{:08X}", i, g_values[i]); } for (size_t i = 0; i < g_addrs.size(); ++i) { XELOGI("AUDIT-068-INIT addr_range[{}] = 0x{:08X}-0x{:08X}", i, g_addrs[i].start, g_addrs[i].end); } } bool value_matches(uint64_t value, uint8_t size) { for (uint32_t v : g_values) { if (size >= 4 && static_cast(value) == v) return true; if (size == 8 && static_cast(value >> 32) == v) return true; if (size == 2 && (v & 0xFFFF) == (value & 0xFFFF)) return true; if (size == 1 && (v & 0xFF) == (value & 0xFF)) return true; } return false; } bool addr_matches(uint32_t guest_va, uint8_t size) { uint32_t lo = guest_va; uint32_t hi = guest_va + (size ? size - 1 : 0); for (const auto& r : g_addrs) { if (lo <= r.end && hi >= r.start) return true; } return false; } uint32_t current_tid() { return xe::threading::current_thread_id(); } void emit(uint32_t guest_va, const void* host_ptr, uint64_t value, uint8_t size, const char* tag) { XELOGI( "AUDIT-068-HOST-WRITE guest_va=0x{:08X} host_ptr=0x{:016X} " "val=0x{:016X} sz={} fn={} host_ns={} tid={}", guest_va, reinterpret_cast(host_ptr), value, static_cast(size), tag ? 
tag : "", host_ns_since_start(), current_tid()); } // ===== AUDIT-068 Session 3 — read-mode probe state ===== constexpr size_t kMaxReadProbes = 8; struct ReadProbe { uint32_t guest_va; uint8_t size; // 1, 2, 4, 8 uint64_t period_ns; uint64_t last_value; bool last_was_valid; }; std::vector g_read_probes; std::atomic g_read_probe_thread_running{false}; std::atomic g_read_probe_shutdown{false}; std::thread g_read_probe_thread; std::once_flag g_read_probe_started; bool parse_read_probe_tok(const std::string& tok, ReadProbe* out) { // Expected form: "VA:SIZE:PERIOD_NS" — three colon-separated u64. size_t c1 = tok.find(':'); if (c1 == std::string::npos) return false; size_t c2 = tok.find(':', c1 + 1); if (c2 == std::string::npos) return false; std::string sva = tok.substr(0, c1); std::string ssz = tok.substr(c1 + 1, c2 - c1 - 1); std::string sper = tok.substr(c2 + 1); trim(sva); trim(ssz); trim(sper); try { out->guest_va = static_cast(std::stoul(sva, nullptr, 0)); uint32_t sz = static_cast(std::stoul(ssz, nullptr, 0)); if (sz != 1 && sz != 2 && sz != 4 && sz != 8) return false; out->size = static_cast(sz); out->period_ns = static_cast(std::stoull(sper, nullptr, 0)); if (out->period_ns < 1000) out->period_ns = 1000; // 1us floor. out->last_value = 0; out->last_was_valid = false; return true; } catch (...) 
{ return false; } } void parse_read_probes_csv(const std::string& csv) { size_t pos = 0; while (pos < csv.size() && g_read_probes.size() < kMaxReadProbes) { size_t end = csv.find(',', pos); std::string tok = csv.substr(pos, end - pos); trim(tok); if (!tok.empty()) { ReadProbe rp{}; if (parse_read_probe_tok(tok, &rp)) { g_read_probes.push_back(rp); } } if (end == std::string::npos) break; pos = end + 1; } } uint64_t sample_at(uint32_t guest_va, uint8_t size, bool* out_valid) { *out_valid = false; if (!g_guest_to_host_thunk || !g_query_protect_thunk) return 0; uint32_t prot = 0; if (!g_query_protect_thunk(guest_va, &prot)) return 0; // Page must have at least read permission. The protect bits map to // xe::memory::PageAccess: kReadOnly=1, kReadWrite=2, kExecuteReadOnly=3, // kExecuteReadWrite=4. kNoAccess=0. Accept anything non-zero — caller // distinguishes via the second-pass change detector anyway. if (prot == 0) return 0; const void* hp = g_guest_to_host_thunk(guest_va); if (!hp) return 0; uint64_t v = 0; // Guest memory is big-endian. We use raw byte loads to avoid alignment // traps for size>4 on possibly-unaligned VAs. The "value" we log is the // host-endian interpretation of the BE bytes (matches store_and_swap's // logging convention: the byte-swapped scalar). const uint8_t* bp = reinterpret_cast(hp); switch (size) { case 1: v = bp[0]; break; case 2: v = (uint64_t(bp[0]) << 8) | bp[1]; break; case 4: v = (uint64_t(bp[0]) << 24) | (uint64_t(bp[1]) << 16) | (uint64_t(bp[2]) << 8) | bp[3]; break; case 8: v = (uint64_t(bp[0]) << 56) | (uint64_t(bp[1]) << 48) | (uint64_t(bp[2]) << 40) | (uint64_t(bp[3]) << 32) | (uint64_t(bp[4]) << 24) | (uint64_t(bp[5]) << 16) | (uint64_t(bp[6]) << 8) | bp[7]; break; } *out_valid = true; return v; } void read_probe_thread_main() { // Compute the GCD-ish min poll period across all probes; sleep that long // between scans. 
Each probe fires only when its own period_ns has elapsed // since the last sample (per-probe `next_fire_ns`). uint64_t min_period_ns = UINT64_MAX; for (const auto& p : g_read_probes) { if (p.period_ns < min_period_ns) min_period_ns = p.period_ns; } if (min_period_ns == UINT64_MAX) return; // Per-probe next-fire times. std::vector next_fire(g_read_probes.size(), 0); XELOGI( "AUDIT-068-READ-INIT probe_count={} min_period_ns={} thread spawned", g_read_probes.size(), min_period_ns); for (size_t i = 0; i < g_read_probes.size(); ++i) { XELOGI("AUDIT-068-READ-INIT probe[{}] va=0x{:08X} size={} period_ns={}", i, g_read_probes[i].guest_va, static_cast(g_read_probes[i].size), g_read_probes[i].period_ns); } while (!g_read_probe_shutdown.load(std::memory_order_relaxed)) { int64_t now_ns = host_ns_since_start(); for (size_t i = 0; i < g_read_probes.size(); ++i) { if (static_cast(now_ns) < next_fire[i]) continue; ReadProbe& rp = g_read_probes[i]; bool valid = false; uint64_t v = sample_at(rp.guest_va, rp.size, &valid); if (valid) { if (!rp.last_was_valid) { // First successful read: emit the initial value, do NOT call it a // "change" — but log so we know when the VA mapped. XELOGI( "AUDIT-068-READ-INITIAL va=0x{:08X} val=0x{:016X} sz={} " "host_ns={} tid=probe", rp.guest_va, v, static_cast(rp.size), now_ns); rp.last_value = v; rp.last_was_valid = true; } else if (v != rp.last_value) { XELOGI( "AUDIT-068-READ-CHANGE va=0x{:08X} old=0x{:016X} " "new=0x{:016X} sz={} host_ns={} tid=probe", rp.guest_va, rp.last_value, v, static_cast(rp.size), now_ns); rp.last_value = v; } } else if (rp.last_was_valid) { // Was valid, now invalid — page unmapped/reprotected. 
XELOGI( "AUDIT-068-READ-UNMAPPED va=0x{:08X} last=0x{:016X} sz={} " "host_ns={} tid=probe", rp.guest_va, rp.last_value, static_cast(rp.size), now_ns); rp.last_was_valid = false; } next_fire[i] = static_cast(now_ns) + rp.period_ns; } // Sleep until the next earliest fire, but no shorter than 1us and no // longer than min_period_ns (to keep shutdown latency bounded). uint64_t sleep_ns = min_period_ns; if (sleep_ns < 1000) sleep_ns = 1000; std::this_thread::sleep_for(std::chrono::nanoseconds(sleep_ns)); } XELOGI("AUDIT-068-READ-EXIT thread shutting down"); } void start_read_probe_thread_if_configured() { std::call_once(g_read_probe_started, []() { parse_read_probes_csv(cvars::audit_68_host_mem_read_probe); if (g_read_probes.empty()) return; if (!g_guest_to_host_thunk || !g_query_protect_thunk) { XELOGI( "AUDIT-068-READ-INIT thunks not ready (guest_to_host={} " "query_protect={}) — read probe deferred", (void*)g_guest_to_host_thunk, (void*)g_query_protect_thunk); return; } g_read_probe_thread_running.store(true, std::memory_order_release); g_read_probe_thread = std::thread(&read_probe_thread_main); g_read_probe_thread.detach(); // best-effort; daemon-style. }); } } // namespace void ensure_parsed() { std::call_once(g_parsed_flag, parse_locked); } void check_host_write_slowpath(const void* host_ptr, uint64_t value, uint8_t size, const char* tag) { // AUDIT-068 Session 2: defer parsing until Memory::Memory() has registered // the host→guest thunk. This guarantees the cmdline cvar override has been // applied AND the logging subsystem is alive before we latch g_active. // Without this gate, a be::set() call during static-init (e.g. from a // global initializer in another translation unit) would trigger // parse_locked() before cpu_flags.cc's cvar objects are constructed — // latching g_active=0 permanently and silencing the watch. 
HostToGuestThunk thunk = g_host_to_guest_thunk; if (!thunk) return; ensure_parsed(); // AUDIT-068 Session 3: lazy-start the read-probe poll thread. Same gate as // ensure_parsed() — must come after Memory::Memory() has registered the // thunks so the probe can read pages safely. start_read_probe_thread_if_configured(); uint32_t active = g_active.load(std::memory_order_acquire); if (active == 0) return; uint32_t guest_va = 0; if (thunk) { guest_va = thunk(host_ptr); } bool hit = false; if ((active & 0x1) && value_matches(value, size)) hit = true; if (!hit && (active & 0x2) && thunk && addr_matches(guest_va, size)) { hit = true; } if (!hit) return; emit(guest_va, host_ptr, value, size, tag); } void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size, const char* tag) { // AUDIT-068 Session 2: same static-init gate as check_host_write_slowpath. // Callers (Memory::Zero/Fill/Copy + xex_module audit68_prescan_memcpy) only // run after Memory::Memory(), but defensive in case of future expansion. if (!g_host_to_guest_thunk) return; ensure_parsed(); uint32_t active = g_active.load(std::memory_order_acquire); if (active == 0) return; bool hit = false; if ((active & 0x1) && value_matches(value, size)) hit = true; if (!hit && (active & 0x2) && addr_matches(guest_va, size)) hit = true; if (!hit) return; emit(guest_va, nullptr, value, size, tag); } } // namespace audit_68 } // namespace xe