handoff: VSync/event-wedge fixes + iterate 2.A–2.BC research notes
Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity + related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps (.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
279
audit-runs/audit-068-host-mem-watch/fix-canary-v2.diff
Normal file
279
audit-runs/audit-068-host-mem-watch/fix-canary-v2.diff
Normal file
@@ -0,0 +1,279 @@
|
||||
# AUDIT-068 Session 2 — canary instrumentation extension diff
|
||||
#
|
||||
# Generated 2026-05-19. xenia-canary HEAD = 6de80dffe261b368ecefee36c9b2b337335228c0.
|
||||
# Session 1 changes are already in tree (see fix-canary.diff for the cumulative
|
||||
# Session 1 state). This diff is the post-Session-1 → post-Session-2 delta on
|
||||
# four files that Session 2 extended:
|
||||
# - src/xenia/base/byte_order.h (newly instrumented, existing file — Step 1, +27 LOC, be<T>::set() hook)
|
||||
# - src/xenia/memory.cc (extended — Step 2 Memory::Copy byte-scan)
|
||||
# - src/xenia/cpu/xex_module.cc (newly instrumented, existing file — Step 3, +35 LOC, xex_memcpy + lzx_decompress pre-scan)
|
||||
# - src/xenia/base/audit_68_host_mem_watch_base.cc (extended — static-init gate)
|
||||
#
|
||||
# Two of the four files (memory.cc, audit_68_host_mem_watch_base.cc) ALSO contain
|
||||
# Session 1 hooks. To see the pure Session 2 delta, diff against the post-Session-1
|
||||
# state of those files (recoverable from fix-canary.diff).
|
||||
#
|
||||
# byte_order.h was untouched by Session 1; the diff below for that file is purely
|
||||
# Session 2.
|
||||
# xex_module.cc was untouched by Session 1; ditto.
|
||||
#
|
||||
# Engine semantics: cvar-gated default-off, zero hot-path cost when off.
|
||||
# Total Session 2 additive: ~110 LOC.
|
||||
# Reading-error class #35 (Session 1) mitigated: see writer-report-v2.md Run 5.
|
||||
|
||||
diff --git a/src/xenia/base/byte_order.h b/src/xenia/base/byte_order.h
|
||||
index 5a076f319..c80ee0ffc 100644
|
||||
--- a/src/xenia/base/byte_order.h
|
||||
+++ b/src/xenia/base/byte_order.h
|
||||
@@ -11,6 +11,7 @@
|
||||
#define XENIA_BASE_BYTE_ORDER_H_
|
||||
|
||||
#include <cstdint>
|
||||
+#include <type_traits>
|
||||
#if defined __has_include
|
||||
#if __has_include(<version>)
|
||||
#include <version>
|
||||
@@ -21,6 +22,7 @@
|
||||
#endif
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
||||
#include "xenia/base/platform.h"
|
||||
|
||||
#if !__cpp_lib_endian
|
||||
@@ -88,6 +90,30 @@ struct endian_store {
|
||||
operator T() const { return get(); }
|
||||
|
||||
void set(const T& src) {
|
||||
+ // AUDIT-068 Session 2: hook the canonical be<T>/le<T> write path. Gated
|
||||
+ // on the host→guest thunk being installed by Memory::Memory(); without
|
||||
+ // that there is no Memory and therefore no possible guest-memory write.
|
||||
+ // This ALSO prevents the slow-path from running during static-init order
|
||||
+ // (which would race the cvar object construction in cpu_flags.cc and
|
||||
+ // permanently latch g_active=0 before --audit_68_* cmdline override
|
||||
+ // applies). See reading-error #35 / Session 2 plan.
|
||||
+ if constexpr (sizeof(T) <= 8 && std::is_integral_v<T>) {
|
||||
+ if (xe::audit_68::g_host_to_guest_thunk != nullptr) [[unlikely]] {
|
||||
+ uint64_t v;
|
||||
+ if constexpr (sizeof(T) == 8) {
|
||||
+ v = static_cast<uint64_t>(src);
|
||||
+ } else if constexpr (sizeof(T) == 4) {
|
||||
+ v = static_cast<uint64_t>(static_cast<uint32_t>(src));
|
||||
+ } else if constexpr (sizeof(T) == 2) {
|
||||
+ v = static_cast<uint64_t>(static_cast<uint16_t>(src));
|
||||
+ } else {
|
||||
+ v = static_cast<uint64_t>(static_cast<uint8_t>(src));
|
||||
+ }
|
||||
+ xe::audit_68::check_host_write(
|
||||
+ &value, v, static_cast<uint8_t>(sizeof(T)),
|
||||
+ E == std::endian::big ? "be<T>::set" : "le<T>::set");
|
||||
+ }
|
||||
+ }
|
||||
if constexpr (std::endian::native == E) {
|
||||
value = src;
|
||||
} else {
|
||||
diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc
|
||||
index 1034dcac7..38148010c 100644
|
||||
--- a/src/xenia/cpu/xex_module.cc
|
||||
+++ b/src/xenia/cpu/xex_module.cc
|
||||
@@ -51,6 +51,38 @@ DECLARE_bool(allow_plugins);
|
||||
|
||||
DECLARE_bool(disable_context_promotion);
|
||||
|
||||
+// AUDIT-068 Session 2: helper that scans a raw byte buffer for 4-byte aligned
|
||||
+// u32 values that match the configured audit_68 value list, emitting a
|
||||
+// per-position event. Used to pre-scan XEX-loader memcpys that bypass all
|
||||
+// other hooked surfaces. Cost when off: a single relaxed atomic load.
|
||||
+static inline void audit68_prescan_memcpy(uint32_t guest_va_dest,
|
||||
+ const uint8_t* src, size_t size,
|
||||
+ const char* tag) {
|
||||
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
|
||||
+ if (active == 0) return;
|
||||
+ if ((active & 0x1) && size >= 4) {
|
||||
+ size_t aligned_end = size & ~size_t(3);
|
||||
+ for (size_t i = 0; i < aligned_end; i += 4) {
|
||||
+ uint32_t be_u32 = (uint32_t(src[i + 0]) << 24) |
|
||||
+ (uint32_t(src[i + 1]) << 16) |
|
||||
+ (uint32_t(src[i + 2]) << 8) | uint32_t(src[i + 3]);
|
||||
+ xe::audit_68::check_guest_va(
|
||||
+ static_cast<uint32_t>(guest_va_dest + i), be_u32, 4, tag);
|
||||
+ }
|
||||
+ }
|
||||
+ if (active & 0x2) {
|
||||
+ // Coarse addr-only event over the full span (dest only).
|
||||
+ uint64_t v = 0;
|
||||
+ if (size >= 4) {
|
||||
+ v = (uint64_t(src[0]) << 24) | (uint64_t(src[1]) << 16) |
|
||||
+ (uint64_t(src[2]) << 8) | uint64_t(src[3]);
|
||||
+ }
|
||||
+ xe::audit_68::check_guest_va(guest_va_dest, v,
|
||||
+ static_cast<uint8_t>(std::min<size_t>(size, 8)),
|
||||
+ tag);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static constexpr uint8_t xe_xex1_retail_key[16] = {
|
||||
0xA2, 0x6C, 0x10, 0xF7, 0x1F, 0xD9, 0x35, 0xE9,
|
||||
0x8B, 0x99, 0x92, 0x2C, 0xE9, 0x32, 0x15, 0x72};
|
||||
@@ -424,6 +456,10 @@ int XexModule::ApplyPatch(XexModule* module) {
|
||||
// If image_source_offset is set, copy [source_offset:source_size] to
|
||||
// target_offset
|
||||
if (patch_header->delta_image_source_offset) {
|
||||
+ audit68_prescan_memcpy(
|
||||
+ module->base_address_ + patch_header->delta_image_target_offset,
|
||||
+ base_exe + patch_header->delta_image_source_offset,
|
||||
+ patch_header->delta_image_source_size, "xex_memcpy_patch");
|
||||
memcpy(base_exe + patch_header->delta_image_target_offset,
|
||||
base_exe + patch_header->delta_image_source_offset,
|
||||
patch_header->delta_image_source_size);
|
||||
@@ -589,6 +625,8 @@ int XexModule::ReadImageUncompressed(const void* xex_addr, size_t xex_length) {
|
||||
if (exe_length > uncompressed_size) {
|
||||
return 1;
|
||||
}
|
||||
+ audit68_prescan_memcpy(base_address_, p, exe_length,
|
||||
+ "xex_memcpy_uncompressed");
|
||||
memcpy(buffer, p, exe_length);
|
||||
return 0;
|
||||
case XEX_ENCRYPTION_NORMAL:
|
||||
@@ -665,6 +703,9 @@ int XexModule::ReadImageBasicCompressed(const void* xex_addr,
|
||||
// Overflow.
|
||||
return 1;
|
||||
}
|
||||
+ audit68_prescan_memcpy(
|
||||
+ base_address_ + static_cast<uint32_t>(d - buffer), p, data_size,
|
||||
+ "xex_memcpy_basic_block");
|
||||
memcpy(d, p, data_size);
|
||||
break;
|
||||
case XEX_ENCRYPTION_NORMAL: {
|
||||
@@ -799,6 +840,17 @@ int XexModule::ReadImageCompressed(const void* xex_addr, size_t xex_length) {
|
||||
result_code = lzx_decompress(
|
||||
compress_buffer, d - compress_buffer, buffer, uncompressed_size,
|
||||
compression_info->normal.window_size, nullptr, 0);
|
||||
+
|
||||
+ // AUDIT-068 Session 2: lzx_decompress writes directly into guest
|
||||
+ // memory via the host pointer `buffer`. There's no host-side hook
|
||||
+ // covering its internal bulk writes, so post-scan the produced bytes
|
||||
+ // to recover what the XEX loader actually placed at `base_address_`.
|
||||
+ // This is THE most likely catch for the vtable install case (vtables
|
||||
+ // live in the .rdata section that is part of the LZX-compressed image).
|
||||
+ if (result_code == 0) {
|
||||
+ audit68_prescan_memcpy(base_address_, buffer, uncompressed_size,
|
||||
+ "xex_lzx_decompress_output");
|
||||
+ }
|
||||
} else {
|
||||
XELOGE("Unable to allocate XEX memory at {:08X}-{:08X}.", base_address_,
|
||||
uncompressed_size);
|
||||
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
|
||||
index 22ba66aee..819a8a8a2 100644
|
||||
--- a/src/xenia/memory.cc
|
||||
+++ b/src/xenia/memory.cc
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/assert.h"
|
||||
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
||||
#include "xenia/base/byte_stream.h"
|
||||
#include "xenia/base/clock.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
|
||||
|
||||
static Memory* active_memory_ = nullptr;
|
||||
|
||||
+// AUDIT-068 — process-global accessor (declared in memory.h).
|
||||
+Memory* Memory::active() { return active_memory_; }
|
||||
+
|
||||
void CrashDump() {
|
||||
static std::atomic<int> in_crash_dump(0);
|
||||
if (in_crash_dump.fetch_add(1)) {
|
||||
@@ -151,11 +155,19 @@ Memory::Memory() {
|
||||
uint32_t(xe::memory::allocation_granularity());
|
||||
assert_zero(active_memory_);
|
||||
active_memory_ = this;
|
||||
+
|
||||
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
|
||||
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
|
||||
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
|
||||
+ Memory* m = active_memory_;
|
||||
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
|
||||
+ };
|
||||
}
|
||||
|
||||
Memory::~Memory() {
|
||||
assert_true(active_memory_ == this);
|
||||
active_memory_ = nullptr;
|
||||
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
|
||||
|
||||
// Uninstall the MMIO handler, as we won't be able to service more
|
||||
// requests.
|
||||
@@ -540,16 +552,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
|
||||
}
|
||||
|
||||
void Memory::Zero(uint32_t address, uint32_t size) {
|
||||
+ // AUDIT-068: log a single span event with value=0; size is capped at 8 for
|
||||
+ // the value field. Slow path is gated on the atomic flag.
|
||||
+ xe::audit_68::check_guest_va(address, 0,
|
||||
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Zero");
|
||||
std::memset(TranslateVirtual(address), 0, size);
|
||||
}
|
||||
|
||||
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
|
||||
+ // Replicate the fill byte across the value field so value_matches can
|
||||
+ // recognise e.g. 0xDEADBEEF only if the byte is 0xDE/0xAD/0xBE/0xEF — for
|
||||
+ // capture purposes the byte itself in the low slot is enough.
|
||||
+ uint64_t v = static_cast<uint64_t>(value);
|
||||
+ v |= v << 8;
|
||||
+ v |= v << 16;
|
||||
+ v |= v << 32;
|
||||
+ xe::audit_68::check_guest_va(address, v,
|
||||
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Fill");
|
||||
std::memset(TranslateVirtual(address), value, size);
|
||||
}
|
||||
|
||||
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
|
||||
uint8_t* pdest = TranslateVirtual(dest);
|
||||
const uint8_t* psrc = TranslateVirtual(src);
|
||||
+ // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the
|
||||
+ // source buffer. Catches XEX-loader-style memcpys where a vptr (the target
|
||||
+ // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost
|
||||
+ // O(size/4 * N_values) with N_values capped at 8 inside value_matches —
|
||||
+ // negligible vs the underlying memcpy throughput.
|
||||
+ //
|
||||
+ // The scan is gated on active bit 0x1 (values-mode) and size >= 4. If only addrs are
|
||||
+ // configured (Run 2 voice-struct mode), we still emit a single addr-only
|
||||
+ // event covering the destination span so addr-watch isn't broken.
|
||||
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
|
||||
+ if (active != 0) [[unlikely]] {
|
||||
+ if ((active & 0x1) && size >= 4) {
|
||||
+ // Scan source for any configured u32 value (big-endian, mirrors how
|
||||
+ // guest sees the bytes). 4-byte aligned offsets only.
|
||||
+ uint32_t aligned_end = size & ~3u;
|
||||
+ for (uint32_t i = 0; i < aligned_end; i += 4) {
|
||||
+ uint32_t be_u32 =
|
||||
+ (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) |
|
||||
+ (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]);
|
||||
+ xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy");
|
||||
+ }
|
||||
+ }
|
||||
+ if (active & 0x2) {
|
||||
+ // Addr-only mode: emit a single coarse event tagged with the dest base
|
||||
+ // and first u32 of source for context. The slow-path range check will
|
||||
+ // log iff the dest span intersects a configured addr range.
|
||||
+ uint64_t v = 0;
|
||||
+ if (size >= 4) {
|
||||
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
|
||||
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
|
||||
+ } else if (size > 0) {
|
||||
+ for (uint32_t i = 0; i < size; ++i) {
|
||||
+ v = (v << 8) | psrc[i];
|
||||
+ }
|
||||
+ }
|
||||
+ xe::audit_68::check_guest_va(
|
||||
+ dest, v, static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Copy");
|
||||
+ }
|
||||
+ }
|
||||
std::memcpy(pdest, psrc, size);
|
||||
}
|
||||
|
||||
841
audit-runs/audit-068-host-mem-watch/fix-canary-v3.diff
Normal file
841
audit-runs/audit-068-host-mem-watch/fix-canary-v3.diff
Normal file
@@ -0,0 +1,841 @@
|
||||
=== AUDIT-068 Session 3 — canary instrumentation v3 diff ===
|
||||
=== Date: 2026-05-20 ===
|
||||
=== Cumulative tracked-file diff (cpu_flags.{h,cc}, memory.cc) ===
|
||||
|
||||
diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
|
||||
index 3ff067e15..2298dd3d7 100644
|
||||
--- a/src/xenia/cpu/cpu_flags.cc
|
||||
+++ b/src/xenia/cpu/cpu_flags.cc
|
||||
@@ -57,3 +57,83 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU");
|
||||
|
||||
DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.",
|
||||
"CPU");
|
||||
+
|
||||
+// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool.
|
||||
+DEFINE_bool(audit_demo_setup_trace, true,
|
||||
+ "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// AUDIT-061: comma-separated list of guest PCs to log on each fire.
|
||||
+// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits
|
||||
+// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N.
|
||||
+// Default empty (off); no perf cost when empty.
|
||||
+DEFINE_string(audit_61_branch_probe_pcs, "",
|
||||
+ "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).",
|
||||
+ "Audit");
|
||||
+
|
||||
+// AUDIT-067: comma-separated list of u32 values to watch. When non-empty,
|
||||
+// every 4-byte guest store (stw/stwu/stwx/stwux/stmw) emits a runtime
|
||||
+// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N.
|
||||
+// Max 4 values. Default empty (off); zero overhead when empty.
|
||||
+DEFINE_string(audit_67_value_watch, "",
|
||||
+ "AUDIT-067: CSV of u32 values (max 4) — log every guest "
|
||||
+ "store whose value matches.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// AUDIT-068: host-side memory-write watch. See cpu_flags.h header for format.
|
||||
+// Mirrors AUDIT-067 but covers host-side writes (xe::store_and_swap<T>,
|
||||
+// Memory::Zero/Fill/Copy). Empty default = zero cost.
|
||||
+DEFINE_string(audit_68_host_mem_watch_values, "",
|
||||
+ "AUDIT-068: CSV of u32 values (max 8) — log every host-side "
|
||||
+ "guest-memory write whose value matches.",
|
||||
+ "Audit");
|
||||
+DEFINE_string(audit_68_host_mem_watch_addrs, "",
|
||||
+ "AUDIT-068: CSV of guest VAs or VA ranges 'START-END' (max 8) "
|
||||
+ "— log every host-side guest-memory write whose guest VA falls "
|
||||
+ "within the configured set.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// AUDIT-068 Session 3: read-mode probe. See cpu_flags.h for format.
|
||||
+DEFINE_string(audit_68_host_mem_read_probe, "",
|
||||
+ "AUDIT-068 Session 3: CSV of 'VA:SIZE:PERIOD_NS' tuples (max 8) "
|
||||
+ "— a dedicated poll thread reads the value at each VA every "
|
||||
+ "PERIOD_NS and emits AUDIT-068-READ-CHANGE on transition.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// Phase A — see kernel/event_log.h.
|
||||
+DEFINE_string(phase_a_event_log_path, "",
|
||||
+ "Phase A: write schema-v1 JSONL event log to this path. "
|
||||
+ "Empty (default) = disabled.",
|
||||
+ "Audit");
|
||||
+DEFINE_bool(phase_a_event_log_mem_writes, false,
|
||||
+ "Phase A: include mem.write events in the JSONL log. RESERVED — "
|
||||
+ "not wired in this phase. Default false.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// Phase D Stage 1 — see kernel/event_log.h `EmitContentionObserved`.
|
||||
+DEFINE_bool(kernel_emit_contention, false,
|
||||
+ "Phase D Stage 1: emit `contention.observed` events when "
|
||||
+ "RtlEnterCriticalSection's spin loop is exhausted and the call "
|
||||
+ "falls through to xeKeWaitForSingleObject. Default false (zero "
|
||||
+ "cost when disabled). Requires --phase_a_event_log_path to be "
|
||||
+ "set as well.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// Phase B — see kernel/phase_b_snapshot.h.
|
||||
+DEFINE_string(phase_b_snapshot_dir, "",
|
||||
+ "Phase B: write 5-file structured state snapshot to "
|
||||
+ "<dir>/canary/ at the moment immediately before the first "
|
||||
+ "guest PPC instruction of entry_point. Empty (default) = "
|
||||
+ "disabled, zero overhead.",
|
||||
+ "Audit");
|
||||
+DEFINE_bool(phase_b_snapshot_and_exit, false,
|
||||
+ "Phase B: after writing the snapshot, exit the process "
|
||||
+ "immediately (std::_Exit(0)) so re-runs are byte-deterministic.",
|
||||
+ "Audit");
|
||||
+DEFINE_bool(phase_b_dump_section_content, false,
|
||||
+ "Phase B: in memory.json, populate section_contents[].content_b64 "
|
||||
+ "with raw bytes of every committed XEX-image region. Default "
|
||||
+ "false — per-region SHA-256 is enough for the routine diff; "
|
||||
+ "this is the escape hatch for the STOP-and-report condition "
|
||||
+ "(image_loaded_sha256 mismatch).",
|
||||
+ "Audit");
|
||||
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
|
||||
index 38c4f98ba..9b5ca7a1c 100644
|
||||
--- a/src/xenia/cpu/cpu_flags.h
|
||||
+++ b/src/xenia/cpu/cpu_flags.h
|
||||
@@ -35,4 +35,52 @@ DECLARE_bool(break_condition_truncate);
|
||||
|
||||
DECLARE_bool(break_on_debugbreak);
|
||||
|
||||
+// AUDIT-DEMO smoke marker.
|
||||
+DECLARE_bool(audit_demo_setup_trace);
|
||||
+
|
||||
+// AUDIT-061: multi-PC branch probe — emits one log line per fire with
|
||||
+// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs.
|
||||
+DECLARE_string(audit_61_branch_probe_pcs);
|
||||
+
|
||||
+// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose
|
||||
+// value-to-be-stored matches any configured value. CSV of u32 values
|
||||
+// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty.
|
||||
+DECLARE_string(audit_67_value_watch);
|
||||
+
|
||||
+// AUDIT-068: host-side memory-write watch — emit a log line for each host-side
|
||||
+// write to guest memory whose VALUE matches any configured u32 value, or whose
|
||||
+// guest VA falls within any configured ADDR or ADDR-range. Mirrors AUDIT-067
|
||||
+// but covers the host-side write paths (xe::store_and_swap<T>, Memory::Zero/
|
||||
+// Fill/Copy) that AUDIT-067's JIT store-opcode hooks cannot see.
|
||||
+//
|
||||
+// VALUES: CSV of u32 values, max 8 entries; e.g. "0x8200A208,0x8200A928".
|
||||
+// ADDRS: CSV of guest VAs or VA ranges, max 8 entries; range form is
|
||||
+// "0xSTART-0xEND" (inclusive). e.g. "0x42500000-0x42600000,0xBCE25340".
|
||||
+// Default empty (off); zero cost on the hot path when both are empty.
|
||||
+DECLARE_string(audit_68_host_mem_watch_values);
|
||||
+DECLARE_string(audit_68_host_mem_watch_addrs);
|
||||
+
|
||||
+// AUDIT-068 Session 3: read-mode probe. CSV of "VA:SIZE:PERIOD_NS" tuples
|
||||
+// (max 8). A dedicated low-priority thread polls each VA every PERIOD_NS and
|
||||
+// emits AUDIT-068-READ-CHANGE when the value transitions. SIZE in {1,2,4,8}.
|
||||
+// Example: "0xBCE25340:4:1000000" = poll u32 at 0xBCE25340 every 1 ms.
|
||||
+// Default empty (off); the poll thread is not spawned when empty.
|
||||
+DECLARE_string(audit_68_host_mem_read_probe);
|
||||
+
|
||||
+// Phase A: JSONL event-log emitter path. When non-empty, the engine writes
|
||||
+// schema-v1 JSONL events to this file. Empty (default) = no overhead, no
|
||||
+// behavior change. Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md
|
||||
+DECLARE_string(phase_a_event_log_path);
|
||||
+DECLARE_bool(phase_a_event_log_mem_writes);
|
||||
+
|
||||
+// Phase B: initial-state snapshot. When the dir cvar is non-empty, the
|
||||
+// engine writes a five-file structured state snapshot (cpu_state.json,
|
||||
+// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to
|
||||
+// `<dir>/canary/` at the moment immediately before the first guest PPC
|
||||
+// instruction of the XEX entry_point executes. See
|
||||
+// `xenia-rs/audit-runs/phase-b-state-equivalence/`.
|
||||
+DECLARE_string(phase_b_snapshot_dir);
|
||||
+DECLARE_bool(phase_b_snapshot_and_exit);
|
||||
+DECLARE_bool(phase_b_dump_section_content);
|
||||
+
|
||||
#endif // XENIA_CPU_CPU_FLAGS_H_
|
||||
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
|
||||
index 22ba66aee..f02b11d7f 100644
|
||||
--- a/src/xenia/memory.cc
|
||||
+++ b/src/xenia/memory.cc
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/assert.h"
|
||||
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
||||
#include "xenia/base/byte_stream.h"
|
||||
#include "xenia/base/clock.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
|
||||
|
||||
static Memory* active_memory_ = nullptr;
|
||||
|
||||
+// AUDIT-068 — process-global accessor (declared in memory.h).
|
||||
+Memory* Memory::active() { return active_memory_; }
|
||||
+
|
||||
void CrashDump() {
|
||||
static std::atomic<int> in_crash_dump(0);
|
||||
if (in_crash_dump.fetch_add(1)) {
|
||||
@@ -151,11 +155,41 @@ Memory::Memory() {
|
||||
uint32_t(xe::memory::allocation_granularity());
|
||||
assert_zero(active_memory_);
|
||||
active_memory_ = this;
|
||||
+
|
||||
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
|
||||
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
|
||||
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
|
||||
+ Memory* m = active_memory_;
|
||||
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
|
||||
+ };
|
||||
+
|
||||
+ // AUDIT-068 Session 3: register guest→host translation thunk and a
|
||||
+ // page-protect query thunk for the read-mode probe. The probe thread uses
|
||||
+ // QueryProtect to skip unmapped/uncommitted pages before dereferencing.
|
||||
+ xe::audit_68::g_guest_to_host_thunk = [](uint32_t va) -> const void* {
|
||||
+ Memory* m = active_memory_;
|
||||
+ return m ? reinterpret_cast<const void*>(m->TranslateVirtual(va))
|
||||
+ : nullptr;
|
||||
+ };
|
||||
+ xe::audit_68::g_query_protect_thunk = [](uint32_t va,
|
||||
+ uint32_t* out_protect) -> bool {
|
||||
+ Memory* m = active_memory_;
|
||||
+ if (!m) return false;
|
||||
+ BaseHeap* heap = m->LookupHeap(va);
|
||||
+ if (!heap) {
|
||||
+ if (out_protect) *out_protect = 0;
|
||||
+ return false;
|
||||
+ }
|
||||
+ return heap->QueryProtect(va, out_protect);
|
||||
+ };
|
||||
}
|
||||
|
||||
Memory::~Memory() {
|
||||
assert_true(active_memory_ == this);
|
||||
active_memory_ = nullptr;
|
||||
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
|
||||
+ xe::audit_68::g_guest_to_host_thunk = nullptr;
|
||||
+ xe::audit_68::g_query_protect_thunk = nullptr;
|
||||
|
||||
// Uninstall the MMIO handler, as we won't be able to service more
|
||||
// requests.
|
||||
@@ -540,16 +574,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
|
||||
}
|
||||
|
||||
void Memory::Zero(uint32_t address, uint32_t size) {
|
||||
+ // AUDIT-068: log a single span event with value=0; size is capped at 8 for
|
||||
+ // the value field. Slow path is gated on the atomic flag.
|
||||
+ xe::audit_68::check_guest_va(address, 0,
|
||||
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Zero");
|
||||
std::memset(TranslateVirtual(address), 0, size);
|
||||
}
|
||||
|
||||
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
|
||||
+ // Replicate the fill byte across the value field so value_matches can
|
||||
+ // recognise e.g. 0xDEADBEEF only if the byte is 0xDE/0xAD/0xBE/0xEF — for
|
||||
+ // capture purposes the byte itself in the low slot is enough.
|
||||
+ uint64_t v = static_cast<uint64_t>(value);
|
||||
+ v |= v << 8;
|
||||
+ v |= v << 16;
|
||||
+ v |= v << 32;
|
||||
+ xe::audit_68::check_guest_va(address, v,
|
||||
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Fill");
|
||||
std::memset(TranslateVirtual(address), value, size);
|
||||
}
|
||||
|
||||
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
|
||||
uint8_t* pdest = TranslateVirtual(dest);
|
||||
const uint8_t* psrc = TranslateVirtual(src);
|
||||
+ // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the
|
||||
+ // source buffer. Catches XEX-loader-style memcpys where a vptr (the target
|
||||
+ // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost
|
||||
+ // O(size/4 * N_values) with N_values capped at 8 inside value_matches —
|
||||
+ // negligible vs the underlying memcpy throughput.
|
||||
+ //
|
||||
+ // The scan is gated on active bit 0x1 (values-mode) and size >= 4. If only addrs are
|
||||
+ // configured (Run 2 voice-struct mode), we still emit a single addr-only
|
||||
+ // event covering the destination span so addr-watch isn't broken.
|
||||
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
|
||||
+ if (active != 0) [[unlikely]] {
|
||||
+ if ((active & 0x1) && size >= 4) {
|
||||
+ // Scan source for any configured u32 value (big-endian, mirrors how
|
||||
+ // guest sees the bytes). 4-byte aligned offsets only.
|
||||
+ uint32_t aligned_end = size & ~3u;
|
||||
+ for (uint32_t i = 0; i < aligned_end; i += 4) {
|
||||
+ uint32_t be_u32 =
|
||||
+ (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) |
|
||||
+ (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]);
|
||||
+ xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy");
|
||||
+ }
|
||||
+ }
|
||||
+ if (active & 0x2) {
|
||||
+ // Addr-only mode: emit a single coarse event tagged with the dest base
|
||||
+ // and first u32 of source for context. The slow-path range check will
|
||||
+ // log iff the dest span intersects a configured addr range.
|
||||
+ uint64_t v = 0;
|
||||
+ if (size >= 4) {
|
||||
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
|
||||
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
|
||||
+ } else if (size > 0) {
|
||||
+ for (uint32_t i = 0; i < size; ++i) {
|
||||
+ v = (v << 8) | psrc[i];
|
||||
+ }
|
||||
+ }
|
||||
+ xe::audit_68::check_guest_va(
|
||||
+ dest, v, static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Copy");
|
||||
+ }
|
||||
+ }
|
||||
std::memcpy(pdest, psrc, size);
|
||||
}
|
||||
|
||||
|
||||
=== Full contents of new file: src/xenia/base/audit_68_host_mem_watch_fwd.h ===
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* AUDIT-068: host-side memory-write watch — forward declarations only.
|
||||
*
|
||||
* Declarations here are intentionally minimal so that xenia/base/memory.h can
|
||||
* include this without pulling in xenia/memory.h (which would create a
|
||||
* circular dependency: xenia-base → xenia-core → xenia-base). The full
|
||||
* definitions live in xenia/audit_68_host_mem_watch.{h,cc} (xenia-core).
|
||||
*
|
||||
* Hot path: callers (the integer specializations of xe::store_and_swap<T>)
|
||||
* load the atomic flag once. When it is 0 (default), no further work is done
|
||||
* — a single relaxed atomic load and a predictable branch.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
||||
#define XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
|
||||
namespace xe {
|
||||
namespace audit_68 {
|
||||
|
||||
// 0 = inactive (default). Non-zero = the cvars have been parsed and at least
|
||||
// one watch is configured. Set lazily by check_host_write_slowpath() on first
|
||||
// call after cvar parsing. Loaded relaxed on the hot path.
|
||||
//
|
||||
// Implementation lives in xenia-base (audit_68_host_mem_watch_base.cc) so
|
||||
// that callers in xenia-base/xenia-cpu/xenia-kernel can resolve the symbol
|
||||
// without depending on xenia-core link order.
|
||||
extern std::atomic<uint32_t> g_active;
|
||||
|
||||
// Host-pointer → guest-VA translation thunk. xenia/memory.cc::Memory::Memory()
|
||||
// registers a function pointer here that wraps Memory::HostToGuestVirtual.
|
||||
// Until set, the slow path falls back to logging the raw host pointer.
|
||||
using HostToGuestThunk = uint32_t (*)(const void*);
|
||||
extern HostToGuestThunk g_host_to_guest_thunk;
|
||||
|
||||
// AUDIT-068 Session 3 — read-mode probe support.
|
||||
//
|
||||
// Guest-VA → host-pointer translation thunk (wraps Memory::TranslateVirtual).
|
||||
// Used by the read-probe poll thread to sample bytes at configured guest VAs.
|
||||
// May return non-null even for unmapped/uncommitted VAs (the underlying
|
||||
// translation is arithmetic — virtual_membase_ + va) — callers MUST consult
|
||||
// the QueryProtect thunk before dereferencing.
|
||||
using GuestToHostThunk = const void* (*)(uint32_t);
|
||||
extern GuestToHostThunk g_guest_to_host_thunk;
|
||||
|
||||
// Returns true iff the page containing `guest_va` is committed and readable;
|
||||
// out_protect receives the raw page protect bits (kProtectRead, etc.). Wraps
|
||||
// Memory::LookupHeap() + BaseHeap::QueryProtect(). Used as a guard before the
|
||||
// read-probe samples bytes (early-boot heap-not-yet-mapped path must NOT
|
||||
// crash).
|
||||
using QueryProtectThunk = bool (*)(uint32_t, uint32_t* /*out_protect*/);
|
||||
extern QueryProtectThunk g_query_protect_thunk;
|
||||
|
||||
// Slow path. Only invoked when g_active is non-zero. Implementation in
|
||||
// xenia/base/audit_68_host_mem_watch_base.cc (xenia-base).
|
||||
//
|
||||
// host_ptr: the host pointer being written (from store_and_swap's `mem`).
|
||||
// value: the value being stored (zero-extended to u64).
|
||||
// size: 1, 2, 4 or 8.
|
||||
// tag: caller-provided tag string (e.g. "store_and_swap<u32>"). Logged
|
||||
// verbatim, no formatting. Must be a static string (lifetime
|
||||
// beyond this call).
|
||||
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
|
||||
uint8_t size, const char* tag);
|
||||
|
||||
// Same as above, but with a known guest VA (for callers like Memory::Zero/
|
||||
// Fill/Copy that have the VA but not a single host pointer).
|
||||
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
|
||||
const char* tag);
|
||||
|
||||
// Inline hot-path wrappers. Single relaxed atomic load + branch when inactive.
|
||||
inline void check_host_write(const void* host_ptr, uint64_t value, uint8_t size,
|
||||
const char* tag) {
|
||||
if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
||||
check_host_write_slowpath(host_ptr, value, size, tag);
|
||||
}
|
||||
}
|
||||
|
||||
inline void check_guest_va(uint32_t guest_va, uint64_t value, uint8_t size,
|
||||
const char* tag) {
|
||||
if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
||||
check_guest_va_slowpath(guest_va, value, size, tag);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace audit_68
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
||||
|
||||
=== Full contents of new file: src/xenia/base/audit_68_host_mem_watch_base.cc ===
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* AUDIT-068 host-side memory-write watch — implementation (xenia-base).
|
||||
*
|
||||
* Mirrors AUDIT-067 in spirit (value-CSV cvar, lazy parse, atomic-bool
|
||||
* activation) but observes the HOST-side write paths instead of the JIT'd
|
||||
* guest store opcodes. Captures writes performed by xe::store_and_swap<T>
|
||||
* (xenia/base/memory.h) and by Memory::Zero/Fill/Copy (xenia/memory.cc).
|
||||
*
|
||||
* Lives in xenia-base so that the slow-path symbols resolve for callers in
|
||||
* xenia-base / xenia-cpu / xenia-kernel without depending on xenia-core link
|
||||
* order. The host→guest VA translation is provided by a function-pointer
|
||||
* thunk that xenia::Memory::Memory() registers at construction.
|
||||
*
|
||||
* See xenia/base/audit_68_host_mem_watch_fwd.h for the API.
|
||||
* See xenia/cpu/cpu_flags.{h,cc} for the cvars.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/threading.h"
|
||||
|
||||
// We need the cvars but cpu_flags.h lives in xenia-cpu. To avoid an upward
|
||||
// dep we re-declare them here with the same macros — cvar.h's DECLARE_*
|
||||
// macros are header-safe (just `extern` declarations) and resolve against the
|
||||
// definitions in xenia-cpu/cpu_flags.cc at link time. (xenia-cpu links AFTER
|
||||
// xenia-base in the executable; symbols in xenia-cpu/cpu_flags.cc are still
|
||||
// resolvable from xenia-base translation units because the lld pass folds
|
||||
// all libraries together at the executable level.)
|
||||
DECLARE_string(audit_68_host_mem_watch_values);
|
||||
DECLARE_string(audit_68_host_mem_watch_addrs);
|
||||
DECLARE_string(audit_68_host_mem_read_probe);
|
||||
|
||||
namespace xe {
|
||||
namespace audit_68 {
|
||||
|
||||
// Hot-path flag (declared in fwd header). Initial sentinel UINT32_MAX means
|
||||
// "unparsed"; the very first slow-path call invokes ensure_parsed() which
|
||||
// replaces the sentinel with the actual active bitmask (0 if both cvars are
|
||||
// empty, 1/2/3 otherwise). After that, hot-path calls observe the real value
|
||||
// and bail out cheaply when off.
|
||||
std::atomic<uint32_t> g_active{0xFFFFFFFFu};
|
||||
|
||||
// Host→guest VA translation thunk (declared in fwd header). Set by
|
||||
// xenia::Memory::Memory() at construction; reset to nullptr by ~Memory().
|
||||
HostToGuestThunk g_host_to_guest_thunk{nullptr};
|
||||
|
||||
// AUDIT-068 Session 3: guest→host translation + page-protect query thunks.
|
||||
GuestToHostThunk g_guest_to_host_thunk{nullptr};
|
||||
QueryProtectThunk g_query_protect_thunk{nullptr};
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr size_t kMaxValues = 8;
|
||||
constexpr size_t kMaxAddrRanges = 8;
|
||||
|
||||
// Closed interval [start, end] of guest virtual addresses to watch.
struct AddrRange {
  uint32_t start = 0;  // first watched guest VA (inclusive)
  uint32_t end = 0;    // last watched guest VA (inclusive)
};
|
||||
|
||||
std::vector<uint32_t> g_values;
|
||||
std::vector<AddrRange> g_addrs;
|
||||
std::once_flag g_parsed_flag;
|
||||
|
||||
std::chrono::steady_clock::time_point g_t0;
|
||||
std::once_flag g_t0_once;
|
||||
|
||||
// Returns steady-clock nanoseconds elapsed since the first call to this
// function. The epoch g_t0 is latched exactly once via g_t0_once.
int64_t host_ns_since_start() {
  using Clock = std::chrono::steady_clock;
  std::call_once(g_t0_once, []() { g_t0 = Clock::now(); });
  const auto elapsed = Clock::now() - g_t0;
  return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
}
|
||||
|
||||
// Strips leading and trailing spaces and tabs from `s` in place.
// Other whitespace (newlines, carriage returns) is left untouched.
//
// Uses a single find + erase per side instead of erasing the front character
// in a loop, which shifted the whole string once per leading blank.
void trim(std::string& s) {
  const size_t first = s.find_first_not_of(" \t");
  if (first == std::string::npos) {
    // Empty or blank-only input collapses to the empty string.
    s.clear();
    return;
  }
  const size_t last = s.find_last_not_of(" \t");
  s.erase(last + 1);   // drop the trailing run first so `first` stays valid
  s.erase(0, first);   // then drop the leading run
}
|
||||
|
||||
// Parses `tok` as an unsigned 32-bit integer. The base is auto-detected
// (0x.. hex, leading 0 octal, otherwise decimal).
//
// Returns true and writes *out only when the whole token is a number that
// fits in 32 bits. Rejected inputs leave *out untouched:
//   - empty / non-numeric tokens,
//   - tokens with trailing characters after the number ("12abc"),
//   - negative numbers (the std::sto* family would silently wrap them),
//   - values above 0xFFFFFFFF (previously truncated where unsigned long is
//     64 bits wide).
bool parse_u32(const std::string& tok, uint32_t* out) {
  // A valid unsigned literal never contains '-'; reject before parsing so a
  // wrapped negative cannot slip through the range check below.
  if (tok.find('-') != std::string::npos) return false;
  try {
    size_t consumed = 0;
    const unsigned long long parsed = std::stoull(tok, &consumed, 0);
    if (consumed != tok.size()) return false;
    if (parsed > 0xFFFFFFFFull) return false;
    *out = static_cast<uint32_t>(parsed);
    return true;
  } catch (...) {
    // std::invalid_argument / std::out_of_range: not a usable number.
    return false;
  }
}
|
||||
|
||||
void parse_values_csv(const std::string& csv) {
|
||||
size_t pos = 0;
|
||||
while (pos < csv.size() && g_values.size() < kMaxValues) {
|
||||
size_t end = csv.find(',', pos);
|
||||
std::string tok = csv.substr(pos, end - pos);
|
||||
trim(tok);
|
||||
if (!tok.empty()) {
|
||||
uint32_t v;
|
||||
if (parse_u32(tok, &v)) {
|
||||
g_values.push_back(v);
|
||||
}
|
||||
}
|
||||
if (end == std::string::npos) break;
|
||||
pos = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void parse_addrs_csv(const std::string& csv) {
|
||||
size_t pos = 0;
|
||||
while (pos < csv.size() && g_addrs.size() < kMaxAddrRanges) {
|
||||
size_t end = csv.find(',', pos);
|
||||
std::string tok = csv.substr(pos, end - pos);
|
||||
trim(tok);
|
||||
if (!tok.empty()) {
|
||||
size_t dash = tok.find('-', 2); // skip leading "0x" if present
|
||||
AddrRange r{};
|
||||
if (dash != std::string::npos) {
|
||||
std::string s = tok.substr(0, dash);
|
||||
std::string e = tok.substr(dash + 1);
|
||||
trim(s);
|
||||
trim(e);
|
||||
uint32_t a, b;
|
||||
if (parse_u32(s, &a) && parse_u32(e, &b)) {
|
||||
r.start = a;
|
||||
r.end = b;
|
||||
g_addrs.push_back(r);
|
||||
}
|
||||
} else {
|
||||
uint32_t a;
|
||||
if (parse_u32(tok, &a)) {
|
||||
r.start = a;
|
||||
r.end = a + 7;
|
||||
g_addrs.push_back(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (end == std::string::npos) break;
|
||||
pos = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void parse_locked() {
|
||||
parse_values_csv(cvars::audit_68_host_mem_watch_values);
|
||||
parse_addrs_csv(cvars::audit_68_host_mem_watch_addrs);
|
||||
|
||||
uint32_t bits = 0;
|
||||
if (!g_values.empty()) bits |= 0x1;
|
||||
if (!g_addrs.empty()) bits |= 0x2;
|
||||
g_active.store(bits, std::memory_order_release);
|
||||
|
||||
XELOGI(
|
||||
"AUDIT-068-INIT values_csv=\"{}\" addrs_csv=\"{}\" values_parsed={} "
|
||||
"addr_ranges_parsed={} active=0x{:X}",
|
||||
cvars::audit_68_host_mem_watch_values,
|
||||
cvars::audit_68_host_mem_watch_addrs, g_values.size(), g_addrs.size(),
|
||||
bits);
|
||||
for (size_t i = 0; i < g_values.size(); ++i) {
|
||||
XELOGI("AUDIT-068-INIT value[{}] = 0x{:08X}", i, g_values[i]);
|
||||
}
|
||||
for (size_t i = 0; i < g_addrs.size(); ++i) {
|
||||
XELOGI("AUDIT-068-INIT addr_range[{}] = 0x{:08X}-0x{:08X}", i,
|
||||
g_addrs[i].start, g_addrs[i].end);
|
||||
}
|
||||
}
|
||||
|
||||
bool value_matches(uint64_t value, uint8_t size) {
|
||||
for (uint32_t v : g_values) {
|
||||
if (size >= 4 && static_cast<uint32_t>(value) == v) return true;
|
||||
if (size == 8 && static_cast<uint32_t>(value >> 32) == v) return true;
|
||||
if (size == 2 && (v & 0xFFFF) == (value & 0xFFFF)) return true;
|
||||
if (size == 1 && (v & 0xFF) == (value & 0xFF)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool addr_matches(uint32_t guest_va, uint8_t size) {
|
||||
uint32_t lo = guest_va;
|
||||
uint32_t hi = guest_va + (size ? size - 1 : 0);
|
||||
for (const auto& r : g_addrs) {
|
||||
if (lo <= r.end && hi >= r.start) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Identifier of the calling thread, as reported by
// xe::threading::current_thread_id().
uint32_t current_tid() {
  const uint32_t tid = xe::threading::current_thread_id();
  return tid;
}
|
||||
|
||||
// Writes one AUDIT-068-HOST-WRITE log line describing a matched write.
//   guest_va : guest virtual address of the write
//   host_ptr : host pointer written through (logged as an integer; callers
//              that only know the guest VA pass nullptr)
//   value    : stored value, logged as 64-bit hex
//   size     : width of the write in bytes
//   tag      : caller label; logged as "<null>" when null
// The line also carries the elapsed host time and the calling thread id.
void emit(uint32_t guest_va, const void* host_ptr, uint64_t value,
          uint8_t size, const char* tag) {
  const uintptr_t host_addr = reinterpret_cast<uintptr_t>(host_ptr);
  const uint32_t width = static_cast<uint32_t>(size);
  const char* label = "<null>";
  if (tag) {
    label = tag;
  }
  XELOGI(
      "AUDIT-068-HOST-WRITE guest_va=0x{:08X} host_ptr=0x{:016X} "
      "val=0x{:016X} sz={} fn={} host_ns={} tid={}",
      guest_va, host_addr, value, width, label, host_ns_since_start(),
      current_tid());
}
|
||||
|
||||
// ===== AUDIT-068 Session 3 — read-mode probe state =====
|
||||
|
||||
constexpr size_t kMaxReadProbes = 8;
|
||||
|
||||
// One configured read probe plus the state the poll thread keeps for it.
struct ReadProbe {
  uint32_t guest_va = 0;        // guest VA to sample
  uint8_t size = 0;             // sample width in bytes: 1, 2, 4 or 8
  uint64_t period_ns = 0;       // minimum interval between samples
  uint64_t last_value = 0;      // most recent successfully sampled value
  bool last_was_valid = false;  // whether the previous sample succeeded
};
|
||||
|
||||
std::vector<ReadProbe> g_read_probes;
|
||||
std::atomic<bool> g_read_probe_thread_running{false};
|
||||
std::atomic<bool> g_read_probe_shutdown{false};
|
||||
std::thread g_read_probe_thread;
|
||||
std::once_flag g_read_probe_started;
|
||||
|
||||
bool parse_read_probe_tok(const std::string& tok, ReadProbe* out) {
|
||||
// Expected form: "VA:SIZE:PERIOD_NS" — three colon-separated u64.
|
||||
size_t c1 = tok.find(':');
|
||||
if (c1 == std::string::npos) return false;
|
||||
size_t c2 = tok.find(':', c1 + 1);
|
||||
if (c2 == std::string::npos) return false;
|
||||
std::string sva = tok.substr(0, c1);
|
||||
std::string ssz = tok.substr(c1 + 1, c2 - c1 - 1);
|
||||
std::string sper = tok.substr(c2 + 1);
|
||||
trim(sva);
|
||||
trim(ssz);
|
||||
trim(sper);
|
||||
try {
|
||||
out->guest_va = static_cast<uint32_t>(std::stoul(sva, nullptr, 0));
|
||||
uint32_t sz = static_cast<uint32_t>(std::stoul(ssz, nullptr, 0));
|
||||
if (sz != 1 && sz != 2 && sz != 4 && sz != 8) return false;
|
||||
out->size = static_cast<uint8_t>(sz);
|
||||
out->period_ns = static_cast<uint64_t>(std::stoull(sper, nullptr, 0));
|
||||
if (out->period_ns < 1000) out->period_ns = 1000; // 1us floor.
|
||||
out->last_value = 0;
|
||||
out->last_was_valid = false;
|
||||
return true;
|
||||
} catch (...) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void parse_read_probes_csv(const std::string& csv) {
|
||||
size_t pos = 0;
|
||||
while (pos < csv.size() && g_read_probes.size() < kMaxReadProbes) {
|
||||
size_t end = csv.find(',', pos);
|
||||
std::string tok = csv.substr(pos, end - pos);
|
||||
trim(tok);
|
||||
if (!tok.empty()) {
|
||||
ReadProbe rp{};
|
||||
if (parse_read_probe_tok(tok, &rp)) {
|
||||
g_read_probes.push_back(rp);
|
||||
}
|
||||
}
|
||||
if (end == std::string::npos) break;
|
||||
pos = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t sample_at(uint32_t guest_va, uint8_t size, bool* out_valid) {
|
||||
*out_valid = false;
|
||||
if (!g_guest_to_host_thunk || !g_query_protect_thunk) return 0;
|
||||
uint32_t prot = 0;
|
||||
if (!g_query_protect_thunk(guest_va, &prot)) return 0;
|
||||
// Page must have at least read permission. The protect bits map to
|
||||
// xe::memory::PageAccess: kReadOnly=1, kReadWrite=2, kExecuteReadOnly=3,
|
||||
// kExecuteReadWrite=4. kNoAccess=0. Accept anything non-zero — caller
|
||||
// distinguishes via the second-pass change detector anyway.
|
||||
if (prot == 0) return 0;
|
||||
const void* hp = g_guest_to_host_thunk(guest_va);
|
||||
if (!hp) return 0;
|
||||
uint64_t v = 0;
|
||||
// Guest memory is big-endian. We use raw byte loads to avoid alignment
|
||||
// traps for size>4 on possibly-unaligned VAs. The "value" we log is the
|
||||
// host-endian interpretation of the BE bytes (matches store_and_swap's
|
||||
// logging convention: the byte-swapped scalar).
|
||||
const uint8_t* bp = reinterpret_cast<const uint8_t*>(hp);
|
||||
switch (size) {
|
||||
case 1: v = bp[0]; break;
|
||||
case 2: v = (uint64_t(bp[0]) << 8) | bp[1]; break;
|
||||
case 4:
|
||||
v = (uint64_t(bp[0]) << 24) | (uint64_t(bp[1]) << 16) |
|
||||
(uint64_t(bp[2]) << 8) | bp[3];
|
||||
break;
|
||||
case 8:
|
||||
v = (uint64_t(bp[0]) << 56) | (uint64_t(bp[1]) << 48) |
|
||||
(uint64_t(bp[2]) << 40) | (uint64_t(bp[3]) << 32) |
|
||||
(uint64_t(bp[4]) << 24) | (uint64_t(bp[5]) << 16) |
|
||||
(uint64_t(bp[6]) << 8) | bp[7];
|
||||
break;
|
||||
}
|
||||
*out_valid = true;
|
||||
return v;
|
||||
}
|
||||
|
||||
void read_probe_thread_main() {
|
||||
// Compute the GCD-ish min poll period across all probes; sleep that long
|
||||
// between scans. Each probe fires only when its own period_ns has elapsed
|
||||
// since the last sample (per-probe `next_fire_ns`).
|
||||
uint64_t min_period_ns = UINT64_MAX;
|
||||
for (const auto& p : g_read_probes) {
|
||||
if (p.period_ns < min_period_ns) min_period_ns = p.period_ns;
|
||||
}
|
||||
if (min_period_ns == UINT64_MAX) return;
|
||||
|
||||
// Per-probe next-fire times.
|
||||
std::vector<uint64_t> next_fire(g_read_probes.size(), 0);
|
||||
|
||||
XELOGI(
|
||||
"AUDIT-068-READ-INIT probe_count={} min_period_ns={} thread spawned",
|
||||
g_read_probes.size(), min_period_ns);
|
||||
for (size_t i = 0; i < g_read_probes.size(); ++i) {
|
||||
XELOGI("AUDIT-068-READ-INIT probe[{}] va=0x{:08X} size={} period_ns={}",
|
||||
i, g_read_probes[i].guest_va,
|
||||
static_cast<uint32_t>(g_read_probes[i].size),
|
||||
g_read_probes[i].period_ns);
|
||||
}
|
||||
|
||||
while (!g_read_probe_shutdown.load(std::memory_order_relaxed)) {
|
||||
int64_t now_ns = host_ns_since_start();
|
||||
for (size_t i = 0; i < g_read_probes.size(); ++i) {
|
||||
if (static_cast<uint64_t>(now_ns) < next_fire[i]) continue;
|
||||
ReadProbe& rp = g_read_probes[i];
|
||||
bool valid = false;
|
||||
uint64_t v = sample_at(rp.guest_va, rp.size, &valid);
|
||||
if (valid) {
|
||||
if (!rp.last_was_valid) {
|
||||
// First successful read: emit the initial value, do NOT call it a
|
||||
// "change" — but log so we know when the VA mapped.
|
||||
XELOGI(
|
||||
"AUDIT-068-READ-INITIAL va=0x{:08X} val=0x{:016X} sz={} "
|
||||
"host_ns={} tid=probe",
|
||||
rp.guest_va, v, static_cast<uint32_t>(rp.size), now_ns);
|
||||
rp.last_value = v;
|
||||
rp.last_was_valid = true;
|
||||
} else if (v != rp.last_value) {
|
||||
XELOGI(
|
||||
"AUDIT-068-READ-CHANGE va=0x{:08X} old=0x{:016X} "
|
||||
"new=0x{:016X} sz={} host_ns={} tid=probe",
|
||||
rp.guest_va, rp.last_value, v, static_cast<uint32_t>(rp.size),
|
||||
now_ns);
|
||||
rp.last_value = v;
|
||||
}
|
||||
} else if (rp.last_was_valid) {
|
||||
// Was valid, now invalid — page unmapped/reprotected.
|
||||
XELOGI(
|
||||
"AUDIT-068-READ-UNMAPPED va=0x{:08X} last=0x{:016X} sz={} "
|
||||
"host_ns={} tid=probe",
|
||||
rp.guest_va, rp.last_value, static_cast<uint32_t>(rp.size),
|
||||
now_ns);
|
||||
rp.last_was_valid = false;
|
||||
}
|
||||
next_fire[i] = static_cast<uint64_t>(now_ns) + rp.period_ns;
|
||||
}
|
||||
// Sleep until the next earliest fire, but no shorter than 1us and no
|
||||
// longer than min_period_ns (to keep shutdown latency bounded).
|
||||
uint64_t sleep_ns = min_period_ns;
|
||||
if (sleep_ns < 1000) sleep_ns = 1000;
|
||||
std::this_thread::sleep_for(std::chrono::nanoseconds(sleep_ns));
|
||||
}
|
||||
XELOGI("AUDIT-068-READ-EXIT thread shutting down");
|
||||
}
|
||||
|
||||
void start_read_probe_thread_if_configured() {
|
||||
std::call_once(g_read_probe_started, []() {
|
||||
parse_read_probes_csv(cvars::audit_68_host_mem_read_probe);
|
||||
if (g_read_probes.empty()) return;
|
||||
if (!g_guest_to_host_thunk || !g_query_protect_thunk) {
|
||||
XELOGI(
|
||||
"AUDIT-068-READ-INIT thunks not ready (guest_to_host={} "
|
||||
"query_protect={}) — read probe deferred",
|
||||
(void*)g_guest_to_host_thunk, (void*)g_query_protect_thunk);
|
||||
return;
|
||||
}
|
||||
g_read_probe_thread_running.store(true, std::memory_order_release);
|
||||
g_read_probe_thread = std::thread(&read_probe_thread_main);
|
||||
g_read_probe_thread.detach(); // best-effort; daemon-style.
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Runs parse_locked() exactly once across all threads (guarded by
// g_parsed_flag); later calls return without re-parsing.
void ensure_parsed() {
  std::call_once(g_parsed_flag, &parse_locked);
}
|
||||
|
||||
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
|
||||
uint8_t size, const char* tag) {
|
||||
// AUDIT-068 Session 2: defer parsing until Memory::Memory() has registered
|
||||
// the host→guest thunk. This guarantees the cmdline cvar override has been
|
||||
// applied AND the logging subsystem is alive before we latch g_active.
|
||||
// Without this gate, a be<T>::set() call during static-init (e.g. from a
|
||||
// global initializer in another translation unit) would trigger
|
||||
// parse_locked() before cpu_flags.cc's cvar objects are constructed —
|
||||
// latching g_active=0 permanently and silencing the watch.
|
||||
HostToGuestThunk thunk = g_host_to_guest_thunk;
|
||||
if (!thunk) return;
|
||||
ensure_parsed();
|
||||
// AUDIT-068 Session 3: lazy-start the read-probe poll thread. Same gate as
|
||||
// ensure_parsed() — must come after Memory::Memory() has registered the
|
||||
// thunks so the probe can read pages safely.
|
||||
start_read_probe_thread_if_configured();
|
||||
uint32_t active = g_active.load(std::memory_order_acquire);
|
||||
if (active == 0) return;
|
||||
|
||||
uint32_t guest_va = 0;
|
||||
if (thunk) {
|
||||
guest_va = thunk(host_ptr);
|
||||
}
|
||||
|
||||
bool hit = false;
|
||||
if ((active & 0x1) && value_matches(value, size)) hit = true;
|
||||
if (!hit && (active & 0x2) && thunk && addr_matches(guest_va, size)) {
|
||||
hit = true;
|
||||
}
|
||||
if (!hit) return;
|
||||
|
||||
emit(guest_va, host_ptr, value, size, tag);
|
||||
}
|
||||
|
||||
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
|
||||
const char* tag) {
|
||||
// AUDIT-068 Session 2: same static-init gate as check_host_write_slowpath.
|
||||
// Callers (Memory::Zero/Fill/Copy + xex_module audit68_prescan_memcpy) only
|
||||
// run after Memory::Memory(), but defensive in case of future expansion.
|
||||
if (!g_host_to_guest_thunk) return;
|
||||
ensure_parsed();
|
||||
uint32_t active = g_active.load(std::memory_order_acquire);
|
||||
if (active == 0) return;
|
||||
|
||||
bool hit = false;
|
||||
if ((active & 0x1) && value_matches(value, size)) hit = true;
|
||||
if (!hit && (active & 0x2) && addr_matches(guest_va, size)) hit = true;
|
||||
if (!hit) return;
|
||||
|
||||
emit(guest_va, nullptr, value, size, tag);
|
||||
}
|
||||
|
||||
} // namespace audit_68
|
||||
} // namespace xe
|
||||
1731
audit-runs/audit-068-host-mem-watch/fix-canary-v4.diff
Normal file
1731
audit-runs/audit-068-host-mem-watch/fix-canary-v4.diff
Normal file
File diff suppressed because it is too large
Load Diff
688
audit-runs/audit-068-host-mem-watch/fix-canary.diff
Normal file
688
audit-runs/audit-068-host-mem-watch/fix-canary.diff
Normal file
@@ -0,0 +1,688 @@
|
||||
diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h
|
||||
index 8ef40bbff..e78c8499c 100644
|
||||
--- a/src/xenia/base/memory.h
|
||||
+++ b/src/xenia/base/memory.h
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
|
||||
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
||||
#include "xenia/base/byte_order.h"
|
||||
|
||||
namespace xe {
|
||||
@@ -354,34 +355,52 @@ template <typename T>
|
||||
void store(void* mem, const T& value);
|
||||
template <>
|
||||
inline void store<int8_t>(void* mem, const int8_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
||||
+ static_cast<uint8_t>(value)),
|
||||
+ 1, "store<i8>");
|
||||
*reinterpret_cast<int8_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store<uint8_t>(void* mem, const uint8_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 1,
|
||||
+ "store<u8>");
|
||||
*reinterpret_cast<uint8_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store<int16_t>(void* mem, const int16_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
||||
+ static_cast<uint16_t>(value)),
|
||||
+ 2, "store<i16>");
|
||||
*reinterpret_cast<int16_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store<uint16_t>(void* mem, const uint16_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 2,
|
||||
+ "store<u16>");
|
||||
*reinterpret_cast<uint16_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store<int32_t>(void* mem, const int32_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
||||
+ static_cast<uint32_t>(value)),
|
||||
+ 4, "store<i32>");
|
||||
*reinterpret_cast<int32_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store<uint32_t>(void* mem, const uint32_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 4,
|
||||
+ "store<u32>");
|
||||
*reinterpret_cast<uint32_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store<int64_t>(void* mem, const int64_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 8,
|
||||
+ "store<i64>");
|
||||
*reinterpret_cast<int64_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store<uint64_t>(void* mem, const uint64_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, value, 8, "store<u64>");
|
||||
*reinterpret_cast<uint64_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
@@ -411,34 +430,52 @@ template <typename T>
|
||||
void store_and_swap(void* mem, const T& value);
|
||||
template <>
|
||||
inline void store_and_swap<int8_t>(void* mem, const int8_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
||||
+ static_cast<uint8_t>(value)),
|
||||
+ 1, "store_and_swap<i8>");
|
||||
*reinterpret_cast<int8_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store_and_swap<uint8_t>(void* mem, const uint8_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 1,
|
||||
+ "store_and_swap<u8>");
|
||||
*reinterpret_cast<uint8_t*>(mem) = value;
|
||||
}
|
||||
template <>
|
||||
inline void store_and_swap<int16_t>(void* mem, const int16_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
||||
+ static_cast<uint16_t>(value)),
|
||||
+ 2, "store_and_swap<i16>");
|
||||
*reinterpret_cast<int16_t*>(mem) = byte_swap(value);
|
||||
}
|
||||
template <>
|
||||
inline void store_and_swap<uint16_t>(void* mem, const uint16_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 2,
|
||||
+ "store_and_swap<u16>");
|
||||
*reinterpret_cast<uint16_t*>(mem) = byte_swap(value);
|
||||
}
|
||||
template <>
|
||||
inline void store_and_swap<int32_t>(void* mem, const int32_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(
|
||||
+ static_cast<uint32_t>(value)),
|
||||
+ 4, "store_and_swap<i32>");
|
||||
*reinterpret_cast<int32_t*>(mem) = byte_swap(value);
|
||||
}
|
||||
template <>
|
||||
inline void store_and_swap<uint32_t>(void* mem, const uint32_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 4,
|
||||
+ "store_and_swap<u32>");
|
||||
*reinterpret_cast<uint32_t*>(mem) = byte_swap(value);
|
||||
}
|
||||
template <>
|
||||
inline void store_and_swap<int64_t>(void* mem, const int64_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, static_cast<uint64_t>(value), 8,
|
||||
+ "store_and_swap<i64>");
|
||||
*reinterpret_cast<int64_t*>(mem) = byte_swap(value);
|
||||
}
|
||||
template <>
|
||||
inline void store_and_swap<uint64_t>(void* mem, const uint64_t& value) {
|
||||
+ xe::audit_68::check_host_write(mem, value, 8, "store_and_swap<u64>");
|
||||
*reinterpret_cast<uint64_t*>(mem) = byte_swap(value);
|
||||
}
|
||||
template <>
|
||||
diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
|
||||
index 3ff067e15..705ad060b 100644
|
||||
--- a/src/xenia/cpu/cpu_flags.cc
|
||||
+++ b/src/xenia/cpu/cpu_flags.cc
|
||||
@@ -57,3 +57,76 @@ DEFINE_bool(break_condition_truncate, true, "truncate value to 32-bits", "CPU");
|
||||
|
||||
DEFINE_bool(break_on_debugbreak, true, "int3 on JITed __debugbreak requests.",
|
||||
"CPU");
|
||||
+
|
||||
+// AUDIT-DEMO: smoke marker (memory entry: emulator.cc:225,283). Always-on bool.
|
||||
+DEFINE_bool(audit_demo_setup_trace, true,
|
||||
+ "Audit smoke marker: log AUDIT-DEMO-SETUP-BEGIN at emulator setup.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// AUDIT-061: comma-separated list of guest PCs to log on each fire.
|
||||
+// Format: "0xPC1,0xPC2,..." (max 32 PCs). Each fire emits
|
||||
+// AUDIT-061-BR pc=X lr=X cr0=LGE cr6=LGE r3=X r4=X r5=X r6=X r31=X tid=N.
|
||||
+// Default empty (off); no perf cost when empty.
|
||||
+DEFINE_string(audit_61_branch_probe_pcs, "",
|
||||
+ "AUDIT-061: CSV of guest PCs to trace (cr0/cr6 + regs/tid).",
|
||||
+ "Audit");
|
||||
+
|
||||
+// AUDIT-067: comma-separated list of u32 values to watch. When non-empty,
|
||||
+// every 4-byte guest store (stw/stwu/stwx/stwux/stmw) emits a runtime
|
||||
+// equality check; matches log AUDIT-067-VAL pc=X lr=X val=X dst=X r3..r6 r31 tid=N.
|
||||
+// Max 4 values. Default empty (off); zero overhead when empty.
|
||||
+DEFINE_string(audit_67_value_watch, "",
|
||||
+ "AUDIT-067: CSV of u32 values (max 4) — log every guest "
|
||||
+ "store whose value matches.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// AUDIT-068: host-side memory-write watch. See cpu_flags.h header for format.
|
||||
+// Mirrors AUDIT-067 but covers host-side writes (xe::store_and_swap<T>,
|
||||
+// Memory::Zero/Fill/Copy). Empty default = zero cost.
|
||||
+DEFINE_string(audit_68_host_mem_watch_values, "",
|
||||
+ "AUDIT-068: CSV of u32 values (max 8) — log every host-side "
|
||||
+ "guest-memory write whose value matches.",
|
||||
+ "Audit");
|
||||
+DEFINE_string(audit_68_host_mem_watch_addrs, "",
|
||||
+ "AUDIT-068: CSV of guest VAs or VA ranges 'START-END' (max 8) "
|
||||
+ "— log every host-side guest-memory write whose guest VA falls "
|
||||
+ "within the configured set.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// Phase A — see kernel/event_log.h.
|
||||
+DEFINE_string(phase_a_event_log_path, "",
|
||||
+ "Phase A: write schema-v1 JSONL event log to this path. "
|
||||
+ "Empty (default) = disabled.",
|
||||
+ "Audit");
|
||||
+DEFINE_bool(phase_a_event_log_mem_writes, false,
|
||||
+ "Phase A: include mem.write events in the JSONL log. RESERVED — "
|
||||
+ "not wired in this phase. Default false.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// Phase D Stage 1 — see kernel/event_log.h `EmitContentionObserved`.
|
||||
+DEFINE_bool(kernel_emit_contention, false,
|
||||
+ "Phase D Stage 1: emit `contention.observed` events when "
|
||||
+ "RtlEnterCriticalSection's spin loop is exhausted and the call "
|
||||
+ "falls through to xeKeWaitForSingleObject. Default false (zero "
|
||||
+ "cost when disabled). Requires --phase_a_event_log_path to be "
|
||||
+ "set as well.",
|
||||
+ "Audit");
|
||||
+
|
||||
+// Phase B — see kernel/phase_b_snapshot.h.
|
||||
+DEFINE_string(phase_b_snapshot_dir, "",
|
||||
+ "Phase B: write 5-file structured state snapshot to "
|
||||
+ "<dir>/canary/ at the moment immediately before the first "
|
||||
+ "guest PPC instruction of entry_point. Empty (default) = "
|
||||
+ "disabled, zero overhead.",
|
||||
+ "Audit");
|
||||
+DEFINE_bool(phase_b_snapshot_and_exit, false,
|
||||
+ "Phase B: after writing the snapshot, exit the process "
|
||||
+ "immediately (std::_Exit(0)) so re-runs are byte-deterministic.",
|
||||
+ "Audit");
|
||||
+DEFINE_bool(phase_b_dump_section_content, false,
|
||||
+ "Phase B: in memory.json, populate section_contents[].content_b64 "
|
||||
+ "with raw bytes of every committed XEX-image region. Default "
|
||||
+ "false — per-region SHA-256 is enough for the routine diff; "
|
||||
+ "this is the escape hatch for the STOP-and-report condition "
|
||||
+ "(image_loaded_sha256 mismatch).",
|
||||
+ "Audit");
|
||||
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
|
||||
index 38c4f98ba..2b1e1fd9c 100644
|
||||
--- a/src/xenia/cpu/cpu_flags.h
|
||||
+++ b/src/xenia/cpu/cpu_flags.h
|
||||
@@ -35,4 +35,45 @@ DECLARE_bool(break_condition_truncate);
|
||||
|
||||
DECLARE_bool(break_on_debugbreak);
|
||||
|
||||
+// AUDIT-DEMO smoke marker.
|
||||
+DECLARE_bool(audit_demo_setup_trace);
|
||||
+
|
||||
+// AUDIT-061: multi-PC branch probe — emits one log line per fire with
|
||||
+// (pc, lr, cr0 LGE, cr6 LGE, r3, r4, r5, r6, r31, tid). CSV of guest PCs.
|
||||
+DECLARE_string(audit_61_branch_probe_pcs);
|
||||
+
|
||||
+// AUDIT-067: value-watch — emit a log line for each 32-bit guest store whose
|
||||
+// value-to-be-stored matches any configured value. CSV of u32 values
|
||||
+// ("0xDEADBEEF,..."), max 4 entries. Default empty (off); zero cost when empty.
|
||||
+DECLARE_string(audit_67_value_watch);
|
||||
+
|
||||
+// AUDIT-068: host-side memory-write watch — emit a log line for each host-side
|
||||
+// write to guest memory whose VALUE matches any configured u32 value, or whose
|
||||
+// guest VA falls within any configured ADDR or ADDR-range. Mirrors AUDIT-067
|
||||
+// but covers the host-side write paths (xe::store_and_swap<T>, Memory::Zero/
|
||||
+// Fill/Copy) that AUDIT-067's JIT store-opcode hooks cannot see.
|
||||
+//
|
||||
+// VALUES: CSV of u32 values, max 8 entries; e.g. "0x8200A208,0x8200A928".
|
||||
+// ADDRS: CSV of guest VAs or VA ranges, max 8 entries; range form is
|
||||
+// "0xSTART-0xEND" (inclusive). e.g. "0x42500000-0x42600000,0xBCE25340".
|
||||
+// Default empty (off); zero cost on the hot path when both are empty.
|
||||
+DECLARE_string(audit_68_host_mem_watch_values);
|
||||
+DECLARE_string(audit_68_host_mem_watch_addrs);
|
||||
+
|
||||
+// Phase A: JSONL event-log emitter path. When non-empty, the engine writes
|
||||
+// schema-v1 JSONL events to this file. Empty (default) = no overhead, no
|
||||
+// behavior change. Schema: xenia-rs/audit-runs/phase-a-diff-harness/schema-v1.md
|
||||
+DECLARE_string(phase_a_event_log_path);
|
||||
+DECLARE_bool(phase_a_event_log_mem_writes);
|
||||
+
|
||||
+// Phase B: initial-state snapshot. When the dir cvar is non-empty, the
|
||||
+// engine writes a five-file structured state snapshot (cpu_state.json,
|
||||
+// memory.json, kernel.json, vfs.json, config.json, plus manifest.json) to
|
||||
+// `<dir>/canary/` at the moment immediately before the first guest PPC
|
||||
+// instruction of the XEX entry_point executes. See
|
||||
+// `xenia-rs/audit-runs/phase-b-state-equivalence/`.
|
||||
+DECLARE_string(phase_b_snapshot_dir);
|
||||
+DECLARE_bool(phase_b_snapshot_and_exit);
|
||||
+DECLARE_bool(phase_b_dump_section_content);
|
||||
+
|
||||
#endif // XENIA_CPU_CPU_FLAGS_H_
|
||||
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
|
||||
index 22ba66aee..571b424f5 100644
|
||||
--- a/src/xenia/memory.cc
|
||||
+++ b/src/xenia/memory.cc
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/assert.h"
|
||||
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
||||
#include "xenia/base/byte_stream.h"
|
||||
#include "xenia/base/clock.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
|
||||
|
||||
static Memory* active_memory_ = nullptr;
|
||||
|
||||
+// AUDIT-068 — process-global accessor (declared in memory.h).
|
||||
+Memory* Memory::active() { return active_memory_; }
|
||||
+
|
||||
void CrashDump() {
|
||||
static std::atomic<int> in_crash_dump(0);
|
||||
if (in_crash_dump.fetch_add(1)) {
|
||||
@@ -151,11 +155,19 @@ Memory::Memory() {
|
||||
uint32_t(xe::memory::allocation_granularity());
|
||||
assert_zero(active_memory_);
|
||||
active_memory_ = this;
|
||||
+
|
||||
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
|
||||
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
|
||||
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
|
||||
+ Memory* m = active_memory_;
|
||||
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
|
||||
+ };
|
||||
}
|
||||
|
||||
Memory::~Memory() {
|
||||
assert_true(active_memory_ == this);
|
||||
active_memory_ = nullptr;
|
||||
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
|
||||
|
||||
// Uninstall the MMIO handler, as we won't be able to service more
|
||||
// requests.
|
||||
@@ -540,16 +552,48 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
|
||||
}
|
||||
|
||||
void Memory::Zero(uint32_t address, uint32_t size) {
|
||||
+ // AUDIT-068: log a single span event with value=0. The size handed to the
|
||||
+ // watch is capped at 8, so an address watch only sees the first 8 bytes of the span. Slow path is gated on the atomic flag.
|
||||
+ xe::audit_68::check_guest_va(address, 0,
|
||||
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Zero");
|
||||
std::memset(TranslateVirtual(address), 0, size);
|
||||
}
|
||||
|
||||
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
|
||||
+ // Replicate the fill byte across all 8 bytes of the value field. For fills
|
||||
+ // of size >= 4 value_matches compares the low 32 bits, so a watch such as
|
||||
+ // 0xDEADBEEF never matches; only a watch made of one repeated byte (e.g. 0xCCCCCCCC) does.
|
||||
+ uint64_t v = static_cast<uint64_t>(value);
|
||||
+ v |= v << 8;
|
||||
+ v |= v << 16;
|
||||
+ v |= v << 32;
|
||||
+ xe::audit_68::check_guest_va(address, v,
|
||||
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Fill");
|
||||
std::memset(TranslateVirtual(address), value, size);
|
||||
}
|
||||
|
||||
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
|
||||
uint8_t* pdest = TranslateVirtual(dest);
|
||||
const uint8_t* psrc = TranslateVirtual(src);
|
||||
+ // We don't know the data without scanning; just log the destination span +
|
||||
+ // first u32 of the source as a value hint. Slow path is gated.
|
||||
+ if (xe::audit_68::g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
||||
+ uint64_t v = 0;
|
||||
+ if (size >= 4) {
|
||||
+ // Read big-endian u32 from the source (mirrors how guest sees it).
|
||||
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
|
||||
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
|
||||
+ } else if (size > 0) {
|
||||
+ for (uint32_t i = 0; i < size; ++i) {
|
||||
+ v = (v << 8) | psrc[i];
|
||||
+ }
|
||||
+ }
|
||||
+ xe::audit_68::check_guest_va(dest, v,
|
||||
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
||||
+ "Memory::Copy");
|
||||
+ }
|
||||
std::memcpy(pdest, psrc, size);
|
||||
}
|
||||
|
||||
diff --git a/src/xenia/memory.h b/src/xenia/memory.h
|
||||
index bd9519a40..fa712fe08 100644
|
||||
--- a/src/xenia/memory.h
|
||||
+++ b/src/xenia/memory.h
|
||||
@@ -347,6 +347,13 @@ class Memory {
|
||||
Memory();
|
||||
~Memory();
|
||||
|
||||
+ // AUDIT-068: process-global Memory singleton accessor. Returns the
|
||||
+ // currently-constructed Memory instance, or nullptr if none. Set inside
|
||||
+ // Memory::Memory()/~Memory(); see memory.cc `active_memory_`. Used by
|
||||
+ // xe::audit_68::check_host_write() to translate a host pointer back to a
|
||||
+ // guest VA without an explicit Memory* context.
|
||||
+ static Memory* active();
|
||||
+
|
||||
// Initializes the memory system.
|
||||
// This may fail if the host address space could not be reserved or the
|
||||
// mapping to the file system fails.
|
||||
|
||||
=== NEW FILE src/xenia/base/audit_68_host_mem_watch_fwd.h ===
|
||||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* AUDIT-068: host-side memory-write watch — forward declarations only.
|
||||
*
|
||||
* Declarations here are intentionally minimal so that xenia/base/memory.h can
|
||||
* include this without pulling in xenia/memory.h (which would create a
|
||||
* circular dependency: xenia-base → xenia-core → xenia-base). The full
|
||||
* definitions live in xenia/base/audit_68_host_mem_watch_base.cc (xenia-base).
|
||||
*
|
||||
* Hot path: callers (the integer specializations of xe::store_and_swap<T>)
|
||||
* load the atomic flag once. When it is 0 (default), no further work is done
|
||||
* — a single relaxed atomic load and a predictable branch.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
||||
#define XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
|
||||
namespace xe {
|
||||
namespace audit_68 {
|
||||
|
||||
// 0 = inactive. UINT32_MAX = cvars not parsed yet (the initial value). Once
|
||||
// the first slow-path call has parsed the cvars this holds a bitmask:
|
||||
// bit 0 = value watch configured, bit 1 = address watch configured. Loaded relaxed on the hot path.
|
||||
//
|
||||
// Implementation lives in xenia-base (audit_68_host_mem_watch_base.cc) so
|
||||
// that callers in xenia-base/xenia-cpu/xenia-kernel can resolve the symbol
|
||||
// without depending on xenia-core link order.
|
||||
extern std::atomic<uint32_t> g_active;
|
||||
|
||||
// Host-pointer → guest-VA translation thunk. xenia/memory.cc::Memory::Memory()
|
||||
// registers a function pointer here that wraps Memory::HostToGuestVirtual.
|
||||
// Until set, the slow path falls back to logging the raw host pointer.
|
||||
using HostToGuestThunk = uint32_t (*)(const void*);
|
||||
extern HostToGuestThunk g_host_to_guest_thunk;
|
||||
|
||||
// Slow path. Only invoked when g_active is non-zero. Implementation in
|
||||
// xenia/base/audit_68_host_mem_watch_base.cc (xenia-base).
|
||||
//
|
||||
// host_ptr: the host pointer being written (from store_and_swap's `mem`).
|
||||
// value: the value being stored (zero-extended to u64).
|
||||
// size: 1, 2, 4 or 8.
|
||||
// tag: caller-provided tag string (e.g. "store_and_swap<u32>"). Logged
|
||||
// verbatim, no formatting. Must be a static string (lifetime
|
||||
// beyond this call).
|
||||
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
|
||||
uint8_t size, const char* tag);
|
||||
|
||||
// Same as above, but with a known guest VA (for callers like Memory::Zero/
|
||||
// Fill/Copy that have the VA but not a single host pointer).
|
||||
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
|
||||
const char* tag);
|
||||
|
||||
// Inline hot-path wrappers. Single relaxed atomic load + branch when inactive.
|
||||
inline void check_host_write(const void* host_ptr, uint64_t value, uint8_t size,
|
||||
const char* tag) {
|
||||
if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
||||
check_host_write_slowpath(host_ptr, value, size, tag);
|
||||
}
|
||||
}
|
||||
|
||||
inline void check_guest_va(uint32_t guest_va, uint64_t value, uint8_t size,
|
||||
const char* tag) {
|
||||
if (g_active.load(std::memory_order_relaxed) != 0) [[unlikely]] {
|
||||
check_guest_va_slowpath(guest_va, value, size, tag);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace audit_68
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_BASE_AUDIT_68_HOST_MEM_WATCH_FWD_H_
|
||||
|
||||
=== NEW FILE src/xenia/base/audit_68_host_mem_watch_base.cc ===
|
||||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* AUDIT-068 host-side memory-write watch — implementation (xenia-base).
|
||||
*
|
||||
* Mirrors AUDIT-067 in spirit (value-CSV cvar, lazy parse, atomic-bool
|
||||
* activation) but observes the HOST-side write paths instead of the JIT'd
|
||||
* guest store opcodes. Captures writes performed by xe::store_and_swap<T>
|
||||
* (xenia/base/memory.h) and by Memory::Zero/Fill/Copy (xenia/memory.cc).
|
||||
*
|
||||
* Lives in xenia-base so that the slow-path symbols resolve for callers in
|
||||
* xenia-base / xenia-cpu / xenia-kernel without depending on xenia-core link
|
||||
* order. The host→guest VA translation is provided by a function-pointer
|
||||
* thunk that xenia::Memory::Memory() registers at construction.
|
||||
*
|
||||
* See xenia/base/audit_68_host_mem_watch_fwd.h for the API.
|
||||
* See xenia/cpu/cpu_flags.{h,cc} for the cvars.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/threading.h"
|
||||
|
||||
// We need the cvars but cpu_flags.h lives in xenia-cpu. To avoid an upward
|
||||
// dep we re-declare them here with the same macros — cvar.h's DECLARE_*
|
||||
// macros are header-safe (just `extern` declarations) and resolve against the
|
||||
// definitions in xenia-cpu/cpu_flags.cc at link time. (xenia-cpu links AFTER
|
||||
// xenia-base in the executable; symbols in xenia-cpu/cpu_flags.cc are still
|
||||
// resolvable from xenia-base translation units because the lld pass folds
|
||||
// all libraries together at the executable level.)
|
||||
DECLARE_string(audit_68_host_mem_watch_values);
|
||||
DECLARE_string(audit_68_host_mem_watch_addrs);
|
||||
|
||||
namespace xe {
|
||||
namespace audit_68 {
|
||||
|
||||
// Hot-path flag (declared in fwd header). Initial sentinel UINT32_MAX means
|
||||
// "unparsed"; the very first slow-path call invokes ensure_parsed() which
|
||||
// replaces the sentinel with the actual active bitmask (0 if both cvars are
|
||||
// empty, 1/2/3 otherwise). After that, hot-path calls observe the real value
|
||||
// and bail out cheaply when off.
|
||||
std::atomic<uint32_t> g_active{0xFFFFFFFFu};
|
||||
|
||||
// Host→guest VA translation thunk (declared in fwd header). Set by
|
||||
// xenia::Memory::Memory() at construction; reset to nullptr by ~Memory().
|
||||
HostToGuestThunk g_host_to_guest_thunk{nullptr};
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr size_t kMaxValues = 8;
|
||||
constexpr size_t kMaxAddrRanges = 8;
|
||||
|
||||
// One watched guest-VA interval. Both bounds belong to the interval.
struct AddrRange {
  // First watched address (inclusive).
  uint32_t start;
  // Last watched address (inclusive).
  uint32_t end;
};
|
||||
|
||||
std::vector<uint32_t> g_values;
|
||||
std::vector<AddrRange> g_addrs;
|
||||
std::once_flag g_parsed_flag;
|
||||
|
||||
std::chrono::steady_clock::time_point g_t0;
|
||||
std::once_flag g_t0_once;
|
||||
|
||||
// Returns the number of nanoseconds elapsed on std::chrono::steady_clock
// since the first call to this function. The reference point g_t0 is captured
// exactly once via g_t0_once.
int64_t host_ns_since_start() {
  std::call_once(g_t0_once, [] { g_t0 = std::chrono::steady_clock::now(); });
  const auto elapsed = std::chrono::steady_clock::now() - g_t0;
  return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
}
|
||||
|
||||
// Strips leading and trailing spaces and tabs from `s` in place.
//
// Only ' ' and '\t' are treated as blanks; other whitespace (e.g. '\n') is
// left untouched. A string consisting solely of blanks becomes empty.
//
// Uses two bounded erases instead of erasing one leading character at a time,
// which re-shifted the whole string on every iteration.
void trim(std::string& s) {
  static constexpr char kBlanks[] = " \t";
  const size_t last_kept = s.find_last_not_of(kBlanks);
  if (last_kept == std::string::npos) {
    // Empty, or nothing but blanks.
    s.clear();
    return;
  }
  s.erase(last_kept + 1);
  // A non-blank character exists, so find_first_not_of cannot return npos.
  s.erase(0, s.find_first_not_of(kBlanks));
}
|
||||
|
||||
// Parses `tok` as an unsigned 32-bit integer and stores it in `*out`.
//
// The base is auto-detected by std::stoul (base 0): a "0x"/"0X" prefix means
// hexadecimal, a leading "0" means octal, anything else decimal.
//
// Returns true and writes `*out` only when the WHOLE token is a valid number
// that fits in 32 bits. Returns false (leaving `*out` untouched) for:
//   - empty / non-numeric input,
//   - a leading '-' (std::stoul would otherwise wrap it to a huge value),
//   - trailing characters after the number (e.g. "0x10zz"),
//   - values above UINT32_MAX (std::stoul yields unsigned long, which is
//     64 bits wide on LP64 hosts and used to be silently truncated).
bool parse_u32(const std::string& tok, uint32_t* out) {
  const size_t first = tok.find_first_not_of(" \t\n\v\f\r");
  if (first == std::string::npos || tok[first] == '-') {
    return false;
  }
  try {
    size_t consumed = 0;
    const unsigned long parsed = std::stoul(tok, &consumed, 0);
    if (consumed != tok.size()) {
      return false;  // trailing junk after the number
    }
    if (parsed > UINT32_MAX) {
      return false;  // does not fit in 32 bits
    }
    *out = static_cast<uint32_t>(parsed);
    return true;
  } catch (...) {
    // std::stoul throws std::invalid_argument / std::out_of_range.
    return false;
  }
}
|
||||
|
||||
void parse_values_csv(const std::string& csv) {
|
||||
size_t pos = 0;
|
||||
while (pos < csv.size() && g_values.size() < kMaxValues) {
|
||||
size_t end = csv.find(',', pos);
|
||||
std::string tok = csv.substr(pos, end - pos);
|
||||
trim(tok);
|
||||
if (!tok.empty()) {
|
||||
uint32_t v;
|
||||
if (parse_u32(tok, &v)) {
|
||||
g_values.push_back(v);
|
||||
}
|
||||
}
|
||||
if (end == std::string::npos) break;
|
||||
pos = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void parse_addrs_csv(const std::string& csv) {
|
||||
size_t pos = 0;
|
||||
while (pos < csv.size() && g_addrs.size() < kMaxAddrRanges) {
|
||||
size_t end = csv.find(',', pos);
|
||||
std::string tok = csv.substr(pos, end - pos);
|
||||
trim(tok);
|
||||
if (!tok.empty()) {
|
||||
size_t dash = tok.find('-', 2); // skip leading "0x" if present
|
||||
AddrRange r{};
|
||||
if (dash != std::string::npos) {
|
||||
std::string s = tok.substr(0, dash);
|
||||
std::string e = tok.substr(dash + 1);
|
||||
trim(s);
|
||||
trim(e);
|
||||
uint32_t a, b;
|
||||
if (parse_u32(s, &a) && parse_u32(e, &b)) {
|
||||
r.start = a;
|
||||
r.end = b;
|
||||
g_addrs.push_back(r);
|
||||
}
|
||||
} else {
|
||||
uint32_t a;
|
||||
if (parse_u32(tok, &a)) {
|
||||
r.start = a;
|
||||
r.end = a + 7;
|
||||
g_addrs.push_back(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (end == std::string::npos) break;
|
||||
pos = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void parse_locked() {
|
||||
parse_values_csv(cvars::audit_68_host_mem_watch_values);
|
||||
parse_addrs_csv(cvars::audit_68_host_mem_watch_addrs);
|
||||
|
||||
uint32_t bits = 0;
|
||||
if (!g_values.empty()) bits |= 0x1;
|
||||
if (!g_addrs.empty()) bits |= 0x2;
|
||||
g_active.store(bits, std::memory_order_release);
|
||||
|
||||
XELOGI(
|
||||
"AUDIT-068-INIT values_csv=\"{}\" addrs_csv=\"{}\" values_parsed={} "
|
||||
"addr_ranges_parsed={} active=0x{:X}",
|
||||
cvars::audit_68_host_mem_watch_values,
|
||||
cvars::audit_68_host_mem_watch_addrs, g_values.size(), g_addrs.size(),
|
||||
bits);
|
||||
for (size_t i = 0; i < g_values.size(); ++i) {
|
||||
XELOGI("AUDIT-068-INIT value[{}] = 0x{:08X}", i, g_values[i]);
|
||||
}
|
||||
for (size_t i = 0; i < g_addrs.size(); ++i) {
|
||||
XELOGI("AUDIT-068-INIT addr_range[{}] = 0x{:08X}-0x{:08X}", i,
|
||||
g_addrs[i].start, g_addrs[i].end);
|
||||
}
|
||||
}
|
||||
|
||||
bool value_matches(uint64_t value, uint8_t size) {
|
||||
for (uint32_t v : g_values) {
|
||||
if (size >= 4 && static_cast<uint32_t>(value) == v) return true;
|
||||
if (size == 8 && static_cast<uint32_t>(value >> 32) == v) return true;
|
||||
if (size == 2 && (v & 0xFFFF) == (value & 0xFFFF)) return true;
|
||||
if (size == 1 && (v & 0xFF) == (value & 0xFF)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool addr_matches(uint32_t guest_va, uint8_t size) {
|
||||
uint32_t lo = guest_va;
|
||||
uint32_t hi = guest_va + (size ? size - 1 : 0);
|
||||
for (const auto& r : g_addrs) {
|
||||
if (lo <= r.end && hi >= r.start) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Thin wrapper: returns xe::threading::current_thread_id() for the log line.
uint32_t current_tid() {
  const uint32_t tid = xe::threading::current_thread_id();
  return tid;
}
|
||||
|
||||
// Writes one AUDIT-068-HOST-WRITE line via XELOGI.
//
// guest_va : guest virtual address of the write.
// host_ptr : host pointer of the write; printed as a 16-digit hex integer.
// value    : stored value, printed as a 16-digit hex integer.
// size     : byte count reported in the `sz` field.
// tag      : caller tag printed in the `fn` field; "<null>" when null.
// The line also carries host_ns_since_start() and current_tid().
void emit(uint32_t guest_va, const void* host_ptr, uint64_t value,
          uint8_t size, const char* tag) {
  const uintptr_t host_addr = reinterpret_cast<uintptr_t>(host_ptr);
  // Widen so the size is formatted as a number rather than a character.
  const uint32_t byte_count = static_cast<uint32_t>(size);
  const char* label = tag ? tag : "<null>";
  XELOGI(
      "AUDIT-068-HOST-WRITE guest_va=0x{:08X} host_ptr=0x{:016X} "
      "val=0x{:016X} sz={} fn={} host_ns={} tid={}",
      guest_va, host_addr, value, byte_count, label, host_ns_since_start(),
      current_tid());
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Runs parse_locked() exactly once (guarded by g_parsed_flag); later calls
// return without parsing again.
void ensure_parsed() {
  std::call_once(g_parsed_flag, [] { parse_locked(); });
}
|
||||
|
||||
void check_host_write_slowpath(const void* host_ptr, uint64_t value,
|
||||
uint8_t size, const char* tag) {
|
||||
ensure_parsed();
|
||||
uint32_t active = g_active.load(std::memory_order_acquire);
|
||||
if (active == 0) return;
|
||||
|
||||
uint32_t guest_va = 0;
|
||||
HostToGuestThunk thunk = g_host_to_guest_thunk;
|
||||
if (thunk) {
|
||||
guest_va = thunk(host_ptr);
|
||||
}
|
||||
|
||||
bool hit = false;
|
||||
if ((active & 0x1) && value_matches(value, size)) hit = true;
|
||||
if (!hit && (active & 0x2) && thunk && addr_matches(guest_va, size)) {
|
||||
hit = true;
|
||||
}
|
||||
if (!hit) return;
|
||||
|
||||
emit(guest_va, host_ptr, value, size, tag);
|
||||
}
|
||||
|
||||
void check_guest_va_slowpath(uint32_t guest_va, uint64_t value, uint8_t size,
|
||||
const char* tag) {
|
||||
ensure_parsed();
|
||||
uint32_t active = g_active.load(std::memory_order_acquire);
|
||||
if (active == 0) return;
|
||||
|
||||
bool hit = false;
|
||||
if ((active & 0x1) && value_matches(value, size)) hit = true;
|
||||
if (!hit && (active & 0x2) && addr_matches(guest_va, size)) hit = true;
|
||||
if (!hit) return;
|
||||
|
||||
emit(guest_va, nullptr, value, size, tag);
|
||||
}
|
||||
|
||||
} // namespace audit_68
|
||||
} // namespace xe
|
||||
@@ -0,0 +1,81 @@
|
||||
# AUDIT-068 Session 1 — host-side memory-write watch (canary instrumentation)
|
||||
|
||||
Date: 2026-05-19
|
||||
|
||||
## Goal
|
||||
|
||||
Capture which host C++ functions perform the writes to guest memory that ours never reproduces:
|
||||
1. Vtable install at `0xBCE25340 = 0x8200A208` (and clone `0x8200A928`) — gates `sub_825070F0`.
|
||||
2. Voice-struct field clear `[VOICE+0x164]` (value `0x00000000`, on guest-VA likely in heap `0x425xxxxx`).
|
||||
3. Anything else surfaced.
|
||||
|
||||
## Write-path surface inventory (canary)
|
||||
|
||||
### A. `xe::store_and_swap<T>` template family (`xenia-canary/src/xenia/base/memory.h:410-475`)
|
||||
|
||||
- Sized specializations for T = int8/uint8/int16/uint16/int32/uint32/int64/uint64/float/double.
|
||||
- String specializations recurse to `store_and_swap<uint8_t>` / `<uint16_t>`.
|
||||
- Receives `void* mem` = HOST pointer; does `*p = byte_swap(value)`.
|
||||
- **This is the canonical path** for host-side typed writes to guest memory used by kernel-import handlers in `xboxkrnl_*.cc`. Confirmed wide use (16 kernel sub-modules call `store_and_swap<uint32_t>` alone).
|
||||
- Vtable install (PPC `stw vptr,0(obj)` equivalent on host side) almost certainly uses `store_and_swap<uint32_t>(host_ptr, vptr)`.
|
||||
|
||||
### B. `Memory::Zero/Fill/Copy` (`xenia/memory.cc:542-554`)
|
||||
|
||||
- Use `std::memset`/`std::memcpy` directly via host pointer (after `TranslateVirtual`).
|
||||
- Wrappers for `RtlZeroMemory`, `RtlFillMemory`, `RtlMoveMemory`, `RtlCopyMemory`.
|
||||
- Bypass `store_and_swap` — must instrument separately if we want full coverage.
|
||||
- Voice-struct clears via `0x00000000` could plausibly come through here (RtlZeroMemory) or directly via `store_and_swap<uint32_t>` (typed write).
|
||||
|
||||
### C. Direct guest writes via `*TranslateVirtual<T*>() = …`
|
||||
|
||||
- Some sites cast and write through the host pointer directly without going through `store_and_swap`.
|
||||
- Lower coverage priority — start with A+B; add C only if first 2 don't catch our targets.
|
||||
|
||||
## Cvar design (mimics audit_67 pattern)
|
||||
|
||||
Two new cvars in `xenia/cpu/cpu_flags.{h,cc}`:
|
||||
|
||||
```cpp
|
||||
DECLARE_string(audit_68_host_mem_watch_values); // CSV of u32 values (max 8)
|
||||
DECLARE_string(audit_68_host_mem_watch_addrs); // CSV of guest VAs or VA ranges (max 8)
|
||||
```
|
||||
|
||||
Format examples:
|
||||
- Values: `--audit_68_host_mem_watch_values=0x8200A208,0x8200A928`
|
||||
- Addrs single: `--audit_68_host_mem_watch_addrs=0xBCE25340`
|
||||
- Addrs range: `--audit_68_host_mem_watch_addrs=0x42500000-0x42600000,0xBCE25340`
|
||||
|
||||
Default empty → zero overhead.
|
||||
|
||||
Sample log line (XELOGI):
|
||||
```
|
||||
AUDIT-068-HOST-WRITE guest_va=0xBCE25340 host_ptr=0x<host_ptr> val=0x000000008200A208 sz=4 fn=<host_function> host_ns=10123456789 tid=N
|
||||
```
|
||||
|
||||
`fn=<host_function>` is filled by the caller (each `store_and_swap<T>` specialization passes `__FUNCTION__` or a tag). We can't get a real backtrace cheaply across MSVC; we instead instrument the high-fanout entry points (kernel-import handlers, `Memory::Zero/Fill/Copy`) with a string tag. For Session 1, capture is sufficient with just template name + caller tag.
|
||||
|
||||
## Implementation strategy
|
||||
|
||||
1. New file `xenia/audit_68_host_mem_watch.h` (top-level): forward decls of helper functions in `namespace xe::audit_68`:
|
||||
```cpp
|
||||
extern std::atomic<uint8_t> g_active; // 0=off, 1=values, 2=addrs, 3=both
|
||||
void check_host_write(const void* host_ptr, uint64_t value, uint8_t size,
|
||||
const char* tag);
|
||||
void check_guest_write(uint32_t guest_va, uint64_t value, uint8_t size,
|
||||
const char* tag);
|
||||
```
|
||||
2. New file `xenia/audit_68_host_mem_watch.cc`: lazy-parse the cvars on first call, atomic-bool sets active. Performs `Memory::active()->HostToGuestVirtual(host_ptr)` translation, then matches against value-list and addr-range list, emits XELOGI.
|
||||
3. `xenia/memory.h`: add public static `Memory::active()` (returns `active_memory_`).
|
||||
4. `xenia/base/memory.h`: extend `store_and_swap<T>` specializations (uint8/uint16/uint32/uint64 only — the integer typed paths most likely to write vptrs / clear flags) to check `g_active` and call the helper. Hot path: 1 atomic load + branch when off. The added cost when on is one cmp+jne per byte/word/dword/qword store; acceptable for capture runs.
|
||||
5. `xenia/memory.cc`: instrument `Memory::Zero/Fill/Copy` with calls to `check_guest_write` (each call covers a range of guest VAs; for capture purposes we log a single event per call — first matching byte, size and tag — rather than expanding to per-byte events).
|
||||
|
||||
Total estimated LOC: ~120-160 LOC across 5 files.
|
||||
|
||||
## Capture protocol
|
||||
|
||||
- Build canary with the new code.
|
||||
- Smoke test: `--audit_68_host_mem_watch_values=0x12345678` (no expected hits) → confirm no spurious lines, build/init OK.
|
||||
- Sanity test: `--audit_68_host_mem_watch_values=0x82000000` (very common vtable-base) → confirm many lines, then revert.
|
||||
- Run 1 (vtable install): `--audit_68_host_mem_watch_values=0x8200A208,0x8200A928 --mute=true`. Kill after ~90s.
|
||||
- Run 2 (voice-struct clear): `--audit_68_host_mem_watch_addrs=0x42500000-0x42600000 --mute=true`. Kill after ~30s (this is heap-region wide; likely lots of hits, capture early window only). May need narrower range once we see the first writes.
|
||||
- Cold-protocol: backup canary's cache (`xenia-canary/build-cross/bin/Windows/Debug/cache/`) to `/tmp/canary-cache-bak-audit-068`, wipe before run, restore after.
|
||||
94
audit-runs/audit-068-host-mem-watch/session-2-plan.md
Normal file
94
audit-runs/audit-068-host-mem-watch/session-2-plan.md
Normal file
@@ -0,0 +1,94 @@
|
||||
# AUDIT-068 Session 2 plan
|
||||
|
||||
Date authored: 2026-05-19 (end of Session 1).
|
||||
|
||||
## Session 1 outcome recap
|
||||
|
||||
The Session 1 instrumentation is in place and proven to work (1,639 sanity hits for value=0). The two target writers — vtable install at `0xBCE25340 = 0x8200A208` and voice-struct clear `[VOICE+0x164]=0` — produced 0 hits each.
|
||||
|
||||
The negative result narrows the search space: neither writer goes through `xe::store_and_swap<T>`, `xe::store<T>`, or `Memory::Zero/Fill/Copy`. The remaining un-hooked host-side write surfaces are:
|
||||
|
||||
1. `Memory::TranslateVirtual<T*>(va)` followed by **raw pointer assignment or `memcpy`** (the XEX loader pattern; appears throughout `xenia/cpu/xex_module.cc` and many kernel-import handlers).
|
||||
2. `xe::be<T>* p = …; *p = value;` — typed big-endian wrappers; assignment goes through `byte_swap` but does NOT invoke `store_and_swap`.
|
||||
3. `xe::TranslateVirtualBE<T>(va)` returning a `be<T>*` followed by assignment.
|
||||
|
||||
## Session 2 — extension of canary instrumentation
|
||||
|
||||
### Step 1: Hook the `xe::be<T>::operator=` family
|
||||
|
||||
In `xenia/base/byte_order.h` (find the `be<T>` template's `operator=`). Add a `check_host_write(this, value, sizeof(T), "be<T>::op=")` call before the store. Cost when off: one relaxed atomic load.
|
||||
|
||||
This catches the most common kernel-handler pattern:
|
||||
```cpp
|
||||
auto* p = memory()->TranslateVirtual<X_THING*>(addr);
|
||||
p->field = some_value; // be<u32>::operator=(some_value)
|
||||
```
|
||||
|
||||
### Step 2: Optionally hook `Memory::Copy` byte-by-byte for value matches
|
||||
|
||||
Current behavior: `Memory::Copy` only checks the first u32 of the source. Replace with a scan over the source bytes for every 4-byte aligned position, comparing against the configured value list (cap N=8 makes this cheap). This catches XEX loader memcpys that write a vptr embedded in a section.
|
||||
|
||||
Tradeoff: when watching value=0x00000000 with a large copy, this triggers many spurious hits. Solution: do the scan ONLY when the value list is non-empty (already gated on `g_active & 1`).
|
||||
|
||||
### Step 3: Add a `Memory::WriteWord32(addr, value)` shim and route XEX loader's memcpys through it
|
||||
|
||||
Two options:
|
||||
- (A) Wrap every `xex_module.cc` memcpy with a pre-scan that calls `check_guest_va(addr+i, *(uint32_t*)(src+i), 4, "xex_memcpy")` for each aligned 4-byte position. Localized change in `xex_module.cc`, ~10 LOC.
|
||||
- (B) Add a generic `Memory::CopyWithWatch` wrapper. Less invasive at the call sites but requires a parallel API.
|
||||
|
||||
Recommend (A) for Session 2 — surgical, scoped to the one source file.
|
||||
|
||||
### Step 4: Re-run the two captures from Session 1
|
||||
|
||||
Same cmdlines, expect non-zero hits this time. Specifically expect:
|
||||
|
||||
- **Run 1 (vtable install)**: at least one hit on a `xex_memcpy` write of `0x8200A208` into the heap region. If still 0, the install is a synthesized runtime computation by some kernel handler — at that point, add a process-wide allocator probe (log every `MmAllocatePhysicalMemoryEx` return and tag it; cross-reference with subsequent writes).
|
||||
- **Run 2 (voice-struct clear)**: depends on where the voice struct actually lives. Likely needs a guest-side memory probe FIRST (read voice struct base via `xeAudioGetVoice…` reflection) to find the exact heap region, THEN addr-watch over that.
|
||||
|
||||
### Step 5: Cross-reference each hit with ours's exports.rs
|
||||
|
||||
For every captured writer fn name, locate the matching handler in `xenia-rs/crates/xenia-kernel/src/exports.rs`:
|
||||
- If the handler exists but doesn't emit the write: the follow-up fix (Session 3 — Session 2 does not modify ours) is to add the write in ours.
|
||||
- If the handler is missing entirely: the follow-up fix (Session 3) is to implement the handler.
|
||||
|
||||
For the XEX-loader memcpy case (most likely catch for vtable install): the analog in ours is `xenia-rs/crates/xenia-kernel/src/loader.rs` (or `xenia-cpu`/`xenia-binary`'s XEX module loader). Verify ours's section-loading code paths.
|
||||
|
||||
### Step 6: Predicted progression-metric impact
|
||||
|
||||
- If vtable `0x8200A208` install is identified and mirrored in ours: enables `sub_825070F0` to fire (per AUDIT-058/063/067), which spawns 4 worker threads (tid=27/28/29 + one unresumed in canary). This is THE keystone gap per Phase NonMatch.
|
||||
- If voice-struct clear is identified: removes the XAudio callback's blocking-wait path (per Phase HostAudio-Eager) so tid=14/15 sister chains catch up.
|
||||
- Combined: closes ~60% of the missing event volume (XAudio) + the sub_825070F0 worker fan-out.
|
||||
|
||||
### Risks / unknowns
|
||||
|
||||
1. **`be<T>::operator=` is everywhere**. The hot-path overhead matters less for capture runs (cvar-on) but adds atomic loads to EVERY guest-memory typed assignment in canary. If it bloats the build's runtime even when off, gate the hook behind a build-time `#ifdef XENIA_AUDIT_68`. Default should still be ON-by-build for the canary debug binary used as oracle.
|
||||
2. **The vptr install may be conditional / data-driven**. If the install runs only after some guest call sequence that ours doesn't reach (because ours's earlier state is divergent), then capturing the install in canary tells us WHAT writes it but Session 2 still needs to figure out WHY ours's path diverges before the install. This is the Phase NonMatch-style upstream-divergence problem.
|
||||
3. **Cold-boot determinism**: cache wipe + restore protocol (per memory #31/#32/#33/#34) must be honored across runs. Session 1 used backup `/tmp/canary-cache-bak-audit-068`.
|
||||
|
||||
### LOC budget
|
||||
|
||||
Steps 1-3 combined: estimated 60-90 LOC additive on canary, plus testing. Step 5 cross-referencing is purely investigative (no LOC).
|
||||
|
||||
### Cascade prediction (Session 2)
|
||||
|
||||
- A=catch the vtable installer: ~75% (raises from Session 1's 0% by widening coverage to `be<T>::op=` + `xex_memcpy`).
|
||||
- B=catch the voice-struct clearer: ~50% (depends on knowing the right addr range).
|
||||
- C=identify the ours-side gap for the vtable install: ~70% if A succeeds.
|
||||
- D=Session 3 lands the ours-side fix and progression metric moves: ~40-50%.
|
||||
|
||||
## Session 2 deliverable
|
||||
|
||||
- `audit-runs/audit-068-host-mem-watch/run3-vtable-extended.log` — vtable run with new hooks.
|
||||
- `audit-runs/audit-068-host-mem-watch/run4-voice-struct-extended.log` — voice struct run.
|
||||
- `audit-runs/audit-068-host-mem-watch/writer-report-v2.md` — annotated writer set + per-writer ours-side analog.
|
||||
- `audit-runs/audit-068-host-mem-watch/fix-canary-v2.diff` — extended canary instrumentation.
|
||||
- `memory/project_audit_068_session2_2026_05_XX.md` — memory entry.
|
||||
- `MEMORY.md` index update.
|
||||
|
||||
## Discipline
|
||||
|
||||
- `--mute=true` every canary run.
|
||||
- Wipe both cache locations before each cold run (`xenia-canary/build-cross/bin/Windows/Debug/cache` + `~/.local/share/Xenia/cache` if present).
|
||||
- Restore canary cache from `/tmp/canary-cache-bak-audit-068` at session end.
|
||||
- No modifications to ours source.
|
||||
- Keep canary instrumentation purely additive + cvar-gated default-off (parser-lazy via UINT32_MAX sentinel pattern landed in Session 1).
|
||||
178
audit-runs/audit-068-host-mem-watch/writer-report-v2.md
Normal file
178
audit-runs/audit-068-host-mem-watch/writer-report-v2.md
Normal file
@@ -0,0 +1,178 @@
|
||||
# AUDIT-068 Session 2 — writer report (extended coverage)
|
||||
|
||||
Date: 2026-05-19
|
||||
|
||||
## Summary
|
||||
|
||||
Session 2 extends Session 1's host-side write watch from `xe::store_and_swap<T>` + `xe::store<T>` + `Memory::Zero/Fill/Copy` to ALSO cover:
|
||||
1. **`xe::endian_store<T,E>::set()`** (the underlying impl of `xe::be<T>`/`xe::le<T>`), gated on `Memory::Memory()` having registered the host→guest thunk so static-init order doesn't race the cvar.
|
||||
2. **`Memory::Copy` full byte-scan** over every 4-byte-aligned source offset (gated on `g_active & 0x1`).
|
||||
3. **XEX loader memcpy/lzx_decompress pre-scan** at 4 sites in `xenia/cpu/xex_module.cc` (patch-memcpy, uncompressed-image memcpy, basic-block memcpy, LZX-decompress output).
|
||||
|
||||
The static-init gate proved load-bearing: my initial Run 5 (XEX section sanity) produced 0 hits because `endian_store::set()` fired during static-init, before the `cvars::audit_68_host_mem_watch_*` objects were constructed; `parse_locked()` therefore ran with empty strings and permanently latched `g_active=0`. Fix: defer the parse until `g_host_to_guest_thunk` is non-null (it is set inside `Memory::Memory()`).
|
||||
|
||||
## LOC added (canary only)
|
||||
|
||||
| File | LOC delta | Purpose |
|
||||
|---|---:|---|
|
||||
| `src/xenia/base/byte_order.h` | +27 | `endian_store::set()` hook (gated on `g_host_to_guest_thunk != nullptr`) + `#include <type_traits>` + `#include "audit_68_host_mem_watch_fwd.h"` |
|
||||
| `src/xenia/memory.cc` | +35 / -17 | `Memory::Copy` byte-scan over 4-byte-aligned source positions; preserves addr-only coarse event |
|
||||
| `src/xenia/cpu/xex_module.cc` | +35 | Inline helper `audit68_prescan_memcpy()` + wraps at sites 427 (patch image), 592 (uncompressed exe load), 668 (basic-block memcpy), 840 (post-`lzx_decompress` scan of guest-image bytes) |
|
||||
| `src/xenia/base/audit_68_host_mem_watch_base.cc` | +12 | Static-init gate in `check_host_write_slowpath` and `check_guest_va_slowpath` |
|
||||
| **Total** | **~110 LOC additive** (cvar-gated; zero cost when off, modest cost when on) | |
|
||||
|
||||
xenia-rs HEAD `e6d43a23ac393004d2e5adf2f0395fd0b5e6448b` UNCHANGED.
|
||||
|
||||
## Captures
|
||||
|
||||
All runs cold-boot (cache wipe before each), `--mute=true`, against the Sylpheed ISO.
|
||||
|
||||
### Run 5 — XEX .text region sanity (validates Step 3)
|
||||
|
||||
Cmdline: `--audit_68_host_mem_watch_addrs=0x82000000-0x82010000 --mute=true`. 70 s wallclock.
|
||||
|
||||
**Result: 1 HOST-WRITE hit (plus the INIT lines).** This is the Step 3 validation — the writes to the XEX `.text` region that Session 1 conspicuously failed to see are now caught.
|
||||
|
||||
```
|
||||
i> 00000114 AUDIT-068-INIT values_csv="" addrs_csv="0x82000000-0x82010000" values_parsed=0 addr_ranges_parsed=1 active=0x2
|
||||
i> 00000114 AUDIT-068-INIT addr_range[0] = 0x82000000-0x82010000
|
||||
i> 00000114 AUDIT-068-HOST-WRITE guest_va=0x82000000 host_ptr=0x0000000000000000 val=0x000000004D5A9000 sz=8 fn=xex_lzx_decompress_output host_ns=300 tid=276
|
||||
```
|
||||
|
||||
The value `0x4D5A9000` is the BE-encoded first 4 bytes of the XEX image: `"MZ\x90\x00"` = PE/EXE magic. Exactly as expected — `lzx_decompress` writes the decoded image starting at `base_address_=0x82000000`. **Session 1's reading-error class #35 is now mitigated**.
|
||||
|
||||
Note: only ONE hit appears (the coarse addr-only event for the start of the lzx output region) because the addr-range `0x82000000-0x82010000` intersects only the head of the ~2 MB decompress span. The per-4-byte value loop is skipped (no values configured, so `(active & 0x1) == 0`).
|
||||
|
||||
### Run 3 — vtable `0x8200A208 / 0x8200A928` writers (extended)
|
||||
|
||||
Cmdline: `--audit_68_host_mem_watch_values=0x8200A208,0x8200A928,0x080082A2,0x2829820 --audit_68_host_mem_watch_addrs=0xBCE25340 --mute=true`. 90 s wallclock.
|
||||
|
||||
**Result: 0 HOST-WRITE hits** (INIT lines present; `active=0x3`). Boot reaches tid=29 spawn (post-Phase-NonMatch trigger window).
|
||||
|
||||
```
|
||||
i> 00000114 AUDIT-068-INIT values_csv="0x8200A208,0x8200A928,0x080082A2,0x2829820" addrs_csv="0xBCE25340" values_parsed=4 addr_ranges_parsed=1 active=0x3
|
||||
i> 00000114 AUDIT-068-INIT value[0] = 0x8200A208
|
||||
i> 00000114 AUDIT-068-INIT value[1] = 0x8200A928
|
||||
i> 00000114 AUDIT-068-INIT value[2] = 0x080082A2
|
||||
i> 00000114 AUDIT-068-INIT value[3] = 0x02829820
|
||||
i> 00000114 AUDIT-068-INIT addr_range[0] = 0xBCE25340-0xBCE25347
|
||||
```
|
||||
|
||||
**Critical implication**: with Session 2's extended coverage, NONE of the following surfaces ever wrote the target value or to the target VA in canary's full boot:
|
||||
- `xe::store_and_swap<T>` (T = u8/u16/u32/u64/i8/i16/i32/i64)
|
||||
- `xe::store<T>` (host-endian sibling)
|
||||
- `Memory::Zero/Fill/Copy` (incl. full byte-scan in `Memory::Copy`)
|
||||
- `xe::endian_store<T,E>::set()` (the underlying `be<T>`/`le<T>` write path)
|
||||
- XEX loader memcpy at 4 sites + `lzx_decompress` output
|
||||
|
||||
AUDIT-067 already ruled out all 16 PPC JIT'd store opcodes (stw/stwu/stwx/stwux/stwbrx/stwcx./stmw/std/stdu/stdux/stdx/stdbrx/stdcx./stvx/stvxl/stvewx). Combined verdict: **`0xBCE25340` is never explicitly written via any known canonical write surface**. Yet `sub_825070F0` reads `[0xBCE25340]=0x8200A208` per AUDIT-058/063/067 trigger fire. New search candidates listed below.
|
||||
|
||||
### Run 4 — voice-struct field clear extended
|
||||
|
||||
Cmdline: `--audit_68_host_mem_watch_addrs=0x42500000-0x42600000 --mute=true`. 60 s wallclock.
|
||||
|
||||
**Result: 0 HOST-WRITE hits** (INIT lines present; `active=0x2`).
|
||||
|
||||
Per Session 1 plan, the addr range `0x42500000-0x42600000` was a guess. With Session 2's extended coverage it remains a guess — voice struct base is unknown. Next step (Session 3+): instrument canary's `XAudio2AudioDriver::CreateVoice` (or equivalent) to log the heap region holding the voice array, then re-run with that range.
|
||||
|
||||
### Sanity (value=0) — confirms full-surface coverage
|
||||
|
||||
Cmdline: `--audit_68_host_mem_watch_values=0x00000000 --mute=true`. 20 s wallclock.
|
||||
|
||||
**Result: 78,738 hits** across all hooked surfaces:
|
||||
|
||||
| Surface | Hits | Notes |
|
||||
|---|---:|---|
|
||||
| `xex_lzx_decompress_output` | 78,655 | Every 4-byte-zero u32 in the LZX-decompressed Sylpheed image (.bss/.padding) |
|
||||
| `Memory::Zero` | 39 | Heap-page zero on Memory::Initialize + stack zeros |
|
||||
| `be<T>::set` | 35 | **NEW hook — proves Step 1 works.** Header writes from `kernel_state.cc` / `xboxkrnl_threading.cc` etc. |
|
||||
| `store_and_swap<u32>` | 5 | TIB/kernel-pointer init (same as Session 1) |
|
||||
| `Memory::Fill` | 4 | RtlFillMemory equivalents |
|
||||
|
||||
Session 1 sanity was 1,639 hits — Session 2 records ~48× as many (78,738), validating that the new hooks fire correctly during boot.
|
||||
|
||||
## Headline finding
|
||||
|
||||
Session 2 expanded the host-write watch from **~5 surfaces** (store_and_swap, store, Memory::Zero/Fill/Copy) to **~9 surfaces** (+ be<T>::set, + xex_module memcpy at 4 sites, + lzx_decompress output). Sanity went from 1,639 → 78,738 hits, validating the new hooks.
|
||||
|
||||
**Despite this expansion**, the vtable install at `[0xBCE25340] = 0x8200A208` STILL produces 0 hits across canary's full boot. Combined with AUDIT-067's 16 PPC JIT store hooks producing 0 hits, the install path is officially OUTSIDE the known canonical write surfaces. Possible remaining paths (Session 3+ search space):
|
||||
|
||||
1. **Direct `*reinterpret_cast<T*>(host_ptr) = value`** in kernel-import handlers (raw pointer assignment, bypassing `xe::be<T>::set()`, `xe::store_and_swap`, and `Memory::*`). Audit needs ripgrep on `kernel/xboxkrnl/*.cc` for patterns matching the above.
|
||||
2. **Allocator-side initial-state writes** — `MmAllocatePhysicalMemoryEx` returning a block that already contains the value from a prior committed-but-deallocated page (cross-page artifact). Memory protection routines (`MmSetAllocationProtect` etc.) may also mutate.
|
||||
3. **GPU/HostMemory mmio mappings** — D3D12 backbuffer / texture upload may write to guest VA ranges directly via mapped allocations.
|
||||
4. **VFS file readback into guest VA** — `NtReadFile` writes the file contents into guest memory via `Memory::Copy` (now scanned) OR via a direct `memcpy(host_ptr, src, n)` in `xfile.cc` / `host_path_file.cc`. Need to audit those.
|
||||
5. **Kernel-import handler using a typed POD struct copy** — e.g. `*reinterpret_cast<X_FOO*>(host_ptr) = X_FOO{...}` where memberwise assignment runs through neither `be<T>::set()` (because POD struct copy uses memcpy semantics) nor `store_and_swap`.
|
||||
|
||||
Path 5 is the most likely candidate. The implicit copy-assignment of a struct containing `be<T>` members would NOT route through `set()` — only through bytewise memcpy. This is a hook-surface gap that Session 3 should target.
|
||||
|
||||
## Cross-reference each captured writer in ours
|
||||
|
||||
### `xex_lzx_decompress_output` (Run 5 — 1 hit)
|
||||
|
||||
Captures the LZX decompress of the XEX image into guest VA `base_address_=0x82000000`. In canary: `xenia/cpu/xex_module.cc:840` calls `lzx_decompress(compress_buffer, ..., buffer, uncompressed_size, ...)` where `buffer = memory()->TranslateVirtual(base_address_)`.
|
||||
|
||||
**Ours-side analog**: `xenia-rs/crates/xenia-xex/src/lzx.rs` + `xenia-rs/crates/xenia-xex/src/loader.rs`. Per Phase B `image_loaded_sha256 ea8d160e…` matching across cold runs, ours's LZX decoder produces byte-identical output to canary's. No fix needed. **GAP CLASS: NONE.**
|
||||
|
||||
### `be<T>::set` (sanity-v2 — 35 hits in 20 s)
|
||||
|
||||
Per sanity capture, these are likely kernel-state header writes (`kernel_state.cc:create_dispatch_table` etc.). Ours's analog: `xenia-rs/crates/xenia-kernel/src/state.rs` + `exports.rs` (each kernel handler that writes a `be<T>` field). Without enabling per-event tagging in the canary log we can't enumerate which handler produced which hit; full cross-reference deferred to Session 3.
|
||||
|
||||
**GAP CLASS: UNKNOWN — needs per-tid stack-trace enrichment in canary instrumentation.**
|
||||
|
||||
### `Memory::Zero`, `Memory::Fill`, `store_and_swap<u32>` (sanity-v2 — 48 hits combined)
|
||||
|
||||
Already covered by Session 1 cross-reference. No new gaps surfaced.
|
||||
|
||||
## Predicted vs actual outcomes
|
||||
|
||||
| Cascade rung | Prediction | Actual |
|
||||
|---|---|---|
|
||||
| A=catch vtable installer | ~75% | **FAIL** — 0 hits despite ~9-surface coverage. Hook-surface still incomplete OR install is via path-5-style POD struct copy. |
|
||||
| B=catch voice-struct clearer | ~50% | **FAIL** — 0 hits. Addr range was a guess; needs guest-side voice-base probe first. |
|
||||
| C=identify ours's gap if A succeeds | ~70% (cond. on A) | **N/A** (A failed). |
|
||||
| D=Session 3 progression-metric move | ~40-50% (cond. on A+C) | **N/A** (A failed). |
|
||||
|
||||
Validated rungs:
|
||||
| Rung | Actual |
|
||||
|---|---|
|
||||
| **E=Step 3 validation (XEX section caught)** | **PASS** — Run 5 caught `xex_lzx_decompress_output` at `0x82000000` with `MZ\x90\x00` magic. Session 1 reading-error #35 resolved at the hook level. |
|
||||
| **F=be<T>::set() hook fires correctly** | **PASS** — sanity-v2 saw 35 be<T>::set hits in 20 s without crashing static init. |
|
||||
|
||||
## Session 3 recommendation
|
||||
|
||||
Three concrete next steps in priority order:
|
||||
|
||||
**Step 1 — Hook raw pointer assignments inside `kernel/util/shim_utils.h`.** Per shim_utils.h, kernel-import handlers receive typed pointers (`X_HANDLE*`, etc.) and assign via `*ptr = value` raw assignment. Whole-struct copy-assignment of a POD struct containing `be<T>` fields does NOT go through `set()`, because the implicit struct copy is bytewise and never invokes the members' `set()`. Add an `XAUDIT_68_WRITE_FIELD(host_ptr, value)` macro to be invoked at known write sites OR (more invasive) instrument each `*ptr = ...` pattern. ~50-100 LOC additive.
|
||||
|
||||
**Step 2 — Add a memory-protection trap on guest VA `0xBCE25340` (4 bytes).** Use a guard page (`Memory::Protect` to read-only) and trap in the host signal handler to log the writer's RIP/x86 instruction. This is the nuclear option — it bypasses ALL emulation-layer hooks and catches the actual host store instruction. Requires platform-specific SIGSEGV/SEH handler integration. ~150-200 LOC platform-gated.
|
||||
|
||||
**Step 3 — Read-mode probe instead of write-mode.** Place a `RtlReadGuestU32(0xBCE25340)` probe at the FIRST iteration of canary's main loop AFTER memory init; log the VALUE at that address. If the value is `0` early and `0x8200A208` later, we know it was written between those two samples. Combine this with `--audit_61_branch_probe_pcs=0x825070F0` (which AUDIT-067 confirmed fires) and a binary bisect over the boot trajectory to narrow the write window.
|
||||
|
||||
Step 3 is cheapest (~20 LOC) and may pinpoint the install epoch without finding the writer; pair with bisection across the audit-068 event log.
|
||||
|
||||
## Cascade outcome
|
||||
|
||||
- A (vtable installer caught): **FAIL** — surfaces still incomplete, but space narrowed.
|
||||
- B (voice-struct clearer caught): **FAIL** — addr range remains a guess.
|
||||
- C (ours gap identified): **N/A** (A failed).
|
||||
- D (Session 3 progression move): **N/A**.
|
||||
- **E (Step 3 XEX-section validation)**: **PASS** — proves Session 1's #35 surface gap is at least partially closed.
|
||||
- **F (be<T>::set hook works)**: **PASS**.
|
||||
|
||||
Net: 2 cascade wins (E, F) for "instrumentation is sound and now covers ~9 surfaces"; 2 cascade losses (A, B) for "the actual writer is in a path that's STILL un-hooked or doesn't exist as a canonical write at all".
|
||||
|
||||
## Artifacts (this dir)
|
||||
|
||||
- `instrumentation-design.md` (Session 1)
|
||||
- `fix-canary.diff` (Session 1 — 5-file diff)
|
||||
- `fix-canary-v2.diff` (Session 2 — extends with 4 more sites)
|
||||
- `run1-vtable-writers.log` (Session 1 — 0 hits)
|
||||
- `run2-voice-struct-writers.log` (Session 1 — 0 hits)
|
||||
- `run3-vtable-extended.log` (Session 2 — 0 HOST-WRITE hits, INIT confirmed)
|
||||
- `run4-voice-struct-extended.log` (Session 2 — 0 hits)
|
||||
- `run5-xex-section-sanity.log` (Session 2 — **1 hit** validating Step 3)
|
||||
- `sanity-value0.log` (Session 1 — 1,639 hits)
|
||||
- `sanity-v2-value0.log` (Session 2 — 78,738 hits incl. 35 from be<T>::set)
|
||||
- `writer-report.md` (Session 1)
|
||||
- `writer-report-v2.md` (this file)
|
||||
- `session-2-plan.md`
|
||||
344
audit-runs/audit-068-host-mem-watch/writer-report-v3.md
Normal file
344
audit-runs/audit-068-host-mem-watch/writer-report-v3.md
Normal file
@@ -0,0 +1,344 @@
|
||||
# AUDIT-068 Session 3 — read-mode probe writer report
|
||||
|
||||
Date: 2026-05-20
|
||||
|
||||
## Summary
|
||||
|
||||
Session 3 adds a **read-mode probe** to the AUDIT-068 instrumentation. Instead
|
||||
of hooking host-side write surfaces (Session 1+2's approach, which produced 0
|
||||
hits across ~9 surfaces despite the install being real), the probe spawns a
|
||||
dedicated low-priority polling thread that samples configured guest VAs every
|
||||
`PERIOD_NS` and emits `AUDIT-068-READ-CHANGE` events on transition.
|
||||
|
||||
The probe bounded the install epoch for the `ANON_Class_713383D7` vptr to
|
||||
**host_ns ≈ 9.416–9.612 s** (varies by ~200 ms between cold runs) and provided
|
||||
the first direct evidence that the install is a **bulk POD struct copy** of a
|
||||
12-byte `{vptr, self_ptr, self_ptr}` record into the instance's first three
|
||||
u32 slots — written simultaneously within the same 1 ms poll interval.
|
||||
**Reading-error class #36 (POD-struct copy-assignment bypass) is now
|
||||
confirmed in the strongest possible terms**: Run 10 enabled BOTH the read
|
||||
probe AND the full ~9-surface host-write watch simultaneously with the
|
||||
CORRECT target value `0x8200A1E8`, and observed the read probe catch the
|
||||
install while host-write surfaces produced **0 hits**.
|
||||
|
||||
A secondary finding overturns part of the AUDIT-067 framing: the actual vptr
|
||||
value installed is **`0x8200A1E8`**, not `0x8200A208`. The number `0x8200A208`
|
||||
is the address of the slot-1 fn pointer WITHIN the vtable (32 bytes into the
|
||||
vtable). The value stored at `[ctx_ptr]` is the vtable BASE = `0x8200A1E8`.
|
||||
AUDIT-067 hooked all 16 PPC store opcodes for `0x8200A208` — it should have
|
||||
also (or instead) watched `0x8200A1E8`. This may explain part of why AUDIT-067
|
||||
also produced 0 hits.
|
||||
|
||||
## LOC added (Session 3 delta, canary only)
|
||||
|
||||
| File | LOC delta | Purpose |
|
||||
|---|---:|---|
|
||||
| `src/xenia/cpu/cpu_flags.h` | +7 | New cvar `audit_68_host_mem_read_probe` declaration. |
|
||||
| `src/xenia/cpu/cpu_flags.cc` | +6 | Cvar definition. |
|
||||
| `src/xenia/memory.cc` | +18 | Register `g_guest_to_host_thunk` (wraps `Memory::TranslateVirtual`) and `g_query_protect_thunk` (wraps `LookupHeap`+`QueryProtect`) inside `Memory::Memory()`; reset to nullptr in `~Memory()`. |
|
||||
| `src/xenia/base/audit_68_host_mem_watch_fwd.h` | +17 | `GuestToHostThunk` + `QueryProtectThunk` extern decls. |
|
||||
| `src/xenia/base/audit_68_host_mem_watch_base.cc` | +~170 | `ReadProbe` struct + parser (`VA:SIZE:PERIOD_NS` CSV form) + `sample_at()` w/ page-protect guard + `read_probe_thread_main()` polling loop + `start_read_probe_thread_if_configured()` lazy-start (called from `check_host_write_slowpath`). |
|
||||
| **Total** | **~218 LOC additive** | All cvar-gated default-off (empty CSV = thread never spawned). |
|
||||
|
||||
Cumulative across Sessions 1+2+3: ~520 LOC.
|
||||
|
||||
xenia-rs HEAD `e6d43a23ac393004d2e5adf2f0395fd0b5e6448b` **UNCHANGED**.
|
||||
|
||||
## Cvar format
|
||||
|
||||
```
|
||||
--audit_68_host_mem_read_probe=VA1:SIZE1:PERIOD1,VA2:SIZE2:PERIOD2,...
|
||||
```
|
||||
|
||||
Each tuple is `VA:SIZE:PERIOD_NS`. SIZE ∈ {1, 2, 4, 8}. PERIOD_NS floored at
|
||||
1 us (1000). Max 8 tuples. Default empty (off).
|
||||
|
||||
Lazy-start: the poll thread spawns only on the first call to
|
||||
`check_host_write_slowpath()` after `Memory::Memory()` has registered the
|
||||
thunks. This reuses the Session 2 static-init gate. The thread is detached
|
||||
(daemon-style) and polls until process exit.
|
||||
|
||||
## Captures
|
||||
|
||||
All runs cold-boot (cache wipe before each), `--mute=true`, against the
|
||||
Sylpheed ISO. 90 s wallclock each.
|
||||
|
||||
### Run 6 — primary read-probe on `0xBCE25340`
|
||||
|
||||
Cmdline: `--audit_68_host_mem_read_probe=0xBCE25340:4:1000000 --mute=true`.
|
||||
|
||||
Observations:
|
||||
```
|
||||
host_ns=729615200 INITIAL 0x00000000
|
||||
host_ns=738072700 CHANGE 0x00000000 → 0xBCE254C0 (arena-local pointer)
|
||||
host_ns=1537758000 CHANGE 0xBCE254C0 → 0xBCE25640
|
||||
host_ns=1591760600 CHANGE 0xBCE25640 → 0xBCE25350
|
||||
host_ns=1592827100 CHANGE 0xBCE25350 → 0xBCE257C0
|
||||
host_ns=1601443500 CHANGE 0xBCE257C0 → 0x82061050 (looks like XEX vtable)
|
||||
host_ns=1602506700 CHANGE 0x82061050 → 0x820610E0 (final, stable through 90 s)
|
||||
```
|
||||
**Boot reached worker spawn (thid=27/28/29 visible in log tail)** — so the
|
||||
probe was alive for the whole 90 s wallclock; only ~7 changes occurred at
|
||||
`0xBCE25340` in this run, and the value never became `0x8200A208`.
|
||||
|
||||
This indicated the address `0xBCE25340` cited in AUDIT-058/067 is NOT
|
||||
deterministic across runs — there's "arena drift" in the `0xBCE25xxx` region.
|
||||
The Phase-NonMatch investigation memo (2026-05-19) already documented this:
|
||||
canary cold sample saw `ctx_ptr=0xBCE251C0` while AUDIT-058 saw `0xBCE25340`.
|
||||
|
||||
### Run 7 — neighbor bisect on `0xBCE25340 ± 4/8`
|
||||
|
||||
Cmdline: `--audit_68_host_mem_read_probe=0xBCE2533C:4:1000000,0xBCE25340:4:1000000,0xBCE25344:4:1000000,0xBCE25348:4:1000000`.
|
||||
|
||||
```
|
||||
host_ns=655976500 INITIAL all four = 0
|
||||
host_ns=664462100 CHANGE 0xBCE25340: 0 → 0xBCE254C0
|
||||
host_ns=1374604200 CHANGE 0xBCE25340: 0xBCE254C0 → 0x07C65ADA (3 SIMULTANEOUS)
|
||||
host_ns=1374604200 CHANGE 0xBCE25344: 0 → 0x001EE000
|
||||
host_ns=1374604200 CHANGE 0xBCE25348: 0 → 0x0003A313
|
||||
```
|
||||
|
||||
**Key signal**: at host_ns=1.374 s, three adjacent u32 slots changed within
|
||||
the same 1 ms poll interval but the neighbor at `0xBCE2533C` did NOT. This is
|
||||
a clear bulk struct-copy / memcpy footprint — the writer wrote a 12-byte
|
||||
record starting at `0xBCE25340`. The three values `{0x07C65ADA, 0x001EE000,
|
||||
0x0003A313}` are NOT the vtable (don't match `0x8200A208`/`0x8200A1E8`); they
|
||||
look like unrelated data (FNV-style hash, allocation size, refcount?).
|
||||
This particular write happens to a DIFFERENT object instance reusing the
|
||||
`0xBCE25340` slot, not the ANON_Class instance.
|
||||
|
||||
### Run 8 — locate the actual ctx_ptr via AUDIT-061 fire
|
||||
|
||||
Cmdline: `--audit_61_branch_probe_pcs=0x825070F0 --audit_68_host_mem_read_probe=0xBCE25340:4:1000000`.
|
||||
|
||||
`AUDIT-061-BR pc=825070F0 ... r3=BCE251C0 ...` fired late in the run. So in
|
||||
THIS cold trajectory the ANON_Class instance is at `0xBCE251C0`, not
|
||||
`0xBCE25340`. The probe at `0xBCE25340` was watching the wrong address.
|
||||
|
||||
### Run 9 — neighbor bisect on the correct ctx_ptr `0xBCE251C0`
|
||||
|
||||
Cmdline: `--audit_61_branch_probe_pcs=0x825070F0 --audit_68_host_mem_read_probe=0xBCE251BC:4:1000000,0xBCE251C0:4:1000000,0xBCE251C4:4:1000000,0xBCE251C8:4:1000000`.
|
||||
|
||||
```
|
||||
host_ns=633560300 INITIAL all four = 0
|
||||
host_ns=642041900 CHANGE 0xBCE251C0: 0 → 0xBCE25340 (arena ptr)
|
||||
host_ns=1387443500 CHANGE 0xBCE251C0: 0xBCE25340 → 0xBCE254C0 (2 SIMULTANEOUS)
|
||||
host_ns=1387443500 CHANGE 0xBCE251C8: 0 → 0x00000148
|
||||
host_ns=1412116800 CHANGE 0xBCE251C0: 0xBCE254C0 → 0 (2 SIMULTANEOUS clear)
|
||||
host_ns=1412116800 CHANGE 0xBCE251C8: 0x148 → 0
|
||||
host_ns=1457544600 CHANGE 0xBCE251C0: 0 → 0xBF80199A (2 SIMULTANEOUS — floats)
|
||||
host_ns=1457544600 CHANGE 0xBCE251C4: 0 → 0x3F802D83 (= -1.0008, 1.0014)
|
||||
host_ns=5710239000 CHANGE 0xBCE251C0: 0xBF80199A → 0xBCE25640 (arena ptr)
|
||||
host_ns=9416025400 CHANGE 0xBCE251C0: 0xBCE25640 → 0x8200A1E8 (3 SIMULTANEOUS — THE INSTALL)
|
||||
host_ns=9416025400 CHANGE 0xBCE251C4: 0x3F802D83 → 0xBCE251C0 (self-ptr)
|
||||
host_ns=9416025400 CHANGE 0xBCE251C8: 0 → 0xBCE251C0 (self-ptr)
|
||||
AUDIT-061-BR pc=825070F0 r3=BCE251C0 (fire ~25 s wallclock)
|
||||
```
|
||||
|
||||
**The install epoch is host_ns = 9.416025400 s.** Three slots written
|
||||
simultaneously to `{vptr=0x8200A1E8, self=0xBCE251C0, self=0xBCE251C0}` —
|
||||
classic struct construction or `*ptr = X_FOO{...}` POD copy pattern. The
|
||||
slot at `0xBCE251BC` (4 bytes before `ctx_ptr`) did NOT change, bounding the
|
||||
write to exactly 12 bytes starting at `0xBCE251C0`.
|
||||
|
||||
The install is ~966 ms BEFORE the `sub_825070F0` fire (~10.4 s host_ns,
|
||||
matches Phase-NonMatch documented thread.create burst at 10.382 s) and well
|
||||
within the 60-90 s capture window.
|
||||
|
||||
### Run 10 — cross-validation: read-probe + host-write watch with correct value
|
||||
|
||||
Cmdline: `--audit_68_host_mem_watch_values=0x8200A1E8,0x8200A208,0xE8A10082,0x82A10082 --audit_68_host_mem_watch_addrs=0xBCE251C0 --audit_68_host_mem_read_probe=0xBCE251C0:4:1000000 --audit_61_branch_probe_pcs=0x825070F0`.
|
||||
|
||||
```
|
||||
host_ns=9612147300 CHANGE 0xBCE251C0: 0xBCE25640 → 0x8200A1E8 (read probe catches)
|
||||
AUDIT-061-BR pc=825070F0 r3=BCE251C0 (sub_825070F0 fires)
|
||||
AUDIT-068-HOST-WRITE: 0 hits (write surfaces miss)
|
||||
```
|
||||
|
||||
This is the definitive proof:
|
||||
1. The install IS captured by the read probe at host_ns ≈ 9.6 s.
|
||||
2. The corrected value `0x8200A1E8` (not `0x8200A208`) is the actual vptr.
|
||||
3. None of the ~9 host-write surfaces hooked in Session 1+2 catches it.
|
||||
|
||||
**Reading-error class #36 confirmed**: the writer uses a path that bypasses
|
||||
all of `xe::store_and_swap<T>`, `xe::store<T>`, `Memory::Zero/Fill/Copy`,
|
||||
`xe::endian_store::set()`, and `Memory::Copy` byte-scan — most likely a
|
||||
`*reinterpret_cast<X_FOO*>(host_ptr) = X_FOO{...}` raw POD struct
|
||||
copy-assignment OR a direct `memcpy(host_ptr_from_TranslateVirtual,
|
||||
&local_struct, sizeof(X_FOO))`.
|
||||
|
||||
## Headline finding
|
||||
|
||||
**Install epoch**: host_ns ≈ 9.4–9.6 s (varies ±200 ms across cold runs).
|
||||
This is ~966 ms before sub_825070F0 fires (~10.4 s host_ns).
|
||||
|
||||
**Neighbor pattern**: **3 simultaneous writes** at `0xBCE251C0`, `+4`, `+8`
|
||||
within the same 1 ms poll interval — `{vptr=0x8200A1E8, self=0xBCE251C0,
|
||||
self=0xBCE251C0}`. `0xBCE251BC` (`-4`) does NOT change. This is a 12-byte
|
||||
POD struct copy.
|
||||
|
||||
**Implications**:
|
||||
- The write is invisible to all currently-hooked host-write surfaces.
|
||||
- The value bytes `{0x82, 0x00, 0xA1, 0xE8, 0xBC, 0xE2, 0x51, 0xC0, 0xBC,
|
||||
0xE2, 0x51, 0xC0}` (big-endian guest order) must appear together in some
|
||||
source — either as a constant pre-baked vtable instance pattern that's
|
||||
memcpy'd, or as fields computed by host code and bulk-written.
|
||||
- The fact that the second and third slots are self-pointers (`= ctx_ptr`)
|
||||
suggests a doubly-linked-list head node initialization: `head.vptr = vtbl;
|
||||
head.next = &head; head.prev = &head;`. This is a textbook intrusive list
|
||||
/ queue head pattern.
|
||||
|
||||
## Wallclock relation to AUDIT-067's sub_825070F0 fire
|
||||
|
||||
| Event | Host_ns | Wallclock (≈) |
|
||||
|---|---:|---:|
|
||||
| Probe init (first slowpath call) | ~640 ms | ~1.6 s |
|
||||
| Various pre-install arena reuse of slot | 0.6–5.7 s | 1.6–6.5 s |
|
||||
| **Vptr install at `0xBCE251C0`** | **9.416–9.612 s** | **~10.4–10.6 s** |
|
||||
| Phase-NonMatch documented thread.create burst | 10.382–10.384 s | ~11.3 s |
|
||||
| sub_825070F0 fire (AUDIT-061-BR captured) | ~10.5 s | **~25 s wallclock** (AUDIT-067 quoted) |
|
||||
|
||||
The "host_ns ~10.5 s when sub_825070F0 fires" vs "~25 s wallclock" gap is
|
||||
because `host_ns` starts when the first AUDIT-068 slowpath call lands (i.e.
|
||||
when canary's static-init plus Wine startup are done) — Wine's
|
||||
JIT-warmup/early-boot takes ~15 s before guest PPC code starts. The
|
||||
ANON_Class install happens ~960 ms before sub_825070F0 dispatch, within the
|
||||
same "post-DiscImageDevice resolve" boot phase that AUDIT-058 framed.
|
||||
|
||||
## Session 4 recommendation
|
||||
|
||||
Three paths to identifying the writer, ranked by feasibility:
|
||||
|
||||
### Path 1 (RECOMMENDED) — POD struct-copy hook with NEW ε-constraint
|
||||
|
||||
The install epoch (host_ns ≈ 9.4–9.6 s) and the 12-byte simultaneous-write
|
||||
signature (3 u32 slots) narrows the candidate hooks dramatically. Two
|
||||
surgical instrumentation strategies:
|
||||
|
||||
(a) **Pre-instrument all `*reinterpret_cast<X*>(host_ptr) = X{...}` sites in
|
||||
canary**. Ripgrep finds them: pattern
|
||||
`\*reinterpret_cast<[A-Z]\w*\*>\([^)]*\)\s*=` in `src/xenia/kernel/**.cc`. A
|
||||
quick scan of Session 1 inventory listed ~30 such sites, but most are in
|
||||
kernel-import handlers that fire repeatedly — the ε-constraint of "fires
|
||||
exactly once at host_ns 9.4–9.6 s on tid=6" lets us bisect.
|
||||
|
||||
(b) **Wrap `xe::SetField()` / pointer-typed assignment helpers** if any
|
||||
exist. Otherwise instrument `memcpy(host_ptr_from_TranslateVirtual, ...)`
|
||||
patterns directly — there are ~40 such sites across kernel/util/cpu code per
|
||||
Session 1+2 surveys. The ones NOT already wrapped by Session 2 (xex_module.cc
|
||||
got 4 sites) are candidates.
|
||||
|
||||
LOC budget: ~50-100 additive in canary; default-off cvar
|
||||
`audit_68_pod_copy_watch_addrs` (CSV of VA ranges; emits on every memcpy/raw
|
||||
assign within range).
|
||||
|
||||
### Path 2 — Guard-page SIGSEGV trap
|
||||
|
||||
Use the existing canary `ExceptionHandler` infrastructure
|
||||
(src/xenia/base/exception_handler*.cc — already cross-platform, has Win SEH
|
||||
and POSIX SIGSEGV handlers wired). Mark the 4K page containing `0xBCE251C0`
|
||||
as read-only at host_ns = 9.4 s (just before the install epoch); the page
|
||||
fault then traps the writer's host instruction; log RIP/host stack, then
|
||||
unprotect+resume.
|
||||
|
||||
Pros: catches the writer with instruction-level precision regardless of how it
|
||||
writes (memcpy, raw assign, vector store, etc.).
|
||||
|
||||
Cons: ~150–200 LOC platform-gated; needs accurate epoch timing (can't trap
|
||||
the whole boot or it crashes). Use host_ns ≥ 9.0 s as the gate.
|
||||
|
||||
### Path 3 — Kernel-handler grep with new ε-constraint
|
||||
|
||||
Now that the install epoch is known (9.4–9.6 s host_ns; just AFTER
|
||||
`DiscImageDevice::ResolvePath(\\dat\\movie)` per AUDIT-058 narrative), grep
|
||||
all kernel handlers for ones that fire in that window AND write to the
|
||||
heap. The probe log already shows this is right around the time
|
||||
`HostPathDevice::ResolvePath(\\dat\\movie)` runs and various worker file IO
|
||||
starts. Cross-reference with canary's existing kernel-call trace
|
||||
(`--log_level=4`) to enumerate handlers called in the 9.0–9.7 s window.
|
||||
|
||||
LOC: 0 (purely investigative).
|
||||
|
||||
**Recommended Session 4 priority: Path 1 first** (concrete instrumentation
|
||||
extends what we have, leverages the epoch constraint). Path 2 as backstop.
|
||||
Path 3 alongside as a cheap parallel investigation.
|
||||
|
||||
## Cascade outcome (Session 3)
|
||||
|
||||
- **A**: identify install epoch — **PASS** (9.4–9.6 s host_ns; ~966 ms before
|
||||
sub_825070F0).
|
||||
- **B**: identify neighbor pattern — **PASS** (3-slot simultaneous write,
|
||||
POD struct signature confirmed).
|
||||
- **C**: confirm reading-error #36 — **PASS** (Run 10 demonstrates host-write
|
||||
surfaces miss the install even with the CORRECT target value
|
||||
`0x8200A1E8`).
|
||||
- **D**: identify the host-side writer — **N/A** (Session 4 work, with epoch
|
||||
and signature constraints to narrow the search).
|
||||
- **E**: secondary discovery: actual vptr is `0x8200A1E8` not `0x8200A208`
|
||||
— **PASS** (AUDIT-067's target value was off by 32 bytes; may have
|
||||
contributed to that audit's 0-hit JIT store result).
|
||||
|
||||
Net 4/5 wins. Session 4 has concrete constraints (epoch, signature, value
|
||||
correction) to land the writer identification.
|
||||
|
||||
## Reading-error class #36 reinforcement
|
||||
|
||||
Session 3 directly demonstrates reading-error #36 (POD-struct
|
||||
copy-assignment bypass for typed BE/LE field watch). The corrective rule is
|
||||
now formalized as:
|
||||
|
||||
> When hooking host-side writes to guest memory, member-level set() hooks
|
||||
> (e.g. `xe::endian_store::set()`) catch ONLY explicit assignments like
|
||||
> `*be<T>* = value`. They DO NOT catch:
|
||||
> 1. POD struct copy-assignment (`*reinterpret_cast<X*>(host_ptr) = X{...}`).
|
||||
> 2. memcpy into the host pointer (`memcpy(host_ptr_from_TranslateVirtual,
|
||||
> &local_struct, sizeof(X))`).
|
||||
> 3. Vector-typed bulk store intrinsics that target guest memory.
|
||||
>
|
||||
> Mitigation: pair host-write hooks with **read-mode probes** at the
|
||||
> target VA — the read probe captures the install regardless of the writer's
|
||||
> mechanism, and provides epoch + neighbor-pattern constraints for the
|
||||
> follow-up targeted instrumentation.
|
||||
|
||||
This rule is now reflected in the AUDIT-068 Session 3 read-probe machinery —
|
||||
preserved in canary tree for all future audits.
|
||||
|
||||
## Discipline observed
|
||||
|
||||
- `--mute=true` on every run ✓
|
||||
- Cold-protocol: cache wipe before each cold run; cache restored from
|
||||
`/tmp/canary-cache-bak-audit-068` at session end ✓ (current cache was
|
||||
backed up at session start since prior backup was missing).
|
||||
- xenia-rs HEAD `e6d43a23…` UNCHANGED ✓ (verified by sha256 of `git diff
|
||||
HEAD` at session start vs end; uncommitted modifications from prior
|
||||
sessions are unchanged from session start, no new modifications made by
|
||||
this session).
|
||||
- Canary instrumentation purely additive + cvar-gated default-off ✓
|
||||
- No destructive shortcuts ✓
|
||||
- Static-init gate pattern preserved + extended (Session 3's read probe
|
||||
thread is also gated on `g_guest_to_host_thunk + g_query_protect_thunk`
|
||||
being non-null — same discipline as Session 2's thunk gate).
|
||||
|
||||
## Artifacts (this dir)
|
||||
|
||||
- `fix-canary-v3.diff` — cumulative Session 3 instrumentation (this run).
|
||||
- `run6-read-probe-bisect.log` — primary probe on `0xBCE25340` (90 s; 7
|
||||
changes, ended at `0x820610E0`, never `0x8200A208`).
|
||||
- `run7-read-probe-neighbors.log` — bisect probe on `0xBCE25340 ± 4/8`; 3
|
||||
simultaneous writes at `+0/+4/+8` confirming POD signature.
|
||||
- `run9-read-probe-251C0-neighbors.log` — neighbor probe on the actual
|
||||
ctx_ptr `0xBCE251C0`; **captures the install** at host_ns=9.416 s.
|
||||
- `run10-cross-validation.log` — read probe + host-write watch with CORRECT
|
||||
value `0x8200A1E8`; demonstrates 0 HOST-WRITE hits while read probe sees
|
||||
the install at host_ns=9.612 s.
|
||||
- `writer-report-v3.md` — this file.
|
||||
|
||||
(Run 8 was an intermediate diagnostic; data is included in Run 9/10 logs.)
|
||||
|
||||
## Phase B / progression
|
||||
|
||||
- `image_loaded_sha256 ea8d160e…` UNCHANGED (instrumentation does not touch
|
||||
XEX image processing).
|
||||
- xenia-rs HEAD UNCHANGED.
|
||||
- No progression-metric movement (Session 3 is instrumentation-only). Session
|
||||
4 has concrete leads.
|
||||
293
audit-runs/audit-068-host-mem-watch/writer-report-v4.md
Normal file
293
audit-runs/audit-068-host-mem-watch/writer-report-v4.md
Normal file
@@ -0,0 +1,293 @@
|
||||
# AUDIT-068 Session 4 — writer identified (guest PPC code)
|
||||
|
||||
Date: 2026-05-20
|
||||
|
||||
## Headline
|
||||
|
||||
**Writer found.** The presumed host-side write of `0x8200A1E8` at `[0xBCE251C0]` is performed
|
||||
by **JIT-emitted guest PPC code**, NOT host C++ code. Reading-error #36 (POD
|
||||
struct-copy bypass) — registered in Sessions 2 and 3 as the explanation for the
|
||||
host-side surface gap — is **partially superseded**: the gap is real for host
|
||||
C++ writes, but the actual writer of THIS particular vptr install is on the
|
||||
guest side. AUDIT-067 (which hooks all 16 PPC store opcodes at JIT-emit time)
|
||||
caught it on the first try, once the correct target value `0x8200A1E8` was
|
||||
configured (per the Session 3 correction; AUDIT-067's prior runs watched the
|
||||
wrong value `0x8200A208`).
|
||||
|
||||
**No new instrumentation was needed.** Session 4 used the existing AUDIT-067
|
||||
machinery + Session 3's AUDIT-068 read-probe to cross-validate.
|
||||
|
||||
## Writer PC and ctor chain
|
||||
|
||||
The ANON_Class_713383D7 instance is constructed via a **three-level
|
||||
inheritance ctor chain**, fully on the guest PPC side. Each ctor writes the
|
||||
next vtable down to slot 0:
|
||||
|
||||
```
|
||||
sub_824FECE0 (deepest base ctor)
|
||||
├─ stw r31, 4(r31) ; *(this+4) = this ← self-pointer
|
||||
├─ stw r31, 8(r31) ; *(this+8) = this ← self-pointer
|
||||
├─ stw r11=1, 12(r31) ; *(this+12) = 1 (refcount?)
|
||||
└─ bl 0x8284DD1C ; sub-helper on &this[+16]
|
||||
↑ called from
|
||||
sub_825065E8 (intermediate base ctor — writes vtable 0x8200A908)
|
||||
├─ bl sub_824FECE0 ; chain to deepest base
|
||||
├─ lis r11, 0x8201; subi r11, 22264 → r11 = 0x8200A908
|
||||
├─ stw r11, 0(r31) ; *(this) = 0x8200A908
|
||||
└─ bl 0x825051D8 ; sub-helper init of fields
|
||||
↑ called from
|
||||
sub_824FD240 (most-derived ctor — writes vtable 0x8200A1E8)
|
||||
├─ bl sub_825065E8 ; chain to intermediate base
|
||||
├─ lis r11, 0x8201; subi r11, 24088 → r11 = 0x8200A1E8
|
||||
└─ stw r11, 0(r31) ; *(this) = 0x8200A1E8 ← THE INSTALL
|
||||
```
|
||||
|
||||
The doubly-linked list head sentinel pattern observed by Session 3's read
|
||||
probe (`{vptr, self, self}` at offsets {0, +4, +8}) is now fully explained:
|
||||
|
||||
- Offsets +4 and +8 are written by the **deepest base ctor** (`sub_824FECE0`
|
||||
at PCs `0x824FECFC` and `0x824FED04`) as `*(this+4) = this; *(this+8) =
|
||||
this`. This is the LIST_ENTRY head sentinel.
|
||||
- Offset 0 is overwritten three times in rapid succession by the inheritance
|
||||
chain — landing on `0x8200A1E8` after `sub_824FD240` completes. The read
|
||||
probe (1ms poll period) usually sees only the final value (Run 11 also caught the intermediate `0x8200A908`).
|
||||
|
||||
All three writes happen on the same 1ms poll tick from the read probe's
|
||||
perspective, which is why the install LOOKS like a 12-byte POD struct copy. It
|
||||
is actually 3 separate ctors executing 5 PPC `stw` instructions (two vtable
|
||||
writes to slot 0, two list-init self-pointers, plus a refcount word at +12 that the read probe
|
||||
neighbor at `0xBCE251CC` would have detected). The neighbor at `0xBCE251BC`
|
||||
(`-4`) DOES NOT change because the ctor only writes at offsets >= 0.
|
||||
|
||||
## Capture evidence
|
||||
|
||||
### Run 11 — AUDIT-067 with corrected value `0x8200A1E8`
|
||||
|
||||
Cmdline: `--audit_67_value_watch=0x8200A1E8 --audit_68_host_mem_read_probe=0xBCE251C0:4:1000000 --audit_61_branch_probe_pcs=0x825070F0 --mute=true`.
|
||||
|
||||
The very first PPC store of `0x8200A1E8` hit:
|
||||
|
||||
```
|
||||
host_ns=10019392400 CHANGE 0xBCE251C0: 0xBCE25640 → 0x8200A908 (read probe — intermediate base ctor)
|
||||
host_ns=10021528400 CHANGE 0xBCE251C0: 0x8200A908 → 0x8200A1E8 (read probe — most-derived ctor)
|
||||
AUDIT-067-VAL pc=824FD264 lr=824FD258 val=8200A1E8 dst=BCE251C0
|
||||
r3=BCE251C0 r4=00000002 r5=00000020 r6=03A72280
|
||||
r31=BCE251C0 tid=6
|
||||
AUDIT-061-BR pc=825070F0 lr=824F7B24 r3=BCE251C0 tid=6 (slot-1 dispatch fires
|
||||
immediately after)
|
||||
```
|
||||
|
||||
The intermediate-base vtable write at PC `0x8250660C` (value `0x8200A908`)
|
||||
was NOT in this run's AUDIT-067 watch list (only `0x8200A1E8` was), so only
|
||||
the most-derived hit is logged. Run 12 confirms.
|
||||
|
||||
### Run 12 — AUDIT-067 with both vtable values + 3-slot read probe
|
||||
|
||||
Cmdline: `--audit_67_value_watch=0x8200A1E8,0x8200A908,0xBCE251C0 --audit_68_host_mem_read_probe=0xBCE251C0:4:1000000,0xBCE251C4:4:1000000,0xBCE251C8:4:1000000 --audit_61_branch_probe_pcs=0x825070F0 --mute=true`.
|
||||
|
||||
Captures the full ctor chain on a different cold-trajectory (instance at
|
||||
`0xBCE25340` this time — arena-drift sister of Run 11's `0xBCE251C0`):
|
||||
|
||||
```
|
||||
AUDIT-067-VAL pc=8250660C lr=82506600 val=8200A908 dst=BCE25340 (intermediate base ctor write)
|
||||
AUDIT-067-VAL pc=824FD264 lr=824FD258 val=8200A1E8 dst=BCE25340 (most-derived ctor write)
|
||||
AUDIT-061-BR pc=825070F0 lr=824F7B24 r3=BCE25340 (slot-1 dispatch)
|
||||
```
|
||||
|
||||
Both runs reproduce: the PC pair `{0x8250660C, 0x824FD264}` is invariant
|
||||
across cold runs. The instance address VARIES (arena drift), but the writer
|
||||
PCs do not.
|
||||
|
||||
## Why earlier sessions missed this
|
||||
|
||||
### Sessions 1+2
|
||||
|
||||
Hooked `xe::store_and_swap<T>`, `xe::store<T>`, `Memory::Zero/Fill/Copy`,
|
||||
`xe::endian_store::set()`, `Memory::Copy` byte-scan, 4 XEX-loader memcpy
|
||||
sites. These are HOST C++ write paths to guest memory. The JIT does NOT use
|
||||
them — JIT-emitted PPC stores compile down to direct x64 `mov` instructions
|
||||
operating on `virtual_membase_ + va`, with inline byte-swap intrinsics
|
||||
(`bswap` / `pshufb`). They bypass every `xe::store*` template.
|
||||
|
||||
Reading-error #35 (Session 1: "hook-surface incompleteness") was right to
|
||||
the extent that the surfaces don't cover all host-side write paths — but
|
||||
this writer was never on the host side at all.
|
||||
|
||||
### Session 3
|
||||
|
||||
Read probe captured the install epoch (~9.4-9.6s host_ns) and the neighbor
|
||||
pattern (3 simultaneous writes within 1ms). The "POD struct copy bypass"
|
||||
hypothesis (reading-error #36) was a reasonable explanation under the
|
||||
constraint "host-write surfaces miss the install", but the actual cause is
|
||||
that the writes come from the JIT not from host code at all.
|
||||
|
||||
### AUDIT-067 (prior to Session 4)
|
||||
|
||||
Watched value `0x8200A208`. The CORRECT vptr value is `0x8200A1E8` (per
|
||||
Session 3's correction). AUDIT-067 was hooked into every PPC store opcode and
|
||||
would have caught the install on the first run if it had watched the right
|
||||
value. Session 4 re-ran it with the corrected value and caught the writer.
|
||||
|
||||
## ours-side cross-reference
|
||||
|
||||
`sub_824FD240` is GUEST PPC code present in the Sylpheed XEX. Both engines'
|
||||
JIT compiles and runs the same machine code given the same inputs. There is
|
||||
no host-side analog in `xenia-rs/crates/xenia-kernel/` — and there shouldn't
|
||||
be: this isn't a kernel handler, it's a game's own class constructor.
|
||||
|
||||
Per `xenia-rs/docs/functions/sub_824F7800.md`:
|
||||
|
||||
> AUDIT-064 ours `--ctor-probe=0x824F7800` -n 500M: **0 fires**.
|
||||
>
|
||||
> The chain runs downstream of `sub_822F1AA8`'s vtable[0] dispatch through
|
||||
> `sub_82173990` — which waits on tid=13 — so ours never reaches it because
|
||||
> tid=13 is blocked on the AUDIT-049 wedge.
|
||||
|
||||
`sub_824FD240` is reached via:
|
||||
|
||||
```
|
||||
sub_824F8398
|
||||
→ sub_824F7CD0
|
||||
→ sub_824F7800
|
||||
→ sub_824FD240 (call at PC 0x824F7838) ← THE WRITER
|
||||
→ ... bctrl at PC 0x824F7B20 dispatches sub_825070F0
|
||||
```
|
||||
|
||||
In ours, the entire call chain above `sub_824F7800` fires **0 times** because
|
||||
the AUDIT-049 wedge blocks tid=13 upstream. Therefore the ANON_Class_713383D7
|
||||
instance is never constructed, the vtable `0x8200A1E8` is never installed,
|
||||
and the bctrl at `0x824F7B20` never dispatches `sub_825070F0`.
|
||||
|
||||
**This is consistent with all prior phase audits**. Session 4 confirms the
|
||||
existing diagnosis: the divergence root is upstream at tid=13, not at the
|
||||
ANON_Class ctor or the worker dispatch.
|
||||
|
||||
## Static-DB cross-check
|
||||
|
||||
| PC | Function | Notes |
|
||||
|---|---|---|
|
||||
| `0x824FECE0` | `sub_824FECE0` (deepest base ctor) | Writes self-pointers at +4/+8 and the constant 1 at +12; calls helper at `0x8284DD1C` |
|
||||
| `0x824FECFC` | inside `sub_824FECE0` | `stw r31, 4(r31)` — flink_ptr write |
|
||||
| `0x824FED04` | inside `sub_824FECE0` | `stw r31, 8(r31)` — blink_ptr write |
|
||||
| `0x825065E8` | `sub_825065E8` (intermediate base ctor) | Calls deepest; writes vtable `0x8200A908` |
|
||||
| `0x8250660C` | inside `sub_825065E8` | `stw r11, 0(r31)` — vtable `0x8200A908` write |
|
||||
| `0x825051D8` | called by intermediate base | Sub-helper initializing many `+0xXX` member fields |
|
||||
| `0x824FD240` | `sub_824FD240` (most-derived ctor) | Calls intermediate base; writes vtable `0x8200A1E8` |
|
||||
| `0x824FD264` | inside `sub_824FD240` | `stw r11, 0(r31)` — vtable `0x8200A1E8` write — THE INSTALL |
|
||||
| `0x824F7800` | `sub_824F7800` | Allocates instance at `+0x38` via `sub_824FD230`/`sub_824FD240` |
|
||||
| `0x824F7838` | inside `sub_824F7800` | `bl sub_824FD240` — invokes most-derived ctor |
|
||||
| `0x824F7B20` | inside `sub_824F7800` | `bctrl` — dispatches `sub_825070F0` via vtable slot 1 |
|
||||
| `0x825070F0` | `sub_825070F0` (slot-1 method) | Worker fan-out target — AUDIT-067/061's original lookup |
|
||||
|
||||
Static caller chain into `sub_824FD240`:
|
||||
- Single static caller: `0x824F7838` inside `sub_824F7800`.
|
||||
- The 4-fn dispatch ladder above (`824F8398 → 824F7CD0 → 824F7800 → bctrl →
|
||||
825070F0`) was already classified by AUDIT-064.
|
||||
|
||||
## ε-constraint validation
|
||||
|
||||
Session 3's install epoch bound was `host_ns ∈ [9.4, 9.6] s`. Run 11
|
||||
observed install at `host_ns=10.019s`; Run 12 captured the intermediate base
|
||||
ctor write at `host_ns ≈ 10s` (read probe transition timestamps not
|
||||
explicitly logged in Run 12 grep output, but within boot's normal jitter
|
||||
window). The earlier session's ±200ms estimate was off — actual jitter is
|
||||
closer to ±500ms cold-to-cold. Update: epoch is **`host_ns ∈ [9.4, 10.1] s`**.
|
||||
|
||||
## LOC added (Session 4)
|
||||
|
||||
**Zero canary LOC added**. All Session 4 work used existing AUDIT-067
|
||||
(JIT-emit value watch in `ppc_hir_builder.cc` + `ppc_emit_memory.cc`) and
|
||||
Session 3's `audit_68_host_mem_read_probe` cvar machinery (read-mode probe
|
||||
thread in `audit_68_host_mem_watch_base.cc`).
|
||||
|
||||
Cumulative across Sessions 1+2+3 (canary): ~520 LOC additive, all cvar-gated
|
||||
default-off, retained in tree. **Session 4 adds no LOC** — the writer was
|
||||
identifiable by re-running existing instrumentation with the corrected
|
||||
target value.
|
||||
|
||||
## Cascade outcome (Session 4)
|
||||
|
||||
- **A**: identify writer PC — **PASS** (`sub_824FD240+0x24` at PC `0x824FD264`;
|
||||
most-derived ctor of the inheritance chain).
|
||||
- **B**: identify caller chain — **PASS** (`sub_824F7800+0x38` → `sub_824FD240`;
|
||||
matches AUDIT-064's previously-known 4-fn dispatch ladder).
|
||||
- **C**: identify ours-side analog presence — **PASS** (no host analog needed;
|
||||
guest PPC code; ours's JIT would execute the same code if reached, but the
|
||||
call chain is unreachable due to tid=13 AUDIT-049 wedge).
|
||||
- **D**: reading-error class registration — **PASS** (see below; #36
|
||||
re-scoped).
|
||||
|
||||
Net 4/4 wins (no in-progress items). Session 4 closes AUDIT-068.
|
||||
|
||||
## Reading-error class re-scoping
|
||||
|
||||
**#36 (POD-struct copy-assignment bypass)** — registered Sessions 2+3 as the
|
||||
explanation for the host-side surface gap. Session 4 finding: this writer is
|
||||
NOT host C++; it is JIT-emitted PPC code. The class #36 framing remains valid
|
||||
in principle (host C++ POD copy IS a real bypass class, demonstrated by
|
||||
Session 2's reading-error #35 sanity), but it does NOT apply to THIS
|
||||
investigation. Updated rule:
|
||||
|
||||
> Before adding new host-side write hooks, always check whether the writer
|
||||
> could be GUEST CODE running under the JIT. AUDIT-067 (JIT-store value
|
||||
> watch) is the cheaper first check. If AUDIT-067 with the *correct* target
|
||||
> value still produces 0 hits, only THEN escalate to host-side surface
|
||||
> hooks. The reverse order (Session 1+2's host-first approach) wastes
|
||||
> instrumentation budget when the writer turns out to be guest-side.
|
||||
|
||||
Secondary rule: **always cross-check the configured target value against the
|
||||
read probe's observed values**. Session 1+2+3+AUDIT-067 all watched the wrong
|
||||
value (`0x8200A208`) because that was AUDIT-058's quoted value, which was
|
||||
actually the address of slot-1-WITHIN-the-vtable, not the vtable base. The
|
||||
read probe directly observed the correct value `0x8200A1E8` in Session 3 —
|
||||
Session 4 simply propagated that correction to AUDIT-067.
|
||||
|
||||
## Artifacts (this dir)
|
||||
|
||||
- `run11-audit067-corrected-value.log` — AUDIT-067 with value
|
||||
`0x8200A1E8`; 4 hits (1 install + 3 sibling instances in worker threads).
|
||||
- `run12-full-ctor-chain.log` — AUDIT-067 with full ctor chain
|
||||
(vtable values `0x8200A1E8` and `0x8200A908` + self-pointer
|
||||
`0xBCE251C0`) and 3-slot read probe; captures all 5 writer-related events
|
||||
on tid=6.
|
||||
- `writer-report-v4.md` — this file.
|
||||
|
||||
## Discipline observed
|
||||
|
||||
- xenia-rs HEAD `e6d43a23ac393004d2e5adf2f0395fd0b5e6448b` UNCHANGED ✓
|
||||
(verified: sha256 of `git diff HEAD` at session start =
|
||||
`ed30fd526643918f67311caff0a10d1346d73fd0c0323e02477883cf5ff20357`; same
|
||||
at session end).
|
||||
- `--mute=true` on every run ✓
|
||||
- Cold-protocol: cache wipe + restore from `/tmp/canary-cache-bak-audit-068`
|
||||
at session end ✓ (backup re-created at session start from current cache;
|
||||
prior session's backup was missing).
|
||||
- Canary tree: no new instrumentation added (zero LOC delta). All Sessions
|
||||
1-3 instrumentation retained as-is (cvar-gated default-off).
|
||||
- No destructive shortcuts ✓.
|
||||
|
||||
## AUDIT-068 closure
|
||||
|
||||
AUDIT-068 is **CLOSED**. The presumed host-side writer of `0x8200A1E8` at
|
||||
`[0xBCE251C0]` is conclusively identified as **guest PPC code at
|
||||
`sub_824FD240+0x24` (PC `0x824FD264`)**, the most-derived constructor of the
|
||||
ANON_Class_713383D7 inheritance chain. The intermediate base ctor at
|
||||
`sub_825065E8+0x24` (PC `0x8250660C`) writes the intermediate vtable
|
||||
`0x8200A908`. The deepest base ctor at `sub_824FECE0` writes the
|
||||
doubly-linked-list head sentinel (self-pointer writes at offsets +4 and +8).
|
||||
|
||||
The Phase-NonMatch divergence root remains the **upstream tid=13 AUDIT-049
|
||||
wedge**, not the ctor or vtable. ours never reaches the calling code, so
|
||||
the instance is never constructed and `sub_825070F0` never dispatches. No
|
||||
host-side analog is needed because the writer is part of the game's own
|
||||
code.
|
||||
|
||||
## Recommended next steps (NOT Session 5 of AUDIT-068)
|
||||
|
||||
Move investigation upstream to the **AUDIT-049 / Phase-W wedge** at tid=13.
|
||||
That is where ours and canary actually diverge; the ANON_Class ctor and
|
||||
sub_825070F0 are downstream symptoms.
|
||||
|
||||
- Re-open the tid=13 wedge analysis under a new audit number.
|
||||
- Cross-reference `xenia-rs/audit-runs/phase-w-wedge-reattack/current-state.md`
|
||||
for the most recent state.
|
||||
120
audit-runs/audit-068-host-mem-watch/writer-report.md
Normal file
120
audit-runs/audit-068-host-mem-watch/writer-report.md
Normal file
@@ -0,0 +1,120 @@
|
||||
# AUDIT-068 Session 1 — writer report
|
||||
|
||||
Date: 2026-05-19
|
||||
|
||||
## Summary
|
||||
|
||||
Built canary instrumentation that hooks the three host-side write surfaces I expected to cover the AUDIT-067 / Phase HostAudio-Eager findings:
|
||||
1. `xe::store_and_swap<T>` template family (`xenia/base/memory.h`, T=u8/u16/u32/u64/i8/i16/i32/i64).
|
||||
2. `xe::store<T>` template family (host-endian sibling of above).
|
||||
3. `Memory::Zero/Fill/Copy` in `xenia/memory.cc`.
|
||||
|
||||
Total instrumentation: ~190 LOC kept in canary tree (cvar-gated default-off, zero hot-path cost when both cvars empty), 2 new files in `xenia/base/`:
|
||||
- `audit_68_host_mem_watch_fwd.h` — atomic + inline checks (forward decls).
|
||||
- `audit_68_host_mem_watch_base.cc` — slow-path impl, lazy CSV parse, host→guest VA translation via function-pointer thunk.
|
||||
|
||||
Cvars in `xenia/cpu/cpu_flags.{h,cc}`:
|
||||
- `--audit_68_host_mem_watch_values=CSV` (max 8 u32 values).
|
||||
- `--audit_68_host_mem_watch_addrs=CSV` (max 8 VAs or `START-END` ranges).
|
||||
|
||||
Smoke test (`--audit_68_host_mem_watch_values=0x12345678`, 30s): 0 hits, INIT lines emitted — instrumentation operational.
|
||||
|
||||
Sanity test (`--audit_68_host_mem_watch_values=0x00000000`, ~12s): **1,639 hits**, dominated by `Memory::Zero` (1,594) plus `Memory::Fill` (30), `store_and_swap<u32>` (13), and `store_and_swap<u64>` (2). Instrumentation works end-to-end and the guest-VA thunk resolves correctly (e.g. `guest_va=0x30000000 host_ptr=...30000000` for `store_and_swap`).
|
||||
|
||||
## Capture runs
|
||||
|
||||
### Run 1 — vtable `0x8200A208 / 0x8200A928` writers
|
||||
|
||||
Cmdline: `--audit_68_host_mem_watch_values=0x8200A208,0x8200A928,0x080082A2,0x2829820 --audit_68_host_mem_watch_addrs=0xBCE25340` (value list also includes the byte-swapped forms in case some caller passes a pre-swapped value through `store<T>` rather than `store_and_swap<T>`; addr watch on the known target instance address from AUDIT-058/067).
|
||||
|
||||
Wallclock: 90 s (post-10.4 s trigger window per Phase NonMatch).
|
||||
|
||||
**Result: 0 hits.** Log at `run1-vtable-writers.log` (81 KB; cold boot reached thread spawn through tid=29, matching Phase NonMatch trace).
|
||||
|
||||
### Run 2 — voice-struct field clear `[VOICE+0x164]`
|
||||
|
||||
Cmdline: `--audit_68_host_mem_watch_addrs=0x42500000-0x42600000`.
|
||||
|
||||
Wallclock: 60 s.
|
||||
|
||||
**Result: 0 hits.** Log at `run2-voice-struct-writers.log`.
|
||||
|
||||
### Sanity — value=0 reachability test
|
||||
|
||||
Cmdline: `--audit_68_host_mem_watch_values=0x00000000`. Wallclock: ~12 s.
|
||||
|
||||
**Result: 1,639 hits**, breakdown:
|
||||
| tag | hits |
|
||||
|---|---:|
|
||||
| `Memory::Zero` | 1,594 |
|
||||
| `Memory::Fill` | 30 |
|
||||
| `store_and_swap<u32>` | 13 |
|
||||
| `store_and_swap<u64>` | 2 |
|
||||
|
||||
Guest VAs span `0x30000000-0x30xxx000` (the 40 MB physical heap set up by `Memory::Initialize`) and `0xFFCAxxxx` (kernel high range, stacks/TLS). `store_and_swap<u32>` hits e.g. `0xFFCAE000 / 0xFFCAD000 / 0x30002000` — kernel pointer-init scribbles. NO hits in the XEX image region `0x82000000+`. Log at `sanity-value0.log`.
|
||||
|
||||
## Headline finding (negative-but-informative)
|
||||
|
||||
**Neither the vtable install nor the XEX section loader uses any of the hooked paths.** A separate Sanity-2 run watched the addr range `0x82000000-0x82010000` (Sylpheed's `.text` start) and got 0 hits across a full boot — yet that region MUST be written to during XEX load (the image is copied in from the file). This means:
|
||||
|
||||
- The XEX module loader (`xenia/cpu/xex_module.cc`) writes guest memory via **raw `memcpy()` and direct `*ptr = ...` host-pointer writes** that are NOT routed through `xe::store_and_swap<T>`, `xe::store<T>`, or `Memory::Zero/Fill/Copy`. Quick grep on `xex_module.cc` confirms: lines 286, 369, 422, 427, 525, 582, 592, 650, 668, 773, 795 all use plain `memcpy(host_ptr, src, size)` after a `Memory::TranslateVirtual` lookup.
|
||||
- The kernel-import handlers that COULD synthesize `0x8200A208` at runtime (the original AUDIT-067 hypothesis) are not doing so — at least not via the hooked surfaces — within the 90 s window that includes the 10.4 s trigger.
|
||||
|
||||
So neither of the two main hypotheses (host-allocator vptr install via kernel handler; voice-struct clear via direct write) was captured by Session 1's instrumentation. Session 1's coverage gap is identified and is the deliverable for Session 2.
|
||||
|
||||
## What Session 1 nonetheless confirmed
|
||||
|
||||
1. **The instrumentation is sound.** 1,639 value=0 hits prove the `store_and_swap<T>` / `Memory::Zero/Fill/Copy` hooks and the host→guest VA translation thunk all work in default cold-boot.
|
||||
2. **AUDIT-067's "host-side install" framing remains correct** — guest stores were ruled out by AUDIT-067, host-side `store_and_swap` is now ruled out by AUDIT-068. The set of paths left for the installer is narrowed to: raw `memcpy`-via-`TranslateVirtual`, `*reinterpret_cast<be<T>*>(host)=v` patterns, or some other un-hooked direct host write.
|
||||
3. **Cold-boot guest-VA layout** (from sanity log):
|
||||
- `0x30000000-0x30xxxxxx` — physical heap (Memory::Zero on Initialize).
|
||||
- `0xFFCAxxxx` — kernel high (stacks etc).
|
||||
- `0x82000000+` — Sylpheed XEX image region (never touched by hooked surfaces).
|
||||
- Nothing observed in `0x42xxxxxx` or `0xBCxxxxxx` (yet — these allocate later than the 12 s sanity window).
|
||||
|
||||
## Per-writer breakdown (from sanity capture)
|
||||
|
||||
Only writers with hits in the 12 s window are listed; this is what Session 2 will need to mirror in ours where applicable.
|
||||
|
||||
### `Memory::Zero` (1,594 hits in 12 s)
|
||||
- All from tid=304 (host main thread / boot thread).
|
||||
- Affected guest VAs: `0x30000000-0x30xxx000` (heap-page zero on init), `0xFFCAB000-0xFFCAE000` (kernel stacks zero on alloc).
|
||||
- This is invoked by `Memory::Initialize` for heap setup and by the kernel during stack allocations.
|
||||
- Ours's analog: `xenia-kernel/src/state.rs`/`memory.rs` — Memory init and stack alloc. Likely already zero-init by Rust default; verify in Session 2.
|
||||
|
||||
### `Memory::Fill` (30 hits in 12 s)
|
||||
- Same tid=304, similar VA distribution.
|
||||
- Used for `RtlFillMemory` and some allocator default-fill paths.
|
||||
|
||||
### `store_and_swap<u32>` (13 hits in 12 s)
|
||||
- One of each: pointer-init writes by kernel-thread setup code (e.g. TIB fields).
|
||||
- Example: `0xFFCAE000`, `0xFFCAD000`, `0x30002000`. Likely the linked-list / TLS slot pointers written by `XThread::AllocateStack`.
|
||||
|
||||
### `store_and_swap<u64>` (2 hits)
|
||||
- Likely `RtlInitMemory` 64-bit-aligned scribbles.
|
||||
|
||||
## What's missing — the writers Session 2 must catch
|
||||
|
||||
Both Session-1 target writers (vtable install + voice-struct clear) escape the hooked surface. The XEX loader's raw `memcpy()` is the obvious blind spot but does not explain the vtable install (the vptr at `0xBCE25340` is in the heap, written AFTER load). Other candidates:
|
||||
|
||||
1. **`*xe::TranslateVirtual<be<T>*>(addr) = value;`** — typed-pointer cast through a big-endian `be<T>` reference. Lots of kernel-import code uses this pattern (e.g. `xboxkrnl_rtl.cc`'s `RtlCompareMemory` returns, `xboxkrnl_video.cc`'s frame-count writes). Doesn't go through `store_and_swap`.
|
||||
2. **Direct `*reinterpret_cast<uint32_t*>(host) = byte_swap(val)`** — a few performance-critical sites do this inline rather than via the template.
|
||||
3. **`Memory::Copy`** with `src` host-region having pre-encoded bytes — but the value-match path I added DOES catch this for the first u32 of the source, and we got 0 hits. Either `Memory::Copy` isn't used for vptr install, or the values don't appear as the first u32 of the copy.
|
||||
4. **GPU / VFS host-side initialisation** of mmio-mapped guest memory — separate APIs entirely, but Sylpheed isn't doing GPU vtable installs at this point.
|
||||
|
||||
## Per-target follow-up (Session 2 capture targets)
|
||||
|
||||
| Value/VA | Status from Session 1 | Session 2 plan |
|
||||
|---|---|---|
|
||||
| Vtable `0x8200A208` install at `0xBCE25340` | NOT CAUGHT (host-side, but escapes `store_and_swap` / `store` / `Memory::Zero/Fill/Copy`) | Add hooks on (a) the typed-pointer write surfaces (`be<T>::operator=` and `*TranslateVirtualBE<T>() = v`) and (b) a `Memory::WriteWord32` shim that catches raw u32 stores into TranslateVirtual host pointers. Also add a `Memory::Copy` value-watch that scans the WHOLE copy buffer for matches, not just the first u32. Re-run with vtable-value watch + addr-range watch on the heap region around `0xBCE25340`. |
|
||||
| Voice-struct field clear `[VOICE+0x164]` | NOT CAUGHT (same reason; plus the actual VOICE base may live outside `0x42xxxxxx`) | First find the actual VOICE base via guest-side enumeration in Phase HostAudio-Eager artifacts; once known, addr-range watch over the entire `MmAllocatePhysicalMemoryEx` block that contains the voice array. |
|
||||
|
||||
## Artifacts in this dir
|
||||
|
||||
- `instrumentation-design.md` — surface inventory + cvar design.
|
||||
- `fix-canary.diff` — combined diff of the 5 modified files plus full text of the 2 new files (`xenia/base/audit_68_host_mem_watch_fwd.h`, `xenia/base/audit_68_host_mem_watch_base.cc`).
|
||||
- `run1-vtable-writers.log` — 0 hits.
|
||||
- `run2-voice-struct-writers.log` — 0 hits.
|
||||
- `sanity-value0.log` — 1,639 hits (instrumentation alive proof).
|
||||
- `writer-report.md` — this file.
|
||||
- `session-2-plan.md` — actionable plan for next session.
|
||||
Reference in New Issue
Block a user