Files
xenia-rs/audit-runs/audit-068-host-mem-watch/fix-canary-v2.diff
MechaCat02 ef93a4fa14 handoff: VSync/event-wedge fixes + iterate 2.A–2.BC research notes
Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity +
  related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the
iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps
(.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as
regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-05 07:19:08 +02:00

280 lines
12 KiB
Diff

# AUDIT-068 Session 2 — canary instrumentation extension diff
#
# Generated 2026-05-19. xenia-canary HEAD = 6de80dffe261b368ecefee36c9b2b337335228c0.
# Session 1 changes are already in tree (see fix-canary.diff for the cumulative
# Session 1 state). This diff is the post-Session-1 → post-Session-2 delta on
# four files that Session 2 extended:
# - src/xenia/base/byte_order.h (new — Step 1, +27 LOC, be<T>::set() hook)
# - src/xenia/memory.cc (extended — Step 2 Memory::Copy byte-scan)
# - src/xenia/cpu/xex_module.cc (new — Step 3, +35 LOC, xex_memcpy pre-scan + lzx_decompress post-scan)
# - src/xenia/base/audit_68_host_mem_watch_base.cc (extended — static-init gate)
#
# Two of the four files (memory.cc, audit_68_host_mem_watch_base.cc) ALSO contain
# Session 1 hooks. To see the pure Session 2 delta, diff against the post-Session-1
# state of those files (recoverable from fix-canary.diff).
#
# byte_order.h was untouched by Session 1; the diff below for that file is purely
# Session 2.
# xex_module.cc was untouched by Session 1; ditto.
#
# Engine semantics: cvar-gated default-off, near-zero hot-path cost when off
# (each hooked site still runs its gate check, e.g. one relaxed atomic load).
# Total Session 2 additive: ~110 LOC.
# Reading-error class #35 (Session 1) mitigated: see writer-report-v2.md Run 5.
diff --git a/src/xenia/base/byte_order.h b/src/xenia/base/byte_order.h
index 5a076f319..c80ee0ffc 100644
--- a/src/xenia/base/byte_order.h
+++ b/src/xenia/base/byte_order.h
@@ -11,6 +11,7 @@
#define XENIA_BASE_BYTE_ORDER_H_
#include <cstdint>
+#include <type_traits>
#if defined __has_include
#if __has_include(<version>)
#include <version>
@@ -21,6 +22,7 @@
#endif
#include "xenia/base/assert.h"
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
#include "xenia/base/platform.h"
#if !__cpp_lib_endian
@@ -88,6 +90,30 @@ struct endian_store {
operator T() const { return get(); }
void set(const T& src) {
+ // AUDIT-068 Session 2: hook the canonical be<T>/le<T> write path. Gated
+ // on the host→guest thunk being installed by Memory::Memory(); without
+ // that there is no Memory and therefore no possible guest-memory write.
+ // This ALSO keeps the slow path from running during static initialization
+ // (where it would race the cvar object construction in cpu_flags.cc and
+ // permanently latch g_active=0 before the --audit_68_* cmdline override
+ // applies). See reading-error #35 / Session 2 plan.
+ if constexpr (sizeof(T) <= 8 && std::is_integral_v<T>) {
+ if (xe::audit_68::g_host_to_guest_thunk != nullptr) [[unlikely]] {
+ uint64_t v;
+ if constexpr (sizeof(T) == 8) {
+ v = static_cast<uint64_t>(src);
+ } else if constexpr (sizeof(T) == 4) {
+ v = static_cast<uint64_t>(static_cast<uint32_t>(src));
+ } else if constexpr (sizeof(T) == 2) {
+ v = static_cast<uint64_t>(static_cast<uint16_t>(src));
+ } else {
+ v = static_cast<uint64_t>(static_cast<uint8_t>(src));
+ }
+ xe::audit_68::check_host_write(
+ &value, v, static_cast<uint8_t>(sizeof(T)),
+ E == std::endian::big ? "be<T>::set" : "le<T>::set");
+ }
+ }
if constexpr (std::endian::native == E) {
value = src;
} else {
diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc
index 1034dcac7..38148010c 100644
--- a/src/xenia/cpu/xex_module.cc
+++ b/src/xenia/cpu/xex_module.cc
@@ -51,6 +51,38 @@ DECLARE_bool(allow_plugins);
DECLARE_bool(disable_context_promotion);
+// AUDIT-068 Session 2: helper that scans a raw byte buffer for 4-byte aligned
+// u32 values that match the configured audit_68 value list, emitting a
+// per-position event. Used to pre-scan XEX-loader memcpys that bypass all
+// other hooked surfaces. Cost when off: a single relaxed atomic load.
+static inline void audit68_prescan_memcpy(uint32_t guest_va_dest,
+ const uint8_t* src, size_t size,
+ const char* tag) {
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
+ if (active == 0) return;
+ if ((active & 0x1) && size >= 4) {
+ size_t aligned_end = size & ~size_t(3);
+ for (size_t i = 0; i < aligned_end; i += 4) {
+ uint32_t be_u32 = (uint32_t(src[i + 0]) << 24) |
+ (uint32_t(src[i + 1]) << 16) |
+ (uint32_t(src[i + 2]) << 8) | uint32_t(src[i + 3]);
+ xe::audit_68::check_guest_va(
+ static_cast<uint32_t>(guest_va_dest + i), be_u32, 4, tag);
+ }
+ }
+ if (active & 0x2) {
+ // Coarse addr-only event at the dest base (reported size is capped at 8).
+ uint64_t v = 0;
+ if (size >= 4) {
+ v = (uint64_t(src[0]) << 24) | (uint64_t(src[1]) << 16) |
+ (uint64_t(src[2]) << 8) | uint64_t(src[3]);
+ }
+ xe::audit_68::check_guest_va(guest_va_dest, v,
+ static_cast<uint8_t>(std::min<size_t>(size, 8)),
+ tag);
+ }
+}
+
static constexpr uint8_t xe_xex1_retail_key[16] = {
0xA2, 0x6C, 0x10, 0xF7, 0x1F, 0xD9, 0x35, 0xE9,
0x8B, 0x99, 0x92, 0x2C, 0xE9, 0x32, 0x15, 0x72};
@@ -424,6 +456,10 @@ int XexModule::ApplyPatch(XexModule* module) {
// If image_source_offset is set, copy [source_offset:source_size] to
// target_offset
if (patch_header->delta_image_source_offset) {
+ audit68_prescan_memcpy(
+ module->base_address_ + patch_header->delta_image_target_offset,
+ base_exe + patch_header->delta_image_source_offset,
+ patch_header->delta_image_source_size, "xex_memcpy_patch");
memcpy(base_exe + patch_header->delta_image_target_offset,
base_exe + patch_header->delta_image_source_offset,
patch_header->delta_image_source_size);
@@ -589,6 +625,8 @@ int XexModule::ReadImageUncompressed(const void* xex_addr, size_t xex_length) {
if (exe_length > uncompressed_size) {
return 1;
}
+ audit68_prescan_memcpy(base_address_, p, exe_length,
+ "xex_memcpy_uncompressed");
memcpy(buffer, p, exe_length);
return 0;
case XEX_ENCRYPTION_NORMAL:
@@ -665,6 +703,9 @@ int XexModule::ReadImageBasicCompressed(const void* xex_addr,
// Overflow.
return 1;
}
+ audit68_prescan_memcpy(
+ base_address_ + static_cast<uint32_t>(d - buffer), p, data_size,
+ "xex_memcpy_basic_block");
memcpy(d, p, data_size);
break;
case XEX_ENCRYPTION_NORMAL: {
@@ -799,6 +840,17 @@ int XexModule::ReadImageCompressed(const void* xex_addr, size_t xex_length) {
result_code = lzx_decompress(
compress_buffer, d - compress_buffer, buffer, uncompressed_size,
compression_info->normal.window_size, nullptr, 0);
+
+ // AUDIT-068 Session 2: lzx_decompress writes directly into guest
+ // memory via the host pointer `buffer`. There's no host-side hook
+ // covering its internal bulk writes, so post-scan the produced bytes
+ // to recover what the XEX loader actually placed at `base_address_`.
+ // This is THE most likely catch for the vtable install case (vtables
+ // live in the .rdata section that is part of the LZX-compressed image).
+ if (result_code == 0) {
+ audit68_prescan_memcpy(base_address_, buffer, uncompressed_size,
+ "xex_lzx_decompress_output");
+ }
} else {
XELOGE("Unable to allocate XEX memory at {:08X}-{:08X}.", base_address_,
uncompressed_size);
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
index 22ba66aee..819a8a8a2 100644
--- a/src/xenia/memory.cc
+++ b/src/xenia/memory.cc
@@ -14,6 +14,7 @@
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
#include "xenia/base/byte_stream.h"
#include "xenia/base/clock.h"
#include "xenia/base/cvar.h"
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
static Memory* active_memory_ = nullptr;
+// AUDIT-068 — process-global accessor (declared in memory.h).
+Memory* Memory::active() { return active_memory_; }
+
void CrashDump() {
static std::atomic<int> in_crash_dump(0);
if (in_crash_dump.fetch_add(1)) {
@@ -151,11 +155,19 @@ Memory::Memory() {
uint32_t(xe::memory::allocation_granularity());
assert_zero(active_memory_);
active_memory_ = this;
+
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
+ Memory* m = active_memory_;
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
+ };
}
Memory::~Memory() {
assert_true(active_memory_ == this);
active_memory_ = nullptr;
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
// Uninstall the MMIO handler, as we won't be able to service more
// requests.
@@ -540,16 +552,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
}
void Memory::Zero(uint32_t address, uint32_t size) {
+ // AUDIT-068: log a single span event with value=0; size is capped at 8 for
+ // the value field. Slow path is gated on the atomic flag.
+ xe::audit_68::check_guest_va(address, 0,
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Zero");
std::memset(TranslateVirtual(address), 0, size);
}
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
+ // Replicate the fill byte into every byte of the 64-bit value field
+ // (0xAB -> 0xABAB...AB), matching what memset writes. A watched value such
+ // as 0xDEADBEEF cannot equal a repeated byte; the low byte suffices for capture.
+ uint64_t v = static_cast<uint64_t>(value);
+ v |= v << 8;
+ v |= v << 16;
+ v |= v << 32;
+ xe::audit_68::check_guest_va(address, v,
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Fill");
std::memset(TranslateVirtual(address), value, size);
}
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
uint8_t* pdest = TranslateVirtual(dest);
const uint8_t* psrc = TranslateVirtual(src);
+ // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the
+ // source buffer. Catches XEX-loader-style memcpys where a vptr (the target
+ // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost
+ // O(size/4 * N_values) with N_values capped at 8 inside value_matches —
+ // negligible vs the underlying memcpy throughput.
+ //
+ // The scan runs only when active bit 0x1 (values-mode) is set. Whenever bit
+ // 0x2 (addrs-mode, e.g. Run 2 voice-struct mode) is set, we also emit a single
+ // addr-only event covering the destination span so addr-watch isn't broken.
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
+ if (active != 0) [[unlikely]] {
+ if ((active & 0x1) && size >= 4) {
+ // Scan source for any configured u32 value (big-endian, mirrors how
+ // guest sees the bytes). 4-byte aligned offsets only.
+ uint32_t aligned_end = size & ~3u;
+ for (uint32_t i = 0; i < aligned_end; i += 4) {
+ uint32_t be_u32 =
+ (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) |
+ (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]);
+ xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy");
+ }
+ }
+ if (active & 0x2) {
+ // Addr-only mode: emit a single coarse event tagged with the dest base
+ // and first u32 of source for context. The slow-path range check will
+ // log iff the dest span intersects a configured addr range.
+ uint64_t v = 0;
+ if (size >= 4) {
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
+ } else if (size > 0) {
+ for (uint32_t i = 0; i < size; ++i) {
+ v = (v << 8) | psrc[i];
+ }
+ }
+ xe::audit_68::check_guest_va(
+ dest, v, static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
+ "Memory::Copy");
+ }
+ }
std::memcpy(pdest, psrc, size);
}