Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity + related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps (.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
280 lines
12 KiB
Diff
280 lines
12 KiB
Diff
# AUDIT-068 Session 2 — canary instrumentation extension diff
|
|
#
|
|
# Generated 2026-05-19. xenia-canary HEAD = 6de80dffe261b368ecefee36c9b2b337335228c0.
|
|
# Session 1 changes are already in tree (see fix-canary.diff for the cumulative
|
|
# Session 1 state). This diff is the post-Session-1 → post-Session-2 delta on
|
|
# four files that Session 2 extended:
|
|
# - src/xenia/base/byte_order.h (new in Session 2 — Step 1, +27 LOC, be<T>::set() hook)
|
|
# - src/xenia/memory.cc (extended — Step 2 Memory::Copy byte-scan)
|
|
# - src/xenia/cpu/xex_module.cc (new in Session 2 — Step 3, +35 LOC, xex_memcpy pre-scan + lzx_decompress post-scan)
|
|
# - src/xenia/base/audit_68_host_mem_watch_base.cc (extended — static-init gate)
|
|
#
|
|
# Two of the four files (memory.cc, audit_68_host_mem_watch_base.cc) ALSO contain
|
|
# Session 1 hooks. To see the pure Session 2 delta, diff against the post-Session-1
|
|
# state of those files (recoverable from fix-canary.diff).
|
|
#
|
|
# byte_order.h was untouched by Session 1; the diff below for that file is purely
|
|
# Session 2.
|
|
# xex_module.cc was untouched by Session 1; ditto.
|
|
#
|
|
# Engine semantics: cvar-gated default-off, zero hot-path cost when off.
|
|
# Total Session 2 additive: ~110 LOC.
|
|
# Reading-error class #35 (Session 1) mitigated: see writer-report-v2.md Run 5.
|
|
|
|
diff --git a/src/xenia/base/byte_order.h b/src/xenia/base/byte_order.h
|
|
index 5a076f319..c80ee0ffc 100644
|
|
--- a/src/xenia/base/byte_order.h
|
|
+++ b/src/xenia/base/byte_order.h
|
|
@@ -11,6 +11,7 @@
|
|
#define XENIA_BASE_BYTE_ORDER_H_
|
|
|
|
#include <cstdint>
|
|
+#include <type_traits>
|
|
#if defined __has_include
|
|
#if __has_include(<version>)
|
|
#include <version>
|
|
@@ -21,6 +22,7 @@
|
|
#endif
|
|
|
|
#include "xenia/base/assert.h"
|
|
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
|
#include "xenia/base/platform.h"
|
|
|
|
#if !__cpp_lib_endian
|
|
@@ -88,6 +90,30 @@ struct endian_store {
|
|
operator T() const { return get(); }
|
|
|
|
void set(const T& src) {
|
|
+ // AUDIT-068 Session 2: hook the canonical be<T>/le<T> write path. Gated
|
|
+ // on the host→guest thunk being installed by Memory::Memory(); without
|
|
+ // that there is no Memory and therefore no possible guest-memory write.
|
|
+ // This ALSO keeps the slow path from running during static initialization
|
|
+ // (which would race the cvar object construction in cpu_flags.cc and
|
|
+ // permanently latch g_active=0 before --audit_68_* cmdline override
|
|
+ // applies). See reading-error #35 / Session 2 plan.
|
|
+ if constexpr (sizeof(T) <= 8 && std::is_integral_v<T>) {
|
|
+ if (xe::audit_68::g_host_to_guest_thunk != nullptr) [[unlikely]] {
|
|
+ uint64_t v;
|
|
+ if constexpr (sizeof(T) == 8) {
|
|
+ v = static_cast<uint64_t>(src);
|
|
+ } else if constexpr (sizeof(T) == 4) {
|
|
+ v = static_cast<uint64_t>(static_cast<uint32_t>(src));
|
|
+ } else if constexpr (sizeof(T) == 2) {
|
|
+ v = static_cast<uint64_t>(static_cast<uint16_t>(src));
|
|
+ } else {
|
|
+ v = static_cast<uint64_t>(static_cast<uint8_t>(src));
|
|
+ }
|
|
+ xe::audit_68::check_host_write(
|
|
+ &value, v, static_cast<uint8_t>(sizeof(T)),
|
|
+ E == std::endian::big ? "be<T>::set" : "le<T>::set");
|
|
+ }
|
|
+ }
|
|
if constexpr (std::endian::native == E) {
|
|
value = src;
|
|
} else {
|
|
diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc
|
|
index 1034dcac7..38148010c 100644
|
|
--- a/src/xenia/cpu/xex_module.cc
|
|
+++ b/src/xenia/cpu/xex_module.cc
|
|
@@ -51,6 +51,38 @@ DECLARE_bool(allow_plugins);
|
|
|
|
DECLARE_bool(disable_context_promotion);
|
|
|
|
+// AUDIT-068 Session 2: helper that scans a raw byte buffer for 4-byte aligned
|
|
+// u32 values that match the configured audit_68 value list, emitting a
|
|
+// per-position event. Used to pre-scan XEX-loader memcpys that bypass all
|
|
+// other hooked surfaces. Cost when off: a single relaxed atomic load.
|
|
+static inline void audit68_prescan_memcpy(uint32_t guest_va_dest,
|
|
+ const uint8_t* src, size_t size,
|
|
+ const char* tag) {
|
|
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
|
|
+ if (active == 0) return;
|
|
+ if ((active & 0x1) && size >= 4) {
|
|
+ size_t aligned_end = size & ~size_t(3);
|
|
+ for (size_t i = 0; i < aligned_end; i += 4) {
|
|
+ uint32_t be_u32 = (uint32_t(src[i + 0]) << 24) |
|
|
+ (uint32_t(src[i + 1]) << 16) |
|
|
+ (uint32_t(src[i + 2]) << 8) | uint32_t(src[i + 3]);
|
|
+ xe::audit_68::check_guest_va(
|
|
+ static_cast<uint32_t>(guest_va_dest + i), be_u32, 4, tag);
|
|
+ }
|
|
+ }
|
|
+ if (active & 0x2) {
|
|
+ // Coarse addr-only event over the full span (dest only).
|
|
+ uint64_t v = 0;
|
|
+ if (size >= 4) {
|
|
+ v = (uint64_t(src[0]) << 24) | (uint64_t(src[1]) << 16) |
|
|
+ (uint64_t(src[2]) << 8) | uint64_t(src[3]);
|
|
+ }
|
|
+ xe::audit_68::check_guest_va(guest_va_dest, v,
|
|
+ static_cast<uint8_t>(std::min<size_t>(size, 8)),
|
|
+ tag);
|
|
+ }
|
|
+}
|
|
+
|
|
static constexpr uint8_t xe_xex1_retail_key[16] = {
|
|
0xA2, 0x6C, 0x10, 0xF7, 0x1F, 0xD9, 0x35, 0xE9,
|
|
0x8B, 0x99, 0x92, 0x2C, 0xE9, 0x32, 0x15, 0x72};
|
|
@@ -424,6 +456,10 @@ int XexModule::ApplyPatch(XexModule* module) {
|
|
// If image_source_offset is set, copy [source_offset:source_size] to
|
|
// target_offset
|
|
if (patch_header->delta_image_source_offset) {
|
|
+ audit68_prescan_memcpy(
|
|
+ module->base_address_ + patch_header->delta_image_target_offset,
|
|
+ base_exe + patch_header->delta_image_source_offset,
|
|
+ patch_header->delta_image_source_size, "xex_memcpy_patch");
|
|
memcpy(base_exe + patch_header->delta_image_target_offset,
|
|
base_exe + patch_header->delta_image_source_offset,
|
|
patch_header->delta_image_source_size);
|
|
@@ -589,6 +625,8 @@ int XexModule::ReadImageUncompressed(const void* xex_addr, size_t xex_length) {
|
|
if (exe_length > uncompressed_size) {
|
|
return 1;
|
|
}
|
|
+ audit68_prescan_memcpy(base_address_, p, exe_length,
|
|
+ "xex_memcpy_uncompressed");
|
|
memcpy(buffer, p, exe_length);
|
|
return 0;
|
|
case XEX_ENCRYPTION_NORMAL:
|
|
@@ -665,6 +703,9 @@ int XexModule::ReadImageBasicCompressed(const void* xex_addr,
|
|
// Overflow.
|
|
return 1;
|
|
}
|
|
+ audit68_prescan_memcpy(
|
|
+ base_address_ + static_cast<uint32_t>(d - buffer), p, data_size,
|
|
+ "xex_memcpy_basic_block");
|
|
memcpy(d, p, data_size);
|
|
break;
|
|
case XEX_ENCRYPTION_NORMAL: {
|
|
@@ -799,6 +840,17 @@ int XexModule::ReadImageCompressed(const void* xex_addr, size_t xex_length) {
|
|
result_code = lzx_decompress(
|
|
compress_buffer, d - compress_buffer, buffer, uncompressed_size,
|
|
compression_info->normal.window_size, nullptr, 0);
|
|
+
|
|
+ // AUDIT-068 Session 2: lzx_decompress writes directly into guest
|
|
+ // memory via the host pointer `buffer`. There's no host-side hook
|
|
+ // covering its internal bulk writes, so post-scan the produced bytes
|
|
+ // to recover what the XEX loader actually placed at `base_address_`.
|
|
+ // This is THE most likely catch for the vtable install case (vtables
|
|
+ // live in the .rdata section that is part of the LZX-compressed image).
|
|
+ if (result_code == 0) {
|
|
+ audit68_prescan_memcpy(base_address_, buffer, uncompressed_size,
|
|
+ "xex_lzx_decompress_output");
|
|
+ }
|
|
} else {
|
|
XELOGE("Unable to allocate XEX memory at {:08X}-{:08X}.", base_address_,
|
|
uncompressed_size);
|
|
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
|
|
index 22ba66aee..819a8a8a2 100644
|
|
--- a/src/xenia/memory.cc
|
|
+++ b/src/xenia/memory.cc
|
|
@@ -14,6 +14,7 @@
|
|
|
|
#include "third_party/fmt/include/fmt/format.h"
|
|
#include "xenia/base/assert.h"
|
|
+#include "xenia/base/audit_68_host_mem_watch_fwd.h"
|
|
#include "xenia/base/byte_stream.h"
|
|
#include "xenia/base/clock.h"
|
|
#include "xenia/base/cvar.h"
|
|
@@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) {
|
|
|
|
static Memory* active_memory_ = nullptr;
|
|
|
|
+// AUDIT-068 — process-global accessor (declared in memory.h).
|
|
+Memory* Memory::active() { return active_memory_; }
|
|
+
|
|
void CrashDump() {
|
|
static std::atomic<int> in_crash_dump(0);
|
|
if (in_crash_dump.fetch_add(1)) {
|
|
@@ -151,11 +155,19 @@ Memory::Memory() {
|
|
uint32_t(xe::memory::allocation_granularity());
|
|
assert_zero(active_memory_);
|
|
active_memory_ = this;
|
|
+
|
|
+ // AUDIT-068: register host→guest translation thunk so the watch slow path
|
|
+ // in xenia-base can resolve guest VAs without depending on xenia-core.
|
|
+ xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t {
|
|
+ Memory* m = active_memory_;
|
|
+ return m ? m->HostToGuestVirtual(host_ptr) : 0u;
|
|
+ };
|
|
}
|
|
|
|
Memory::~Memory() {
|
|
assert_true(active_memory_ == this);
|
|
active_memory_ = nullptr;
|
|
+ xe::audit_68::g_host_to_guest_thunk = nullptr;
|
|
|
|
// Uninstall the MMIO handler, as we won't be able to service more
|
|
// requests.
|
|
@@ -540,16 +552,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const {
|
|
}
|
|
|
|
void Memory::Zero(uint32_t address, uint32_t size) {
|
|
+ // AUDIT-068: log a single span event with value=0; size is capped at 8 for
|
|
+ // the value field. Slow path is gated on the atomic flag.
|
|
+ xe::audit_68::check_guest_va(address, 0,
|
|
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Zero");
|
|
std::memset(TranslateVirtual(address), 0, size);
|
|
}
|
|
|
|
void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) {
|
|
+ // Replicate the fill byte into all 8 bytes of the value field (0xAB ->
|
|
+ // 0xABABABABABABABAB) so the value passed on mirrors what memset writes. A
|
|
+ // mixed-byte watch value such as 0xDEADBEEF can never equal a byte fill.
|
|
+ uint64_t v = static_cast<uint64_t>(value);
|
|
+ v |= v << 8;
|
|
+ v |= v << 16;
|
|
+ v |= v << 32;
|
|
+ xe::audit_68::check_guest_va(address, v,
|
|
+ static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Fill");
|
|
std::memset(TranslateVirtual(address), value, size);
|
|
}
|
|
|
|
void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) {
|
|
uint8_t* pdest = TranslateVirtual(dest);
|
|
const uint8_t* psrc = TranslateVirtual(src);
|
|
+ // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the
|
|
+ // source buffer. Catches XEX-loader-style memcpys where a vptr (the target
|
|
+ // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost
|
|
+ // O(size/4 * N_values) with N_values capped at 8 inside value_matches —
|
|
+ // negligible vs the underlying memcpy throughput.
|
|
+ //
|
|
+ // The scan runs only when active bit 0x1 (values-mode) is set. When bit 0x2
|
|
+ // is set (e.g. addrs-only, Run 2 voice-struct mode), we also emit a single
|
|
+ // addr-only event covering the destination span so addr-watch isn't broken.
|
|
+ uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed);
|
|
+ if (active != 0) [[unlikely]] {
|
|
+ if ((active & 0x1) && size >= 4) {
|
|
+ // Scan source for any configured u32 value (big-endian, mirrors how
|
|
+ // guest sees the bytes). 4-byte aligned offsets only.
|
|
+ uint32_t aligned_end = size & ~3u;
|
|
+ for (uint32_t i = 0; i < aligned_end; i += 4) {
|
|
+ uint32_t be_u32 =
|
|
+ (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) |
|
|
+ (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]);
|
|
+ xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy");
|
|
+ }
|
|
+ }
|
|
+ if (active & 0x2) {
|
|
+ // Addr-only mode: emit a single coarse event tagged with the dest base
|
|
+ // and first u32 of source for context. The slow-path range check will
|
|
+ // log iff the dest span intersects a configured addr range.
|
|
+ uint64_t v = 0;
|
|
+ if (size >= 4) {
|
|
+ v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) |
|
|
+ (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]);
|
|
+ } else if (size > 0) {
|
|
+ for (uint32_t i = 0; i < size; ++i) {
|
|
+ v = (v << 8) | psrc[i];
|
|
+ }
|
|
+ }
|
|
+ xe::audit_68::check_guest_va(
|
|
+ dest, v, static_cast<uint8_t>(std::min<uint32_t>(size, 8)),
|
|
+ "Memory::Copy");
|
|
+ }
|
|
+ }
|
|
std::memcpy(pdest, psrc, size);
|
|
}
|
|
|