# AUDIT-068 Session 2 — canary instrumentation extension diff # # Generated 2026-05-19. xenia-canary HEAD = 6de80dffe261b368ecefee36c9b2b337335228c0. # Session 1 changes are already in tree (see fix-canary.diff for the cumulative # Session 1 state). This diff covers the four files that Session 2 extended; # where Session 1 also touched a file, the hunks below include those Session 1 # hooks as well (see the note after the list): # - src/xenia/base/byte_order.h (first hooked in Session 2 — Step 1, +27 LOC, be::set()/le::set() hook) # - src/xenia/memory.cc (extended — Step 2 Memory::Copy byte-scan) # - src/xenia/cpu/xex_module.cc (first hooked in Session 2 — Step 3, +35 LOC, xex_memcpy pre-scan + lzx_decompress post-scan) # - src/xenia/base/audit_68_host_mem_watch_base.cc (extended — static-init gate) # # Two of the four files (memory.cc, audit_68_host_mem_watch_base.cc) ALSO contain # Session 1 hooks. To see the pure Session 2 delta, diff against the post-Session-1 # state of those files (recoverable from fix-canary.diff). # # byte_order.h and xex_module.cc were untouched by Session 1; the diffs below for # those two files are purely Session 2. # # Engine semantics: cvar-gated, default off; when off, each hook costs only its # gate check (e.g. a single relaxed atomic load of g_active). # Total Session 2 additive: ~110 LOC. # Reading-error class #35 (Session 1) mitigated: see writer-report-v2.md Run 5. diff --git a/src/xenia/base/byte_order.h b/src/xenia/base/byte_order.h index 5a076f319..c80ee0ffc 100644 --- a/src/xenia/base/byte_order.h +++ b/src/xenia/base/byte_order.h @@ -11,6 +11,7 @@ #define XENIA_BASE_BYTE_ORDER_H_ #include +#include #if defined __has_include #if __has_include() #include @@ -21,6 +22,7 @@ #endif #include "xenia/base/assert.h" +#include "xenia/base/audit_68_host_mem_watch_fwd.h" #include "xenia/base/platform.h" #if !__cpp_lib_endian @@ -88,6 +90,30 @@ struct endian_store { operator T() const { return get(); } void set(const T& src) { + // AUDIT-068 Session 2: hook the canonical be/le write path. 
Gated + // on the host→guest thunk being installed by Memory::Memory(); without + // that there is no Memory and therefore no possible guest-memory write. + // This ALSO prevents the slow-path from running during static-init order + // (which would race the cvar object construction in cpu_flags.cc and + // permanently latch g_active=0 before --audit_68_* cmdline override + // applies). See reading-error #35 / Session 2 plan. + if constexpr (sizeof(T) <= 8 && std::is_integral_v) { + if (xe::audit_68::g_host_to_guest_thunk != nullptr) [[unlikely]] { + uint64_t v; + if constexpr (sizeof(T) == 8) { + v = static_cast(src); + } else if constexpr (sizeof(T) == 4) { + v = static_cast(static_cast(src)); + } else if constexpr (sizeof(T) == 2) { + v = static_cast(static_cast(src)); + } else { + v = static_cast(static_cast(src)); + } + xe::audit_68::check_host_write( + &value, v, static_cast(sizeof(T)), + E == std::endian::big ? "be::set" : "le::set"); + } + } if constexpr (std::endian::native == E) { value = src; } else { diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc index 1034dcac7..38148010c 100644 --- a/src/xenia/cpu/xex_module.cc +++ b/src/xenia/cpu/xex_module.cc @@ -51,6 +51,38 @@ DECLARE_bool(allow_plugins); DECLARE_bool(disable_context_promotion); +// AUDIT-068 Session 2: helper that scans a raw byte buffer for 4-byte aligned +// u32 values that match the configured audit_68 value list, emitting a +// per-position event. Used to pre-scan XEX-loader memcpys that bypass all +// other hooked surfaces. Cost when off: a single relaxed atomic load. 
+static inline void audit68_prescan_memcpy(uint32_t guest_va_dest, + const uint8_t* src, size_t size, + const char* tag) { + uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed); + if (active == 0) return; + if ((active & 0x1) && size >= 4) { + size_t aligned_end = size & ~size_t(3); + for (size_t i = 0; i < aligned_end; i += 4) { + uint32_t be_u32 = (uint32_t(src[i + 0]) << 24) | + (uint32_t(src[i + 1]) << 16) | + (uint32_t(src[i + 2]) << 8) | uint32_t(src[i + 3]); + xe::audit_68::check_guest_va( + static_cast(guest_va_dest + i), be_u32, 4, tag); + } + } + if (active & 0x2) { + // Coarse addr-only event over the full span (dest only). + uint64_t v = 0; + if (size >= 4) { + v = (uint64_t(src[0]) << 24) | (uint64_t(src[1]) << 16) | + (uint64_t(src[2]) << 8) | uint64_t(src[3]); + } + xe::audit_68::check_guest_va(guest_va_dest, v, + static_cast(std::min(size, 8)), + tag); + } +} + static constexpr uint8_t xe_xex1_retail_key[16] = { 0xA2, 0x6C, 0x10, 0xF7, 0x1F, 0xD9, 0x35, 0xE9, 0x8B, 0x99, 0x92, 0x2C, 0xE9, 0x32, 0x15, 0x72}; @@ -424,6 +456,10 @@ int XexModule::ApplyPatch(XexModule* module) { // If image_source_offset is set, copy [source_offset:source_size] to // target_offset if (patch_header->delta_image_source_offset) { + audit68_prescan_memcpy( + module->base_address_ + patch_header->delta_image_target_offset, + base_exe + patch_header->delta_image_source_offset, + patch_header->delta_image_source_size, "xex_memcpy_patch"); memcpy(base_exe + patch_header->delta_image_target_offset, base_exe + patch_header->delta_image_source_offset, patch_header->delta_image_source_size); @@ -589,6 +625,8 @@ int XexModule::ReadImageUncompressed(const void* xex_addr, size_t xex_length) { if (exe_length > uncompressed_size) { return 1; } + audit68_prescan_memcpy(base_address_, p, exe_length, + "xex_memcpy_uncompressed"); memcpy(buffer, p, exe_length); return 0; case XEX_ENCRYPTION_NORMAL: @@ -665,6 +703,9 @@ int XexModule::ReadImageBasicCompressed(const void* 
xex_addr, // Overflow. return 1; } + audit68_prescan_memcpy( + base_address_ + static_cast(d - buffer), p, data_size, + "xex_memcpy_basic_block"); memcpy(d, p, data_size); break; case XEX_ENCRYPTION_NORMAL: { @@ -799,6 +840,17 @@ int XexModule::ReadImageCompressed(const void* xex_addr, size_t xex_length) { result_code = lzx_decompress( compress_buffer, d - compress_buffer, buffer, uncompressed_size, compression_info->normal.window_size, nullptr, 0); + + // AUDIT-068 Session 2: lzx_decompress writes directly into guest + // memory via the host pointer `buffer`. There's no host-side hook + // covering its internal bulk writes, so post-scan the produced bytes + // to recover what the XEX loader actually placed at `base_address_`. + // This is THE most likely catch for the vtable install case (vtables + // live in the .rdata section that is part of the LZX-compressed image). + if (result_code == 0) { + audit68_prescan_memcpy(base_address_, buffer, uncompressed_size, + "xex_lzx_decompress_output"); + } } else { XELOGE("Unable to allocate XEX memory at {:08X}-{:08X}.", base_address_, uncompressed_size); diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 22ba66aee..819a8a8a2 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -14,6 +14,7 @@ #include "third_party/fmt/include/fmt/format.h" #include "xenia/base/assert.h" +#include "xenia/base/audit_68_host_mem_watch_fwd.h" #include "xenia/base/byte_stream.h" #include "xenia/base/clock.h" #include "xenia/base/cvar.h" @@ -90,6 +91,9 @@ uint32_t get_page_count(uint32_t value, uint32_t page_size) { static Memory* active_memory_ = nullptr; +// AUDIT-068 — process-global accessor (declared in memory.h). 
+Memory* Memory::active() { return active_memory_; } + void CrashDump() { static std::atomic in_crash_dump(0); if (in_crash_dump.fetch_add(1)) { @@ -151,11 +155,19 @@ Memory::Memory() { uint32_t(xe::memory::allocation_granularity()); assert_zero(active_memory_); active_memory_ = this; + + // AUDIT-068: register host→guest translation thunk so the watch slow path + // in xenia-base can resolve guest VAs without depending on xenia-core. + xe::audit_68::g_host_to_guest_thunk = [](const void* host_ptr) -> uint32_t { + Memory* m = active_memory_; + return m ? m->HostToGuestVirtual(host_ptr) : 0u; + }; } Memory::~Memory() { assert_true(active_memory_ == this); active_memory_ = nullptr; + xe::audit_68::g_host_to_guest_thunk = nullptr; // Uninstall the MMIO handler, as we won't be able to service more // requests. @@ -540,16 +552,71 @@ uint32_t Memory::GetPhysicalAddress(uint32_t address) const { } void Memory::Zero(uint32_t address, uint32_t size) { + // AUDIT-068: log a single span event with value=0; size is capped at 8 for + // the value field. Slow path is gated on the atomic flag. + xe::audit_68::check_guest_va(address, 0, + static_cast(std::min(size, 8)), + "Memory::Zero"); std::memset(TranslateVirtual(address), 0, size); } void Memory::Fill(uint32_t address, uint32_t size, uint8_t value) { + // Replicate the fill byte across the value field so value_matches can + // recognise e.g. 0xDEADBEEF only if the byte is 0xDE/0xAD/0xBE/0xEF — for + // capture purposes the byte itself in the low slot is enough. 
+ uint64_t v = static_cast(value); + v |= v << 8; + v |= v << 16; + v |= v << 32; + xe::audit_68::check_guest_va(address, v, + static_cast(std::min(size, 8)), + "Memory::Fill"); std::memset(TranslateVirtual(address), value, size); } void Memory::Copy(uint32_t dest, uint32_t src, uint32_t size) { uint8_t* pdest = TranslateVirtual(dest); const uint8_t* psrc = TranslateVirtual(src); + // AUDIT-068 Session 2: full byte-scan over 4-byte aligned positions of the + // source buffer. Catches XEX-loader-style memcpys where a vptr (the target + // u32 value) is buried somewhere mid-buffer rather than at offset 0. Cost + // O(size/4 * N_values) with N_values capped at 8 inside value_matches — + // negligible vs the underlying memcpy throughput. + // + // Gated on active bit 0x1 (values-mode) AND active != 0. If only addrs are + // configured (Run 2 voice-struct mode), we still emit a single addr-only + // event covering the destination span so addr-watch isn't broken. + uint32_t active = xe::audit_68::g_active.load(std::memory_order_relaxed); + if (active != 0) [[unlikely]] { + if ((active & 0x1) && size >= 4) { + // Scan source for any configured u32 value (big-endian, mirrors how + // guest sees the bytes). 4-byte aligned offsets only. + uint32_t aligned_end = size & ~3u; + for (uint32_t i = 0; i < aligned_end; i += 4) { + uint32_t be_u32 = + (uint32_t(psrc[i + 0]) << 24) | (uint32_t(psrc[i + 1]) << 16) | + (uint32_t(psrc[i + 2]) << 8) | uint32_t(psrc[i + 3]); + xe::audit_68::check_guest_va(dest + i, be_u32, 4, "Memory::Copy"); + } + } + if (active & 0x2) { + // Addr-only mode: emit a single coarse event tagged with the dest base + // and first u32 of source for context. The slow-path range check will + // log iff the dest span intersects a configured addr range. 
+ uint64_t v = 0; + if (size >= 4) { + v = (uint64_t(psrc[0]) << 24) | (uint64_t(psrc[1]) << 16) | + (uint64_t(psrc[2]) << 8) | uint64_t(psrc[3]); + } else if (size > 0) { + for (uint32_t i = 0; i < size; ++i) { + v = (v << 8) | psrc[i]; + } + } + xe::audit_68::check_guest_va( + dest, v, static_cast(std::min(size, 8)), + "Memory::Copy"); + } + } std::memcpy(pdest, psrc, size); }