feat(memory): --mem-watch=ADDR per-store writer trace

Adds an opt-in diagnostic that emits one tracing line per guest store
overlapping any armed byte address, naming the writer (tid, pc, lr)
plus old/new u32 lanes. Mirrors the --pc-probe / --branch-probe shape;
pc/lr are stamped from worker_prologue via a thread-local Cell. Default
runs (empty watch set) pay only is_empty() early-outs: one in
worker_prologue and one in each of the pre-/post-store hooks per
write. Lockstep digest preserved (instructions=100000003 across reruns,
sylpheed_n50m.json golden byte-identical).

Diagnostic infra only; no functional change. Used to identify producers
of dispatch-state writes for the audit-017 / audit-019 hunt.
This commit is contained in:
MechaCat02
2026-05-06 21:00:20 +02:00
parent cc54ca8e64
commit 978a6950d1
3 changed files with 219 additions and 2 deletions

View File

@@ -213,6 +213,14 @@ enum Commands {
/// `XENIA_BRANCH_PROBE`.
#[arg(long)]
branch_probe: Option<String>,
/// Diagnostic. Comma-separated guest byte addresses; on every
/// guest store that overlaps any listed byte, emit one
/// `MEM-WATCH` line at tracing target `mem_watch` with the
/// (tid, pc, lr) of the writer plus old/new u32 lanes.
/// Read-only; lockstep digest unaffected. Settable via
/// `XENIA_MEM_WATCH`. Example: `--mem-watch=0x828F40B4`.
#[arg(long)]
mem_watch: Option<String>,
},
/// Browse XISO disc image contents
Browse {
@@ -371,6 +379,7 @@ fn main() -> Result<()> {
ctor_probe,
dump_addr,
branch_probe,
mem_watch,
} => cmd_exec(
&path,
max_instructions,
@@ -392,6 +401,7 @@ fn main() -> Result<()> {
ctor_probe.as_deref(),
dump_addr.as_deref(),
branch_probe.as_deref(),
mem_watch.as_deref(),
),
Commands::Browse { path } => cmd_browse(&path),
Commands::Info { path } => cmd_info(&path),
@@ -596,6 +606,7 @@ fn cmd_exec(
ctor_probe: Option<&str>,
dump_addr: Option<&str>,
branch_probe: Option<&str>,
mem_watch: Option<&str>,
) -> Result<()> {
cmd_exec_inner(
path,
@@ -618,6 +629,7 @@ fn cmd_exec(
ctor_probe,
dump_addr,
branch_probe,
mem_watch,
None,
None,
false,
@@ -659,6 +671,7 @@ fn cmd_check(
None, // ctor_probe — diagnostic, never wanted on goldens
None, // dump_addr — same
None, // branch_probe — diagnostic, never wanted on goldens
None, // mem_watch — same
out,
expect,
stable_digest,
@@ -686,6 +699,7 @@ fn cmd_exec_inner(
ctor_probe: Option<&str>,
dump_addr: Option<&str>,
branch_probe: Option<&str>,
mem_watch: Option<&str>,
digest_out: Option<&str>,
digest_expect: Option<&str>,
stable_digest: bool,
@@ -1058,6 +1072,41 @@ fn cmd_exec_inner(
}
}
let mem_watch_combined: Option<String> = match (mem_watch, std::env::var("XENIA_MEM_WATCH").ok()) {
(Some(s), _) => Some(s.to_string()),
(None, Some(s)) if !s.is_empty() => Some(s),
_ => None,
};
let mut mem_watch_addrs: Vec<u32> = Vec::new();
if let Some(list) = mem_watch_combined {
for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) {
let parsed = if let Some(hex) = token.strip_prefix("0x").or_else(|| token.strip_prefix("0X")) {
u32::from_str_radix(hex, 16)
} else {
token.parse::<u32>()
};
match parsed {
Ok(addr) => mem_watch_addrs.push(addr),
Err(_) => {
return Err(anyhow::anyhow!(
"invalid address in --mem-watch: {token:?}"
));
}
}
}
if !quiet && !mem_watch_addrs.is_empty() {
let strs: Vec<String> = mem_watch_addrs
.iter()
.map(|a| format!("{a:#010x}"))
.collect();
tracing::info!(
"mem-watch armed: {} ({})",
mem_watch_addrs.len(),
strs.join(", ")
);
}
}
// Install the GPU register aperture MMIO region on the guest memory so
// any `0x7FC8xxxx` access routes to our atomic mailbox. Matches canary's
// `graphics_system.cc:141-144`. The callbacks capture Arc clones of the
@@ -1272,6 +1321,10 @@ fn cmd_exec_inner(
// Arm the mem-watch set now: `arm_mem_watch` takes `&mut self`, so it
// has to run before `mem` is shared behind the `Arc` described below.
if !mem_watch_addrs.is_empty() {
mem.arm_mem_watch(mem_watch_addrs);
}
// M1.4 — wrap `mem` in an `Arc<GuestMemory>` after all init mutations
// are complete. The worker thread (if spawned below) holds its own
// Arc clone for the duration of the run; the CPU side passes
// `&*mem_arc` (= `&GuestMemory`) into `run_execution`. The
// trait-level invariant carrying this is correctness: writes are
@@ -1999,6 +2052,12 @@ fn worker_prologue(
kernel.fire_ctor_probe_if_match(hw_id, mem);
kernel.fire_branch_probe_if_match(hw_id);
if mem.has_mem_watch() {
let ctx = kernel.scheduler.ctx(hw_id);
let tid = kernel.scheduler.tid(hw_id).unwrap_or(0);
xenia_memory::set_writer_ctx(tid, ctx.pc, ctx.lr as u32);
}
// 1) Halt-sentinel check (per HW thread).
if pc == LR_HALT {
let injected_here = kernel.interrupts.saved.is_some()

View File

@@ -1,3 +1,4 @@
use std::cell::Cell;
use std::sync::atomic::{AtomicU64, Ordering};
use crate::access::MemoryAccess;
@@ -5,6 +6,23 @@ use crate::mmio::MmioRegion;
use crate::page_table::{AllocationState, MemoryProtect, PageEntry};
use crate::MemoryError;
thread_local! {
    /// Most recent `(tid, pc, lr)` stamped on this host thread;
    /// `(0, 0, 0)` until the first stamp.
    static WRITER_CTX: Cell<(u32, u32, u32)> = const { Cell::new((0, 0, 0)) };
}

/// Record which guest instruction is about to execute on the current
/// host thread, as a `(tid, pc, lr)` triple.
///
/// [`GuestMemory::check_mem_watch`] reads the triple back when a watched
/// store fires so the trace line can name the writer. The stamp is a
/// plain thread-local `Cell::set`; it is per host thread, so stamps made
/// on one thread are never observed by another.
pub fn set_writer_ctx(tid: u32, pc: u32, lr: u32) {
    let stamp = (tid, pc, lr);
    WRITER_CTX.with(|slot| slot.set(stamp));
}

/// Fetch the `(tid, pc, lr)` triple last stamped on this host thread,
/// or `(0, 0, 0)` if nothing has been stamped yet.
fn writer_ctx() -> (u32, u32, u32) {
    WRITER_CTX.with(Cell::get)
}
const PAGE_SIZE: u32 = 4096;
/// Total guest address space: 4GB.
const GUEST_ADDRESS_SPACE: usize = 0x1_0000_0000;
@@ -64,6 +82,13 @@ pub struct GuestMemory {
/// Monotonic global write counter — makes per-page versions
/// cross-comparable even when their indices alias.
writes_total: AtomicU64,
/// Sorted list of guest byte addresses to log on every store that
/// touches them. Populated once via [`Self::arm_mem_watch`] before
/// the run starts; stable for the run. Empty by default → the hot
/// path's `is_empty()` check is a single cache-resident load.
mem_watch_addrs: Vec<u32>,
/// Count of fires observed (for tests / hand-off telemetry).
mem_watch_count: AtomicU64,
}
/// Greatest common bit-mask such that `(a & m) == (b & m)` for every bit
@@ -106,6 +131,8 @@ impl GuestMemory {
owned: true,
page_versions: (0..PAGE_COUNT).map(|_| AtomicU64::new(0)).collect(),
writes_total: AtomicU64::new(0),
mem_watch_addrs: Vec::new(),
mem_watch_count: AtomicU64::new(0),
})
}
@@ -329,6 +356,8 @@ impl GuestMemory {
/// remains the caller's responsibility (the table isn't reachable
/// from `GuestMemory` without a wider plumbing change).
pub fn write_bulk(&self, addr: u32, buf: &[u8]) {
let len = buf.len() as u32;
let old_lane = self.capture_mem_watch_old(addr, len);
let ptr = self.translate_virtual_mut(addr);
unsafe {
std::ptr::copy_nonoverlapping(buf.as_ptr(), ptr, buf.len());
@@ -336,7 +365,7 @@ impl GuestMemory {
if buf.is_empty() {
return;
}
let last_byte = addr.saturating_add(buf.len() as u32).saturating_sub(1);
let last_byte = addr.saturating_add(len).saturating_sub(1);
let first_page = addr / PAGE_SIZE;
let last_page = last_byte / PAGE_SIZE;
for page in first_page..=last_page {
@@ -345,6 +374,7 @@ impl GuestMemory {
// the page works.
self.bump_page_version(page * PAGE_SIZE);
}
self.check_mem_watch(addr, len, old_lane);
}
/// Check if a guest address has been allocated/committed. Acquire load
@@ -370,6 +400,81 @@ impl GuestMemory {
.get(page)
.map(|a| PageEntry::from_raw(a.load(std::sync::atomic::Ordering::Acquire)))
}
/// Install the set of guest byte addresses to watch.
///
/// The list is stored sorted with duplicates removed, replacing any
/// previously armed set. Once armed, every store whose byte range
/// covers a listed address produces one `tracing::info!` line at
/// target `mem_watch` naming the writer's (tid, pc, lr) — as stamped
/// through [`set_writer_ctx`] — together with the value before and
/// after the store. The watch is a read-only diagnostic: the store
/// itself is not altered.
pub fn arm_mem_watch(&mut self, mut addrs: Vec<u32>) {
    // `u32` keys have no identity beyond their value, so an unstable
    // sort yields the same ordering as a stable one.
    addrs.sort_unstable();
    addrs.dedup();
    self.mem_watch_addrs = addrs;
}
/// Total mem-watch fires recorded so far. The counter advances once
/// per watched address hit, so one store covering two watched
/// addresses adds two.
pub fn mem_watch_count(&self) -> u64 {
    let fires = &self.mem_watch_count;
    fires.load(Ordering::Relaxed)
}
/// Whether any watch address is currently armed.
#[inline]
pub fn has_mem_watch(&self) -> bool {
    let armed = &self.mem_watch_addrs;
    !armed.is_empty()
}
/// Post-store half of the mem-watch hook.
///
/// For every armed address inside `[addr, addr + len)` this bumps the
/// fire counter and emits one `MEM-WATCH` line at tracing target
/// `mem_watch`. The line names the writer's (tid, pc, lr) as last
/// stamped via [`set_writer_ctx`] on this host thread, the store's
/// address and width, and the big-endian u32 lane read at the watched
/// address before and after the store.
///
/// `old_lane_at_watch` is the `(watch, lane)` pair the caller captured
/// BEFORE the store. It describes a single watched address; any other
/// watched address hit by the same store has no captured pre-store
/// value and is logged as `old=<uncaptured>` instead of a fabricated
/// zero, which would be indistinguishable from a real old value of 0.
///
/// Returns immediately when no address is armed.
#[inline]
fn check_mem_watch(&self, addr: u32, len: u32, old_lane_at_watch: Option<(u32, u32)>) {
    if self.mem_watch_addrs.is_empty() {
        return;
    }
    // NOTE(review): the end bound saturates at `u32::MAX`, so a store
    // reaching the very top of the address space never matches a watch
    // on `0xFFFF_FFFF`.
    let store_end = addr.saturating_add(len);
    for &watch in &self.mem_watch_addrs {
        if watch < addr || watch >= store_end {
            continue;
        }
        let new_val = {
            let p = self.translate_virtual(watch) as *const [u8; 4];
            // SAFETY: `[u8; 4]` has alignment 1, so the read is never
            // misaligned. Assumes `translate_virtual` yields a host
            // pointer with 4 readable bytes behind it — TODO confirm for
            // a watch in the last 3 bytes of a page or of guest memory.
            u32::from_be_bytes(unsafe { *p })
        };
        let (tid, pc, lr) = writer_ctx();
        self.mem_watch_count.fetch_add(1, Ordering::Relaxed);
        match old_lane_at_watch {
            // The pre-store capture belongs to exactly this address.
            Some((captured_at, old_val)) if captured_at == watch => tracing::info!(
                target: "mem_watch",
                "MEM-WATCH addr={:#010x} old={:#010x} new={:#010x} store_addr={:#010x} store_len={} tid={} pc={:#010x} lr={:#010x}",
                watch, old_val, new_val, addr, len, tid, pc, lr,
            ),
            // No capture for this address: say so rather than print 0.
            _ => tracing::info!(
                target: "mem_watch",
                "MEM-WATCH addr={:#010x} old=<uncaptured> new={:#010x} store_addr={:#010x} store_len={} tid={} pc={:#010x} lr={:#010x}",
                watch, new_val, addr, len, tid, pc, lr,
            ),
        }
    }
}
/// Pre-store half of the mem-watch hook.
///
/// Scans the armed list in order and, for the first armed address that
/// lies inside `[addr, addr + len)`, returns `Some((watch, lane))`
/// where `lane` is the big-endian u32 currently stored at `watch`.
/// Returns `None` when nothing is armed or no armed address is covered
/// by the store. Write hooks call this BEFORE storing and hand the
/// result to [`Self::check_mem_watch`] afterwards.
#[inline]
fn capture_mem_watch_old(&self, addr: u32, len: u32) -> Option<(u32, u32)> {
    let armed = &self.mem_watch_addrs;
    if armed.is_empty() {
        return None;
    }
    let store_end = addr.saturating_add(len);
    let hit = armed
        .iter()
        .copied()
        .find(|w| (addr..store_end).contains(w))?;
    let lane = self.translate_virtual(hit) as *const [u8; 4];
    Some((hit, u32::from_be_bytes(unsafe { *lane })))
}
}
impl MemoryAccess for GuestMemory {
@@ -435,9 +540,11 @@ impl MemoryAccess for GuestMemory {
return;
}
if !self.is_mapped(addr) { return; }
let old_lane = self.capture_mem_watch_old(addr, 1);
let ptr = self.translate_virtual_mut(addr);
unsafe { *ptr = val };
self.bump_page_version(addr);
self.check_mem_watch(addr, 1, old_lane);
}
fn write_u16(&self, addr: u32, val: u16) {
@@ -445,6 +552,7 @@ impl MemoryAccess for GuestMemory {
(mmio.write_callback)(addr, val as u32);
} else if !self.is_mapped(addr) {
} else {
let old_lane = self.capture_mem_watch_old(addr, 2);
let ptr = self.translate_virtual_mut(addr);
unsafe {
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 2);
@@ -456,6 +564,7 @@ impl MemoryAccess for GuestMemory {
if (addr & 0xFFF) >= (PAGE_SIZE - 1) {
self.bump_page_version(addr.wrapping_add(1));
}
self.check_mem_watch(addr, 2, old_lane);
}
}
@@ -464,6 +573,7 @@ impl MemoryAccess for GuestMemory {
(mmio.write_callback)(addr, val);
} else if !self.is_mapped(addr) {
} else {
let old_lane = self.capture_mem_watch_old(addr, 4);
let ptr = self.translate_virtual_mut(addr);
unsafe {
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 4);
@@ -472,6 +582,7 @@ impl MemoryAccess for GuestMemory {
if (addr & 0xFFF) >= (PAGE_SIZE - 3) {
self.bump_page_version(addr.wrapping_add(3));
}
self.check_mem_watch(addr, 4, old_lane);
}
}
@@ -481,6 +592,7 @@ impl MemoryAccess for GuestMemory {
(mmio.write_callback)(addr.wrapping_add(4), val as u32);
} else if !self.is_mapped(addr) {
} else {
let old_lane = self.capture_mem_watch_old(addr, 8);
let ptr = self.translate_virtual_mut(addr);
unsafe {
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 8);
@@ -489,6 +601,7 @@ impl MemoryAccess for GuestMemory {
if (addr & 0xFFF) >= (PAGE_SIZE - 7) {
self.bump_page_version(addr.wrapping_add(7));
}
self.check_mem_watch(addr, 8, old_lane);
}
}
@@ -782,6 +895,51 @@ mod tests {
assert_eq!(mem.read_u8(0x3000), 0x11);
assert_eq!(mem.read_u8(0x3007), 0x88);
}
#[test]
fn mem_watch_fires_on_overlapping_store() {
    // Single watched byte inside a freshly allocated RW page.
    const WATCHED: u32 = 0x4010;

    let mut mem = empty_mem();
    mem.alloc(0x4000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap();
    mem.arm_mem_watch(vec![WATCHED]);
    super::set_writer_ctx(7, 0x8200_0000, 0x8200_0004);

    // A u32 store starting on the watched byte is a hit.
    mem.write_u32(WATCHED, 0xDEAD_BEEF);
    assert_eq!(mem.mem_watch_count(), 1);

    // So is a u8 store landing exactly on it.
    mem.write_u8(WATCHED, 0x11);
    assert_eq!(mem.mem_watch_count(), 2);

    // Matching is byte-exact: a u8 store two bytes further on is a miss.
    mem.write_u8(WATCHED + 2, 0x22);
    assert_eq!(mem.mem_watch_count(), 2);

    // A u16 store that lies wholly past the watched byte is a miss too.
    mem.write_u16(WATCHED + 4, 0xCAFE);
    assert_eq!(mem.mem_watch_count(), 2);

    // A bulk write whose range covers the watched byte counts once.
    mem.write_bulk(0x4000, &[0u8; 0x20]);
    assert_eq!(mem.mem_watch_count(), 3);
}
#[test]
fn mem_watch_empty_set_zero_overhead_path() {
    // Nothing is armed: a store must land in memory as usual and the
    // fire counter must stay at zero.
    let mut mem = empty_mem();
    let rw = MemoryProtect::READ | MemoryProtect::WRITE;
    mem.alloc(0x5000, 0x1000, rw).unwrap();

    mem.write_u32(0x5000, 0x1234_5678);

    assert_eq!(mem.read_u32(0x5000), 0x1234_5678);
    assert_eq!(mem.mem_watch_count(), 0);
}
#[test]
fn mem_watch_arm_dedups_and_sorts() {
    let mut mem = empty_mem();
    mem.alloc(0x6000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap();

    // Two distinct addresses, each listed twice and out of order.
    let requested = vec![0x6008, 0x6004, 0x6008, 0x6004];
    mem.arm_mem_watch(requested);

    // The u64 store covers [0x6004, 0x600C), which contains both 0x6004
    // and 0x6008. Each distinct watched address fires once, so the
    // duplicates must not inflate the count beyond two.
    mem.write_u64(0x6004, 0x1111_2222_3333_4444);
    assert_eq!(mem.mem_watch_count(), 2);
}
}
impl Drop for GuestMemory {

View File

@@ -8,7 +8,7 @@ mod platform;
use thiserror::Error;
pub use access::MemoryAccess;
pub use heap::{GuestMemory, HeapType};
pub use heap::{set_writer_ctx, GuestMemory, HeapType};
pub use mmio::MmioRegion;
pub use page_table::PageEntry;