feat(memory): --mem-watch=ADDR per-store writer trace

Adds an opt-in diagnostic that emits one tracing line per guest store
overlapping any armed byte address, naming the writer (tid, pc, lr)
plus old/new u32 lanes. Mirrors the --pc-probe / --branch-probe shape;
pc/lr are stamped from worker_prologue via a thread-local Cell, so
default runs (empty watch set) pay only two is_empty() checks per write
(one before, one after the store). Lockstep digest preserved (instructions=100000003 across reruns,
sylpheed_n50m.json golden byte-identical).

Diagnostic infra only; no functional change. Used to identify producers
of dispatch-state writes for the audit-017 / audit-019 hunt.
This commit is contained in:
MechaCat02
2026-05-06 21:00:20 +02:00
parent cc54ca8e64
commit 978a6950d1
3 changed files with 219 additions and 2 deletions

View File

@@ -1,3 +1,4 @@
use std::cell::Cell;
use std::sync::atomic::{AtomicU64, Ordering};
use crate::access::MemoryAccess;
@@ -5,6 +6,23 @@ use crate::mmio::MmioRegion;
use crate::page_table::{AllocationState, MemoryProtect, PageEntry};
use crate::MemoryError;
// Per-host-thread record of the most recently stamped `(tid, pc, lr)`
// triple. Written by `set_writer_ctx`, read back by `writer_ctx` when a
// watched store is reported. Starts as `(0, 0, 0)` on every thread; the
// `const` initializer means no lazy-init code runs on first access.
thread_local! {
static WRITER_CTX: Cell<(u32, u32, u32)> = const { Cell::new((0, 0, 0)) };
}
/// Record which guest instruction is executing on the current host thread.
///
/// The `(tid, pc, lr)` triple is stored in a thread-local `Cell` and is
/// picked up by [`GuestMemory::check_mem_watch`] when a store touches an
/// armed address, so the trace line can name the writer. The call is a
/// plain thread-local `Cell::set`; when no watch is armed the stamped
/// value is simply never read.
pub fn set_writer_ctx(tid: u32, pc: u32, lr: u32) {
    let stamp = (tid, pc, lr);
    WRITER_CTX.with(|slot| slot.set(stamp));
}
fn writer_ctx() -> (u32, u32, u32) {
WRITER_CTX.with(|c| c.get())
}
const PAGE_SIZE: u32 = 4096;
/// Total guest address space: 4GB.
const GUEST_ADDRESS_SPACE: usize = 0x1_0000_0000;
@@ -64,6 +82,13 @@ pub struct GuestMemory {
/// Monotonic global write counter — makes per-page versions
/// cross-comparable even when their indices alias.
writes_total: AtomicU64,
/// Sorted list of guest byte addresses to log on every store that
/// touches them. Populated once via [`Self::arm_mem_watch`] before
/// the run starts; stable for the run. Empty by default → the hot
/// path's `is_empty()` check is a single cache-resident load.
mem_watch_addrs: Vec<u32>,
/// Count of fires observed (for tests / hand-off telemetry).
mem_watch_count: AtomicU64,
}
/// Greatest common bit-mask such that `(a & m) == (b & m)` for every bit
@@ -106,6 +131,8 @@ impl GuestMemory {
owned: true,
page_versions: (0..PAGE_COUNT).map(|_| AtomicU64::new(0)).collect(),
writes_total: AtomicU64::new(0),
mem_watch_addrs: Vec::new(),
mem_watch_count: AtomicU64::new(0),
})
}
@@ -329,6 +356,8 @@ impl GuestMemory {
/// remains the caller's responsibility (the table isn't reachable
/// from `GuestMemory` without a wider plumbing change).
pub fn write_bulk(&self, addr: u32, buf: &[u8]) {
let len = buf.len() as u32;
let old_lane = self.capture_mem_watch_old(addr, len);
let ptr = self.translate_virtual_mut(addr);
unsafe {
std::ptr::copy_nonoverlapping(buf.as_ptr(), ptr, buf.len());
@@ -336,7 +365,7 @@ impl GuestMemory {
if buf.is_empty() {
return;
}
let last_byte = addr.saturating_add(buf.len() as u32).saturating_sub(1);
let last_byte = addr.saturating_add(len).saturating_sub(1);
let first_page = addr / PAGE_SIZE;
let last_page = last_byte / PAGE_SIZE;
for page in first_page..=last_page {
@@ -345,6 +374,7 @@ impl GuestMemory {
// the page works.
self.bump_page_version(page * PAGE_SIZE);
}
self.check_mem_watch(addr, len, old_lane);
}
/// Check if a guest address has been allocated/committed. Acquire load
@@ -370,6 +400,81 @@ impl GuestMemory {
.get(page)
.map(|a| PageEntry::from_raw(a.load(std::sync::atomic::Ordering::Acquire)))
}
/// Install `addrs` as the set of watched guest byte addresses, replacing
/// whatever was armed before.
///
/// The list is stored sorted ascending with duplicates removed. Once
/// armed, every store is tested for byte-exact overlap with each watched
/// address; a hit produces one `tracing::info!` record at target
/// `mem_watch` naming the writer's (tid, pc, lr) (as stamped through
/// [`set_writer_ctx`]) together with the previous and new values. The
/// watch only observes: the store itself is performed unchanged.
pub fn arm_mem_watch(&mut self, mut addrs: Vec<u32>) {
    // Equal u32 values are indistinguishable, so an unstable sort yields
    // exactly the same list as a stable one.
    addrs.sort_unstable();
    addrs.dedup();
    self.mem_watch_addrs = addrs;
}
/// Total number of mem-watch fires recorded so far.
///
/// One fire is counted per (store, watched address) hit. In the code
/// visible here the counter starts at zero on construction and
/// [`Self::arm_mem_watch`] does not reset it.
pub fn mem_watch_count(&self) -> u64 {
    let fires = &self.mem_watch_count;
    fires.load(Ordering::Relaxed)
}
/// Reports whether the watch set is non-empty, i.e. whether at least one
/// guest address is currently armed.
#[inline]
pub fn has_mem_watch(&self) -> bool {
    let armed = &self.mem_watch_addrs;
    !armed.is_empty()
}
/// Post-store half of the memory watch: report every armed address that
/// the store `[addr, addr + len)` covered.
///
/// For each hit this bumps `mem_watch_count` and emits one
/// `tracing::info!` record at target `mem_watch` carrying the watched
/// address, the old and new big-endian u32 lanes read at that address,
/// the store's address and length, and the `(tid, pc, lr)` last stamped
/// on this host thread via [`set_writer_ctx`].
///
/// `old_lane_at_watch` is the `(watch, lane)` pair the caller captured
/// BEFORE the store fired. It describes a single watched address; any
/// other watched address hit by the same store is logged with `old=0`
/// because its pre-store lane was never captured.
///
/// Returns immediately when no address is armed.
#[inline]
fn check_mem_watch(&self, addr: u32, len: u32, old_lane_at_watch: Option<(u32, u32)>) {
    if self.mem_watch_addrs.is_empty() {
        return;
    }
    // `arm_mem_watch` stores the list sorted, so binary-search to the first
    // address >= `addr` instead of scanning every armed address per store.
    let first = self.mem_watch_addrs.partition_point(|&w| w < addr);
    for &watch in &self.mem_watch_addrs[first..] {
        // `watch >= addr` holds here, so the subtraction cannot underflow.
        // Comparing the offset against `len` (rather than computing a
        // saturated `addr + len`) keeps a watch on 0xFFFF_FFFF reachable
        // when the store covers the last guest byte. The list is sorted,
        // so the first miss ends the scan.
        if watch - addr >= len {
            break;
        }
        // Read up to four bytes at `watch`, but never past the top of the
        // 32-bit guest address space; missing trailing bytes read as zero.
        let lane_len = (u32::MAX - watch).min(3) as usize + 1;
        let mut lane = [0u8; 4];
        // SAFETY: assumes `translate_virtual` yields a host pointer that is
        // readable for the guest bytes at `watch` — the same assumption the
        // store that just completed relied on for `addr`.
        // NOTE(review): the lane may extend past the stored range into a
        // neighbouring page; confirm that page is always readable.
        unsafe {
            std::ptr::copy_nonoverlapping(
                self.translate_virtual(watch) as *const u8,
                lane.as_mut_ptr(),
                lane_len,
            );
        }
        let new_val = u32::from_be_bytes(lane);
        let old_val = match old_lane_at_watch {
            Some((captured_at, lane)) if captured_at == watch => lane,
            _ => 0,
        };
        let (tid, pc, lr) = writer_ctx();
        self.mem_watch_count.fetch_add(1, Ordering::Relaxed);
        tracing::info!(
            target: "mem_watch",
            "MEM-WATCH addr={:#010x} old={:#010x} new={:#010x} store_addr={:#010x} store_len={} tid={} pc={:#010x} lr={:#010x}",
            watch, old_val, new_val, addr, len, tid, pc, lr,
        );
    }
}
/// Pre-store half of the memory watch: capture the OLD value for the
/// store `[addr, addr + len)`.
///
/// Returns `Some((watch, lane))` for the lowest armed address that lies
/// inside the store, where `lane` is the big-endian u32 read at `watch`
/// before the store happens; returns `None` when nothing is armed or no
/// armed address overlaps. Only one address is captured even if the store
/// covers several. The write hooks pass the result to
/// [`Self::check_mem_watch`] after the store.
#[inline]
fn capture_mem_watch_old(&self, addr: u32, len: u32) -> Option<(u32, u32)> {
    if self.mem_watch_addrs.is_empty() {
        return None;
    }
    // The list is sorted (see `arm_mem_watch`), so the first entry >= `addr`
    // is the only possible "lowest overlapping" candidate.
    let first = self.mem_watch_addrs.partition_point(|&w| w < addr);
    let watch = *self.mem_watch_addrs.get(first)?;
    // `watch >= addr`, so this cannot underflow; comparing the offset with
    // `len` avoids the saturated end that made 0xFFFF_FFFF unmatchable.
    if watch - addr >= len {
        return None;
    }
    // Read up to four bytes at `watch`, but never past the top of the
    // 32-bit guest address space; missing trailing bytes read as zero.
    let lane_len = (u32::MAX - watch).min(3) as usize + 1;
    let mut lane = [0u8; 4];
    // SAFETY: assumes `translate_virtual` yields a host pointer that is
    // readable for the guest bytes at `watch`; the callers visible here
    // only reach this point for stores they are about to perform.
    // NOTE(review): the lane may extend past the stored range into a
    // neighbouring page; confirm that page is always readable.
    unsafe {
        std::ptr::copy_nonoverlapping(
            self.translate_virtual(watch) as *const u8,
            lane.as_mut_ptr(),
            lane_len,
        );
    }
    Some((watch, u32::from_be_bytes(lane)))
}
}
impl MemoryAccess for GuestMemory {
@@ -435,9 +540,11 @@ impl MemoryAccess for GuestMemory {
return;
}
if !self.is_mapped(addr) { return; }
let old_lane = self.capture_mem_watch_old(addr, 1);
let ptr = self.translate_virtual_mut(addr);
unsafe { *ptr = val };
self.bump_page_version(addr);
self.check_mem_watch(addr, 1, old_lane);
}
fn write_u16(&self, addr: u32, val: u16) {
@@ -445,6 +552,7 @@ impl MemoryAccess for GuestMemory {
(mmio.write_callback)(addr, val as u32);
} else if !self.is_mapped(addr) {
} else {
let old_lane = self.capture_mem_watch_old(addr, 2);
let ptr = self.translate_virtual_mut(addr);
unsafe {
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 2);
@@ -456,6 +564,7 @@ impl MemoryAccess for GuestMemory {
if (addr & 0xFFF) >= (PAGE_SIZE - 1) {
self.bump_page_version(addr.wrapping_add(1));
}
self.check_mem_watch(addr, 2, old_lane);
}
}
@@ -464,6 +573,7 @@ impl MemoryAccess for GuestMemory {
(mmio.write_callback)(addr, val);
} else if !self.is_mapped(addr) {
} else {
let old_lane = self.capture_mem_watch_old(addr, 4);
let ptr = self.translate_virtual_mut(addr);
unsafe {
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 4);
@@ -472,6 +582,7 @@ impl MemoryAccess for GuestMemory {
if (addr & 0xFFF) >= (PAGE_SIZE - 3) {
self.bump_page_version(addr.wrapping_add(3));
}
self.check_mem_watch(addr, 4, old_lane);
}
}
@@ -481,6 +592,7 @@ impl MemoryAccess for GuestMemory {
(mmio.write_callback)(addr.wrapping_add(4), val as u32);
} else if !self.is_mapped(addr) {
} else {
let old_lane = self.capture_mem_watch_old(addr, 8);
let ptr = self.translate_virtual_mut(addr);
unsafe {
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 8);
@@ -489,6 +601,7 @@ impl MemoryAccess for GuestMemory {
if (addr & 0xFFF) >= (PAGE_SIZE - 7) {
self.bump_page_version(addr.wrapping_add(7));
}
self.check_mem_watch(addr, 8, old_lane);
}
}
@@ -782,6 +895,51 @@ mod tests {
assert_eq!(mem.read_u8(0x3000), 0x11);
assert_eq!(mem.read_u8(0x3007), 0x88);
}
/// A store fires the watch only when it covers the armed byte itself.
#[test]
fn mem_watch_fires_on_overlapping_store() {
    const WATCH: u32 = 0x4010;

    let mut guest = empty_mem();
    let rw = MemoryProtect::READ | MemoryProtect::WRITE;
    guest.alloc(0x4000, 0x1000, rw).unwrap();
    guest.arm_mem_watch(vec![WATCH]);
    super::set_writer_ctx(7, 0x8200_0000, 0x8200_0004);

    // A u32 store that starts on the watched address is a hit.
    guest.write_u32(WATCH, 0xDEAD_BEEF);
    assert_eq!(guest.mem_watch_count(), 1);

    // So is a single-byte store to the watched byte.
    guest.write_u8(WATCH, 0x11);
    assert_eq!(guest.mem_watch_count(), 2);

    // A byte store two bytes further on misses: matching is byte-exact.
    guest.write_u8(WATCH + 2, 0x22);
    assert_eq!(guest.mem_watch_count(), 2);

    // A u16 store that lies entirely after the watched byte misses too.
    guest.write_u16(WATCH + 4, 0xCAFE);
    assert_eq!(guest.mem_watch_count(), 2);

    // A bulk write that spans the watched byte counts as exactly one hit.
    let zeros = [0u8; 0x20];
    guest.write_bulk(0x4000, &zeros);
    assert_eq!(guest.mem_watch_count(), 3);
}
/// With nothing armed, a store behaves normally and records no fire.
#[test]
fn mem_watch_empty_set_zero_overhead_path() {
    let mut guest = empty_mem();
    let rw = MemoryProtect::READ | MemoryProtect::WRITE;
    guest.alloc(0x5000, 0x1000, rw).unwrap();

    guest.write_u32(0x5000, 0x1234_5678);

    // The stored value reads back unchanged...
    assert_eq!(guest.read_u32(0x5000), 0x1234_5678);
    // ...and the fire counter never moved.
    assert_eq!(guest.mem_watch_count(), 0);
}
/// Duplicate, out-of-order addresses collapse to one entry each when armed.
#[test]
fn mem_watch_arm_dedups_and_sorts() {
    let mut guest = empty_mem();
    let rw = MemoryProtect::READ | MemoryProtect::WRITE;
    guest.alloc(0x6000, 0x1000, rw).unwrap();

    // Two distinct addresses, each listed twice and in descending order.
    let requested = vec![0x6008, 0x6004, 0x6008, 0x6004];
    guest.arm_mem_watch(requested);

    // The u64 store covers [0x6004, 0x600C), which contains both addresses.
    guest.write_u64(0x6004, 0x1111_2222_3333_4444);

    // One fire per distinct watched address: two, not four.
    assert_eq!(guest.mem_watch_count(), 2);
}
}
impl Drop for GuestMemory {

View File

@@ -8,7 +8,7 @@ mod platform;
use thiserror::Error;
pub use access::MemoryAccess;
pub use heap::{GuestMemory, HeapType};
pub use heap::{set_writer_ctx, GuestMemory, HeapType};
pub use mmio::MmioRegion;
pub use page_table::PageEntry;