feat(memory): --mem-watch=ADDR per-store writer trace
Adds an opt-in diagnostic that emits one tracing line for each armed byte address that a guest store overlaps, naming the writer (tid, pc, lr) plus the old/new u32 lanes at the watched address. Mirrors the --pc-probe / --branch-probe shape; pc/lr are stamped from worker_prologue via a thread-local Cell, so default runs (empty watch set) pay only an is_empty() check in the pre-store capture hook and another in the post-store check hook on each write. Lockstep digest preserved (instructions=100000003 across reruns, sylpheed_n50m.json golden byte-identical). Diagnostic infra only; no functional change. Used to identify producers of dispatch-state writes for the audit-017 / audit-019 hunt.
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
use std::cell::Cell;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
use crate::access::MemoryAccess;
|
||||
@@ -5,6 +6,23 @@ use crate::mmio::MmioRegion;
|
||||
use crate::page_table::{AllocationState, MemoryProtect, PageEntry};
|
||||
use crate::MemoryError;
|
||||
|
||||
thread_local! {
    /// `(tid, pc, lr)` most recently stamped on this host thread through
    /// [`set_writer_ctx`]; holds `(0, 0, 0)` until the first stamp.
    static WRITER_CTX: Cell<(u32, u32, u32)> = const { Cell::new((0, 0, 0)) };
}

/// Record the `(tid, pc, lr)` of the instruction now executing on the
/// calling host thread.
///
/// [`GuestMemory::check_mem_watch`] reads the triple back when a watched
/// store fires so that the trace line can name the writer. The whole
/// operation is one `Cell::set` on a thread-local, so stamping is
/// inexpensive, and the `(0, 0, 0)` default is harmless while no watch is
/// armed.
pub fn set_writer_ctx(tid: u32, pc: u32, lr: u32) {
    let stamp = (tid, pc, lr);
    WRITER_CTX.with(|slot| slot.set(stamp));
}

/// Fetch the `(tid, pc, lr)` triple last stamped on the calling host
/// thread, or `(0, 0, 0)` if nothing has been stamped on it yet.
fn writer_ctx() -> (u32, u32, u32) {
    WRITER_CTX.with(Cell::get)
}
|
||||
|
||||
const PAGE_SIZE: u32 = 4096;
|
||||
/// Total guest address space: 4GB.
|
||||
const GUEST_ADDRESS_SPACE: usize = 0x1_0000_0000;
|
||||
@@ -64,6 +82,13 @@ pub struct GuestMemory {
|
||||
/// Monotonic global write counter — makes per-page versions
|
||||
/// cross-comparable even when their indices alias.
|
||||
writes_total: AtomicU64,
|
||||
/// Sorted list of guest byte addresses to log on every store that
|
||||
/// touches them. Populated once via [`Self::arm_mem_watch`] before
|
||||
/// the run starts; stable for the run. Empty by default → the hot
|
||||
/// path's `is_empty()` check is a single cache-resident load.
|
||||
mem_watch_addrs: Vec<u32>,
|
||||
/// Count of fires observed (for tests / hand-off telemetry).
|
||||
mem_watch_count: AtomicU64,
|
||||
}
|
||||
|
||||
/// Greatest common bit-mask such that `(a & m) == (b & m)` for every bit
|
||||
@@ -106,6 +131,8 @@ impl GuestMemory {
|
||||
owned: true,
|
||||
page_versions: (0..PAGE_COUNT).map(|_| AtomicU64::new(0)).collect(),
|
||||
writes_total: AtomicU64::new(0),
|
||||
mem_watch_addrs: Vec::new(),
|
||||
mem_watch_count: AtomicU64::new(0),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -329,6 +356,8 @@ impl GuestMemory {
|
||||
/// remains the caller's responsibility (the table isn't reachable
|
||||
/// from `GuestMemory` without a wider plumbing change).
|
||||
pub fn write_bulk(&self, addr: u32, buf: &[u8]) {
|
||||
let len = buf.len() as u32;
|
||||
let old_lane = self.capture_mem_watch_old(addr, len);
|
||||
let ptr = self.translate_virtual_mut(addr);
|
||||
unsafe {
|
||||
std::ptr::copy_nonoverlapping(buf.as_ptr(), ptr, buf.len());
|
||||
@@ -336,7 +365,7 @@ impl GuestMemory {
|
||||
if buf.is_empty() {
|
||||
return;
|
||||
}
|
||||
let last_byte = addr.saturating_add(buf.len() as u32).saturating_sub(1);
|
||||
let last_byte = addr.saturating_add(len).saturating_sub(1);
|
||||
let first_page = addr / PAGE_SIZE;
|
||||
let last_page = last_byte / PAGE_SIZE;
|
||||
for page in first_page..=last_page {
|
||||
@@ -345,6 +374,7 @@ impl GuestMemory {
|
||||
// the page works.
|
||||
self.bump_page_version(page * PAGE_SIZE);
|
||||
}
|
||||
self.check_mem_watch(addr, len, old_lane);
|
||||
}
|
||||
|
||||
/// Check if a guest address has been allocated/committed. Acquire load
|
||||
@@ -370,6 +400,81 @@ impl GuestMemory {
|
||||
.get(page)
|
||||
.map(|a| PageEntry::from_raw(a.load(std::sync::atomic::Ordering::Acquire)))
|
||||
}
|
||||
|
||||
/// Arm the memory watch set. Each address is checked for byte-exact
|
||||
/// overlap with every store; on a hit, one `tracing::info!` line is
|
||||
/// emitted at target `mem_watch` with the (tid, pc, lr) of the
|
||||
/// writer (set via [`set_writer_ctx`] from the interpreter prologue),
|
||||
/// the previous value, and the new value. Read-only diagnostic; the
|
||||
/// store itself is unaffected.
|
||||
pub fn arm_mem_watch(&mut self, mut addrs: Vec<u32>) {
|
||||
addrs.sort();
|
||||
addrs.dedup();
|
||||
self.mem_watch_addrs = addrs;
|
||||
}
|
||||
|
||||
/// Number of mem-watch fires observed since arming.
|
||||
pub fn mem_watch_count(&self) -> u64 {
|
||||
self.mem_watch_count.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// True iff at least one watch address is armed.
|
||||
#[inline]
|
||||
pub fn has_mem_watch(&self) -> bool {
|
||||
!self.mem_watch_addrs.is_empty()
|
||||
}
|
||||
|
||||
/// Hot-path check (post-store): if any watched byte address falls
|
||||
/// inside `[addr, addr+len)`, emit a one-line record naming the
|
||||
/// (tid, pc, lr) of the writer (per [`set_writer_ctx`]), the
|
||||
/// post-store u32 lane at the watched address, and the store
|
||||
/// width. `old_lane` is the u32 lane the caller captured BEFORE
|
||||
/// the store fired.
|
||||
#[inline]
|
||||
fn check_mem_watch(&self, addr: u32, len: u32, old_lane_at_watch: Option<(u32, u32)>) {
|
||||
if self.mem_watch_addrs.is_empty() {
|
||||
return;
|
||||
}
|
||||
let store_end = addr.saturating_add(len);
|
||||
for &watch in &self.mem_watch_addrs {
|
||||
if watch >= addr && watch < store_end {
|
||||
let new_val = {
|
||||
let p = self.translate_virtual(watch) as *const [u8; 4];
|
||||
u32::from_be_bytes(unsafe { *p })
|
||||
};
|
||||
let old_val = old_lane_at_watch
|
||||
.and_then(|(w, v)| (w == watch).then_some(v))
|
||||
.unwrap_or(0);
|
||||
let (tid, pc, lr) = writer_ctx();
|
||||
self.mem_watch_count.fetch_add(1, Ordering::Relaxed);
|
||||
tracing::info!(
|
||||
target: "mem_watch",
|
||||
"MEM-WATCH addr={:#010x} old={:#010x} new={:#010x} store_addr={:#010x} store_len={} tid={} pc={:#010x} lr={:#010x}",
|
||||
watch, old_val, new_val, addr, len, tid, pc, lr,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `Some((watch, u32_lane))` if the store at `[addr, addr+len)`
|
||||
/// overlaps the first watched address; otherwise `None`. Used by
|
||||
/// the write hooks to capture OLD before the store and pass to
|
||||
/// [`Self::check_mem_watch`] post-store. Hot-path early-out.
|
||||
#[inline]
|
||||
fn capture_mem_watch_old(&self, addr: u32, len: u32) -> Option<(u32, u32)> {
|
||||
if self.mem_watch_addrs.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let store_end = addr.saturating_add(len);
|
||||
for &watch in &self.mem_watch_addrs {
|
||||
if watch >= addr && watch < store_end {
|
||||
let p = self.translate_virtual(watch) as *const [u8; 4];
|
||||
let v = u32::from_be_bytes(unsafe { *p });
|
||||
return Some((watch, v));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl MemoryAccess for GuestMemory {
|
||||
@@ -435,9 +540,11 @@ impl MemoryAccess for GuestMemory {
|
||||
return;
|
||||
}
|
||||
if !self.is_mapped(addr) { return; }
|
||||
let old_lane = self.capture_mem_watch_old(addr, 1);
|
||||
let ptr = self.translate_virtual_mut(addr);
|
||||
unsafe { *ptr = val };
|
||||
self.bump_page_version(addr);
|
||||
self.check_mem_watch(addr, 1, old_lane);
|
||||
}
|
||||
|
||||
fn write_u16(&self, addr: u32, val: u16) {
|
||||
@@ -445,6 +552,7 @@ impl MemoryAccess for GuestMemory {
|
||||
(mmio.write_callback)(addr, val as u32);
|
||||
} else if !self.is_mapped(addr) {
|
||||
} else {
|
||||
let old_lane = self.capture_mem_watch_old(addr, 2);
|
||||
let ptr = self.translate_virtual_mut(addr);
|
||||
unsafe {
|
||||
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 2);
|
||||
@@ -456,6 +564,7 @@ impl MemoryAccess for GuestMemory {
|
||||
if (addr & 0xFFF) >= (PAGE_SIZE - 1) {
|
||||
self.bump_page_version(addr.wrapping_add(1));
|
||||
}
|
||||
self.check_mem_watch(addr, 2, old_lane);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -464,6 +573,7 @@ impl MemoryAccess for GuestMemory {
|
||||
(mmio.write_callback)(addr, val);
|
||||
} else if !self.is_mapped(addr) {
|
||||
} else {
|
||||
let old_lane = self.capture_mem_watch_old(addr, 4);
|
||||
let ptr = self.translate_virtual_mut(addr);
|
||||
unsafe {
|
||||
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 4);
|
||||
@@ -472,6 +582,7 @@ impl MemoryAccess for GuestMemory {
|
||||
if (addr & 0xFFF) >= (PAGE_SIZE - 3) {
|
||||
self.bump_page_version(addr.wrapping_add(3));
|
||||
}
|
||||
self.check_mem_watch(addr, 4, old_lane);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -481,6 +592,7 @@ impl MemoryAccess for GuestMemory {
|
||||
(mmio.write_callback)(addr.wrapping_add(4), val as u32);
|
||||
} else if !self.is_mapped(addr) {
|
||||
} else {
|
||||
let old_lane = self.capture_mem_watch_old(addr, 8);
|
||||
let ptr = self.translate_virtual_mut(addr);
|
||||
unsafe {
|
||||
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 8);
|
||||
@@ -489,6 +601,7 @@ impl MemoryAccess for GuestMemory {
|
||||
if (addr & 0xFFF) >= (PAGE_SIZE - 7) {
|
||||
self.bump_page_version(addr.wrapping_add(7));
|
||||
}
|
||||
self.check_mem_watch(addr, 8, old_lane);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -782,6 +895,51 @@ mod tests {
|
||||
assert_eq!(mem.read_u8(0x3000), 0x11);
|
||||
assert_eq!(mem.read_u8(0x3007), 0x88);
|
||||
}
|
||||
|
||||
#[test]
fn mem_watch_fires_on_overlapping_store() {
    let watched: u32 = 0x4010;
    let mut guest = empty_mem();
    guest
        .alloc(0x4000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
        .unwrap();
    guest.arm_mem_watch(vec![watched]);
    super::set_writer_ctx(7, 0x8200_0000, 0x8200_0004);

    // A 4-byte store that starts on the watched byte is reported.
    guest.write_u32(watched, 0xDEAD_BEEF);
    assert_eq!(guest.mem_watch_count(), 1);

    // So is a 1-byte store to exactly the watched byte.
    guest.write_u8(watched, 0x11);
    assert_eq!(guest.mem_watch_count(), 2);

    // Two bytes further along, a 1-byte store misses the watched byte.
    guest.write_u8(watched + 2, 0x22);
    assert_eq!(guest.mem_watch_count(), 2);

    // A 2-byte store lying wholly past the watched byte is ignored too.
    guest.write_u16(watched + 4, 0xCAFE);
    assert_eq!(guest.mem_watch_count(), 2);

    // A bulk write covering the watched byte counts as exactly one fire.
    guest.write_bulk(0x4000, &[0u8; 0x20]);
    assert_eq!(guest.mem_watch_count(), 3);
}
|
||||
|
||||
#[test]
fn mem_watch_empty_set_zero_overhead_path() {
    // Nothing is armed here: the store must land as usual and the fire
    // counter must stay at zero.
    let base: u32 = 0x5000;
    let mut guest = empty_mem();
    guest
        .alloc(base, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
        .unwrap();
    guest.write_u32(base, 0x1234_5678);
    assert_eq!(guest.read_u32(base), 0x1234_5678);
    assert_eq!(guest.mem_watch_count(), 0);
}
|
||||
|
||||
#[test]
fn mem_watch_arm_dedups_and_sorts() {
    let mut guest = empty_mem();
    guest
        .alloc(0x6000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
        .unwrap();
    // Out of order and with repeats: only two distinct addresses remain.
    let requested = vec![0x6008, 0x6004, 0x6008, 0x6004];
    guest.arm_mem_watch(requested);
    // The 8-byte store covers [0x6004, 0x600C), which contains both
    // 0x6004 and 0x6008, and each distinct watched address fires once.
    guest.write_u64(0x6004, 0x1111_2222_3333_4444);
    assert_eq!(guest.mem_watch_count(), 2);
}
|
||||
}
|
||||
|
||||
impl Drop for GuestMemory {
|
||||
|
||||
@@ -8,7 +8,7 @@ mod platform;
|
||||
use thiserror::Error;
|
||||
|
||||
pub use access::MemoryAccess;
|
||||
pub use heap::{GuestMemory, HeapType};
|
||||
pub use heap::{set_writer_ctx, GuestMemory, HeapType};
|
||||
pub use mmio::MmioRegion;
|
||||
pub use page_table::PageEntry;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user