diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 94f0999..08d840a 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -213,6 +213,14 @@ enum Commands { /// `XENIA_BRANCH_PROBE`. #[arg(long)] branch_probe: Option, + /// Diagnostic. Comma-separated guest byte addresses; on every + /// guest store that overlaps any listed byte, emit one + /// `MEM-WATCH` line at tracing target `mem_watch` with the + /// (tid, pc, lr) of the writer plus old/new u32 lanes. + /// Read-only; lockstep digest unaffected. Settable via + /// `XENIA_MEM_WATCH`. Example: `--mem-watch=0x828F40B4`. + #[arg(long)] + mem_watch: Option, }, /// Browse XISO disc image contents Browse { @@ -371,6 +379,7 @@ fn main() -> Result<()> { ctor_probe, dump_addr, branch_probe, + mem_watch, } => cmd_exec( &path, max_instructions, @@ -392,6 +401,7 @@ fn main() -> Result<()> { ctor_probe.as_deref(), dump_addr.as_deref(), branch_probe.as_deref(), + mem_watch.as_deref(), ), Commands::Browse { path } => cmd_browse(&path), Commands::Info { path } => cmd_info(&path), @@ -596,6 +606,7 @@ fn cmd_exec( ctor_probe: Option<&str>, dump_addr: Option<&str>, branch_probe: Option<&str>, + mem_watch: Option<&str>, ) -> Result<()> { cmd_exec_inner( path, @@ -618,6 +629,7 @@ fn cmd_exec( ctor_probe, dump_addr, branch_probe, + mem_watch, None, None, false, @@ -659,6 +671,7 @@ fn cmd_check( None, // ctor_probe — diagnostic, never wanted on goldens None, // dump_addr — same None, // branch_probe — diagnostic, never wanted on goldens + None, // mem_watch — same out, expect, stable_digest, @@ -686,6 +699,7 @@ fn cmd_exec_inner( ctor_probe: Option<&str>, dump_addr: Option<&str>, branch_probe: Option<&str>, + mem_watch: Option<&str>, digest_out: Option<&str>, digest_expect: Option<&str>, stable_digest: bool, @@ -1058,6 +1072,41 @@ fn cmd_exec_inner( } } + let mem_watch_combined: Option = match (mem_watch, std::env::var("XENIA_MEM_WATCH").ok()) { + (Some(s), _) => 
Some(s.to_string()), + (None, Some(s)) if !s.is_empty() => Some(s), + _ => None, + }; + let mut mem_watch_addrs: Vec = Vec::new(); + if let Some(list) = mem_watch_combined { + for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) { + let parsed = if let Some(hex) = token.strip_prefix("0x").or_else(|| token.strip_prefix("0X")) { + u32::from_str_radix(hex, 16) + } else { + token.parse::() + }; + match parsed { + Ok(addr) => mem_watch_addrs.push(addr), + Err(_) => { + return Err(anyhow::anyhow!( + "invalid address in --mem-watch: {token:?}" + )); + } + } + } + if !quiet && !mem_watch_addrs.is_empty() { + let strs: Vec = mem_watch_addrs + .iter() + .map(|a| format!("{a:#010x}")) + .collect(); + tracing::info!( + "mem-watch armed: {} ({})", + mem_watch_addrs.len(), + strs.join(", ") + ); + } + } + // Install the GPU register aperture MMIO region on the guest memory so // any `0x7FC8xxxx` access routes to our atomic mailbox. Matches canary's // `graphics_system.cc:141-144`. The callbacks capture Arc clones of the @@ -1272,6 +1321,10 @@ fn cmd_exec_inner( // M1.4 — wrap `mem` in an `Arc` after all init mutations // are complete. The worker thread (if spawned below) holds its own + if !mem_watch_addrs.is_empty() { + mem.arm_mem_watch(mem_watch_addrs); + } + // Arc clone for the duration of the run; the CPU side passes // `&*mem_arc` (= `&GuestMemory`) into `run_execution`. The // trait-level invariant carrying this is correctness: writes are @@ -1999,6 +2052,12 @@ fn worker_prologue( kernel.fire_ctor_probe_if_match(hw_id, mem); kernel.fire_branch_probe_if_match(hw_id); + if mem.has_mem_watch() { + let ctx = kernel.scheduler.ctx(hw_id); + let tid = kernel.scheduler.tid(hw_id).unwrap_or(0); + xenia_memory::set_writer_ctx(tid, ctx.pc, ctx.lr as u32); + } + // 1) Halt-sentinel check (per HW thread). 
if pc == LR_HALT { let injected_here = kernel.interrupts.saved.is_some() diff --git a/crates/xenia-memory/src/heap.rs b/crates/xenia-memory/src/heap.rs index c951a8d..6e5e1a3 100644 --- a/crates/xenia-memory/src/heap.rs +++ b/crates/xenia-memory/src/heap.rs @@ -1,3 +1,4 @@ +use std::cell::Cell; use std::sync::atomic::{AtomicU64, Ordering}; use crate::access::MemoryAccess; @@ -5,6 +6,23 @@ use crate::mmio::MmioRegion; use crate::page_table::{AllocationState, MemoryProtect, PageEntry}; use crate::MemoryError; +thread_local! { + static WRITER_CTX: Cell<(u32, u32, u32)> = const { Cell::new((0, 0, 0)) }; +} + +/// Stamp the (tid, pc, lr) of the executing instruction on the current +/// host thread. Read by [`GuestMemory::check_mem_watch`] when a watched +/// store fires, so the emitted trace line names the writer. Cheap — +/// thread-local `Cell::set`, no syscalls. Default `(0,0,0)` is harmless +/// when no watch is armed. +pub fn set_writer_ctx(tid: u32, pc: u32, lr: u32) { + WRITER_CTX.with(|c| c.set((tid, pc, lr))); +} + +fn writer_ctx() -> (u32, u32, u32) { + WRITER_CTX.with(|c| c.get()) +} + const PAGE_SIZE: u32 = 4096; /// Total guest address space: 4GB. const GUEST_ADDRESS_SPACE: usize = 0x1_0000_0000; @@ -64,6 +82,13 @@ pub struct GuestMemory { /// Monotonic global write counter — makes per-page versions /// cross-comparable even when their indices alias. writes_total: AtomicU64, + /// Sorted list of guest byte addresses to log on every store that + /// touches them. Populated once via [`Self::arm_mem_watch`] before + /// the run starts; stable for the run. Empty by default → the hot + /// path's `is_empty()` check is a single cache-resident load. + mem_watch_addrs: Vec, + /// Count of fires observed (for tests / hand-off telemetry). 
+ mem_watch_count: AtomicU64, } /// Greatest common bit-mask such that `(a & m) == (b & m)` for every bit @@ -106,6 +131,8 @@ impl GuestMemory { owned: true, page_versions: (0..PAGE_COUNT).map(|_| AtomicU64::new(0)).collect(), writes_total: AtomicU64::new(0), + mem_watch_addrs: Vec::new(), + mem_watch_count: AtomicU64::new(0), }) } @@ -329,6 +356,8 @@ impl GuestMemory { /// remains the caller's responsibility (the table isn't reachable /// from `GuestMemory` without a wider plumbing change). pub fn write_bulk(&self, addr: u32, buf: &[u8]) { + let len = buf.len() as u32; + let old_lane = self.capture_mem_watch_old(addr, len); let ptr = self.translate_virtual_mut(addr); unsafe { std::ptr::copy_nonoverlapping(buf.as_ptr(), ptr, buf.len()); @@ -336,7 +365,7 @@ impl GuestMemory { if buf.is_empty() { return; } - let last_byte = addr.saturating_add(buf.len() as u32).saturating_sub(1); + let last_byte = addr.saturating_add(len).saturating_sub(1); let first_page = addr / PAGE_SIZE; let last_page = last_byte / PAGE_SIZE; for page in first_page..=last_page { @@ -345,6 +374,7 @@ impl GuestMemory { // the page works. self.bump_page_version(page * PAGE_SIZE); } + self.check_mem_watch(addr, len, old_lane); } /// Check if a guest address has been allocated/committed. Acquire load @@ -370,6 +400,81 @@ impl GuestMemory { .get(page) .map(|a| PageEntry::from_raw(a.load(std::sync::atomic::Ordering::Acquire))) } + + /// Arm the memory watch set. Each address is checked for byte-exact + /// overlap with every store; on a hit, one `tracing::info!` line is + /// emitted at target `mem_watch` with the (tid, pc, lr) of the + /// writer (set via [`set_writer_ctx`] from the interpreter prologue), + /// the previous value, and the new value. Read-only diagnostic; the + /// store itself is unaffected. + pub fn arm_mem_watch(&mut self, mut addrs: Vec) { + addrs.sort(); + addrs.dedup(); + self.mem_watch_addrs = addrs; + } + + /// Number of mem-watch fires observed since arming. 
+    pub fn mem_watch_count(&self) -> u64 {
+        self.mem_watch_count.load(Ordering::Relaxed)
+    }
+
+    /// True iff at least one watch address is armed.
+    #[inline]
+    pub fn has_mem_watch(&self) -> bool {
+        !self.mem_watch_addrs.is_empty()
+    }
+
+    /// Post-store hook: for each watched byte inside `[addr, addr+len)`,
+    /// count the hit and emit one `MEM-WATCH` record with the writer's
+    /// (tid, pc, lr) (per [`set_writer_ctx`]), the big-endian u32 read at
+    /// the watched address after the store, and the store address/width.
+    /// `old_lane_at_watch` is the single `(watch, u32)` pair captured BEFORE
+    /// the store; other watched bytes in the same store log `old=unknown`.
+    #[inline]
+    fn check_mem_watch(&self, addr: u32, len: u32, old_lane_at_watch: Option<(u32, u32)>) {
+        if self.mem_watch_addrs.is_empty() {
+            return;
+        }
+        // Sorted by `arm_mem_watch`: jump to the first watch >= `addr`. `w - addr < len`
+        // cannot underflow there and, unlike a saturated end, still matches 0xFFFF_FFFF.
+        let first = self.mem_watch_addrs.partition_point(|&w| w < addr);
+        for &watch in self.mem_watch_addrs[first..].iter().take_while(|&&w| w - addr < len) {
+            // NOTE(review): reads 4 bytes from `watch`; assumes `watch + 3` is readable — confirm.
+            let p = self.translate_virtual(watch) as *const [u8; 4];
+            let new_val = u32::from_be_bytes(unsafe { *p });
+            let old = match old_lane_at_watch {
+                Some((w, v)) if w == watch => format!("{v:#010x}"),
+                _ => String::from("unknown"),
+            };
+            let (tid, pc, lr) = writer_ctx();
+            self.mem_watch_count.fetch_add(1, Ordering::Relaxed);
+            tracing::info!(
+                target: "mem_watch",
+                "MEM-WATCH addr={:#010x} old={} new={:#010x} store_addr={:#010x} store_len={} tid={} pc={:#010x} lr={:#010x}",
+                watch, old, new_val, addr, len, tid, pc, lr,
+            );
+        }
+    }
+
+    /// Returns `Some((watch, u32_lane))` if the store at `[addr, addr+len)`
+    /// overlaps the first watched address; otherwise `None`. Used by
+    /// the write hooks to capture OLD before the store and pass to
+    /// [`Self::check_mem_watch`] post-store. Hot-path early-out.
+ #[inline] + fn capture_mem_watch_old(&self, addr: u32, len: u32) -> Option<(u32, u32)> { + if self.mem_watch_addrs.is_empty() { + return None; + } + let store_end = addr.saturating_add(len); + for &watch in &self.mem_watch_addrs { + if watch >= addr && watch < store_end { + let p = self.translate_virtual(watch) as *const [u8; 4]; + let v = u32::from_be_bytes(unsafe { *p }); + return Some((watch, v)); + } + } + None + } } impl MemoryAccess for GuestMemory { @@ -435,9 +540,11 @@ impl MemoryAccess for GuestMemory { return; } if !self.is_mapped(addr) { return; } + let old_lane = self.capture_mem_watch_old(addr, 1); let ptr = self.translate_virtual_mut(addr); unsafe { *ptr = val }; self.bump_page_version(addr); + self.check_mem_watch(addr, 1, old_lane); } fn write_u16(&self, addr: u32, val: u16) { @@ -445,6 +552,7 @@ impl MemoryAccess for GuestMemory { (mmio.write_callback)(addr, val as u32); } else if !self.is_mapped(addr) { } else { + let old_lane = self.capture_mem_watch_old(addr, 2); let ptr = self.translate_virtual_mut(addr); unsafe { std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 2); @@ -456,6 +564,7 @@ impl MemoryAccess for GuestMemory { if (addr & 0xFFF) >= (PAGE_SIZE - 1) { self.bump_page_version(addr.wrapping_add(1)); } + self.check_mem_watch(addr, 2, old_lane); } } @@ -464,6 +573,7 @@ impl MemoryAccess for GuestMemory { (mmio.write_callback)(addr, val); } else if !self.is_mapped(addr) { } else { + let old_lane = self.capture_mem_watch_old(addr, 4); let ptr = self.translate_virtual_mut(addr); unsafe { std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 4); @@ -472,6 +582,7 @@ impl MemoryAccess for GuestMemory { if (addr & 0xFFF) >= (PAGE_SIZE - 3) { self.bump_page_version(addr.wrapping_add(3)); } + self.check_mem_watch(addr, 4, old_lane); } } @@ -481,6 +592,7 @@ impl MemoryAccess for GuestMemory { (mmio.write_callback)(addr.wrapping_add(4), val as u32); } else if !self.is_mapped(addr) { } else { + let old_lane = 
self.capture_mem_watch_old(addr, 8); let ptr = self.translate_virtual_mut(addr); unsafe { std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 8); @@ -489,6 +601,7 @@ impl MemoryAccess for GuestMemory { if (addr & 0xFFF) >= (PAGE_SIZE - 7) { self.bump_page_version(addr.wrapping_add(7)); } + self.check_mem_watch(addr, 8, old_lane); } } @@ -782,6 +895,51 @@ mod tests { assert_eq!(mem.read_u8(0x3000), 0x11); assert_eq!(mem.read_u8(0x3007), 0x88); } + + #[test] + fn mem_watch_fires_on_overlapping_store() { + let mut mem = empty_mem(); + mem.alloc(0x4000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap(); + mem.arm_mem_watch(vec![0x4010]); + super::set_writer_ctx(7, 0x8200_0000, 0x8200_0004); + // u32 store directly on the watched address fires. + mem.write_u32(0x4010, 0xDEAD_BEEF); + assert_eq!(mem.mem_watch_count(), 1); + // u8 store on the watched byte itself fires. + mem.write_u8(0x4010, 0x11); + assert_eq!(mem.mem_watch_count(), 2); + // u8 store at +2 is outside the byte-exact watch — no fire. + mem.write_u8(0x4012, 0x22); + assert_eq!(mem.mem_watch_count(), 2); + // u16 store strictly outside the watched byte does NOT fire. + mem.write_u16(0x4014, 0xCAFE); + assert_eq!(mem.mem_watch_count(), 2); + // bulk write spanning the watch fires once. + mem.write_bulk(0x4000, &[0u8; 0x20]); + assert_eq!(mem.mem_watch_count(), 3); + } + + #[test] + fn mem_watch_empty_set_zero_overhead_path() { + // With no addresses armed, write_u32 must NOT bump the count + // and must produce identical post-store memory state. 
+ let mut mem = empty_mem(); + mem.alloc(0x5000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap(); + mem.write_u32(0x5000, 0x1234_5678); + assert_eq!(mem.read_u32(0x5000), 0x1234_5678); + assert_eq!(mem.mem_watch_count(), 0); + } + + #[test] + fn mem_watch_arm_dedups_and_sorts() { + let mut mem = empty_mem(); + mem.alloc(0x6000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap(); + mem.arm_mem_watch(vec![0x6008, 0x6004, 0x6008, 0x6004]); + // A single store hitting either address fires once per watch addr. + mem.write_u64(0x6004, 0x1111_2222_3333_4444); + // 0x6004 and 0x6008 are both inside [0x6004, 0x600C); two fires. + assert_eq!(mem.mem_watch_count(), 2); + } } impl Drop for GuestMemory { diff --git a/crates/xenia-memory/src/lib.rs b/crates/xenia-memory/src/lib.rs index f7d991a..3da9547 100644 --- a/crates/xenia-memory/src/lib.rs +++ b/crates/xenia-memory/src/lib.rs @@ -8,7 +8,7 @@ mod platform; use thiserror::Error; pub use access::MemoryAccess; -pub use heap::{GuestMemory, HeapType}; +pub use heap::{set_writer_ctx, GuestMemory, HeapType}; pub use mmio::MmioRegion; pub use page_table::PageEntry;