use std::cell::Cell; use std::sync::atomic::{AtomicU64, Ordering}; use crate::access::MemoryAccess; use crate::mmio::MmioRegion; use crate::page_table::{AllocationState, MemoryProtect, PageEntry}; use crate::MemoryError; thread_local! { static WRITER_CTX: Cell<(u32, u32, u32)> = const { Cell::new((0, 0, 0)) }; } /// Stamp the (tid, pc, lr) of the executing instruction on the current /// host thread. Read by [`GuestMemory::check_mem_watch`] when a watched /// store fires, so the emitted trace line names the writer. Cheap — /// thread-local `Cell::set`, no syscalls. Default `(0,0,0)` is harmless /// when no watch is armed. pub fn set_writer_ctx(tid: u32, pc: u32, lr: u32) { WRITER_CTX.with(|c| c.set((tid, pc, lr))); } fn writer_ctx() -> (u32, u32, u32) { WRITER_CTX.with(|c| c.get()) } const PAGE_SIZE: u32 = 4096; /// Total guest address space: 4GB. const GUEST_ADDRESS_SPACE: usize = 0x1_0000_0000; /// Number of 4K pages in the 4GB address space. const PAGE_COUNT: usize = GUEST_ADDRESS_SPACE / PAGE_SIZE as usize; /// Physical memory mask (512MB physical address space). const PHYSICAL_ADDR_MASK: u32 = 0x1FFF_FFFF; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum HeapType { GuestVirtual, GuestXex, GuestPhysical, } /// The core guest memory system. Manages a 4GB virtual address space /// via mmap/VirtualAlloc, with page-level tracking and MMIO dispatch. pub struct GuestMemory { /// Host pointer to the base of the 4GB guest address space. membase: *mut u8, /// Page table tracking allocation state for each 4K page. Each entry is /// an `AtomicU64` carrying the bit-packed [`PageEntry`] representation. /// Atomic so [`Self::alloc`] (and friends) can take `&self` and run /// concurrently with the load/store hot path's [`Self::is_mapped`] /// checks. Allocation crosses many pages but each per-page Release store /// is independently published; readers (`is_mapped`/`page_entry`) use /// Acquire loads. Multi-page atomicity is not provided — callers ensure /// happens-before via export ordering (alloc completes before any guest /// access of the new region). page_table: Vec, /// Registered MMIO regions (sorted by base address for binary search). mmio_regions: Vec, /// Cached *necessary* condition for an address to fall inside *any* /// registered MMIO region: an address `a` can match only if /// `(a & mmio_aperture_mask) == mmio_aperture_value`. Recomputed /// inside [`add_mmio_region`] as the union (greatest common /// prefix) of every region's `(mask, base & mask)` pair. /// /// With the GPU MMIO at `0x7FC8_0000 / 0xFFFF_0000` as the only /// registered region, this is a single bit-mask compare per scalar /// load/store — eliminating the prior O(N) `iter().find` over the /// region list on every access. With zero regions registered the /// flag stays at the "match nothing" sentinel and the hot path /// returns `None` without touching the Vec. mmio_aperture_mask: u32, mmio_aperture_value: u32, /// Whether the memory mapping is owned (should be unmapped on drop). owned: bool, /// P5 texture-cache invalidation: per-4KB-page monotonic write /// version. Every `write_u8/16/32/64` bumps /// `page_versions[addr >> 12]`, and a global `writes_total` counter /// (shared by all pages) gets stamped into each page. The texture /// cache computes `max(page_versions[..])` over the texture's byte /// footprint at bind time and re-decodes if any page has advanced /// since the cached entry. page_versions: Vec, /// Monotonic global write counter — makes per-page versions /// cross-comparable even when their indices alias. writes_total: AtomicU64, /// Sorted list of guest byte addresses to log on every store that /// touches them. Populated once via [`Self::arm_mem_watch`] before /// the run starts; stable for the run. Empty by default → the hot /// path's `is_empty()` check is a single cache-resident load. mem_watch_addrs: Vec, /// Count of fires observed (for tests / hand-off telemetry). mem_watch_count: AtomicU64, } /// Greatest common bit-mask such that `(a & m) == (b & m)` for every bit /// where `mask_a` *and* `mask_b` are set and the masked values agree. /// Used by `add_mmio_region` to fold a new region into the cached /// fast-reject pair without losing soundness — the result is always a /// *necessary* condition for membership in either region. #[inline] fn fold_aperture( cur_mask: u32, cur_value: u32, new_mask: u32, new_value: u32, ) -> (u32, u32) { // Bits that both masks cover AND on which both values agree are the // only bits we can keep. Disagreement on any covered bit collapses // that bit out of the cache. let common_mask = cur_mask & new_mask; let agreed = !(cur_value ^ new_value); let m = common_mask & agreed; (m, cur_value & m) } unsafe impl Send for GuestMemory {} unsafe impl Sync for GuestMemory {} impl GuestMemory { /// Create a new guest memory space by reserving a 4GB virtual address region. pub fn new() -> Result { let membase = crate::platform::reserve_address_space(GUEST_ADDRESS_SPACE)?; Ok(Self { membase, page_table: (0..PAGE_COUNT).map(|_| std::sync::atomic::AtomicU64::new(0)).collect(), mmio_regions: Vec::new(), // Sentinel "match nothing" — `(a & !0) == !0` is false for // any `a`, so `find_mmio` short-circuits to `None` until the // first region is registered. mmio_aperture_mask: u32::MAX, mmio_aperture_value: u32::MAX, owned: true, page_versions: (0..PAGE_COUNT).map(|_| AtomicU64::new(0)).collect(), writes_total: AtomicU64::new(0), mem_watch_addrs: Vec::new(), mem_watch_count: AtomicU64::new(0), }) } /// Current version watermark for the page containing `addr`. Bumped by /// any write through `write_u8/16/32/64`. Not affected by MMIO writes /// (those don't touch the backing texture memory). /// /// Acquire load: any thread observing a value `v` here also observes /// every memory write the bumping thread published before its /// Release-store of `v` (see [`bump_page_version`]). This is the /// synchronizes-with edge consumed by the texture cache once the GPU /// runs on its own host thread. pub fn page_version(&self, addr: u32) -> u64 { let idx = (addr / PAGE_SIZE) as usize; self.page_versions .get(idx) .map(|a| a.load(Ordering::Acquire)) .unwrap_or(0) } /// Maximum page version across the byte span `[addr, addr+len)`. /// O(pages) — fast for typical texture sizes (1 MiB = 256 pages). pub fn max_page_version(&self, addr: u32, len: u32) -> u64 { if len == 0 { return self.page_version(addr); } let first = addr / PAGE_SIZE; let last = addr.saturating_add(len.saturating_sub(1)) / PAGE_SIZE; let mut m = 0u64; for p in first..=last { if let Some(slot) = self.page_versions.get(p as usize) { let v = slot.load(Ordering::Acquire); if v > m { m = v; } } } m } /// Total number of write events observed. Useful for cross-page tie /// breaking and HUD-level "is the guest scribbling?" metrics. pub fn writes_total(&self) -> u64 { self.writes_total.load(Ordering::Relaxed) } #[inline] fn bump_page_version(&self, addr: u32) { // Relaxed is sufficient for the global tick — the only payload // that depends on a particular value is the per-page slot below, // and the publish-edge there is its own Release store. let stamp = self .writes_total .fetch_add(1, Ordering::Relaxed) .wrapping_add(1); let idx = (addr / PAGE_SIZE) as usize; if let Some(slot) = self.page_versions.get(idx) { // Release: any reader that Acquire-loads this slot and sees // `stamp` also observes the data store that preceded this // bump (the unsafe `*ptr = val` in the surrounding write_*). slot.store(stamp, Ordering::Release); } } /// Get the host base pointer for the guest address space. pub fn membase(&self) -> *const u8 { self.membase } /// Get a mutable host base pointer. pub fn membase_mut(&mut self) -> *mut u8 { self.membase } /// Translate a guest virtual address to a host pointer. pub fn translate_virtual(&self, guest_addr: u32) -> *const u8 { unsafe { self.membase.add(guest_addr as usize) } } /// Translate a guest virtual address to a mutable host pointer. /// /// Takes `&self`. The returned pointer is into the shared /// `membase` mapping; the soundness contract is the trait-level one /// in [`crate::access::MemoryAccess`] — callers must not concurrently /// read and write the same byte range from different threads. pub fn translate_virtual_mut(&self, guest_addr: u32) -> *mut u8 { unsafe { self.membase.add(guest_addr as usize) } } /// Translate a guest physical address to a host pointer. pub fn translate_physical(&self, guest_addr: u32) -> *const u8 { let phys = guest_addr & PHYSICAL_ADDR_MASK; unsafe { self.membase.add(phys as usize) } } /// Register an MMIO region. pub fn add_mmio_region(&mut self, region: MmioRegion) { let new_mask = region.mask; let new_value = region.base_address & region.mask; if self.mmio_regions.is_empty() { self.mmio_aperture_mask = new_mask; self.mmio_aperture_value = new_value; } else { let (m, v) = fold_aperture( self.mmio_aperture_mask, self.mmio_aperture_value, new_mask, new_value, ); self.mmio_aperture_mask = m; self.mmio_aperture_value = v; } let base = region.base_address; let idx = self .mmio_regions .binary_search_by_key(&base, |r| r.base_address) .unwrap_or_else(|i| i); self.mmio_regions.insert(idx, region); } /// Check if an address is in a registered MMIO region. /// /// Tier-3 perf — non-MMIO addresses (the common case for code fetch /// and main-RAM data accesses) get rejected by a single bit-mask /// compare against the cached aperture, skipping the linear search /// over `mmio_regions`. The `iter().find` fallback only runs for /// addresses that pass the necessary-but-not-sufficient prefilter, /// preserving exact MMIO semantics when multiple regions share a /// prefix or when a region's `mask` admits non-contiguous addresses. #[inline] fn find_mmio(&self, addr: u32) -> Option<&MmioRegion> { if (addr & self.mmio_aperture_mask) != self.mmio_aperture_value { return None; } self.mmio_regions.iter().find(|r| r.contains(addr)) } /// Allocate a region in the guest address space. /// /// Validates that `base` is page-aligned and that `base + size` does not /// overflow the 4GB guest address space. Takes `&self` — `page_table` /// is `Vec` so per-page state updates use atomic stores /// (`Release` ordering, paired with `Acquire` loads in /// [`Self::is_mapped`] / [`Self::page_entry`]). The kernel ensures /// happens-before across the alloc-then-use boundary at the export /// level (the guest cannot observe the new region until the export /// returns), so a single Release per page suffices and we don't need /// multi-page atomicity. pub fn alloc( &self, base: u32, size: u32, protect: MemoryProtect, ) -> Result { if !base.is_multiple_of(PAGE_SIZE) { return Err(MemoryError::AllocationFailed(format!( "alloc base {:#x} is not page-aligned", base ))); } let end = (base as u64).saturating_add(size as u64); if end > GUEST_ADDRESS_SPACE as u64 { return Err(MemoryError::AllocationFailed(format!( "alloc range {:#x}+{:#x} exceeds 4GB guest space", base, size ))); } let page_start = (base / PAGE_SIZE) as usize; let page_count = size.div_ceil(PAGE_SIZE) as usize; // Commit pages via platform. `commit_memory` takes `*mut u8` but // doesn't actually need exclusive access — the OS-level mmap call // is independently thread-safe. let host_ptr = unsafe { self.membase.add(base as usize) }; crate::platform::commit_memory(host_ptr, page_count * PAGE_SIZE as usize)?; // Build a single `PageEntry` once, then Release-store it into each // affected slot. Using a fresh `PageEntry::default()` per page // would yield the same bits but at higher cost. let mut entry = PageEntry::default(); entry.set_base_address(page_start as u32); entry.set_region_page_count(page_count as u32); entry.set_allocation_protect(protect); entry.set_current_protect(protect); entry.set_state(AllocationState::RESERVE | AllocationState::COMMIT); let raw = entry.raw(); for i in 0..page_count { let idx = page_start + i; if let Some(slot) = self.page_table.get(idx) { slot.store(raw, std::sync::atomic::Ordering::Release); } } Ok(base) } /// Read a slice of bytes from guest memory (bypassing MMIO for bulk reads). pub fn read_bulk(&self, addr: u32, buf: &mut [u8]) { let ptr = self.translate_virtual(addr); unsafe { std::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr(), buf.len()); } } /// Write a slice of bytes to guest memory (bypassing MMIO for bulk writes). /// /// Takes `&self` (matches the trait-level write contract): the actual /// store goes through a raw `*mut u8` derived from `membase`, which /// has no Rust aliasing semantics. Callers must respect the trait /// contract — no concurrent read/write of the same byte range from /// different threads. Used by the XEX loader (init, single-thread) /// and `NtReadFile` (mid-execution; the file's destination buffer is /// guest-thread-private by construction). /// /// XMODBUG-002: bumps `page_versions` for every page the write /// touches. Pre-fix, callers like `NtReadFile` could rewrite a page /// containing texture or shader bytes that a downstream cache had /// already keyed on the prior version — the cache would happily /// hand back the stale decoded bytes. The per-byte `write_*` methods /// already bump the version after their store; this is the bulk /// equivalent. Reservation-table invalidation for `lwarx`/`stwcx.` /// remains the caller's responsibility (the table isn't reachable /// from `GuestMemory` without a wider plumbing change). pub fn write_bulk(&self, addr: u32, buf: &[u8]) { let len = buf.len() as u32; let old_lane = self.capture_mem_watch_old(addr, len); let ptr = self.translate_virtual_mut(addr); unsafe { std::ptr::copy_nonoverlapping(buf.as_ptr(), ptr, buf.len()); } if buf.is_empty() { return; } let last_byte = addr.saturating_add(len).saturating_sub(1); let first_page = addr / PAGE_SIZE; let last_page = last_byte / PAGE_SIZE; for page in first_page..=last_page { // Use the page-aligned address; bump_page_version computes // the slot index by `addr / PAGE_SIZE` so any address within // the page works. self.bump_page_version(page * PAGE_SIZE); } self.check_mem_watch(addr, len, old_lane); } /// Check if a guest address has been allocated/committed. Acquire load /// pairs with the Release store in [`Self::alloc`] — any thread that /// observes `state.contains(COMMIT)` here also observes every /// allocation-side metadata write that preceded the store. pub fn is_mapped(&self, addr: u32) -> bool { let page = (addr / PAGE_SIZE) as usize; if page >= self.page_table.len() { return false; } let raw = self.page_table[page].load(std::sync::atomic::Ordering::Acquire); PageEntry::from_raw(raw) .state() .contains(AllocationState::COMMIT) } /// Get a page table entry for a given address, or None if out of range. /// Returns by value (the storage is now atomic; we publish a snapshot). pub fn page_entry(&self, addr: u32) -> Option { let page = (addr / PAGE_SIZE) as usize; self.page_table .get(page) .map(|a| PageEntry::from_raw(a.load(std::sync::atomic::Ordering::Acquire))) } /// Arm the memory watch set. Each address is checked for byte-exact /// overlap with every store; on a hit, one `tracing::info!` line is /// emitted at target `mem_watch` with the (tid, pc, lr) of the /// writer (set via [`set_writer_ctx`] from the interpreter prologue), /// the previous value, and the new value. Read-only diagnostic; the /// store itself is unaffected. pub fn arm_mem_watch(&mut self, mut addrs: Vec) { addrs.sort(); addrs.dedup(); self.mem_watch_addrs = addrs; } /// Number of mem-watch fires observed since arming. pub fn mem_watch_count(&self) -> u64 { self.mem_watch_count.load(Ordering::Relaxed) } /// True iff at least one watch address is armed. #[inline] pub fn has_mem_watch(&self) -> bool { !self.mem_watch_addrs.is_empty() } /// Hot-path check (post-store): if any watched byte address falls /// inside `[addr, addr+len)`, emit a one-line record naming the /// (tid, pc, lr) of the writer (per [`set_writer_ctx`]), the /// post-store u32 lane at the watched address, and the store /// width. `old_lane` is the u32 lane the caller captured BEFORE /// the store fired. #[inline] fn check_mem_watch(&self, addr: u32, len: u32, old_lane_at_watch: Option<(u32, u32)>) { if self.mem_watch_addrs.is_empty() { return; } let store_end = addr.saturating_add(len); for &watch in &self.mem_watch_addrs { if watch >= addr && watch < store_end { let new_val = { let p = self.translate_virtual(watch) as *const [u8; 4]; u32::from_be_bytes(unsafe { *p }) }; let old_val = old_lane_at_watch .and_then(|(w, v)| (w == watch).then_some(v)) .unwrap_or(0); let (tid, pc, lr) = writer_ctx(); self.mem_watch_count.fetch_add(1, Ordering::Relaxed); tracing::info!( target: "mem_watch", "MEM-WATCH addr={:#010x} old={:#010x} new={:#010x} store_addr={:#010x} store_len={} tid={} pc={:#010x} lr={:#010x}", watch, old_val, new_val, addr, len, tid, pc, lr, ); } } } /// Returns `Some((watch, u32_lane))` if the store at `[addr, addr+len)` /// overlaps the first watched address; otherwise `None`. Used by /// the write hooks to capture OLD before the store and pass to /// [`Self::check_mem_watch`] post-store. Hot-path early-out. #[inline] fn capture_mem_watch_old(&self, addr: u32, len: u32) -> Option<(u32, u32)> { if self.mem_watch_addrs.is_empty() { return None; } let store_end = addr.saturating_add(len); for &watch in &self.mem_watch_addrs { if watch >= addr && watch < store_end { let p = self.translate_virtual(watch) as *const [u8; 4]; let v = u32::from_be_bytes(unsafe { *p }); return Some((watch, v)); } } None } } impl MemoryAccess for GuestMemory { // Tier-3 perf: `#[inline]` on the hot read/write paths lets LLVM // fold the MMIO + mapping checks into the interpreter's load/store // handlers, hoisting the "not-MMIO, mapped" branch out of the loop // body for consecutive same-page accesses. #[inline] fn read_u8(&self, addr: u32) -> u8 { // MMIO dispatch must come first — a byte read at an MMIO-mapped // address should invoke the callback, not the backing memory. if let Some(mmio) = self.find_mmio(addr) { return (mmio.read_callback)(addr) as u8; } if !self.is_mapped(addr) { return 0; } let ptr = self.translate_virtual(addr); unsafe { *ptr } } #[inline] fn read_u16(&self, addr: u32) -> u16 { if let Some(mmio) = self.find_mmio(addr) { (mmio.read_callback)(addr) as u16 } else if !self.is_mapped(addr) { 0 } else { let ptr = self.translate_virtual(addr) as *const [u8; 2]; u16::from_be_bytes(unsafe { *ptr }) } } #[inline] fn read_u32(&self, addr: u32) -> u32 { if let Some(mmio) = self.find_mmio(addr) { (mmio.read_callback)(addr) } else if !self.is_mapped(addr) { 0 } else { let ptr = self.translate_virtual(addr) as *const [u8; 4]; u32::from_be_bytes(unsafe { *ptr }) } } #[inline] fn read_u64(&self, addr: u32) -> u64 { if let Some(mmio) = self.find_mmio(addr) { let hi = (mmio.read_callback)(addr) as u64; let lo = (mmio.read_callback)(addr.wrapping_add(4)) as u64; (hi << 32) | lo } else if !self.is_mapped(addr) { 0 } else { let ptr = self.translate_virtual(addr) as *const [u8; 8]; u64::from_be_bytes(unsafe { *ptr }) } } fn write_u8(&self, addr: u32, val: u8) { // MMIO dispatch first — a byte write at an MMIO-mapped address // must invoke the callback, not the backing memory. if let Some(mmio) = self.find_mmio(addr) { (mmio.write_callback)(addr, val as u32); return; } if !self.is_mapped(addr) { return; } let old_lane = self.capture_mem_watch_old(addr, 1); let ptr = self.translate_virtual_mut(addr); unsafe { *ptr = val }; self.bump_page_version(addr); self.check_mem_watch(addr, 1, old_lane); } fn write_u16(&self, addr: u32, val: u16) { if let Some(mmio) = self.find_mmio(addr) { (mmio.write_callback)(addr, val as u32); } else if !self.is_mapped(addr) { } else { let old_lane = self.capture_mem_watch_old(addr, 2); let ptr = self.translate_virtual_mut(addr); unsafe { std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 2); } self.bump_page_version(addr); // A 16-bit write can cross a page boundary; bump the neighbour // too so the texture cache sees the write even if it's looking // at the next page's version. if (addr & 0xFFF) >= (PAGE_SIZE - 1) { self.bump_page_version(addr.wrapping_add(1)); } self.check_mem_watch(addr, 2, old_lane); } } fn write_u32(&self, addr: u32, val: u32) { if let Some(mmio) = self.find_mmio(addr) { (mmio.write_callback)(addr, val); } else if !self.is_mapped(addr) { } else { let old_lane = self.capture_mem_watch_old(addr, 4); let ptr = self.translate_virtual_mut(addr); unsafe { std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 4); } self.bump_page_version(addr); if (addr & 0xFFF) >= (PAGE_SIZE - 3) { self.bump_page_version(addr.wrapping_add(3)); } self.check_mem_watch(addr, 4, old_lane); } } fn write_u64(&self, addr: u32, val: u64) { if let Some(mmio) = self.find_mmio(addr) { (mmio.write_callback)(addr, (val >> 32) as u32); (mmio.write_callback)(addr.wrapping_add(4), val as u32); } else if !self.is_mapped(addr) { } else { let old_lane = self.capture_mem_watch_old(addr, 8); let ptr = self.translate_virtual_mut(addr); unsafe { std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 8); } self.bump_page_version(addr); if (addr & 0xFFF) >= (PAGE_SIZE - 7) { self.bump_page_version(addr.wrapping_add(7)); } self.check_mem_watch(addr, 8, old_lane); } } fn translate(&self, addr: u32) -> Option<*const u8> { if self.find_mmio(addr).is_some() || !self.is_mapped(addr) { None } else { Some(self.translate_virtual(addr)) } } fn translate_mut(&self, addr: u32) -> Option<*mut u8> { if self.find_mmio(addr).is_some() { None } else { Some(self.translate_virtual_mut(addr)) } } /// Override the default impl to hand the xenia-cpu `DecodeCache` a /// real per-page version. Zero means "never written" which the cache /// treats as a valid version; first write bumps to 1 (via the /// global `writes_total` counter already maintained). #[inline] fn page_version(&self, addr: u32) -> u64 { GuestMemory::page_version(self, addr) } } #[cfg(test)] mod tests { use super::*; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; fn empty_mem() -> GuestMemory { GuestMemory::new().expect("reserve 4GB") } #[test] fn alloc_rejects_unaligned_base() { let mut mem = empty_mem(); let err = mem.alloc(0x1001, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap_err(); assert!(matches!(err, MemoryError::AllocationFailed(_))); } #[test] fn alloc_rejects_overflow_past_4gb() { let mut mem = empty_mem(); let err = mem.alloc(0xFFFF_0000, 0x0002_0000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap_err(); assert!(matches!(err, MemoryError::AllocationFailed(_))); } #[test] fn alloc_succeeds_for_valid_region() { let mut mem = empty_mem(); let base = mem.alloc(0x1000, 0x2000, MemoryProtect::READ | MemoryProtect::WRITE).expect("alloc ok"); assert_eq!(base, 0x1000); assert!(mem.is_mapped(0x1000)); assert!(mem.is_mapped(0x2FFF)); assert!(!mem.is_mapped(0x3000)); } #[test] fn page_entry_returns_none_out_of_range() { let mem = empty_mem(); // page_entry takes u32; all u32 values fit in the 4GB page table, // so OOB-via-addr isn't reachable. Verify the Option behavior on an // unmapped but in-range page: entry exists but is free. let e = mem.page_entry(0xDEAD_BEEF).expect("in-range"); assert!(e.is_free()); } #[test] fn read_u8_dispatches_to_mmio() { let mut mem = empty_mem(); let seen_addr = Arc::new(AtomicU32::new(0)); let seen_clone = seen_addr.clone(); mem.add_mmio_region(MmioRegion { base_address: 0xEA00_0000, mask: 0xFFFF_FF00, size: 0x100, read_callback: Box::new(move |a| { seen_clone.store(a, Ordering::SeqCst); 0x42 }), write_callback: Box::new(|_, _| {}), }); let v = mem.read_u8(0xEA00_0008); assert_eq!(v, 0x42); assert_eq!(seen_addr.load(Ordering::SeqCst), 0xEA00_0008); } #[test] fn write_u8_dispatches_to_mmio() { let mut mem = empty_mem(); let captured = Arc::new(AtomicU32::new(0)); let captured_clone = captured.clone(); mem.add_mmio_region(MmioRegion { base_address: 0xEB00_0000, mask: 0xFFFF_FF00, size: 0x100, read_callback: Box::new(|_| 0), write_callback: Box::new(move |_, v| { captured_clone.store(v, Ordering::SeqCst); }), }); mem.write_u8(0xEB00_0004, 0xAB); assert_eq!(captured.load(Ordering::SeqCst), 0xAB); } #[test] fn u32_read_write_roundtrip_is_big_endian() { let mut mem = empty_mem(); mem.alloc(0x2000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap(); mem.write_u32(0x2000, 0xDEAD_BEEF); assert_eq!(mem.read_u32(0x2000), 0xDEAD_BEEF); // And verify byte layout is big-endian (PPC native order). assert_eq!(mem.read_u8(0x2000), 0xDE); assert_eq!(mem.read_u8(0x2001), 0xAD); assert_eq!(mem.read_u8(0x2002), 0xBE); assert_eq!(mem.read_u8(0x2003), 0xEF); } #[test] fn page_versions_bump_on_write() { let mut mem = empty_mem(); mem.alloc(0x8000, 0x2000, MemoryProtect::READ | MemoryProtect::WRITE) .unwrap(); let v0 = mem.page_version(0x8000); assert_eq!(v0, 0); mem.write_u32(0x8000, 0xDEAD_BEEF); let v1 = mem.page_version(0x8000); assert!(v1 > v0, "page version should advance on write"); // A write to a different page advances only that page. mem.write_u8(0x9000, 0xAB); assert_eq!(mem.page_version(0x8000), v1); assert!(mem.page_version(0x9000) > v1); // `max_page_version` across the span picks up the later write. let span_max = mem.max_page_version(0x8000, 0x1001); assert_eq!(span_max, mem.page_version(0x9000)); } #[test] fn mmio_fast_path_skips_non_mmio_address() { // After registering a region in the GPU MMIO aperture, a write // to an unrelated main-RAM address must NOT be intercepted — // it must hit backing memory and bump page_version. let mut mem = empty_mem(); mem.alloc(0x2000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) .unwrap(); let dispatched = Arc::new(AtomicU32::new(0)); let dispatched_clone = dispatched.clone(); mem.add_mmio_region(MmioRegion { base_address: 0x7FC8_0000, mask: 0xFFFF_0000, size: 0x0001_0000, read_callback: Box::new(move |_| { dispatched_clone.fetch_add(1, Ordering::SeqCst); 0 }), write_callback: Box::new(|_, _| {}), }); let v0 = mem.page_version(0x2000); mem.write_u32(0x2000, 0xCAFE_F00D); assert_eq!(mem.read_u32(0x2000), 0xCAFE_F00D); assert!(mem.page_version(0x2000) > v0); assert_eq!(dispatched.load(Ordering::SeqCst), 0, "non-MMIO read must not have hit the MMIO callback"); } #[test] fn mmio_fast_path_dispatches_for_aperture() { // Addresses inside the registered aperture must still hit the // callback after the fast-path landed. let mut mem = empty_mem(); let writes = Arc::new(AtomicU32::new(0)); let reads = Arc::new(AtomicU32::new(0)); let writes_clone = writes.clone(); let reads_clone = reads.clone(); mem.add_mmio_region(MmioRegion { base_address: 0x7FC8_0000, mask: 0xFFFF_0000, size: 0x0001_0000, read_callback: Box::new(move |_| { reads_clone.fetch_add(1, Ordering::SeqCst); 0xAA }), write_callback: Box::new(move |_, _| { writes_clone.fetch_add(1, Ordering::SeqCst); }), }); mem.write_u32(0x7FC8_0420, 0x1234); assert_eq!(writes.load(Ordering::SeqCst), 1); let v = mem.read_u32(0x7FC8_0008); assert_eq!(v, 0xAA); assert_eq!(reads.load(Ordering::SeqCst), 1); } #[test] fn mmio_fast_path_handles_two_disjoint_regions() { // Two disjoint MMIO regions — both must dispatch, and a // non-MMIO address still must not. let mut mem = empty_mem(); mem.alloc(0x2000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE) .unwrap(); let a_writes = Arc::new(AtomicU32::new(0)); let b_writes = Arc::new(AtomicU32::new(0)); let a_clone = a_writes.clone(); let b_clone = b_writes.clone(); mem.add_mmio_region(MmioRegion { base_address: 0x7FC8_0000, mask: 0xFFFF_0000, size: 0x0001_0000, read_callback: Box::new(|_| 0), write_callback: Box::new(move |_, _| { a_clone.fetch_add(1, Ordering::SeqCst); }), }); mem.add_mmio_region(MmioRegion { base_address: 0xEA00_0000, mask: 0xFFFF_0000, size: 0x0001_0000, read_callback: Box::new(|_| 0), write_callback: Box::new(move |_, _| { b_clone.fetch_add(1, Ordering::SeqCst); }), }); // Both regions still dispatch. mem.write_u32(0x7FC8_0008, 1); mem.write_u32(0xEA00_0008, 2); assert_eq!(a_writes.load(Ordering::SeqCst), 1); assert_eq!(b_writes.load(Ordering::SeqCst), 1); // Non-MMIO write still bypasses both callbacks. let v0 = mem.page_version(0x2000); mem.write_u32(0x2000, 0xDEAD_BEEF); assert_eq!(a_writes.load(Ordering::SeqCst), 1); assert_eq!(b_writes.load(Ordering::SeqCst), 1); assert!(mem.page_version(0x2000) > v0); assert_eq!(mem.read_u32(0x2000), 0xDEAD_BEEF); } #[test] fn mmio_fold_aperture_idempotent_for_identical_regions() { // Regression: re-registering the same region must not collapse // the cached aperture (which would force every fast-rejected // address back through the linear iter().find). let (m, v) = super::fold_aperture( 0xFFFF_0000, 0x7FC8_0000, 0xFFFF_0000, 0x7FC8_0000, ); assert_eq!(m, 0xFFFF_0000); assert_eq!(v, 0x7FC8_0000); } #[test] fn mmio_fold_aperture_widens_for_disjoint_regions() { // Folding two disjoint regions yields a *necessary*-only mask. // The cached pair must accept both region addresses (the inner // contains() is the sufficient check) and reject something // outside both. let (m, v) = super::fold_aperture( 0xFFFF_0000, 0x7FC8_0000, 0xFFFF_0000, 0xEA00_0000, ); assert_eq!((0x7FC8_0420u32 & m), v); assert_eq!((0xEA00_0008u32 & m), v); // 0x2000 is outside both; the fold-mask compare must reject it. assert_ne!((0x0000_2000u32 & m), v); } #[test] fn page_versions_ignore_mmio_writes() { let mut mem = empty_mem(); mem.add_mmio_region(MmioRegion { base_address: 0xEC00_0000, mask: 0xFFFF_FF00, size: 0x100, read_callback: Box::new(|_| 0), write_callback: Box::new(|_, _| {}), }); let before = mem.page_version(0xEC00_0000); mem.write_u32(0xEC00_0004, 0x1234); assert_eq!(mem.page_version(0xEC00_0000), before); } #[test] fn u64_read_write_roundtrip_is_big_endian() { let mut mem = empty_mem(); mem.alloc(0x3000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap(); mem.write_u64(0x3000, 0x1122_3344_5566_7788); assert_eq!(mem.read_u64(0x3000), 0x1122_3344_5566_7788); assert_eq!(mem.read_u8(0x3000), 0x11); assert_eq!(mem.read_u8(0x3007), 0x88); } #[test] fn mem_watch_fires_on_overlapping_store() { let mut mem = empty_mem(); mem.alloc(0x4000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap(); mem.arm_mem_watch(vec![0x4010]); super::set_writer_ctx(7, 0x8200_0000, 0x8200_0004); // u32 store directly on the watched address fires. mem.write_u32(0x4010, 0xDEAD_BEEF); assert_eq!(mem.mem_watch_count(), 1); // u8 store on the watched byte itself fires. mem.write_u8(0x4010, 0x11); assert_eq!(mem.mem_watch_count(), 2); // u8 store at +2 is outside the byte-exact watch — no fire. mem.write_u8(0x4012, 0x22); assert_eq!(mem.mem_watch_count(), 2); // u16 store strictly outside the watched byte does NOT fire. mem.write_u16(0x4014, 0xCAFE); assert_eq!(mem.mem_watch_count(), 2); // bulk write spanning the watch fires once. mem.write_bulk(0x4000, &[0u8; 0x20]); assert_eq!(mem.mem_watch_count(), 3); } #[test] fn mem_watch_empty_set_zero_overhead_path() { // With no addresses armed, write_u32 must NOT bump the count // and must produce identical post-store memory state. let mut mem = empty_mem(); mem.alloc(0x5000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap(); mem.write_u32(0x5000, 0x1234_5678); assert_eq!(mem.read_u32(0x5000), 0x1234_5678); assert_eq!(mem.mem_watch_count(), 0); } #[test] fn mem_watch_arm_dedups_and_sorts() { let mut mem = empty_mem(); mem.alloc(0x6000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE).unwrap(); mem.arm_mem_watch(vec![0x6008, 0x6004, 0x6008, 0x6004]); // A single store hitting either address fires once per watch addr. mem.write_u64(0x6004, 0x1111_2222_3333_4444); // 0x6004 and 0x6008 are both inside [0x6004, 0x600C); two fires. assert_eq!(mem.mem_watch_count(), 2); } } impl Drop for GuestMemory { fn drop(&mut self) { if self.owned && !self.membase.is_null() { unsafe { crate::platform::release_address_space(self.membase, GUEST_ADDRESS_SPACE); } } } }