Re-shape MemoryAccess so write methods take &self and rely on interior mutability (atomics in GuestMemory, Cell in test mocks). This unblocks the &Arc<KernelState>-only execution model the CPU/HLE crates moved to. GuestMemory grows: per-4 KiB-page write-version counter (page_version) that the CPU's decode cache and the texture cache observe via Acquire, fenced 32-bit/64-bit read/write helpers (Release on writer / Acquire on reader) that PM4_EVENT_WRITE_SHD and the matching CPU consumers use to synchronize fence publication, and broader page-table / heap accounting needed by the new HLE allocators. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
774 lines
30 KiB
Rust
774 lines
30 KiB
Rust
use std::sync::atomic::{AtomicU64, Ordering};
|
|
|
|
use crate::access::MemoryAccess;
|
|
use crate::mmio::MmioRegion;
|
|
use crate::page_table::{AllocationState, MemoryProtect, PageEntry};
|
|
use crate::MemoryError;
|
|
|
|
/// Size of one guest page in bytes (4 KiB).
const PAGE_SIZE: u32 = 1 << 12;

/// Size of the guest address space in bytes: 4 GiB, i.e. everything a
/// 32-bit guest address can reach.
const GUEST_ADDRESS_SPACE: usize = 1 << 32;

/// How many `PAGE_SIZE` pages make up the guest address space.
const PAGE_COUNT: usize = GUEST_ADDRESS_SPACE / PAGE_SIZE as usize;

/// Mask keeping the low 29 bits of an address (a 512 MiB physical window).
const PHYSICAL_ADDR_MASK: u32 = (1 << 29) - 1;
|
|
|
|
/// Identifies which guest heap an operation refers to.
///
/// NOTE(review): nothing in this file consumes the enum, so the per-variant
/// descriptions below are inferred from the names only — confirm against
/// the callers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HeapType {
    /// Presumably the general guest virtual heap.
    GuestVirtual,
    /// Presumably the range holding the loaded XEX image.
    GuestXex,
    /// Presumably the guest physical heap.
    GuestPhysical,
}
|
|
|
|
/// The core guest memory system. Manages a 4GB virtual address space
|
|
/// via mmap/VirtualAlloc, with page-level tracking and MMIO dispatch.
|
|
pub struct GuestMemory {
|
|
/// Host pointer to the base of the 4GB guest address space.
|
|
membase: *mut u8,
|
|
/// Page table tracking allocation state for each 4K page. Each entry is
|
|
/// an `AtomicU64` carrying the bit-packed [`PageEntry`] representation.
|
|
/// Atomic so [`Self::alloc`] (and friends) can take `&self` and run
|
|
/// concurrently with the load/store hot path's [`Self::is_mapped`]
|
|
/// checks. Allocation crosses many pages but each per-page Release store
|
|
/// is independently published; readers (`is_mapped`/`page_entry`) use
|
|
/// Acquire loads. Multi-page atomicity is not provided — callers ensure
|
|
/// happens-before via export ordering (alloc completes before any guest
|
|
/// access of the new region).
|
|
page_table: Vec<std::sync::atomic::AtomicU64>,
|
|
/// Registered MMIO regions (sorted by base address for binary search).
|
|
mmio_regions: Vec<MmioRegion>,
|
|
/// Cached *necessary* condition for an address to fall inside *any*
|
|
/// registered MMIO region: an address `a` can match only if
|
|
/// `(a & mmio_aperture_mask) == mmio_aperture_value`. Recomputed
|
|
/// inside [`add_mmio_region`] as the union (greatest common
|
|
/// prefix) of every region's `(mask, base & mask)` pair.
|
|
///
|
|
/// With the GPU MMIO at `0x7FC8_0000 / 0xFFFF_0000` as the only
|
|
/// registered region, this is a single bit-mask compare per scalar
|
|
/// load/store — eliminating the prior O(N) `iter().find` over the
|
|
/// region list on every access. With zero regions registered the
|
|
/// flag stays at the "match nothing" sentinel and the hot path
|
|
/// returns `None` without touching the Vec.
|
|
mmio_aperture_mask: u32,
|
|
mmio_aperture_value: u32,
|
|
/// Whether the memory mapping is owned (should be unmapped on drop).
|
|
owned: bool,
|
|
/// P5 texture-cache invalidation: per-4KB-page monotonic write
|
|
/// version. Every `write_u8/16/32/64` bumps
|
|
/// `page_versions[addr >> 12]`, and a global `writes_total` counter
|
|
/// (shared by all pages) gets stamped into each page. The texture
|
|
/// cache computes `max(page_versions[..])` over the texture's byte
|
|
/// footprint at bind time and re-decodes if any page has advanced
|
|
/// since the cached entry.
|
|
page_versions: Vec<AtomicU64>,
|
|
/// Monotonic global write counter — makes per-page versions
|
|
/// cross-comparable even when their indices alias.
|
|
writes_total: AtomicU64,
|
|
}
|
|
|
|
/// Merge one more `(mask, value)` pair into the cached MMIO fast-reject
/// pair.
///
/// A bit survives in the returned mask only when both input masks test it
/// *and* both values agree on it. The returned value is `cur_value`
/// restricted to those bits. Any address that satisfies either input pair
/// therefore also satisfies the result, so the result stays a *necessary*
/// condition for membership in either region.
///
/// Returns `(mask, value)`.
#[inline]
fn fold_aperture(cur_mask: u32, cur_value: u32, new_mask: u32, new_value: u32) -> (u32, u32) {
    // Bits on which the two expected values differ can no longer be tested.
    let differing = cur_value ^ new_value;
    let kept = cur_mask & new_mask & !differing;
    (kept, cur_value & kept)
}
|
|
|
|
// SAFETY (as far as this file shows): the raw `membase` pointer is what
// suppresses the auto impl. It refers to a process-wide reservation that is
// not tied to the creating thread, and the bookkeeping fields are atomics
// or plain integers.
// NOTE(review): this impl also vouches for `MmioRegion` (and the callbacks
// it stores) being movable across threads — confirm against its definition.
unsafe impl Send for GuestMemory {}
|
|
// SAFETY (as far as this file shows): through `&self` the page table, page
// versions and write counter are only touched with atomic operations, and
// `mmio_regions` is only mutated through `&mut self`. Access to guest RAM
// behind `membase` is NOT synchronised here; callers must not read and
// write the same byte range concurrently from different threads.
// NOTE(review): this impl also vouches for the `MmioRegion` callbacks being
// callable from several threads at once — confirm against its definition.
unsafe impl Sync for GuestMemory {}
|
|
|
|
impl GuestMemory {
|
|
/// Create a new guest memory space by reserving a 4GB virtual address region.
|
|
pub fn new() -> Result<Self, MemoryError> {
|
|
let membase = crate::platform::reserve_address_space(GUEST_ADDRESS_SPACE)?;
|
|
Ok(Self {
|
|
membase,
|
|
page_table: (0..PAGE_COUNT).map(|_| std::sync::atomic::AtomicU64::new(0)).collect(),
|
|
mmio_regions: Vec::new(),
|
|
// Sentinel "match nothing" — `(a & !0) == !0` is false for
|
|
// any `a`, so `find_mmio` short-circuits to `None` until the
|
|
// first region is registered.
|
|
mmio_aperture_mask: u32::MAX,
|
|
mmio_aperture_value: u32::MAX,
|
|
owned: true,
|
|
page_versions: (0..PAGE_COUNT).map(|_| AtomicU64::new(0)).collect(),
|
|
writes_total: AtomicU64::new(0),
|
|
})
|
|
}
|
|
|
|
/// Current version watermark for the page containing `addr`. Bumped by
|
|
/// any write through `write_u8/16/32/64`. Not affected by MMIO writes
|
|
/// (those don't touch the backing texture memory).
|
|
///
|
|
/// Acquire load: any thread observing a value `v` here also observes
|
|
/// every memory write the bumping thread published before its
|
|
/// Release-store of `v` (see [`bump_page_version`]). This is the
|
|
/// synchronizes-with edge consumed by the texture cache once the GPU
|
|
/// runs on its own host thread.
|
|
pub fn page_version(&self, addr: u32) -> u64 {
|
|
let idx = (addr / PAGE_SIZE) as usize;
|
|
self.page_versions
|
|
.get(idx)
|
|
.map(|a| a.load(Ordering::Acquire))
|
|
.unwrap_or(0)
|
|
}
|
|
|
|
/// Maximum page version across the byte span `[addr, addr+len)`.
|
|
/// O(pages) — fast for typical texture sizes (1 MiB = 256 pages).
|
|
pub fn max_page_version(&self, addr: u32, len: u32) -> u64 {
|
|
if len == 0 {
|
|
return self.page_version(addr);
|
|
}
|
|
let first = addr / PAGE_SIZE;
|
|
let last = addr.saturating_add(len.saturating_sub(1)) / PAGE_SIZE;
|
|
let mut m = 0u64;
|
|
for p in first..=last {
|
|
if let Some(slot) = self.page_versions.get(p as usize) {
|
|
let v = slot.load(Ordering::Acquire);
|
|
if v > m {
|
|
m = v;
|
|
}
|
|
}
|
|
}
|
|
m
|
|
}
|
|
|
|
/// Total number of write events observed. Useful for cross-page tie
|
|
/// breaking and HUD-level "is the guest scribbling?" metrics.
|
|
pub fn writes_total(&self) -> u64 {
|
|
self.writes_total.load(Ordering::Relaxed)
|
|
}
|
|
|
|
#[inline]
|
|
fn bump_page_version(&self, addr: u32) {
|
|
// Relaxed is sufficient for the global tick — the only payload
|
|
// that depends on a particular value is the per-page slot below,
|
|
// and the publish-edge there is its own Release store.
|
|
let stamp = self
|
|
.writes_total
|
|
.fetch_add(1, Ordering::Relaxed)
|
|
.wrapping_add(1);
|
|
let idx = (addr / PAGE_SIZE) as usize;
|
|
if let Some(slot) = self.page_versions.get(idx) {
|
|
// Release: any reader that Acquire-loads this slot and sees
|
|
// `stamp` also observes the data store that preceded this
|
|
// bump (the unsafe `*ptr = val` in the surrounding write_*).
|
|
slot.store(stamp, Ordering::Release);
|
|
}
|
|
}
|
|
|
|
/// Get the host base pointer for the guest address space.
|
|
pub fn membase(&self) -> *const u8 {
|
|
self.membase
|
|
}
|
|
|
|
/// Get a mutable host base pointer.
|
|
pub fn membase_mut(&mut self) -> *mut u8 {
|
|
self.membase
|
|
}
|
|
|
|
/// Translate a guest virtual address to a host pointer.
|
|
pub fn translate_virtual(&self, guest_addr: u32) -> *const u8 {
|
|
unsafe { self.membase.add(guest_addr as usize) }
|
|
}
|
|
|
|
/// Translate a guest virtual address to a mutable host pointer.
|
|
///
|
|
/// Takes `&self`. The returned pointer is into the shared
|
|
/// `membase` mapping; the soundness contract is the trait-level one
|
|
/// in [`crate::access::MemoryAccess`] — callers must not concurrently
|
|
/// read and write the same byte range from different threads.
|
|
pub fn translate_virtual_mut(&self, guest_addr: u32) -> *mut u8 {
|
|
unsafe { self.membase.add(guest_addr as usize) }
|
|
}
|
|
|
|
/// Translate a guest physical address to a host pointer.
|
|
pub fn translate_physical(&self, guest_addr: u32) -> *const u8 {
|
|
let phys = guest_addr & PHYSICAL_ADDR_MASK;
|
|
unsafe { self.membase.add(phys as usize) }
|
|
}
|
|
|
|
/// Register an MMIO region.
|
|
pub fn add_mmio_region(&mut self, region: MmioRegion) {
|
|
let new_mask = region.mask;
|
|
let new_value = region.base_address & region.mask;
|
|
if self.mmio_regions.is_empty() {
|
|
self.mmio_aperture_mask = new_mask;
|
|
self.mmio_aperture_value = new_value;
|
|
} else {
|
|
let (m, v) = fold_aperture(
|
|
self.mmio_aperture_mask,
|
|
self.mmio_aperture_value,
|
|
new_mask,
|
|
new_value,
|
|
);
|
|
self.mmio_aperture_mask = m;
|
|
self.mmio_aperture_value = v;
|
|
}
|
|
let base = region.base_address;
|
|
let idx = self
|
|
.mmio_regions
|
|
.binary_search_by_key(&base, |r| r.base_address)
|
|
.unwrap_or_else(|i| i);
|
|
self.mmio_regions.insert(idx, region);
|
|
}
|
|
|
|
/// Check if an address is in a registered MMIO region.
|
|
///
|
|
/// Tier-3 perf — non-MMIO addresses (the common case for code fetch
|
|
/// and main-RAM data accesses) get rejected by a single bit-mask
|
|
/// compare against the cached aperture, skipping the linear search
|
|
/// over `mmio_regions`. The `iter().find` fallback only runs for
|
|
/// addresses that pass the necessary-but-not-sufficient prefilter,
|
|
/// preserving exact MMIO semantics when multiple regions share a
|
|
/// prefix or when a region's `mask` admits non-contiguous addresses.
|
|
#[inline]
|
|
fn find_mmio(&self, addr: u32) -> Option<&MmioRegion> {
|
|
if (addr & self.mmio_aperture_mask) != self.mmio_aperture_value {
|
|
return None;
|
|
}
|
|
self.mmio_regions.iter().find(|r| r.contains(addr))
|
|
}
|
|
|
|
/// Allocate a region in the guest address space.
|
|
///
|
|
/// Validates that `base` is page-aligned and that `base + size` does not
|
|
/// overflow the 4GB guest address space. Takes `&self` — `page_table`
|
|
/// is `Vec<AtomicU64>` so per-page state updates use atomic stores
|
|
/// (`Release` ordering, paired with `Acquire` loads in
|
|
/// [`Self::is_mapped`] / [`Self::page_entry`]). The kernel ensures
|
|
/// happens-before across the alloc-then-use boundary at the export
|
|
/// level (the guest cannot observe the new region until the export
|
|
/// returns), so a single Release per page suffices and we don't need
|
|
/// multi-page atomicity.
|
|
pub fn alloc(
|
|
&self,
|
|
base: u32,
|
|
size: u32,
|
|
protect: MemoryProtect,
|
|
) -> Result<u32, MemoryError> {
|
|
if !base.is_multiple_of(PAGE_SIZE) {
|
|
return Err(MemoryError::AllocationFailed(format!(
|
|
"alloc base {:#x} is not page-aligned", base
|
|
)));
|
|
}
|
|
let end = (base as u64).saturating_add(size as u64);
|
|
if end > GUEST_ADDRESS_SPACE as u64 {
|
|
return Err(MemoryError::AllocationFailed(format!(
|
|
"alloc range {:#x}+{:#x} exceeds 4GB guest space", base, size
|
|
)));
|
|
}
|
|
|
|
let page_start = (base / PAGE_SIZE) as usize;
|
|
let page_count = size.div_ceil(PAGE_SIZE) as usize;
|
|
|
|
// Commit pages via platform. `commit_memory` takes `*mut u8` but
|
|
// doesn't actually need exclusive access — the OS-level mmap call
|
|
// is independently thread-safe.
|
|
let host_ptr = unsafe { self.membase.add(base as usize) };
|
|
crate::platform::commit_memory(host_ptr, page_count * PAGE_SIZE as usize)?;
|
|
|
|
// Build a single `PageEntry` once, then Release-store it into each
|
|
// affected slot. Using a fresh `PageEntry::default()` per page
|
|
// would yield the same bits but at higher cost.
|
|
let mut entry = PageEntry::default();
|
|
entry.set_base_address(page_start as u32);
|
|
entry.set_region_page_count(page_count as u32);
|
|
entry.set_allocation_protect(protect);
|
|
entry.set_current_protect(protect);
|
|
entry.set_state(AllocationState::RESERVE | AllocationState::COMMIT);
|
|
let raw = entry.raw();
|
|
for i in 0..page_count {
|
|
let idx = page_start + i;
|
|
if let Some(slot) = self.page_table.get(idx) {
|
|
slot.store(raw, std::sync::atomic::Ordering::Release);
|
|
}
|
|
}
|
|
|
|
Ok(base)
|
|
}
|
|
|
|
/// Read a slice of bytes from guest memory (bypassing MMIO for bulk reads).
|
|
pub fn read_bulk(&self, addr: u32, buf: &mut [u8]) {
|
|
let ptr = self.translate_virtual(addr);
|
|
unsafe {
|
|
std::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr(), buf.len());
|
|
}
|
|
}
|
|
|
|
/// Write a slice of bytes to guest memory (bypassing MMIO for bulk writes).
|
|
///
|
|
/// Takes `&self` (matches the trait-level write contract): the actual
|
|
/// store goes through a raw `*mut u8` derived from `membase`, which
|
|
/// has no Rust aliasing semantics. Callers must respect the trait
|
|
/// contract — no concurrent read/write of the same byte range from
|
|
/// different threads. Used by the XEX loader (init, single-thread)
|
|
/// and `NtReadFile` (mid-execution; the file's destination buffer is
|
|
/// guest-thread-private by construction).
|
|
pub fn write_bulk(&self, addr: u32, buf: &[u8]) {
|
|
let ptr = self.translate_virtual_mut(addr);
|
|
unsafe {
|
|
std::ptr::copy_nonoverlapping(buf.as_ptr(), ptr, buf.len());
|
|
}
|
|
}
|
|
|
|
/// Check if a guest address has been allocated/committed. Acquire load
|
|
/// pairs with the Release store in [`Self::alloc`] — any thread that
|
|
/// observes `state.contains(COMMIT)` here also observes every
|
|
/// allocation-side metadata write that preceded the store.
|
|
pub fn is_mapped(&self, addr: u32) -> bool {
|
|
let page = (addr / PAGE_SIZE) as usize;
|
|
if page >= self.page_table.len() {
|
|
return false;
|
|
}
|
|
let raw = self.page_table[page].load(std::sync::atomic::Ordering::Acquire);
|
|
PageEntry::from_raw(raw)
|
|
.state()
|
|
.contains(AllocationState::COMMIT)
|
|
}
|
|
|
|
/// Get a page table entry for a given address, or None if out of range.
|
|
/// Returns by value (the storage is now atomic; we publish a snapshot).
|
|
pub fn page_entry(&self, addr: u32) -> Option<PageEntry> {
|
|
let page = (addr / PAGE_SIZE) as usize;
|
|
self.page_table
|
|
.get(page)
|
|
.map(|a| PageEntry::from_raw(a.load(std::sync::atomic::Ordering::Acquire)))
|
|
}
|
|
}
|
|
|
|
impl MemoryAccess for GuestMemory {
|
|
// Tier-3 perf: `#[inline]` on the hot read/write paths lets LLVM
|
|
// fold the MMIO + mapping checks into the interpreter's load/store
|
|
// handlers, hoisting the "not-MMIO, mapped" branch out of the loop
|
|
// body for consecutive same-page accesses.
|
|
#[inline]
|
|
fn read_u8(&self, addr: u32) -> u8 {
|
|
// MMIO dispatch must come first — a byte read at an MMIO-mapped
|
|
// address should invoke the callback, not the backing memory.
|
|
if let Some(mmio) = self.find_mmio(addr) {
|
|
return (mmio.read_callback)(addr) as u8;
|
|
}
|
|
if !self.is_mapped(addr) { return 0; }
|
|
let ptr = self.translate_virtual(addr);
|
|
unsafe { *ptr }
|
|
}
|
|
|
|
#[inline]
|
|
fn read_u16(&self, addr: u32) -> u16 {
|
|
if let Some(mmio) = self.find_mmio(addr) {
|
|
(mmio.read_callback)(addr) as u16
|
|
} else if !self.is_mapped(addr) {
|
|
0
|
|
} else {
|
|
let ptr = self.translate_virtual(addr) as *const [u8; 2];
|
|
u16::from_be_bytes(unsafe { *ptr })
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn read_u32(&self, addr: u32) -> u32 {
|
|
if let Some(mmio) = self.find_mmio(addr) {
|
|
(mmio.read_callback)(addr)
|
|
} else if !self.is_mapped(addr) {
|
|
0
|
|
} else {
|
|
let ptr = self.translate_virtual(addr) as *const [u8; 4];
|
|
u32::from_be_bytes(unsafe { *ptr })
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn read_u64(&self, addr: u32) -> u64 {
|
|
if let Some(mmio) = self.find_mmio(addr) {
|
|
let hi = (mmio.read_callback)(addr) as u64;
|
|
let lo = (mmio.read_callback)(addr.wrapping_add(4)) as u64;
|
|
(hi << 32) | lo
|
|
} else if !self.is_mapped(addr) {
|
|
0
|
|
} else {
|
|
let ptr = self.translate_virtual(addr) as *const [u8; 8];
|
|
u64::from_be_bytes(unsafe { *ptr })
|
|
}
|
|
}
|
|
|
|
fn write_u8(&self, addr: u32, val: u8) {
|
|
// MMIO dispatch first — a byte write at an MMIO-mapped address
|
|
// must invoke the callback, not the backing memory.
|
|
if let Some(mmio) = self.find_mmio(addr) {
|
|
(mmio.write_callback)(addr, val as u32);
|
|
return;
|
|
}
|
|
if !self.is_mapped(addr) { return; }
|
|
let ptr = self.translate_virtual_mut(addr);
|
|
unsafe { *ptr = val };
|
|
self.bump_page_version(addr);
|
|
}
|
|
|
|
fn write_u16(&self, addr: u32, val: u16) {
|
|
if let Some(mmio) = self.find_mmio(addr) {
|
|
(mmio.write_callback)(addr, val as u32);
|
|
} else if !self.is_mapped(addr) {
|
|
} else {
|
|
let ptr = self.translate_virtual_mut(addr);
|
|
unsafe {
|
|
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 2);
|
|
}
|
|
self.bump_page_version(addr);
|
|
// A 16-bit write can cross a page boundary; bump the neighbour
|
|
// too so the texture cache sees the write even if it's looking
|
|
// at the next page's version.
|
|
if (addr & 0xFFF) >= (PAGE_SIZE - 1) {
|
|
self.bump_page_version(addr.wrapping_add(1));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn write_u32(&self, addr: u32, val: u32) {
|
|
if let Some(mmio) = self.find_mmio(addr) {
|
|
(mmio.write_callback)(addr, val);
|
|
} else if !self.is_mapped(addr) {
|
|
} else {
|
|
let ptr = self.translate_virtual_mut(addr);
|
|
unsafe {
|
|
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 4);
|
|
}
|
|
self.bump_page_version(addr);
|
|
if (addr & 0xFFF) >= (PAGE_SIZE - 3) {
|
|
self.bump_page_version(addr.wrapping_add(3));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn write_u64(&self, addr: u32, val: u64) {
|
|
if let Some(mmio) = self.find_mmio(addr) {
|
|
(mmio.write_callback)(addr, (val >> 32) as u32);
|
|
(mmio.write_callback)(addr.wrapping_add(4), val as u32);
|
|
} else if !self.is_mapped(addr) {
|
|
} else {
|
|
let ptr = self.translate_virtual_mut(addr);
|
|
unsafe {
|
|
std::ptr::copy_nonoverlapping(val.to_be_bytes().as_ptr(), ptr, 8);
|
|
}
|
|
self.bump_page_version(addr);
|
|
if (addr & 0xFFF) >= (PAGE_SIZE - 7) {
|
|
self.bump_page_version(addr.wrapping_add(7));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn translate(&self, addr: u32) -> Option<*const u8> {
|
|
if self.find_mmio(addr).is_some() || !self.is_mapped(addr) {
|
|
None
|
|
} else {
|
|
Some(self.translate_virtual(addr))
|
|
}
|
|
}
|
|
|
|
fn translate_mut(&self, addr: u32) -> Option<*mut u8> {
|
|
if self.find_mmio(addr).is_some() {
|
|
None
|
|
} else {
|
|
Some(self.translate_virtual_mut(addr))
|
|
}
|
|
}
|
|
|
|
/// Override the default impl to hand the xenia-cpu `DecodeCache` a
|
|
/// real per-page version. Zero means "never written" which the cache
|
|
/// treats as a valid version; first write bumps to 1 (via the
|
|
/// global `writes_total` counter already maintained).
|
|
#[inline]
|
|
fn page_version(&self, addr: u32) -> u64 {
|
|
GuestMemory::page_version(self, addr)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU32, Ordering};
    use std::sync::Arc;

    /// Fresh guest memory with nothing committed and no MMIO regions.
    ///
    /// Bindings are only declared `mut` in tests that call
    /// `add_mmio_region`; `alloc` and the scalar writes take `&self`.
    fn empty_mem() -> GuestMemory {
        GuestMemory::new().expect("reserve 4GB")
    }

    #[test]
    fn alloc_rejects_unaligned_base() {
        let mem = empty_mem();
        let err = mem
            .alloc(0x1001, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .unwrap_err();
        assert!(matches!(err, MemoryError::AllocationFailed(_)));
    }

    #[test]
    fn alloc_rejects_overflow_past_4gb() {
        let mem = empty_mem();
        let err = mem
            .alloc(0xFFFF_0000, 0x0002_0000, MemoryProtect::READ | MemoryProtect::WRITE)
            .unwrap_err();
        assert!(matches!(err, MemoryError::AllocationFailed(_)));
    }

    #[test]
    fn alloc_succeeds_for_valid_region() {
        let mem = empty_mem();
        let base = mem
            .alloc(0x1000, 0x2000, MemoryProtect::READ | MemoryProtect::WRITE)
            .expect("alloc ok");
        assert_eq!(base, 0x1000);
        assert!(mem.is_mapped(0x1000));
        assert!(mem.is_mapped(0x2FFF));
        assert!(!mem.is_mapped(0x3000));
    }

    #[test]
    fn page_entry_returns_none_out_of_range() {
        let mem = empty_mem();
        // Despite the test name, the `None` branch cannot be reached from
        // here: `page_entry` takes a u32 and every u32 address maps to a
        // slot of the 4GB page table. What is verified instead is the
        // in-range, never-allocated case: an entry exists and is free.
        let e = mem.page_entry(0xDEAD_BEEF).expect("in-range");
        assert!(e.is_free());
    }

    #[test]
    fn read_u8_dispatches_to_mmio() {
        let mut mem = empty_mem();
        let seen_addr = Arc::new(AtomicU32::new(0));
        let seen_clone = seen_addr.clone();
        mem.add_mmio_region(MmioRegion {
            base_address: 0xEA00_0000,
            mask: 0xFFFF_FF00,
            size: 0x100,
            read_callback: Box::new(move |a| {
                seen_clone.store(a, Ordering::SeqCst);
                0x42
            }),
            write_callback: Box::new(|_, _| {}),
        });
        let v = mem.read_u8(0xEA00_0008);
        assert_eq!(v, 0x42);
        assert_eq!(seen_addr.load(Ordering::SeqCst), 0xEA00_0008);
    }

    #[test]
    fn write_u8_dispatches_to_mmio() {
        let mut mem = empty_mem();
        let captured = Arc::new(AtomicU32::new(0));
        let captured_clone = captured.clone();
        mem.add_mmio_region(MmioRegion {
            base_address: 0xEB00_0000,
            mask: 0xFFFF_FF00,
            size: 0x100,
            read_callback: Box::new(|_| 0),
            write_callback: Box::new(move |_, v| {
                captured_clone.store(v, Ordering::SeqCst);
            }),
        });
        mem.write_u8(0xEB00_0004, 0xAB);
        assert_eq!(captured.load(Ordering::SeqCst), 0xAB);
    }

    #[test]
    fn u32_read_write_roundtrip_is_big_endian() {
        let mem = empty_mem();
        mem.alloc(0x2000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .unwrap();
        mem.write_u32(0x2000, 0xDEAD_BEEF);
        assert_eq!(mem.read_u32(0x2000), 0xDEAD_BEEF);
        // And verify byte layout is big-endian (PPC native order).
        assert_eq!(mem.read_u8(0x2000), 0xDE);
        assert_eq!(mem.read_u8(0x2001), 0xAD);
        assert_eq!(mem.read_u8(0x2002), 0xBE);
        assert_eq!(mem.read_u8(0x2003), 0xEF);
    }

    #[test]
    fn page_versions_bump_on_write() {
        let mem = empty_mem();
        mem.alloc(0x8000, 0x2000, MemoryProtect::READ | MemoryProtect::WRITE)
            .unwrap();
        let v0 = mem.page_version(0x8000);
        assert_eq!(v0, 0);
        mem.write_u32(0x8000, 0xDEAD_BEEF);
        let v1 = mem.page_version(0x8000);
        assert!(v1 > v0, "page version should advance on write");
        // A write to a different page advances only that page.
        mem.write_u8(0x9000, 0xAB);
        assert_eq!(mem.page_version(0x8000), v1);
        assert!(mem.page_version(0x9000) > v1);
        // `max_page_version` across the span picks up the later write.
        let span_max = mem.max_page_version(0x8000, 0x1001);
        assert_eq!(span_max, mem.page_version(0x9000));
    }

    #[test]
    fn mmio_fast_path_skips_non_mmio_address() {
        // After registering a region in the GPU MMIO aperture, a write
        // to an unrelated main-RAM address must NOT be intercepted —
        // it must hit backing memory and bump page_version.
        let mut mem = empty_mem();
        mem.alloc(0x2000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .unwrap();
        let dispatched = Arc::new(AtomicU32::new(0));
        let dispatched_clone = dispatched.clone();
        mem.add_mmio_region(MmioRegion {
            base_address: 0x7FC8_0000,
            mask: 0xFFFF_0000,
            size: 0x0001_0000,
            read_callback: Box::new(move |_| {
                dispatched_clone.fetch_add(1, Ordering::SeqCst);
                0
            }),
            write_callback: Box::new(|_, _| {}),
        });
        let v0 = mem.page_version(0x2000);
        mem.write_u32(0x2000, 0xCAFE_F00D);
        assert_eq!(mem.read_u32(0x2000), 0xCAFE_F00D);
        assert!(mem.page_version(0x2000) > v0);
        assert_eq!(
            dispatched.load(Ordering::SeqCst),
            0,
            "non-MMIO read must not have hit the MMIO callback"
        );
    }

    #[test]
    fn mmio_fast_path_dispatches_for_aperture() {
        // Addresses inside the registered aperture must still hit the
        // callback after the fast-path landed.
        let mut mem = empty_mem();
        let writes = Arc::new(AtomicU32::new(0));
        let reads = Arc::new(AtomicU32::new(0));
        let writes_clone = writes.clone();
        let reads_clone = reads.clone();
        mem.add_mmio_region(MmioRegion {
            base_address: 0x7FC8_0000,
            mask: 0xFFFF_0000,
            size: 0x0001_0000,
            read_callback: Box::new(move |_| {
                reads_clone.fetch_add(1, Ordering::SeqCst);
                0xAA
            }),
            write_callback: Box::new(move |_, _| {
                writes_clone.fetch_add(1, Ordering::SeqCst);
            }),
        });
        mem.write_u32(0x7FC8_0420, 0x1234);
        assert_eq!(writes.load(Ordering::SeqCst), 1);
        let v = mem.read_u32(0x7FC8_0008);
        assert_eq!(v, 0xAA);
        assert_eq!(reads.load(Ordering::SeqCst), 1);
    }

    #[test]
    fn mmio_fast_path_handles_two_disjoint_regions() {
        // Two disjoint MMIO regions — both must dispatch, and a
        // non-MMIO address still must not.
        let mut mem = empty_mem();
        mem.alloc(0x2000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .unwrap();
        let a_writes = Arc::new(AtomicU32::new(0));
        let b_writes = Arc::new(AtomicU32::new(0));
        let a_clone = a_writes.clone();
        let b_clone = b_writes.clone();
        mem.add_mmio_region(MmioRegion {
            base_address: 0x7FC8_0000,
            mask: 0xFFFF_0000,
            size: 0x0001_0000,
            read_callback: Box::new(|_| 0),
            write_callback: Box::new(move |_, _| {
                a_clone.fetch_add(1, Ordering::SeqCst);
            }),
        });
        mem.add_mmio_region(MmioRegion {
            base_address: 0xEA00_0000,
            mask: 0xFFFF_0000,
            size: 0x0001_0000,
            read_callback: Box::new(|_| 0),
            write_callback: Box::new(move |_, _| {
                b_clone.fetch_add(1, Ordering::SeqCst);
            }),
        });
        // Both regions still dispatch.
        mem.write_u32(0x7FC8_0008, 1);
        mem.write_u32(0xEA00_0008, 2);
        assert_eq!(a_writes.load(Ordering::SeqCst), 1);
        assert_eq!(b_writes.load(Ordering::SeqCst), 1);
        // Non-MMIO write still bypasses both callbacks.
        let v0 = mem.page_version(0x2000);
        mem.write_u32(0x2000, 0xDEAD_BEEF);
        assert_eq!(a_writes.load(Ordering::SeqCst), 1);
        assert_eq!(b_writes.load(Ordering::SeqCst), 1);
        assert!(mem.page_version(0x2000) > v0);
        assert_eq!(mem.read_u32(0x2000), 0xDEAD_BEEF);
    }

    #[test]
    fn mmio_fold_aperture_idempotent_for_identical_regions() {
        // Regression: re-registering the same region must not collapse
        // the cached aperture (which would force every fast-rejected
        // address back through the linear iter().find).
        let (m, v) = super::fold_aperture(0xFFFF_0000, 0x7FC8_0000, 0xFFFF_0000, 0x7FC8_0000);
        assert_eq!(m, 0xFFFF_0000);
        assert_eq!(v, 0x7FC8_0000);
    }

    #[test]
    fn mmio_fold_aperture_widens_for_disjoint_regions() {
        // Folding two disjoint regions yields a *necessary*-only mask.
        // The cached pair must accept both region addresses (the inner
        // contains() is the sufficient check) and reject something
        // outside both.
        let (m, v) = super::fold_aperture(0xFFFF_0000, 0x7FC8_0000, 0xFFFF_0000, 0xEA00_0000);
        assert_eq!((0x7FC8_0420u32 & m), v);
        assert_eq!((0xEA00_0008u32 & m), v);
        // 0x2000 is outside both; the fold-mask compare must reject it.
        assert_ne!((0x0000_2000u32 & m), v);
    }

    #[test]
    fn page_versions_ignore_mmio_writes() {
        let mut mem = empty_mem();
        mem.add_mmio_region(MmioRegion {
            base_address: 0xEC00_0000,
            mask: 0xFFFF_FF00,
            size: 0x100,
            read_callback: Box::new(|_| 0),
            write_callback: Box::new(|_, _| {}),
        });
        let before = mem.page_version(0xEC00_0000);
        mem.write_u32(0xEC00_0004, 0x1234);
        assert_eq!(mem.page_version(0xEC00_0000), before);
    }

    #[test]
    fn u64_read_write_roundtrip_is_big_endian() {
        let mem = empty_mem();
        mem.alloc(0x3000, 0x1000, MemoryProtect::READ | MemoryProtect::WRITE)
            .unwrap();
        mem.write_u64(0x3000, 0x1122_3344_5566_7788);
        assert_eq!(mem.read_u64(0x3000), 0x1122_3344_5566_7788);
        assert_eq!(mem.read_u8(0x3000), 0x11);
        assert_eq!(mem.read_u8(0x3007), 0x88);
    }
}
|
|
|
|
impl Drop for GuestMemory {
|
|
fn drop(&mut self) {
|
|
if self.owned && !self.membase.is_null() {
|
|
unsafe {
|
|
crate::platform::release_address_space(self.membase, GUEST_ADDRESS_SPACE);
|
|
}
|
|
}
|
|
}
|
|
}
|