//! Inter-thread reservation table for `lwarx`/`stwcx.` and
//! `ldarx`/`stdcx.`.
//!
//! On real Xenon, each core's `lwarx` places a reservation on a 128-byte
//! cache line; any other CPU's store to the line invalidates the
//! reservation. `stwcx.`'s success depends on the reservation still being
//! valid. Under M3's per-HW-thread parallelism, we need an inter-thread
//! mechanism for the same guarantee.
//!
//! M2 introduces the table behind a runtime `reservations_enabled` flag
//! (default `false`). When the flag is `false`, the interpreter's
//! existing per-`PpcContext` `reserved_line`/`has_reservation` fields are
//! used as-is — no inter-thread tracking. M3 flips the flag on once the
//! per-HW-thread host threads are spawning.
//!
//! ## Design
//!
//! - **Banked AtomicU64 array** of [`NUM_LINES`] entries (4096 × 8 B =
//!   32 KiB total). Each entry packs `(line_address, generation,
//!   hw_id)`. A zero value means "no reservation on this bank".
//! - **Hash function**: `(line >> 7) & (NUM_LINES - 1)`. Different lines
//!   that map to the same bank conservatively invalidate each other's
//!   reservations (a later `lwarx` on a colliding line overwrites the
//!   bank) — sound (real Xenon's L2 has finite associativity and
//!   has the same property), at the cost of slightly more `stwcx.`
//!   failures than a perfect-mapping table would produce.
//! - **`active_reservers: AtomicU16`** — a fast-path counter
//!   incremented by every `lwarx` and decremented when its reservation is
//!   either committed or invalidated. `write_u32` checks this with a
//!   single `Relaxed` load; when zero (the common case in code that
//!   doesn't use atomics), the invalidation hook is a one-instruction
//!   skip.
//! - **Generation counter**: monotonic across all reservations,
//!   incremented atomically. 24 bits of generation packed in the slot
//!   means 16 M reuses per slot before wraparound; at multi-million
//!   reservations/sec sustained that's still many seconds, and a
//!   stale-gen `stwcx.` simply fails (sound, not livelocking).
//!
//! ## Invariants
//!
//! 1. A `stwcx.(addr)` succeeds only if the line slot still holds the
//!    same `(line, gen, hw_id)` triple the reserver stamped at `lwarx`.
//! 2. Any plain store to a reserved line invalidates it (slot CASed to
//!    zero). A store to a *different* line that merely hashes to the same
//!    bank leaves the reservation alone; only another `lwarx` on a
//!    colliding line displaces it. Guests that observe a `stwcx.` failure
//!    simply retry, so displacement is correctness-preserving.
//! 3. `stwcx.` from a different `hw_id` than the reserver fails even if
//!    the line and gen would otherwise match — only the originating HW
//!    thread can commit its own reservation.
//!
//! Memory ordering: all CAS / store operations on the line slot use
//! `AcqRel`; readers use `Acquire`. The `active_reservers` counter is
//! only ever touched with `Relaxed`. The store inside `stwcx.`'s payload
//! itself (the actual data write) is the caller's responsibility — see
//! [`crate::interpreter`]'s `stwcx.` arm.

use std::sync::atomic::{AtomicBool, AtomicU16, AtomicU64, Ordering};

/// Real Xenon L2 cache-line size — the granule a reservation covers.
pub const LINE_BYTES: u32 = 0x80;

/// Mask to align an address to a cache-line boundary.
pub const LINE_MASK: u32 = !(LINE_BYTES - 1);

/// Number of bank entries in the reservation table. Power of two so the
/// hash is a single AND. 32 KiB total at 8 B per entry.
pub const NUM_LINES: usize = 4096;

const HASH_MASK: u32 = (NUM_LINES as u32) - 1;

/// Mask selecting the 24 generation bits that fit in a packed slot.
const GEN_MASK: u64 = 0xFF_FFFF;

/// Pack `(line_addr, generation, hw_id)` into a single u64. The packed
/// layout is:
///   bits 63..32: line address (we only need the high bits since the
///                low 7 are always zero — reserved range is line-aligned)
///   bits 31..8:  24-bit generation
///   bits 7..0:   8-bit `hw_id`
///
/// A packed value of `0` means "no reservation". [`ReservationTable::reserve`]
/// never issues generation `0`, so a live reservation always packs to a
/// non-zero value and zero is a safe sentinel.
///
/// In debug builds this asserts that `line_addr` is line-aligned and that
/// `generation` fits in 24 bits; in release builds an oversized generation
/// is silently truncated.
#[inline]
pub fn pack(line_addr: u32, generation: u32, hw_id: u8) -> u64 {
    debug_assert!(line_addr & !LINE_MASK == 0, "line_addr must be line-aligned");
    debug_assert!(generation < (1 << 24), "generation must fit in 24 bits");
    (u64::from(line_addr) << 32) | ((u64::from(generation) & GEN_MASK) << 8) | u64::from(hw_id)
}

/// Inverse of [`pack`]. Returns `None` if the value is the zero sentinel
/// (no reservation), otherwise `Some((line_addr, generation, hw_id))`.
#[inline]
pub fn unpack(raw: u64) -> Option<(u32, u32, u8)> {
    if raw == 0 {
        return None;
    }
    let line = (raw >> 32) as u32;
    let generation = ((raw >> 8) & GEN_MASK) as u32;
    let hw_id = (raw & 0xFF) as u8;
    Some((line, generation, hw_id))
}

/// Bank index for a line-aligned address: the line number modulo
/// [`NUM_LINES`].
#[inline]
fn hash(line_addr: u32) -> usize {
    ((line_addr >> 7) & HASH_MASK) as usize
}

/// Round `addr` down to the start of its reservation granule.
#[inline]
fn align_to_line(addr: u32) -> u32 {
    addr & LINE_MASK
}

/// Banked reservation table shared across all emulated HW threads. Built
/// once per emulation instance; lives behind an `Arc` so worker host
/// threads (M3) can hold their own clones without lifetime gymnastics.
pub struct ReservationTable {
    /// One packed slot per bank; `0` means the bank is empty.
    lines: Vec<AtomicU64>,
    /// Upper bound on the number of occupied banks (see
    /// [`ReservationTable::has_active_reservers`]).
    active_reservers: AtomicU16,
    /// Source of generation stamps; only the low 24 bits are used.
    next_gen: AtomicU64,
    /// Runtime activation flag. Default `false`. M2.8's
    /// `--reservations-table` flag (or M3 spawn) flips this to `true`,
    /// at which point the interpreter's `lwarx`/`stwcx.` arms route
    /// through the table; otherwise they use the legacy per-`PpcContext`
    /// reservation fields.
    enabled: AtomicBool,
}

impl Default for ReservationTable {
    fn default() -> Self {
        Self::new()
    }
}

impl ReservationTable {
    /// Construct a fresh, disabled table with all banks empty.
    pub fn new() -> Self {
        Self {
            lines: (0..NUM_LINES).map(|_| AtomicU64::new(0)).collect(),
            active_reservers: AtomicU16::new(0),
            // Start at 1 so the very first reservation gets a non-zero
            // gen and the packed slot value is non-zero (zero is the
            // "no reservation" sentinel).
            next_gen: AtomicU64::new(1),
            enabled: AtomicBool::new(false),
        }
    }

    /// Activate the table. The interpreter's `lwarx`/`stwcx.` arms will
    /// route through this table on subsequent dispatches. Idempotent.
    pub fn enable(&self) {
        self.enabled.store(true, Ordering::Release);
    }

    /// Deactivate the table. The interpreter falls back to per-`PpcContext`
    /// reservation fields. Idempotent.
    pub fn disable(&self) {
        self.enabled.store(false, Ordering::Release);
    }

    /// Whether the table is currently active. The interpreter consults
    /// this on every `lwarx`/`stwcx.` to decide which path runs.
    pub fn is_enabled(&self) -> bool {
        self.enabled.load(Ordering::Acquire)
    }

    /// True when at least one reservation may currently be outstanding.
    /// Plain `write_u32` consults this to skip the invalidation hook
    /// when no thread holds a reservation — the common case for
    /// non-atomic code.
    #[inline]
    pub fn has_active_reservers(&self) -> bool {
        self.active_reservers.load(Ordering::Relaxed) > 0
    }

    /// Draw the next generation stamp, skipping `0`.
    ///
    /// The 64-bit counter is truncated to 24 bits, so every 2^24th draw
    /// would be zero; handing that out could make a live reservation pack
    /// to the "empty bank" sentinel.
    fn fresh_generation(&self) -> u32 {
        loop {
            let generation = (self.next_gen.fetch_add(1, Ordering::Relaxed) & GEN_MASK) as u32;
            if generation != 0 {
                return generation;
            }
        }
    }

    /// `lwarx(addr)` — claim a reservation on the line containing `addr`.
    /// Returns the generation stamped into the slot; the interpreter
    /// stores this alongside the per-`PpcContext` `has_reservation` bit
    /// so a subsequent `stwcx.` can verify the same gen still holds.
    /// The returned generation is never `0`.
    ///
    /// If a different reservation already occupied the bank, it's
    /// silently overwritten — that thread's `stwcx.` will fail because
    /// the slot no longer matches its stamped gen. Matches Xenon
    /// behavior (a different core's lwarx on the same line displaces
    /// any prior reservation).
    pub fn reserve(&self, addr: u32, hw_id: u8) -> u32 {
        let line = align_to_line(addr);
        let generation = self.fresh_generation();
        let new_raw = pack(line, generation, hw_id);

        // Count ourselves *before* publishing the slot so the counter
        // never under-reports an occupied bank: a store racing with this
        // call must not see `active_reservers == 0` and skip the
        // invalidation hook while our reservation is already visible.
        self.active_reservers.fetch_add(1, Ordering::Relaxed);

        // Release: prior reads of the reservation target should
        // happen-before any thread that observes the new slot value.
        let prev = self.lines[hash(line)].swap(new_raw, Ordering::AcqRel);

        // A non-zero previous value means we displaced an existing
        // reservation that was already counted; give its count back so
        // the counter settles at the number of occupied banks (an upper
        // bound on live reservers).
        if prev != 0 {
            self.active_reservers.fetch_sub(1, Ordering::Relaxed);
        }
        generation
    }

    /// `stwcx.(addr)` — try to commit a reservation. Returns `true` if
    /// the slot still holds `(line, my_gen, my_hw_id)` (in which case
    /// it's CAS'd back to zero, releasing the bank), `false` otherwise.
    /// The data store itself is the caller's responsibility — see
    /// [`crate::interpreter`]'s `stwcx.` arm.
    ///
    /// Arguments that pack to the zero sentinel can never name a live
    /// reservation and always yield `false`.
    pub fn try_commit(&self, addr: u32, my_gen: u32, my_hw_id: u8) -> bool {
        let line = align_to_line(addr);
        let expected = pack(line, my_gen, my_hw_id);
        if expected == 0 {
            // `0` is the empty-bank sentinel. Without this guard the CAS
            // below would "succeed" against an empty bank and drive
            // `active_reservers` below zero (wrapping to 65535).
            return false;
        }
        let committed = self.lines[hash(line)]
            .compare_exchange(expected, 0, Ordering::AcqRel, Ordering::Relaxed)
            .is_ok();
        if committed {
            // The bank went from occupied to empty.
            self.active_reservers.fetch_sub(1, Ordering::Relaxed);
        }
        committed
    }

    /// Hook for plain (non-reserving) stores: invalidate any
    /// reservation on the containing line. Cheap when the bank is
    /// already empty (single Acquire load + branch).
    ///
    /// A bank that holds a reservation on a *different* line (hash
    /// collision) is left untouched, which keeps spurious `stwcx.`
    /// failures down.
    pub fn invalidate_for_write(&self, addr: u32) {
        let line = align_to_line(addr);
        let bank = &self.lines[hash(line)];
        let mut observed = bank.load(Ordering::Acquire);
        loop {
            // Nothing to do when the bank is empty or holds a reservation
            // on an unrelated line that merely hashes to the same slot.
            match unpack(observed) {
                Some((bank_line, _generation, _hw)) if bank_line == line => {}
                _ => return,
            }
            // CAS-clear the exact value we observed. On failure the bank
            // changed underneath us (a concurrent `stwcx.`, `reserve`, or
            // a spurious weak-CAS failure), so re-examine the new value:
            // a fresh reservation on *this* line must not outlive the
            // store that is being performed.
            match bank.compare_exchange_weak(observed, 0, Ordering::AcqRel, Ordering::Acquire) {
                Ok(_) => {
                    self.active_reservers.fetch_sub(1, Ordering::Relaxed);
                    return;
                }
                Err(current) => observed = current,
            }
        }
    }

    /// Drop a per-`PpcContext` reservation without committing. Called
    /// when the interpreter clears `has_reservation` due to a
    /// non-`stwcx.` event (context switch, exception, etc.). Safe to
    /// call when the table doesn't hold our reservation anymore (the
    /// CAS simply fails).
    pub fn release(&self, addr: u32, my_gen: u32, my_hw_id: u8) {
        let _ = self.try_commit(addr, my_gen, my_hw_id);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use std::thread;

    #[test]
    fn pack_unpack_roundtrip() {
        let raw = pack(0x1000_0000, 42, 5);
        let (line, generation, hw) = unpack(raw).unwrap();
        assert_eq!(line, 0x1000_0000);
        assert_eq!(generation, 42);
        assert_eq!(hw, 5);
    }

    #[test]
    fn unpack_zero_is_none() {
        assert!(unpack(0).is_none());
    }

    #[test]
    fn reserve_then_commit_succeeds() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        assert!(t.try_commit(0x1234, gn, 0));
        // Already released — second commit fails.
        assert!(!t.try_commit(0x1234, gn, 0));
    }

    #[test]
    fn other_hw_id_cannot_commit() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        assert!(
            !t.try_commit(0x1234, gn, 1),
            "stwcx. from a different hw_id must fail"
        );
        // Original owner can still commit.
        assert!(t.try_commit(0x1234, gn, 0));
    }

    #[test]
    fn lwarx_displaces_prior_reservation() {
        let t = ReservationTable::new();
        let g0 = t.reserve(0x1234, 0);
        // Different HW thread's lwarx on the same line.
        let g1 = t.reserve(0x1234, 1);
        // Original reserver's stwcx. fails because the gen changed.
        assert!(!t.try_commit(0x1234, g0, 0));
        // New reserver's stwcx. succeeds.
        assert!(t.try_commit(0x1234, g1, 1));
    }

    #[test]
    fn invalidate_clears_matching_reservation() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        t.invalidate_for_write(0x1238); // same line as 0x1234
        assert!(!t.try_commit(0x1234, gn, 0));
        assert_eq!(t.active_reservers.load(Ordering::Relaxed), 0);
    }

    #[test]
    fn invalidate_different_line_in_same_bank_is_noop() {
        let t = ReservationTable::new();
        // Force a hash collision: addr A and addr B with same hash but
        // different line addresses.
        let line_a = 0x0000_1000;
        let line_b = line_a + ((NUM_LINES as u32) << 7); // +0x80000 → same hash
        assert_eq!(hash(line_a), hash(line_b));
        let gn = t.reserve(line_a, 0);
        // Invalidating line_b must NOT clear line_a's reservation.
        t.invalidate_for_write(line_b);
        assert!(t.try_commit(line_a, gn, 0));
    }

    #[test]
    fn has_active_reservers_tracks_count() {
        let t = ReservationTable::new();
        assert!(!t.has_active_reservers());
        let g0 = t.reserve(0x1000, 0);
        assert!(t.has_active_reservers());
        let g1 = t.reserve(0x2000, 1);
        assert!(t.has_active_reservers());
        t.try_commit(0x1000, g0, 0);
        assert!(t.has_active_reservers());
        t.try_commit(0x2000, g1, 1);
        assert!(!t.has_active_reservers());
    }

    #[test]
    fn zero_sentinel_commit_is_rejected() {
        let t = ReservationTable::new();
        // (0, 0, 0) packs to the empty-bank sentinel; committing or
        // releasing it must neither succeed nor disturb the counter.
        assert!(!t.try_commit(0, 0, 0));
        t.release(0, 0, 0);
        assert_eq!(t.active_reservers.load(Ordering::Relaxed), 0);
    }

    #[test]
    fn generation_skips_zero_on_wraparound() {
        let t = ReservationTable::new();
        // Next raw counter value truncates to generation 0.
        t.next_gen.store(1 << 24, Ordering::Relaxed);
        let gn = t.reserve(0, 0);
        assert_ne!(gn, 0);
        assert!(t.try_commit(0, gn, 0));
        assert!(!t.has_active_reservers());
    }

    /// Stress test: 8 host threads each loop reserve+stwcx on the same
    /// line. A commit only succeeds when no other thread's `reserve`
    /// landed between this thread's `reserve` and `try_commit`, so some
    /// attempts are expected to fail.
    ///
    /// The test checks forward progress (a healthy fraction of attempts
    /// commit) and that the active-reserver counter settles back to zero
    /// once every thread has finished.
    #[test]
    fn concurrent_lwarx_stwcx_serializes() {
        let t = Arc::new(ReservationTable::new());
        const ROUNDS: u32 = 1000;
        const THREADS: u8 = 8;
        let total_successes = Arc::new(AtomicU64::new(0));

        let mut handles = Vec::new();
        for hw_id in 0..THREADS {
            let t_clone = t.clone();
            let s_clone = total_successes.clone();
            handles.push(
                thread::Builder::new()
                    .name(format!("res-stress-{hw_id}"))
                    .spawn(move || {
                        let mut wins = 0u64;
                        for _ in 0..ROUNDS {
                            let gn = t_clone.reserve(0x1234_5678, hw_id);
                            if t_clone.try_commit(0x1234_5678, gn, hw_id) {
                                wins += 1;
                            }
                        }
                        s_clone.fetch_add(wins, Ordering::Relaxed);
                    })
                    .expect("spawn"),
            );
        }
        for h in handles {
            h.join().expect("join");
        }

        let total = total_successes.load(Ordering::Relaxed);
        // Races can leave individual attempts without a winner (a
        // reservation displaced before its owner could commit). Assert
        // progress: at least 10% of attempts succeed, and
        // active_reservers is back to zero.
        let attempts = ROUNDS as u64 * THREADS as u64;
        assert!(
            total > attempts / 10,
            "expected at least 10% successful commits, got {total}/{attempts}"
        );
        assert_eq!(
            t.active_reservers.load(Ordering::Relaxed),
            0,
            "all reservations should have been resolved"
        );
    }
}