xenia-rs/crates/xenia-cpu/src/reservation.rs

//! Inter-thread reservation table for `lwarx`/`stwcx.` and
//! `ldarx`/`stdcx.`.
//!
//! On real Xenon, each core's `lwarx` places a reservation on a 128-byte
//! cache line; any other CPU's store to the line invalidates the
//! reservation. `stwcx.`'s success depends on the reservation still being
//! valid. Under M3's per-HW-thread parallelism, we need an inter-thread
//! mechanism for the same guarantee.
//!
//! M2 introduces the table behind a runtime `reservations_enabled` flag
//! (default `false`). When the flag is `false`, the interpreter's
//! existing per-`PpcContext` `reserved_line`/`has_reservation` fields are
//! used as-is — no inter-thread tracking. M3 flips the flag on once the
//! per-HW-thread host threads are spawning.
//!
//! ## Design
//!
//! - **Banked AtomicU64 array** of [`NUM_LINES`] entries (4096 × 8 B =
//!   32 KiB total). Each entry packs `(line_address, generation,
//!   hw_id)`. A zero value means "no reservation on this bank".
//! - **Hash function**: `(line >> 7) & (NUM_LINES - 1)`. Different lines
//!   that map to the same bank share one slot, so an `lwarx` on one line
//!   displaces a reservation held on the other — sound (the displaced
//!   `stwcx.` fails and the guest retries; real Xenon's L2 has finite
//!   associativity and has a similar property), at the cost of slightly
//!   more `stwcx.` failures than a perfect-mapping table would produce.
//!   Plain stores only invalidate a reservation on the *same* line.
//! - **`active_reservers: AtomicU16`** — a fast-path counter tracking
//!   how many banks currently hold a reservation: it goes up when an
//!   `lwarx` claims an empty bank and down when a reservation is
//!   committed, released, or invalidated. `write_u32` checks this with a
//!   single `Relaxed` load; when zero (the common case in code that
//!   doesn't use atomics), the invalidation hook is a one-instruction
//!   skip.
//! - **Generation counter**: a single table-wide monotonic counter,
//!   incremented atomically. Only its low 24 bits are packed into the
//!   slot, so generation values repeat after 16 M reservations across
//!   the whole table; at multi-million reservations/sec sustained that's
//!   still many seconds, and a `stwcx.` whose generation no longer
//!   matches its slot simply fails (sound, not livelocking).
//!
//! ## Invariants
//!
//! 1. A `stwcx.(addr)` succeeds only if the line slot still holds the
//!    same `(line, gen, hw_id)` triple the reserver stamped at `lwarx`.
//! 2. Any plain store to a reserved line invalidates it (slot CASed to
//!    zero). A store to a *different* line that maps to the same bank
//!    leaves the reservation alone; only a colliding `lwarx` displaces
//!    it, and guests that observe the resulting `stwcx.` failure simply
//!    retry, so this is correctness-preserving.
//! 3. `stwcx.` from a different `hw_id` than the reserver fails even if
//!    the line and gen would otherwise match — only the originating HW
//!    thread can commit its own reservation.
//!
//! Memory ordering: all CAS / store operations on the line slot use
//! `AcqRel`; readers use `Acquire`. The store inside `stwcx.`'s payload
//! itself (the actual data write) is the caller's responsibility — see
//! [`crate::interpreter`]'s `stwcx.` arm.

use std::sync::atomic::{AtomicU16, AtomicU64, Ordering};

/// Size in bytes of the granule a reservation covers — the real Xenon L2
/// cache-line size (128 bytes).
pub const LINE_BYTES: u32 = 1 << 7;
/// AND-mask that rounds an address down to the start of its cache line.
pub const LINE_MASK: u32 = !(LINE_BYTES - 1);
/// Bank count of the reservation table. Kept a power of two so the bank
/// index is a single AND; at 8 B per entry the table is 32 KiB.
pub const NUM_LINES: usize = 1 << 12;
/// Mask selecting the bank-index bits of a line number.
const HASH_MASK: u32 = NUM_LINES as u32 - 1;

/// Encode a reservation as one `u64` slot value. Field placement:
///   bits 63..32: the line-aligned guest address (its low 7 bits are
///                always zero)
///   bits 31..8:  the generation, truncated to 24 bits
///   bits 7..0:   the 8-bit `hw_id`
///
/// The all-zero encoding is the table's "no reservation" sentinel, so a
/// real reservation must have at least one non-zero field.
///
/// Debug builds assert that `line_addr` is line-aligned and that
/// `generation` fits in 24 bits.
#[inline]
pub fn pack(line_addr: u32, generation: u32, hw_id: u8) -> u64 {
    debug_assert!(line_addr & !LINE_MASK == 0, "line_addr must be line-aligned");
    debug_assert!(generation < (1 << 24), "generation must fit in 24 bits");
    let line_bits = u64::from(line_addr) << 32;
    let generation_bits = (u64::from(generation) & 0xFF_FFFF) << 8;
    let hw_bits = u64::from(hw_id);
    line_bits | generation_bits | hw_bits
}

/// Decode a slot value produced by [`pack`] back into
/// `(line_addr, generation, hw_id)`. The all-zero sentinel (an empty
/// bank) decodes to `None`.
#[inline]
pub fn unpack(raw: u64) -> Option<(u32, u32, u8)> {
    (raw != 0).then(|| {
        // Truncating casts pick out the low bits of each shifted field.
        let hw_id = raw as u8;
        let generation = (raw >> 8) as u32 & 0xFF_FFFF;
        let line = (raw >> 32) as u32;
        (line, generation, hw_id)
    })
}

/// Map a line-aligned address to its bank index: the line number (the
/// address with its 7 offset bits shifted out) masked by `HASH_MASK`.
#[inline]
fn hash(line_addr: u32) -> usize {
    let line_number = line_addr >> 7;
    (line_number & HASH_MASK) as usize
}

#[inline]
fn align_to_line(addr: u32) -> u32 {
    addr & LINE_MASK
}

/// Banked reservation table shared across all emulated HW threads. Built
/// once per emulation instance; lives behind an `Arc` so worker host
/// threads (M3) can hold their own clones without lifetime gymnastics.
pub struct ReservationTable {
    /// One packed `(line, generation, hw_id)` slot per bank
    /// ([`NUM_LINES`] entries), indexed by `hash(line)`. A value of `0`
    /// marks an empty bank.
    lines: Vec<AtomicU64>,
    /// Fast-path counter of banks that currently hold a reservation;
    /// read by `has_active_reservers` and adjusted whenever a slot is
    /// claimed, committed, released, or invalidated.
    active_reservers: AtomicU16,
    /// Table-wide monotonic counter. The low 24 bits of a fetched value
    /// become a reservation's generation; the counter starts at `1`.
    next_gen: AtomicU64,
    /// Runtime activation flag. Default `false`. M2.8's
    /// `--reservations-table` flag (or M3 spawn) flips this to `true`,
    /// at which point the interpreter's `lwarx`/`stwcx.` arms route
    /// through the table; otherwise they use the legacy per-`PpcContext`
    /// reservation fields.
    enabled: std::sync::atomic::AtomicBool,
}

impl Default for ReservationTable {
    fn default() -> Self {
        Self::new()
    }
}

impl ReservationTable {
    /// Construct a fresh table: every bank empty, no active reservers,
    /// and the table disabled.
    pub fn new() -> Self {
        Self {
            lines: (0..NUM_LINES).map(|_| AtomicU64::new(0)).collect(),
            active_reservers: AtomicU16::new(0),
            // Start at 1 so the very first reservation gets a non-zero
            // gen and the packed slot value is non-zero (zero is the
            // "no reservation" sentinel).
            next_gen: AtomicU64::new(1),
            enabled: std::sync::atomic::AtomicBool::new(false),
        }
    }

    /// Activate the table. The interpreter's `lwarx`/`stwcx.` arms will
    /// route through this table on subsequent dispatches. Idempotent.
    pub fn enable(&self) {
        self.enabled.store(true, Ordering::Release);
    }

    /// Deactivate the table. The interpreter falls back to per-`PpcContext`
    /// reservation fields. Idempotent.
    pub fn disable(&self) {
        self.enabled.store(false, Ordering::Release);
    }

    /// Whether the table is currently active. The interpreter consults
    /// this on every `lwarx`/`stwcx.` to decide which path runs.
    pub fn is_enabled(&self) -> bool {
        self.enabled.load(Ordering::Acquire)
    }

    /// True when at least one reservation may currently be outstanding.
    /// Plain `write_u32` consults this to skip the invalidation hook
    /// when no thread holds a reservation — the common case for
    /// non-atomic code. The counter is conservative: it may briefly
    /// over-report while a `reserve` is in flight, never under-report a
    /// published slot.
    #[inline]
    pub fn has_active_reservers(&self) -> bool {
        self.active_reservers.load(Ordering::Relaxed) != 0
    }

    /// Draw the next generation from the table-wide counter, truncated
    /// to 24 bits. Zero is skipped so a packed slot value can never
    /// collide with the all-zero "no reservation" sentinel, even after
    /// the 24-bit field wraps.
    fn next_generation(&self) -> u32 {
        loop {
            let raw = self.next_gen.fetch_add(1, Ordering::Relaxed);
            let generation = (raw & 0xFF_FFFF) as u32;
            if generation != 0 {
                return generation;
            }
        }
    }

    /// `lwarx(addr)` — claim a reservation on the line containing `addr`.
    /// Returns the (always non-zero) generation stamped into the slot;
    /// the interpreter stores this alongside the per-`PpcContext`
    /// `has_reservation` bit so a subsequent `stwcx.` can verify the
    /// same gen still holds.
    ///
    /// If a different reservation already occupied the bank, it's
    /// silently overwritten — that thread's `stwcx.` will fail because
    /// the slot no longer matches its stamped gen. Matches Xenon
    /// behavior (a different core's lwarx on the same line displaces
    /// any prior reservation).
    pub fn reserve(&self, addr: u32, hw_id: u8) -> u32 {
        let line = align_to_line(addr);
        let generation = self.next_generation();
        let new_raw = pack(line, generation, hw_id);
        // Count ourselves *before* publishing the slot: a writer that
        // consults `has_active_reservers` must never find the counter at
        // zero while a reservation is already visible in a bank.
        self.active_reservers.fetch_add(1, Ordering::Relaxed);
        // AcqRel: the Release half orders the counter bump (and prior
        // accesses) before any thread that observes the new slot value.
        let prev = self.lines[hash(line)].swap(new_raw, Ordering::AcqRel);
        if prev != 0 {
            // The bank was already occupied (and already counted); the
            // displaced reserver is implicitly invalidated, so undo our
            // provisional increment. Net effect: the counter settles at
            // the number of non-zero bank slots, an upper bound on the
            // number of live reservations.
            self.active_reservers.fetch_sub(1, Ordering::Relaxed);
        }
        generation
    }

    /// `stwcx.(addr)` — try to commit a reservation. Returns `true` if
    /// the slot still holds `(line, my_gen, my_hw_id)` (in which case
    /// it's CAS'd back to zero, releasing the bank), `false` otherwise.
    /// The data store itself is the caller's responsibility — see
    /// [`crate::interpreter`]'s `stwcx.` arm.
    pub fn try_commit(&self, addr: u32, my_gen: u32, my_hw_id: u8) -> bool {
        let line = align_to_line(addr);
        let expected = pack(line, my_gen, my_hw_id);
        if expected == 0 {
            // The all-zero key is the empty-bank sentinel, never a real
            // reservation. Without this guard a CAS(0 -> 0) on an empty
            // bank would "succeed" and underflow the active counter.
            return false;
        }
        let committed = self.lines[hash(line)]
            .compare_exchange(expected, 0, Ordering::AcqRel, Ordering::Relaxed)
            .is_ok();
        if committed {
            // The bank went from occupied to empty; drop it from the count.
            self.active_reservers.fetch_sub(1, Ordering::Relaxed);
        }
        committed
    }

    /// Hook for plain (non-reserving) stores: invalidate any
    /// reservation on the containing line. Cheap when the bank is
    /// already empty (single Acquire load + branch).
    pub fn invalidate_for_write(&self, addr: u32) {
        let line = align_to_line(addr);
        let bank = &self.lines[hash(line)];
        let observed = bank.load(Ordering::Acquire);
        // Only a reservation on *this* line is cleared. An empty bank
        // has nothing to invalidate, and a bank holding a different line
        // that merely hashes to the same slot is deliberately left
        // alone: a store to line B cannot affect line A's data, and
        // skipping it avoids spurious `stwcx.` failures.
        match unpack(observed) {
            Some((reserved_line, _generation, _hw)) if reserved_line == line => {}
            _ => return,
        }
        // CAS-clear the bank if it still holds the value we observed.
        // If a concurrent `stwcx.` or `reserve` raced with us, the CAS
        // fails — that's fine; the slot has already moved on from the
        // reservation we saw, so there is nothing left for us to clear.
        if bank
            .compare_exchange(observed, 0, Ordering::AcqRel, Ordering::Relaxed)
            .is_ok()
        {
            self.active_reservers.fetch_sub(1, Ordering::Relaxed);
        }
    }

    /// Drop a per-`PpcContext` reservation without committing. Called
    /// when the interpreter clears `has_reservation` due to a
    /// non-`stwcx.` event (context switch, exception, etc.). Safe to
    /// call when the table doesn't hold our reservation anymore (the
    /// CAS simply fails).
    pub fn release(&self, addr: u32, my_gen: u32, my_hw_id: u8) {
        let _ = self.try_commit(addr, my_gen, my_hw_id);
    }
}

/// Unit tests for the slot encoding and for `ReservationTable`'s
/// reserve / commit / invalidate behavior.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use std::thread;

    /// `unpack` recovers exactly the fields given to `pack`.
    #[test]
    fn pack_unpack_roundtrip() {
        let raw = pack(0x1000_0000, 42, 5);
        let (line, generation, hw) = unpack(raw).unwrap();
        assert_eq!(line, 0x1000_0000);
        assert_eq!(generation, 42);
        assert_eq!(hw, 5);
    }

    /// The all-zero slot value is the "no reservation" sentinel.
    #[test]
    fn unpack_zero_is_none() {
        assert!(unpack(0).is_none());
    }

    /// A reservation commits once; the commit releases the bank.
    #[test]
    fn reserve_then_commit_succeeds() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        assert!(t.try_commit(0x1234, gn, 0));
        // Already released — second commit fails.
        assert!(!t.try_commit(0x1234, gn, 0));
    }

    /// A failed commit from another `hw_id` leaves the reservation intact.
    #[test]
    fn other_hw_id_cannot_commit() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        assert!(
            !t.try_commit(0x1234, gn, 1),
            "stwcx. from a different hw_id must fail"
        );
        // Original owner can still commit.
        assert!(t.try_commit(0x1234, gn, 0));
    }

    /// A second `reserve` on the same line displaces the first.
    #[test]
    fn lwarx_displaces_prior_reservation() {
        let t = ReservationTable::new();
        let g0 = t.reserve(0x1234, 0);
        // Different HW thread's lwarx on the same line.
        let g1 = t.reserve(0x1234, 1);
        // Original reserver's stwcx. fails because the gen changed.
        assert!(!t.try_commit(0x1234, g0, 0));
        // New reserver's stwcx. succeeds.
        assert!(t.try_commit(0x1234, g1, 1));
    }

    /// A plain store anywhere in the reserved line clears the
    /// reservation and the active-reserver count.
    #[test]
    fn invalidate_clears_matching_reservation() {
        let t = ReservationTable::new();
        let gn = t.reserve(0x1234, 0);
        t.invalidate_for_write(0x1238); // same line as 0x1234
        assert!(!t.try_commit(0x1234, gn, 0));
        assert_eq!(t.active_reservers.load(Ordering::Relaxed), 0);
    }

    /// A plain store to a different line that hashes to the same bank
    /// does not disturb the reservation held there.
    #[test]
    fn invalidate_different_line_in_same_bank_is_noop() {
        let t = ReservationTable::new();
        // Force a hash collision: addr A and addr B with same hash but
        // different line addresses.
        let line_a = 0x0000_1000;
        let line_b = line_a + ((NUM_LINES as u32) << 7); // +0x80000 → same hash
        assert_eq!(hash(line_a), hash(line_b));
        let gn = t.reserve(line_a, 0);
        // Invalidating line_b must NOT clear line_a's reservation.
        t.invalidate_for_write(line_b);
        assert!(t.try_commit(line_a, gn, 0));
    }

    /// `has_active_reservers` stays true until the last outstanding
    /// reservation (here on two distinct banks) is committed.
    #[test]
    fn has_active_reservers_tracks_count() {
        let t = ReservationTable::new();
        assert!(!t.has_active_reservers());
        let g0 = t.reserve(0x1000, 0);
        assert!(t.has_active_reservers());
        let g1 = t.reserve(0x2000, 1);
        assert!(t.has_active_reservers());
        t.try_commit(0x1000, g0, 0);
        assert!(t.has_active_reservers());
        t.try_commit(0x2000, g1, 1);
        assert!(!t.has_active_reservers());
    }

    /// Stress test: 8 host threads each loop reserve + try_commit on the
    /// same line. A thread's commit fails whenever another thread's
    /// `reserve` displaced its reservation in between, so the number of
    /// successful commits is not deterministic.
    ///
    /// The test therefore checks two things only: forward progress (more
    /// than 10% of all attempts commit) and bookkeeping (the
    /// active-reserver counter returns to zero once every thread has
    /// finished).
    #[test]
    fn concurrent_lwarx_stwcx_serializes() {
        let t = Arc::new(ReservationTable::new());
        const ROUNDS: u32 = 1000;
        const THREADS: u8 = 8;
        let total_successes = Arc::new(AtomicU64::new(0));

        let mut handles = Vec::new();
        for hw_id in 0..THREADS {
            let t_clone = t.clone();
            let s_clone = total_successes.clone();
            handles.push(
                thread::Builder::new()
                    .name(format!("res-stress-{hw_id}"))
                    .spawn(move || {
                        let mut wins = 0u64;
                        for _ in 0..ROUNDS {
                            let gn = t_clone.reserve(0x1234_5678, hw_id);
                            if t_clone.try_commit(0x1234_5678, gn, hw_id) {
                                wins += 1;
                            }
                        }
                        s_clone.fetch_add(wins, Ordering::Relaxed);
                    })
                    .expect("spawn"),
            );
        }
        for h in handles {
            h.join().expect("join");
        }
        let total = total_successes.load(Ordering::Relaxed);
        // No exact count can be asserted: a reservation may be displaced
        // by another thread's `reserve` before its owner commits, so any
        // individual attempt may fail. Assert progress instead: more
        // than 10% of attempts succeed, and active_reservers is back
        // to zero.
        let attempts = ROUNDS as u64 * THREADS as u64;
        assert!(
            total > attempts / 10,
            "expected at least 10% successful commits, got {total}/{attempts}"
        );
        assert_eq!(
            t.active_reservers.load(Ordering::Relaxed),
            0,
            "all reservations should have been resolved"
        );
    }
}