diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs
index a31754d..2c504fd 100644
--- a/crates/xenia-app/src/main.rs
+++ b/crates/xenia-app/src/main.rs
@@ -2740,7 +2740,7 @@ fn worker_prologue(
     // the helper, no overhead on the hot path.
     kernel.fire_ctor_probe_if_match(hw_id, mem);
     kernel.fire_branch_probe_if_match(hw_id);
-    kernel.fire_lr_trace_if_match(hw_id);
+    kernel.fire_lr_trace_if_match(hw_id, mem);
 
     if mem.has_mem_watch() {
         let ctx = kernel.scheduler.ctx(hw_id);
diff --git a/crates/xenia-kernel/src/state.rs b/crates/xenia-kernel/src/state.rs
index 4f01105..65af299 100644
--- a/crates/xenia-kernel/src/state.rs
+++ b/crates/xenia-kernel/src/state.rs
@@ -1509,7 +1509,7 @@ impl KernelState {
     /// `self.lr_trace_pcs`, emit one JSONL record. Format mirrors what
     /// xenia-canary's `--log_lr_on_pc` patch emits, plus the cycle
     /// counter. Read-only; lockstep digest unaffected.
-    pub fn fire_lr_trace_if_match(&self, hw_id: u8) {
+    pub fn fire_lr_trace_if_match(&self, hw_id: u8, mem: &GuestMemory) {
         if self.lr_trace_pcs.is_empty() {
             return;
         }
@@ -1518,6 +1518,155 @@ impl KernelState {
         if !self.lr_trace_pcs.contains(&pc) {
             return;
         }
+        // 2.AT THROWAWAY DEREF PROBE: at the opt_callback block HEAD
+        // (PC 0x822F2248 — the bcctrl at 0x822F2278 is mid-block and the
+        // block-cache fast path never lands the live ctx.pc on it, so we
+        // resolve at the head where the object/vtable are already stable),
+        // resolve the vtable+0x1C method PC so we can lr-trace it.
+        // Read-only (loads only). Logs first 8 hits.
+        if pc == 0x822F2248 {
+            use std::sync::atomic::{AtomicU32, Ordering};
+            static DEREF_HITS: AtomicU32 = AtomicU32::new(0);
+            let n = DEREF_HITS.fetch_add(1, Ordering::Relaxed);
+            if n < 8 {
+                let obj = mem.read_u32(0x828E1F08);
+                let vtable = mem.read_u32(obj);
+                let method = mem.read_u32(vtable.wrapping_add(0x1C));
+                let r4 = ctx.gpr[4] as u32;
+                println!(
+                    "DEREF-PROBE pc=0x822f2248 hit={} obj=0x{:08x} vtable=0x{:08x} method=0x{:08x} r4=0x{:08x}",
+                    n, obj, vtable, method, r4,
+                );
+            }
+        }
+        // 2.AT LEVEL-2 PROBE: method 0x821753c8 is itself a virtual
+        // trampoline:  r11=[r3+8]; r11=[r11+44]; if r11==0 beqlr (no-op!);
+        // r3=[r11]; r11=[[r3]+48]; bctr. Capture this (r3), field_8,
+        // next_obj=[field_8+44] (the NULL-check target), and final
+        // vtable+0x30 method. Read-only. First 8 hits.
+        if pc == 0x821753c8 {
+            use std::sync::atomic::{AtomicU32, Ordering};
+            static L2_HITS: AtomicU32 = AtomicU32::new(0);
+            let n = L2_HITS.fetch_add(1, Ordering::Relaxed);
+            if n < 8 {
+                let this = ctx.gpr[3] as u32;
+                let field8 = mem.read_u32(this.wrapping_add(8));
+                let next_obj = mem.read_u32(field8.wrapping_add(44));
+                let (l2_obj, l2_vtable, l2_method) = if next_obj != 0 {
+                    let o = mem.read_u32(next_obj);
+                    let vt = mem.read_u32(o);
+                    let m = mem.read_u32(vt.wrapping_add(0x30));
+                    (o, vt, m)
+                } else {
+                    (0, 0, 0)
+                };
+                println!(
+                    "L2-PROBE pc=0x821753c8 hit={} this=0x{:08x} field8=0x{:08x} next_obj=0x{:08x} l2_obj=0x{:08x} l2_vtable=0x{:08x} l2_method=0x{:08x}",
+                    n, this, field8, next_obj, l2_obj, l2_vtable, l2_method,
+                );
+            }
+        }
+        // 2.BC THROWAWAY PROBE A: right after `bl sub_822F13B0` returns
+        // inside opt_callback sub_822F2248 (block continues at 0x822F22D0
+        // after the enqueue). r31 = the work-object returned. The branch at
+        // 0x822F22D4 gates on [r31+8] (state field) which selects which of
+        // the two events ([r31+0] or [r31+4]) gets NtSetEvent'd. Capture
+        // the state field + both candidate handles + the queue head/tail at
+        // +84/+88 of the GLOBAL object [ [0x828E1F08]... actually the global
+        // at [0x828F+14404] ]. Read-only. First 12 hits.
+        if pc == 0x822F22D0 {
+            use std::sync::atomic::{AtomicU32, Ordering};
+            static P_A: AtomicU32 = AtomicU32::new(0);
+            let n = P_A.fetch_add(1, Ordering::Relaxed);
+            if n < 12 {
+                // At block head 0x822F22D0 the `or r31,r3,r3` has NOT yet run,
+                // so gpr[31] is stale (incoming `this`). The bl to sub_822F13B0
+                // just returned, so the FRESH object ptr is in gpr[3].
+                let r31 = ctx.gpr[3] as u32; // = r3 returned from sub_822F13B0 (global 0x828f3844)
+                let state = mem.read_u32(r31.wrapping_add(8));
+                let h0 = mem.read_u32(r31.wrapping_add(0));
+                let h1 = mem.read_u32(r31.wrapping_add(4));
+                let q84 = mem.read_u32(r31.wrapping_add(84));
+                let q88 = mem.read_u32(r31.wrapping_add(88));
+                let tid = self.scheduler.tid(hw_id).unwrap_or(0);
+                println!(
+                    "PROBE-A pc=0x822f22d0 hit={} tid={} obj=0x{:08x} state(+8)=0x{:08x} h0(+0)=0x{:08x} h1(+4)=0x{:08x} q84=0x{:08x} q88=0x{:08x}",
+                    n, tid, r31, state, h0, h1, q84, q88,
+                );
+            }
+        }
+        // 2.BC THROWAWAY PROBE B: at the NtSetEvent wrapper sub_824AA2F0
+        // head. opt_callback reaches it via 0x822F22F0 (lr=0x822f22f4) or
+        // 0x822F2300 (lr=0x822f2304). r3 = the event HANDLE being signaled.
+        // This is the decisive "does ours reach NtSetEvent from opt_callback
+        // and on which handle (0x10e8?)" check. Read-only. First 16 hits +
+        // tally of calls reached from the opt_callback lrs.
+        if pc == 0x824AA2F0 {
+            use std::sync::atomic::{AtomicU32, Ordering};
+            static P_B: AtomicU32 = AtomicU32::new(0);
+            static P_B_OPTCB: AtomicU32 = AtomicU32::new(0);
+            let lr = ctx.lr as u32;
+            let from_optcb = lr == 0x822F22F4 || lr == 0x822F2304;
+            if from_optcb {
+                P_B_OPTCB.fetch_add(1, Ordering::Relaxed);
+            }
+            let n = P_B.fetch_add(1, Ordering::Relaxed);
+            if n < 16 || from_optcb {
+                let tid = self.scheduler.tid(hw_id).unwrap_or(0);
+                let handle = ctx.gpr[3] as u32;
+                println!(
+                    "PROBE-B pc=0x824aa2f0(NtSetEvent-wrapper) hit={} tid={} handle=0x{:08x} lr=0x{:08x} from_optcb={} optcb_total={}",
+                    n, tid, handle, lr, from_optcb,
+                    P_B_OPTCB.load(Ordering::Relaxed),
+                );
+            }
+        }
+        // 2.BC THROWAWAY PROBE C: at 0x824ac574 (bl NtWaitForSingleObjectEx)
+        // r3 = the handle tid=1 wedges on. Capture which handle + tid. This
+        // is the consumer side: does opt_callback's signalled handle (0x108c
+        // per PROBE-B) EQUAL the handle the waiter blocks on? Read-only.
+        // Logs first 8 distinct + a tally per handle. First 24 hits.
+        if pc == 0x824AC574 {
+            use std::sync::atomic::{AtomicU32, Ordering};
+            static P_C: AtomicU32 = AtomicU32::new(0);
+            // Per-handle tally for the two events of interest + 0x10e8.
+            static W_108C: AtomicU32 = AtomicU32::new(0);
+            static W_1090: AtomicU32 = AtomicU32::new(0);
+            static W_10E8: AtomicU32 = AtomicU32::new(0);
+            static W_108C_T1: AtomicU32 = AtomicU32::new(0);
+            static W_10E8_T1: AtomicU32 = AtomicU32::new(0);
+            let tid = self.scheduler.tid(hw_id).unwrap_or(0);
+            let handle = ctx.gpr[3] as u32;
+            match handle {
+                0x108c => {
+                    W_108C.fetch_add(1, Ordering::Relaxed);
+                    if tid == 1 { W_108C_T1.fetch_add(1, Ordering::Relaxed); }
+                }
+                0x1090 => { W_1090.fetch_add(1, Ordering::Relaxed); }
+                0x10e8 => {
+                    W_10E8.fetch_add(1, Ordering::Relaxed);
+                    if tid == 1 { W_10E8_T1.fetch_add(1, Ordering::Relaxed); }
+                }
+                _ => {}
+            }
+            let n = P_C.fetch_add(1, Ordering::Relaxed);
+            if n < 24 {
+                println!(
+                    "PROBE-C pc=0x824ac574(NtWaitForSingleObjectEx) hit={} tid={} wait_handle=0x{:08x}",
+                    n, tid, handle,
+                );
+            }
+            // Periodic tally dump.
+            if n % 2000 == 0 {
+                println!(
+                    "PROBE-C-TALLY n={} waits_on: 0x108c={}(tid1={}) 0x1090={} 0x10e8={}(tid1={})",
+                    n,
+                    W_108C.load(Ordering::Relaxed), W_108C_T1.load(Ordering::Relaxed),
+                    W_1090.load(Ordering::Relaxed),
+                    W_10E8.load(Ordering::Relaxed), W_10E8_T1.load(Ordering::Relaxed),
+                );
+            }
+        }
         let tid = self.scheduler.tid(hw_id).unwrap_or(0);
         let r3 = ctx.gpr[3] as u32;
         let r4 = ctx.gpr[4] as u32;