Compare commits
1 Commits
iterate-2A
...
iterate-2A
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d293f1bcaf |
@@ -2465,20 +2465,10 @@ fn coord_pre_round(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if kernel.xaudio_tick_enabled {
|
if kernel.xaudio_tick_enabled {
|
||||||
let fired = if kernel.parallel_active {
|
if kernel.parallel_active {
|
||||||
kernel.xaudio.tick_wallclock()
|
kernel.xaudio.tick_wallclock();
|
||||||
} else {
|
} else {
|
||||||
kernel.xaudio.tick_instr(stats.instruction_count)
|
kernel.xaudio.tick_instr(stats.instruction_count);
|
||||||
};
|
|
||||||
// AUDIT-2AU Option β: on each audio period, re-signal the XAudio
|
|
||||||
// render loop's captured frame-event pair (buffer-ready /
|
|
||||||
// frame-done). Emulates canary's host XAudio2 OnBufferEnd firing
|
|
||||||
// those events every period; without it ours's render loop
|
|
||||||
// (tid=11) wedges on its second KeWait forever and starves the
|
|
||||||
// tid=9/10 mixers + tid=12 DPC downstream (2.AS cascade). Gated
|
|
||||||
// by the same instruction-count tick => deterministic.
|
|
||||||
if fired {
|
|
||||||
xenia_kernel::exports::pulse_xaudio_frame_events(kernel);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2750,7 +2740,7 @@ fn worker_prologue(
|
|||||||
// the helper, no overhead on the hot path.
|
// the helper, no overhead on the hot path.
|
||||||
kernel.fire_ctor_probe_if_match(hw_id, mem);
|
kernel.fire_ctor_probe_if_match(hw_id, mem);
|
||||||
kernel.fire_branch_probe_if_match(hw_id);
|
kernel.fire_branch_probe_if_match(hw_id);
|
||||||
kernel.fire_lr_trace_if_match(hw_id);
|
kernel.fire_lr_trace_if_match(hw_id, mem);
|
||||||
|
|
||||||
if mem.has_mem_watch() {
|
if mem.has_mem_watch() {
|
||||||
let ctx = kernel.scheduler.ctx(hw_id);
|
let ctx = kernel.scheduler.ctx(hw_id);
|
||||||
|
|||||||
@@ -5346,27 +5346,6 @@ fn emit_signal_match_if_waiters(
|
|||||||
crate::event_log::emit_signal_match(tid, cycle, signal_call, target_handle, n, &tids);
|
crate::event_log::emit_signal_match(tid, cycle, signal_call, target_handle, n, &tids);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// AUDIT-2AU Option β: re-signal the XAudio render loop's frame-event
|
|
||||||
/// pair, emulating the host XAudio2 OnBufferEnd callback firing once per
|
|
||||||
/// audio period. Called from the round prologue gated by the same
|
|
||||||
/// instruction-count audio cadence that drives `tick_instr`, so timing
|
|
||||||
/// is deterministic (never host_ns). Mirrors `ke_set_event`'s signal +
|
|
||||||
/// wake sequence for each captured event handle (see
|
|
||||||
/// `do_wait_multiple` capture site + `XAudioState::frame_events`).
|
|
||||||
pub fn pulse_xaudio_frame_events(state: &mut KernelState) {
|
|
||||||
if state.xaudio.frame_events.is_empty() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let events = state.xaudio.frame_events.clone();
|
|
||||||
for h in events {
|
|
||||||
if let Some(KernelObject::Event { signaled, .. }) = state.objects.get_mut(&h) {
|
|
||||||
*signaled = true;
|
|
||||||
emit_signal_match_if_waiters(state, "XAudioFramePulse", h);
|
|
||||||
wake_eligible_waiters(state, h);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn ke_set_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) {
|
fn ke_set_event(ctx: &mut PpcContext, mem: &GuestMemory, state: &mut KernelState) {
|
||||||
// r3 = PKEVENT on Ke* (guest pointer). See `ensure_dispatcher_object`
|
// r3 = PKEVENT on Ke* (guest pointer). See `ensure_dispatcher_object`
|
||||||
// for why we need the lazy-shadow step here.
|
// for why we need the lazy-shadow step here.
|
||||||
@@ -5673,30 +5652,6 @@ fn do_wait_multiple(
|
|||||||
Some(None) => None,
|
Some(None) => None,
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
// AUDIT-2AU Option β: capture the XAudio render loop's frame-event
|
|
||||||
// pair at the wait site. Sylpheed's render-driver thread (tid=11,
|
|
||||||
// entry 0x824d2a94 = canary tid=4) blocks here on a WaitAny over two
|
|
||||||
// guest-address Events (the "buffer ready" manual-reset + "frame
|
|
||||||
// done" auto-reset pair). In canary these are signaled every audio
|
|
||||||
// period by the host XAudio2 OnBufferEnd callback; in ours nothing
|
|
||||||
// signals them after the first fast-path consumes the auto-reset
|
|
||||||
// member, so the loop wedges forever (2.AL). Record the pair (no
|
|
||||||
// hardcoded addresses) so the round-prologue audio-cadence ticker
|
|
||||||
// re-signals them. Discriminator: a *multi*-handle WaitAny whose
|
|
||||||
// members are all guest-address Events, with at least one XAudio
|
|
||||||
// client registered — tid=2's lone guest-Event wait goes through
|
|
||||||
// do_wait_single, so this won't catch it.
|
|
||||||
if !wait_all
|
|
||||||
&& handles.len() >= 2
|
|
||||||
&& state.xaudio.any_registered()
|
|
||||||
&& handles.iter().all(|&h| {
|
|
||||||
h >= 0x8000_0000 && matches!(state.objects.get(&h), Some(KernelObject::Event { .. }))
|
|
||||||
})
|
|
||||||
{
|
|
||||||
for &h in &handles {
|
|
||||||
state.xaudio.note_frame_event(h);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let current_ref = state.scheduler.current_ref();
|
let current_ref = state.scheduler.current_ref();
|
||||||
for &h in &handles {
|
for &h in &handles {
|
||||||
handle_enqueue_waiter(state, h, current_ref);
|
handle_enqueue_waiter(state, h, current_ref);
|
||||||
|
|||||||
@@ -1509,7 +1509,7 @@ impl KernelState {
|
|||||||
/// `self.lr_trace_pcs`, emit one JSONL record. Format mirrors what
|
/// `self.lr_trace_pcs`, emit one JSONL record. Format mirrors what
|
||||||
/// xenia-canary's `--log_lr_on_pc` patch emits, plus the cycle
|
/// xenia-canary's `--log_lr_on_pc` patch emits, plus the cycle
|
||||||
/// counter. Read-only; lockstep digest unaffected.
|
/// counter. Read-only; lockstep digest unaffected.
|
||||||
pub fn fire_lr_trace_if_match(&self, hw_id: u8) {
|
pub fn fire_lr_trace_if_match(&self, hw_id: u8, mem: &GuestMemory) {
|
||||||
if self.lr_trace_pcs.is_empty() {
|
if self.lr_trace_pcs.is_empty() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1518,6 +1518,155 @@ impl KernelState {
|
|||||||
if !self.lr_trace_pcs.contains(&pc) {
|
if !self.lr_trace_pcs.contains(&pc) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// 2.AT THROWAWAY DEREF PROBE: at the opt_callback block HEAD
|
||||||
|
// (PC 0x822F2248 — the bcctrl at 0x822F2278 is mid-block and the
|
||||||
|
// block-cache fast path never lands the live ctx.pc on it, so we
|
||||||
|
// resolve at the head where the object/vtable are already stable),
|
||||||
|
// resolve the vtable+0x1C method PC so we can lr-trace it.
|
||||||
|
// Read-only (loads only). Logs first 8 hits.
|
||||||
|
if pc == 0x822F2248 {
|
||||||
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
|
static DEREF_HITS: AtomicU32 = AtomicU32::new(0);
|
||||||
|
let n = DEREF_HITS.fetch_add(1, Ordering::Relaxed);
|
||||||
|
if n < 8 {
|
||||||
|
let obj = mem.read_u32(0x828E1F08);
|
||||||
|
let vtable = mem.read_u32(obj);
|
||||||
|
let method = mem.read_u32(vtable.wrapping_add(0x1C));
|
||||||
|
let r4 = ctx.gpr[4] as u32;
|
||||||
|
println!(
|
||||||
|
"DEREF-PROBE pc=0x822f2248 hit={} obj=0x{:08x} vtable=0x{:08x} method=0x{:08x} r4=0x{:08x}",
|
||||||
|
n, obj, vtable, method, r4,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 2.AT LEVEL-2 PROBE: method 0x821753c8 is itself a virtual
|
||||||
|
// trampoline: r11=[r3+8]; r11=[r11+44]; if r11==0 beqlr (no-op!);
|
||||||
|
// r3=[r11]; r11=[[r3]+48]; bctr. Capture this (r3), field_8,
|
||||||
|
// next_obj=[field_8+44] (the NULL-check target), and final
|
||||||
|
// vtable+0x30 method. Read-only. First 8 hits.
|
||||||
|
if pc == 0x821753c8 {
|
||||||
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
|
static L2_HITS: AtomicU32 = AtomicU32::new(0);
|
||||||
|
let n = L2_HITS.fetch_add(1, Ordering::Relaxed);
|
||||||
|
if n < 8 {
|
||||||
|
let this = ctx.gpr[3] as u32;
|
||||||
|
let field8 = mem.read_u32(this.wrapping_add(8));
|
||||||
|
let next_obj = mem.read_u32(field8.wrapping_add(44));
|
||||||
|
let (l2_obj, l2_vtable, l2_method) = if next_obj != 0 {
|
||||||
|
let o = mem.read_u32(next_obj);
|
||||||
|
let vt = mem.read_u32(o);
|
||||||
|
let m = mem.read_u32(vt.wrapping_add(0x30));
|
||||||
|
(o, vt, m)
|
||||||
|
} else {
|
||||||
|
(0, 0, 0)
|
||||||
|
};
|
||||||
|
println!(
|
||||||
|
"L2-PROBE pc=0x821753c8 hit={} this=0x{:08x} field8=0x{:08x} next_obj=0x{:08x} l2_obj=0x{:08x} l2_vtable=0x{:08x} l2_method=0x{:08x}",
|
||||||
|
n, this, field8, next_obj, l2_obj, l2_vtable, l2_method,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 2.BC THROWAWAY PROBE A: right after `bl sub_822F13B0` returns
|
||||||
|
// inside opt_callback sub_822F2248 (block continues at 0x822F22D0
|
||||||
|
// after the enqueue). r31 = the work-object returned. The branch at
|
||||||
|
// 0x822F22D4 gates on [r31+8] (state field) which selects which of
|
||||||
|
// the two events ([r31+0] or [r31+4]) gets NtSetEvent'd. Capture
|
||||||
|
// the state field + both candidate handles + the queue head/tail at
|
||||||
|
// +84/+88 of the GLOBAL object [ [0x828E1F08]... actually the global
|
||||||
|
// at [0x828F+14404] ]. Read-only. First 12 hits.
|
||||||
|
if pc == 0x822F22D0 {
|
||||||
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
|
static P_A: AtomicU32 = AtomicU32::new(0);
|
||||||
|
let n = P_A.fetch_add(1, Ordering::Relaxed);
|
||||||
|
if n < 12 {
|
||||||
|
// At block head 0x822F22D0 the `or r31,r3,r3` has NOT yet run,
|
||||||
|
// so gpr[31] is stale (incoming `this`). The bl to sub_822F13B0
|
||||||
|
// just returned, so the FRESH object ptr is in gpr[3].
|
||||||
|
let r31 = ctx.gpr[3] as u32; // = r3 returned from sub_822F13B0 (global 0x828f3844)
|
||||||
|
let state = mem.read_u32(r31.wrapping_add(8));
|
||||||
|
let h0 = mem.read_u32(r31.wrapping_add(0));
|
||||||
|
let h1 = mem.read_u32(r31.wrapping_add(4));
|
||||||
|
let q84 = mem.read_u32(r31.wrapping_add(84));
|
||||||
|
let q88 = mem.read_u32(r31.wrapping_add(88));
|
||||||
|
let tid = self.scheduler.tid(hw_id).unwrap_or(0);
|
||||||
|
println!(
|
||||||
|
"PROBE-A pc=0x822f22d0 hit={} tid={} obj=0x{:08x} state(+8)=0x{:08x} h0(+0)=0x{:08x} h1(+4)=0x{:08x} q84=0x{:08x} q88=0x{:08x}",
|
||||||
|
n, tid, r31, state, h0, h1, q84, q88,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 2.BC THROWAWAY PROBE B: at the NtSetEvent wrapper sub_824AA2F0
|
||||||
|
// head. opt_callback reaches it via 0x822F22F0 (lr=0x822f22f4) or
|
||||||
|
// 0x822F2300 (lr=0x822f2304). r3 = the event HANDLE being signaled.
|
||||||
|
// This is the decisive "does ours reach NtSetEvent from opt_callback
|
||||||
|
// and on which handle (0x10e8?)" check. Read-only. First 16 hits +
|
||||||
|
// tally of calls reached from the opt_callback lrs.
|
||||||
|
if pc == 0x824AA2F0 {
|
||||||
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
|
static P_B: AtomicU32 = AtomicU32::new(0);
|
||||||
|
static P_B_OPTCB: AtomicU32 = AtomicU32::new(0);
|
||||||
|
let lr = ctx.lr as u32;
|
||||||
|
let from_optcb = lr == 0x822F22F4 || lr == 0x822F2304;
|
||||||
|
if from_optcb {
|
||||||
|
P_B_OPTCB.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
let n = P_B.fetch_add(1, Ordering::Relaxed);
|
||||||
|
if n < 16 || from_optcb {
|
||||||
|
let tid = self.scheduler.tid(hw_id).unwrap_or(0);
|
||||||
|
let handle = ctx.gpr[3] as u32;
|
||||||
|
println!(
|
||||||
|
"PROBE-B pc=0x824aa2f0(NtSetEvent-wrapper) hit={} tid={} handle=0x{:08x} lr=0x{:08x} from_optcb={} optcb_total={}",
|
||||||
|
n, tid, handle, lr, from_optcb,
|
||||||
|
P_B_OPTCB.load(Ordering::Relaxed),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 2.BC THROWAWAY PROBE C: at 0x824ac574 (bl NtWaitForSingleObjectEx)
|
||||||
|
// r3 = the handle tid=1 wedges on. Capture which handle + tid. This
|
||||||
|
// is the consumer side: does opt_callback's signalled handle (0x108c
|
||||||
|
// per PROBE-B) EQUAL the handle the waiter blocks on? Read-only.
|
||||||
|
// Logs first 8 distinct + a tally per handle. First 24 hits.
|
||||||
|
if pc == 0x824AC574 {
|
||||||
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
|
static P_C: AtomicU32 = AtomicU32::new(0);
|
||||||
|
// Per-handle tally for the two events of interest + 0x10e8.
|
||||||
|
static W_108C: AtomicU32 = AtomicU32::new(0);
|
||||||
|
static W_1090: AtomicU32 = AtomicU32::new(0);
|
||||||
|
static W_10E8: AtomicU32 = AtomicU32::new(0);
|
||||||
|
static W_108C_T1: AtomicU32 = AtomicU32::new(0);
|
||||||
|
static W_10E8_T1: AtomicU32 = AtomicU32::new(0);
|
||||||
|
let tid = self.scheduler.tid(hw_id).unwrap_or(0);
|
||||||
|
let handle = ctx.gpr[3] as u32;
|
||||||
|
match handle {
|
||||||
|
0x108c => {
|
||||||
|
W_108C.fetch_add(1, Ordering::Relaxed);
|
||||||
|
if tid == 1 { W_108C_T1.fetch_add(1, Ordering::Relaxed); }
|
||||||
|
}
|
||||||
|
0x1090 => { W_1090.fetch_add(1, Ordering::Relaxed); }
|
||||||
|
0x10e8 => {
|
||||||
|
W_10E8.fetch_add(1, Ordering::Relaxed);
|
||||||
|
if tid == 1 { W_10E8_T1.fetch_add(1, Ordering::Relaxed); }
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
let n = P_C.fetch_add(1, Ordering::Relaxed);
|
||||||
|
if n < 24 {
|
||||||
|
println!(
|
||||||
|
"PROBE-C pc=0x824ac574(NtWaitForSingleObjectEx) hit={} tid={} wait_handle=0x{:08x}",
|
||||||
|
n, tid, handle,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Periodic tally dump.
|
||||||
|
if n % 2000 == 0 {
|
||||||
|
println!(
|
||||||
|
"PROBE-C-TALLY n={} waits_on: 0x108c={}(tid1={}) 0x1090={} 0x10e8={}(tid1={})",
|
||||||
|
n,
|
||||||
|
W_108C.load(Ordering::Relaxed), W_108C_T1.load(Ordering::Relaxed),
|
||||||
|
W_1090.load(Ordering::Relaxed),
|
||||||
|
W_10E8.load(Ordering::Relaxed), W_10E8_T1.load(Ordering::Relaxed),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
let tid = self.scheduler.tid(hw_id).unwrap_or(0);
|
let tid = self.scheduler.tid(hw_id).unwrap_or(0);
|
||||||
let r3 = ctx.gpr[3] as u32;
|
let r3 = ctx.gpr[3] as u32;
|
||||||
let r4 = ctx.gpr[4] as u32;
|
let r4 = ctx.gpr[4] as u32;
|
||||||
|
|||||||
@@ -110,20 +110,6 @@ pub struct XAudioState {
|
|||||||
/// `xenia_cpu` (none currently) to keep this self-contained.
|
/// `xenia_cpu` (none currently) to keep this self-contained.
|
||||||
pub worker_handles: [Option<u32>; XAUDIO_MAX_CLIENTS],
|
pub worker_handles: [Option<u32>; XAUDIO_MAX_CLIENTS],
|
||||||
pub worker_refs: [Option<ThreadRef>; XAUDIO_MAX_CLIENTS],
|
pub worker_refs: [Option<ThreadRef>; XAUDIO_MAX_CLIENTS],
|
||||||
/// AUDIT-2AU Option β: guest-address Event handles that the XAudio
|
|
||||||
/// render-driver loop (Sylpheed tid=11, entry 0x824d2a94 = canary
|
|
||||||
/// tid=4) blocks on via `KeWaitForMultipleObjects(WaitAny)`. These
|
|
||||||
/// are the per-frame "buffer ready" / "frame done" events that, in
|
|
||||||
/// canary, are signaled by the host XAudio2 driver's OnBufferEnd
|
|
||||||
/// callback every audio period. In ours the render loop's *second*
|
|
||||||
/// KeWait blocks forever because the auto-reset member was consumed
|
|
||||||
/// by the first fast-path and the manual-reset member is never
|
|
||||||
/// signaled (2.AL: signal.match on these SIDs = 0 whole-run). We
|
|
||||||
/// discover the exact handle pair at the wait site (no hardcoded
|
|
||||||
/// guest addresses) and re-signal them at the audio cadence from the
|
|
||||||
/// round prologue so the render loop sustains. Deterministic: signal
|
|
||||||
/// timing is gated by the instruction-count ticker, never host_ns.
|
|
||||||
pub frame_events: Vec<u32>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for XAudioState {
|
impl Default for XAudioState {
|
||||||
@@ -138,7 +124,6 @@ impl Default for XAudioState {
|
|||||||
last_instant: None,
|
last_instant: None,
|
||||||
worker_handles: [None; XAUDIO_MAX_CLIENTS],
|
worker_handles: [None; XAUDIO_MAX_CLIENTS],
|
||||||
worker_refs: [None; XAUDIO_MAX_CLIENTS],
|
worker_refs: [None; XAUDIO_MAX_CLIENTS],
|
||||||
frame_events: Vec::new(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -175,15 +160,6 @@ impl XAudioState {
|
|||||||
self.clients.iter().any(|c| c.is_some())
|
self.clients.iter().any(|c| c.is_some())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// AUDIT-2AU Option β: remember a guest-address Event handle the XAudio
|
|
||||||
/// render loop blocks on, so the cadence ticker can re-signal it. Dedup
|
|
||||||
/// to keep the set tiny (Sylpheed's render loop waits on exactly two).
|
|
||||||
pub fn note_frame_event(&mut self, handle: u32) {
|
|
||||||
if !self.frame_events.contains(&handle) {
|
|
||||||
self.frame_events.push(handle);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn enqueue_all_active(&mut self) {
|
fn enqueue_all_active(&mut self) {
|
||||||
for i in 0..XAUDIO_MAX_CLIENTS {
|
for i in 0..XAUDIO_MAX_CLIENTS {
|
||||||
if self.clients[i].is_none() {
|
if self.clients[i].is_none() {
|
||||||
|
|||||||
Reference in New Issue
Block a user