xenia-app: observability subsystem, --parallel runtime, stress harness
observability.rs installs the tracing subscriber stack (env-filter +
JSON file appender + chrome trace + error layer) and the metrics
recorder shared by the workspace. main.rs grows the new CLI surface:
--parallel, --reservations-table, --trace-handles, --analyze=
{rust,sql,both}, xenia dis --json, --ui, plus the wiring that runs
the CPU through the new scheduler, drives the GPU's threaded backend,
and surfaces the framebuffer + HUD via xenia-ui.
Add tests/parallel_stress.rs (both forms #[ignore]-gated: short form
runs 20×@5M, long form runs 100×@50M) and tests/golden/sylpheed_n2m.json —
the digest the lockstep/parallel combos compare against.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
16
crates/xenia-app/tests/golden/sylpheed_n2m.json
Normal file
16
crates/xenia-app/tests/golden/sylpheed_n2m.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"path": "/home/fabi/RE Project Sylpheed/Project Sylpheed - Arc of Deception (USA, Europe) (En,Ja).iso",
|
||||
"instructions": 2000000,
|
||||
"imports": 5634,
|
||||
"unimpl": 0,
|
||||
"packets": 0,
|
||||
"draws": 0,
|
||||
"swaps": 0,
|
||||
"resolves": 0,
|
||||
"unique_render_targets": 0,
|
||||
"shader_blobs_live": 0,
|
||||
"interrupts_delivered": 0,
|
||||
"interrupts_dropped": 13,
|
||||
"texture_cache_entries": 0,
|
||||
"texture_decodes": 0
|
||||
}
|
||||
111
crates/xenia-app/tests/parallel_stress.rs
Normal file
111
crates/xenia-app/tests/parallel_stress.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! M3 real-parallelism stress harness.
|
||||
//!
|
||||
//! Runs `xenia-rs exec <iso> -n <N> --parallel --halt-on-deadlock --quiet`
|
||||
//! many times back-to-back to surface lost-wakeups, lock-order
|
||||
//! inversions, and ABA hazards that a single run wouldn't reliably
|
||||
//! reproduce. Failures dump per-run stdout/stderr to
|
||||
//! `target/parallel-stress-<label>-NNN.{stdout,stderr}` for post-mortem.
|
||||
//!
|
||||
//! Two configurations:
|
||||
//! - `parallel_stress_short`: 20 runs at -n 5_000_000. Quick smoke
|
||||
//! check — runs in a few minutes on the current substrate.
|
||||
//! - `parallel_stress_long` (ignored, opt-in): 100 runs at
|
||||
//! -n 50_000_000. The full gate from the master plan; expected
|
||||
//! runtime is hours until the perf gap (Step 05's deferred parking
|
||||
//! fix) closes.
|
||||
//!
|
||||
//! Both tests are `#[ignore]`d, so neither runs in a plain `cargo test`.
//! Run the full 100x gate with `cargo test --release -p xenia-app
//! --test parallel_stress -- --ignored --nocapture parallel_stress_long`;
//! run only the short variant with `cargo test --release -p xenia-app
//! --test parallel_stress -- --ignored --nocapture parallel_stress_short`.
|
||||
|
||||
use std::process::Command;
|
||||
use std::time::Instant;
|
||||
|
||||
const ISO_DEFAULT: &str = "/home/fabi/RE Project Sylpheed/Project Sylpheed - Arc of Deception (USA, Europe) (En,Ja).iso";
|
||||
|
||||
fn iso_path() -> String {
|
||||
std::env::var("SYLPHEED_ISO").unwrap_or_else(|_| ISO_DEFAULT.to_string())
|
||||
}
|
||||
|
||||
fn run_stress(label: &str, runs: u32, max_instr: u64) {
|
||||
let bin = env!("CARGO_BIN_EXE_xenia-rs");
|
||||
let iso = iso_path();
|
||||
if !std::path::Path::new(&iso).exists() {
|
||||
eprintln!("{label}: iso not found at {iso}; set SYLPHEED_ISO to override. SKIPPING.");
|
||||
return;
|
||||
}
|
||||
std::fs::create_dir_all("target").ok();
|
||||
let mut failures: u32 = 0;
|
||||
let mut wall_ms: Vec<u128> = Vec::with_capacity(runs as usize);
|
||||
let max_instr_str = max_instr.to_string();
|
||||
for run in 1..=runs {
|
||||
let t0 = Instant::now();
|
||||
let out = Command::new(bin)
|
||||
.args([
|
||||
"exec",
|
||||
&iso,
|
||||
"-n",
|
||||
&max_instr_str,
|
||||
"--parallel",
|
||||
"--halt-on-deadlock",
|
||||
"--quiet",
|
||||
])
|
||||
.output()
|
||||
.expect("failed to spawn xenia-rs");
|
||||
let dt = t0.elapsed().as_millis();
|
||||
wall_ms.push(dt);
|
||||
let exit_ok = out.status.success();
|
||||
let vdswap2 = String::from_utf8_lossy(&out.stderr).contains("VdSwap")
|
||||
|| String::from_utf8_lossy(&out.stdout).contains("VdSwap");
|
||||
let _ = vdswap2; // VdSwap=2 not required at -n 5M; tracked for diagnostic only.
|
||||
if !exit_ok {
|
||||
failures += 1;
|
||||
std::fs::write(
|
||||
format!("target/parallel-stress-{label}-{run:03}.stdout"),
|
||||
&out.stdout,
|
||||
)
|
||||
.ok();
|
||||
std::fs::write(
|
||||
format!("target/parallel-stress-{label}-{run:03}.stderr"),
|
||||
&out.stderr,
|
||||
)
|
||||
.ok();
|
||||
eprintln!(
|
||||
"{label}: run {run}/{runs} FAILED (wall={}ms, exit={:?})",
|
||||
dt,
|
||||
out.status.code()
|
||||
);
|
||||
} else {
|
||||
eprintln!("{label}: run {run}/{runs} ok (wall={dt}ms)");
|
||||
}
|
||||
}
|
||||
wall_ms.sort();
|
||||
let p50 = wall_ms[wall_ms.len() / 2];
|
||||
let p95_idx = ((wall_ms.len() - 1) * 95) / 100;
|
||||
let p95 = wall_ms[p95_idx];
|
||||
let max = *wall_ms.last().unwrap();
|
||||
eprintln!(
|
||||
"{label} summary: runs={runs} ok={} failed={failures} p50={p50}ms p95={p95}ms max={max}ms",
|
||||
runs - failures,
|
||||
);
|
||||
assert_eq!(failures, 0, "{label}: {failures} of {runs} stress runs failed");
|
||||
}
|
||||
|
||||
/// Short stress configuration: 20 back-to-back runs, each capped at
/// 5M instructions (`-n 5_000_000`). Feasible within a single session
/// (~10 minutes at the current perf level) and aimed at the
/// lost-wakeup / lock-order / phaser-timeout class of bugs that one
/// run alone would not reliably reproduce.
#[test]
#[ignore = "stress test; run via `cargo test ... -- --ignored parallel_stress_short`"]
fn parallel_stress_short() {
    const RUNS: u32 = 20;
    const MAX_INSTR: u64 = 5_000_000;
    run_stress("short", RUNS, MAX_INSTR);
}
|
||||
|
||||
/// Long stress configuration: 100 back-to-back runs, each capped at
/// 50M instructions (`-n 50_000_000`). This is the full M3 follow-up
/// gate from the master plan; expect it to take hours until the perf
/// gap closes.
#[test]
#[ignore = "full stress test; run via `cargo test ... -- --ignored parallel_stress_long`"]
fn parallel_stress_long() {
    const RUNS: u32 = 100;
    const MAX_INSTR: u64 = 50_000_000;
    run_stress("long", RUNS, MAX_INSTR);
}
|
||||
Reference in New Issue
Block a user