From 1f416aaa2e31173497f7be915b71dc26188bf2cd Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sun, 3 May 2026 13:46:02 +0200 Subject: [PATCH] =?UTF-8?q?test(check):=20ORACBUG-004=20=E2=80=94=20sylphe?= =?UTF-8?q?ed=5Fn50m=20stable-digest=20oracle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a regression-catcher golden for Sylpheed boot at -n 50M lockstep, covering the first VdSwap pair (the n2m oracle is swap-blind because the first VdSwap fires at ~18M instructions). The new --stable-digest flag emits/compares only fields that are deterministic in lockstep: instructions, imports, unimpl, draws, swaps, unique_render_targets, shader_blobs_live, texture_cache_entries Excluded: packets — empirically ±2-8% lockstep variance (GPU thread race per audit M11) resolves, interrupts_delivered, interrupts_dropped, texture_decodes — scheduling-sensitive under --parallel path — cwd-dependent Empirical determinism: 3 consecutive lockstep -n 50M runs produce byte-identical stable-digest output. The n4b canonical-invocation golden the audit's recommended next sprint also called for is deferred. Per audit memory `--parallel --reservations-table` is pathologically slow (>32 min for -n 100M), so -n 4B in that mode would be many hours per run, not the 5-15 min the plan estimated. n4b will be captured one-shot post-renderer-unblock as a manual artifact under audit-runs/post-fix/, not as a test golden. See crates/xenia-app/tests/golden/README.md. Test infrastructure: - crates/xenia-app/tests/sylpheed_oracles.rs — invokes CARGO_BIN_EXE_xenia-rs against the ISO. Path resolved via SYLPHEED_ISO env var (skips gracefully if missing). - #[ignore]-gated; run via: cargo test --release -p xenia-app --test sylpheed_oracles \\ -- --ignored --nocapture Closes ORACBUG-004 (P0). Partial: ORACBUG-006 (P1 deferred). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-app/src/main.rs | 49 ++++++++++- crates/xenia-app/tests/golden/README.md | 72 ++++++++++++++++ .../xenia-app/tests/golden/sylpheed_n50m.json | 10 +++ crates/xenia-app/tests/sylpheed_oracles.rs | 85 +++++++++++++++++++ 4 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 crates/xenia-app/tests/golden/README.md create mode 100644 crates/xenia-app/tests/golden/sylpheed_n50m.json create mode 100644 crates/xenia-app/tests/sylpheed_oracles.rs diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 74f5c5c..a8cf88e 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -226,6 +226,13 @@ enum Commands { /// Optional golden digest JSON; `check` exits non-zero on mismatch #[arg(long)] expect: Option<String>, + /// Emit/compare only stable fields (excludes timing-sensitive counters + /// like `packets`, `interrupts_delivered`, `resolves`). Required for any + /// golden captured under `--parallel`; recommended for lockstep goldens + /// at -n ≥50M because `packets` has empirical ±2–8% jitter from a GPU + /// thread race. + #[arg(long)] + stable_digest: bool, /// Force the threaded GPU backend (default at M1.9). 
#[arg(long)] gpu_thread: bool, @@ -324,6 +331,7 @@ fn main() -> Result<()> { max_instructions, out, expect, + stable_digest, gpu_thread, gpu_inline, reservations_table, @@ -333,6 +341,7 @@ fn main() -> Result<()> { max_instructions, out.as_deref(), expect.as_deref(), + stable_digest, gpu_thread, gpu_inline, reservations_table, @@ -528,6 +537,7 @@ fn cmd_exec( parallel, None, None, + false, ) } @@ -538,6 +548,7 @@ fn cmd_check( max_instructions: u64, out: Option<&str>, expect: Option<&str>, + stable_digest: bool, gpu_thread: bool, gpu_inline: bool, reservations_table: bool, @@ -561,6 +572,7 @@ fn cmd_check( parallel, out, expect, + stable_digest, ) } @@ -582,6 +594,7 @@ fn cmd_exec_inner( parallel: bool, digest_out: Option<&str>, digest_expect: Option<&str>, + stable_digest: bool, ) -> Result<()> { let started = Instant::now(); let data = load_xex_data(path)?; @@ -1190,10 +1203,14 @@ fn cmd_exec_inner( // catch any drift between runs. if digest_out.is_some() || digest_expect.is_some() { let digest = RunDigest::capture(path, &kernel, &stats); - let json = digest.to_json(); + let json = if stable_digest { + digest.stable_fields_json() + } else { + digest.to_json() + }; if let Some(out_path) = digest_out { std::fs::write(out_path, &json)?; - info!(out = out_path, "run digest written"); + info!(out = out_path, stable = stable_digest, "run digest written"); } else { println!("{json}"); } @@ -1340,6 +1357,34 @@ impl RunDigest { self.texture_decodes, ) } + + /// Stable-fields-only digest for goldens that need to survive non-determinism. + /// Excludes timing-sensitive counters: `packets` has documented ±2–8% lockstep + /// noise from a GPU thread race; `resolves`, `interrupts_delivered`, + /// `interrupts_dropped`, and `texture_decodes` are scheduling-sensitive under + /// `--parallel`. Also omits `path` (cwd-dependent). The remaining fields are + /// deterministic in lockstep at a fixed instruction budget. Use via + /// `--stable-digest`. 
+ fn stable_fields_json(&self) -> String { + format!( + "{{\n \"instructions\": {},\n \ + \"imports\": {},\n \ + \"unimpl\": {},\n \ + \"draws\": {},\n \ + \"swaps\": {},\n \ + \"unique_render_targets\": {},\n \ + \"shader_blobs_live\": {},\n \ + \"texture_cache_entries\": {}\n}}\n", + self.instructions, + self.imports, + self.unimpl, + self.draws, + self.swaps, + self.unique_render_targets, + self.shader_blobs_live, + self.texture_cache_entries, + ) + } } fn escape_json_string(s: &str) -> String { diff --git a/crates/xenia-app/tests/golden/README.md b/crates/xenia-app/tests/golden/README.md new file mode 100644 index 0000000..c020521 --- /dev/null +++ b/crates/xenia-app/tests/golden/README.md @@ -0,0 +1,72 @@ +# Sylpheed regression goldens + +These JSON files anchor `xenia-rs check` digest output for Project Sylpheed. + +## Files + +| File | -n | Mode | Captures | +|------|----|------|----------| +| `sylpheed_n2m.json` | 2_000_000 | full digest | early boot (swaps=0, no rendering) | +| `sylpheed_n50m.json` | 50_000_000 | stable-digest | first VdSwap pair (swaps=2 post-Phase-A) | + +## Stable-digest mode + +`sylpheed_n50m.json` is captured with `--stable-digest`, which omits +timing-sensitive counters: `packets` (±2–8% lockstep noise from a GPU thread +race), `resolves`, `interrupts_delivered`, `interrupts_dropped`, +`texture_decodes`. The remaining fields are byte-identical across repeated +lockstep runs at a fixed -n. + +`sylpheed_n2m.json` predates the stable-digest flag and uses full-digest +compare. It still works because at -n 2M the GPU pipeline has not produced any +packets yet — `packets=0` is trivially deterministic. + +## Circularity hazard + +Per ORACBUG-001/002/003, these goldens were captured by running the same code +they validate. They detect **regression** from a known-good snapshot, not +**correctness**. When a planned fix intentionally moves the digest (e.g. 
a +shader fix landing `draws > 0` for the first time), re-baseline the golden as +a separate commit and reference the audit ID in the message. + +## Re-baselining + +```sh +cargo build --release -p xenia-app +target/release/xenia-rs check \ + "$SYLPHEED_ISO" \ + -n 50000000 \ + --stable-digest \ + --out crates/xenia-app/tests/golden/sylpheed_n50m.json +``` + +## Running the goldens + +```sh +cargo test --release -p xenia-app --test sylpheed_oracles -- --ignored --nocapture +``` + +The tests are `#[ignore]`-gated because each run takes a few seconds, which is +unacceptable in the default `cargo test` cycle. The ISO path defaults to the +hard-coded absolute path `ISO_DEFAULT` in `tests/sylpheed_oracles.rs` (no `~` or glob expansion is performed) and can be +overridden via `SYLPHEED_ISO=/path/to/sylpheed.iso`; if no ISO exists at the resolved path the test prints a SKIPPING notice and returns without failing. + +## n4b canonical-invocation regression anchor (deferred) + +The audit's recommended next sprint also called for a `sylpheed_n4b.json` +golden capturing the canonical reference invocation +`xenia-rs check sylpheed.iso -n 4_000_000_000 --parallel --reservations-table`. +This is **deferred** because: + +1. The `--parallel --reservations-table` combination is empirically pathologically + slow at -n 100M (>32 min per run per the audit memory). At -n 4B the run cost + is many hours, not the single-session-friendly 5–15 min the original plan + estimated. +2. Each phase that intentionally moves rendering counters (C, D, E, F) would + need a re-baseline of n4b — a significant time cost compounding over the + sprint. + +Once the renderer-unblock phases (C+D+E) land and `draws > 0` is confirmed at +-n 100M lockstep, an n4b artifact may be captured one-shot and stored under +`audit-runs/post-fix/` (not as a test golden) as a manual regression anchor for +the canonical invocation. 
diff --git a/crates/xenia-app/tests/golden/sylpheed_n50m.json b/crates/xenia-app/tests/golden/sylpheed_n50m.json new file mode 100644 index 0000000..99f0816 --- /dev/null +++ b/crates/xenia-app/tests/golden/sylpheed_n50m.json @@ -0,0 +1,10 @@ +{ + "instructions": 50000008, + "imports": 407415, + "unimpl": 0, + "draws": 0, + "swaps": 2, + "unique_render_targets": 0, + "shader_blobs_live": 0, + "texture_cache_entries": 0 +} diff --git a/crates/xenia-app/tests/sylpheed_oracles.rs b/crates/xenia-app/tests/sylpheed_oracles.rs new file mode 100644 index 0000000..d7c8083 --- /dev/null +++ b/crates/xenia-app/tests/sylpheed_oracles.rs @@ -0,0 +1,85 @@ +//! Sylpheed boot-sequence regression oracles. +//! +//! These tests run `xenia-rs check` against the Project Sylpheed ISO and +//! compare the resulting digest to a checked-in JSON file via `--stable-digest`, +//! which excludes timing-sensitive counters (`packets`, `interrupts_*`, +//! `resolves`, `texture_decodes`). The remaining fields are deterministic in +//! lockstep at a fixed instruction budget — verified empirically across 3 +//! consecutive runs. +//! +//! Goldens are CIRCULAR per ORACBUG-001/002/003: they were captured by running +//! the same code they validate. Treat them as **regression anchors** (catch +//! drift from a known-good snapshot) not **correctness anchors** (no claim +//! about absolute behavior). When a planned fix intentionally moves the +//! digest (e.g. swap fix → `swaps` increments; renderer fix → `draws` becomes +//! non-zero), re-baseline the golden as a separate commit. +//! +//! Tests are `#[ignore]`-gated because the runs take ~4 seconds each, which +//! is unacceptable for the default `cargo test` cycle. Run explicitly: +//! cargo test --release -p xenia-app --test sylpheed_oracles -- --ignored --nocapture +//! +//! ISO path is read from the `SYLPHEED_ISO` env var, falling back to the +//! hard-coded absolute `ISO_DEFAULT` path. CI/contributors without the ISO will see the test +//! print a SKIPPING notice and return early (reported as passed, not failed). 
+ +use std::process::Command; + +const ISO_DEFAULT: &str = "/home/fabi/RE Project Sylpheed/Project Sylpheed - Arc of Deception (USA, Europe) (En,Ja).iso"; + +fn iso_path() -> String { + std::env::var("SYLPHEED_ISO").unwrap_or_else(|_| ISO_DEFAULT.to_string()) +} + +fn run_oracle(label: &str, max_instr: u64, golden_rel: &str) { + let bin = env!("CARGO_BIN_EXE_xenia-rs"); + let iso = iso_path(); + if !std::path::Path::new(&iso).exists() { + eprintln!("{label}: iso not found at {iso}; set SYLPHEED_ISO to override. SKIPPING."); + return; + } + + // Resolve the golden path relative to the test's CARGO_MANIFEST_DIR so the + // test runs correctly from any cwd. + let manifest_dir = env!("CARGO_MANIFEST_DIR"); + let golden = std::path::Path::new(manifest_dir).join(golden_rel); + assert!( + golden.exists(), + "{label}: golden file missing at {}", + golden.display() + ); + + let max_instr_str = max_instr.to_string(); + let golden_str = golden.to_string_lossy().to_string(); + + let out = Command::new(bin) + .args([ + "check", + &iso, + "-n", + &max_instr_str, + "--stable-digest", + "--expect", + &golden_str, + ]) + .output() + .expect("failed to spawn xenia-rs"); + + if !out.status.success() { + eprintln!( + "{label}: STDOUT:\n{}\nSTDERR:\n{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr), + ); + panic!("{label}: digest mismatch (exit {:?})", out.status.code()); + } +} + +/// Sylpheed boot to first VdSwap pair, captured at -n 50M lockstep. +/// Catches regressions in: addi/addic semantics, kernel HLE for VdSwap path, +/// thread spawning, file I/O for sound/config. With Phase A's swap fix landed, +/// `swaps` should be 2 and `draws` 0 (Phase E gates draws>0). +#[test] +#[ignore = "long-running; run via `cargo test ... -- --ignored sylpheed_n50m`"] +fn sylpheed_n50m() { + run_oracle("sylpheed_n50m", 50_000_000, "tests/golden/sylpheed_n50m.json"); +}