From 4ff08f6116d30b6057a8a56da73d0befc3bc2988 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 8 May 2026 20:22:21 +0200 Subject: [PATCH] M4: class-aware probe tokens via M3 vtable+method tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLI extension only — no schema change. Adds symbolic resolution for --pc-probe / --branch-probe / --ctor-probe tokens: - `0xADDR` / `2186674160` — numeric (current behavior, no DB load). - `Class::method` — joins classes × methods × demangled_names. - `Class::*` — joins classes × methods (all slots). - `function_name` — falls back to functions.name for free functions / saverestore stubs / labels. New `xenia_analysis::lookup::resolve_probe_token(db_path, token)` opens the DB read-only ONLY when a token is non-numeric, so legacy numeric flows pay no IO. New `--probe-db PATH` flag (or `XENIA_PROBE_DB` env / default `sylpheed.db` next to the .iso) selects the DB. Symbolic resolution happens BEFORE any guest exec, so it cannot affect the lockstep digest. Verified deterministic across two reruns at -n 2M (instructions=2000005 identical). End-to-end smoke test on Sylpheed: `--pc-probe='ANON_Class_6B674251::*'` resolves to all 45 method PCs of that anonymous class (matching the methods-table row count for that vtable). Tests 621→626 (+5 lookup unit tests covering numeric passthrough, symbolic-without-DB error, Class::method resolution, Class::* expansion, and functions.name fallback). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/xenia-analysis/SCHEMA.md | 24 ++- crates/xenia-analysis/src/lookup.rs | 222 ++++++++++++++++++++++++++++ crates/xenia-app/src/main.rs | 72 +++++++-- 3 files changed, 298 insertions(+), 20 deletions(-) create mode 100644 crates/xenia-analysis/src/lookup.rs diff --git a/crates/xenia-analysis/SCHEMA.md b/crates/xenia-analysis/SCHEMA.md index 9423f69..6039428 100644 --- a/crates/xenia-analysis/SCHEMA.md +++ b/crates/xenia-analysis/SCHEMA.md @@ -142,11 +142,27 @@ SELECT name FROM functions WHERE address = 2186674160; -- 0x824D29F0 (CompleteObjectLocator at vtable[-1]; TypeDescriptor at COL+0xC; mangled name at TD+0x8). -## Layer M4 — Class-aware probe targeting (planned) +## Layer M4 — Class-aware probe targeting (landed) -CLI extension only — no schema changes. `--pc-probe=Class::method` and -`--pc-probe-class=ClassName` resolve via M3's tables. See -`crates/xenia-analysis/src/lookup.rs` (when landed). +CLI extension only — no schema changes. The probe-token grammar adds three +symbolic forms on top of the existing `0xADDR` literal: + +- `Class::method` — joins `classes` × `methods` × `demangled_names` to find + every PC whose vtable belongs to that class and whose demangled + `method_name` matches. +- `Class::*` — joins `classes` × `methods` to find every method PC of that + class. +- `function_name` — falls back to `functions.name` lookup for free functions + / saverestore stubs / labels. + +Numeric tokens never touch the DB (preserves zero-IO fast path; lockstep +digest unaffected). Symbolic tokens require the DuckDB at `--probe-db PATH` +or `XENIA_PROBE_DB`; default is `sylpheed.db` next to the .iso when present. + +Resolution happens BEFORE guest exec begins, so it cannot affect the +lockstep digest. + +See `crates/xenia-analysis/src/lookup.rs`. 
--- diff --git a/crates/xenia-analysis/src/lookup.rs b/crates/xenia-analysis/src/lookup.rs new file mode 100644 index 0000000..2235650 --- /dev/null +++ b/crates/xenia-analysis/src/lookup.rs @@ -0,0 +1,222 @@ +//! Symbolic-name resolution for runtime probes (M4). +//! +//! Lets `--pc-probe` / `--branch-probe` / `--ctor-probe` accept names like +//! `xe::apu::AudioSystem::Setup` or `MyClass::*` instead of bare PC literals. +//! Resolution joins the M3-produced `classes` × `methods` × `functions` tables +//! and the M2 `demangled_names` table. +//! +//! Numeric tokens (`0x824D6640`, `2186674160`) are returned unchanged; symbolic +//! tokens require a path to an existing `sylpheed.db` (passed by the caller). +//! +//! All DB access is read-only and happens before guest execution, so the +//! lockstep digest is unaffected. + +use std::path::Path; + +use anyhow::{anyhow, Result}; +use duckdb::params; + +/// Parse one probe token into one or more PCs. +/// +/// Recognized forms: +/// - `0xADDR` / `ADDR` (decimal) → returns one PC unchanged. +/// - `Class::method` → all `methods.function_address` matching that +/// `class_name` + `method_name` pair. +/// - `Class::*` → all `methods.function_address` for that class. +/// - `function_name` (free function) → falls back to `functions.name` lookup. +/// +/// `db_path` is consulted ONLY if the token is non-numeric. When `db_path` is +/// `None` and the token is symbolic, returns an error suggesting the user +/// either pass `--probe-db` or use a numeric address. 
+pub fn resolve_probe_token(db_path: Option<&Path>, token: &str) -> Result> { + let token = token.trim(); + if token.is_empty() { + return Ok(vec![]); + } + + if let Some(pc) = parse_numeric(token) { + return Ok(vec![pc]); + } + + let db = db_path.ok_or_else(|| { + anyhow!( + "symbolic probe token {token:?} requires a sylpheed.db; \ + pass --probe-db=PATH or use a numeric 0x… address", + ) + })?; + + if !db.exists() { + return Err(anyhow!("--probe-db not found: {}", db.display())); + } + + let conn = duckdb::Connection::open_with_flags( + db, + duckdb::Config::default().access_mode(duckdb::AccessMode::ReadOnly)?, + )?; + + // Class::method or Class::* + if let Some((class, method)) = token.split_once("::") { + if method == "*" { + return resolve_class_star(&conn, class); + } + // Try Class::method first, then fall back to functions.name lookup. + let pcs = resolve_class_method(&conn, class, method)?; + if !pcs.is_empty() { + return Ok(pcs); + } + } + + // Last-resort: functions.name match (e.g. for `entry_point` or + // `__savegprlr_22`). Substring-free; user gets a clear error if missing. + resolve_function_name(&conn, token) +} + +fn parse_numeric(token: &str) -> Option { + if let Some(hex) = token.strip_prefix("0x").or_else(|| token.strip_prefix("0X")) { + return u32::from_str_radix(hex, 16).ok(); + } + token.parse::().ok() +} + +fn resolve_class_method(conn: &duckdb::Connection, class: &str, method: &str) -> Result> { + // Two-step lookup so we can give better errors: + // 1. find matching methods rows joined to classes; + // 2. surface the function_address column. + let mut stmt = conn.prepare( + "SELECT DISTINCT m.function_address FROM methods m + JOIN classes c ON c.vtable_address = m.vtable_address + JOIN demangled_names dn ON dn.address = m.function_address + WHERE c.name = ? AND dn.method_name = ?", + )?; + let pcs: Vec = stmt + .query_map(params![class, method], |r| r.get::<_, i64>(0).map(|x| x as u32))? 
+ .filter_map(|r| r.ok()) + .collect(); + Ok(pcs) +} + +fn resolve_class_star(conn: &duckdb::Connection, class: &str) -> Result> { + let mut stmt = conn.prepare( + "SELECT DISTINCT m.function_address FROM methods m + JOIN classes c ON c.vtable_address = m.vtable_address + WHERE c.name = ?", + )?; + let pcs: Vec = stmt + .query_map(params![class], |r| r.get::<_, i64>(0).map(|x| x as u32))? + .filter_map(|r| r.ok()) + .collect(); + if pcs.is_empty() { + return Err(anyhow!( + "no class named {class:?} found in classes table — has --dis populated this DB?", + )); + } + Ok(pcs) +} + +fn resolve_function_name(conn: &duckdb::Connection, name: &str) -> Result> { + let mut stmt = conn.prepare("SELECT address FROM functions WHERE name = ?")?; + let pcs: Vec = stmt + .query_map(params![name], |r| r.get::<_, i64>(0).map(|x| x as u32))? + .filter_map(|r| r.ok()) + .collect(); + if pcs.is_empty() { + return Err(anyhow!( + "probe token {name:?} did not match any classes::methods or functions row", + )); + } + Ok(pcs) +} + +#[cfg(test)] +mod tests { + use super::*; + use duckdb::Connection; + + fn build_synthetic_db(path: &Path) { + let conn = Connection::open(path).expect("open"); + conn.execute_batch( + " + CREATE TABLE functions ( + address BIGINT PRIMARY KEY, + name VARCHAR + ); + CREATE TABLE classes ( + name VARCHAR PRIMARY KEY, + vtable_address BIGINT, + rtti_present BOOLEAN, + base_classes_json VARCHAR + ); + CREATE TABLE methods ( + vtable_address BIGINT, + slot BIGINT, + function_address BIGINT, + mangled_name VARCHAR, + demangled_name VARCHAR, + PRIMARY KEY (vtable_address, slot) + ); + CREATE TABLE demangled_names ( + address BIGINT, + mangled VARCHAR, + raw_demangled VARCHAR, + namespace_path VARCHAR, + class_name VARCHAR, + method_name VARCHAR, + params_signature VARCHAR + ); + INSERT INTO classes VALUES ('Foo', 11000, true, NULL); + INSERT INTO functions VALUES (12000, 'sub_2EE0'), (12100, 'sub_2F44'); + INSERT INTO methods VALUES (11000, 0, 12000, NULL, NULL), + 
(11000, 1, 12100, NULL, NULL); + INSERT INTO demangled_names (address, mangled, raw_demangled, class_name, method_name) + VALUES (12000, '?bar@Foo@@QEAAXXZ', 'void Foo::bar(void)', 'Foo', 'bar'), + (12100, '?baz@Foo@@QEAAXXZ', 'void Foo::baz(void)', 'Foo', 'baz'); + ", + ) + .expect("seed"); + } + + #[test] + fn numeric_passthrough_no_db_needed() { + let pcs = resolve_probe_token(None, "0x824D6640").unwrap(); + assert_eq!(pcs, vec![0x824D6640]); + let pcs = resolve_probe_token(None, "2186095088").unwrap(); + assert_eq!(pcs, vec![0x824D29F0]); + } + + #[test] + fn symbolic_token_without_db_errors() { + let err = resolve_probe_token(None, "Foo::bar").unwrap_err(); + assert!(format!("{err}").contains("requires a sylpheed.db")); + } + + #[test] + fn class_method_resolves() { + let tmp = std::env::temp_dir().join("xenia_lookup_test.duckdb"); + let _ = std::fs::remove_file(&tmp); + build_synthetic_db(&tmp); + let pcs = resolve_probe_token(Some(&tmp), "Foo::bar").unwrap(); + assert_eq!(pcs, vec![12000]); + let _ = std::fs::remove_file(&tmp); + } + + #[test] + fn class_star_returns_all_methods() { + let tmp = std::env::temp_dir().join("xenia_lookup_star.duckdb"); + let _ = std::fs::remove_file(&tmp); + build_synthetic_db(&tmp); + let mut pcs = resolve_probe_token(Some(&tmp), "Foo::*").unwrap(); + pcs.sort(); + assert_eq!(pcs, vec![12000, 12100]); + let _ = std::fs::remove_file(&tmp); + } + + #[test] + fn function_name_fallback() { + let tmp = std::env::temp_dir().join("xenia_lookup_fn.duckdb"); + let _ = std::fs::remove_file(&tmp); + build_synthetic_db(&tmp); + let pcs = resolve_probe_token(Some(&tmp), "sub_2EE0").unwrap(); + assert_eq!(pcs, vec![12000]); + let _ = std::fs::remove_file(&tmp); + } +} diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index ea60689..18a3292 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -224,6 +224,12 @@ enum Commands { /// `--dump-section=BASE:LEN:PATH` end-of-run guest memory snapshot. 
#[arg(long)] dump_section: Option, + /// Path to a `sylpheed.db` (M3-populated) for resolving symbolic + /// probe tokens like `Class::method` or `Class::*`. Required only + /// if any of the `--*-probe` flags contain a non-numeric token. + /// Default: `sylpheed.db` next to the .iso file when present. + #[arg(long)] + probe_db: Option, }, /// Browse XISO disc image contents Browse { @@ -384,6 +390,7 @@ fn main() -> Result<()> { branch_probe, mem_watch, dump_section, + probe_db, } => cmd_exec( &path, max_instructions, @@ -407,6 +414,7 @@ fn main() -> Result<()> { branch_probe.as_deref(), mem_watch.as_deref(), dump_section.as_deref(), + probe_db.as_deref(), ), Commands::Browse { path } => cmd_browse(&path), Commands::Info { path } => cmd_info(&path), @@ -443,6 +451,28 @@ fn main() -> Result<()> { } } +/// Resolve where to look for `sylpheed.db` when probe tokens are symbolic. +/// Precedence: +/// 1. explicit `--probe-db` flag (cmd_arg) +/// 2. `XENIA_PROBE_DB` env var +/// 3. `sylpheed.db` next to the .iso path (if it exists) +/// Returns `None` when no DB is available — resolve_probe_token will then +/// only succeed for numeric tokens. +fn resolve_probe_db_path(cmd_arg: Option<&str>, iso_path: &str) -> Option { + if let Some(p) = cmd_arg { + return Some(std::path::PathBuf::from(p)); + } + if let Ok(p) = std::env::var("XENIA_PROBE_DB") { + if !p.is_empty() { return Some(std::path::PathBuf::from(p)); } + } + let iso = std::path::Path::new(iso_path); + if let Some(parent) = iso.parent() { + let candidate = parent.join("sylpheed.db"); + if candidate.exists() { return Some(candidate); } + } + None +} + /// Load XEX data from a path. If the path is an ISO, extract default.xex from it. 
#[instrument(skip_all, fields(path = %path))] fn load_xex_data(path: &str) -> Result> { @@ -613,6 +643,7 @@ fn cmd_exec( branch_probe: Option<&str>, mem_watch: Option<&str>, dump_section: Option<&str>, + probe_db: Option<&str>, ) -> Result<()> { cmd_exec_inner( path, @@ -637,6 +668,7 @@ fn cmd_exec( branch_probe, mem_watch, dump_section, + probe_db, None, None, false, @@ -680,6 +712,7 @@ fn cmd_check( None, // branch_probe — diagnostic, never wanted on goldens None, // mem_watch — same None, // dump_section — same + None, // probe_db — same out, expect, stable_digest, @@ -709,6 +742,7 @@ fn cmd_exec_inner( branch_probe: Option<&str>, mem_watch: Option<&str>, dump_section: Option<&str>, + probe_db: Option<&str>, digest_out: Option<&str>, digest_expect: Option<&str>, stable_digest: bool, @@ -958,11 +992,15 @@ fn cmd_exec_inner( } } - // Diagnostic. Parse `--ctor-probe=0x8217C850,0x...` (or - // `XENIA_CTOR_PROBE=...`) into `kernel.ctor_probe_pcs`. The - // worker prologue checks this set on every step; on a hit it - // prints a single back-chain capture line. Empty set = no - // probes = no-op fast path. + // Diagnostic. Parse `--ctor-probe=0x8217C850,...` or symbolic forms like + // `Class::method`, `Class::*`, `function_name` (or `XENIA_CTOR_PROBE=...`) + // into `kernel.ctor_probe_pcs`. Symbolic resolution reads `--probe-db` / + // `XENIA_PROBE_DB` (or the `sylpheed.db` next to the .iso when present). + // Empty set = no probes = no-op fast path. + // + // Symbolic resolution happens BEFORE guest exec begins, so it cannot + // affect the lockstep digest. 
+ let probe_db_path = resolve_probe_db_path(probe_db, path); let ctor_probe_combined: Option = match ( ctor_probe, std::env::var("XENIA_CTOR_PROBE").ok().or_else(|| std::env::var("XENIA_PC_PROBE").ok()), @@ -973,7 +1011,7 @@ fn cmd_exec_inner( }; if let Some(list) = ctor_probe_combined { for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) { - let (pc_str, consumer): (&str, Option<(u32, u32)>) = match token.split_once('@') { + let (pc_part, consumer): (&str, Option<(u32, u32)>) = match token.split_once('@') { None => (token, None), Some((pc_part, rest)) => { let (disp_str, off_str) = rest.split_once(':').ok_or_else(|| { @@ -990,12 +1028,13 @@ fn cmd_exec_inner( (pc_part.trim(), Some((disp, off))) } }; - let pc = parse_hex_u32(pc_str).map_err(|e| { - anyhow::anyhow!("invalid PC in --pc-probe: {token:?}: {e}") - })?; - kernel.ctor_probe_pcs.insert(pc); - if let Some(c) = consumer { - kernel.pc_probe_consumers.insert(pc, c); + let pcs = xenia_analysis::lookup::resolve_probe_token(probe_db_path.as_deref(), pc_part) + .map_err(|e| anyhow::anyhow!("--pc-probe {token:?}: {e}"))?; + for pc in pcs { + kernel.ctor_probe_pcs.insert(pc); + if let Some(c) = consumer { + kernel.pc_probe_consumers.insert(pc, c); + } } } if !quiet && !kernel.ctor_probe_pcs.is_empty() { @@ -1023,10 +1062,11 @@ fn cmd_exec_inner( }; if let Some(list) = branch_probe_combined { for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) { - let pc = parse_hex_u32(token).map_err(|e| { - anyhow::anyhow!("invalid PC in --branch-probe: {token:?}: {e}") - })?; - kernel.branch_probe_pcs.insert(pc); + let pcs = xenia_analysis::lookup::resolve_probe_token(probe_db_path.as_deref(), token) + .map_err(|e| anyhow::anyhow!("--branch-probe {token:?}: {e}"))?; + for pc in pcs { + kernel.branch_probe_pcs.insert(pc); + } } if !quiet && !kernel.branch_probe_pcs.is_empty() { let mut pcs: Vec = kernel.branch_probe_pcs.iter().copied().collect();