Merge analysis-overhaul/m4-classaware-probes
This commit is contained in:
@@ -142,11 +142,27 @@ SELECT name FROM functions WHERE address = 2186674160; -- 0x824D29F0
|
||||
(CompleteObjectLocator at vtable[-1]; TypeDescriptor at COL+0xC; mangled
|
||||
name at TD+0x8).
|
||||
|
||||
## Layer M4 — Class-aware probe targeting (planned)
|
||||
## Layer M4 — Class-aware probe targeting (landed)
|
||||
|
||||
CLI extension only — no schema changes. `--pc-probe=Class::method` and
|
||||
`--pc-probe-class=ClassName` resolve via M3's tables. See
|
||||
`crates/xenia-analysis/src/lookup.rs` (when landed).
|
||||
CLI extension only — no schema changes. The probe-token grammar adds three
|
||||
symbolic forms on top of the existing `0xADDR` literal:
|
||||
|
||||
- `Class::method` — joins `classes` × `methods` × `demangled_names` to find
|
||||
every PC whose vtable belongs to that class and whose demangled
|
||||
`method_name` matches.
|
||||
- `Class::*` — joins `classes` × `methods` to find every method PC of that
|
||||
class.
|
||||
- `function_name` — falls back to `functions.name` lookup for free functions
|
||||
/ saverestore stubs / labels.
|
||||
|
||||
Numeric tokens never touch the DB (preserves zero-IO fast path; lockstep
|
||||
digest unaffected). Symbolic tokens require the DuckDB at `--probe-db PATH`
|
||||
or `XENIA_PROBE_DB`; default is `sylpheed.db` next to the .iso when present.
|
||||
|
||||
Resolution happens BEFORE guest exec begins, so it cannot affect the
|
||||
lockstep digest.
|
||||
|
||||
See `crates/xenia-analysis/src/lookup.rs`.
|
||||
|
||||
---
|
||||
|
||||
|
||||
222
crates/xenia-analysis/src/lookup.rs
Normal file
222
crates/xenia-analysis/src/lookup.rs
Normal file
@@ -0,0 +1,222 @@
|
||||
//! Symbolic-name resolution for runtime probes (M4).
|
||||
//!
|
||||
//! Lets `--pc-probe` / `--branch-probe` / `--ctor-probe` accept names like
|
||||
//! `xe::apu::AudioSystem::Setup` or `MyClass::*` instead of bare PC literals.
|
||||
//! Resolution joins the M3-produced `classes` × `methods` × `functions` tables
|
||||
//! and the M2 `demangled_names` table.
|
||||
//!
|
||||
//! Numeric tokens (`0x824D6640`, `2186674160`) are returned unchanged; symbolic
|
||||
//! tokens require a path to an existing `sylpheed.db` (passed by the caller).
|
||||
//!
|
||||
//! All DB access is read-only and happens before guest execution, so the
|
||||
//! lockstep digest is unaffected.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use duckdb::params;
|
||||
|
||||
/// Parse one probe token into one or more PCs.
|
||||
///
|
||||
/// Recognized forms:
|
||||
/// - `0xADDR` / `ADDR` (decimal) → returns one PC unchanged.
|
||||
/// - `Class::method` → all `methods.function_address` matching that
|
||||
/// `class_name` + `method_name` pair.
|
||||
/// - `Class::*` → all `methods.function_address` for that class.
|
||||
/// - `func::Name` (free function) → falls back to `functions.name` lookup.
|
||||
///
|
||||
/// `db_path` is consulted ONLY if the token is non-numeric. When `db_path` is
|
||||
/// `None` and the token is symbolic, returns an error suggesting the user
|
||||
/// either pass `--db` or use a numeric address.
|
||||
pub fn resolve_probe_token(db_path: Option<&Path>, token: &str) -> Result<Vec<u32>> {
|
||||
let token = token.trim();
|
||||
if token.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
if let Some(pc) = parse_numeric(token) {
|
||||
return Ok(vec![pc]);
|
||||
}
|
||||
|
||||
let db = db_path.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"symbolic probe token {token:?} requires a sylpheed.db; \
|
||||
pass --probe-db=PATH or use a numeric 0x… address",
|
||||
)
|
||||
})?;
|
||||
|
||||
if !db.exists() {
|
||||
return Err(anyhow!("--probe-db not found: {}", db.display()));
|
||||
}
|
||||
|
||||
let conn = duckdb::Connection::open_with_flags(
|
||||
db,
|
||||
duckdb::Config::default().access_mode(duckdb::AccessMode::ReadOnly)?,
|
||||
)?;
|
||||
|
||||
// Class::method or Class::*
|
||||
if let Some((class, method)) = token.split_once("::") {
|
||||
if method == "*" {
|
||||
return resolve_class_star(&conn, class);
|
||||
}
|
||||
// Try Class::method first, then fall back to functions.name lookup.
|
||||
let pcs = resolve_class_method(&conn, class, method)?;
|
||||
if !pcs.is_empty() {
|
||||
return Ok(pcs);
|
||||
}
|
||||
}
|
||||
|
||||
// Last-resort: functions.name match (e.g. for `entry_point` or
|
||||
// `__savegprlr_22`). Substring-free; user gets a clear error if missing.
|
||||
resolve_function_name(&conn, token)
|
||||
}
|
||||
|
||||
/// Interpret `token` as a literal 32-bit address.
///
/// A leading `0x` / `0X` selects base 16 for the remainder; any other token
/// is tried as base 10. Returns `None` when the digits do not parse as a
/// `u32` in the selected base.
fn parse_numeric(token: &str) -> Option<u32> {
    let hex_digits = token
        .strip_prefix("0x")
        .or_else(|| token.strip_prefix("0X"));

    match hex_digits {
        Some(digits) => u32::from_str_radix(digits, 16).ok(),
        None => token.parse::<u32>().ok(),
    }
}
|
||||
|
||||
fn resolve_class_method(conn: &duckdb::Connection, class: &str, method: &str) -> Result<Vec<u32>> {
|
||||
// Two-step lookup so we can give better errors:
|
||||
// 1. find matching methods rows joined to classes;
|
||||
// 2. surface the function_address column.
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT DISTINCT m.function_address FROM methods m
|
||||
JOIN classes c ON c.vtable_address = m.vtable_address
|
||||
JOIN demangled_names dn ON dn.address = m.function_address
|
||||
WHERE c.name = ? AND dn.method_name = ?",
|
||||
)?;
|
||||
let pcs: Vec<u32> = stmt
|
||||
.query_map(params![class, method], |r| r.get::<_, i64>(0).map(|x| x as u32))?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
Ok(pcs)
|
||||
}
|
||||
|
||||
fn resolve_class_star(conn: &duckdb::Connection, class: &str) -> Result<Vec<u32>> {
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT DISTINCT m.function_address FROM methods m
|
||||
JOIN classes c ON c.vtable_address = m.vtable_address
|
||||
WHERE c.name = ?",
|
||||
)?;
|
||||
let pcs: Vec<u32> = stmt
|
||||
.query_map(params![class], |r| r.get::<_, i64>(0).map(|x| x as u32))?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
if pcs.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"no class named {class:?} found in classes table — has --dis populated this DB?",
|
||||
));
|
||||
}
|
||||
Ok(pcs)
|
||||
}
|
||||
|
||||
fn resolve_function_name(conn: &duckdb::Connection, name: &str) -> Result<Vec<u32>> {
|
||||
let mut stmt = conn.prepare("SELECT address FROM functions WHERE name = ?")?;
|
||||
let pcs: Vec<u32> = stmt
|
||||
.query_map(params![name], |r| r.get::<_, i64>(0).map(|x| x as u32))?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
if pcs.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"probe token {name:?} did not match any classes::methods or functions row",
|
||||
));
|
||||
}
|
||||
Ok(pcs)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use duckdb::Connection;
    use std::path::PathBuf;

    /// Create a small DuckDB file at `path` with the four tables the
    /// resolver queries (`functions`, `classes`, `methods`,
    /// `demangled_names`) and one class `Foo` with two methods, `bar` at
    /// 12000 and `baz` at 12100.
    fn build_synthetic_db(path: &Path) {
        let conn = Connection::open(path).expect("open");
        conn.execute_batch(
            "
            CREATE TABLE functions (
                address BIGINT PRIMARY KEY,
                name VARCHAR
            );
            CREATE TABLE classes (
                name VARCHAR PRIMARY KEY,
                vtable_address BIGINT,
                rtti_present BOOLEAN,
                base_classes_json VARCHAR
            );
            CREATE TABLE methods (
                vtable_address BIGINT,
                slot BIGINT,
                function_address BIGINT,
                mangled_name VARCHAR,
                demangled_name VARCHAR,
                PRIMARY KEY (vtable_address, slot)
            );
            CREATE TABLE demangled_names (
                address BIGINT,
                mangled VARCHAR,
                raw_demangled VARCHAR,
                namespace_path VARCHAR,
                class_name VARCHAR,
                method_name VARCHAR,
                params_signature VARCHAR
            );
            INSERT INTO classes VALUES ('Foo', 11000, true, NULL);
            INSERT INTO functions VALUES (12000, 'sub_2EE0'), (12100, 'sub_2F44');
            INSERT INTO methods VALUES (11000, 0, 12000, NULL, NULL),
                                       (11000, 1, 12100, NULL, NULL);
            INSERT INTO demangled_names (address, mangled, raw_demangled, class_name, method_name)
            VALUES (12000, '?bar@Foo@@QEAAXXZ', 'void Foo::bar(void)', 'Foo', 'bar'),
                   (12100, '?baz@Foo@@QEAAXXZ', 'void Foo::baz(void)', 'Foo', 'baz');
            ",
        )
        .expect("seed");
    }

    /// A seeded database file that is deleted when the guard is dropped,
    /// including when the owning test panics on a failed assertion.
    struct TempDb {
        path: PathBuf,
    }

    impl TempDb {
        /// Build a fresh synthetic DB. The file name combines `tag` (unique
        /// per test) with the process id, so tests running in parallel
        /// threads and separate concurrent test processes never share a file.
        fn new(tag: &str) -> Self {
            let file_name = format!("xenia_lookup_{tag}_{}.duckdb", std::process::id());
            let path = std::env::temp_dir().join(file_name);
            // Best-effort removal of a stale file left by a killed run.
            let _ = std::fs::remove_file(&path);
            build_synthetic_db(&path);
            TempDb { path }
        }
    }

    impl Drop for TempDb {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not mask a test result.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    #[test]
    fn numeric_passthrough_no_db_needed() {
        let pcs = resolve_probe_token(None, "0x824D6640").unwrap();
        assert_eq!(pcs, vec![0x824D6640]);
        let pcs = resolve_probe_token(None, "2186095088").unwrap();
        assert_eq!(pcs, vec![0x824D29F0]);
    }

    #[test]
    fn symbolic_token_without_db_errors() {
        let err = resolve_probe_token(None, "Foo::bar").unwrap_err();
        assert!(format!("{err}").contains("requires a sylpheed.db"));
    }

    #[test]
    fn class_method_resolves() {
        let db = TempDb::new("method");
        let pcs = resolve_probe_token(Some(&db.path), "Foo::bar").unwrap();
        assert_eq!(pcs, vec![12000]);
    }

    #[test]
    fn class_star_returns_all_methods() {
        let db = TempDb::new("star");
        let mut pcs = resolve_probe_token(Some(&db.path), "Foo::*").unwrap();
        pcs.sort();
        assert_eq!(pcs, vec![12000, 12100]);
    }

    #[test]
    fn function_name_fallback() {
        let db = TempDb::new("fn");
        let pcs = resolve_probe_token(Some(&db.path), "sub_2EE0").unwrap();
        assert_eq!(pcs, vec![12000]);
    }
}
|
||||
@@ -224,6 +224,12 @@ enum Commands {
|
||||
/// `--dump-section=BASE:LEN:PATH` end-of-run guest memory snapshot.
|
||||
#[arg(long)]
|
||||
dump_section: Option<String>,
|
||||
/// Path to a `sylpheed.db` (M3-populated) for resolving symbolic
|
||||
/// probe tokens like `Class::method` or `Class::*`. Required only
|
||||
/// if any of the `--*-probe` flags contain a non-numeric token.
|
||||
/// Default: `sylpheed.db` next to the .iso file when present.
|
||||
#[arg(long)]
|
||||
probe_db: Option<String>,
|
||||
},
|
||||
/// Browse XISO disc image contents
|
||||
Browse {
|
||||
@@ -384,6 +390,7 @@ fn main() -> Result<()> {
|
||||
branch_probe,
|
||||
mem_watch,
|
||||
dump_section,
|
||||
probe_db,
|
||||
} => cmd_exec(
|
||||
&path,
|
||||
max_instructions,
|
||||
@@ -407,6 +414,7 @@ fn main() -> Result<()> {
|
||||
branch_probe.as_deref(),
|
||||
mem_watch.as_deref(),
|
||||
dump_section.as_deref(),
|
||||
probe_db.as_deref(),
|
||||
),
|
||||
Commands::Browse { path } => cmd_browse(&path),
|
||||
Commands::Info { path } => cmd_info(&path),
|
||||
@@ -443,6 +451,28 @@ fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Decide which `sylpheed.db` to consult when probe tokens are symbolic.
///
/// Precedence:
///   1. explicit `--probe-db` flag (`cmd_arg`), returned as given without
///      checking that it exists
///   2. `XENIA_PROBE_DB` environment variable, when set and non-empty
///   3. `sylpheed.db` in the same directory as `iso_path`, if that file exists
///
/// Returns `None` when no DB is available — `resolve_probe_token` will then
/// only succeed for numeric tokens.
fn resolve_probe_db_path(cmd_arg: Option<&str>, iso_path: &str) -> Option<std::path::PathBuf> {
    if let Some(explicit) = cmd_arg {
        return Some(std::path::PathBuf::from(explicit));
    }

    // `var_os` rather than `var`: a filesystem path need not be valid
    // Unicode, and `var` would make such a setting silently disappear.
    if let Some(from_env) = std::env::var_os("XENIA_PROBE_DB") {
        if !from_env.is_empty() {
            return Some(std::path::PathBuf::from(from_env));
        }
    }

    // Fall back to a DB sitting next to the disc image.
    let candidate = std::path::Path::new(iso_path).parent()?.join("sylpheed.db");
    if candidate.exists() {
        Some(candidate)
    } else {
        None
    }
}
|
||||
|
||||
/// Load XEX data from a path. If the path is an ISO, extract default.xex from it.
|
||||
#[instrument(skip_all, fields(path = %path))]
|
||||
fn load_xex_data(path: &str) -> Result<Vec<u8>> {
|
||||
@@ -613,6 +643,7 @@ fn cmd_exec(
|
||||
branch_probe: Option<&str>,
|
||||
mem_watch: Option<&str>,
|
||||
dump_section: Option<&str>,
|
||||
probe_db: Option<&str>,
|
||||
) -> Result<()> {
|
||||
cmd_exec_inner(
|
||||
path,
|
||||
@@ -637,6 +668,7 @@ fn cmd_exec(
|
||||
branch_probe,
|
||||
mem_watch,
|
||||
dump_section,
|
||||
probe_db,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
@@ -680,6 +712,7 @@ fn cmd_check(
|
||||
None, // branch_probe — diagnostic, never wanted on goldens
|
||||
None, // mem_watch — same
|
||||
None, // dump_section — same
|
||||
None, // probe_db — same
|
||||
out,
|
||||
expect,
|
||||
stable_digest,
|
||||
@@ -709,6 +742,7 @@ fn cmd_exec_inner(
|
||||
branch_probe: Option<&str>,
|
||||
mem_watch: Option<&str>,
|
||||
dump_section: Option<&str>,
|
||||
probe_db: Option<&str>,
|
||||
digest_out: Option<&str>,
|
||||
digest_expect: Option<&str>,
|
||||
stable_digest: bool,
|
||||
@@ -958,11 +992,15 @@ fn cmd_exec_inner(
|
||||
}
|
||||
}
|
||||
|
||||
// Diagnostic. Parse `--ctor-probe=0x8217C850,0x...` (or
|
||||
// `XENIA_CTOR_PROBE=...`) into `kernel.ctor_probe_pcs`. The
|
||||
// worker prologue checks this set on every step; on a hit it
|
||||
// prints a single back-chain capture line. Empty set = no
|
||||
// probes = no-op fast path.
|
||||
// Diagnostic. Parse `--ctor-probe=0x8217C850,...` or symbolic forms like
|
||||
// `Class::method`, `Class::*`, `function_name` (or `XENIA_CTOR_PROBE=...`)
|
||||
// into `kernel.ctor_probe_pcs`. Symbolic resolution reads `--probe-db` /
|
||||
// `XENIA_PROBE_DB` (or the `sylpheed.db` next to the .iso when present).
|
||||
// Empty set = no probes = no-op fast path.
|
||||
//
|
||||
// Symbolic resolution happens BEFORE guest exec begins, so it cannot
|
||||
// affect the lockstep digest.
|
||||
let probe_db_path = resolve_probe_db_path(probe_db, path);
|
||||
let ctor_probe_combined: Option<String> = match (
|
||||
ctor_probe,
|
||||
std::env::var("XENIA_CTOR_PROBE").ok().or_else(|| std::env::var("XENIA_PC_PROBE").ok()),
|
||||
@@ -973,7 +1011,7 @@ fn cmd_exec_inner(
|
||||
};
|
||||
if let Some(list) = ctor_probe_combined {
|
||||
for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) {
|
||||
let (pc_str, consumer): (&str, Option<(u32, u32)>) = match token.split_once('@') {
|
||||
let (pc_part, consumer): (&str, Option<(u32, u32)>) = match token.split_once('@') {
|
||||
None => (token, None),
|
||||
Some((pc_part, rest)) => {
|
||||
let (disp_str, off_str) = rest.split_once(':').ok_or_else(|| {
|
||||
@@ -990,12 +1028,13 @@ fn cmd_exec_inner(
|
||||
(pc_part.trim(), Some((disp, off)))
|
||||
}
|
||||
};
|
||||
let pc = parse_hex_u32(pc_str).map_err(|e| {
|
||||
anyhow::anyhow!("invalid PC in --pc-probe: {token:?}: {e}")
|
||||
})?;
|
||||
kernel.ctor_probe_pcs.insert(pc);
|
||||
if let Some(c) = consumer {
|
||||
kernel.pc_probe_consumers.insert(pc, c);
|
||||
let pcs = xenia_analysis::lookup::resolve_probe_token(probe_db_path.as_deref(), pc_part)
|
||||
.map_err(|e| anyhow::anyhow!("--pc-probe {token:?}: {e}"))?;
|
||||
for pc in pcs {
|
||||
kernel.ctor_probe_pcs.insert(pc);
|
||||
if let Some(c) = consumer {
|
||||
kernel.pc_probe_consumers.insert(pc, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !quiet && !kernel.ctor_probe_pcs.is_empty() {
|
||||
@@ -1023,10 +1062,11 @@ fn cmd_exec_inner(
|
||||
};
|
||||
if let Some(list) = branch_probe_combined {
|
||||
for token in list.split(',').map(str::trim).filter(|s| !s.is_empty()) {
|
||||
let pc = parse_hex_u32(token).map_err(|e| {
|
||||
anyhow::anyhow!("invalid PC in --branch-probe: {token:?}: {e}")
|
||||
})?;
|
||||
kernel.branch_probe_pcs.insert(pc);
|
||||
let pcs = xenia_analysis::lookup::resolve_probe_token(probe_db_path.as_deref(), token)
|
||||
.map_err(|e| anyhow::anyhow!("--branch-probe {token:?}: {e}"))?;
|
||||
for pc in pcs {
|
||||
kernel.branch_probe_pcs.insert(pc);
|
||||
}
|
||||
}
|
||||
if !quiet && !kernel.branch_probe_pcs.is_empty() {
|
||||
let mut pcs: Vec<u32> = kernel.branch_probe_pcs.iter().copied().collect();
|
||||
|
||||
Reference in New Issue
Block a user