M3: vtable scan + MSVC RTTI walk + 3 new tables

Adds detection of statically-allocated MSVC vtables in .rdata/.data:
- New `xenia_analysis::vtables` walks read-only sections looking for runs of
  ≥3 contiguous big-endian u32 values where each value lands on a known
  function start (from M1's corrected functions table). 2-slot runs are
  rejected to keep false-positive rate down.
- For each candidate, the MSVC RTTI walk (vtable[-1] → CompleteObjectLocator
  → TypeDescriptor → mangled name) is attempted; on success the demangled
  class name is recorded, along with a best-effort RTTIClassHierarchyDescriptor
  walk to fill base_classes_json. On failure (RTTI stripped — common for
  shipped game binaries) the class is named ANON_Class_<fnv1a-hash>, keyed
  by the sorted method-PC list, so identical vtables collapse to one entry.
- DB: new tables `vtables`, `methods`, `classes` with indices on
  function_address and rtti_present. `write_analysis_results` takes a
  `&[Vtable]` slice; `write_disasm` (back-compat) passes empty.
- cmd_dis wires the scan after xref analysis using
  `func_analysis.functions.keys()` as the function-start oracle.

Validation on Sylpheed (RTTI stripped, as expected): 722 vtables / 499
unique classes / 5571 methods. Sanity invariant: every methods.function_address
joins to functions.address (0 broken refs). Largest vtable: 131 slots.

Tests 617→621 (+4 vtable unit tests covering 3-slot detect, 2-slot reject,
synth name stability, and synth name divergence).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-08 20:17:45 +02:00
parent bd5753311e
commit 1d6c51fbf8
6 changed files with 620 additions and 8 deletions

View File

@@ -303,6 +303,9 @@ impl DbWriter {
/// (`functions`, `labels`, `xrefs`) and their indices. Always executes
/// in `--analyze=rust` and `--analyze=both` modes; skipped only when
/// the caller deliberately chooses a Rust-free DB layout.
///
/// `vtables` is the M3 result; pass an empty slice when the caller has
/// not run the vtable scan (the tables are still created, just empty).
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
pub fn write_analysis_results(
&mut self,
@@ -311,6 +314,7 @@ impl DbWriter {
func_analysis: &FuncAnalysis,
labels: &HashMap<u32, String>,
xrefs: &XrefMap,
vtables: &[crate::vtables::Vtable],
) -> anyhow::Result<()> {
self.conn.execute_batch("
CREATE TABLE functions (
@@ -339,6 +343,31 @@ impl DbWriter {
kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other
);
CREATE TABLE vtables (
address BIGINT PRIMARY KEY, -- absolute VA of vtable[0]
length BIGINT NOT NULL, -- number of method slots
col_address BIGINT, -- VA of CompleteObjectLocator (NULL when no RTTI)
class_name VARCHAR NOT NULL, -- demangled class name OR ANON_Class_<hash> when stripped
rtti_present BOOLEAN NOT NULL, -- true when COL → TypeDescriptor walk succeeded
base_classes_json VARCHAR -- JSON array of base class names (NULL if none / parse failure)
);
CREATE TABLE methods (
vtable_address BIGINT NOT NULL, -- vtable this slot belongs to
slot BIGINT NOT NULL, -- 0-based slot index
function_address BIGINT NOT NULL, -- VA of the function this slot points at
mangled_name VARCHAR, -- raw label name when mangled (?...)
demangled_name VARCHAR, -- LLVM-style demangled output
PRIMARY KEY (vtable_address, slot)
);
CREATE TABLE classes (
name VARCHAR PRIMARY KEY, -- class name (demangled or ANON_*)
vtable_address BIGINT NOT NULL, -- representative vtable (first detected)
rtti_present BOOLEAN NOT NULL,
base_classes_json VARCHAR -- JSON of base class names (NULL when stripped)
);
CREATE TABLE demangled_names (
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
@@ -364,6 +393,8 @@ impl DbWriter {
insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?;
insert_labels(&self.conn, labels)?;
insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
insert_vtables(&self.conn, vtables, pe, info.image_base)?;
insert_methods_and_classes(&self.conn, vtables, labels)?;
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
let indices = [
@@ -374,6 +405,8 @@ impl DbWriter {
("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"),
("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
("idx_methods_function", "CREATE INDEX idx_methods_function ON methods(function_address)"),
("idx_classes_rtti", "CREATE INDEX idx_classes_rtti ON classes(rtti_present)"),
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
@@ -390,7 +423,7 @@ impl DbWriter {
/// Back-compat wrapper for callers that want the full pre-Phase-3
/// "everything in one shot" behaviour. Equivalent to
/// `ingest_instructions` + `write_analysis_results`.
/// `ingest_instructions` + `write_analysis_results` with no M3 vtables.
#[tracing::instrument(skip_all, name = "db.write_disasm")]
pub fn write_disasm(
&mut self,
@@ -401,7 +434,7 @@ impl DbWriter {
xrefs: &XrefMap,
) -> anyhow::Result<()> {
self.ingest_instructions(pe, info, func_analysis, labels)?;
self.write_analysis_results(pe, info, func_analysis, labels, xrefs)?;
self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[])?;
Ok(())
}
@@ -730,6 +763,89 @@ fn insert_functions(
Ok(())
}
/// Writes one row per entry of `vtables` into the `vtables` table.
///
/// Returns immediately when `vtables` is empty: no statement is prepared and
/// neither the row metric nor the log line is emitted. `_pe` and
/// `_image_base` are accepted but not read by this function.
///
/// # Errors
///
/// Propagates any error from preparing or executing the insert statement.
/// The first failing insert aborts the loop, so the metric and log line are
/// only emitted after every insert has succeeded.
fn insert_vtables(
    conn: &Connection,
    vtables: &[crate::vtables::Vtable],
    _pe: &[u8],
    _image_base: u32,
) -> anyhow::Result<()> {
    if vtables.is_empty() {
        return Ok(());
    }

    // Rows that collide with an existing key are skipped by the statement
    // itself (`ON CONFLICT DO NOTHING`) rather than raising an error.
    let mut insert = conn.prepare(
        "INSERT INTO vtables
(address, length, col_address, class_name, rtti_present, base_classes_json)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT DO NOTHING"
    )?;

    for vt in vtables {
        let address = vt.address as i64;
        let slot_count = vt.length as i64;
        let col = vt.col_address.map(|va| va as i64);
        insert.execute(params![
            address,
            slot_count,
            col,
            vt.class_name.as_str(),
            vt.rtti_present,
            vt.base_classes_json.as_deref(),
        ])?;
    }

    // Reaching this point means every execute succeeded, so the tally is the
    // slice length: it counts statements executed, not rows actually stored.
    let count = vtables.len() as u64;
    metrics::counter!("db.rows", "table" => "vtables").increment(count);
    tracing::info!(rows = count, table = "vtables", "bulk insert complete");
    Ok(())
}
/// Populates the `methods` and `classes` tables from the detected vtables.
///
/// Rows come from `crate::vtables::methods_table(vtables, labels)` and
/// `crate::vtables::classes_table(vtables)` respectively. The function
/// returns immediately when `vtables` is empty, and each table is skipped
/// entirely (no statement, metric, or log line) when its row set is empty.
///
/// # Errors
///
/// Propagates any error from preparing or executing either insert statement.
/// The methods table is written first; a failure there means the classes
/// table is never attempted.
fn insert_methods_and_classes(
    conn: &Connection,
    vtables: &[crate::vtables::Vtable],
    labels: &HashMap<u32, String>,
) -> anyhow::Result<()> {
    if vtables.is_empty() {
        return Ok(());
    }

    // --- methods: one row per (vtable, slot) pair -------------------------
    let method_rows = crate::vtables::methods_table(vtables, labels);
    let method_total = method_rows.len();
    if method_total > 0 {
        let mut insert_method = conn.prepare(
            "INSERT INTO methods
(vtable_address, slot, function_address, mangled_name, demangled_name)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT DO NOTHING"
        )?;
        for (owner_va, slot_idx, target_va, raw_name, pretty_name) in &method_rows {
            insert_method.execute(params![
                *owner_va as i64,
                *slot_idx as i64,
                *target_va as i64,
                raw_name.as_deref(),
                pretty_name.as_deref(),
            ])?;
        }
        // The reported figure is the number of rows submitted; conflicts
        // silently dropped by the statement are still included.
        metrics::counter!("db.rows", "table" => "methods").increment(method_total as u64);
        tracing::info!(rows = method_total, table = "methods", "bulk insert complete");
    }

    // --- classes: one row per class name ----------------------------------
    let class_rows = crate::vtables::classes_table(vtables);
    let class_total = class_rows.len();
    if class_total > 0 {
        let mut insert_class = conn.prepare(
            "INSERT INTO classes
(name, vtable_address, rtti_present, base_classes_json)
VALUES (?, ?, ?, ?)
ON CONFLICT DO NOTHING"
        )?;
        for (class_name, owner_va, has_rtti, base_names) in &class_rows {
            insert_class.execute(params![
                class_name.as_str(),
                *owner_va as i64,
                *has_rtti,
                base_names.as_deref(),
            ])?;
        }
        metrics::counter!("db.rows", "table" => "classes").increment(class_total as u64);
        tracing::info!(rows = class_total, table = "classes", "bulk insert complete");
    }

    Ok(())
}
fn insert_demangled_from_labels(
conn: &Connection,
labels: &HashMap<u32, String>,