M3: vtable scan + MSVC RTTI walk + 3 new tables
Adds detection of statically-allocated MSVC vtables in .rdata/.data:

- New `xenia_analysis::vtables` walks read-only sections looking for runs of ≥3 contiguous big-endian u32 values where each value lands on a known function start (from M1's corrected functions table). 2-slot runs are rejected to keep false-positive rate down.
- For each candidate the MSVC RTTI walk vtable[-1] → CompleteObjectLocator → TypeDescriptor → mangled name is attempted; on success the demangled class name is recorded along with a best-effort RTTIClassHierarchyDescriptor walk to fill base_classes_json. On failure (RTTI stripped — common for shipped game binaries) the class is named ANON_Class_<fnv1a-hash> keyed by sorted method-PC list, so identical vtables collapse to one entry.
- DB: new tables `vtables`, `methods`, `classes` with indices on function_address and rtti_present. `write_analysis_results` takes a `&[Vtable]` slice; `write_disasm` (back-compat) passes empty.
- cmd_dis wires the scan after xref analysis using `func_analysis.functions.keys()` as the function-start oracle.

Validation on Sylpheed (RTTI stripped, as expected): 722 vtables / 499 unique classes / 5571 methods. Sanity invariant: every methods.function_address joins to functions.address (0 broken refs). Largest vtable: 131 slots.

Tests 617→621 (+4 vtable unit tests covering 3-slot detect, 2-slot reject, synth name stability, and synth name divergence).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -303,6 +303,9 @@ impl DbWriter {
|
||||
/// (`functions`, `labels`, `xrefs`) and their indices. Always executes
|
||||
/// in `--analyze=rust` and `--analyze=both` modes; skipped only when
|
||||
/// the caller deliberately chooses a Rust-free DB layout.
|
||||
///
|
||||
/// `vtables` is the M3 result; pass an empty slice when the caller has
|
||||
/// not run the vtable scan (the tables are still created, just empty).
|
||||
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
|
||||
pub fn write_analysis_results(
|
||||
&mut self,
|
||||
@@ -311,6 +314,7 @@ impl DbWriter {
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
xrefs: &XrefMap,
|
||||
vtables: &[crate::vtables::Vtable],
|
||||
) -> anyhow::Result<()> {
|
||||
self.conn.execute_batch("
|
||||
CREATE TABLE functions (
|
||||
@@ -339,6 +343,31 @@ impl DbWriter {
|
||||
kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other
|
||||
);
|
||||
|
||||
CREATE TABLE vtables (
|
||||
address BIGINT PRIMARY KEY, -- absolute VA of vtable[0]
|
||||
length BIGINT NOT NULL, -- number of method slots
|
||||
col_address BIGINT, -- VA of CompleteObjectLocator (NULL when no RTTI)
|
||||
class_name VARCHAR NOT NULL, -- demangled class name OR ANON_Class_<hash> when stripped
|
||||
rtti_present BOOLEAN NOT NULL, -- true when COL → TypeDescriptor walk succeeded
|
||||
base_classes_json VARCHAR -- JSON array of base class names (NULL if none / parse failure)
|
||||
);
|
||||
|
||||
CREATE TABLE methods (
|
||||
vtable_address BIGINT NOT NULL, -- vtable this slot belongs to
|
||||
slot BIGINT NOT NULL, -- 0-based slot index
|
||||
function_address BIGINT NOT NULL, -- VA of the function this slot points at
|
||||
mangled_name VARCHAR, -- raw label name when mangled (?...)
|
||||
demangled_name VARCHAR, -- LLVM-style demangled output
|
||||
PRIMARY KEY (vtable_address, slot)
|
||||
);
|
||||
|
||||
CREATE TABLE classes (
|
||||
name VARCHAR PRIMARY KEY, -- class name (demangled or ANON_*)
|
||||
vtable_address BIGINT NOT NULL, -- representative vtable (first detected)
|
||||
rtti_present BOOLEAN NOT NULL,
|
||||
base_classes_json VARCHAR -- JSON of base class names (NULL when stripped)
|
||||
);
|
||||
|
||||
CREATE TABLE demangled_names (
|
||||
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
|
||||
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
|
||||
@@ -364,6 +393,8 @@ impl DbWriter {
|
||||
insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?;
|
||||
insert_labels(&self.conn, labels)?;
|
||||
insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
|
||||
insert_vtables(&self.conn, vtables, pe, info.image_base)?;
|
||||
insert_methods_and_classes(&self.conn, vtables, labels)?;
|
||||
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
|
||||
|
||||
let indices = [
|
||||
@@ -374,6 +405,8 @@ impl DbWriter {
|
||||
("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
|
||||
("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"),
|
||||
("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
|
||||
("idx_methods_function", "CREATE INDEX idx_methods_function ON methods(function_address)"),
|
||||
("idx_classes_rtti", "CREATE INDEX idx_classes_rtti ON classes(rtti_present)"),
|
||||
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
|
||||
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
|
||||
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
|
||||
@@ -390,7 +423,7 @@ impl DbWriter {
|
||||
|
||||
/// Back-compat wrapper for callers that want the full pre-Phase-3
|
||||
/// "everything in one shot" behaviour. Equivalent to
|
||||
/// `ingest_instructions` + `write_analysis_results`.
|
||||
/// `ingest_instructions` + `write_analysis_results` with no M3 vtables.
|
||||
#[tracing::instrument(skip_all, name = "db.write_disasm")]
|
||||
pub fn write_disasm(
|
||||
&mut self,
|
||||
@@ -401,7 +434,7 @@ impl DbWriter {
|
||||
xrefs: &XrefMap,
|
||||
) -> anyhow::Result<()> {
|
||||
self.ingest_instructions(pe, info, func_analysis, labels)?;
|
||||
self.write_analysis_results(pe, info, func_analysis, labels, xrefs)?;
|
||||
self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -730,6 +763,89 @@ fn insert_functions(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_vtables(
|
||||
conn: &Connection,
|
||||
vtables: &[crate::vtables::Vtable],
|
||||
_pe: &[u8],
|
||||
_image_base: u32,
|
||||
) -> anyhow::Result<()> {
|
||||
if vtables.is_empty() { return Ok(()); }
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO vtables
|
||||
(address, length, col_address, class_name, rtti_present, base_classes_json)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT DO NOTHING"
|
||||
)?;
|
||||
let mut count = 0u64;
|
||||
for v in vtables {
|
||||
stmt.execute(params![
|
||||
v.address as i64,
|
||||
v.length as i64,
|
||||
v.col_address.map(|a| a as i64),
|
||||
v.class_name.as_str(),
|
||||
v.rtti_present,
|
||||
v.base_classes_json.as_deref(),
|
||||
])?;
|
||||
count += 1;
|
||||
}
|
||||
metrics::counter!("db.rows", "table" => "vtables").increment(count);
|
||||
tracing::info!(rows = count, table = "vtables", "bulk insert complete");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_methods_and_classes(
|
||||
conn: &Connection,
|
||||
vtables: &[crate::vtables::Vtable],
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
if vtables.is_empty() { return Ok(()); }
|
||||
|
||||
// methods rows
|
||||
let methods = crate::vtables::methods_table(vtables, labels);
|
||||
if !methods.is_empty() {
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO methods
|
||||
(vtable_address, slot, function_address, mangled_name, demangled_name)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
ON CONFLICT DO NOTHING"
|
||||
)?;
|
||||
for (vt_addr, slot, fn_addr, mangled, demangled) in &methods {
|
||||
stmt.execute(params![
|
||||
*vt_addr as i64,
|
||||
*slot as i64,
|
||||
*fn_addr as i64,
|
||||
mangled.as_deref(),
|
||||
demangled.as_deref(),
|
||||
])?;
|
||||
}
|
||||
metrics::counter!("db.rows", "table" => "methods").increment(methods.len() as u64);
|
||||
tracing::info!(rows = methods.len(), table = "methods", "bulk insert complete");
|
||||
}
|
||||
|
||||
// classes rows (deduped by class_name, first-detected wins)
|
||||
let classes = crate::vtables::classes_table(vtables);
|
||||
if !classes.is_empty() {
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO classes
|
||||
(name, vtable_address, rtti_present, base_classes_json)
|
||||
VALUES (?, ?, ?, ?)
|
||||
ON CONFLICT DO NOTHING"
|
||||
)?;
|
||||
for (name, vt_addr, rtti, bases) in &classes {
|
||||
stmt.execute(params![
|
||||
name.as_str(),
|
||||
*vt_addr as i64,
|
||||
*rtti,
|
||||
bases.as_deref(),
|
||||
])?;
|
||||
}
|
||||
metrics::counter!("db.rows", "table" => "classes").increment(classes.len() as u64);
|
||||
tracing::info!(rows = classes.len(), table = "classes", "bulk insert complete");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_demangled_from_labels(
|
||||
conn: &Connection,
|
||||
labels: &HashMap<u32, String>,
|
||||
|
||||
Reference in New Issue
Block a user