M2: MSVC C++ demangler + demangled_names DB table
Adds an MSVC name-demangling layer in front of M3's vtable / RTTI work:

- New `xenia_analysis::demangle` wraps the `msvc-demangler` crate (a Rust port of LLVM's `MicrosoftDemangle.cpp`). `demangle()` short-circuits on non-mangled inputs (`?` prefix check); `demangle_or_raw()` always returns a record (raw passthrough on parse failure).
- Heuristic split of the formatted demangled string into structured fields `(namespace_path, class_name, method_name, params_signature)`. Top-level paren / template-bracket aware, so `a::b<c::d>::e` and signatures with templated arg types parse correctly.
- DB: new `demangled_names(address, mangled, raw_demangled, namespace_path, class_name, method_name, params_signature)` with indices on address / class_name / method_name. Populated from any label whose name starts with `?` plus any import name that happens to be mangled.

For Sylpheed (a fully stripped binary) this table is empty out-of-the-box; the layer's value lands in M3, which will append rows for every RTTI TypeDescriptor name found in `.rdata`.

Tests 610→617 (+7 demangler unit tests covering early-out, raw fallback, member function form, RTTI form, qname split, paren-template safety, and top-level `::` splitting).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -339,6 +339,16 @@ impl DbWriter {
|
||||
kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other
|
||||
);
|
||||
|
||||
CREATE TABLE demangled_names (
|
||||
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
|
||||
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
|
||||
raw_demangled VARCHAR NOT NULL, -- LLVM-style demangled output (or mangled string on parse failure)
|
||||
namespace_path VARCHAR, -- e.g. xe::apu (NULL = global / parser failure)
|
||||
class_name VARCHAR, -- e.g. AudioSystem (NULL = free function / parser failure)
|
||||
method_name VARCHAR, -- e.g. Setup (NULL on parser failure)
|
||||
params_signature VARCHAR -- contents of the outermost (...) (NULL = not a function)
|
||||
);
|
||||
|
||||
CREATE TABLE xrefs (
|
||||
source BIGINT NOT NULL, -- VA of the referencing instruction
|
||||
target BIGINT NOT NULL, -- VA of the referenced destination
|
||||
@@ -353,6 +363,7 @@ impl DbWriter {
|
||||
insert_functions(&self.conn, func_analysis, labels)?;
|
||||
insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?;
|
||||
insert_labels(&self.conn, labels)?;
|
||||
insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
|
||||
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
|
||||
|
||||
let indices = [
|
||||
@@ -360,6 +371,9 @@ impl DbWriter {
|
||||
("idx_functions_pdata_validated", "CREATE INDEX idx_functions_pdata_validated ON functions(pdata_validated)"),
|
||||
("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"),
|
||||
("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"),
|
||||
("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
|
||||
("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"),
|
||||
("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
|
||||
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
|
||||
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
|
||||
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
|
||||
@@ -716,6 +730,69 @@ fn insert_functions(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_demangled_from_labels(
|
||||
conn: &Connection,
|
||||
labels: &HashMap<u32, String>,
|
||||
import_libraries: &[xenia_xex::header::ImportLibrary],
|
||||
) -> anyhow::Result<()> {
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO demangled_names
|
||||
(address, mangled, raw_demangled, namespace_path, class_name,
|
||||
method_name, params_signature)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)"
|
||||
)?;
|
||||
|
||||
let mut count = 0u64;
|
||||
|
||||
for (&addr, name) in labels {
|
||||
// The label table holds raw symbol names (`?...@...`). Imports come
|
||||
// wrapped as `__imp_<lib>_<sym>`; strip the `__imp_<lib>_` prefix to
|
||||
// recover any mangled inner name (rare for kernel imports but
|
||||
// defensive). For now, skip imports entirely — they're handled below
|
||||
// via `import_libraries`.
|
||||
if name.starts_with("__imp_") {
|
||||
continue;
|
||||
}
|
||||
if let Some(d) = crate::demangle::demangle(name) {
|
||||
stmt.execute(params![
|
||||
addr as i64,
|
||||
d.mangled,
|
||||
d.raw_demangled,
|
||||
d.namespace_path,
|
||||
d.class_name,
|
||||
d.method_name,
|
||||
d.params_signature,
|
||||
])?;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Defensive: also demangle any import name that happens to be mangled.
|
||||
for lib in import_libraries {
|
||||
for imp in &lib.imports {
|
||||
let resolved = crate::resolve_ordinal(&lib.name, imp.ordinal);
|
||||
if let Some(name) = resolved
|
||||
&& let Some(d) = crate::demangle::demangle(name)
|
||||
{
|
||||
stmt.execute(params![
|
||||
imp.address as i64,
|
||||
d.mangled,
|
||||
d.raw_demangled,
|
||||
d.namespace_path,
|
||||
d.class_name,
|
||||
d.method_name,
|
||||
d.params_signature,
|
||||
])?;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metrics::counter!("db.rows", "table" => "demangled_names").increment(count);
|
||||
tracing::info!(rows = count, table = "demangled_names", "demangler complete");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_pdata_entries(
|
||||
conn: &Connection,
|
||||
entries: &[xenia_xex::pdata::PdataEntry],
|
||||
|
||||
Reference in New Issue
Block a user