M2: MSVC C++ demangler + demangled_names DB table

Adds an MSVC name-demangling layer in front of M3's vtable / RTTI work:
- New `xenia_analysis::demangle` wraps the `msvc-demangler` crate (a Rust
  port of LLVM's `MicrosoftDemangle.cpp`). `demangle()` short-circuits on
  non-mangled inputs (`?` prefix check); `demangle_or_raw()` always returns
  a record (raw passthrough on parse failure).
- Heuristic split of the formatted demangled string into structured fields
  `(namespace_path, class_name, method_name, params_signature)`. Top-level
  paren / template-bracket aware, so `a::b<c::d>::e` and signatures with
  templated arg types parse correctly.
- DB: new `demangled_names(address, mangled, raw_demangled, namespace_path,
  class_name, method_name, params_signature)` with indices on address /
  class_name / method_name. Populated from any label whose name starts with
  `?` plus any import name that happens to be mangled.

For Sylpheed (a fully stripped binary) this table is empty out-of-the-box;
the layer's value lands in M3, which will append rows for every RTTI
TypeDescriptor name found in `.rdata`.

Tests 610→617 (+7 demangler unit tests covering early-out, raw fallback,
member function form, RTTI form, qname split, paren-template safety, and
top-level `::` splitting).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-08 20:02:21 +02:00
parent fd68285210
commit 89f5f7e4a9
7 changed files with 405 additions and 6 deletions

View File

@@ -339,6 +339,16 @@ impl DbWriter {
kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other
);
CREATE TABLE demangled_names (
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
raw_demangled VARCHAR NOT NULL, -- LLVM-style demangled output (or mangled string on parse failure)
namespace_path VARCHAR, -- e.g. xe::apu (NULL = global / parser failure)
class_name VARCHAR, -- e.g. AudioSystem (NULL = free function / parser failure)
method_name VARCHAR, -- e.g. Setup (NULL on parser failure)
params_signature VARCHAR -- contents of the outermost (...) (NULL = not a function)
);
CREATE TABLE xrefs (
source BIGINT NOT NULL, -- VA of the referencing instruction
target BIGINT NOT NULL, -- VA of the referenced destination
@@ -353,6 +363,7 @@ impl DbWriter {
insert_functions(&self.conn, func_analysis, labels)?;
insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?;
insert_labels(&self.conn, labels)?;
insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
let indices = [
@@ -360,6 +371,9 @@ impl DbWriter {
("idx_functions_pdata_validated", "CREATE INDEX idx_functions_pdata_validated ON functions(pdata_validated)"),
("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"),
("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"),
("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"),
("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
@@ -716,6 +730,69 @@ fn insert_functions(
Ok(())
}
/// Fills the `demangled_names` table from two sources, in this order:
///
/// 1. every entry of `labels` whose name does not start with `__imp_` and
///    for which `crate::demangle::demangle` returns a record, keyed by the
///    label's address;
/// 2. every import in `import_libraries` whose ordinal resolves to a name
///    (via `crate::resolve_ordinal`) for which `demangle` returns a record,
///    keyed by the import's address.
///
/// Names that `demangle` rejects are skipped silently. After all rows are
/// written, the row count is reported through the `db.rows` metrics counter
/// and an info-level tracing event.
///
/// # Errors
///
/// Returns an error if preparing the INSERT statement fails or if any single
/// row insertion fails; rows inserted before the failure are not undone by
/// this function.
fn insert_demangled_from_labels(
    conn: &Connection,
    labels: &HashMap<u32, String>,
    import_libraries: &[xenia_xex::header::ImportLibrary],
) -> anyhow::Result<()> {
    let mut stmt = conn.prepare(
        "INSERT INTO demangled_names
(address, mangled, raw_demangled, namespace_path, class_name,
method_name, params_signature)
VALUES (?, ?, ?, ?, ?, ?, ?)"
    )?;

    // Source 1: plain labels. `__imp_`-prefixed labels are import thunks and
    // are deliberately left out here; imports are covered by source 2, which
    // works from the resolved import name instead of the wrapped label.
    let from_labels = labels
        .iter()
        .filter(|(_, name)| !name.starts_with("__imp_"))
        .filter_map(|(&addr, name)| {
            // u32 -> i64 is a lossless widening for the BIGINT column.
            crate::demangle::demangle(name).map(|d| (addr as i64, d))
        });

    // Source 2 (defensive): any import whose resolved name happens to be
    // mangled. Imports whose ordinal does not resolve are skipped.
    let from_imports = import_libraries.iter().flat_map(|lib| {
        lib.imports.iter().filter_map(move |imp| {
            let name = crate::resolve_ordinal(&lib.name, imp.ordinal)?;
            let d = crate::demangle::demangle(name)?;
            Some((imp.address as i64, d))
        })
    });

    // Both sources are lazy, so rows are still produced and inserted one at a
    // time: all label rows first, then all import rows. The first failing
    // insert aborts the whole pass.
    let mut inserted = 0u64;
    for (address, d) in from_labels.chain(from_imports) {
        stmt.execute(params![
            address,
            d.mangled,
            d.raw_demangled,
            d.namespace_path,
            d.class_name,
            d.method_name,
            d.params_signature,
        ])?;
        inserted += 1;
    }

    metrics::counter!("db.rows", "table" => "demangled_names").increment(inserted);
    tracing::info!(rows = inserted, table = "demangled_names", "demangler complete");
    Ok(())
}
fn insert_pdata_entries(
conn: &Connection,
entries: &[xenia_xex::pdata::PdataEntry],