Merge analysis-overhaul/m3-vtables-rtti
This commit is contained in:
@@ -102,12 +102,45 @@ SELECT name FROM functions WHERE address = 2186674160; -- 0x824D29F0
|
|||||||
- `msvc-demangler` crate (`https://docs.rs/msvc-demangler/0.11`).
|
- `msvc-demangler` crate (`https://docs.rs/msvc-demangler/0.11`).
|
||||||
- LLVM `MicrosoftDemangle.cpp` (the parser this crate ports).
|
- LLVM `MicrosoftDemangle.cpp` (the parser this crate ports).
|
||||||
|
|
||||||
## Layer M3 — Vtable + RTTI detection (planned)
|
## Layer M3 — Vtable + RTTI detection (landed)
|
||||||
|
|
||||||
Adds `vtables`, `methods`, `classes` tables. Heuristic vtable scan over
|
### Schema additions
|
||||||
`.rdata` + `.data`, optional MSVC RTTI `CompleteObjectLocator → TypeDescriptor`
|
- `vtables(address PK, length, col_address NULL, class_name, rtti_present,
|
||||||
walk, anonymous-class fallback when RTTI is stripped. See
|
base_classes_json NULL)` — every detected static vtable.
|
||||||
`crates/xenia-analysis/src/vtables.rs` (when landed).
|
- `methods(vtable_address, slot, function_address, mangled_name NULL,
|
||||||
|
demangled_name NULL, PRIMARY KEY (vtable_address, slot))` — one row per
|
||||||
|
method slot.
|
||||||
|
- `classes(name PK, vtable_address, rtti_present, base_classes_json NULL)` —
|
||||||
|
deduped by class name (first-detected vtable wins).
|
||||||
|
- Indices: `methods.function_address`, `classes.rtti_present`.
|
||||||
|
|
||||||
|
### What this layer does
|
||||||
|
- Walks `.rdata` and `.data` looking for runs of ≥3 consecutive 4-byte BE
|
||||||
|
values where each value is a known function start (from M1's corrected
|
||||||
|
`functions` table). Runs of only one or two pointers are intentionally rejected to
|
||||||
|
control false-positive rate.
|
||||||
|
- Attempts the MSVC RTTI walk `vtable[-1] → CompleteObjectLocator → TypeDescriptor`
|
||||||
|
for each candidate. When successful, the demangled `class ClassName`
|
||||||
|
string fills `class_name` and a best-effort
|
||||||
|
`RTTIClassHierarchyDescriptor` walk fills `base_classes_json` (JSON array
|
||||||
|
of base class names).
|
||||||
|
- Falls back to `ANON_Class_<8-hex>` keyed by FNV-1a hash of the sorted
|
||||||
|
method-PC tuple when RTTI is absent (typical for shipped game binaries).
|
||||||
|
Identical vtables across the binary (multiple instances) collapse to the
|
||||||
|
same anonymous name.
|
||||||
|
|
||||||
|
### What this layer does NOT do
|
||||||
|
- Vtables built at runtime in heap-allocated memory (e.g. by ctors copying
|
||||||
|
static templates) are out of scope — only static `.rdata`/`.data` content.
|
||||||
|
- Multiple-inheritance "extra" vftables (one per base subobject) are detected
|
||||||
|
as independent vtables with no link between them.
|
||||||
|
- Inheritance-tree walking beyond `RTTIClassHierarchyDescriptor`'s direct
|
||||||
|
base list is not attempted.
|
||||||
|
|
||||||
|
### Reference docs
|
||||||
|
- openrce.org "Reversing Microsoft Visual C++" — RTTI layout articles
|
||||||
|
(CompleteObjectLocator at vtable[-1]; TypeDescriptor at COL+0xC; mangled
|
||||||
|
name at TD+0x8).
|
||||||
|
|
||||||
## Layer M4 — Class-aware probe targeting (planned)
|
## Layer M4 — Class-aware probe targeting (planned)
|
||||||
|
|
||||||
|
|||||||
@@ -303,6 +303,9 @@ impl DbWriter {
|
|||||||
/// (`functions`, `labels`, `xrefs`) and their indices. Always executes
|
/// (`functions`, `labels`, `xrefs`) and their indices. Always executes
|
||||||
/// in `--analyze=rust` and `--analyze=both` modes; skipped only when
|
/// in `--analyze=rust` and `--analyze=both` modes; skipped only when
|
||||||
/// the caller deliberately chooses a Rust-free DB layout.
|
/// the caller deliberately chooses a Rust-free DB layout.
|
||||||
|
///
|
||||||
|
/// `vtables` is the M3 result; pass an empty slice when the caller has
|
||||||
|
/// not run the vtable scan (the tables are still created, just empty).
|
||||||
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
|
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
|
||||||
pub fn write_analysis_results(
|
pub fn write_analysis_results(
|
||||||
&mut self,
|
&mut self,
|
||||||
@@ -311,6 +314,7 @@ impl DbWriter {
|
|||||||
func_analysis: &FuncAnalysis,
|
func_analysis: &FuncAnalysis,
|
||||||
labels: &HashMap<u32, String>,
|
labels: &HashMap<u32, String>,
|
||||||
xrefs: &XrefMap,
|
xrefs: &XrefMap,
|
||||||
|
vtables: &[crate::vtables::Vtable],
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
self.conn.execute_batch("
|
self.conn.execute_batch("
|
||||||
CREATE TABLE functions (
|
CREATE TABLE functions (
|
||||||
@@ -339,6 +343,31 @@ impl DbWriter {
|
|||||||
kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other
|
kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CREATE TABLE vtables (
|
||||||
|
address BIGINT PRIMARY KEY, -- absolute VA of vtable[0]
|
||||||
|
length BIGINT NOT NULL, -- number of method slots
|
||||||
|
col_address BIGINT, -- VA of CompleteObjectLocator (NULL when no RTTI)
|
||||||
|
class_name VARCHAR NOT NULL, -- demangled class name OR ANON_Class_<hash> when stripped
|
||||||
|
rtti_present BOOLEAN NOT NULL, -- true when COL → TypeDescriptor walk succeeded
|
||||||
|
base_classes_json VARCHAR -- JSON array of base class names (NULL if none / parse failure)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE methods (
|
||||||
|
vtable_address BIGINT NOT NULL, -- vtable this slot belongs to
|
||||||
|
slot BIGINT NOT NULL, -- 0-based slot index
|
||||||
|
function_address BIGINT NOT NULL, -- VA of the function this slot points at
|
||||||
|
mangled_name VARCHAR, -- raw label name when mangled (?...)
|
||||||
|
demangled_name VARCHAR, -- LLVM-style demangled output
|
||||||
|
PRIMARY KEY (vtable_address, slot)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE classes (
|
||||||
|
name VARCHAR PRIMARY KEY, -- class name (demangled or ANON_*)
|
||||||
|
vtable_address BIGINT NOT NULL, -- representative vtable (first detected)
|
||||||
|
rtti_present BOOLEAN NOT NULL,
|
||||||
|
base_classes_json VARCHAR -- JSON of base class names (NULL when stripped)
|
||||||
|
);
|
||||||
|
|
||||||
CREATE TABLE demangled_names (
|
CREATE TABLE demangled_names (
|
||||||
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
|
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
|
||||||
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
|
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
|
||||||
@@ -364,6 +393,8 @@ impl DbWriter {
|
|||||||
insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?;
|
insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?;
|
||||||
insert_labels(&self.conn, labels)?;
|
insert_labels(&self.conn, labels)?;
|
||||||
insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
|
insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
|
||||||
|
insert_vtables(&self.conn, vtables, pe, info.image_base)?;
|
||||||
|
insert_methods_and_classes(&self.conn, vtables, labels)?;
|
||||||
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
|
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
|
||||||
|
|
||||||
let indices = [
|
let indices = [
|
||||||
@@ -374,6 +405,8 @@ impl DbWriter {
|
|||||||
("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
|
("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
|
||||||
("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"),
|
("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"),
|
||||||
("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
|
("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
|
||||||
|
("idx_methods_function", "CREATE INDEX idx_methods_function ON methods(function_address)"),
|
||||||
|
("idx_classes_rtti", "CREATE INDEX idx_classes_rtti ON classes(rtti_present)"),
|
||||||
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
|
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
|
||||||
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
|
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
|
||||||
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
|
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
|
||||||
@@ -390,7 +423,7 @@ impl DbWriter {
|
|||||||
|
|
||||||
/// Back-compat wrapper for callers that want the full pre-Phase-3
|
/// Back-compat wrapper for callers that want the full pre-Phase-3
|
||||||
/// "everything in one shot" behaviour. Equivalent to
|
/// "everything in one shot" behaviour. Equivalent to
|
||||||
/// `ingest_instructions` + `write_analysis_results`.
|
/// `ingest_instructions` + `write_analysis_results` with no M3 vtables.
|
||||||
#[tracing::instrument(skip_all, name = "db.write_disasm")]
|
#[tracing::instrument(skip_all, name = "db.write_disasm")]
|
||||||
pub fn write_disasm(
|
pub fn write_disasm(
|
||||||
&mut self,
|
&mut self,
|
||||||
@@ -401,7 +434,7 @@ impl DbWriter {
|
|||||||
xrefs: &XrefMap,
|
xrefs: &XrefMap,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
self.ingest_instructions(pe, info, func_analysis, labels)?;
|
self.ingest_instructions(pe, info, func_analysis, labels)?;
|
||||||
self.write_analysis_results(pe, info, func_analysis, labels, xrefs)?;
|
self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[])?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -730,6 +763,89 @@ fn insert_functions(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn insert_vtables(
|
||||||
|
conn: &Connection,
|
||||||
|
vtables: &[crate::vtables::Vtable],
|
||||||
|
_pe: &[u8],
|
||||||
|
_image_base: u32,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
if vtables.is_empty() { return Ok(()); }
|
||||||
|
let mut stmt = conn.prepare(
|
||||||
|
"INSERT INTO vtables
|
||||||
|
(address, length, col_address, class_name, rtti_present, base_classes_json)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT DO NOTHING"
|
||||||
|
)?;
|
||||||
|
let mut count = 0u64;
|
||||||
|
for v in vtables {
|
||||||
|
stmt.execute(params![
|
||||||
|
v.address as i64,
|
||||||
|
v.length as i64,
|
||||||
|
v.col_address.map(|a| a as i64),
|
||||||
|
v.class_name.as_str(),
|
||||||
|
v.rtti_present,
|
||||||
|
v.base_classes_json.as_deref(),
|
||||||
|
])?;
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
metrics::counter!("db.rows", "table" => "vtables").increment(count);
|
||||||
|
tracing::info!(rows = count, table = "vtables", "bulk insert complete");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn insert_methods_and_classes(
|
||||||
|
conn: &Connection,
|
||||||
|
vtables: &[crate::vtables::Vtable],
|
||||||
|
labels: &HashMap<u32, String>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
if vtables.is_empty() { return Ok(()); }
|
||||||
|
|
||||||
|
// methods rows
|
||||||
|
let methods = crate::vtables::methods_table(vtables, labels);
|
||||||
|
if !methods.is_empty() {
|
||||||
|
let mut stmt = conn.prepare(
|
||||||
|
"INSERT INTO methods
|
||||||
|
(vtable_address, slot, function_address, mangled_name, demangled_name)
|
||||||
|
VALUES (?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT DO NOTHING"
|
||||||
|
)?;
|
||||||
|
for (vt_addr, slot, fn_addr, mangled, demangled) in &methods {
|
||||||
|
stmt.execute(params![
|
||||||
|
*vt_addr as i64,
|
||||||
|
*slot as i64,
|
||||||
|
*fn_addr as i64,
|
||||||
|
mangled.as_deref(),
|
||||||
|
demangled.as_deref(),
|
||||||
|
])?;
|
||||||
|
}
|
||||||
|
metrics::counter!("db.rows", "table" => "methods").increment(methods.len() as u64);
|
||||||
|
tracing::info!(rows = methods.len(), table = "methods", "bulk insert complete");
|
||||||
|
}
|
||||||
|
|
||||||
|
// classes rows (deduped by class_name, first-detected wins)
|
||||||
|
let classes = crate::vtables::classes_table(vtables);
|
||||||
|
if !classes.is_empty() {
|
||||||
|
let mut stmt = conn.prepare(
|
||||||
|
"INSERT INTO classes
|
||||||
|
(name, vtable_address, rtti_present, base_classes_json)
|
||||||
|
VALUES (?, ?, ?, ?)
|
||||||
|
ON CONFLICT DO NOTHING"
|
||||||
|
)?;
|
||||||
|
for (name, vt_addr, rtti, bases) in &classes {
|
||||||
|
stmt.execute(params![
|
||||||
|
name.as_str(),
|
||||||
|
*vt_addr as i64,
|
||||||
|
*rtti,
|
||||||
|
bases.as_deref(),
|
||||||
|
])?;
|
||||||
|
}
|
||||||
|
metrics::counter!("db.rows", "table" => "classes").increment(classes.len() as u64);
|
||||||
|
tracing::info!(rows = classes.len(), table = "classes", "bulk insert complete");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn insert_demangled_from_labels(
|
fn insert_demangled_from_labels(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
labels: &HashMap<u32, String>,
|
labels: &HashMap<u32, String>,
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ pub mod formatter;
|
|||||||
pub mod sinks;
|
pub mod sinks;
|
||||||
pub mod sql_views;
|
pub mod sql_views;
|
||||||
pub mod demangle;
|
pub mod demangle;
|
||||||
|
pub mod vtables;
|
||||||
|
pub mod lookup;
|
||||||
|
|
||||||
mod ordinals;
|
mod ordinals;
|
||||||
pub use ordinals::resolve_ordinal;
|
pub use ordinals::resolve_ordinal;
|
||||||
|
|||||||
424
crates/xenia-analysis/src/vtables.rs
Normal file
424
crates/xenia-analysis/src/vtables.rs
Normal file
@@ -0,0 +1,424 @@
|
|||||||
|
//! MSVC vtable + RTTI detection.
|
||||||
|
//!
|
||||||
|
//! Heuristic two-pass scan over the binary's static data sections (`.rdata` and `.data`). Pass 1
|
||||||
|
//! finds candidate vtables — runs of ≥3 contiguous big-endian u32 values that
|
||||||
|
//! all land on known function entries. Pass 2 attempts the MSVC RTTI walk
|
||||||
|
//! `vtable[-1] → CompleteObjectLocator → TypeDescriptor → mangled name`. When
|
||||||
|
//! RTTI is stripped (typical for shipped game binaries), each anonymous vtable
|
||||||
|
//! gets a deterministic name `ANON_Class_<hex>` keyed by a hash of its
|
||||||
|
//! sorted method PCs (so identical vtables across multiple class instances
|
||||||
|
//! collapse to one entry).
|
||||||
|
//!
|
||||||
|
//! What this module does NOT do:
|
||||||
|
//! - Vtables in heap-allocated memory (built at runtime by ctors) are out of
|
||||||
|
//! scope — only vtables present statically in `.rdata` / `.data`.
|
||||||
|
//! - RTTI inheritance (`BaseClassDescriptor` walk) is best-effort; we record
|
||||||
|
//! the first-level base list when present and leave it NULL otherwise.
|
||||||
|
//! - Multiple-inheritance "extra" vftables (one per base subobject) are
|
||||||
|
//! detected as independent vtables; we don't link them.
|
||||||
|
//!
|
||||||
|
//! Reference: openrce.org "Reversing Microsoft Visual C++" RTTI articles
|
||||||
|
//! (CompleteObjectLocator / TypeDescriptor / BaseClassDescriptor layout).
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use xenia_xex::pe::PeSection;
|
||||||
|
|
||||||
|
use crate::demangle;
|
||||||
|
|
||||||
|
/// A single vtable candidate found by the static scan, together with
/// whatever RTTI metadata could be recovered for it.
#[derive(Debug, Clone)]
pub struct Vtable {
    /// Absolute VA of slot 0 (`vtable[0]`, the first method pointer).
    pub address: u32,
    /// Number of method slots in the run.
    pub length: u32,
    /// Absolute VA of the `CompleteObjectLocator` read from `vtable[-1]`.
    /// `None` when the RTTI walk did not succeed (no RTTI / stripped).
    pub col_address: Option<u32>,
    /// Class name: the demangled RTTI name when available, otherwise the
    /// synthetic `ANON_Class_<hex>` form.
    pub class_name: String,
    /// `true` when the COL → TypeDescriptor walk succeeded.
    pub rtti_present: bool,
    /// Base class names recovered from `RTTIClassHierarchyDescriptor`,
    /// encoded as a JSON array. `None` when not parseable.
    pub base_classes_json: Option<String>,
    /// Function VA for each slot, in slot order.
    pub methods: Vec<u32>,
}
|
||||||
|
|
||||||
|
/// Run the vtable scan + RTTI walk. `function_starts` is the set of valid
|
||||||
|
/// `.text` function entry VAs from M1's corrected `functions` table.
|
||||||
|
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
||||||
|
pub fn analyze(
|
||||||
|
pe: &[u8],
|
||||||
|
image_base: u32,
|
||||||
|
sections: &[PeSection],
|
||||||
|
function_starts: &std::collections::BTreeSet<u32>,
|
||||||
|
) -> Vec<Vtable> {
|
||||||
|
let started = std::time::Instant::now();
|
||||||
|
// Sections we'll scan for vtable bodies.
|
||||||
|
let scan_targets: Vec<&PeSection> = sections
|
||||||
|
.iter()
|
||||||
|
.filter(|s| matches!(s.name.as_str(), ".rdata" | ".data"))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Range table for "is this VA in .rdata or .data?"
|
||||||
|
let rdata_ranges: Vec<(u32, u32)> = sections
|
||||||
|
.iter()
|
||||||
|
.filter(|s| s.name == ".rdata")
|
||||||
|
.map(|s| (image_base + s.virtual_address, image_base + s.virtual_address + s.virtual_size))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut candidates: Vec<Vtable> = Vec::new();
|
||||||
|
|
||||||
|
for section in scan_targets {
|
||||||
|
let va_start = image_base + section.virtual_address;
|
||||||
|
let va_end = va_start + section.virtual_size;
|
||||||
|
let raw_start = section.virtual_address as usize;
|
||||||
|
let raw_end = (section.virtual_address + section.virtual_size) as usize;
|
||||||
|
if raw_end > pe.len() { continue; }
|
||||||
|
let bytes = &pe[raw_start..raw_end.min(pe.len())];
|
||||||
|
|
||||||
|
let mut i = 0usize;
|
||||||
|
while i + 12 <= bytes.len() {
|
||||||
|
// Try to start a run at this 4-aligned offset.
|
||||||
|
if !i.is_multiple_of(4) { i += 1; continue; }
|
||||||
|
let mut run_len = 0usize;
|
||||||
|
let mut methods: Vec<u32> = Vec::new();
|
||||||
|
let mut j = i;
|
||||||
|
while j + 4 <= bytes.len() {
|
||||||
|
let val = u32::from_be_bytes([bytes[j], bytes[j + 1], bytes[j + 2], bytes[j + 3]]);
|
||||||
|
if function_starts.contains(&val) {
|
||||||
|
methods.push(val);
|
||||||
|
run_len += 1;
|
||||||
|
j += 4;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if run_len >= 3 {
|
||||||
|
let address = va_start + (i as u32);
|
||||||
|
candidates.push(Vtable {
|
||||||
|
address,
|
||||||
|
length: run_len as u32,
|
||||||
|
col_address: None,
|
||||||
|
class_name: synth_anon_name(&methods),
|
||||||
|
rtti_present: false,
|
||||||
|
base_classes_json: None,
|
||||||
|
methods,
|
||||||
|
});
|
||||||
|
i += run_len * 4;
|
||||||
|
} else {
|
||||||
|
i += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let _ = (va_start, va_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
// RTTI walk: for each candidate, look at vtable[-1].
|
||||||
|
let pe_image_base = image_base;
|
||||||
|
for v in &mut candidates {
|
||||||
|
if v.address < 4 { continue; }
|
||||||
|
let col_off = (v.address - pe_image_base - 4) as usize;
|
||||||
|
if col_off + 4 > pe.len() { continue; }
|
||||||
|
let col_ptr = u32::from_be_bytes([pe[col_off], pe[col_off + 1], pe[col_off + 2], pe[col_off + 3]]);
|
||||||
|
if col_ptr == 0 { continue; }
|
||||||
|
if !is_in_ranges(col_ptr, &rdata_ranges) { continue; }
|
||||||
|
|
||||||
|
// Try to extract the TypeDescriptor mangled-name string.
|
||||||
|
if let Some((td_ptr, hierarchy_ptr)) = read_col(pe, image_base, col_ptr)
|
||||||
|
&& let Some(mangled) = read_typedescriptor_name(pe, image_base, td_ptr, &rdata_ranges)
|
||||||
|
&& let Some(class) = demangle_rtti_typename(&mangled)
|
||||||
|
{
|
||||||
|
v.col_address = Some(col_ptr);
|
||||||
|
v.class_name = class;
|
||||||
|
v.rtti_present = true;
|
||||||
|
v.base_classes_json = read_class_hierarchy(pe, image_base, hierarchy_ptr, &rdata_ranges);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||||
|
let rtti_count = candidates.iter().filter(|v| v.rtti_present).count();
|
||||||
|
metrics::histogram!("analysis.phase_ms", "phase" => "vtables").record(elapsed_ms);
|
||||||
|
tracing::info!(
|
||||||
|
vtables = candidates.len(),
|
||||||
|
rtti = rtti_count,
|
||||||
|
anon = candidates.len() - rtti_count,
|
||||||
|
elapsed_ms,
|
||||||
|
"vtable scan complete"
|
||||||
|
);
|
||||||
|
candidates
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `addr` falls inside any half-open `[start, end)` range.
fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool {
    ranges.iter().any(|&(lo, hi)| (lo..hi).contains(&addr))
}
|
||||||
|
|
||||||
|
/// Read 4 big-endian bytes at absolute VA `addr` from the PE image.
///
/// `pe` is indexed by RVA, i.e. `addr - image_base`. Returns `None` when
/// `addr` lies below `image_base` or when any of the four bytes falls outside
/// `pe`; no arithmetic in here can wrap or panic.
fn read_be_u32(pe: &[u8], image_base: u32, addr: u32) -> Option<u32> {
    // checked_sub: an address below the image base is simply not readable.
    let off = usize::try_from(addr.checked_sub(image_base)?).ok()?;
    let word = pe.get(off..off.checked_add(4)?)?;
    Some(u32::from_be_bytes(word.try_into().ok()?))
}
|
||||||
|
|
||||||
|
/// Parse a `CompleteObjectLocator` at VA `col`. Returns
|
||||||
|
/// `(type_descriptor_ptr, class_hierarchy_descriptor_ptr)` on success.
|
||||||
|
///
|
||||||
|
/// Layout (32-bit MSVC):
|
||||||
|
/// ```text
|
||||||
|
/// +0x00 signature (0 for x86 without /GR-, can be 1)
|
||||||
|
/// +0x04 offset within complete object
|
||||||
|
/// +0x08 cdOffset (this-pointer adjuster)
|
||||||
|
/// +0x0C TypeDescriptor *
|
||||||
|
/// +0x10 RTTIClassHierarchyDescriptor *
|
||||||
|
/// ```
|
||||||
|
fn read_col(pe: &[u8], image_base: u32, col: u32) -> Option<(u32, u32)> {
|
||||||
|
let td = read_be_u32(pe, image_base, col + 0x0C)?;
|
||||||
|
let chd = read_be_u32(pe, image_base, col + 0x10)?;
|
||||||
|
if td == 0 { return None; }
|
||||||
|
Some((td, chd))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read a TypeDescriptor's mangled-name string at VA `td`.
///
/// Layout: `+0x00` vftable ptr, `+0x04` "spare", `+0x08` zero-terminated
/// mangled name (e.g. `.?AVClassName@@`).
///
/// Returns `None` when `td` is outside every `[start, end)` range in
/// `rdata_ranges`, when the name would lie outside `pe` (including any
/// address-arithmetic overflow), when the name is empty or not valid UTF-8,
/// or when it does not start with `.?A`. At most 256 bytes are read; a longer
/// or unterminated name is returned truncated at that limit.
fn read_typedescriptor_name(
    pe: &[u8],
    image_base: u32,
    td: u32,
    rdata_ranges: &[(u32, u32)],
) -> Option<String> {
    /// Offset of the name field inside a 32-bit TypeDescriptor.
    const NAME_OFFSET: u32 = 0x08;
    /// Upper bound on the bytes examined when looking for the terminator.
    const MAX_NAME_LEN: usize = 256;

    // Same half-open membership test as `is_in_ranges`.
    if !rdata_ranges.iter().any(|&(start, end)| (start..end).contains(&td)) {
        return None;
    }

    // Checked arithmetic: a descriptor near the top of the address space or
    // below the image base is treated as unreadable rather than wrapping.
    let name_va = td.checked_add(NAME_OFFSET)?;
    let off = usize::try_from(name_va.checked_sub(image_base)?).ok()?;

    let tail = pe.get(off..)?;
    let window = &tail[..tail.len().min(MAX_NAME_LEN)];
    let len = window.iter().position(|&b| b == 0).unwrap_or(window.len());
    if len == 0 {
        return None;
    }

    let name = std::str::from_utf8(&window[..len]).ok()?;
    // Sanity: MSVC RTTI names always start with `.?A`.
    name.starts_with(".?A").then(|| name.to_string())
}
|
||||||
|
|
||||||
|
/// Demangle an RTTI type-name string of the form `.?AVClassName@ns@@`.
|
||||||
|
/// MSVC convention: leading `.` is the marker for an RTTI string; strip it
|
||||||
|
/// before passing to the demangler.
|
||||||
|
fn demangle_rtti_typename(rtti_name: &str) -> Option<String> {
|
||||||
|
let stripped = rtti_name.strip_prefix('.')?;
|
||||||
|
let raw = msvc_demangler::demangle(stripped, msvc_demangler::DemangleFlags::llvm()).ok()?;
|
||||||
|
// Output looks like `class xe::apu::AudioSystem` or `struct foo::Bar`.
|
||||||
|
let cls = raw
|
||||||
|
.strip_prefix("class ")
|
||||||
|
.or_else(|| raw.strip_prefix("struct "))
|
||||||
|
.or_else(|| raw.strip_prefix("union "))
|
||||||
|
.unwrap_or(&raw);
|
||||||
|
Some(cls.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Best-effort `RTTIClassHierarchyDescriptor` walk: read the
|
||||||
|
/// `BaseClassArray` entries and demangle each base's TypeDescriptor name.
|
||||||
|
/// Returns a JSON array string on success.
|
||||||
|
///
|
||||||
|
/// Layout:
|
||||||
|
/// ```text
|
||||||
|
/// RTTIClassHierarchyDescriptor:
|
||||||
|
/// +0x00 signature
|
||||||
|
/// +0x04 attributes
|
||||||
|
/// +0x08 numBaseClasses
|
||||||
|
/// +0x0C BaseClassArray * (-> array of BaseClassDescriptor *)
|
||||||
|
/// BaseClassDescriptor:
|
||||||
|
/// +0x00 TypeDescriptor *
|
||||||
|
/// +0x04 numContainedBases
|
||||||
|
/// ...
|
||||||
|
/// ```
|
||||||
|
fn read_class_hierarchy(
|
||||||
|
pe: &[u8],
|
||||||
|
image_base: u32,
|
||||||
|
chd: u32,
|
||||||
|
rdata_ranges: &[(u32, u32)],
|
||||||
|
) -> Option<String> {
|
||||||
|
if !is_in_ranges(chd, rdata_ranges) { return None; }
|
||||||
|
let num_bases = read_be_u32(pe, image_base, chd + 0x08)?;
|
||||||
|
if num_bases == 0 || num_bases > 256 { return None; } // sanity cap
|
||||||
|
let bca_ptr = read_be_u32(pe, image_base, chd + 0x0C)?;
|
||||||
|
if !is_in_ranges(bca_ptr, rdata_ranges) { return None; }
|
||||||
|
|
||||||
|
let mut names: Vec<String> = Vec::new();
|
||||||
|
for i in 0..num_bases {
|
||||||
|
let bcd_ptr = match read_be_u32(pe, image_base, bca_ptr + i * 4) {
|
||||||
|
Some(p) if is_in_ranges(p, rdata_ranges) => p,
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
let td_ptr = match read_be_u32(pe, image_base, bcd_ptr) {
|
||||||
|
Some(p) if is_in_ranges(p, rdata_ranges) => p,
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
let mangled = match read_typedescriptor_name(pe, image_base, td_ptr, rdata_ranges) {
|
||||||
|
Some(s) => s,
|
||||||
|
None => return None,
|
||||||
|
};
|
||||||
|
let cls = demangle_rtti_typename(&mangled).unwrap_or(mangled);
|
||||||
|
names.push(cls);
|
||||||
|
}
|
||||||
|
serde_json::to_string(&names).ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deterministic placeholder name for a vtable without usable RTTI.
///
/// The method PCs are sorted, hashed with 64-bit FNV-1a over their
/// little-endian bytes, and the low 32 bits of the digest are rendered as
/// `ANON_Class_<8 uppercase hex digits>`. Because of the sort, the name
/// depends only on which PCs are present (with multiplicity), not on their
/// slot order.
fn synth_anon_name(methods: &[u32]) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;

    let mut pcs: Vec<u32> = methods.to_vec();
    pcs.sort_unstable();

    let digest = pcs
        .iter()
        .flat_map(|pc| pc.to_le_bytes())
        .fold(FNV_OFFSET_BASIS, |acc, byte| {
            (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        });

    // Only the low 32 bits are kept, for brevity.
    format!("ANON_Class_{:08X}", digest as u32)
}
|
||||||
|
|
||||||
|
/// Build the per-method `(vtable_address, slot, function_address)` list for
|
||||||
|
/// DB insertion, with optional demangled-name lookup for any function that
|
||||||
|
/// has a matching `?…` label. Skips slots whose function isn't in the
|
||||||
|
/// supplied label map.
|
||||||
|
pub fn methods_table(
|
||||||
|
vtables: &[Vtable],
|
||||||
|
labels: &std::collections::HashMap<u32, String>,
|
||||||
|
) -> Vec<(u32, u32, u32, Option<String>, Option<String>)> {
|
||||||
|
let mut out = Vec::new();
|
||||||
|
for v in vtables {
|
||||||
|
for (slot, &fn_va) in v.methods.iter().enumerate() {
|
||||||
|
let label = labels.get(&fn_va).cloned();
|
||||||
|
let demangled = label.as_ref()
|
||||||
|
.and_then(|l| demangle::demangle(l).map(|d| d.raw_demangled));
|
||||||
|
out.push((v.address, slot as u32, fn_va, label, demangled));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a `class_name → Vtable` summary for the `classes` table. Multiple
|
||||||
|
/// vtables sharing the same class name (multiple instances at link time)
|
||||||
|
/// collapse via `BTreeMap` — the first detected vtable wins.
|
||||||
|
pub fn classes_table(vtables: &[Vtable]) -> Vec<(String, u32, bool, Option<String>)> {
|
||||||
|
let mut by_name: BTreeMap<String, &Vtable> = BTreeMap::new();
|
||||||
|
for v in vtables {
|
||||||
|
by_name.entry(v.class_name.clone()).or_insert(v);
|
||||||
|
}
|
||||||
|
by_name
|
||||||
|
.into_iter()
|
||||||
|
.map(|(name, v)| (name, v.address, v.rtti_present, v.base_classes_json.clone()))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    const IMAGE_BASE: u32 = 0x82000000;
    const RDATA_VA: u32 = 0x1000;
    const TEXT_VA: u32 = 0x2000;
    const TEXT_SIZE: u32 = 0x100;

    /// Build a synthetic image with `pointers` written big-endian at the
    /// start of a `.rdata` section of `rdata_size` bytes, register every
    /// pointer as a known function start, and run the scan.
    fn scan_rdata(pointers: &[u32], rdata_size: u32) -> Vec<Vtable> {
        // PE buffer big enough for both sections.
        let mut pe = vec![0u8; (TEXT_VA + TEXT_SIZE) as usize];
        for (slot, pc) in pointers.iter().enumerate() {
            let off = RDATA_VA as usize + slot * 4;
            pe[off..off + 4].copy_from_slice(&pc.to_be_bytes());
        }

        let sections = vec![
            PeSection {
                name: ".rdata".into(),
                virtual_address: RDATA_VA,
                virtual_size: rdata_size,
                raw_offset: RDATA_VA,
                raw_size: rdata_size,
                flags: 0x4000_0040,
            },
            PeSection {
                name: ".text".into(),
                virtual_address: TEXT_VA,
                virtual_size: TEXT_SIZE,
                raw_offset: TEXT_VA,
                raw_size: TEXT_SIZE,
                flags: 0x6000_0020,
            },
        ];
        let function_starts: BTreeSet<u32> = pointers.iter().copied().collect();

        analyze(&pe, IMAGE_BASE, &sections, &function_starts)
    }

    #[test]
    fn synth_anon_name_is_stable() {
        let a = synth_anon_name(&[0x82001000, 0x82001100, 0x82001200]);
        let b = synth_anon_name(&[0x82001200, 0x82001000, 0x82001100]);
        assert_eq!(a, b, "anon name must be order-independent");
    }

    #[test]
    fn synth_anon_name_differs_for_different_methods() {
        let a = synth_anon_name(&[0x82001000, 0x82001100]);
        let b = synth_anon_name(&[0x82002000, 0x82002100]);
        assert_ne!(a, b);
    }

    #[test]
    fn detects_3_method_vtable_in_rdata() {
        // Vtable: 3 method PCs at .rdata start, all valid function entries.
        let m = [
            IMAGE_BASE + TEXT_VA,
            IMAGE_BASE + TEXT_VA + 0x10,
            IMAGE_BASE + TEXT_VA + 0x20,
        ];

        let vtables = scan_rdata(&m, 16);
        assert_eq!(vtables.len(), 1);
        assert_eq!(vtables[0].length, 3);
        assert_eq!(vtables[0].address, IMAGE_BASE + RDATA_VA);
        assert!(vtables[0].class_name.starts_with("ANON_Class_"));
        assert!(!vtables[0].rtti_present);
    }

    #[test]
    fn rejects_2_method_run() {
        let m = [IMAGE_BASE + TEXT_VA, IMAGE_BASE + TEXT_VA + 0x10];

        let vtables = scan_rdata(&m, 8);
        assert_eq!(vtables.len(), 0, "runs of 2 must be rejected to keep false-positive rate down");
    }
}
|
||||||
@@ -106,7 +106,7 @@ fn db_schema_matches_expected_columns() {
|
|||||||
w.write_base(&info).expect("write_base");
|
w.write_base(&info).expect("write_base");
|
||||||
w.ingest_instructions(&pe, &info, &func_analysis, &labels)
|
w.ingest_instructions(&pe, &info, &func_analysis, &labels)
|
||||||
.expect("ingest_instructions");
|
.expect("ingest_instructions");
|
||||||
w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs)
|
w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs, &[])
|
||||||
.expect("write_analysis_results");
|
.expect("write_analysis_results");
|
||||||
w.create_sql_views().expect("create_sql_views");
|
w.create_sql_views().expect("create_sql_views");
|
||||||
}
|
}
|
||||||
@@ -181,6 +181,27 @@ fn db_schema_matches_expected_columns() {
|
|||||||
("method_name", "VARCHAR"),
|
("method_name", "VARCHAR"),
|
||||||
("params_signature", "VARCHAR"),
|
("params_signature", "VARCHAR"),
|
||||||
]),
|
]),
|
||||||
|
("vtables", &[
|
||||||
|
("address", "BIGINT"),
|
||||||
|
("length", "BIGINT"),
|
||||||
|
("col_address", "BIGINT"),
|
||||||
|
("class_name", "VARCHAR"),
|
||||||
|
("rtti_present", "BOOLEAN"),
|
||||||
|
("base_classes_json", "VARCHAR"),
|
||||||
|
]),
|
||||||
|
("methods", &[
|
||||||
|
("vtable_address", "BIGINT"),
|
||||||
|
("slot", "BIGINT"),
|
||||||
|
("function_address", "BIGINT"),
|
||||||
|
("mangled_name", "VARCHAR"),
|
||||||
|
("demangled_name", "VARCHAR"),
|
||||||
|
]),
|
||||||
|
("classes", &[
|
||||||
|
("name", "VARCHAR"),
|
||||||
|
("vtable_address", "BIGINT"),
|
||||||
|
("rtti_present", "BOOLEAN"),
|
||||||
|
("base_classes_json", "VARCHAR"),
|
||||||
|
]),
|
||||||
("xrefs", &[
|
("xrefs", &[
|
||||||
("source", "BIGINT"),
|
("source", "BIGINT"),
|
||||||
("target", "BIGINT"),
|
("target", "BIGINT"),
|
||||||
|
|||||||
@@ -4051,6 +4051,21 @@ fn cmd_dis(
|
|||||||
"xref analysis complete"
|
"xref analysis complete"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Vtable + RTTI scan (M3). Uses M1's corrected function-start set as the
|
||||||
|
// pointer-validity oracle; runs over .rdata + .data.
|
||||||
|
let function_starts: std::collections::BTreeSet<u32> =
|
||||||
|
func_analysis.functions.keys().copied().collect();
|
||||||
|
let vtables = xenia_analysis::vtables::analyze(
|
||||||
|
&pe_image, base, §ions, &function_starts,
|
||||||
|
);
|
||||||
|
let rtti_count = vtables.iter().filter(|v| v.rtti_present).count();
|
||||||
|
info!(
|
||||||
|
vtables = vtables.len(),
|
||||||
|
rtti = rtti_count,
|
||||||
|
anon = vtables.len() - rtti_count,
|
||||||
|
"vtable scan complete",
|
||||||
|
);
|
||||||
|
|
||||||
// Build DisasmInfo
|
// Build DisasmInfo
|
||||||
let disasm_info = xenia_analysis::formatter::DisasmInfo {
|
let disasm_info = xenia_analysis::formatter::DisasmInfo {
|
||||||
image_base: base,
|
image_base: base,
|
||||||
@@ -4074,6 +4089,7 @@ fn cmd_dis(
|
|||||||
&func_analysis,
|
&func_analysis,
|
||||||
&xref_result.labels,
|
&xref_result.labels,
|
||||||
&xref_result.xrefs,
|
&xref_result.xrefs,
|
||||||
|
&vtables,
|
||||||
)?;
|
)?;
|
||||||
if matches!(analyze, AnalyzeMode::Sql | AnalyzeMode::Both) {
|
if matches!(analyze, AnalyzeMode::Sql | AnalyzeMode::Both) {
|
||||||
w.create_sql_views()?;
|
w.create_sql_views()?;
|
||||||
|
|||||||
Reference in New Issue
Block a user