Merge analysis-overhaul/m3-vtables-rtti

This commit is contained in:
MechaCat02
2026-05-08 20:17:50 +02:00
6 changed files with 620 additions and 8 deletions

View File

@@ -102,12 +102,45 @@ SELECT name FROM functions WHERE address = 2186674160; -- 0x824D29F0
- `msvc-demangler` crate (`https://docs.rs/msvc-demangler/0.11`).
- LLVM `MicrosoftDemangle.cpp` (the parser this crate ports).
## Layer M3 — Vtable + RTTI detection (planned)
## Layer M3 — Vtable + RTTI detection (landed)
Adds `vtables`, `methods`, `classes` tables. Heuristic vtable scan over
`.rdata` + `.data`, optional MSVC RTTI `CompleteObjectLocator → TypeDescriptor`
walk, anonymous-class fallback when RTTI is stripped. See
`crates/xenia-analysis/src/vtables.rs` (when landed).
### Schema additions
- `vtables(address PK, length, col_address NULL, class_name, rtti_present,
base_classes_json NULL)` — every detected static vtable.
- `methods(vtable_address, slot, function_address, mangled_name NULL,
demangled_name NULL, PRIMARY KEY (vtable_address, slot))` — one row per
method slot.
- `classes(name PK, vtable_address, rtti_present, base_classes_json NULL)` —
deduped by class name (first-detected vtable wins).
- Indices: `methods.function_address`, `classes.rtti_present`.
### What this layer does
- Walks `.rdata` and `.data` looking for runs of ≥3 consecutive 4-byte BE
values where each value is a known function start (from M1's corrected
`functions` table). Runs of only one or two such values are intentionally
rejected to control the false-positive rate.
- Attempts the MSVC RTTI walk `vtable[-1] → CompleteObjectLocator → TypeDescriptor`
for each candidate. When successful, the demangled `class ClassName`
string fills `class_name` and a best-effort
`RTTIClassHierarchyDescriptor` walk fills `base_classes_json` (JSON array
of base class names).
- Falls back to `ANON_Class_<8-hex>` keyed by FNV-1a hash of the sorted
method-PC tuple when RTTI is absent (typical for shipped game binaries).
Identical vtables across the binary (multiple instances) collapse to the
same anonymous name.
### What this layer does NOT do
- Vtables built at runtime in heap-allocated memory (e.g. by ctors copying
static templates) are out of scope — only static `.rdata`/`.data` content.
- Multiple-inheritance "extra" vftables (one per base subobject) are detected
as independent vtables with no link between them.
- Inheritance-tree walking beyond `RTTIClassHierarchyDescriptor`'s direct
base list is not attempted.
### Reference docs
- openrce.org "Reversing Microsoft Visual C++" — RTTI layout articles
(CompleteObjectLocator at vtable[-1]; TypeDescriptor at COL+0xC; mangled
name at TD+0x8).
## Layer M4 — Class-aware probe targeting (planned)

View File

@@ -303,6 +303,9 @@ impl DbWriter {
/// (`functions`, `labels`, `xrefs`) and their indices. Always executes
/// in `--analyze=rust` and `--analyze=both` modes; skipped only when
/// the caller deliberately chooses a Rust-free DB layout.
///
/// `vtables` is the M3 result; pass an empty slice when the caller has
/// not run the vtable scan (the tables are still created, just empty).
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
pub fn write_analysis_results(
&mut self,
@@ -311,6 +314,7 @@ impl DbWriter {
func_analysis: &FuncAnalysis,
labels: &HashMap<u32, String>,
xrefs: &XrefMap,
vtables: &[crate::vtables::Vtable],
) -> anyhow::Result<()> {
self.conn.execute_batch("
CREATE TABLE functions (
@@ -339,6 +343,31 @@ impl DbWriter {
kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other
);
CREATE TABLE vtables (
address BIGINT PRIMARY KEY, -- absolute VA of vtable[0]
length BIGINT NOT NULL, -- number of method slots
col_address BIGINT, -- VA of CompleteObjectLocator (NULL when no RTTI)
class_name VARCHAR NOT NULL, -- demangled class name OR ANON_Class_<hash> when stripped
rtti_present BOOLEAN NOT NULL, -- true when COL → TypeDescriptor walk succeeded
base_classes_json VARCHAR -- JSON array of base class names (NULL if none / parse failure)
);
CREATE TABLE methods (
vtable_address BIGINT NOT NULL, -- vtable this slot belongs to
slot BIGINT NOT NULL, -- 0-based slot index
function_address BIGINT NOT NULL, -- VA of the function this slot points at
mangled_name VARCHAR, -- raw label name when mangled (?...)
demangled_name VARCHAR, -- LLVM-style demangled output
PRIMARY KEY (vtable_address, slot)
);
CREATE TABLE classes (
name VARCHAR PRIMARY KEY, -- class name (demangled or ANON_*)
vtable_address BIGINT NOT NULL, -- representative vtable (first detected)
rtti_present BOOLEAN NOT NULL,
base_classes_json VARCHAR -- JSON of base class names (NULL when stripped)
);
CREATE TABLE demangled_names (
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
@@ -364,6 +393,8 @@ impl DbWriter {
insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?;
insert_labels(&self.conn, labels)?;
insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
insert_vtables(&self.conn, vtables, pe, info.image_base)?;
insert_methods_and_classes(&self.conn, vtables, labels)?;
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
let indices = [
@@ -374,6 +405,8 @@ impl DbWriter {
("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"),
("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
("idx_methods_function", "CREATE INDEX idx_methods_function ON methods(function_address)"),
("idx_classes_rtti", "CREATE INDEX idx_classes_rtti ON classes(rtti_present)"),
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
@@ -390,7 +423,7 @@ impl DbWriter {
/// Back-compat wrapper for callers that want the full pre-Phase-3
/// "everything in one shot" behaviour. Equivalent to
/// `ingest_instructions` + `write_analysis_results`.
/// `ingest_instructions` + `write_analysis_results` with no M3 vtables.
#[tracing::instrument(skip_all, name = "db.write_disasm")]
pub fn write_disasm(
&mut self,
@@ -401,7 +434,7 @@ impl DbWriter {
xrefs: &XrefMap,
) -> anyhow::Result<()> {
self.ingest_instructions(pe, info, func_analysis, labels)?;
self.write_analysis_results(pe, info, func_analysis, labels, xrefs)?;
self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[])?;
Ok(())
}
@@ -730,6 +763,89 @@ fn insert_functions(
Ok(())
}
/// Bulk-insert one `vtables` row per detected vtable.
///
/// `_pe` and `_image_base` are accepted for signature parity with the other
/// insert helpers but are not read. Rows whose `address` already exists are
/// left untouched (`ON CONFLICT DO NOTHING`). Returns early, without preparing
/// a statement or emitting metrics, when `vtables` is empty.
fn insert_vtables(
    conn: &Connection,
    vtables: &[crate::vtables::Vtable],
    _pe: &[u8],
    _image_base: u32,
) -> anyhow::Result<()> {
    if vtables.is_empty() {
        return Ok(());
    }

    let mut insert = conn.prepare(
        "INSERT INTO vtables
(address, length, col_address, class_name, rtti_present, base_classes_json)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT DO NOTHING"
    )?;

    for vt in vtables {
        let col_address = vt.col_address.map(i64::from);
        insert.execute(params![
            i64::from(vt.address),
            i64::from(vt.length),
            col_address,
            vt.class_name.as_str(),
            vt.rtti_present,
            vt.base_classes_json.as_deref(),
        ])?;
    }

    // Every element was submitted exactly once by the time we get here, so the
    // reported row count is simply the slice length.
    let count = vtables.len() as u64;
    metrics::counter!("db.rows", "table" => "vtables").increment(count);
    tracing::info!(rows = count, table = "vtables", "bulk insert complete");
    Ok(())
}
/// Populate the `methods` and `classes` tables from the detected vtables.
///
/// Method rows come from `crate::vtables::methods_table` (one per vtable
/// slot); class rows come from `crate::vtables::classes_table` (one per
/// distinct class name). Both inserts use `ON CONFLICT DO NOTHING`. Nothing is
/// done when `vtables` is empty.
fn insert_methods_and_classes(
    conn: &Connection,
    vtables: &[crate::vtables::Vtable],
    labels: &HashMap<u32, String>,
) -> anyhow::Result<()> {
    if vtables.is_empty() {
        return Ok(());
    }

    // One `methods` row per vtable slot.
    let method_rows = crate::vtables::methods_table(vtables, labels);
    if !method_rows.is_empty() {
        let mut insert = conn.prepare(
            "INSERT INTO methods
(vtable_address, slot, function_address, mangled_name, demangled_name)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT DO NOTHING"
        )?;
        for row in &method_rows {
            let (vtable_va, slot, function_va, mangled, demangled) = row;
            insert.execute(params![
                i64::from(*vtable_va),
                i64::from(*slot),
                i64::from(*function_va),
                mangled.as_deref(),
                demangled.as_deref(),
            ])?;
        }
        metrics::counter!("db.rows", "table" => "methods").increment(method_rows.len() as u64);
        tracing::info!(rows = method_rows.len(), table = "methods", "bulk insert complete");
    }

    // One `classes` row per distinct class name (first-detected vtable wins).
    let class_rows = crate::vtables::classes_table(vtables);
    if !class_rows.is_empty() {
        let mut insert = conn.prepare(
            "INSERT INTO classes
(name, vtable_address, rtti_present, base_classes_json)
VALUES (?, ?, ?, ?)
ON CONFLICT DO NOTHING"
        )?;
        for row in &class_rows {
            let (class_name, vtable_va, has_rtti, base_classes) = row;
            insert.execute(params![
                class_name.as_str(),
                i64::from(*vtable_va),
                *has_rtti,
                base_classes.as_deref(),
            ])?;
        }
        metrics::counter!("db.rows", "table" => "classes").increment(class_rows.len() as u64);
        tracing::info!(rows = class_rows.len(), table = "classes", "bulk insert complete");
    }

    Ok(())
}
fn insert_demangled_from_labels(
conn: &Connection,
labels: &HashMap<u32, String>,

View File

@@ -7,6 +7,8 @@ pub mod formatter;
pub mod sinks;
pub mod sql_views;
pub mod demangle;
pub mod vtables;
pub mod lookup;
mod ordinals;
pub use ordinals::resolve_ordinal;

View File

@@ -0,0 +1,424 @@
//! MSVC vtable + RTTI detection.
//!
//! Heuristic two-pass scan over the binary's read-only data sections. Pass 1
//! finds candidate vtables — runs of ≥3 contiguous big-endian u32 values that
//! all land on known function entries. Pass 2 attempts the MSVC RTTI walk
//! `vtable[-1] → CompleteObjectLocator → TypeDescriptor → mangled name`. When
//! RTTI is stripped (typical for shipped game binaries), each anonymous vtable
//! gets a deterministic name `ANON_Class_<hex>` keyed by a hash of its
//! sorted method PCs (so identical vtables across multiple class instances
//! collapse to one entry).
//!
//! What this module does NOT do:
//! - Vtables in heap-allocated memory (built at runtime by ctors) are out of
//! scope — only vtables present statically in `.rdata` / `.data`.
//! - RTTI inheritance (`BaseClassDescriptor` walk) is best-effort; we record
//! the first-level base list when present and leave it NULL otherwise.
//! - Multiple-inheritance "extra" vftables (one per base subobject) are
//! detected as independent vtables; we don't link them.
//!
//! Reference: openrce.org "Reversing Microsoft Visual C++" RTTI articles
//! (CompleteObjectLocator / TypeDescriptor / BaseClassDescriptor layout).
use std::collections::BTreeMap;
use xenia_xex::pe::PeSection;
use crate::demangle;
/// One vtable detected by [`analyze`], together with whatever RTTI could be
/// recovered for it.
#[derive(Debug, Clone)]
pub struct Vtable {
/// Absolute VA of `vtable[0]` (first method slot).
pub address: u32,
/// Number of method slots in the vtable; equals `methods.len()`.
pub length: u32,
/// Absolute VA of the `CompleteObjectLocator` read from `vtable[-1]`.
/// `Some` only when the full COL → TypeDescriptor → demangle walk succeeded
/// (i.e. exactly when `rtti_present` is true); `None` otherwise.
pub col_address: Option<u32>,
/// Class name. Demangled from RTTI when available, otherwise the synthetic
/// `ANON_Class_<hex>` form.
pub class_name: String,
/// True when the COL → TypeDescriptor walk succeeded.
pub rtti_present: bool,
/// Base class names read via the `RTTIClassHierarchyDescriptor`, encoded as
/// a JSON array string. `None` when RTTI is absent or the hierarchy could
/// not be parsed.
pub base_classes_json: Option<String>,
/// One entry per slot, in slot order. Every value is a member of the
/// `function_starts` set that was passed to [`analyze`].
pub methods: Vec<u32>,
}
/// Run the vtable scan + RTTI walk. `function_starts` is the set of valid
/// `.text` function entry VAs from M1's corrected `functions` table.
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
pub fn analyze(
pe: &[u8],
image_base: u32,
sections: &[PeSection],
function_starts: &std::collections::BTreeSet<u32>,
) -> Vec<Vtable> {
let started = std::time::Instant::now();
// Sections we'll scan for vtable bodies.
let scan_targets: Vec<&PeSection> = sections
.iter()
.filter(|s| matches!(s.name.as_str(), ".rdata" | ".data"))
.collect();
// Range table for "is this VA in .rdata or .data?"
let rdata_ranges: Vec<(u32, u32)> = sections
.iter()
.filter(|s| s.name == ".rdata")
.map(|s| (image_base + s.virtual_address, image_base + s.virtual_address + s.virtual_size))
.collect();
let mut candidates: Vec<Vtable> = Vec::new();
for section in scan_targets {
let va_start = image_base + section.virtual_address;
let va_end = va_start + section.virtual_size;
let raw_start = section.virtual_address as usize;
let raw_end = (section.virtual_address + section.virtual_size) as usize;
if raw_end > pe.len() { continue; }
let bytes = &pe[raw_start..raw_end.min(pe.len())];
let mut i = 0usize;
while i + 12 <= bytes.len() {
// Try to start a run at this 4-aligned offset.
if !i.is_multiple_of(4) { i += 1; continue; }
let mut run_len = 0usize;
let mut methods: Vec<u32> = Vec::new();
let mut j = i;
while j + 4 <= bytes.len() {
let val = u32::from_be_bytes([bytes[j], bytes[j + 1], bytes[j + 2], bytes[j + 3]]);
if function_starts.contains(&val) {
methods.push(val);
run_len += 1;
j += 4;
} else {
break;
}
}
if run_len >= 3 {
let address = va_start + (i as u32);
candidates.push(Vtable {
address,
length: run_len as u32,
col_address: None,
class_name: synth_anon_name(&methods),
rtti_present: false,
base_classes_json: None,
methods,
});
i += run_len * 4;
} else {
i += 4;
}
}
let _ = (va_start, va_end);
}
// RTTI walk: for each candidate, look at vtable[-1].
let pe_image_base = image_base;
for v in &mut candidates {
if v.address < 4 { continue; }
let col_off = (v.address - pe_image_base - 4) as usize;
if col_off + 4 > pe.len() { continue; }
let col_ptr = u32::from_be_bytes([pe[col_off], pe[col_off + 1], pe[col_off + 2], pe[col_off + 3]]);
if col_ptr == 0 { continue; }
if !is_in_ranges(col_ptr, &rdata_ranges) { continue; }
// Try to extract the TypeDescriptor mangled-name string.
if let Some((td_ptr, hierarchy_ptr)) = read_col(pe, image_base, col_ptr)
&& let Some(mangled) = read_typedescriptor_name(pe, image_base, td_ptr, &rdata_ranges)
&& let Some(class) = demangle_rtti_typename(&mangled)
{
v.col_address = Some(col_ptr);
v.class_name = class;
v.rtti_present = true;
v.base_classes_json = read_class_hierarchy(pe, image_base, hierarchy_ptr, &rdata_ranges);
}
}
let elapsed_ms = started.elapsed().as_millis() as f64;
let rtti_count = candidates.iter().filter(|v| v.rtti_present).count();
metrics::histogram!("analysis.phase_ms", "phase" => "vtables").record(elapsed_ms);
tracing::info!(
vtables = candidates.len(),
rtti = rtti_count,
anon = candidates.len() - rtti_count,
elapsed_ms,
"vtable scan complete"
);
candidates
}
/// Returns `true` when `addr` falls inside at least one of the half-open
/// `[start, end)` intervals in `ranges`.
fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&addr) {
            return true;
        }
    }
    false
}
/// Read 4 big-endian bytes at absolute VA `addr` from the PE image.
///
/// `pe[0]` corresponds to VA `image_base`. Returns `None` when `addr` lies
/// below `image_base` or when any of the four bytes falls outside `pe`.
fn read_be_u32(pe: &[u8], image_base: u32, addr: u32) -> Option<u32> {
    // Checked arithmetic throughout: a wrapped offset must never be able to
    // overflow `off + 4` (possible on targets where `usize` is 32 bits).
    let off = addr.checked_sub(image_base)? as usize;
    let end = off.checked_add(4)?;
    let word = pe.get(off..end)?;
    Some(u32::from_be_bytes([word[0], word[1], word[2], word[3]]))
}
/// Pull the two pointers we care about out of the `CompleteObjectLocator`
/// located at VA `col`.
///
/// On success the result is `(type_descriptor_ptr, class_hierarchy_descriptor_ptr)`.
/// `None` is returned when either field cannot be read from `pe` or when the
/// TypeDescriptor pointer is null. The hierarchy pointer is returned as-is,
/// without validation.
///
/// Layout (32-bit MSVC):
/// ```text
/// +0x00  signature
/// +0x04  offset within complete object
/// +0x08  cdOffset (this-pointer adjuster)
/// +0x0C  TypeDescriptor *
/// +0x10  RTTIClassHierarchyDescriptor *
/// ```
fn read_col(pe: &[u8], image_base: u32, col: u32) -> Option<(u32, u32)> {
    const TYPE_DESCRIPTOR_OFF: u32 = 0x0C;
    const CLASS_HIERARCHY_OFF: u32 = 0x10;

    let type_descriptor = read_be_u32(pe, image_base, col + TYPE_DESCRIPTOR_OFF)?;
    let class_hierarchy = read_be_u32(pe, image_base, col + CLASS_HIERARCHY_OFF)?;
    match type_descriptor {
        0 => None,
        td => Some((td, class_hierarchy)),
    }
}
/// Fetch the mangled-name string stored in the TypeDescriptor at VA `td`.
///
/// Layout: `+0x00` vftable ptr, `+0x04` "spare", `+0x08` zero-terminated
/// mangled name (e.g. `.?AVClassName@@`).
///
/// Returns `None` when `td` is outside `rdata_ranges`, when the name would
/// start past the end of `pe`, when the name is empty, when it is not valid
/// UTF-8, or when it does not begin with `.?A`. At most 256 bytes are
/// examined; a longer unterminated run is cut off at that limit.
fn read_typedescriptor_name(
    pe: &[u8],
    image_base: u32,
    td: u32,
    rdata_ranges: &[(u32, u32)],
) -> Option<String> {
    if !is_in_ranges(td, rdata_ranges) {
        return None;
    }

    let start = (td + 0x08).wrapping_sub(image_base) as usize;
    if start >= pe.len() {
        return None;
    }

    // Window of at most 256 bytes, clipped to the end of the image.
    let limit = pe.len().min(start + 256);
    let window = &pe[start..limit];
    let name_len = window
        .iter()
        .position(|&byte| byte == 0)
        .unwrap_or(window.len());
    if name_len == 0 {
        return None;
    }

    let name = std::str::from_utf8(&window[..name_len]).ok()?;
    // Sanity: MSVC RTTI names always start with `.?A`.
    if name.starts_with(".?A") {
        Some(name.to_string())
    } else {
        None
    }
}
/// Turn an RTTI type-name string such as `.?AVClassName@ns@@` into a bare,
/// demangled class name.
///
/// The leading `.` marks the string as an RTTI name and is removed before the
/// remainder is handed to `msvc_demangler`. A leading `class `, `struct ` or
/// `union ` keyword in the demangled text is dropped; any other output is
/// returned unchanged. Yields `None` when the `.` prefix is missing or the
/// demangler rejects the input.
fn demangle_rtti_typename(rtti_name: &str) -> Option<String> {
    let mangled = rtti_name.strip_prefix('.')?;
    let flags = msvc_demangler::DemangleFlags::llvm();
    let demangled = msvc_demangler::demangle(mangled, flags).ok()?;

    // Output looks like `class xe::apu::AudioSystem` or `struct foo::Bar`.
    let bare = ["class ", "struct ", "union "]
        .iter()
        .find_map(|&keyword| demangled.strip_prefix(keyword))
        .unwrap_or(demangled.as_str());
    Some(bare.to_string())
}
/// Best-effort `RTTIClassHierarchyDescriptor` walk: read the
/// `BaseClassArray` and return the names of the class's *direct* bases as a
/// JSON array string.
///
/// `BaseClassArray` is a flattened, depth-first list whose entry 0 describes
/// the class itself; each entry's `numContainedBases` says how many of the
/// following entries belong to that entry's own sub-hierarchy. The walk
/// therefore starts at entry 1 and hops over each base's contained entries so
/// that neither the class itself nor indirect bases are reported.
///
/// `rdata_ranges` is the set of `[start, end)` VA intervals every pointer
/// followed here must fall into. Returns `None` when any pointer is out of
/// range, any read fails, a TypeDescriptor name cannot be read, the entry
/// count is 0 or above the 256 sanity cap, or the class has no bases.
///
/// Layout:
/// ```text
/// RTTIClassHierarchyDescriptor:
///   +0x00  signature
///   +0x04  attributes
///   +0x08  numBaseClasses   (counts the class itself)
///   +0x0C  BaseClassArray * (-> array of BaseClassDescriptor *)
/// BaseClassDescriptor:
///   +0x00  TypeDescriptor *
///   +0x04  numContainedBases
///   ...
/// ```
fn read_class_hierarchy(
    pe: &[u8],
    image_base: u32,
    chd: u32,
    rdata_ranges: &[(u32, u32)],
) -> Option<String> {
    if !is_in_ranges(chd, rdata_ranges) {
        return None;
    }
    let num_entries = read_be_u32(pe, image_base, chd + 0x08)?;
    if num_entries == 0 || num_entries > 256 {
        return None; // sanity cap
    }
    let bca_ptr = read_be_u32(pe, image_base, chd + 0x0C)?;
    if !is_in_ranges(bca_ptr, rdata_ranges) {
        return None;
    }

    let mut names: Vec<String> = Vec::new();
    // Entry 0 is the class itself, not a base.
    let mut idx = 1u32;
    while idx < num_entries {
        let bcd_ptr = read_be_u32(pe, image_base, bca_ptr + idx * 4)
            .filter(|&p| is_in_ranges(p, rdata_ranges))?;
        let td_ptr = read_be_u32(pe, image_base, bcd_ptr)
            .filter(|&p| is_in_ranges(p, rdata_ranges))?;
        let contained = read_be_u32(pe, image_base, bcd_ptr + 0x04)?;

        let mangled = read_typedescriptor_name(pe, image_base, td_ptr, rdata_ranges)?;
        // Keep the raw mangled string if the demangler cannot handle it.
        names.push(demangle_rtti_typename(&mangled).unwrap_or(mangled));

        // Skip this base's own (indirect) bases. Saturating so a garbage
        // count simply ends the loop instead of overflowing.
        idx = idx.saturating_add(1).saturating_add(contained);
    }

    if names.is_empty() {
        return None;
    }
    serde_json::to_string(&names).ok()
}
/// Build the placeholder class name used for a vtable that has no RTTI.
///
/// The name is `ANON_Class_` followed by eight upper-case hex digits: the low
/// 32 bits of a 64-bit FNV-1a hash over the little-endian bytes of the method
/// PCs. The PCs are sorted before hashing, so the result depends only on
/// *which* functions the vtable contains, not on their slot order — two
/// vtables holding the same set of methods get the same name.
fn synth_anon_name(methods: &[u32]) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;

    let mut pcs = methods.to_vec();
    pcs.sort_unstable();

    let hash = pcs
        .iter()
        .flat_map(|pc| pc.to_le_bytes())
        .fold(FNV_OFFSET_BASIS, |acc, byte| {
            (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        });

    // Only the low 32 bits are kept, for brevity.
    format!("ANON_Class_{:08X}", hash as u32)
}
/// Build the per-slot rows for the `methods` table.
///
/// Each tuple is `(vtable_address, slot, function_address, mangled_name,
/// demangled_name)`. Every slot of every vtable produces exactly one row, in
/// vtable order then slot order. `mangled_name` is populated only when the
/// function's label in `labels` is an MSVC-mangled symbol (starts with `?`);
/// plain labels leave it `None` so the column never holds a non-mangled name.
/// `demangled_name` is the demangler's raw output for that symbol, or `None`
/// when there is no mangled label or demangling fails.
pub fn methods_table(
    vtables: &[Vtable],
    labels: &std::collections::HashMap<u32, String>,
) -> Vec<(u32, u32, u32, Option<String>, Option<String>)> {
    let total_slots: usize = vtables.iter().map(|v| v.methods.len()).sum();
    let mut out = Vec::with_capacity(total_slots);
    for v in vtables {
        for (slot, &fn_va) in v.methods.iter().enumerate() {
            let mangled = labels.get(&fn_va).filter(|l| l.starts_with('?'));
            let demangled =
                mangled.and_then(|l| demangle::demangle(l).map(|d| d.raw_demangled));
            out.push((v.address, slot as u32, fn_va, mangled.cloned(), demangled));
        }
    }
    out
}
/// Summarise the detected vtables as one row per distinct class name, for the
/// `classes` table.
///
/// Each tuple is `(class_name, vtable_address, rtti_present,
/// base_classes_json)`. When several vtables carry the same class name, the
/// one that appears first in `vtables` supplies the row. Rows are returned in
/// ascending class-name order.
pub fn classes_table(vtables: &[Vtable]) -> Vec<(String, u32, bool, Option<String>)> {
    let mut first_by_name: BTreeMap<&str, &Vtable> = BTreeMap::new();
    for vt in vtables {
        // `or_insert` leaves an existing entry alone, so the earliest wins.
        first_by_name.entry(vt.class_name.as_str()).or_insert(vt);
    }

    let mut rows = Vec::with_capacity(first_by_name.len());
    for (name, vt) in first_by_name {
        rows.push((
            name.to_owned(),
            vt.address,
            vt.rtti_present,
            vt.base_classes_json.clone(),
        ));
    }
    rows
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    const IMAGE_BASE: u32 = 0x82000000;
    const RDATA_VA: u32 = 0x1000;
    const TEXT_VA: u32 = 0x2000;
    const TEXT_SIZE: u32 = 0x100;

    /// Build a tiny image with `method_count` consecutive function pointers at
    /// the start of `.rdata` (each pointing 0x10 apart into `.text`), register
    /// those targets as function starts, and run the scan over it.
    fn scan_fixture(method_count: u32, rdata_size: u32) -> Vec<Vtable> {
        let method_pcs: Vec<u32> = (0..method_count)
            .map(|k| IMAGE_BASE + TEXT_VA + k * 0x10)
            .collect();

        // PE buffer big enough for both sections.
        let mut pe = vec![0u8; (TEXT_VA + TEXT_SIZE) as usize];
        for (slot, pc) in method_pcs.iter().enumerate() {
            let at = RDATA_VA as usize + slot * 4;
            pe[at..at + 4].copy_from_slice(&pc.to_be_bytes());
        }

        let sections = vec![
            PeSection {
                name: ".rdata".into(),
                virtual_address: RDATA_VA,
                virtual_size: rdata_size,
                raw_offset: RDATA_VA,
                raw_size: rdata_size,
                flags: 0x4000_0040,
            },
            PeSection {
                name: ".text".into(),
                virtual_address: TEXT_VA,
                virtual_size: TEXT_SIZE,
                raw_offset: TEXT_VA,
                raw_size: TEXT_SIZE,
                flags: 0x6000_0020,
            },
        ];

        let function_starts: BTreeSet<u32> = method_pcs.iter().copied().collect();
        analyze(&pe, IMAGE_BASE, &sections, &function_starts)
    }

    #[test]
    fn synth_anon_name_is_stable() {
        let forward = synth_anon_name(&[0x82001000, 0x82001100, 0x82001200]);
        let shuffled = synth_anon_name(&[0x82001200, 0x82001000, 0x82001100]);
        assert_eq!(forward, shuffled, "anon name must be order-independent");
    }

    #[test]
    fn synth_anon_name_differs_for_different_methods() {
        let first = synth_anon_name(&[0x82001000, 0x82001100]);
        let second = synth_anon_name(&[0x82002000, 0x82002100]);
        assert_ne!(first, second);
    }

    #[test]
    fn detects_3_method_vtable_in_rdata() {
        // Three valid method PCs followed by one zero word.
        let vtables = scan_fixture(3, 16);
        assert_eq!(vtables.len(), 1);

        let found = &vtables[0];
        assert_eq!(found.length, 3);
        assert_eq!(found.address, IMAGE_BASE + RDATA_VA);
        assert!(found.class_name.starts_with("ANON_Class_"));
        assert!(!found.rtti_present);
    }

    #[test]
    fn rejects_2_method_run() {
        let vtables = scan_fixture(2, 8);
        assert_eq!(vtables.len(), 0, "runs of 2 must be rejected to keep false-positive rate down");
    }
}

View File

@@ -106,7 +106,7 @@ fn db_schema_matches_expected_columns() {
w.write_base(&info).expect("write_base");
w.ingest_instructions(&pe, &info, &func_analysis, &labels)
.expect("ingest_instructions");
w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs)
w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs, &[])
.expect("write_analysis_results");
w.create_sql_views().expect("create_sql_views");
}
@@ -181,6 +181,27 @@ fn db_schema_matches_expected_columns() {
("method_name", "VARCHAR"),
("params_signature", "VARCHAR"),
]),
("vtables", &[
("address", "BIGINT"),
("length", "BIGINT"),
("col_address", "BIGINT"),
("class_name", "VARCHAR"),
("rtti_present", "BOOLEAN"),
("base_classes_json", "VARCHAR"),
]),
("methods", &[
("vtable_address", "BIGINT"),
("slot", "BIGINT"),
("function_address", "BIGINT"),
("mangled_name", "VARCHAR"),
("demangled_name", "VARCHAR"),
]),
("classes", &[
("name", "VARCHAR"),
("vtable_address", "BIGINT"),
("rtti_present", "BOOLEAN"),
("base_classes_json", "VARCHAR"),
]),
("xrefs", &[
("source", "BIGINT"),
("target", "BIGINT"),

View File

@@ -4051,6 +4051,21 @@ fn cmd_dis(
"xref analysis complete"
);
// Vtable + RTTI scan (M3). Uses M1's corrected function-start set as the
// pointer-validity oracle; runs over .rdata + .data.
let function_starts: std::collections::BTreeSet<u32> =
func_analysis.functions.keys().copied().collect();
let vtables = xenia_analysis::vtables::analyze(
&pe_image, base, &sections, &function_starts,
);
let rtti_count = vtables.iter().filter(|v| v.rtti_present).count();
info!(
vtables = vtables.len(),
rtti = rtti_count,
anon = vtables.len() - rtti_count,
"vtable scan complete",
);
// Build DisasmInfo
let disasm_info = xenia_analysis::formatter::DisasmInfo {
image_base: base,
@@ -4074,6 +4089,7 @@ fn cmd_dis(
&func_analysis,
&xref_result.labels,
&xref_result.xrefs,
&vtables,
)?;
if matches!(analyze, AnalyzeMode::Sql | AnalyzeMode::Both) {
w.create_sql_views()?;