diff --git a/crates/xenia-analysis/SCHEMA.md b/crates/xenia-analysis/SCHEMA.md index 4e40166..9423f69 100644 --- a/crates/xenia-analysis/SCHEMA.md +++ b/crates/xenia-analysis/SCHEMA.md @@ -102,12 +102,45 @@ SELECT name FROM functions WHERE address = 2186674160; -- 0x824D29F0 - `msvc-demangler` crate (`https://docs.rs/msvc-demangler/0.11`). - LLVM `MicrosoftDemangle.cpp` (the parser this crate ports). -## Layer M3 — Vtable + RTTI detection (planned) +## Layer M3 — Vtable + RTTI detection (landed) -Adds `vtables`, `methods`, `classes` tables. Heuristic vtable scan over -`.rdata` + `.data`, optional MSVC RTTI `CompleteObjectLocator → TypeDescriptor` -walk, anonymous-class fallback when RTTI is stripped. See -`crates/xenia-analysis/src/vtables.rs` (when landed). +### Schema additions +- `vtables(address PK, length, col_address NULL, class_name, rtti_present, + base_classes_json NULL)` — every detected static vtable. +- `methods(vtable_address, slot, function_address, mangled_name NULL, + demangled_name NULL, PRIMARY KEY (vtable_address, slot))` — one row per + method slot. +- `classes(name PK, vtable_address, rtti_present, base_classes_json NULL)` — + deduped by class name (first-detected vtable wins). +- Indices: `methods.function_address`, `classes.rtti_present`. + +### What this layer does +- Walks `.rdata` and `.data` looking for runs of ≥3 consecutive 4-byte BE + values where each value is a known function start (from M1's corrected + `functions` table). Runs of only one or two pointers are intentionally + rejected to control the false-positive rate. +- Attempts the MSVC RTTI walk `vtable[-1] → CompleteObjectLocator → TypeDescriptor` + for each candidate. When successful, the demangled class name (with the + leading `class ` / `struct ` / `union ` keyword stripped) fills + `class_name` and a best-effort + `RTTIClassHierarchyDescriptor` walk fills `base_classes_json` (JSON array + of base class names).
+- Falls back to `ANON_Class_<8-hex>` keyed by FNV-1a hash of the sorted + method-PC tuple when RTTI is absent (typical for shipped game binaries). + Identical vtables across the binary (multiple instances) collapse to the + same anonymous name. + +### What this layer does NOT do +- Vtables built at runtime in heap-allocated memory (e.g. by ctors copying + static templates) are out of scope — only static `.rdata`/`.data` content. +- Multiple-inheritance "extra" vftables (one per base subobject) are detected + as independent vtables with no link between them. +- Inheritance-tree walking beyond `RTTIClassHierarchyDescriptor`'s direct + base list is not attempted. + +### Reference docs +- openrce.org "Reversing Microsoft Visual C++" — RTTI layout articles + (CompleteObjectLocator at vtable[-1]; TypeDescriptor at COL+0xC; mangled + name at TD+0x8). ## Layer M4 — Class-aware probe targeting (planned) diff --git a/crates/xenia-analysis/src/db.rs b/crates/xenia-analysis/src/db.rs index dbdf4b9..08f0628 100644 --- a/crates/xenia-analysis/src/db.rs +++ b/crates/xenia-analysis/src/db.rs @@ -303,6 +303,9 @@ impl DbWriter { /// (`functions`, `labels`, `xrefs`) and their indices. Always executes /// in `--analyze=rust` and `--analyze=both` modes; skipped only when /// the caller deliberately chooses a Rust-free DB layout. + /// + /// `vtables` is the M3 result; pass an empty slice when the caller has + /// not run the vtable scan (the tables are still created, just empty). 
#[tracing::instrument(skip_all, name = "db.write_analysis_results")] pub fn write_analysis_results( &mut self, @@ -311,6 +314,7 @@ impl DbWriter { func_analysis: &FuncAnalysis, labels: &HashMap, xrefs: &XrefMap, + vtables: &[crate::vtables::Vtable], ) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE functions ( @@ -339,6 +343,31 @@ impl DbWriter { kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other ); + CREATE TABLE vtables ( + address BIGINT PRIMARY KEY, -- absolute VA of vtable[0] + length BIGINT NOT NULL, -- number of method slots + col_address BIGINT, -- VA of CompleteObjectLocator (NULL when no RTTI) + class_name VARCHAR NOT NULL, -- demangled class name OR ANON_Class_ when stripped + rtti_present BOOLEAN NOT NULL, -- true when COL → TypeDescriptor walk succeeded + base_classes_json VARCHAR -- JSON array of base class names (NULL if none / parse failure) + ); + + CREATE TABLE methods ( + vtable_address BIGINT NOT NULL, -- vtable this slot belongs to + slot BIGINT NOT NULL, -- 0-based slot index + function_address BIGINT NOT NULL, -- VA of the function this slot points at + mangled_name VARCHAR, -- raw label name when mangled (?...) + demangled_name VARCHAR, -- LLVM-style demangled output + PRIMARY KEY (vtable_address, slot) + ); + + CREATE TABLE classes ( + name VARCHAR PRIMARY KEY, -- class name (demangled or ANON_*) + vtable_address BIGINT NOT NULL, -- representative vtable (first detected) + rtti_present BOOLEAN NOT NULL, + base_classes_json VARCHAR -- JSON of base class names (NULL when stripped) + ); + CREATE TABLE demangled_names ( address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string) mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. 
?Foo@Bar@@QEAAXXZ) @@ -364,6 +393,8 @@ impl DbWriter { insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?; insert_labels(&self.conn, labels)?; insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?; + insert_vtables(&self.conn, vtables, pe, info.image_base)?; + insert_methods_and_classes(&self.conn, vtables, labels)?; insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?; let indices = [ @@ -374,6 +405,8 @@ impl DbWriter { ("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"), ("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"), ("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"), + ("idx_methods_function", "CREATE INDEX idx_methods_function ON methods(function_address)"), + ("idx_classes_rtti", "CREATE INDEX idx_classes_rtti ON classes(rtti_present)"), ("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"), ("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"), ("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"), @@ -390,7 +423,7 @@ impl DbWriter { /// Back-compat wrapper for callers that want the full pre-Phase-3 /// "everything in one shot" behaviour. Equivalent to - /// `ingest_instructions` + `write_analysis_results`. + /// `ingest_instructions` + `write_analysis_results` with no M3 vtables. 
#[tracing::instrument(skip_all, name = "db.write_disasm")] pub fn write_disasm( &mut self, @@ -401,7 +434,7 @@ impl DbWriter { xrefs: &XrefMap, ) -> anyhow::Result<()> { self.ingest_instructions(pe, info, func_analysis, labels)?; - self.write_analysis_results(pe, info, func_analysis, labels, xrefs)?; + self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[])?; Ok(()) } @@ -730,6 +763,89 @@ fn insert_functions( Ok(()) } +fn insert_vtables( + conn: &Connection, + vtables: &[crate::vtables::Vtable], + _pe: &[u8], + _image_base: u32, +) -> anyhow::Result<()> { + if vtables.is_empty() { return Ok(()); } + let mut stmt = conn.prepare( + "INSERT INTO vtables + (address, length, col_address, class_name, rtti_present, base_classes_json) + VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT DO NOTHING" + )?; + let mut count = 0u64; + for v in vtables { + stmt.execute(params![ + v.address as i64, + v.length as i64, + v.col_address.map(|a| a as i64), + v.class_name.as_str(), + v.rtti_present, + v.base_classes_json.as_deref(), + ])?; + count += 1; + } + metrics::counter!("db.rows", "table" => "vtables").increment(count); + tracing::info!(rows = count, table = "vtables", "bulk insert complete"); + Ok(()) +} + +fn insert_methods_and_classes( + conn: &Connection, + vtables: &[crate::vtables::Vtable], + labels: &HashMap, +) -> anyhow::Result<()> { + if vtables.is_empty() { return Ok(()); } + + // methods rows + let methods = crate::vtables::methods_table(vtables, labels); + if !methods.is_empty() { + let mut stmt = conn.prepare( + "INSERT INTO methods + (vtable_address, slot, function_address, mangled_name, demangled_name) + VALUES (?, ?, ?, ?, ?) 
+ ON CONFLICT DO NOTHING" + )?; + for (vt_addr, slot, fn_addr, mangled, demangled) in &methods { + stmt.execute(params![ + *vt_addr as i64, + *slot as i64, + *fn_addr as i64, + mangled.as_deref(), + demangled.as_deref(), + ])?; + } + metrics::counter!("db.rows", "table" => "methods").increment(methods.len() as u64); + tracing::info!(rows = methods.len(), table = "methods", "bulk insert complete"); + } + + // classes rows (deduped by class_name, first-detected wins) + let classes = crate::vtables::classes_table(vtables); + if !classes.is_empty() { + let mut stmt = conn.prepare( + "INSERT INTO classes + (name, vtable_address, rtti_present, base_classes_json) + VALUES (?, ?, ?, ?) + ON CONFLICT DO NOTHING" + )?; + for (name, vt_addr, rtti, bases) in &classes { + stmt.execute(params![ + name.as_str(), + *vt_addr as i64, + *rtti, + bases.as_deref(), + ])?; + } + metrics::counter!("db.rows", "table" => "classes").increment(classes.len() as u64); + tracing::info!(rows = classes.len(), table = "classes", "bulk insert complete"); + } + + Ok(()) +} + fn insert_demangled_from_labels( conn: &Connection, labels: &HashMap, diff --git a/crates/xenia-analysis/src/lib.rs b/crates/xenia-analysis/src/lib.rs index ef52c20..96d0388 100644 --- a/crates/xenia-analysis/src/lib.rs +++ b/crates/xenia-analysis/src/lib.rs @@ -7,6 +7,8 @@ pub mod formatter; pub mod sinks; pub mod sql_views; pub mod demangle; +pub mod vtables; +pub mod lookup; mod ordinals; pub use ordinals::resolve_ordinal; diff --git a/crates/xenia-analysis/src/vtables.rs b/crates/xenia-analysis/src/vtables.rs new file mode 100644 index 0000000..06e4219 --- /dev/null +++ b/crates/xenia-analysis/src/vtables.rs @@ -0,0 +1,424 @@ +//! MSVC vtable + RTTI detection. +//! +//! Heuristic two-pass scan over the binary's read-only data sections. Pass 1 +//! finds candidate vtables — runs of ≥3 contiguous big-endian u32 values that +//! all land on known function entries. Pass 2 attempts the MSVC RTTI walk +//! 
`vtable[-1] → CompleteObjectLocator → TypeDescriptor → mangled name`. When +//! RTTI is stripped (typical for shipped game binaries), each anonymous vtable +//! gets a deterministic name `ANON_Class_<8-hex>` keyed by a hash of its +//! sorted method PCs (so identical vtables across multiple class instances +//! collapse to one entry). +//! +//! What this module does NOT do: +//! - Vtables in heap-allocated memory (built at runtime by ctors) are out of +//! scope — only vtables present statically in `.rdata` / `.data`. +//! - RTTI inheritance (`BaseClassDescriptor` walk) is best-effort; we record +//! the first-level base list when present and leave it NULL otherwise. +//! - Multiple-inheritance "extra" vftables (one per base subobject) are +//! detected as independent vtables; we don't link them. +//! +//! Reference: openrce.org "Reversing Microsoft Visual C++" RTTI articles +//! (CompleteObjectLocator / TypeDescriptor / BaseClassDescriptor layout). + +use std::collections::BTreeMap; + +use xenia_xex::pe::PeSection; + +use crate::demangle; + +/// One detected vtable. +#[derive(Debug, Clone)] +pub struct Vtable { + /// Absolute VA of `vtable[0]` (first method slot). + pub address: u32, + /// Number of methods in the vtable. + pub length: u32, + /// Absolute VA of the `CompleteObjectLocator` read from `vtable[-1]`. Set only + /// when the full COL → TypeDescriptor walk succeeded; `None` when RTTI is + /// absent or stripped. + pub col_address: Option<u32>, + /// Class name. Demangled from RTTI when available, otherwise the synthetic + /// `ANON_Class_<8-hex>` form. + pub class_name: String, + /// True when the COL → TypeDescriptor walk succeeded. + pub rtti_present: bool, + /// First-level base class names from `RTTIClassHierarchyDescriptor`, JSON-encoded. + /// `None` when not parseable. + pub base_classes_json: Option<String>, + /// One entry per slot: function VA in `.text`. + pub methods: Vec<u32>, +} + +/// Run the vtable scan + RTTI walk.
`function_starts` is the set of valid +/// `.text` function entry VAs from M1's corrected `functions` table. +#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))] +pub fn analyze( + pe: &[u8], + image_base: u32, + sections: &[PeSection], + function_starts: &std::collections::BTreeSet<u32>, +) -> Vec<Vtable> { + let started = std::time::Instant::now(); + // Sections we'll scan for vtable bodies. + let scan_targets: Vec<&PeSection> = sections + .iter() + .filter(|s| matches!(s.name.as_str(), ".rdata" | ".data")) + .collect(); + + // Range table for "is this VA in .rdata?" — only `.rdata` is included here. + // NOTE(review): MSVC normally emits TypeDescriptors into `.data`; confirm + // whether `.data` ranges should also be accepted for the TypeDescriptor check. + let rdata_ranges: Vec<(u32, u32)> = sections + .iter() + .filter(|s| s.name == ".rdata") + .map(|s| (image_base + s.virtual_address, image_base + s.virtual_address + s.virtual_size)) + .collect(); + + let mut candidates: Vec<Vtable> = Vec::new(); + + for section in scan_targets { + let va_start = image_base + section.virtual_address; + let va_end = va_start + section.virtual_size; + let raw_start = section.virtual_address as usize; + let raw_end = (section.virtual_address + section.virtual_size) as usize; + if raw_end > pe.len() { continue; } + let bytes = &pe[raw_start..raw_end.min(pe.len())]; + + let mut i = 0usize; + while i + 12 <= bytes.len() { + // Try to start a run at this 4-aligned offset.
+ if !i.is_multiple_of(4) { i += 1; continue; } + let mut run_len = 0usize; + let mut methods: Vec = Vec::new(); + let mut j = i; + while j + 4 <= bytes.len() { + let val = u32::from_be_bytes([bytes[j], bytes[j + 1], bytes[j + 2], bytes[j + 3]]); + if function_starts.contains(&val) { + methods.push(val); + run_len += 1; + j += 4; + } else { + break; + } + } + if run_len >= 3 { + let address = va_start + (i as u32); + candidates.push(Vtable { + address, + length: run_len as u32, + col_address: None, + class_name: synth_anon_name(&methods), + rtti_present: false, + base_classes_json: None, + methods, + }); + i += run_len * 4; + } else { + i += 4; + } + } + let _ = (va_start, va_end); + } + + // RTTI walk: for each candidate, look at vtable[-1]. + let pe_image_base = image_base; + for v in &mut candidates { + if v.address < 4 { continue; } + let col_off = (v.address - pe_image_base - 4) as usize; + if col_off + 4 > pe.len() { continue; } + let col_ptr = u32::from_be_bytes([pe[col_off], pe[col_off + 1], pe[col_off + 2], pe[col_off + 3]]); + if col_ptr == 0 { continue; } + if !is_in_ranges(col_ptr, &rdata_ranges) { continue; } + + // Try to extract the TypeDescriptor mangled-name string. 
+ if let Some((td_ptr, hierarchy_ptr)) = read_col(pe, image_base, col_ptr) + && let Some(mangled) = read_typedescriptor_name(pe, image_base, td_ptr, &rdata_ranges) + && let Some(class) = demangle_rtti_typename(&mangled) + { + v.col_address = Some(col_ptr); + v.class_name = class; + v.rtti_present = true; + v.base_classes_json = read_class_hierarchy(pe, image_base, hierarchy_ptr, &rdata_ranges); + } + } + + let elapsed_ms = started.elapsed().as_millis() as f64; + let rtti_count = candidates.iter().filter(|v| v.rtti_present).count(); + metrics::histogram!("analysis.phase_ms", "phase" => "vtables").record(elapsed_ms); + tracing::info!( + vtables = candidates.len(), + rtti = rtti_count, + anon = candidates.len() - rtti_count, + elapsed_ms, + "vtable scan complete" + ); + candidates +} + +fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool { + ranges.iter().any(|&(s, e)| addr >= s && addr < e) +} + +/// Read 4 big-endian bytes at absolute VA `addr` from the PE image. +fn read_be_u32(pe: &[u8], image_base: u32, addr: u32) -> Option { + let off = addr.wrapping_sub(image_base) as usize; + if off + 4 > pe.len() { return None; } + Some(u32::from_be_bytes([pe[off], pe[off + 1], pe[off + 2], pe[off + 3]])) +} + +/// Parse a `CompleteObjectLocator` at VA `col`. Returns +/// `(type_descriptor_ptr, class_hierarchy_descriptor_ptr)` on success. +/// +/// Layout (32-bit MSVC): +/// ```text +/// +0x00 signature (0 for x86 without /GR-, can be 1) +/// +0x04 offset within complete object +/// +0x08 cdOffset (this-pointer adjuster) +/// +0x0C TypeDescriptor * +/// +0x10 RTTIClassHierarchyDescriptor * +/// ``` +fn read_col(pe: &[u8], image_base: u32, col: u32) -> Option<(u32, u32)> { + let td = read_be_u32(pe, image_base, col + 0x0C)?; + let chd = read_be_u32(pe, image_base, col + 0x10)?; + if td == 0 { return None; } + Some((td, chd)) +} + +/// Read a TypeDescriptor's mangled-name string at VA `td`. 
+/// +/// Layout: `+0x00` vftable ptr, `+0x04` "spare", `+0x08` zero-terminated +/// mangled name (e.g. `.?AVClassName@@`). +fn read_typedescriptor_name( + pe: &[u8], + image_base: u32, + td: u32, + rdata_ranges: &[(u32, u32)], +) -> Option { + if !is_in_ranges(td, rdata_ranges) { return None; } + let name_va = td + 0x08; + let off = name_va.wrapping_sub(image_base) as usize; + if off + 1 > pe.len() { return None; } + // Read up to 256 bytes or until NUL. + let mut end = off; + while end < pe.len().min(off + 256) && pe[end] != 0 { end += 1; } + if end == off { return None; } + let s = std::str::from_utf8(&pe[off..end]).ok()?; + // Sanity: MSVC RTTI names always start with `.?A`. + if !s.starts_with(".?A") { return None; } + Some(s.to_string()) +} + +/// Demangle an RTTI type-name string of the form `.?AVClassName@ns@@`. +/// MSVC convention: leading `.` is the marker for an RTTI string; strip it +/// before passing to the demangler. +fn demangle_rtti_typename(rtti_name: &str) -> Option { + let stripped = rtti_name.strip_prefix('.')?; + let raw = msvc_demangler::demangle(stripped, msvc_demangler::DemangleFlags::llvm()).ok()?; + // Output looks like `class xe::apu::AudioSystem` or `struct foo::Bar`. + let cls = raw + .strip_prefix("class ") + .or_else(|| raw.strip_prefix("struct ")) + .or_else(|| raw.strip_prefix("union ")) + .unwrap_or(&raw); + Some(cls.to_string()) +} + +/// Best-effort `RTTIClassHierarchyDescriptor` walk: read the +/// `BaseClassArray` entries and demangle each base's TypeDescriptor name. +/// Returns a JSON array string on success. +/// +/// Layout: +/// ```text +/// RTTIClassHierarchyDescriptor: +/// +0x00 signature +/// +0x04 attributes +/// +0x08 numBaseClasses +/// +0x0C BaseClassArray * (-> array of BaseClassDescriptor *) +/// BaseClassDescriptor: +/// +0x00 TypeDescriptor * +/// +0x04 numContainedBases +/// ... 
+/// ``` +fn read_class_hierarchy( + pe: &[u8], + image_base: u32, + chd: u32, + rdata_ranges: &[(u32, u32)], +) -> Option<String> { + if !is_in_ranges(chd, rdata_ranges) { return None; } + let num_bases = read_be_u32(pe, image_base, chd + 0x08)?; + if num_bases == 0 || num_bases > 256 { return None; } // sanity cap + let bca_ptr = read_be_u32(pe, image_base, chd + 0x0C)?; + if !is_in_ranges(bca_ptr, rdata_ranges) { return None; } + + let mut names: Vec<String> = Vec::new(); + for i in 0..num_bases { + let bcd_ptr = match read_be_u32(pe, image_base, bca_ptr + i * 4) { + Some(p) if is_in_ranges(p, rdata_ranges) => p, + _ => return None, + }; + let td_ptr = match read_be_u32(pe, image_base, bcd_ptr) { + Some(p) if is_in_ranges(p, rdata_ranges) => p, + _ => return None, + }; + let mangled = match read_typedescriptor_name(pe, image_base, td_ptr, rdata_ranges) { + Some(s) => s, + None => return None, + }; + let cls = demangle_rtti_typename(&mangled).unwrap_or(mangled); + names.push(cls); + } + serde_json::to_string(&names).ok() +} + +/// Synthetic name for an RTTI-stripped vtable, derived from a stable hash of +/// the sorted method-PC list. Two vtables with the same set of method PCs +/// collapse to the same anonymous name, regardless of slot order. +fn synth_anon_name(methods: &[u32]) -> String { + // FNV-1a 64-bit on the sorted PC list; we only use 32 bits for brevity. + let mut sorted = methods.to_vec(); + sorted.sort_unstable(); + let mut h: u64 = 0xcbf29ce484222325; + for pc in &sorted { + for b in pc.to_le_bytes() { + h ^= b as u64; + h = h.wrapping_mul(0x100000001b3); + } + } + format!("ANON_Class_{:08X}", (h as u32)) +} + +/// Build the per-method `(vtable_address, slot, function_address, +/// mangled_name, demangled_name)` list for DB insertion. Every slot is +/// emitted: the label is `None` when the function has no entry in the +/// supplied label map, and the demangled name is `None` when there is no +/// label or `demangle::demangle` returns `None` for it.
+pub fn methods_table( + vtables: &[Vtable], + labels: &std::collections::HashMap, +) -> Vec<(u32, u32, u32, Option, Option)> { + let mut out = Vec::new(); + for v in vtables { + for (slot, &fn_va) in v.methods.iter().enumerate() { + let label = labels.get(&fn_va).cloned(); + let demangled = label.as_ref() + .and_then(|l| demangle::demangle(l).map(|d| d.raw_demangled)); + out.push((v.address, slot as u32, fn_va, label, demangled)); + } + } + out +} + +/// Build a `class_name → Vtable` summary for the `classes` table. Multiple +/// vtables sharing the same class name (multiple instances at link time) +/// collapse via `BTreeMap` — the first detected vtable wins. +pub fn classes_table(vtables: &[Vtable]) -> Vec<(String, u32, bool, Option)> { + let mut by_name: BTreeMap = BTreeMap::new(); + for v in vtables { + by_name.entry(v.class_name.clone()).or_insert(v); + } + by_name + .into_iter() + .map(|(name, v)| (name, v.address, v.rtti_present, v.base_classes_json.clone())) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn synth_anon_name_is_stable() { + let a = synth_anon_name(&[0x82001000, 0x82001100, 0x82001200]); + let b = synth_anon_name(&[0x82001200, 0x82001000, 0x82001100]); + assert_eq!(a, b, "anon name must be order-independent"); + } + + #[test] + fn synth_anon_name_differs_for_different_methods() { + let a = synth_anon_name(&[0x82001000, 0x82001100]); + let b = synth_anon_name(&[0x82002000, 0x82002100]); + assert_ne!(a, b); + } + + #[test] + fn detects_3_method_vtable_in_rdata() { + let image_base = 0x82000000u32; + let rdata_va = 0x1000u32; + let text_va = 0x2000u32; + let rdata_size = 16u32; + let text_size = 0x100u32; + + // PE buffer big enough for both sections. + let total = (text_va + text_size) as usize; + let mut pe = vec![0u8; total]; + + // Vtable: 3 method PCs at .rdata start, all valid function entries. 
+ let m: [u32; 3] = [image_base + text_va, image_base + text_va + 0x10, image_base + text_va + 0x20]; + for (i, val) in m.iter().enumerate() { + pe[rdata_va as usize + i * 4..rdata_va as usize + (i + 1) * 4] + .copy_from_slice(&val.to_be_bytes()); + } + + let sections = vec![ + PeSection { + name: ".rdata".into(), + virtual_address: rdata_va, + virtual_size: rdata_size, + raw_offset: rdata_va, + raw_size: rdata_size, + flags: 0x4000_0040, + }, + PeSection { + name: ".text".into(), + virtual_address: text_va, + virtual_size: text_size, + raw_offset: text_va, + raw_size: text_size, + flags: 0x6000_0020, + }, + ]; + let mut function_starts = std::collections::BTreeSet::new(); + for &pc in &m { function_starts.insert(pc); } + + let vtables = analyze(&pe, image_base, &sections, &function_starts); + assert_eq!(vtables.len(), 1); + assert_eq!(vtables[0].length, 3); + assert_eq!(vtables[0].address, image_base + rdata_va); + assert!(vtables[0].class_name.starts_with("ANON_Class_")); + assert!(!vtables[0].rtti_present); + } + + #[test] + fn rejects_2_method_run() { + let image_base = 0x82000000u32; + let rdata_va = 0x1000u32; + let text_va = 0x2000u32; + + let total = (text_va + 0x100) as usize; + let mut pe = vec![0u8; total]; + let m: [u32; 2] = [image_base + text_va, image_base + text_va + 0x10]; + for (i, val) in m.iter().enumerate() { + pe[rdata_va as usize + i * 4..rdata_va as usize + (i + 1) * 4] + .copy_from_slice(&val.to_be_bytes()); + } + let sections = vec![ + PeSection { + name: ".rdata".into(), + virtual_address: rdata_va, + virtual_size: 8, + raw_offset: rdata_va, + raw_size: 8, + flags: 0x4000_0040, + }, + PeSection { + name: ".text".into(), + virtual_address: text_va, + virtual_size: 0x100, + raw_offset: text_va, + raw_size: 0x100, + flags: 0x6000_0020, + }, + ]; + let mut function_starts = std::collections::BTreeSet::new(); + for &pc in &m { function_starts.insert(pc); } + let vtables = analyze(&pe, image_base, &sections, &function_starts); +
assert_eq!(vtables.len(), 0, "runs of 2 must be rejected to keep false-positive rate down"); + } +} diff --git a/crates/xenia-analysis/tests/db_schema_golden.rs b/crates/xenia-analysis/tests/db_schema_golden.rs index dc69b62..018bafc 100644 --- a/crates/xenia-analysis/tests/db_schema_golden.rs +++ b/crates/xenia-analysis/tests/db_schema_golden.rs @@ -106,7 +106,7 @@ fn db_schema_matches_expected_columns() { w.write_base(&info).expect("write_base"); w.ingest_instructions(&pe, &info, &func_analysis, &labels) .expect("ingest_instructions"); - w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs) + w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs, &[]) .expect("write_analysis_results"); w.create_sql_views().expect("create_sql_views"); } @@ -181,6 +181,27 @@ fn db_schema_matches_expected_columns() { ("method_name", "VARCHAR"), ("params_signature", "VARCHAR"), ]), + ("vtables", &[ + ("address", "BIGINT"), + ("length", "BIGINT"), + ("col_address", "BIGINT"), + ("class_name", "VARCHAR"), + ("rtti_present", "BOOLEAN"), + ("base_classes_json", "VARCHAR"), + ]), + ("methods", &[ + ("vtable_address", "BIGINT"), + ("slot", "BIGINT"), + ("function_address", "BIGINT"), + ("mangled_name", "VARCHAR"), + ("demangled_name", "VARCHAR"), + ]), + ("classes", &[ + ("name", "VARCHAR"), + ("vtable_address", "BIGINT"), + ("rtti_present", "BOOLEAN"), + ("base_classes_json", "VARCHAR"), + ]), ("xrefs", &[ ("source", "BIGINT"), ("target", "BIGINT"), diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 8e6ef0e..ea60689 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -4051,6 +4051,21 @@ fn cmd_dis( "xref analysis complete" ); + // Vtable + RTTI scan (M3). Uses M1's corrected function-start set as the + // pointer-validity oracle; runs over .rdata + .data. 
+ let function_starts: std::collections::BTreeSet<u32> = + func_analysis.functions.keys().copied().collect(); + let vtables = xenia_analysis::vtables::analyze( + &pe_image, base, &sections, &function_starts, + ); + let rtti_count = vtables.iter().filter(|v| v.rtti_present).count(); + info!( + vtables = vtables.len(), + rtti = rtti_count, + anon = vtables.len() - rtti_count, + "vtable scan complete", + ); + // Build DisasmInfo let disasm_info = xenia_analysis::formatter::DisasmInfo { image_base: base, @@ -4074,6 +4089,7 @@ fn cmd_dis( &func_analysis, &xref_result.labels, &xref_result.xrefs, + &vtables, )?; if matches!(analyze, AnalyzeMode::Sql | AnalyzeMode::Both) { w.create_sql_views()?;