diff --git a/crates/xenia-analysis/SCHEMA.md b/crates/xenia-analysis/SCHEMA.md index 1ed2a38..862de23 100644 --- a/crates/xenia-analysis/SCHEMA.md +++ b/crates/xenia-analysis/SCHEMA.md @@ -394,11 +394,71 @@ byte-identical digests (`instructions=2000005`). --- +## Layer M5.5 — `this`-flow indirect-dispatch resolution (landed) + +### Schema additions +- New table `vptr_writes(writer_pc, vtable_address, vptr_offset, writer_function)` — + every detected `stw rVtable, vptr_off(rThis)` site. +- New table `indirect_dispatch_sites(dispatch_pc PK, vptr_offset, slot, candidate_count)` — + one row per resolved dispatch. +- New table `indirect_dispatch_candidates(dispatch_pc, vtable_address, method_address)` — + one row per (dispatch × candidate vtable). Joined to existing + `xrefs.kind='ind_call'` edges (one ind_call row per candidate). +- New indices on `vptr_writes.vtable_address`, `vptr_writes.vptr_offset`, + `indirect_dispatch_candidates.method_address`, + `indirect_dispatch_candidates.vtable_address`, + `indirect_dispatch_sites.(vptr_offset, slot)`. + +### What this layer does (class-membership inference) +1. **Phase 1 — vptr-write scan**: walk every function with the lis+addi + tracker; whenever `stw rA, off(rB)` writes a known M3 vtable address, + record `(vtable_addr, vptr_offset, writer_pc)`. +2. **Phase 2 — invert**: build `vtables_by_offset[vptr_off] = {V}` for the + set of vtables ever written at that offset. +3. **Phase 3 — dispatch detection**: walk back ≤16 instructions from each + `bcctrl`/`bctr LK=1`, find the canonical + `lwz vt, off(this); lwz fn, slot*4(vt); mtctr fn` chain. Extract + `(vptr_off, slot)`. Bail on register clobber, branch, or label + boundary. +4. **Phase 4 — emit**: for each `(dispatch_pc, vptr_off, slot)`, emit one + `xrefs.kind='ind_call'` row per candidate vtable that has a + matching slot. Multi-candidate rows are an over-approximation. 
+ +### What this layer does NOT do +- No alias resolution at multi-candidate sites — emits one edge per + matching vtable. Downstream queries should filter + `indirect_dispatch_sites WHERE candidate_count=1` for high-confidence + edges. +- No flow-sensitive analysis: register state is killed at every label + (basic-block boundary) and at `bl`/`bcl` calls (volatile r0..r12 + + ctr). We do NOT propagate values across calls in the chain-walker. +- No tracking of vptr writes via X-form indexed (`stwx`), VMX, or + multiword stores. Only D-form `stw rA, off(rB)`. +- Does not synthesise vptr writes for inlined / elided constructors. + If a class never has a writer at offset `vptr_off`, dispatches + through that offset find no candidates. + +### Sylpheed yield +- 567 vptr writes covering 214 distinct vtables (~30% of M3's 722). +- 29 distinct vptr offsets used; offset 0 dominates (501/567 = 88%, + single-inheritance). +- **6,842 dispatch sites resolved**: 97 single-candidate + (high-confidence) + 6,745 multi-candidate (over-approximation). +- 687,963 `ind_call` xref rows total. +- **2,746 newly-reachable functions** via the M5 BFS view + (`v_indirect_reachability_from_entry`) compared to call/j/br alone. +- Audit-009 cluster (renderer plateau): functions newly visible + include `0x823BC9E0`, `0x823BC290`, `0x823BC5A0`, `0x823BB158`, + `0x823BB1E0`, `0x823BCAF0`, `0x823BC4C8` — actionable starting + points for the cluster's reachability hunt. + +### Reference docs +- IBM PowerPC ABI (volatile/non-volatile register partition). +- Itanium C++ ABI on vtable layout (offset-from-`this` model adapted + by MSVC for Win32 PPC). + ## Forward work (not yet landed) -- **M5.5** — `this`-flow extension to M5. Resolve vtable dispatches via - `lwz vt, off(this)` patterns by tracing constructor-side vptr writes. - Highest-value future work for the audit-009 cluster renderer hunt. - **M9.5** — full `__CxxFrameHandler` scope-table parsing (try/catch range names, per-state cleanup actions). 
- **M11.5** — walk the static-initialiser driver call chain from the diff --git a/crates/xenia-analysis/src/db.rs b/crates/xenia-analysis/src/db.rs index 4551c6d..5724e22 100644 --- a/crates/xenia-analysis/src/db.rs +++ b/crates/xenia-analysis/src/db.rs @@ -307,7 +307,7 @@ impl DbWriter { /// `vtables` is the M3 result; pass an empty slice when the caller has /// not run the vtable scan (the tables are still created, just empty). /// `strings` is the M7 result; same convention. `funcptr_arrays` is the - /// M8/M11 result. + /// M8/M11 result. `typed_ind` is the M5.5 result. #[tracing::instrument(skip_all, name = "db.write_analysis_results")] pub fn write_analysis_results( &mut self, @@ -319,6 +319,7 @@ impl DbWriter { vtables: &[crate::vtables::Vtable], strings: &[crate::strings::DetectedString], funcptr_arrays: &[crate::funcptr_arrays::FuncPtrArray], + typed_ind: Option<&crate::ind_dispatch_typed::TypedIndirectResult>, ) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE functions ( @@ -407,6 +408,39 @@ impl DbWriter { PRIMARY KEY (array_address, slot) ); + -- M5.5 — typed indirect-dispatch resolutions. Each row is one + -- bcctrl site that matched the canonical lwz vt, off(this); + -- lwz fn, slot(vt); mtctr; bcctrl pattern. candidate_count > 1 + -- means the analysis could not pick a single class; downstream + -- queries should treat such rows as reachability-only. + CREATE TABLE indirect_dispatch_sites ( + dispatch_pc BIGINT PRIMARY KEY, + vptr_offset BIGINT NOT NULL, + slot BIGINT NOT NULL, + candidate_count BIGINT NOT NULL + ); + + -- M5.5 — one row per (dispatch site × candidate vtable). The + -- ind_call xref edges in the `xrefs` table are derived from + -- this; this view lets you join back to vtable / method info. 
+ CREATE TABLE indirect_dispatch_candidates ( + dispatch_pc BIGINT NOT NULL, + vtable_address BIGINT NOT NULL, + method_address BIGINT NOT NULL, + PRIMARY KEY (dispatch_pc, vtable_address) + ); + + -- M5.5 — every detected `stw rVtable, vptr_off(rThis)` writer + -- found in any function. Useful for diagnosing why a class + -- has (or does not have) coverage in the dispatch resolver. + CREATE TABLE vptr_writes ( + writer_pc BIGINT NOT NULL, + vtable_address BIGINT NOT NULL, + vptr_offset BIGINT NOT NULL, + writer_function BIGINT NOT NULL, + PRIMARY KEY (writer_pc, vtable_address, vptr_offset) + ); + CREATE TABLE demangled_names ( address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string) mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ) @@ -437,6 +471,9 @@ impl DbWriter { insert_methods_and_classes(&self.conn, vtables, labels)?; insert_strings(&self.conn, strings)?; insert_funcptr_arrays(&self.conn, funcptr_arrays)?; + if let Some(t) = typed_ind { + insert_typed_ind_dispatch(&self.conn, t)?; + } insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?; let indices = [ @@ -454,6 +491,11 @@ impl DbWriter { ("idx_xrefs_addr_mode", "CREATE INDEX idx_xrefs_addr_mode ON xrefs(addr_mode)"), ("idx_fparrays_kind", "CREATE INDEX idx_fparrays_kind ON function_pointer_arrays(kind)"), ("idx_fpentries_function", "CREATE INDEX idx_fpentries_function ON function_pointer_array_entries(function_address)"), + ("idx_indcand_method", "CREATE INDEX idx_indcand_method ON indirect_dispatch_candidates(method_address)"), + ("idx_indcand_vtable", "CREATE INDEX idx_indcand_vtable ON indirect_dispatch_candidates(vtable_address)"), + ("idx_indsites_offset_slot", "CREATE INDEX idx_indsites_offset_slot ON indirect_dispatch_sites(vptr_offset, slot)"), + ("idx_vptrw_vtable", "CREATE INDEX idx_vptrw_vtable ON vptr_writes(vtable_address)"), + ("idx_vptrw_offset", "CREATE INDEX 
idx_vptrw_offset ON vptr_writes(vptr_offset)"), ("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"), ("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"), ("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"), @@ -482,7 +524,7 @@ impl DbWriter { xrefs: &XrefMap, ) -> anyhow::Result<()> { self.ingest_instructions(pe, info, func_analysis, labels)?; - self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[], &[], &[])?; + self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[], &[], &[], None)?; Ok(()) } @@ -951,6 +993,64 @@ fn insert_strings( Ok(()) } +fn insert_typed_ind_dispatch( + conn: &Connection, + t: &crate::ind_dispatch_typed::TypedIndirectResult, +) -> anyhow::Result<()> { + if !t.dispatches.is_empty() { + let mut stmt_site = conn.prepare( + "INSERT INTO indirect_dispatch_sites + (dispatch_pc, vptr_offset, slot, candidate_count) + VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" + )?; + let mut stmt_cand = conn.prepare( + "INSERT INTO indirect_dispatch_candidates + (dispatch_pc, vtable_address, method_address) + VALUES (?, ?, ?) 
ON CONFLICT DO NOTHING" + )?; + let mut n_sites = 0u64; + let mut n_cand = 0u64; + for d in &t.dispatches { + stmt_site.execute(params![ + d.dispatch_pc as i64, + d.vptr_offset as i64, + d.slot as i64, + d.candidate_vtables.len() as i64, + ])?; + n_sites += 1; + for (vt, m) in d.candidate_vtables.iter().zip(d.method_pcs.iter()) { + stmt_cand.execute(params![ + d.dispatch_pc as i64, *vt as i64, *m as i64, + ])?; + n_cand += 1; + } + } + metrics::counter!("db.rows", "table" => "indirect_dispatch_sites").increment(n_sites); + metrics::counter!("db.rows", "table" => "indirect_dispatch_candidates").increment(n_cand); + tracing::info!(sites = n_sites, candidates = n_cand, "typed indirect-dispatch insert complete"); + } + if !t.vptr_writes.is_empty() { + let mut stmt = conn.prepare( + "INSERT INTO vptr_writes + (writer_pc, vtable_address, vptr_offset, writer_function) + VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" + )?; + let mut n = 0u64; + for w in &t.vptr_writes { + stmt.execute(params![ + w.writer_pc as i64, + w.vtable_addr as i64, + w.vptr_offset as i64, + w.writer_function as i64, + ])?; + n += 1; + } + metrics::counter!("db.rows", "table" => "vptr_writes").increment(n); + tracing::info!(rows = n, "vptr_writes insert complete"); + } + Ok(()) +} + fn insert_funcptr_arrays( conn: &Connection, arrays: &[crate::funcptr_arrays::FuncPtrArray], diff --git a/crates/xenia-analysis/src/ind_dispatch_typed.rs b/crates/xenia-analysis/src/ind_dispatch_typed.rs new file mode 100644 index 0000000..3b3d82e --- /dev/null +++ b/crates/xenia-analysis/src/ind_dispatch_typed.rs @@ -0,0 +1,636 @@ +//! M5.5 — `this`-flow indirect-dispatch resolution. +//! +//! M5 only resolved the canonical `lis+addi → lwz off(vt) → mtctr → bcctrl` +//! pattern (vtable address materialised statically; rare in real C++). +//! This layer closes the dominant case, where the dispatch reads through +//! the object's `vptr` field: +//! +//! ```text +//! lwz rVt, vptr_off(this) ; rVt = this->vptr +//! ... 
; (rVt not clobbered) +//! lwz rFn, slot*4(rVt) ; rFn = vtable[slot] +//! ... ; (rFn / ctr not clobbered) +//! mtctr rFn +//! ... +//! bcctrl +//! ``` +//! +//! Resolution strategy (class-membership inference): +//! +//! 1. **Phase 1 — vptr-write scan.** Walk every function with a tiny +//! register tracker (mirrors the lis+addi propagation in +//! `xenia_analysis::xref`). Whenever a `stw rA, off(rB)` writes a +//! known M3 vtable address into `off(rB)`, record +//! `(vtable_addr, vptr_offset, writer_pc)`. These are constructor- +//! side vptr stores. +//! +//! 2. **Phase 2 — invert by offset.** Build +//! `vtables_by_offset[vptr_off] = set of vtables ever written at +//! that offset`. Most classes use offset 0 (single inheritance); +//! multiple-inheritance secondary vptrs land at non-zero offsets. +//! +//! 3. **Phase 3 — dispatch-site scan.** For each `bcctrl`, walk back +//! up to 16 instructions looking for the canonical sequence, +//! extracting `(vptr_off, slot)`. Bail on any clobber of the +//! tracked register, on any branch instruction, or on a label +//! boundary. +//! +//! 4. **Phase 4 — emit edges.** For each detected +//! `(dispatch_pc, vptr_off, slot)`: +//! - Look up all candidate vtables `V` where: +//! - `vtables_by_offset[vptr_off]` contains `V`, AND +//! - `V.length > slot` +//! - Emit one `ind_call` edge from `dispatch_pc` to +//! `V.methods[slot]` per candidate. +//! +//! Multi-candidate sites are an over-approximation: the analysis can't +//! distinguish without alias info which of the matching classes the +//! `this` register actually holds. Downstream queries can filter by +//! the exposed `candidate_count` column — single-candidate edges are +//! high-confidence, multi-candidate edges are reachability-only. +//! +//! ### What this layer does NOT do +//! +//! - No flow-sensitive analysis: register state is killed at every +//! label (basic-block boundary), and we do not propagate values +//! 
across calls (since the ABI's volatile/non-volatile partition is +//! unreliable for `this`-pointer chains). +//! - No alias resolution: a multi-candidate site emits one edge per +//! matching vtable, not the exact one used at runtime. +//! - Does not handle vptr writes via X-form indexed stores (`stwx`) +//! or VMX/VMX128 stores — only D-form `stw rA, off(rB)`. The MSVC +//! compiler uses D-form for all canonical vptr writes we've seen. +//! - Does not synthesise vptr writes for inlined / elided constructors. +//! If a class never has a writer at offset `vptr_off`, dispatches +//! through that offset will not find candidates. +//! +//! Reference: IBM PowerPC ABI, Itanium C++ ABI on vtable layout (the +//! same offset-from-`this` model applies on Win32 PPC). + +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; + +use crate::func::FuncAnalysis; +use crate::vtables::Vtable; + +/// One detected dispatch site after typed resolution. +#[derive(Debug, Clone)] +pub struct TypedDispatch { + pub dispatch_pc: u32, + pub vptr_offset: u32, + pub slot: u32, + /// Set of candidate vtable addresses whose `(vptr_offset, slot)` matched. + pub candidate_vtables: Vec, + /// Set of resolved method PCs (one per candidate vtable). + pub method_pcs: Vec, +} + +/// Result of the M5.5 pass. +#[derive(Debug, Default)] +pub struct TypedIndirectResult { + pub dispatches: Vec, + /// Phase-1 raw output, exposed for diagnostics. + pub vptr_writes: Vec, +} + +/// One detected constructor-side vptr write. +#[derive(Debug, Clone, Copy)] +pub struct VptrWrite { + pub vtable_addr: u32, + pub vptr_offset: u32, + pub writer_pc: u32, + pub writer_function: u32, +} + +const OP_ADDI: u32 = 14; +const OP_ADDIS: u32 = 15; +const OP_BCCTR: u32 = 19; +const OP_LWZ: u32 = 32; +const OP_ORI: u32 = 24; +const OP_STW: u32 = 36; +const OP_X_FORM: u32 = 31; + +/// Run the full M5.5 analysis. 
+#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))] +pub fn analyze( + pe: &[u8], + image_base: u32, + func_analysis: &FuncAnalysis, + vtables: &[Vtable], + labels: &HashMap, +) -> TypedIndirectResult { + let started = std::time::Instant::now(); + + let vtable_addrs: BTreeSet = vtables.iter().map(|v| v.address).collect(); + let vtable_by_addr: BTreeMap = + vtables.iter().map(|v| (v.address, v)).collect(); + + let block_boundaries: HashSet = labels.keys().copied().collect(); + + // Phase 1: scan for vptr writes. + let vptr_writes = scan_vptr_writes( + pe, image_base, func_analysis, &vtable_addrs, &block_boundaries, + ); + + // Phase 2: invert by offset. + let mut vtables_by_offset: HashMap> = HashMap::new(); + for w in &vptr_writes { + vtables_by_offset.entry(w.vptr_offset).or_default().insert(w.vtable_addr); + } + + // Phase 3 + 4: scan dispatches and emit edges. + let dispatches = scan_dispatches_and_resolve( + pe, image_base, func_analysis, &block_boundaries, + &vtables_by_offset, &vtable_by_addr, + ); + + let elapsed_ms = started.elapsed().as_millis() as f64; + let single_candidate = dispatches.iter().filter(|d| d.candidate_vtables.len() == 1).count(); + let multi_candidate = dispatches.len() - single_candidate; + let total_edges: usize = dispatches.iter().map(|d| d.method_pcs.len()).sum(); + metrics::histogram!("analysis.phase_ms", "phase" => "ind_dispatch_typed").record(elapsed_ms); + tracing::info!( + vptr_writes = vptr_writes.len(), + offsets = vtables_by_offset.len(), + dispatches = dispatches.len(), + single = single_candidate, + multi = multi_candidate, + edges = total_edges, + elapsed_ms, + "M5.5 typed indirect-dispatch scan complete", + ); + + TypedIndirectResult { dispatches, vptr_writes } +} + +fn read_instr(pe: &[u8], image_base: u32, addr: u32) -> Option { + let off = addr.wrapping_sub(image_base) as usize; + if off + 4 > pe.len() { return None; } + Some(u32::from_be_bytes([pe[off], pe[off + 1], pe[off + 2], 
pe[off + 3]])) +} + +/// Phase 1 — find every `stw rA, off(rB)` where the lis+addi-tracked +/// value of `rA` equals a known vtable address. +fn scan_vptr_writes( + pe: &[u8], + image_base: u32, + func_analysis: &FuncAnalysis, + vtable_addrs: &BTreeSet, + block_boundaries: &HashSet, +) -> Vec { + let mut writes: Vec = Vec::new(); + for (&fn_start, fi) in &func_analysis.functions { + if fi.is_saverestore { continue; } + let mut reg: [Option; 32] = [None; 32]; + let mut pc = fn_start; + while pc < fi.end { + if pc != fn_start && block_boundaries.contains(&pc) { + reg = [None; 32]; + } + let Some(instr) = read_instr(pe, image_base, pc) else { break }; + let op = instr >> 26; + let rd = ((instr >> 21) & 0x1F) as usize; + let ra = ((instr >> 16) & 0x1F) as usize; + let simm = ((instr & 0xFFFF) as i16) as i32; + let uimm = instr & 0xFFFF; + match op { + OP_ADDIS if ra == 0 => reg[rd] = Some(uimm << 16), + OP_ADDIS => { + reg[rd] = reg[ra].map(|b| b.wrapping_add(uimm << 16)); + } + OP_ADDI if ra != 0 => { + reg[rd] = reg[ra].map(|b| b.wrapping_add(simm as u32)); + } + OP_ADDI => reg[rd] = Some(simm as u32), + OP_ORI => { + let rs = rd; + reg[ra] = reg[rs].map(|b| b | uimm); + } + OP_STW => { + // `stw rS, off(rA)` — rS in bits 21..25, rA in 16..20. + if ra != 0 + && let Some(vtable_addr) = reg[rd] + && vtable_addrs.contains(&vtable_addr) + { + // The vptr offset is the displacement; rB's value + // is irrelevant for class-membership inference. + writes.push(VptrWrite { + vtable_addr, + vptr_offset: simm as u32, + writer_pc: pc, + writer_function: fn_start, + }); + } + // stw doesn't write to rD. + } + OP_LWZ => reg[rd] = None, + 32..=35 | 40..=43 | 48..=51 => reg[rd] = None, + OP_X_FORM => { + let xo = (instr >> 1) & 0x3FF; + if xo != 444 && xo != 467 { reg[rd] = None; } + } + 18 => { + // `bl` (LK=1) clobbers volatile r0..r12 + ctr. Plain + // `b` makes the next instruction unreachable; the + // label-based reset handles join points. 
+ if (instr & 1) != 0 { + for r in 0..=12 { reg[r] = None; } + } + } + 16 => { + if (instr & 1) != 0 { + for r in 0..=12 { reg[r] = None; } + } + } + _ => {} + } + pc = pc.wrapping_add(4); + } + } + writes +} + +/// Phase 3 + 4 — scan every `bcctrl`/`bctr` instruction; for each, walk +/// backward up to 16 instructions to find the canonical +/// `lwz vt, vptr_off(this); lwz fn, slot(vt); mtctr fn; bcctrl` sequence. +/// Emit one `TypedDispatch` per dispatch site that resolves to ≥ 1 +/// candidate vtable. +fn scan_dispatches_and_resolve( + pe: &[u8], + image_base: u32, + func_analysis: &FuncAnalysis, + block_boundaries: &HashSet, + vtables_by_offset: &HashMap>, + vtable_by_addr: &BTreeMap, +) -> Vec { + let mut out: Vec = Vec::new(); + for (&fn_start, fi) in &func_analysis.functions { + if fi.is_saverestore { continue; } + let mut pc = fn_start; + while pc < fi.end { + let Some(instr) = read_instr(pe, image_base, pc) else { break }; + let op = instr >> 26; + if op == OP_BCCTR { + let xo = (instr >> 1) & 0x3FF; + let lk = (instr & 1) != 0; + if xo == 528 && lk + && let Some(d) = try_resolve_dispatch_site( + pe, image_base, fn_start, fi.end, pc, + block_boundaries, vtables_by_offset, vtable_by_addr, + ) + { + out.push(d); + } + } + pc = pc.wrapping_add(4); + } + } + out +} + +/// Backwards scan from `bcctrl` at `pc` (looking back at most 16 instrs +/// within the same basic block). Returns `Some(_)` only when the full +/// `lwz vt, off(rA); lwz fn, slot(vt); mtctr fn` chain is present and the +/// `(vptr_off, slot)` pair has at least one candidate vtable. +fn try_resolve_dispatch_site( + pe: &[u8], + image_base: u32, + fn_start: u32, + _fn_end: u32, + bcctrl_pc: u32, + block_boundaries: &HashSet, + vtables_by_offset: &HashMap>, + vtable_by_addr: &BTreeMap, +) -> Option { + const LOOKBACK: u32 = 16; + + // Walk back 1..LOOKBACK instrs to find `mtctr rFn`. 
+ let mut mtctr_rs: Option = None; + let mut mtctr_pc: Option = None; + for i in 1..=LOOKBACK { + let p = bcctrl_pc.wrapping_sub(i * 4); + if p < fn_start { break; } + if block_boundaries.contains(&p) { break; } + let Some(instr) = read_instr(pe, image_base, p) else { break }; + let op = instr >> 26; + if op == OP_X_FORM { + let xo = (instr >> 1) & 0x3FF; + if xo == 467 { + let spr = (((instr >> 11) & 0x1F) << 5) | ((instr >> 16) & 0x1F); + if spr == 9 { + mtctr_rs = Some(((instr >> 21) & 0x1F) as usize); + mtctr_pc = Some(p); + break; + } + } + } + } + let mtctr_rs = mtctr_rs?; + let mtctr_pc = mtctr_pc?; + + // Walk back from mtctr to find `lwz rFn, slot(rVt)` defining mtctr_rs. + let mut slot: Option = None; + let mut vt_reg: Option = None; + let mut fn_lwz_pc: Option = None; + for i in 1..=LOOKBACK { + let p = mtctr_pc.wrapping_sub(i * 4); + if p < fn_start { break; } + if block_boundaries.contains(&p) { break; } + let Some(instr) = read_instr(pe, image_base, p) else { break }; + let op = instr >> 26; + let rd = ((instr >> 21) & 0x1F) as usize; + if op == OP_LWZ { + if rd == mtctr_rs { + let ra = ((instr >> 16) & 0x1F) as usize; + if ra == 0 { return None; } + let off = ((instr & 0xFFFF) as i16) as i32; + if off < 0 || (off % 4) != 0 { return None; } + slot = Some((off as u32) / 4); + vt_reg = Some(ra); + fn_lwz_pc = Some(p); + break; + } + // Other lwz; if it writes our target reg, it's a clobber, but + // the loop already keys on the lwz that produces the value, so + // no clobber check needed beyond seeing rd == mtctr_rs. + } else if writes_reg(instr, mtctr_rs as u32) { + return None; + } + } + let slot = slot?; + let vt_reg = vt_reg?; + let fn_lwz_pc = fn_lwz_pc?; + + // Walk back from the fn-lwz to find `lwz rVt, vptr_off(rThis)` defining vt_reg. 
/// Reports whether `instr` writes general-purpose register `r`.
///
/// Used to detect register clobbers between a value-producing `lwz` and its
/// consumer. The predicate must never miss a real write (a missed clobber
/// yields a wrong dispatch resolution), so unknown X-form encodings fall
/// back to treating bits 21..25 as a destination.
///
/// Field layout: bits 21..25 are rD/rT for arithmetic and loads but rS (a
/// source) for stores, logical ops, rotates and shifts, whose destination is
/// rA in bits 16..20. Update-form loads and stores additionally write rA.
fn writes_reg(instr: u32, r: u32) -> bool {
    const OP_X_FORM: u32 = 31;

    let op = instr >> 26;
    let rd = (instr >> 21) & 0x1F;
    let ra = (instr >> 16) & 0x1F;
    match op {
        // mulli, subfic, addic, addic., addi, addis; lwz, lbz, lhz, lha.
        7 | 8 | 12..=15 | 32 | 34 | 40 | 42 => rd == r,
        // lwzu, lbzu, lhzu, lhau — load into rD and write the EA back to rA.
        33 | 35 | 41 | 43 => rd == r || ra == r,
        // rlwimi, rlwinm, rlwnm; ori, oris, xori, xoris, andi., andis.;
        // 64-bit rotates (rldicl & co.) — destination is rA.
        20 | 21 | 23..=30 => ra == r,
        // stwu, stbu, sthu, lfsu, lfdu, stfsu, stfdu — the only GPR written
        // is the updated base rA. Their non-update forms (36, 38, 44, 48,
        // 50, 52, 54) write no GPR and fall through to `false`.
        37 | 39 | 45 | 49 | 51 | 53 | 55 => ra == r,
        // lmw rD, d(rA) loads rD through r31.
        46 => r >= rd && r < 32,
        // DS-form ld / ldu / lwa: rD always, rA for the update form (xo=1).
        58 => rd == r || ((instr & 3) == 1 && ra == r),
        // DS-form std / stdu: only stdu (xo=1) writes a GPR (rA).
        62 => (instr & 3) == 1 && ra == r,
        OP_X_FORM => {
            let xo = (instr >> 1) & 0x3FF;
            match xo {
                // slw, cntlzw, sld, and, cntlzd, andc, popcntb, nor, eqv,
                // xor, orc, or, nand, srw, srd, sraw, srad, srawi, sradi
                // (two encodings: the low xo bit is sh[5]), extsh, extsb,
                // extsw — destination is rA.
                24 | 26 | 27 | 28 | 58 | 60 | 122 | 124 | 284 | 316 | 412 | 444 | 476
                | 536 | 539 | 792 | 794 | 824 | 826 | 827 | 922 | 954 | 986 => ra == r,
                // ldux, lwzux, lbzux, lhzux, lwaux, lhaux — rD and rA.
                53 | 55 | 119 | 311 | 373 | 375 => rd == r || ra == r,
                // stdux, stwux, stbux, sthux, lfsux, lfdux, stfsux, stfdux —
                // only the updated base rA.
                181 | 183 | 247 | 439 | 567 | 631 | 695 | 759 => ra == r,
                // mtcrf, mtspr — rS is read, no GPR is written.
                144 | 467 => false,
                // lswx, lswi — fill a run of registers starting at rD whose
                // length is not worth decoding here; assume any GPR.
                533 | 597 => true,
                // Everything else: XO-form arithmetic, indexed loads, mfspr
                // and friends write rD. Compares and indexed stores land
                // here too and are over-reported, which is the safe side.
                _ => rd == r,
            }
        }
        _ => false,
    }
}
+ fn enc_vptr_write(pe: &mut [u8], at: usize, vt: u32, write_off: i16, dest_reg: u32) { + let hi = (vt >> 16) as u16; + let lo = (vt & 0xFFFF) as i16; + let lis = (15u32 << 26) | (3 << 21) | 0 << 16 | (hi as u32); + let addi = (14u32 << 26) | (3 << 21) | (3 << 16) | ((lo as u16) as u32); + let stw = (36u32 << 26) | (3 << 21) | (dest_reg << 16) | ((write_off as u16) as u32); + write_be(pe, at, lis); + write_be(pe, at + 4, addi); + write_be(pe, at + 8, stw); + } + + /// Encode a dispatch site: + /// lwz r4, vptr_off(r3) ; r4 = this->vptr + /// lwz r5, slot*4(r4) ; r5 = vptr[slot] + /// mtctr r5 + /// bcctrl + fn enc_dispatch(pe: &mut [u8], at: usize, vptr_off: i16, slot: u32) { + let lwz_vt = (32u32 << 26) | (4 << 21) | (3 << 16) | ((vptr_off as u16) as u32); + let lwz_fn = (32u32 << 26) | (5 << 21) | (4 << 16) | ((slot * 4) & 0xFFFF); + // mtctr r5 = mtspr CTR(=9), r5: SPR_low (=9) → bits 16..20. + let mtctr = (31u32 << 26) | (5 << 21) | (9 << 16) | (467 << 1); + let bcctrl = (19u32 << 26) | (20 << 21) | (528 << 1) | 1; + write_be(pe, at, lwz_vt); + write_be(pe, at + 4, lwz_fn); + write_be(pe, at + 8, mtctr); + write_be(pe, at + 12, bcctrl); + } + + #[test] + fn single_candidate_vtable_resolves_to_one_method() { + let image_base = 0x82000000u32; + let mut pe = vec![0u8; 0x4000]; + + // Function A — constructor — at 0x82001000. Writes vt=0x82010000 at off=0. + let ctor_pc = 0x82001000u32; + enc_vptr_write(&mut pe, (ctor_pc - image_base) as usize, 0x82010000, 0, 31); + + // Function B — dispatcher — at 0x82002000. Calls slot 2 of vptr at off 0. + let disp_pc = 0x82002000u32; + enc_dispatch(&mut pe, (disp_pc - image_base) as usize, 0, 2); + let bcctrl_pc = disp_pc + 12; + + // Both functions in func_analysis (synthesise). 
+ let mut fa = mk_func_analysis(ctor_pc, 0x40); + fa.functions.insert(disp_pc, FuncInfo { + start: disp_pc, end: disp_pc + 0x40, frame_size: 0, saved_gprs: 0, + is_leaf: false, is_saverestore: false, + pdata_validated: false, pdata_length: None, has_eh: false, + }); + + let vt = mk_vtable(0x82010000, vec![0xAA, 0xBB, 0xCC, 0xDD]); + let labels: HashMap = HashMap::new(); + let r = analyze(&pe, image_base, &fa, &[vt], &labels); + + assert_eq!(r.vptr_writes.len(), 1); + assert_eq!(r.vptr_writes[0].vtable_addr, 0x82010000); + assert_eq!(r.vptr_writes[0].vptr_offset, 0); + + assert_eq!(r.dispatches.len(), 1); + let d = &r.dispatches[0]; + assert_eq!(d.dispatch_pc, bcctrl_pc); + assert_eq!(d.vptr_offset, 0); + assert_eq!(d.slot, 2); + assert_eq!(d.method_pcs, vec![0xCC]); + assert_eq!(d.candidate_vtables, vec![0x82010000]); + } + + #[test] + fn multi_candidate_emits_one_edge_per_match() { + let image_base = 0x82000000u32; + let mut pe = vec![0u8; 0x4000]; + + // Two ctors, each writing a different vtable at offset 0. + let ctor_a = 0x82001000u32; + enc_vptr_write(&mut pe, (ctor_a - image_base) as usize, 0x82010000, 0, 31); + let ctor_b = 0x82001100u32; + enc_vptr_write(&mut pe, (ctor_b - image_base) as usize, 0x82010040, 0, 31); + + // One dispatch at slot 1. 
+ let disp = 0x82002000u32; + enc_dispatch(&mut pe, (disp - image_base) as usize, 0, 1); + + let mut fa = mk_func_analysis(ctor_a, 0x40); + fa.functions.insert(ctor_b, FuncInfo { + start: ctor_b, end: ctor_b + 0x40, frame_size: 0, saved_gprs: 0, + is_leaf: false, is_saverestore: false, + pdata_validated: false, pdata_length: None, has_eh: false, + }); + fa.functions.insert(disp, FuncInfo { + start: disp, end: disp + 0x40, frame_size: 0, saved_gprs: 0, + is_leaf: false, is_saverestore: false, + pdata_validated: false, pdata_length: None, has_eh: false, + }); + + let vts = vec![ + mk_vtable(0x82010000, vec![0x11, 0x22, 0x33, 0x44]), + mk_vtable(0x82010040, vec![0x55, 0x66, 0x77, 0x88]), + ]; + let labels: HashMap = HashMap::new(); + let r = analyze(&pe, image_base, &fa, &vts, &labels); + + assert_eq!(r.vptr_writes.len(), 2); + assert_eq!(r.dispatches.len(), 1); + let d = &r.dispatches[0]; + assert_eq!(d.candidate_vtables.len(), 2); + assert!(d.method_pcs.contains(&0x22)); + assert!(d.method_pcs.contains(&0x66)); + } + + #[test] + fn out_of_bounds_slot_yields_no_dispatch() { + let image_base = 0x82000000u32; + let mut pe = vec![0u8; 0x4000]; + + let ctor = 0x82001000u32; + enc_vptr_write(&mut pe, (ctor - image_base) as usize, 0x82010000, 0, 31); + + let disp = 0x82002000u32; + // slot 10 — vtable only has 4 methods. 
+ enc_dispatch(&mut pe, (disp - image_base) as usize, 0, 10); + + let mut fa = mk_func_analysis(ctor, 0x40); + fa.functions.insert(disp, FuncInfo { + start: disp, end: disp + 0x40, frame_size: 0, saved_gprs: 0, + is_leaf: false, is_saverestore: false, + pdata_validated: false, pdata_length: None, has_eh: false, + }); + + let vt = mk_vtable(0x82010000, vec![0x11, 0x22, 0x33, 0x44]); + let labels: HashMap = HashMap::new(); + let r = analyze(&pe, image_base, &fa, &[vt], &labels); + assert_eq!(r.dispatches.len(), 0); + } + + #[test] + fn no_writer_at_offset_yields_no_dispatch() { + let image_base = 0x82000000u32; + let mut pe = vec![0u8; 0x4000]; + + // ctor writes at offset 0 + let ctor = 0x82001000u32; + enc_vptr_write(&mut pe, (ctor - image_base) as usize, 0x82010000, 0, 31); + + // dispatch reads from offset 8 — no class writes vptr there. + let disp = 0x82002000u32; + enc_dispatch(&mut pe, (disp - image_base) as usize, 8, 1); + + let mut fa = mk_func_analysis(ctor, 0x40); + fa.functions.insert(disp, FuncInfo { + start: disp, end: disp + 0x40, frame_size: 0, saved_gprs: 0, + is_leaf: false, is_saverestore: false, + pdata_validated: false, pdata_length: None, has_eh: false, + }); + + let vt = mk_vtable(0x82010000, vec![0x11, 0x22, 0x33, 0x44]); + let labels: HashMap = HashMap::new(); + let r = analyze(&pe, image_base, &fa, &[vt], &labels); + assert_eq!(r.dispatches.len(), 0); + } +} diff --git a/crates/xenia-analysis/src/lib.rs b/crates/xenia-analysis/src/lib.rs index afda477..5e086c4 100644 --- a/crates/xenia-analysis/src/lib.rs +++ b/crates/xenia-analysis/src/lib.rs @@ -10,6 +10,7 @@ pub mod demangle; pub mod vtables; pub mod lookup; pub mod indirect; +pub mod ind_dispatch_typed; pub mod strings; pub mod funcptr_arrays; diff --git a/crates/xenia-analysis/src/xref.rs b/crates/xenia-analysis/src/xref.rs index b37ad08..892e845 100644 --- a/crates/xenia-analysis/src/xref.rs +++ b/crates/xenia-analysis/src/xref.rs @@ -55,8 +55,10 @@ pub enum AddrMode { /// Multi-word 
D-form: `lmw / stmw rS, simm(rA)` — emits one xref per /// register slot (32-rS slots starting at the resolved base). Multiword, - /// X-form indexed: `stwx / stbx / sthx / stwux / stbux / sthux / stdx / stdux`. - /// Static resolution requires both rA and rB constant. + /// X-form indexed: `stwx / stbx / sthx / stwux / stbux / sthux / stdx / + /// stdux` plus AltiVec/VMX vector stores `stvx / stvxl / stvebx / + /// stvehx / stvewx`. Static resolution requires both rA and rB + /// constant. (M6 + VMX follow-up.) XFormIndexed, /// X-form byte-reverse: `stwbrx / sthbrx / lwbrx / lhbrx`. XFormByteRev, diff --git a/crates/xenia-analysis/tests/db_schema_golden.rs b/crates/xenia-analysis/tests/db_schema_golden.rs index fd7d1e6..95cb86e 100644 --- a/crates/xenia-analysis/tests/db_schema_golden.rs +++ b/crates/xenia-analysis/tests/db_schema_golden.rs @@ -107,7 +107,7 @@ fn db_schema_matches_expected_columns() { w.write_base(&info).expect("write_base"); w.ingest_instructions(&pe, &info, &func_analysis, &labels) .expect("ingest_instructions"); - w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs, &[], &[], &[]) + w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs, &[], &[], &[], None) .expect("write_analysis_results"); w.create_sql_views().expect("create_sql_views"); } @@ -232,6 +232,23 @@ fn db_schema_matches_expected_columns() { ("slot", "BIGINT"), ("function_address", "BIGINT"), ]), + ("indirect_dispatch_sites", &[ + ("dispatch_pc", "BIGINT"), + ("vptr_offset", "BIGINT"), + ("slot", "BIGINT"), + ("candidate_count", "BIGINT"), + ]), + ("indirect_dispatch_candidates", &[ + ("dispatch_pc", "BIGINT"), + ("vtable_address", "BIGINT"), + ("method_address", "BIGINT"), + ]), + ("vptr_writes", &[ + ("writer_pc", "BIGINT"), + ("vtable_address", "BIGINT"), + ("vptr_offset", "BIGINT"), + ("writer_function", "BIGINT"), + ]), ("xrefs", &[ ("source", "BIGINT"), ("target", "BIGINT"), diff --git a/crates/xenia-app/src/main.rs 
b/crates/xenia-app/src/main.rs index 3c79cd5..0dc42a8 100644 --- a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -4218,6 +4218,35 @@ fn cmd_dis( "function-pointer array scan complete", ); + // M5.5 — typed indirect-dispatch resolution (this->vptr → method). + let typed_ind = xenia_analysis::ind_dispatch_typed::analyze( + &pe_image, base, &func_analysis, &vtables, &xref_result.labels, + ); + let single = typed_ind.dispatches.iter().filter(|d| d.candidate_vtables.len() == 1).count(); + let multi = typed_ind.dispatches.len() - single; + let typed_edges: usize = typed_ind.dispatches.iter().map(|d| d.method_pcs.len()).sum(); + info!( + vptr_writes = typed_ind.vptr_writes.len(), + dispatches = typed_ind.dispatches.len(), + single_candidate = single, + multi_candidate = multi, + edges = typed_edges, + "M5.5 typed indirect-dispatch scan complete", + ); + // Add ind_call edges for every (dispatch_pc, method) candidate. + for d in &typed_ind.dispatches { + for &method_pc in &d.method_pcs { + xref_result.xrefs + .entry(method_pc) + .or_default() + .push(xenia_analysis::xref::Xref { + source: d.dispatch_pc, + kind: xenia_analysis::xref::XrefKind::IndirectCall, + addr_mode: None, + }); + } + } + // Build DisasmInfo let disasm_info = xenia_analysis::formatter::DisasmInfo { image_base: base, @@ -4244,6 +4273,7 @@ fn cmd_dis( &vtables, &strings, &fparrays, + Some(&typed_ind), )?; w.write_tls(tls_info.as_ref())?; if matches!(analyze, AnalyzeMode::Sql | AnalyzeMode::Both) {