Five LOW-priority milestones bundled. Total ~700 LOC across 11 files.

## M9 — has_eh derived from pdata.flags exception bit

- New `functions.has_eh BOOLEAN NOT NULL` column. Derived from M1's already-parsed `pdata.flags` (bit 31 of the packed word — the exception-handler-present flag, distinct from bit 30 which is the always-1 32-bit-code flag). Index `idx_functions_has_eh`.
- Sylpheed: 2,975 of 23,073 pdata-validated functions have EH (12.9%).

## M10 — .tls section / IMAGE_TLS_DIRECTORY32 parser

- New `xenia_xex::tls::parse_tls` parses the directory + zero-terminated callback array. Returns None when the binary has no .tls section.
- New `tls_info` (singleton row) + `tls_callbacks(slot, address)` tables.
- New `DbWriter::write_tls()` no-ops on None.
- Sylpheed has no .tls section → 0 rows; infra ready for binaries with `__declspec(thread)`.

## M8 + M11 — function_pointer_arrays (dispatch tables + static initialisers)

- New `xenia_analysis::funcptr_arrays::analyze` widens M3's vtable scan: detects runs of ≥2 function pointers in .rdata and classifies each as `vtable` (M3 re-emit), `dispatch_table` (M8), or `static_init` (M11) via a constructor-prologue heuristic (mfspr + small stwu).
- New tables `function_pointer_arrays(address PK, length, kind)` and `function_pointer_array_entries(array_address, slot, function_address)`.
- Sylpheed: 722 vtables + 388 dispatch_tables = 1,110 arrays / 6,347 slots. 0 static_init detected (Sylpheed's ctors don't all match the conservative heuristic; M11.5 future work can chain via the entry-point's static-init driver).

## M12 — --lr-trace runtime canary-diff harness

- New CLI `exec --lr-trace=PC[,PC,...]` and `--lr-trace-out=PATH` flags. Symbolic resolution (Class::method, Class::*) via M4 lookup. Env vars XENIA_LR_TRACE / XENIA_LR_TRACE_OUT also work.
- New `KernelState::lr_trace_pcs` + `lr_trace_writer` + helper `fire_lr_trace_if_match(hw_id)` invoked from the per-instr probe slot.
- JSONL output: pc/tid/hw/cycle/r3/r4/r5/r6/lr — superset of what xenia-canary's --log_lr_on_pc patch emits, with a cycle counter for cross-run reproducibility. Diff-friendly via `jq`.
- Lockstep digest unaffected: smoke test on entry-point PC fires once with cycle=0/lr=BCBCBCBC/all-GPR-zero (correct initial state).

Tests 636→640 (+2 TLS tests, +2 funcptr_arrays tests). Schema golden updated for new tables + has_eh column. Lockstep determinism preserved (instructions=2000005 ×2 reruns identical).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
472 lines
19 KiB
Rust
472 lines
19 KiB
Rust
//! Indirect-dispatch reachability for vtable-bound `bcctrl`/`bctrl` sites.
|
|
//!
|
|
//! Walks each detected function with a tiny per-basic-block register tracker,
|
|
//! recognising the canonical MSVC PowerPC pattern that loads a slot from a
|
|
//! statically-addressed vtable into CTR and indirectly calls it:
|
|
//!
|
|
//! ```text
|
|
//! lis rA, hi
|
|
//! addi rA, rA, lo ; rA = vtable_address
|
|
//! lwz rB, slot*4(rA) ; rB = vtable[slot]
|
|
//! mtctr rB ; CTR = vtable[slot]
|
|
//! bcctrl ; indirect call → vtable[slot]
|
|
//! ```
|
|
//!
|
|
//! Pattern hits are emitted as `(source_pc, target_pc)` pairs that callers
|
|
//! insert into the `xrefs` table with `kind='ind_call'`.
|
|
//!
|
|
//! ### What this does NOT cover
|
|
//!
|
|
//! - Vtable pointer loaded from a `this`-pointer field (`lwz rA, off(this)`)
|
|
//! is the dominant pattern in real C++ code; resolving it requires
|
|
//! alias / points-to analysis that's far beyond this layer's scope.
|
|
//! - Indirect calls via function-pointer fields (callbacks) are similarly
|
|
//! unresolvable without object-flow analysis.
|
|
//! - Register state is intentionally killed at every label (basic-block
|
|
//! boundary) — we don't try to do flow-sensitive merging across joins.
|
|
//!
|
|
//! Reference: IBM PowerPC ABI on register-save convention, plus the
|
|
//! `xenia_analysis::xref` `lis+addi`/`lis+ori` tracker which we mirror
|
|
//! conceptually.
|
|
|
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
|
|
|
use crate::func::FuncAnalysis;
|
|
use crate::vtables::Vtable;
|
|
|
|
/// A statically resolved indirect call: the `bcctrl` located at `source`
/// dispatches to `target`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct IndirectEdge {
    /// Address of the `bcctrl` instruction that performs the call.
    pub source: u32,
    /// Address of the method the call lands on.
    pub target: u32,
    /// Address of the vtable whose slot supplied `target`.
    pub via_vtable: u32,
    /// Index of that slot within the vtable.
    pub slot: u32,
}
|
|
|
|
/// Abstract value the scanner may associate with a register.
#[derive(Clone, Copy, Debug)]
enum RegVal {
    /// The register is known to contain exactly this 32-bit value
    /// (for instance the result of a `lis` + `addi` pair).
    Const(u32),
    /// The register contains a method pointer that was read out of a slot
    /// of a known vtable.
    MethodPtr {
        /// Address of the vtable the pointer was read from.
        vtable_addr: u32,
        /// Index of the slot that was read.
        slot: u32,
        /// The method address stored in that slot.
        method_pc: u32,
    },
}
|
|
|
|
// Primary opcodes (the top six bits of an instruction word) the scanner
// dispatches on, in numeric order.
const OP_ADDI: u32 = 14;
const OP_ADDIS: u32 = 15;
const OP_BCCTR: u32 = 19; // shared with blr and friends — the XO field picks bcctr
const OP_ORI: u32 = 24;
const OP_X_FORM: u32 = 31; // extended-opcode group: mtspr, mr, ...
const OP_LWZ: u32 = 32;
|
|
|
|
/// Run the static indirect-dispatch scan. Returns one edge per resolvable
|
|
/// `bcctrl` site.
|
|
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
|
|
pub fn analyze(
|
|
pe: &[u8],
|
|
image_base: u32,
|
|
func_analysis: &FuncAnalysis,
|
|
vtables: &[Vtable],
|
|
labels: &HashMap<u32, String>,
|
|
) -> Vec<IndirectEdge> {
|
|
let started = std::time::Instant::now();
|
|
// Index vtables by their start VA so the lwz handler can decide
|
|
// whether a given Const(addr) is "really" a vtable.
|
|
let vtable_by_addr: BTreeMap<u32, &Vtable> =
|
|
vtables.iter().map(|v| (v.address, v)).collect();
|
|
|
|
// Set of all "label"-bearing PCs in the analyzed binary. We treat each
|
|
// label as a basic-block boundary (anything `loc_*` is a jump target,
|
|
// so register state arriving at it is unreliable).
|
|
let mut block_boundaries: HashSet<u32> = HashSet::with_capacity(labels.len());
|
|
for &addr in labels.keys() {
|
|
block_boundaries.insert(addr);
|
|
}
|
|
|
|
let mut edges: Vec<IndirectEdge> = Vec::new();
|
|
|
|
for (&fn_start, fi) in &func_analysis.functions {
|
|
if fi.is_saverestore { continue; }
|
|
let mut reg: [Option<RegVal>; 32] = [None; 32];
|
|
let mut ctr: Option<RegVal> = None;
|
|
let mut pc = fn_start;
|
|
while pc < fi.end {
|
|
// Reset register state on basic-block entry. We don't reset on
|
|
// the function entry itself (PC == fn_start) because labels and
|
|
// function-starts coincide; the initial state is already None.
|
|
if pc != fn_start && block_boundaries.contains(&pc) {
|
|
reg = [None; 32];
|
|
ctr = None;
|
|
}
|
|
|
|
let instr = match read_instr(pe, image_base, pc) {
|
|
Some(i) => i,
|
|
None => break,
|
|
};
|
|
|
|
let op = instr >> 26;
|
|
let rd = ((instr >> 21) & 0x1F) as usize;
|
|
let ra = ((instr >> 16) & 0x1F) as usize;
|
|
let simm = ((instr & 0xFFFF) as i16) as i32;
|
|
let uimm = instr & 0xFFFF;
|
|
|
|
match op {
|
|
// lis rD, IMM (== addis rD, r0, IMM)
|
|
OP_ADDIS if ra == 0 => {
|
|
reg[rd] = Some(RegVal::Const(uimm << 16));
|
|
}
|
|
// addis rD, rA, IMM
|
|
OP_ADDIS => {
|
|
if let Some(RegVal::Const(b)) = reg[ra] {
|
|
reg[rd] = Some(RegVal::Const(b.wrapping_add(uimm << 16)));
|
|
} else {
|
|
reg[rd] = None;
|
|
}
|
|
}
|
|
// addi rD, rA, IMM
|
|
OP_ADDI if ra != 0 => {
|
|
if let Some(RegVal::Const(b)) = reg[ra] {
|
|
reg[rd] = Some(RegVal::Const(b.wrapping_add(simm as u32)));
|
|
} else {
|
|
reg[rd] = None;
|
|
}
|
|
}
|
|
// li rD, IMM (== addi rD, 0, IMM)
|
|
OP_ADDI => {
|
|
reg[rd] = Some(RegVal::Const(simm as u32));
|
|
}
|
|
// ori rA, rS, IMM — note operand order: bits 21..25 = rS, 16..20 = rA
|
|
OP_ORI => {
|
|
let rs = rd; // bits 21..25 = source
|
|
if let Some(RegVal::Const(b)) = reg[rs] {
|
|
reg[ra] = Some(RegVal::Const(b | uimm));
|
|
} else {
|
|
reg[ra] = None;
|
|
}
|
|
}
|
|
// lwz rD, off(rA) — try to resolve as vtable slot load.
|
|
OP_LWZ => {
|
|
if ra != 0
|
|
&& let Some(RegVal::Const(base)) = reg[ra]
|
|
{
|
|
let target = base.wrapping_add(simm as u32);
|
|
// Two-step lookup so we accept both:
|
|
// (a) base = exact vtable head, simm/4 = slot
|
|
// (b) base + simm = exact vtable head (rare;
|
|
// compiler hoists the slot offset into addi)
|
|
let resolved = resolve_vtable_slot(target, &vtable_by_addr)
|
|
.or_else(|| resolve_vtable_slot_via_off(base, simm, &vtable_by_addr));
|
|
reg[rd] = resolved.map(|(vt, slot, pc)| RegVal::MethodPtr {
|
|
vtable_addr: vt, slot, method_pc: pc,
|
|
});
|
|
} else {
|
|
reg[rd] = None;
|
|
}
|
|
}
|
|
// X-form: mtspr/mtctr, bcctrl, mr, etc.
|
|
OP_X_FORM => {
|
|
let xo = (instr >> 1) & 0x3FF;
|
|
match xo {
|
|
467 => {
|
|
// mtspr SPR, rS — PPC SPR field is split: high 5 bits
|
|
// in PPC bits 16:20 (= Rust bits 11..15), low 5 bits
|
|
// in PPC bits 11:15 (= Rust bits 16..20). Mirrors
|
|
// the convention in `func.rs::is_mfspr_lr`.
|
|
let spr = (((instr >> 11) & 0x1F) << 5) | ((instr >> 16) & 0x1F);
|
|
if spr == 9 {
|
|
ctr = reg[rd];
|
|
}
|
|
// Otherwise no observable effect on tracked state.
|
|
}
|
|
// Anything that writes rD (most arithmetic, loads, etc.) clobbers it.
|
|
// Conservative: invalidate rD on any X-form that has rD in bits 21..25
|
|
// and is NOT a comparison or branch.
|
|
_ => {
|
|
// Heuristic: most X-form ops with non-zero RC encode rD; we
|
|
// invalidate to avoid stale Const propagation past arithmetic.
|
|
// This is over-eager but safe (false negatives on edges, never
|
|
// false positives).
|
|
reg[rd] = None;
|
|
}
|
|
}
|
|
}
|
|
// bcctr/bcctrl — opcode 19, XO=528. LK in low bit.
|
|
OP_BCCTR => {
|
|
let xo = (instr >> 1) & 0x3FF;
|
|
if xo == 528 {
|
|
let lk = (instr & 1) != 0;
|
|
if lk
|
|
&& let Some(RegVal::MethodPtr { vtable_addr, slot, method_pc }) = ctr
|
|
{
|
|
edges.push(IndirectEdge {
|
|
source: pc,
|
|
target: method_pc,
|
|
via_vtable: vtable_addr,
|
|
slot,
|
|
});
|
|
}
|
|
// After the call, CTR is preserved but rD register
|
|
// values across the call boundary are not trustworthy.
|
|
// Don't touch reg state — most ABIs preserve only
|
|
// some regs anyway.
|
|
}
|
|
}
|
|
// op 18: b / bl / ba / bla. LK=1 is a call; LK=0 is an
|
|
// unconditional branch with no fall-through (next PC is
|
|
// reached only via a different basic block, which the
|
|
// label-based reset already handles). On a call, the
|
|
// PowerPC ABI marks r0..r12 + ctr as volatile and
|
|
// r13..r31 as non-volatile (callee-saved); preserve the
|
|
// non-volatile half so vtable pointers loaded into r30/r31
|
|
// before a `bl` survive the call.
|
|
18 => {
|
|
let lk = (instr & 1) != 0;
|
|
if lk {
|
|
for r in 0..=12 { reg[r] = None; }
|
|
ctr = None;
|
|
}
|
|
// LK=0 (`b`) makes fall-through unreachable; nothing to do —
|
|
// any next reachable PC will hit a label boundary.
|
|
}
|
|
// Conditional branches (op 16) fall through; preserve all reg
|
|
// state for the fall-through path. The label-based join-point
|
|
// invalidation bounds false-positive risk for jump-IN paths.
|
|
16 => {
|
|
let lk = (instr & 1) != 0;
|
|
if lk {
|
|
for r in 0..=12 { reg[r] = None; }
|
|
ctr = None;
|
|
}
|
|
}
|
|
// Stores and loads we don't track explicitly clobber rD only
|
|
// when rD is on the destination side; the conservative rule
|
|
// is "any non-recognised opcode that may write rD invalidates it".
|
|
36..=55 => {
|
|
// Loads write rD; stores don't. The safe pessimisation is
|
|
// to invalidate rD for the load family (32..=35, 40..=43, etc.)
|
|
// and leave it alone for stores. We've already handled lwz
|
|
// above; for the rest, invalidate rD.
|
|
if matches!(op, 32..=35 | 40..=43 | 48..=51) {
|
|
reg[rd] = None;
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
pc = pc.wrapping_add(4);
|
|
}
|
|
}
|
|
|
|
let elapsed_ms = started.elapsed().as_millis() as f64;
|
|
metrics::histogram!("analysis.phase_ms", "phase" => "indirect").record(elapsed_ms);
|
|
tracing::info!(
|
|
edges = edges.len(),
|
|
elapsed_ms,
|
|
"indirect-dispatch scan complete"
|
|
);
|
|
edges
|
|
}
|
|
|
|
/// Fetch the big-endian 32-bit word stored at virtual address `addr`.
///
/// `pe` is addressed as a flat image whose first byte lives at `image_base`.
/// Returns `None` when `addr` lies below `image_base` or when fewer than four
/// bytes are available at the resulting offset. All offset arithmetic is
/// checked, so no input can overflow or index out of bounds.
fn read_instr(pe: &[u8], image_base: u32, addr: u32) -> Option<u32> {
    let off = addr.checked_sub(image_base)? as usize;
    let end = off.checked_add(4)?;
    let b = pe.get(off..end)?;
    Some(u32::from_be_bytes([b[0], b[1], b[2], b[3]]))
}
|
|
|
|
/// `target = base + simm` where `target` is an exact vtable head (rare,
|
|
/// compiler hoists the slot offset into the addi).
|
|
fn resolve_vtable_slot_via_off(
|
|
base: u32,
|
|
simm: i32,
|
|
vtable_by_addr: &BTreeMap<u32, &Vtable>,
|
|
) -> Option<(u32, u32, u32)> {
|
|
let target = base.wrapping_add(simm as u32);
|
|
if let Some(v) = vtable_by_addr.get(&target)
|
|
&& !v.methods.is_empty()
|
|
{
|
|
return Some((v.address, 0, v.methods[0]));
|
|
}
|
|
None
|
|
}
|
|
|
|
/// `target` is an absolute address. If it falls inside a known vtable's
|
|
/// `[address, address + length*4)` range AND is 4-aligned to a slot,
|
|
/// return `(vtable_addr, slot, method_pc)`.
|
|
fn resolve_vtable_slot(
|
|
target: u32,
|
|
vtable_by_addr: &BTreeMap<u32, &Vtable>,
|
|
) -> Option<(u32, u32, u32)> {
|
|
// BTreeMap range search for the largest key ≤ target.
|
|
let (&vt_addr, vt) = vtable_by_addr.range(..=target).next_back()?;
|
|
if target < vt_addr { return None; }
|
|
let off = target - vt_addr;
|
|
if !off.is_multiple_of(4) { return None; }
|
|
let slot = off / 4;
|
|
if slot >= vt.length { return None; }
|
|
let method_pc = *vt.methods.get(slot as usize)?;
|
|
Some((vt_addr, slot, method_pc))
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::func::FuncInfo;
    use std::collections::BTreeMap;

    /// Load address shared by every fixture image.
    const IMAGE_BASE: u32 = 0x82000000;
    /// Offset of the encoded pattern inside the fixture image.
    const TEXT_VA: u32 = 0x1000;
    /// Address of the single fixture function, i.e. of the `lis`.
    const PC_START: u32 = IMAGE_BASE + TEXT_VA;
    /// Address the fixture vtable is declared at.
    const VTABLE_ADDR: u32 = 0x82010000;

    /// Build an anonymous, RTTI-less vtable at `addr` holding `methods`.
    fn mk_vtable(addr: u32, methods: Vec<u32>) -> Vtable {
        Vtable {
            address: addr,
            length: methods.len() as u32,
            col_address: None,
            class_name: "ANON_test".into(),
            rtti_present: false,
            base_classes_json: None,
            methods,
        }
    }

    /// Write the canonical five-instruction pattern into `buf` at `offset`:
    ///
    /// ```text
    /// lis   r3, hi
    /// addi  r3, r3, lo        ; r3 = vtable_addr
    /// lwz   r4, slot_off(r3)  ; r4 = vtable[slot]
    /// mtctr r4
    /// bcctrl
    /// ```
    ///
    /// `addi` sign-extends its immediate and no carry adjustment is applied
    /// to `hi`, so callers should pass a `vtable_addr` whose low half is
    /// below 0x8000.
    fn encode_pattern(buf: &mut [u8], offset: usize, vtable_addr: u32, slot_off: i32) {
        let hi = vtable_addr >> 16;
        let lo = vtable_addr & 0xFFFF;
        let disp = (slot_off as u16) as u32;
        let words: [u32; 5] = [
            // lis r3, hi
            (15u32 << 26) | (3 << 21) | hi,
            // addi r3, r3, lo
            (14u32 << 26) | (3 << 21) | (3 << 16) | lo,
            // lwz r4, slot_off(r3)
            (32u32 << 26) | (4 << 21) | (3 << 16) | disp,
            // mtctr r4 == mtspr 9, r4: low SPR half (9) in bits 16..20,
            // high SPR half (0) in bits 11..15, Rc = 0.
            (31u32 << 26) | (4 << 21) | (9 << 16) | (467 << 1),
            // bcctrl 20, 0
            (19u32 << 26) | (20 << 21) | (528 << 1) | 1,
        ];
        for (i, word) in words.iter().enumerate() {
            let at = offset + i * 4;
            buf[at..at + 4].copy_from_slice(&word.to_be_bytes());
        }
    }

    /// A zeroed image containing only the pattern, loading from `slot_off`.
    fn pattern_image(slot_off: i32) -> Vec<u8> {
        let mut pe = vec![0u8; 0x1100];
        encode_pattern(&mut pe, TEXT_VA as usize, VTABLE_ADDR, slot_off);
        pe
    }

    /// A `FuncAnalysis` holding one function that spans exactly the five
    /// pattern instructions.
    fn single_function() -> FuncAnalysis {
        let mut functions: BTreeMap<u32, FuncInfo> = BTreeMap::new();
        functions.insert(
            PC_START,
            FuncInfo {
                start: PC_START,
                end: PC_START + 5 * 4,
                frame_size: 0,
                saved_gprs: 0,
                is_leaf: false,
                is_saverestore: false,
                pdata_validated: false,
                pdata_length: None,
                has_eh: false,
            },
        );
        FuncAnalysis {
            functions,
            save_gpr_base: None,
            restore_gpr_base: None,
            pdata_entries: Vec::new(),
        }
    }

    /// Scan the fixture function against a single vtable of `methods`.
    fn scan(slot_off: i32, methods: Vec<u32>, labels: &HashMap<u32, String>) -> Vec<IndirectEdge> {
        let pe = pattern_image(slot_off);
        let vtables = vec![mk_vtable(VTABLE_ADDR, methods)];
        analyze(&pe, IMAGE_BASE, &single_function(), &vtables, labels)
    }

    #[test]
    fn detects_canonical_lis_addi_lwz_mtctr_bcctrl() {
        // Byte offset 8 == slot 2.
        let edges = scan(8, vec![0xAA, 0xBB, 0xCC, 0xDD], &HashMap::new());

        assert_eq!(edges.len(), 1);
        let edge = edges[0];
        assert_eq!(edge.source, PC_START + 4 * 4); // bcctrl is the 5th instruction
        assert_eq!(edge.target, 0xCC); // slot 2
        assert_eq!(edge.via_vtable, VTABLE_ADDR);
        assert_eq!(edge.slot, 2);
    }

    #[test]
    fn out_of_range_slot_yields_no_edge() {
        // Byte offset 48 == slot 12, but the vtable only has 4 methods.
        let edges = scan(48, vec![0xAA, 0xBB, 0xCC, 0xDD], &HashMap::new());
        assert_eq!(edges.len(), 0);
    }

    #[test]
    fn label_in_middle_kills_state() {
        // A label between addi and lwz must drop the tracked constant.
        let mut labels: HashMap<u32, String> = HashMap::new();
        labels.insert(PC_START + 8, "loc_mid".to_string());

        let edges = scan(0, vec![0xAA, 0xBB], &labels);
        assert_eq!(edges.len(), 0, "label in middle of pattern must kill register state");
    }
}
|