//! Indirect-dispatch reachability for vtable-bound `bcctrl`/`bctrl` sites. //! //! Walks each detected function with a tiny per-basic-block register tracker, //! recognising the canonical MSVC PowerPC pattern that loads a slot from a //! statically-addressed vtable into CTR and indirectly calls it: //! //! ```text //! lis rA, hi //! addi rA, rA, lo ; rA = vtable_address //! lwz rB, slot*4(rA) ; rB = vtable[slot] //! mtctr rB ; CTR = vtable[slot] //! bcctrl ; indirect call → vtable[slot] //! ``` //! //! Pattern hits are emitted as `(source_pc, target_pc)` pairs that callers //! insert into the `xrefs` table with `kind='ind_call'`. //! //! ### What this does NOT cover //! //! - Vtable pointer loaded from a `this`-pointer field (`lwz rA, off(this)`) //! is the dominant pattern in real C++ code; resolving it requires //! alias / points-to analysis that's far beyond this layer's scope. //! - Indirect calls via function-pointer fields (callbacks) are similarly //! unresolvable without object-flow analysis. //! - Register state is intentionally killed at every label (basic-block //! boundary) — we don't try to do flow-sensitive merging across joins. //! //! Reference: IBM PowerPC ABI on register-save convention, plus the //! `xenia_analysis::xref` `lis+addi`/`lis+ori` tracker which we mirror //! conceptually. use std::collections::{BTreeMap, HashMap, HashSet}; use crate::func::FuncAnalysis; use crate::vtables::Vtable; /// One detected indirect-call edge: `bcctrl` at `source` jumps to `target`. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct IndirectEdge { pub source: u32, pub target: u32, /// Vtable the source resolved through. pub via_vtable: u32, /// Method slot index within the vtable. pub slot: u32, } #[derive(Debug, Clone, Copy)] enum RegVal { /// Register holds a known constant (e.g. after `lis+addi`). Const(u32), /// Register holds a method pointer loaded from a known vtable slot. MethodPtr { vtable_addr: u32, slot: u32, method_pc: u32, }, } const OP_ADDI: u32 = 14; const OP_ADDIS: u32 = 15; const OP_BCCTR: u32 = 19; // also covers blr — distinguish via XO const OP_LWZ: u32 = 32; const OP_ORI: u32 = 24; const OP_X_FORM: u32 = 31; // mtspr / mr / etc. /// Run the static indirect-dispatch scan. Returns one edge per resolvable /// `bcctrl` site. #[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))] pub fn analyze( pe: &[u8], image_base: u32, func_analysis: &FuncAnalysis, vtables: &[Vtable], labels: &HashMap, ) -> Vec { let started = std::time::Instant::now(); // Index vtables by their start VA so the lwz handler can decide // whether a given Const(addr) is "really" a vtable. let vtable_by_addr: BTreeMap = vtables.iter().map(|v| (v.address, v)).collect(); // Set of all "label"-bearing PCs in the analyzed binary. We treat each // label as a basic-block boundary (anything `loc_*` is a jump target, // so register state arriving at it is unreliable). let mut block_boundaries: HashSet = HashSet::with_capacity(labels.len()); for &addr in labels.keys() { block_boundaries.insert(addr); } let mut edges: Vec = Vec::new(); for (&fn_start, fi) in &func_analysis.functions { if fi.is_saverestore { continue; } let mut reg: [Option; 32] = [None; 32]; let mut ctr: Option = None; let mut pc = fn_start; while pc < fi.end { // Reset register state on basic-block entry. We don't reset on // the function entry itself (PC == fn_start) because labels and // function-starts coincide; the initial state is already None. if pc != fn_start && block_boundaries.contains(&pc) { reg = [None; 32]; ctr = None; } let instr = match read_instr(pe, image_base, pc) { Some(i) => i, None => break, }; let op = instr >> 26; let rd = ((instr >> 21) & 0x1F) as usize; let ra = ((instr >> 16) & 0x1F) as usize; let simm = ((instr & 0xFFFF) as i16) as i32; let uimm = instr & 0xFFFF; match op { // lis rD, IMM (== addis rD, r0, IMM) OP_ADDIS if ra == 0 => { reg[rd] = Some(RegVal::Const(uimm << 16)); } // addis rD, rA, IMM OP_ADDIS => { if let Some(RegVal::Const(b)) = reg[ra] { reg[rd] = Some(RegVal::Const(b.wrapping_add(uimm << 16))); } else { reg[rd] = None; } } // addi rD, rA, IMM OP_ADDI if ra != 0 => { if let Some(RegVal::Const(b)) = reg[ra] { reg[rd] = Some(RegVal::Const(b.wrapping_add(simm as u32))); } else { reg[rd] = None; } } // li rD, IMM (== addi rD, 0, IMM) OP_ADDI => { reg[rd] = Some(RegVal::Const(simm as u32)); } // ori rA, rS, IMM — note operand order: bits 21..25 = rS, 16..20 = rA OP_ORI => { let rs = rd; // bits 21..25 = source if let Some(RegVal::Const(b)) = reg[rs] { reg[ra] = Some(RegVal::Const(b | uimm)); } else { reg[ra] = None; } } // lwz rD, off(rA) — try to resolve as vtable slot load. OP_LWZ => { if ra != 0 && let Some(RegVal::Const(base)) = reg[ra] { let target = base.wrapping_add(simm as u32); // Two-step lookup so we accept both: // (a) base = exact vtable head, simm/4 = slot // (b) base + simm = exact vtable head (rare; // compiler hoists the slot offset into addi) let resolved = resolve_vtable_slot(target, &vtable_by_addr) .or_else(|| resolve_vtable_slot_via_off(base, simm, &vtable_by_addr)); reg[rd] = resolved.map(|(vt, slot, pc)| RegVal::MethodPtr { vtable_addr: vt, slot, method_pc: pc, }); } else { reg[rd] = None; } } // X-form: mtspr/mtctr, bcctrl, mr, etc. OP_X_FORM => { let xo = (instr >> 1) & 0x3FF; match xo { 467 => { // mtspr SPR, rS — PPC SPR field is split: high 5 bits // in PPC bits 16:20 (= Rust bits 11..15), low 5 bits // in PPC bits 11:15 (= Rust bits 16..20). Mirrors // the convention in `func.rs::is_mfspr_lr`. let spr = (((instr >> 11) & 0x1F) << 5) | ((instr >> 16) & 0x1F); if spr == 9 { ctr = reg[rd]; } // Otherwise no observable effect on tracked state. } // Anything that writes rD (most arithmetic, loads, etc.) clobbers it. // Conservative: invalidate rD on any X-form that has rD in bits 21..25 // and is NOT a comparison or branch. _ => { // Heuristic: most X-form ops with non-zero RC encode rD; we // invalidate to avoid stale Const propagation past arithmetic. // This is over-eager but safe (false negatives on edges, never // false positives). reg[rd] = None; } } } // bcctr/bcctrl — opcode 19, XO=528. LK in low bit. OP_BCCTR => { let xo = (instr >> 1) & 0x3FF; if xo == 528 { let lk = (instr & 1) != 0; if lk && let Some(RegVal::MethodPtr { vtable_addr, slot, method_pc }) = ctr { edges.push(IndirectEdge { source: pc, target: method_pc, via_vtable: vtable_addr, slot, }); } // After the call, CTR is preserved but rD register // values across the call boundary are not trustworthy. // Don't touch reg state — most ABIs preserve only // some regs anyway. } } // op 18: b / bl / ba / bla. LK=1 is a call; LK=0 is an // unconditional branch with no fall-through (next PC is // reached only via a different basic block, which the // label-based reset already handles). On a call, the // PowerPC ABI marks r0..r12 + ctr as volatile and // r13..r31 as non-volatile (callee-saved); preserve the // non-volatile half so vtable pointers loaded into r30/r31 // before a `bl` survive the call. 18 => { let lk = (instr & 1) != 0; if lk { for r in 0..=12 { reg[r] = None; } ctr = None; } // LK=0 (`b`) makes fall-through unreachable; nothing to do — // any next reachable PC will hit a label boundary. } // Conditional branches (op 16) fall through; preserve all reg // state for the fall-through path. The label-based join-point // invalidation bounds false-positive risk for jump-IN paths. 16 => { let lk = (instr & 1) != 0; if lk { for r in 0..=12 { reg[r] = None; } ctr = None; } } // Stores and loads we don't track explicitly clobber rD only // when rD is on the destination side; the conservative rule // is "any non-recognised opcode that may write rD invalidates it". 36..=55 => { // Loads write rD; stores don't. The safe pessimisation is // to invalidate rD for the load family (32..=35, 40..=43, etc.) // and leave it alone for stores. We've already handled lwz // above; for the rest, invalidate rD. if matches!(op, 32..=35 | 40..=43 | 48..=51) { reg[rd] = None; } } _ => {} } pc = pc.wrapping_add(4); } } let elapsed_ms = started.elapsed().as_millis() as f64; metrics::histogram!("analysis.phase_ms", "phase" => "indirect").record(elapsed_ms); tracing::info!( edges = edges.len(), elapsed_ms, "indirect-dispatch scan complete" ); edges } fn read_instr(pe: &[u8], image_base: u32, addr: u32) -> Option { let off = addr.wrapping_sub(image_base) as usize; if off + 4 > pe.len() { return None; } Some(u32::from_be_bytes([pe[off], pe[off + 1], pe[off + 2], pe[off + 3]])) } /// `target = base + simm` where `target` is an exact vtable head (rare, /// compiler hoists the slot offset into the addi). fn resolve_vtable_slot_via_off( base: u32, simm: i32, vtable_by_addr: &BTreeMap, ) -> Option<(u32, u32, u32)> { let target = base.wrapping_add(simm as u32); if let Some(v) = vtable_by_addr.get(&target) && !v.methods.is_empty() { return Some((v.address, 0, v.methods[0])); } None } /// `target` is an absolute address. If it falls inside a known vtable's /// `[address, address + length*4)` range AND is 4-aligned to a slot, /// return `(vtable_addr, slot, method_pc)`. fn resolve_vtable_slot( target: u32, vtable_by_addr: &BTreeMap, ) -> Option<(u32, u32, u32)> { // BTreeMap range search for the largest key ≤ target. let (&vt_addr, vt) = vtable_by_addr.range(..=target).next_back()?; if target < vt_addr { return None; } let off = target - vt_addr; if !off.is_multiple_of(4) { return None; } let slot = off / 4; if slot >= vt.length { return None; } let method_pc = *vt.methods.get(slot as usize)?; Some((vt_addr, slot, method_pc)) } #[cfg(test)] mod tests { use super::*; use crate::func::FuncInfo; use std::collections::BTreeMap; fn mk_vtable(addr: u32, methods: Vec) -> Vtable { Vtable { address: addr, length: methods.len() as u32, col_address: None, class_name: "ANON_test".into(), rtti_present: false, base_classes_json: None, methods, } } /// Encode the canonical pattern at PC `start`: /// lis r3, hi /// addi r3, r3, lo ; r3 = vtable_addr /// lwz r4, slot*4(r3) ; r4 = vtable[slot] /// mtctr r4 /// bcctrl fn encode_pattern(buf: &mut [u8], offset: usize, vtable_addr: u32, slot_off: i32) { let hi = (vtable_addr >> 16) as u16; let lo = (vtable_addr & 0xFFFF) as i16; let lis = (15u32 << 26) | (3 << 21) | (0 << 16) | (hi as u32); // addi r3, r3, lo (signed) — note: addi is treated as signed let addi = (14u32 << 26) | (3 << 21) | (3 << 16) | ((lo as u16) as u32); let lwz = (32u32 << 26) | (4 << 21) | (3 << 16) | ((slot_off as u16) as u32); // mtctr r4 = mtspr CTR(=9), r4. SPR_low (=9) → Rust bits 16-20; // SPR_high (=0) → Rust bits 11-15. Rc bit 0. let mtctr = (31u32 << 26) | (4 << 21) | (9 << 16) | (0 << 11) | (467 << 1); let bcctrl = (19u32 << 26) | (20 << 21) | (528 << 1) | 1; // bcctrl 20, 0 let words = [lis, addi, lwz, mtctr, bcctrl]; for (i, w) in words.iter().enumerate() { buf[offset + i * 4..offset + i * 4 + 4].copy_from_slice(&w.to_be_bytes()); } } #[test] fn detects_canonical_lis_addi_lwz_mtctr_bcctrl() { let image_base = 0x82000000u32; let text_va = 0x1000u32; let pc_start = image_base + text_va; let vtable_addr = 0x82010000u32; // PE: just the .text we'll write the pattern into. let mut pe = vec![0u8; 0x1100]; encode_pattern(&mut pe, text_va as usize, vtable_addr, 8); // slot 2 let mut functions: BTreeMap = BTreeMap::new(); functions.insert(pc_start, FuncInfo { start: pc_start, end: pc_start + 5 * 4, frame_size: 0, saved_gprs: 0, is_leaf: false, is_saverestore: false, pdata_validated: false, pdata_length: None, has_eh: false, }); let func_analysis = FuncAnalysis { functions, save_gpr_base: None, restore_gpr_base: None, pdata_entries: Vec::new(), }; let vtables = vec![mk_vtable(vtable_addr, vec![0xAA, 0xBB, 0xCC, 0xDD])]; let labels: HashMap = HashMap::new(); let edges = analyze(&pe, image_base, &func_analysis, &vtables, &labels); assert_eq!(edges.len(), 1); assert_eq!(edges[0].source, pc_start + 4 * 4); // bcctrl at 5th instruction assert_eq!(edges[0].target, 0xCC); // slot 2 assert_eq!(edges[0].via_vtable, vtable_addr); assert_eq!(edges[0].slot, 2); } #[test] fn out_of_range_slot_yields_no_edge() { let image_base = 0x82000000u32; let text_va = 0x1000u32; let pc_start = image_base + text_va; let vtable_addr = 0x82010000u32; let mut pe = vec![0u8; 0x1100]; // Encode slot 12, but vtable only has 4 methods. encode_pattern(&mut pe, text_va as usize, vtable_addr, 48); let mut functions: BTreeMap = BTreeMap::new(); functions.insert(pc_start, FuncInfo { start: pc_start, end: pc_start + 5 * 4, frame_size: 0, saved_gprs: 0, is_leaf: false, is_saverestore: false, pdata_validated: false, pdata_length: None, has_eh: false, }); let func_analysis = FuncAnalysis { functions, save_gpr_base: None, restore_gpr_base: None, pdata_entries: Vec::new(), }; let vtables = vec![mk_vtable(vtable_addr, vec![0xAA, 0xBB, 0xCC, 0xDD])]; let labels: HashMap = HashMap::new(); let edges = analyze(&pe, image_base, &func_analysis, &vtables, &labels); assert_eq!(edges.len(), 0); } #[test] fn label_in_middle_kills_state() { let image_base = 0x82000000u32; let text_va = 0x1000u32; let pc_start = image_base + text_va; let vtable_addr = 0x82010000u32; let mut pe = vec![0u8; 0x1100]; encode_pattern(&mut pe, text_va as usize, vtable_addr, 0); let mut functions: BTreeMap = BTreeMap::new(); functions.insert(pc_start, FuncInfo { start: pc_start, end: pc_start + 5 * 4, frame_size: 0, saved_gprs: 0, is_leaf: false, is_saverestore: false, pdata_validated: false, pdata_length: None, has_eh: false, }); let func_analysis = FuncAnalysis { functions, save_gpr_base: None, restore_gpr_base: None, pdata_entries: Vec::new(), }; let vtables = vec![mk_vtable(vtable_addr, vec![0xAA, 0xBB])]; // Label between addi and lwz — must kill the Const tracking. let mut labels: HashMap = HashMap::new(); labels.insert(pc_start + 8, "loc_mid".to_string()); let edges = analyze(&pe, image_base, &func_analysis, &vtables, &labels); assert_eq!(edges.len(), 0, "label in middle of pattern must kill register state"); } }