xenia-rs/crates/xenia-analysis/src/indirect.rs

//! Indirect-dispatch reachability for vtable-bound `bcctrl`/`bctrl` sites.
//!
//! Walks each detected function with a tiny per-basic-block register tracker,
//! recognising the canonical MSVC PowerPC pattern that loads a slot from a
//! statically-addressed vtable into CTR and indirectly calls it:
//!
//! ```text
//!   lis   rA, hi
//!   addi  rA, rA, lo                ; rA  = vtable_address
//!   lwz   rB, slot*4(rA)            ; rB  = vtable[slot]
//!   mtctr rB                        ; CTR = vtable[slot]
//!   bcctrl                          ; indirect call → vtable[slot]
//! ```
//!
//! Pattern hits are emitted as `(source_pc, target_pc)` pairs that callers
//! insert into the `xrefs` table with `kind='ind_call'`.
//!
//! ### What this does NOT cover
//!
//! - Vtable pointer loaded from a `this`-pointer field (`lwz rA, off(this)`)
//!   is the dominant pattern in real C++ code; resolving it requires
//!   alias / points-to analysis that's far beyond this layer's scope.
//! - Indirect calls via function-pointer fields (callbacks) are similarly
//!   unresolvable without object-flow analysis.
//! - Register state is intentionally killed at every label (basic-block
//!   boundary) — we don't try to do flow-sensitive merging across joins.
//!
//! Reference: IBM PowerPC ABI on register-save convention, plus the
//! `xenia_analysis::xref` `lis+addi`/`lis+ori` tracker which we mirror
//! conceptually.

use std::collections::{BTreeMap, HashMap, HashSet};

use crate::func::FuncAnalysis;
use crate::vtables::Vtable;

/// One detected indirect-call edge: `bcctrl` at `source` jumps to `target`.
///
/// Produced by [`analyze`] for every `bcctrl` whose CTR value could be traced
/// back to a load from a known vtable slot.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndirectEdge {
    /// Address of the `bcctrl` instruction itself.
    pub source: u32,
    /// Method address read from the resolved vtable slot (the call target).
    pub target: u32,
    /// Vtable the source resolved through.
    pub via_vtable: u32,
    /// Method slot index within the vtable.
    pub slot: u32,
}

/// Abstract value tracked per general-purpose register (and for CTR) while
/// scanning a function. A register with no tracked value is represented by
/// `None` in the surrounding `Option<RegVal>`.
#[derive(Debug, Clone, Copy)]
enum RegVal {
    /// Register holds a known constant (e.g. after `lis+addi`).
    Const(u32),
    /// Register holds a method pointer loaded from a known vtable slot.
    MethodPtr {
        /// Address of the vtable the slot belongs to.
        vtable_addr: u32,
        /// Zero-based slot index inside that vtable.
        slot: u32,
        /// Method address stored in the slot; becomes the edge target.
        method_pc: u32,
    },
}

// Primary opcodes (`instr >> 26`) that `analyze` decodes explicitly.

/// `addi rD, rA, SIMM`; with `rA == 0` this is `li rD, SIMM`.
const OP_ADDI:  u32 = 14;
/// `addis rD, rA, SIMM`; with `rA == 0` this is `lis rD, SIMM`.
const OP_ADDIS: u32 = 15;
/// Branch-to-register family; the extended opcode selects the instruction.
const OP_BCCTR: u32 = 19;       // also covers blr — distinguish via XO
/// `lwz rD, d(rA)`.
const OP_LWZ:   u32 = 32;
/// `ori rA, rS, UIMM` (destination is rA, source is rS).
const OP_ORI:   u32 = 24;
/// Register-register operations; the extended opcode selects the instruction.
const OP_X_FORM: u32 = 31;      // mtspr / mr / etc.

/// Run the static indirect-dispatch scan. Returns one edge per resolvable
/// `bcctrl` site.
///
/// * `pe` – image bytes; the instruction at virtual address `a` is read from
///   offset `a - image_base`.
/// * `image_base` – virtual address corresponding to offset 0 of `pe`.
/// * `func_analysis` – detected functions; each one is scanned linearly from
///   its start up to (not including) its `end`. Save/restore stubs are skipped.
/// * `vtables` – known vtables, indexed here by their `address`.
/// * `labels` – every key is treated as a basic-block boundary at which all
///   tracked register state is discarded.
///
/// The tracker is deliberately conservative: whenever an instruction may
/// write a general-purpose register and its result is not modelled, the
/// register is forgotten. This costs edges (false negatives) but must never
/// invent one (false positives).
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base)))]
pub fn analyze(
    pe: &[u8],
    image_base: u32,
    func_analysis: &FuncAnalysis,
    vtables: &[Vtable],
    labels: &HashMap<u32, String>,
) -> Vec<IndirectEdge> {
    let started = std::time::Instant::now();
    // Index vtables by their start VA so the lwz handler can decide
    // whether a given Const(addr) is "really" a vtable.
    let vtable_by_addr: BTreeMap<u32, &Vtable> =
        vtables.iter().map(|v| (v.address, v)).collect();

    // Every labelled PC is a potential jump target, so register state
    // arriving there from the linear predecessor is unreliable.
    let block_boundaries: HashSet<u32> = labels.keys().copied().collect();

    let mut edges: Vec<IndirectEdge> = Vec::new();

    for (&fn_start, fi) in &func_analysis.functions {
        if fi.is_saverestore {
            continue;
        }
        let mut reg: [Option<RegVal>; 32] = [None; 32];
        let mut ctr: Option<RegVal> = None;
        let mut pc = fn_start;
        while pc < fi.end {
            // Reset register state on basic-block entry. The function entry
            // itself needs no reset: the state starts out empty.
            if pc != fn_start && block_boundaries.contains(&pc) {
                reg = [None; 32];
                ctr = None;
            }

            let Some(instr) = read_instr(pe, image_base, pc) else {
                break;
            };

            let op = instr >> 26;
            let rd = ((instr >> 21) & 0x1F) as usize;
            let ra = ((instr >> 16) & 0x1F) as usize;
            let simm = ((instr & 0xFFFF) as i16) as i32;
            let uimm = instr & 0xFFFF;

            match op {
                // lis rD, IMM (== addis rD, r0, IMM)
                OP_ADDIS if ra == 0 => {
                    reg[rd] = Some(RegVal::Const(uimm << 16));
                }
                // addis rD, rA, IMM
                OP_ADDIS => {
                    reg[rd] = match reg[ra] {
                        Some(RegVal::Const(b)) => {
                            Some(RegVal::Const(b.wrapping_add(uimm << 16)))
                        }
                        _ => None,
                    };
                }
                // addi rD, rA, IMM
                OP_ADDI if ra != 0 => {
                    reg[rd] = match reg[ra] {
                        Some(RegVal::Const(b)) => {
                            Some(RegVal::Const(b.wrapping_add(simm as u32)))
                        }
                        _ => None,
                    };
                }
                // li rD, IMM (== addi rD, 0, IMM)
                OP_ADDI => {
                    reg[rd] = Some(RegVal::Const(simm as u32));
                }
                // ori rA, rS, IMM — note operand order: bits 21..25 = rS, 16..20 = rA
                OP_ORI => {
                    let rs = rd;
                    reg[ra] = match reg[rs] {
                        Some(RegVal::Const(b)) => Some(RegVal::Const(b | uimm)),
                        _ => None,
                    };
                }
                // lwz rD, off(rA) — try to resolve as vtable slot load.
                OP_LWZ => {
                    reg[rd] = match reg[ra] {
                        // rA == 0 means "no base register", not r0.
                        Some(RegVal::Const(base)) if ra != 0 => {
                            let target = base.wrapping_add(simm as u32);
                            // Primary lookup: the effective address falls on
                            // a slot of a known vtable. The second helper is
                            // kept as a fallback for an exact-head hit.
                            resolve_vtable_slot(target, &vtable_by_addr)
                                .or_else(|| {
                                    resolve_vtable_slot_via_off(base, simm, &vtable_by_addr)
                                })
                                .map(|(vtable_addr, slot, method_pc)| RegVal::MethodPtr {
                                    vtable_addr,
                                    slot,
                                    method_pc,
                                })
                        }
                        _ => None,
                    };
                }
                // X-form: mtspr/mtctr, mr, compares, indexed loads, etc.
                OP_X_FORM => {
                    let xo = (instr >> 1) & 0x3FF;
                    match xo {
                        467 => {
                            // mtspr SPR, rS — the 10-bit SPR number is stored
                            // with its two 5-bit halves swapped: low half in
                            // bits 16..20, high half in bits 11..15.
                            let spr = (((instr >> 11) & 0x1F) << 5) | ((instr >> 16) & 0x1F);
                            if spr == 9 {
                                ctr = reg[rd];
                            }
                            // Other SPRs have no effect on tracked state.
                        }
                        // cmp / cmpl only write a CR field.
                        0 | 32 => {}
                        // mfspr rD, SPR — bits 16..20 are part of the SPR
                        // number here, not a register.
                        339 => {
                            reg[rd] = None;
                        }
                        // or rA, rS, rB — `mr rA, rS` when rS == rB, which
                        // simply copies the tracked value.
                        444 => {
                            let rb = ((instr >> 11) & 0x1F) as usize;
                            reg[ra] = if rb == rd { reg[rd] } else { None };
                        }
                        // Not decoded further. Arithmetic and loads write the
                        // register in bits 21..25; logical ops, shifts,
                        // extends and update-form accesses write the one in
                        // bits 16..20. Forget both so no stale value survives.
                        _ => {
                            reg[rd] = None;
                            reg[ra] = None;
                        }
                    }
                }
                // Opcode 19: bclr (XO=16) / bcctr (XO=528). LK in low bit.
                OP_BCCTR => {
                    let xo = (instr >> 1) & 0x3FF;
                    let lk = (instr & 1) != 0;
                    if lk && xo == 528 {
                        if let Some(RegVal::MethodPtr { vtable_addr, slot, method_pc }) = ctr {
                            edges.push(IndirectEdge {
                                source: pc,
                                target: method_pc,
                                via_vtable: vtable_addr,
                                slot,
                            });
                        }
                    }
                    // bcctrl / bclrl are calls: the callee is free to
                    // overwrite the volatile registers and CTR, exactly as
                    // for `bl` below.
                    if lk && matches!(xo, 16 | 528) {
                        clobber_volatile_state(&mut reg, &mut ctr);
                    }
                }
                // op 18: b / bl / ba / bla. LK=1 is a call; LK=0 is an
                // unconditional branch with no fall-through (the next PC is
                // reached only via a label, which resets state anyway).
                // Across a call r0..r12 and CTR are volatile while r13..r31
                // are callee-saved, so vtable pointers parked in r30/r31
                // survive.
                18 => {
                    if (instr & 1) != 0 {
                        clobber_volatile_state(&mut reg, &mut ctr);
                    }
                }
                // Conditional branches (op 16) fall through; keep state for
                // the fall-through path unless the branch is a call (LK=1).
                16 => {
                    if (instr & 1) != 0 {
                        clobber_volatile_state(&mut reg, &mut ctr);
                    }
                }
                // mulli, subfic, addic, addic. — results are not modelled.
                7 | 8 | 12 | 13 => {
                    reg[rd] = None;
                }
                // rlwimi, rlwinm, rlwnm, oris, xori, xoris, andi., andis.
                // and the rld* rotates (30) all write rA.
                20 | 21 | 23 | 25..=30 => {
                    reg[ra] = None;
                }
                // Remaining D-form loads and stores (lwz is handled above).
                33..=55 => {
                    match op {
                        // lwzu, lbz, lbzu, lhz, lhzu, lha, lhau load a GPR.
                        33..=35 | 40..=43 => reg[rd] = None,
                        // lmw loads every register from rD through r31.
                        46 => reg[rd..].fill(None),
                        // Stores and floating-point loads/stores leave the
                        // GPR named by bits 21..25 untouched.
                        _ => {}
                    }
                    // Update forms are the odd opcodes in this range (stmw,
                    // 47, is the exception); they write the effective
                    // address back to rA.
                    if op % 2 == 1 && op != 47 {
                        reg[ra] = None;
                    }
                }
                // DS-form ld / ldu / lwa: rD is loaded; ldu (low bits == 1)
                // also updates rA.
                58 => {
                    reg[rd] = None;
                    if instr & 3 == 1 {
                        reg[ra] = None;
                    }
                }
                // DS-form std / stdu: only stdu (low bits == 1) writes a GPR.
                62 => {
                    if instr & 3 == 1 {
                        reg[ra] = None;
                    }
                }
                _ => {}
            }

            // Stop instead of wrapping around the top of the address space.
            let Some(next_pc) = pc.checked_add(4) else {
                break;
            };
            pc = next_pc;
        }
    }

    let elapsed_ms = started.elapsed().as_millis() as f64;
    metrics::histogram!("analysis.phase_ms", "phase" => "indirect").record(elapsed_ms);
    tracing::info!(
        edges = edges.len(),
        elapsed_ms,
        "indirect-dispatch scan complete"
    );
    edges
}

/// Forget everything a callee is allowed to overwrite: r0..r12 and CTR.
fn clobber_volatile_state(reg: &mut [Option<RegVal>; 32], ctr: &mut Option<RegVal>) {
    reg[..=12].fill(None);
    *ctr = None;
}

/// Read the big-endian 32-bit instruction word at virtual address `addr`.
///
/// `pe` holds the image bytes and `image_base` is the virtual address of its
/// first byte. Returns `None` when `addr` lies below `image_base` or when the
/// four bytes are not fully contained in `pe`.
fn read_instr(pe: &[u8], image_base: u32, addr: u32) -> Option<u32> {
    // Checked arithmetic throughout: an address below the image must not wrap
    // into a huge offset, and `off + 4` must not overflow a 32-bit `usize`.
    let off = addr.checked_sub(image_base)? as usize;
    let end = off.checked_add(4)?;
    let word = pe.get(off..end)?;
    Some(u32::from_be_bytes([word[0], word[1], word[2], word[3]]))
}

/// `target = base + simm` where `target` is an exact vtable head (rare,
/// compiler hoists the slot offset into the addi).
fn resolve_vtable_slot_via_off(
    base: u32,
    simm: i32,
    vtable_by_addr: &BTreeMap<u32, &Vtable>,
) -> Option<(u32, u32, u32)> {
    let target = base.wrapping_add(simm as u32);
    if let Some(v) = vtable_by_addr.get(&target)
        && !v.methods.is_empty()
    {
        return Some((v.address, 0, v.methods[0]));
    }
    None
}

/// Map an absolute address onto a slot of a known vtable.
///
/// Picks the vtable with the greatest start address not above `target`, then
/// requires the byte distance from that start to be a multiple of 4 and the
/// resulting slot index to be below both the vtable's `length` and the
/// number of entries in its `methods`. Returns
/// `(vtable_addr, slot, method_pc)`, or `None` if any check fails.
fn resolve_vtable_slot(
    target: u32,
    vtable_by_addr: &BTreeMap<u32, &Vtable>,
) -> Option<(u32, u32, u32)> {
    // Nearest vtable head at or below `target`.
    let (&head, vt) = vtable_by_addr.range(..=target).next_back()?;
    let byte_off = target.checked_sub(head)?;
    if byte_off % 4 != 0 {
        return None;
    }
    let slot = byte_off / 4;
    if slot >= vt.length {
        return None;
    }
    vt.methods
        .get(slot as usize)
        .map(|&method_pc| (head, slot, method_pc))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::func::FuncInfo;
    use std::collections::BTreeMap;

    /// Load address of the synthetic image.
    const IMAGE_BASE: u32 = 0x82000000;
    /// Offset inside the image at which the pattern is written.
    const TEXT_VA: u32 = 0x1000;
    /// Start address of the single synthetic function.
    const FN_START: u32 = IMAGE_BASE + TEXT_VA;
    /// Address of the single synthetic vtable.
    const VTABLE_ADDR: u32 = 0x82010000;
    /// The encoded pattern is five 4-byte instructions.
    const PATTERN_BYTES: u32 = 5 * 4;

    /// Build an anonymous vtable at `addr` holding `methods`.
    fn mk_vtable(addr: u32, methods: Vec<u32>) -> Vtable {
        let length = methods.len() as u32;
        Vtable {
            address: addr,
            length,
            col_address: None,
            class_name: "ANON_test".into(),
            rtti_present: false,
            base_classes_json: None,
            methods,
        }
    }

    /// Assemble a D-form word: opcode, register in bits 21..25, register in
    /// bits 16..20, 16-bit immediate.
    fn d_form(opcode: u32, rt: u32, ra: u32, imm16: u16) -> u32 {
        (opcode << 26) | (rt << 21) | (ra << 16) | u32::from(imm16)
    }

    /// Write the canonical five-instruction sequence into `buf` at `offset`:
    ///   lis   r3, hi
    ///   addi  r3, r3, lo                 ; r3 = vtable_addr
    ///   lwz   r4, slot_off(r3)           ; r4 = vtable[slot]
    ///   mtctr r4
    ///   bcctrl
    ///
    /// The halves of `vtable_addr` are emitted verbatim, with no carry
    /// adjustment of `hi` for a negative `lo`.
    fn encode_pattern(buf: &mut [u8], offset: usize, vtable_addr: u32, slot_off: i32) {
        let hi = (vtable_addr >> 16) as u16;
        let lo = (vtable_addr & 0xFFFF) as u16;
        let words = [
            d_form(15, 3, 0, hi),
            d_form(14, 3, 3, lo),
            d_form(32, 4, 3, slot_off as u16),
            // mtctr r4 == mtspr 9, r4: SPR low half (9) sits in bits 16..20,
            // the high half (0) in bits 11..15; XO = 467.
            (31u32 << 26) | (4 << 21) | (9 << 16) | (467 << 1),
            // bcctrl 20, 0: XO = 528 with LK set.
            (19u32 << 26) | (20 << 21) | (528 << 1) | 1,
        ];
        let dest = &mut buf[offset..offset + words.len() * 4];
        for (chunk, word) in dest.chunks_exact_mut(4).zip(words) {
            chunk.copy_from_slice(&word.to_be_bytes());
        }
    }

    /// Zeroed image with the pattern written at `TEXT_VA`.
    fn image_with_pattern(slot_off: i32) -> Vec<u8> {
        let mut pe = vec![0u8; 0x1100];
        encode_pattern(&mut pe, TEXT_VA as usize, VTABLE_ADDR, slot_off);
        pe
    }

    /// `FuncAnalysis` describing exactly one function covering the pattern.
    fn single_function_analysis() -> FuncAnalysis {
        let info = FuncInfo {
            start: FN_START,
            end: FN_START + PATTERN_BYTES,
            frame_size: 0,
            saved_gprs: 0,
            is_leaf: false,
            is_saverestore: false,
            pdata_validated: false,
            pdata_length: None,
            has_eh: false,
        };
        FuncAnalysis {
            functions: BTreeMap::from([(FN_START, info)]),
            save_gpr_base: None,
            restore_gpr_base: None,
            pdata_entries: Vec::new(),
        }
    }

    /// Encode the pattern with `slot_off`, register one vtable holding
    /// `methods`, and run the scan with the given `labels`.
    fn scan(
        slot_off: i32,
        methods: Vec<u32>,
        labels: &HashMap<u32, String>,
    ) -> Vec<IndirectEdge> {
        let pe = image_with_pattern(slot_off);
        let vtables = vec![mk_vtable(VTABLE_ADDR, methods)];
        analyze(&pe, IMAGE_BASE, &single_function_analysis(), &vtables, labels)
    }

    #[test]
    fn detects_canonical_lis_addi_lwz_mtctr_bcctrl() {
        // Byte offset 8 selects slot 2.
        let edges = scan(8, vec![0xAA, 0xBB, 0xCC, 0xDD], &HashMap::new());

        assert_eq!(
            edges,
            [IndirectEdge {
                source: FN_START + 4 * 4, // bcctrl is the fifth instruction
                target: 0xCC,
                via_vtable: VTABLE_ADDR,
                slot: 2,
            }]
        );
    }

    #[test]
    fn out_of_range_slot_yields_no_edge() {
        // Byte offset 48 is slot 12, but the vtable only has 4 methods.
        let edges = scan(48, vec![0xAA, 0xBB, 0xCC, 0xDD], &HashMap::new());
        assert_eq!(edges.len(), 0);
    }

    #[test]
    fn label_in_middle_kills_state() {
        // Label between addi and lwz — must kill the Const tracking.
        let labels: HashMap<u32, String> =
            HashMap::from([(FN_START + 8, "loc_mid".to_string())]);

        let edges = scan(0, vec![0xAA, 0xBB], &labels);
        assert_eq!(edges.len(), 0, "label in middle of pattern must kill register state");
    }
}