//! Function boundary detection via PPC prologue/epilogue pattern matching. //! //! Strategy (multi-pass): //! 1. Identify all `bl` (branch-and-link) targets — these are call sites, //! hence very likely function entry points. //! 2. Scan the save/restore GPR helper region and label it. //! 3. For each candidate entry, look for prologue patterns: //! a) `mfspr rN, LR` (typically r0 or r12) //! b) `bl __savegprlr_NN` (call into save stub) //! c) `stwu r1, -N(r1)` (allocate stack frame) //! If a prologue is confirmed, record the function and its stack frame size. //! 4. Walk forward from each function entry to find the epilogue: //! a) `blr` (return) //! b) `b __restgprlr_NN` (tail-branch into restore stub which returns) //! Mark the function's end address. //! 5. Detect leaf functions: `bl` targets that lack a prologue but eventually `blr`. use std::collections::{HashMap, HashSet, BTreeMap}; /// Information about a detected function. #[derive(Debug, Clone)] pub struct FuncInfo { /// Absolute start address. pub start: u32, /// Absolute end address (exclusive — one past last instruction). pub end: u32, /// Stack frame size (0 if unknown / leaf). pub frame_size: u32, /// Number of saved GPRs (via __savegprlr helper), 0 if unknown. pub saved_gprs: u32, /// True if this is a leaf function (no bl, no frame setup). pub is_leaf: bool, /// True if this is a save/restore GPR helper stub. pub is_saverestore: bool, } /// Result of the function analysis pass. pub struct FuncAnalysis { /// address → FuncInfo for every detected function, sorted by address. pub functions: BTreeMap, /// Addresses in the save-GPR region (start of __savegprlr block). pub save_gpr_base: Option, /// Addresses in the restore-GPR region (start of __restgprlr block). pub restore_gpr_base: Option, } // ── Instruction field helpers ────────────────────────────────────────────── fn op(instr: u32) -> u32 { (instr >> 26) & 0x3F } fn bits(instr: u32, hi: u32, lo: u32) -> u32 { (instr >> (31 - hi)) & ((1 << (hi - lo + 1)) - 1) } fn is_mfspr_lr(instr: u32) -> Option { // mfspr rD, LR → opcode 31, xo=339, spr=8 if op(instr) != 31 { return None; } let xo = bits(instr, 30, 21); if xo != 339 { return None; } let spr = (bits(instr, 20, 16) << 5) | bits(instr, 15, 11); if spr != 8 { return None; } Some(bits(instr, 10, 6)) // return rD } #[allow(dead_code)] fn is_mtspr_lr(instr: u32) -> bool { // mtspr LR, rS → opcode 31, xo=467, spr=8 if op(instr) != 31 { return false; } let xo = bits(instr, 30, 21); if xo != 467 { return false; } let spr = (bits(instr, 20, 16) << 5) | bits(instr, 15, 11); spr == 8 } fn is_stwu_r1(instr: u32) -> Option { // stwu r1, d(r1) → opcode 37, rS=1, rA=1 if op(instr) != 37 { return None; } let rs = bits(instr, 10, 6); let ra = bits(instr, 15, 11); if rs != 1 || ra != 1 { return None; } let d = ((instr & 0xFFFF) as i16) as i32; Some(d) // negative = frame allocation } fn is_blr(instr: u32) -> bool { instr == 0x4E800020 } fn is_bctr(instr: u32) -> bool { instr == 0x4E800420 } fn is_bl(instr: u32) -> Option { // bl target → opcode 18, LK=1, AA=0 if op(instr) != 18 { return None; } if instr & 1 == 0 { return None; } // must have LK bit if instr & 2 != 0 { return None; } // not absolute // Return the signed offset let li = instr & 0x03FFFFFC; Some(li) } fn is_b(instr: u32) -> Option { // b target → opcode 18, LK=0, AA=0 if op(instr) != 18 { return None; } if instr & 1 != 0 { return None; } // no LK bit if instr & 2 != 0 { return None; } // not absolute Some(instr & 0x03FFFFFC) } fn sign_ext26(val: u32) -> i32 { ((val << 6) as i32) >> 6 } fn bl_target(instr: u32, addr: u32) -> Option { is_bl(instr).map(|off| addr.wrapping_add(sign_ext26(off) as u32)) } fn b_target(instr: u32, addr: u32) -> Option { is_b(instr).map(|off| addr.wrapping_add(sign_ext26(off) as u32)) } // ── Read instruction from PE ─────────────────────────────────────────────── fn read_instr(pe: &[u8], abs_addr: u32, image_base: u32) -> Option { let off = abs_addr.wrapping_sub(image_base) as usize; if off + 4 > pe.len() { return None; } Some(u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]])) } // ── Detect the save/restore GPR helper stubs ─────────────────────────────── // // These are a well-known pattern emitted by the Xbox 360 linker. // Save block: a cascade of `std rN, offset(r1)` for r14..r31 + `stw r12, -8(r1)` + `blr` // Restore: a cascade of `ld rN, offset(r1)` for r14..r31 + `lwz r12, -8(r1)` + `mtspr LR, r12` + `blr` // // We detect the save block by finding 18 consecutive `std rN, ...(r1)` instructions // for r14 through r31. fn find_saverestore_stubs( pe: &[u8], image_base: u32, code_ranges: &[(u32, u32)], // (abs_start, abs_end) ) -> (Option, Option) { let mut save_base = None; let mut restore_base = None; for &(start, end) in code_ranges { let mut addr = start; while addr + 4 * 18 < end { // Check if this is `std r14, ...(r1)` — opcode 62 (std), rS=14, rA=1 let instr = match read_instr(pe, addr, image_base) { Some(i) => i, None => { addr += 4; continue; } }; if op(instr) == 62 && bits(instr, 10, 6) == 14 && bits(instr, 15, 11) == 1 && (instr & 3) == 0 { // Verify it's a cascade: r14, r15, ..., r31 let mut ok = true; for i in 0u32..18 { let check = match read_instr(pe, addr + i * 4, image_base) { Some(c) => c, None => { ok = false; break; } }; if op(check) != 62 || bits(check, 10, 6) != 14 + i || bits(check, 15, 11) != 1 { ok = false; break; } } if ok { save_base = Some(addr); // Restore block typically follows the save block // After save: stw r12, -8(r1) + blr, then restore starts let after_save = addr + 18 * 4 + 8; // skip stw r12 + blr let check = read_instr(pe, after_save, image_base); if let Some(c) = check { // Should be `ld r14, ...(r1)` — opcode 58, rT=14, rA=1 if op(c) == 58 && bits(c, 10, 6) == 14 && bits(c, 15, 11) == 1 { restore_base = Some(after_save); } } break; } } addr += 4; } if save_base.is_some() { break; } } (save_base, restore_base) } // ── Main analysis ────────────────────────────────────────────────────────── #[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))] pub fn analyze( pe: &[u8], image_base: u32, entry_point: u32, code_sections: &[(u32, u32, u32)], // (va_start, va_size, flags) ) -> FuncAnalysis { let started = std::time::Instant::now(); let code_ranges: Vec<(u32, u32)> = code_sections.iter() .map(|(va, sz, _)| (image_base + va, image_base + va + sz)) .collect(); // 1. Find save/restore stubs let (save_base, restore_base) = find_saverestore_stubs(pe, image_base, &code_ranges); if let Some(sb) = save_base { tracing::debug!(addr = format_args!("{:#010x}", sb), "__savegprlr stub"); } if let Some(rb) = restore_base { tracing::debug!(addr = format_args!("{:#010x}", rb), "__restgprlr stub"); } // Set of addresses in the save/restore region (to exclude from function detection) let mut saverestore_addrs: HashSet = HashSet::new(); if let Some(sb) = save_base { // Save block: 18 std + stw + blr = 20 instructions for i in 0..20 { saverestore_addrs.insert(sb + i * 4); } } if let Some(rb) = restore_base { // Restore block: 18 ld + lwz + mtspr + blr = 21 instructions for i in 0..21 { saverestore_addrs.insert(rb + i * 4); } } // 2. Collect all bl targets as candidate function entries let mut call_targets: HashSet = HashSet::new(); call_targets.insert(entry_point); for &(start, end) in &code_ranges { let mut addr = start; while addr < end { if let Some(instr) = read_instr(pe, addr, image_base) && let Some(target) = bl_target(instr, addr) { // Don't count calls into save/restore stubs as function entries if !saverestore_addrs.contains(&target) { call_targets.insert(target); } } addr += 4; } } tracing::debug!(candidates = call_targets.len(), "bl targets collected"); // 3. For each candidate, detect prologue and walk to epilogue let mut functions: BTreeMap = BTreeMap::new(); for &func_addr in &call_targets { if let Some(fi) = analyze_function(pe, image_base, func_addr, &code_ranges, save_base, restore_base) { functions.insert(func_addr, fi); } } // 4. Label save/restore stubs as special functions — one entry for the whole block if let Some(sb) = save_base { // The save block is one cascade: entry at each rN, falls through to blr // Treat as a single function with the first entry point functions.insert(sb, FuncInfo { start: sb, end: sb + 20 * 4, // 18 std + stw r12 + blr frame_size: 0, saved_gprs: 18, is_leaf: true, is_saverestore: true, }); } if let Some(rb) = restore_base { functions.insert(rb, FuncInfo { start: rb, end: rb + 21 * 4, // 18 ld + lwz r12 + mtspr LR + blr frame_size: 0, saved_gprs: 18, is_leaf: true, is_saverestore: true, }); } let elapsed_ms = started.elapsed().as_millis() as f64; metrics::histogram!("analysis.phase_ms", "phase" => "functions").record(elapsed_ms); tracing::info!( functions = functions.len(), elapsed_ms, "function detection complete" ); FuncAnalysis { functions, save_gpr_base: save_base, restore_gpr_base: restore_base, } } /// Analyze a single function starting at `func_addr`. fn analyze_function( pe: &[u8], image_base: u32, func_addr: u32, code_ranges: &[(u32, u32)], save_base: Option, restore_base: Option, ) -> Option { // Verify the address is within a code section let in_code = code_ranges.iter().any(|&(s, e)| func_addr >= s && func_addr < e); if !in_code { return None; } let instr0 = read_instr(pe, func_addr, image_base)?; let mut frame_size: u32 = 0; let mut saved_gprs: u32 = 0; let mut is_leaf = false; let mut prologue_len: u32 = 0; // Pattern A: mfspr rN, LR [+ bl __savegprlr_NN] + stwu r1, -N(r1) if let Some(_lr_reg) = is_mfspr_lr(instr0) { prologue_len = 4; let instr1 = read_instr(pe, func_addr + 4, image_base).unwrap_or(0); // Check if next is bl to save stub if let Some(target) = bl_target(instr1, func_addr + 4) && let Some(sb) = save_base && target >= sb && target < sb + 18 * 4 { let idx = (target - sb) / 4; saved_gprs = 18 - idx; prologue_len = 8; } // Next should be stwu r1, -N(r1) let stwu_instr = read_instr(pe, func_addr + prologue_len, image_base).unwrap_or(0); if let Some(d) = is_stwu_r1(stwu_instr) { frame_size = (-d) as u32; prologue_len += 4; } } // Pattern B: stwu r1, -N(r1) without mfspr (rare but possible for leaf-ish functions) else if let Some(d) = is_stwu_r1(instr0) { frame_size = (-d) as u32; prologue_len = 4; is_leaf = true; // no LR save = likely leaf (or uses CTR) } // Pattern C: no prologue — leaf function, just code until blr else { is_leaf = true; } // Walk forward to find the end of the function let max_range = code_ranges.iter() .find(|&&(s, e)| func_addr >= s && func_addr < e) .map(|&(_, e)| e) .unwrap_or(func_addr + 0x100000); let mut end_addr = func_addr + 4; let mut addr = func_addr + prologue_len; let scan_limit = std::cmp::min(addr + 0x100000, max_range); // 1MB max function while addr < scan_limit { let instr = match read_instr(pe, addr, image_base) { Some(i) => i, None => break, }; // Epilogue: blr if is_blr(instr) { end_addr = addr + 4; // Check if the instruction after blr looks like padding or another function // Sometimes there's trailing data after blr; we stop at the first blr // that isn't inside a branch-over pattern break; } // Epilogue: b __restgprlr_NN (tail branch into restore stub) if let Some(target) = b_target(instr, addr) && let Some(rb) = restore_base && target >= rb && target < rb + 18 * 4 { end_addr = addr + 4; break; } // Epilogue: bctr (indirect tail call — end of function) if is_bctr(instr) { end_addr = addr + 4; break; } addr += 4; } // If we didn't find any epilogue within a reasonable range, still emit // the function but mark end at the scan point if end_addr <= func_addr + 4 && prologue_len > 0 { end_addr = addr; } // Don't emit zero-size "functions" for addresses that are just data if end_addr <= func_addr + 4 && prologue_len == 0 { return None; } Some(FuncInfo { start: func_addr, end: end_addr, frame_size, saved_gprs, is_leaf, is_saverestore: false, }) } // ── Label generation ─────────────────────────────────────────────────────── impl FuncAnalysis { /// Generate labels for all detected functions. /// Call targets with confirmed prologues get `sub_XXXXXXXX`. /// Save/restore entries get `__savegprlr_NN` / `__restgprlr_NN`. pub fn generate_labels(&self) -> HashMap { let mut labels = HashMap::new(); for (&addr, fi) in &self.functions { if fi.is_saverestore { // Label the block start, plus individual register entry points if let Some(sb) = self.save_gpr_base && addr == sb { for i in 0u32..18 { let reg = 14 + i; labels.insert(sb + i * 4, format!("__savegprlr_{reg}")); } continue; } if let Some(rb) = self.restore_gpr_base && addr == rb { for i in 0u32..18 { let reg = 14 + i; labels.insert(rb + i * 4, format!("__restgprlr_{reg}")); } continue; } } labels.insert(addr, format!("sub_{addr:08X}")); } labels } /// Returns true if `addr` is the start of a detected function. pub fn is_function_start(&self, addr: u32) -> bool { self.functions.contains_key(&addr) } /// Get info for the function starting at `addr`. pub fn get(&self, addr: u32) -> Option<&FuncInfo> { self.functions.get(&addr) } }