Initial commit: xenia-rs workspace for Xbox 360 RE

Rust reimplementation of the xenia Xbox 360 emulator targeting reverse-
engineering and preservation, initially scoped to Project Sylpheed.

Includes:
- XEX2 loader (LZX decompression, AES decryption, PE parsing)
- XISO / XGD2 disc image VFS
- PPC interpreter with 200+ opcodes and VMX128 decoding
- Static analyzer: functions, cross-references, labels, asm + SQLite output
- HLE kernel covering the xboxkrnl/xam subset used by Sylpheed init
- Debugger with in-memory and SQLite-backed execution tracing
- `xenia-rs` CLI with extract/dis/exec commands that produce cumulative,
  superset SQLite databases and opt-in instruction/import/branch traces

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-04-16 23:11:49 +02:00
commit c694bb3f43
63 changed files with 13456 additions and 0 deletions

View File

@@ -0,0 +1,444 @@
//! Function boundary detection via PPC prologue/epilogue pattern matching.
//!
//! Strategy (multi-pass):
//! 1. Identify all `bl` (branch-and-link) targets — these are the destinations
//! of call sites, hence very likely function entry points.
//! 2. Scan the save/restore GPR helper region and label it.
//! 3. For each candidate entry, look for prologue patterns:
//! a) `mfspr rN, LR` (typically r0 or r12)
//! b) `bl __savegprlr_NN` (call into save stub)
//! c) `stwu r1, -N(r1)` (allocate stack frame)
//! If a prologue is confirmed, record the function and its stack frame size.
//! 4. Walk forward from each function entry to find the epilogue:
//! a) `blr` (return)
//! b) `b __restgprlr_NN` (tail-branch into restore stub which returns)
//! Mark the function's end address.
//! 5. Detect leaf functions: `bl` targets that lack a prologue but eventually `blr`.
use std::collections::{HashMap, HashSet, BTreeMap};
/// Information about a detected function.
///
/// Derives `PartialEq`/`Eq` so analysis results can be compared directly
/// (for example in tests or when diffing two analysis runs).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FuncInfo {
    /// Absolute address of the first instruction.
    pub start: u32,
    /// Absolute end address (exclusive — one past the last instruction).
    pub end: u32,
    /// Stack frame size in bytes as read from the `stwu r1, -N(r1)` prologue
    /// instruction (0 if no frame allocation was recognized).
    pub frame_size: u32,
    /// Number of GPRs saved via a `__savegprlr_NN` helper call (0 if no such
    /// call was recognized in the prologue).
    pub saved_gprs: u32,
    /// True if no link-register save (`mfspr rN, LR`) was found at the entry.
    /// Such a function may still allocate a stack frame; the save/restore
    /// stub entries are flagged as leaf as well.
    pub is_leaf: bool,
    /// True if this entry describes a save/restore GPR helper stub block.
    pub is_saverestore: bool,
}
/// Result of the function analysis pass.
///
/// Derives `Debug` and `Clone` (like [`FuncInfo`]) so results can be logged
/// and copied by callers.
#[derive(Debug, Clone)]
pub struct FuncAnalysis {
    /// Start address → [`FuncInfo`] for every detected function; the
    /// `BTreeMap` keeps the entries ordered by address.
    pub functions: BTreeMap<u32, FuncInfo>,
    /// Address of the first instruction of the `__savegprlr` block, if found.
    pub save_gpr_base: Option<u32>,
    /// Address of the first instruction of the `__restgprlr` block, if found.
    pub restore_gpr_base: Option<u32>,
}
// ── Instruction field helpers ──────────────────────────────────────────────
/// Extracts the 6-bit primary opcode, i.e. the six most-significant bits of
/// the instruction word.
fn op(instr: u32) -> u32 {
    // Shifting a u32 right by 26 leaves exactly six bits, so no mask is needed.
    instr >> 26
}
/// Extracts an instruction field using big-endian (PowerPC-style) bit
/// numbering, where bit 0 is the most-significant bit of the word.
///
/// `lo` is the index of the field's first (most-significant) bit and `hi` the
/// index of its last (least-significant) bit, both inclusive, so
/// `bits(instr, 10, 6)` yields the 5-bit field occupying bits 6..=10.
///
/// The mask is built by shifting `u32::MAX` right, which stays well-defined
/// for a full 32-bit field (`bits(x, 31, 0)`); building it as
/// `(1 << width) - 1` would overflow the shift for that width.
fn bits(instr: u32, hi: u32, lo: u32) -> u32 {
    debug_assert!(lo <= hi && hi <= 31, "invalid bit range {}..={}", lo, hi);
    let width = hi - lo + 1;
    (instr >> (31 - hi)) & (u32::MAX >> (32 - width))
}
/// Recognizes `mfspr rD, LR` (primary opcode 31, extended opcode 339, SPR 8).
///
/// Returns the destination register number `rD` on a match, `None` otherwise.
fn is_mfspr_lr(instr: u32) -> Option<u32> {
    // The SPR number is reassembled from two 5-bit halves: bits 16..=20 form
    // the upper half and bits 11..=15 the lower half.
    let spr = (bits(instr, 20, 16) << 5) | bits(instr, 15, 11);
    match (op(instr), bits(instr, 30, 21), spr) {
        (31, 339, 8) => Some(bits(instr, 10, 6)),
        _ => None,
    }
}
// Not called by the analysis passes in this file; the lint is silenced so the
// helper can stay available.
#[allow(dead_code)]
/// Recognizes `mtspr LR, rS` (primary opcode 31, extended opcode 467, SPR 8).
fn is_mtspr_lr(instr: u32) -> bool {
    // Same split-field SPR reconstruction as for `mfspr`.
    let spr = (bits(instr, 20, 16) << 5) | bits(instr, 15, 11);
    matches!((op(instr), bits(instr, 30, 21), spr), (31, 467, 8))
}
/// Recognizes `stwu r1, d(r1)` (primary opcode 37 with rS = rA = 1).
///
/// Returns the sign-extended 16-bit displacement `d`; a negative value means
/// the instruction allocates a stack frame of `-d` bytes.
fn is_stwu_r1(instr: u32) -> Option<i32> {
    let is_stwu = op(instr) == 37;
    let source_is_r1 = bits(instr, 10, 6) == 1;
    let base_is_r1 = bits(instr, 15, 11) == 1;
    if !(is_stwu && source_is_r1 && base_is_r1) {
        return None;
    }
    // The displacement is the low halfword, interpreted as a signed value.
    Some(i32::from(instr as u16 as i16))
}
/// True when `instr` is exactly the `blr` (return via link register) word.
fn is_blr(instr: u32) -> bool {
    const BLR: u32 = 0x4E80_0020;
    instr == BLR
}
/// True when `instr` is exactly the `bctr` (branch via count register) word.
fn is_bctr(instr: u32) -> bool {
    const BCTR: u32 = 0x4E80_0420;
    instr == BCTR
}
/// Recognizes a relative `bl` (primary opcode 18 with LK = 1 and AA = 0).
///
/// Returns the raw branch displacement field (the instruction word with the
/// opcode, AA and LK bits masked off). The value is NOT sign-extended; pass it
/// through `sign_ext26` to obtain the signed byte offset.
fn is_bl(instr: u32) -> Option<u32> {
    let lk = instr & 1;
    let aa = (instr >> 1) & 1;
    match (op(instr), aa, lk) {
        (18, 0, 1) => Some(instr & 0x03FF_FFFC),
        _ => None,
    }
}
/// Recognizes a relative `b` (primary opcode 18 with LK = 0 and AA = 0).
///
/// Returns the raw, not yet sign-extended branch displacement field.
fn is_b(instr: u32) -> Option<u32> {
    let lk = instr & 1;
    let aa = (instr >> 1) & 1;
    match (op(instr), aa, lk) {
        (18, 0, 0) => Some(instr & 0x03FF_FFFC),
        _ => None,
    }
}
/// Sign-extends the low 26 bits of `val` to a full `i32`.
///
/// Any bits above bit 25 in the input are ignored.
fn sign_ext26(val: u32) -> i32 {
    const FIELD_MASK: u32 = (1 << 26) - 1;
    const SIGN_BIT: u32 = 1 << 25;

    let field = val & FIELD_MASK;
    if field & SIGN_BIT != 0 {
        // Negative: replicate the sign bit into the upper six bits.
        (field | !FIELD_MASK) as i32
    } else {
        field as i32
    }
}
/// Computes the absolute destination of a relative `bl` located at `addr`.
///
/// Returns `None` when `instr` is not a relative `bl`. The addition wraps on
/// 32-bit overflow.
fn bl_target(instr: u32, addr: u32) -> Option<u32> {
    let raw_offset = is_bl(instr)?;
    let displacement = sign_ext26(raw_offset);
    Some(addr.wrapping_add(displacement as u32))
}
/// Computes the absolute destination of a relative `b` located at `addr`.
///
/// Returns `None` when `instr` is not a relative `b`. The addition wraps on
/// 32-bit overflow.
fn b_target(instr: u32, addr: u32) -> Option<u32> {
    let raw_offset = is_b(instr)?;
    let displacement = sign_ext26(raw_offset);
    Some(addr.wrapping_add(displacement as u32))
}
// ── Read instruction from PE ───────────────────────────────────────────────
/// Reads the big-endian 32-bit word stored at absolute address `abs_addr`.
///
/// `pe` is the image contents and `image_base` the absolute address of its
/// first byte. Returns `None` when the address lies below `image_base` or
/// when fewer than four bytes are available at the resulting offset.
///
/// All offset arithmetic is checked, so an out-of-range address can neither
/// wrap around into a valid offset nor overflow on targets with a 32-bit
/// `usize`.
fn read_instr(pe: &[u8], abs_addr: u32, image_base: u32) -> Option<u32> {
    let off = abs_addr.checked_sub(image_base)? as usize;
    let end = off.checked_add(4)?;
    let word = pe.get(off..end)?;
    Some(u32::from_be_bytes([word[0], word[1], word[2], word[3]]))
}
// ── Detect the save/restore GPR helper stubs ───────────────────────────────
//
// These are a well-known pattern emitted by the Xbox 360 linker.
// Save block: a cascade of `std rN, offset(r1)` for r14..r31 + `stw r12, -8(r1)` + `blr`
// Restore: a cascade of `ld rN, offset(r1)` for r14..r31 + `lwz r12, -8(r1)` + `mtspr LR, r12` + `blr`
//
// We detect the save block by finding 18 consecutive `std rN, ...(r1)` instructions
// for r14 through r31.
/// Locates the `__savegprlr` / `__restgprlr` helper blocks.
///
/// Scans each `(abs_start, abs_end)` range in `code_ranges` for 18 consecutive
/// `std rN, d(r1)` instructions covering r14 through r31. The first such
/// cascade is reported as the save block. The word located 20 instructions
/// after its start (past the 18 stores plus the two trailing instructions of
/// the save block) is then checked for `ld r14, d(r1)`; if it matches, that
/// address is reported as the restore block.
///
/// Opcodes 62 and 58 are DS-form: the two low bits select the actual
/// operation. Every instruction of the cascade, and the first instruction of
/// the restore block, must therefore have those bits clear, so update-form
/// stores/loads (`stdu`, `ldu`) or `lwa` are not mistaken for a stub.
///
/// Returns `(save_base, restore_base)`; the search stops at the first save
/// block found.
fn find_saverestore_stubs(
    pe: &[u8],
    image_base: u32,
    code_ranges: &[(u32, u32)], // (abs_start, abs_end)
) -> (Option<u32>, Option<u32>) {
    const FIRST_GPR: u32 = 14;
    const GPR_COUNT: u32 = 18;
    const OP_LD: u32 = 58;
    const OP_STD: u32 = 62;

    // True for the DS-form instruction `opcode rN, d(r1)` with XO == 0.
    let is_ds_r1 = |instr: u32, opcode: u32, reg: u32| -> bool {
        op(instr) == opcode
            && bits(instr, 10, 6) == reg
            && bits(instr, 15, 11) == 1
            && (instr & 3) == 0
    };

    for &(start, end) in code_ranges {
        let mut addr = start;
        // Equivalent to `addr + 18 * 4 < end`, but cannot overflow.
        while end.saturating_sub(addr) > GPR_COUNT * 4 {
            let is_save_cascade = (0..GPR_COUNT).all(|i| {
                read_instr(pe, addr + i * 4, image_base)
                    .map_or(false, |word| is_ds_r1(word, OP_STD, FIRST_GPR + i))
            });
            if is_save_cascade {
                // The restore block typically follows the save block directly:
                // skip the 18 stores plus `stw r12` and `blr`.
                let restore_base = addr
                    .checked_add((GPR_COUNT + 2) * 4)
                    .filter(|&after_save| {
                        read_instr(pe, after_save, image_base)
                            .map_or(false, |word| is_ds_r1(word, OP_LD, FIRST_GPR))
                    });
                return (Some(addr), restore_base);
            }
            addr += 4;
        }
    }
    (None, None)
}
// ── Main analysis ──────────────────────────────────────────────────────────
/// Runs the whole function-detection pipeline over an image.
///
/// * `pe` — image contents; `image_base` is the absolute address of its first byte.
/// * `entry_point` — absolute address that is always treated as a candidate.
/// * `code_sections` — `(va_start, va_size, flags)` triples, relative to
///   `image_base`; the flags are not inspected here.
///
/// Progress is reported on stderr. The returned [`FuncAnalysis`] holds every
/// accepted candidate plus one entry per save/restore stub block that was found.
pub fn analyze(
    pe: &[u8],
    image_base: u32,
    entry_point: u32,
    code_sections: &[(u32, u32, u32)], // (va_start, va_size, flags)
) -> FuncAnalysis {
    // Turn section-relative (va, size) pairs into absolute [start, end) ranges.
    let mut code_ranges: Vec<(u32, u32)> = Vec::with_capacity(code_sections.len());
    for &(va, size, _flags) in code_sections {
        let abs_start = image_base + va;
        code_ranges.push((abs_start, abs_start + size));
    }

    // Pass 1: locate the save/restore helper stubs.
    let (save_base, restore_base) = find_saverestore_stubs(pe, image_base, &code_ranges);
    if let Some(sb) = save_base {
        eprintln!("[func] __savegprlr stub at 0x{sb:08X}");
    }
    if let Some(rb) = restore_base {
        eprintln!("[func] __restgprlr stub at 0x{rb:08X}");
    }

    // Every instruction address inside the stubs; calls landing there are not
    // function entries. Save block: 18 std + stw + blr = 20 instructions.
    // Restore block: 18 ld + lwz + mtspr + blr = 21 instructions.
    let mut saverestore_addrs: HashSet<u32> = HashSet::new();
    for (base, instr_count) in [(save_base, 20u32), (restore_base, 21u32)] {
        if let Some(first) = base {
            saverestore_addrs.extend((0..instr_count).map(|i| first + i * 4));
        }
    }

    // Pass 2: every `bl` destination (plus the entry point) is a candidate.
    let mut call_targets: HashSet<u32> = HashSet::new();
    call_targets.insert(entry_point);
    for &(start, end) in &code_ranges {
        call_targets.extend(
            (start..end)
                .step_by(4)
                .filter_map(|addr| {
                    let instr = read_instr(pe, addr, image_base)?;
                    bl_target(instr, addr)
                })
                .filter(|target| !saverestore_addrs.contains(target)),
        );
    }
    eprintln!("[func] {} bl targets (candidate functions)", call_targets.len());

    // Pass 3: keep the candidates for which a function could be delimited.
    let mut functions: BTreeMap<u32, FuncInfo> = call_targets
        .iter()
        .filter_map(|&func_addr| {
            analyze_function(pe, image_base, func_addr, &code_ranges, save_base, restore_base)
                .map(|info| (func_addr, info))
        })
        .collect();

    // Pass 4: each stub block is recorded as one special function keyed by its
    // first entry point, even though every register has its own entry.
    if let Some(sb) = save_base {
        let save_stub = FuncInfo {
            start: sb,
            end: sb + 20 * 4, // 18 std + stw r12 + blr
            frame_size: 0,
            saved_gprs: 18,
            is_leaf: true,
            is_saverestore: true,
        };
        functions.insert(sb, save_stub);
    }
    if let Some(rb) = restore_base {
        let restore_stub = FuncInfo {
            start: rb,
            end: rb + 21 * 4, // 18 ld + lwz r12 + mtspr LR + blr
            frame_size: 0,
            saved_gprs: 18,
            is_leaf: true,
            is_saverestore: true,
        };
        functions.insert(rb, restore_stub);
    }

    eprintln!("[func] {} functions detected", functions.len());
    FuncAnalysis {
        functions,
        save_gpr_base: save_base,
        restore_gpr_base: restore_base,
    }
}
/// Analyze a single function starting at `func_addr`.
///
/// Returns `None` when `func_addr` is outside every range in `code_ranges`,
/// when its first word cannot be read, or when neither a prologue nor an
/// epilogue could be recognized (the address is then assumed to be data).
///
/// Prologue patterns, checked at the entry:
/// * A: `mfspr rN, LR`, optionally followed by `bl __savegprlr_NN`, optionally
///   followed by `stwu r1, -N(r1)`.
/// * B: `stwu r1, -N(r1)` alone — flagged as leaf because LR is not saved.
/// * C: nothing recognizable — flagged as leaf.
///
/// Only a negative `stwu` displacement counts as a frame allocation; a zero or
/// positive displacement would otherwise be negated into a meaningless, huge
/// `frame_size`.
///
/// The end of the function is the first `blr`, `bctr`, or `b` into the restore
/// stub found after the prologue, bounded by the enclosing code range and a
/// 1 MiB scan window. A function consisting of a single epilogue instruction
/// (for example a bare `blr`) is reported as a 4-byte leaf.
fn analyze_function(
    pe: &[u8],
    image_base: u32,
    func_addr: u32,
    code_ranges: &[(u32, u32)],
    save_base: Option<u32>,
    restore_base: Option<u32>,
) -> Option<FuncInfo> {
    /// Byte span covered by the 18 per-register entry points of a stub block.
    const STUB_ENTRY_BYTES: u32 = 18 * 4;
    /// Upper bound on how far the epilogue search may run (1 MiB).
    const MAX_FUNC_BYTES: u32 = 0x10_0000;

    // The candidate must lie inside a code range; that range's end also bounds
    // the epilogue search.
    let &(_, section_end) = code_ranges
        .iter()
        .find(|&&(s, e)| func_addr >= s && func_addr < e)?;

    let instr0 = read_instr(pe, func_addr, image_base)?;

    // Frame size in bytes if `instr` is a frame-allocating `stwu r1, -N(r1)`.
    let frame_alloc =
        |instr: u32| is_stwu_r1(instr).filter(|&d| d < 0).map(i32::unsigned_abs);

    let mut frame_size: u32 = 0;
    let mut saved_gprs: u32 = 0;
    let mut is_leaf = false;
    let mut prologue_len: u32 = 0;

    if is_mfspr_lr(instr0).is_some() {
        // Pattern A: mfspr rN, LR [+ bl __savegprlr_NN] [+ stwu r1, -N(r1)]
        prologue_len = 4;
        let instr1 = read_instr(pe, func_addr + 4, image_base).unwrap_or(0);
        if let (Some(target), Some(sb)) = (bl_target(instr1, func_addr + 4), save_base) {
            if target >= sb && target < sb + STUB_ENTRY_BYTES {
                // Entering the cascade at index k saves registers k..18.
                saved_gprs = 18 - (target - sb) / 4;
                prologue_len = 8;
            }
        }
        let next = read_instr(pe, func_addr + prologue_len, image_base).unwrap_or(0);
        if let Some(size) = frame_alloc(next) {
            frame_size = size;
            prologue_len += 4;
        }
    } else if let Some(size) = frame_alloc(instr0) {
        // Pattern B: frame allocation without an LR save.
        frame_size = size;
        prologue_len = 4;
        is_leaf = true; // no LR save = likely leaf (or uses CTR)
    } else {
        // Pattern C: no prologue — leaf function, just code until an epilogue.
        is_leaf = true;
    }

    // Walk forward from the end of the prologue to the first epilogue.
    let scan_start = func_addr + prologue_len;
    let scan_limit = scan_start.saturating_add(MAX_FUNC_BYTES).min(section_end);
    let mut addr = scan_start;
    let mut epilogue_end: Option<u32> = None;
    while addr < scan_limit {
        let instr = match read_instr(pe, addr, image_base) {
            Some(word) => word,
            None => break,
        };
        // `b __restgprlr_NN`: tail branch into the restore stub, which returns.
        let tail_into_restore = match (b_target(instr, addr), restore_base) {
            (Some(target), Some(rb)) => target >= rb && target < rb + STUB_ENTRY_BYTES,
            _ => false,
        };
        // The scan stops at the first epilogue; later returns in the same
        // function are not considered.
        if is_blr(instr) || is_bctr(instr) || tail_into_restore {
            epilogue_end = Some(addr + 4);
            break;
        }
        addr += 4;
    }

    let end_addr = match epilogue_end {
        Some(end) => end,
        // Confirmed prologue but no epilogue in range: still emit the function,
        // ending where the scan stopped.
        None if prologue_len > 0 => addr,
        // Neither prologue nor epilogue: most likely data, not code.
        None => return None,
    };

    Some(FuncInfo {
        start: func_addr,
        end: end_addr,
        frame_size,
        saved_gprs,
        is_leaf,
        is_saverestore: false,
    })
}
// ── Label generation ───────────────────────────────────────────────────────
impl FuncAnalysis {
    /// Builds an address → label map for all detected functions.
    ///
    /// The entry flagged as save/restore stub whose address equals
    /// `save_gpr_base` expands into 18 labels `__savegprlr_14` through
    /// `__savegprlr_31`, one per 4-byte entry point; the entry matching
    /// `restore_gpr_base` expands likewise into `__restgprlr_NN`.
    /// Every other function is labelled `sub_XXXXXXXX` after its start address.
    pub fn generate_labels(&self) -> HashMap<u32, String> {
        let mut labels = HashMap::new();
        for (&addr, info) in &self.functions {
            // A stub block is only expanded when it is flagged as such AND
            // sits at the recorded base address.
            let stub_at = |base: Option<u32>| info.is_saverestore && base == Some(addr);

            if stub_at(self.save_gpr_base) {
                labels.extend(
                    (14u32..32).map(|reg| (addr + (reg - 14) * 4, format!("__savegprlr_{reg}"))),
                );
            } else if stub_at(self.restore_gpr_base) {
                labels.extend(
                    (14u32..32).map(|reg| (addr + (reg - 14) * 4, format!("__restgprlr_{reg}"))),
                );
            } else {
                labels.insert(addr, format!("sub_{addr:08X}"));
            }
        }
        labels
    }

    /// Returns true if `addr` is the start of a detected function.
    pub fn is_function_start(&self, addr: u32) -> bool {
        self.get(addr).is_some()
    }

    /// Looks up the function that starts exactly at `addr`.
    pub fn get(&self, addr: u32) -> Option<&FuncInfo> {
        self.functions.get(&addr)
    }
}