Initial commit: xenia-rs workspace for Xbox 360 RE
Rust reimplementation of the xenia Xbox 360 emulator targeting reverse-engineering and preservation, initially scoped to Project Sylpheed. Includes: - XEX2 loader (LZX decompression, AES decryption, PE parsing) - XISO / XGD2 disc image VFS - PPC interpreter with 200+ opcodes and VMX128 decoding - Static analyzer: functions, cross-references, labels, asm + SQLite output - HLE kernel covering the xboxkrnl/xam subset used by Sylpheed init - Debugger with in-memory and SQLite-backed execution tracing - `xenia-rs` CLI with extract/dis/exec commands that produce cumulative, superset SQLite databases and opt-in instruction/import/branch traces Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
296
crates/xenia-analysis/src/xref.rs
Normal file
296
crates/xenia-analysis/src/xref.rs
Normal file
@@ -0,0 +1,296 @@
|
||||
//! Cross-reference analysis for Xbox 360 PE images.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use xenia_xex::pe::PeSection;
|
||||
use crate::func::FuncAnalysis;
|
||||
|
||||
// ── Cross-reference types ────────────────────────────────────────────────
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum XrefKind {
|
||||
Call, // bl
|
||||
Jump, // b (unconditional)
|
||||
Branch, // bc / bXX (conditional)
|
||||
DataRead, // lwz, lbz, lhz, lha, lfs, lfd, etc. from resolved address
|
||||
DataWrite, // stw, stb, sth, stfs, stfd, etc. to resolved address
|
||||
DataRef, // address computed via lis+addi/ori but not directly loaded/stored
|
||||
}
|
||||
|
||||
impl XrefKind {
|
||||
pub fn tag(self) -> &'static str {
|
||||
match self {
|
||||
XrefKind::Call => "call",
|
||||
XrefKind::Jump => "j",
|
||||
XrefKind::Branch => "br",
|
||||
XrefKind::DataRead => "read",
|
||||
XrefKind::DataWrite => "write",
|
||||
XrefKind::DataRef => "ref",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_data(self) -> bool {
|
||||
matches!(self, XrefKind::DataRead | XrefKind::DataWrite | XrefKind::DataRef)
|
||||
}
|
||||
|
||||
pub fn db_tag(self) -> &'static str {
|
||||
self.tag()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Xref {
|
||||
pub source: u32,
|
||||
pub kind: XrefKind,
|
||||
}
|
||||
|
||||
/// Maps a target address to every reference recorded against it.
pub type XrefMap = HashMap<u32, Vec<Xref>>;
|
||||
|
||||
/// Result of cross-reference analysis.
|
||||
pub struct XrefResult {
|
||||
pub labels: HashMap<u32, String>,
|
||||
pub xrefs: XrefMap,
|
||||
pub data_annotations: HashMap<u32, (u32, XrefKind)>,
|
||||
}
|
||||
|
||||
/// Perform full cross-reference analysis on a PE image.
|
||||
pub fn analyze_xrefs(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
entry_point: u32,
|
||||
sections: &[PeSection],
|
||||
func_analysis: &FuncAnalysis,
|
||||
import_map: &HashMap<u32, String>,
|
||||
) -> XrefResult {
|
||||
let func_labels = func_analysis.generate_labels();
|
||||
let mut labels: HashMap<u32, String> = func_labels;
|
||||
labels.insert(entry_point, "entry_point".to_string());
|
||||
|
||||
// Add import thunks as labels
|
||||
for (addr, name) in import_map {
|
||||
labels.insert(*addr, format!("__imp_{}", name.replace("::", "_")));
|
||||
}
|
||||
|
||||
// First pass: collect branch targets + cross-references from code sections
|
||||
let mut xrefs: XrefMap = HashMap::new();
|
||||
|
||||
for section in sections {
|
||||
if !section.is_code() { continue; }
|
||||
let va_start = section.virtual_address;
|
||||
let va_end = va_start + section.virtual_size;
|
||||
let file_start = section.virtual_address as usize;
|
||||
|
||||
let mut addr = va_start;
|
||||
while addr < va_end {
|
||||
let abs_addr = image_base + addr;
|
||||
let off = (addr - va_start) as usize + file_start;
|
||||
if off + 4 > pe.len() { break; }
|
||||
let instr = u32::from_be_bytes([
|
||||
pe[off], pe[off+1], pe[off+2], pe[off+3]
|
||||
]);
|
||||
|
||||
collect_branch_target(instr, abs_addr, &mut labels, &mut xrefs);
|
||||
addr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass: resolve data references via lis+load/store pattern matching
|
||||
let mut data_annotations: HashMap<u32, (u32, XrefKind)> = HashMap::new();
|
||||
|
||||
// Build set of valid data address ranges for filtering false positives
|
||||
let data_ranges: Vec<(u32, u32)> = sections.iter()
|
||||
.map(|s| (image_base + s.virtual_address,
|
||||
image_base + s.virtual_address + s.virtual_size))
|
||||
.collect();
|
||||
|
||||
for section in sections {
|
||||
if !section.is_code() { continue; }
|
||||
let va_start = section.virtual_address;
|
||||
let va_end = va_start + section.virtual_size;
|
||||
let file_start = section.virtual_address as usize;
|
||||
|
||||
// Register state: track lis results. reg_hi[r] = Some(high_16_bits << 16)
|
||||
let mut reg_hi: [Option<u32>; 32] = [None; 32];
|
||||
|
||||
let mut addr = va_start;
|
||||
while addr < va_end {
|
||||
let abs_addr = image_base + addr;
|
||||
let off = (addr - va_start) as usize + file_start;
|
||||
if off + 4 > pe.len() { break; }
|
||||
let instr = u32::from_be_bytes([
|
||||
pe[off], pe[off+1], pe[off+2], pe[off+3]
|
||||
]);
|
||||
|
||||
let opcode = (instr >> 26) & 0x3F;
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
let simm = ((instr & 0xFFFF) as i16) as i32;
|
||||
let uimm = (instr & 0xFFFF) as u32;
|
||||
|
||||
// Reset tracking on function boundaries (prologue = mfspr rN, LR)
|
||||
if opcode == 31 {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
if xo == 339 { // mfspr
|
||||
let spr = (((instr >> 16) & 0x1F) << 5) | ((instr >> 11) & 0x1F);
|
||||
if spr == 8 { // LR
|
||||
reg_hi = [None; 32];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match opcode {
|
||||
// lis rD, IMM (encoded as addis rD, r0, IMM)
|
||||
15 if ra == 0 => {
|
||||
reg_hi[rd] = Some(uimm << 16);
|
||||
}
|
||||
// addis rD, rA, IMM (rA != 0) — if rA has known lis, update
|
||||
15 if ra != 0 => {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
reg_hi[rd] = Some(base.wrapping_add(uimm << 16));
|
||||
} else {
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
}
|
||||
// addi rD, rA, IMM — compute full address if rA has known lis
|
||||
14 if ra != 0 => {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef });
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
reg_hi[rd] = Some(data_addr); // propagate for chained access
|
||||
} else {
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
}
|
||||
// ori rA, rS, UIMM — compute full address
|
||||
24 => {
|
||||
let rs = rd; // source is bits 21-25 for ori
|
||||
if let Some(base) = reg_hi[rs] {
|
||||
let data_addr = base | uimm;
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef });
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
reg_hi[ra] = Some(data_addr);
|
||||
} else {
|
||||
reg_hi[ra] = None;
|
||||
}
|
||||
}
|
||||
// Load instructions: lwz, lbz, lhz, lha, lfs, lfd, lwzu, etc.
|
||||
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 46 | 48 | 49 | 50 | 51 => {
|
||||
if ra != 0 {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRead));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRead });
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Load into rD may clobber the tracked value
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// Store instructions: stw, stb, sth, stfs, stfd, stwu, etc.
|
||||
36 | 37 | 38 | 39 | 44 | 45 | 47 | 52 | 53 | 54 | 55 => {
|
||||
if ra != 0 {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataWrite));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataWrite });
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Any other instruction writing to rD: invalidate
|
||||
_ => {
|
||||
// Conservatively invalidate for instructions that modify rD
|
||||
// (most ALU ops, loads, etc.)
|
||||
if opcode != 18 && opcode != 16 && opcode != 17 { // skip branch/sc
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
addr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
XrefResult { labels, xrefs, data_annotations }
|
||||
}
|
||||
|
||||
fn collect_branch_target(instr: u32, addr: u32, labels: &mut HashMap<u32, String>, xrefs: &mut XrefMap) {
|
||||
let op = (instr >> 26) & 0x3F;
|
||||
match op {
|
||||
18 => {
|
||||
// I-form: b/bl/ba/bla
|
||||
let li = sign_ext26(instr & 0x03FFFFFC);
|
||||
let aa = instr & 2 != 0;
|
||||
let lk = instr & 1 != 0;
|
||||
let target = if aa { li as u32 } else { addr.wrapping_add(li as u32) };
|
||||
labels.entry(target).or_insert_with(|| format!("loc_{target:08X}"));
|
||||
let kind = if lk { XrefKind::Call } else { XrefKind::Jump };
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind });
|
||||
}
|
||||
16 => {
|
||||
// B-form: bc/bcl
|
||||
let bd = sign_ext16(instr & 0xFFFC);
|
||||
let aa = instr & 2 != 0;
|
||||
let target = if aa { bd as u32 } else { addr.wrapping_add(bd as u32) };
|
||||
labels.entry(target).or_insert_with(|| format!("loc_{target:08X}"));
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind: XrefKind::Branch });
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Sign-extends the low 16 bits of `val` to an `i32`; higher bits are ignored.
fn sign_ext16(val: u32) -> i32 {
    let low = val as u16; // keep bits 0..=15 only
    i32::from(low as i16)
}
|
||||
|
||||
/// Sign-extends the low 26 bits of `val` to an `i32`; higher bits are ignored.
fn sign_ext26(val: u32) -> i32 {
    const FIELD_MASK: u32 = 0x03FF_FFFF;
    const SIGN_BIT: u32 = 1 << 25;

    let field = val & FIELD_MASK;
    // Flipping the sign bit and then subtracting it maps the 26-bit
    // two's-complement field onto the full 32-bit range.
    (field ^ SIGN_BIT).wrapping_sub(SIGN_BIT) as i32
}
|
||||
|
||||
/// Returns `true` when `addr` lies inside at least one half-open
/// `[start, end)` range of `ranges`.
fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&addr) {
            return true;
        }
    }
    false
}
|
||||
|
||||
/// Find which section a data address falls in.
|
||||
pub fn section_for_addr<'a>(addr: u32, sections: &'a [PeSection], image_base: u32) -> Option<&'a str> {
|
||||
for s in sections {
|
||||
let start = image_base + s.virtual_address;
|
||||
let end = start + s.virtual_size;
|
||||
if addr >= start && addr < end {
|
||||
return Some(&s.name);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Resolve a source address to "function_name+0xNN" or just "0xADDR".
|
||||
pub fn resolve_source_label(
|
||||
addr: u32,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> String {
|
||||
// Direct label hit?
|
||||
if let Some(lbl) = labels.get(&addr) {
|
||||
return lbl.clone();
|
||||
}
|
||||
|
||||
// Find the containing function (largest start <= addr)
|
||||
if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back() {
|
||||
if let Some(func_label) = labels.get(&func_start) {
|
||||
let offset = addr - func_start;
|
||||
return format!("{func_label}+0x{offset:X}");
|
||||
}
|
||||
}
|
||||
|
||||
format!("0x{addr:08X}")
|
||||
}
|
||||
Reference in New Issue
Block a user