//! Cross-reference analysis for Xbox 360 PE images. use std::collections::HashMap; use xenia_xex::pe::PeSection; use crate::func::FuncAnalysis; // ── Cross-reference types ──────────────────────────────────────────────── #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum XrefKind { Call, // bl IndirectCall, // bcctrl through a statically-resolvable vtable slot (M5) Jump, // b (unconditional) Branch, // bc / bXX (conditional) DataRead, // lwz, lbz, lhz, lha, lfs, lfd, etc. from resolved address DataWrite, // stw, stb, sth, stfs, stfd, etc. to resolved address DataRef, // address computed via lis+addi/ori but not directly loaded/stored } impl XrefKind { pub fn tag(self) -> &'static str { match self { XrefKind::Call => "call", XrefKind::IndirectCall => "ind_call", XrefKind::Jump => "j", XrefKind::Branch => "br", XrefKind::DataRead => "read", XrefKind::DataWrite => "write", XrefKind::DataRef => "ref", } } pub fn is_data(self) -> bool { matches!(self, XrefKind::DataRead | XrefKind::DataWrite | XrefKind::DataRef) } pub fn db_tag(self) -> &'static str { self.tag() } } /// Sub-classification of how `source`'s instruction computes its target /// address. Only meaningful for data xrefs (`read` / `write` / `ref`); call /// / jump / branch / ind_call rows store `None`. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] pub enum AddrMode { /// Standard signed-16 displacement: `lwz rD, simm(rA)`, `stw rS, simm(rA)`, /// FP D-forms (`lfs/lfd/stfs/stfd`), update variants. The dominant case. DForm, /// Address materialised via `lis + addi` register tracking — no /// load/store yet at this site. LisAddi, /// Address materialised via `lis + ori` register tracking. LisOri, /// Multi-word D-form: `lmw / stmw rS, simm(rA)` — emits one xref per /// register slot (32-rS slots starting at the resolved base). Multiword, /// X-form indexed: `stwx / stbx / sthx / stwux / stbux / sthux / stdx / /// stdux` plus AltiVec/VMX vector stores `stvx / stvxl / stvebx / /// stvehx / stvewx`. Static resolution requires both rA and rB /// constant. (M6 + VMX follow-up.) XFormIndexed, /// X-form byte-reverse: `stwbrx / sthbrx / lwbrx / lhbrx`. XFormByteRev, /// Reservation/atomic store-conditional: `stwcx. / stdcx.`. Atomic, /// Cache-line clear: `dcbz rA, rB` — clears 32 bytes at rA+rB. DCBZ, } impl AddrMode { pub fn tag(self) -> &'static str { match self { AddrMode::DForm => "d_form", AddrMode::LisAddi => "lis_addi", AddrMode::LisOri => "lis_ori", AddrMode::Multiword => "multiword", AddrMode::XFormIndexed => "x_form_indexed", AddrMode::XFormByteRev => "x_form_byterev", AddrMode::Atomic => "atomic", AddrMode::DCBZ => "dcbz", } } } #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Xref { pub source: u32, pub kind: XrefKind, /// `None` for control-flow edges; `Some(...)` for data edges. pub addr_mode: Option, } pub type XrefMap = HashMap>; /// Result of cross-reference analysis. pub struct XrefResult { pub labels: HashMap, pub xrefs: XrefMap, pub data_annotations: HashMap, } /// Perform full cross-reference analysis on a PE image. #[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))] pub fn analyze_xrefs( pe: &[u8], image_base: u32, entry_point: u32, sections: &[PeSection], func_analysis: &FuncAnalysis, import_map: &HashMap, ) -> XrefResult { let started = std::time::Instant::now(); let func_labels = func_analysis.generate_labels(); let mut labels: HashMap = func_labels; labels.insert(entry_point, "entry_point".to_string()); // Add import thunks as labels for (addr, name) in import_map { labels.insert(*addr, format!("__imp_{}", name.replace("::", "_"))); } // First pass: collect branch targets + cross-references from code sections let mut xrefs: XrefMap = HashMap::new(); for section in sections { if !section.is_code() { continue; } let va_start = section.virtual_address; let va_end = va_start + section.virtual_size; let file_start = section.virtual_address as usize; let mut addr = va_start; while addr < va_end { let abs_addr = image_base + addr; let off = (addr - va_start) as usize + file_start; if off + 4 > pe.len() { break; } let instr = u32::from_be_bytes([ pe[off], pe[off+1], pe[off+2], pe[off+3] ]); collect_branch_target(instr, abs_addr, &mut labels, &mut xrefs); addr += 4; } } // Second pass: resolve data references via lis+load/store pattern matching let mut data_annotations: HashMap = HashMap::new(); // Build set of valid data address ranges for filtering false positives let data_ranges: Vec<(u32, u32)> = sections.iter() .map(|s| (image_base + s.virtual_address, image_base + s.virtual_address + s.virtual_size)) .collect(); for section in sections { if !section.is_code() { continue; } let va_start = section.virtual_address; let va_end = va_start + section.virtual_size; let file_start = section.virtual_address as usize; // Register state: track lis results. reg_hi[r] = Some(high_16_bits << 16) let mut reg_hi: [Option; 32] = [None; 32]; let mut addr = va_start; while addr < va_end { let abs_addr = image_base + addr; let off = (addr - va_start) as usize + file_start; if off + 4 > pe.len() { break; } let instr = u32::from_be_bytes([ pe[off], pe[off+1], pe[off+2], pe[off+3] ]); let opcode = (instr >> 26) & 0x3F; let rd = ((instr >> 21) & 0x1F) as usize; let ra = ((instr >> 16) & 0x1F) as usize; let simm = ((instr & 0xFFFF) as i16) as i32; let uimm = instr & 0xFFFF; // Reset tracking on function boundaries (prologue = mfspr rN, LR) if opcode == 31 { let xo = (instr >> 1) & 0x3FF; if xo == 339 { // mfspr let spr = (((instr >> 16) & 0x1F) << 5) | ((instr >> 11) & 0x1F); if spr == 8 { // LR reg_hi = [None; 32]; } } } match opcode { // lis rD, IMM (encoded as addis rD, r0, IMM) 15 if ra == 0 => { reg_hi[rd] = Some(uimm << 16); } // addis rD, rA, IMM (rA != 0) — if rA has known lis, update 15 if ra != 0 => { if let Some(base) = reg_hi[ra] { reg_hi[rd] = Some(base.wrapping_add(uimm << 16)); } else { reg_hi[rd] = None; } } // addi rD, rA, IMM — compute full address if rA has known lis 14 if ra != 0 => { if let Some(base) = reg_hi[ra] { let data_addr = base.wrapping_add(simm as u32); if is_in_ranges(data_addr, &data_ranges) { data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef)); xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef, addr_mode: Some(AddrMode::LisAddi), }); labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}")); } reg_hi[rd] = Some(data_addr); // propagate for chained access } else { reg_hi[rd] = None; } } // ori rA, rS, UIMM — compute full address 24 => { let rs = rd; // source is bits 21-25 for ori if let Some(base) = reg_hi[rs] { let data_addr = base | uimm; if is_in_ranges(data_addr, &data_ranges) { data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef)); xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef, addr_mode: Some(AddrMode::LisOri), }); labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}")); } reg_hi[ra] = Some(data_addr); } else { reg_hi[ra] = None; } } // Load instructions: lwz, lbz, lhz, lha, lfs, lfd, lwzu, etc. 32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 48 | 49 | 50 | 51 => { if ra != 0 && let Some(base) = reg_hi[ra] { let data_addr = base.wrapping_add(simm as u32); if is_in_ranges(data_addr, &data_ranges) { data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRead)); xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRead, addr_mode: Some(AddrMode::DForm), }); labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}")); } } // Load into rD may clobber the tracked value reg_hi[rd] = None; } // lmw rD, simm(rA) — D-form multi-word load. Reads (32-rD) // consecutive 4-byte words starting at base+simm into // rD..r31. Emits one DataRead per slot. 46 => { if ra != 0 && let Some(base) = reg_hi[ra] { let mut addr_w = base.wrapping_add(simm as u32); for _slot in (rd as u32)..32 { if is_in_ranges(addr_w, &data_ranges) { data_annotations.insert(abs_addr, (addr_w, XrefKind::DataRead)); xrefs.entry(addr_w).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRead, addr_mode: Some(AddrMode::Multiword), }); labels.entry(addr_w).or_insert_with(|| format!("dat_{addr_w:08X}")); } addr_w = addr_w.wrapping_add(4); } } reg_hi[rd] = None; } // Store instructions: stw, stb, sth, stfs, stfd, stwu, etc. 36 | 37 | 38 | 39 | 44 | 45 | 52 | 53 | 54 | 55 => { if ra != 0 && let Some(base) = reg_hi[ra] { let data_addr = base.wrapping_add(simm as u32); if is_in_ranges(data_addr, &data_ranges) { data_annotations.insert(abs_addr, (data_addr, XrefKind::DataWrite)); xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataWrite, addr_mode: Some(AddrMode::DForm), }); labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}")); } } } // stmw rS, simm(rA) — D-form multi-word store. Writes // (32-rS) consecutive 4-byte words from rS..r31 to // base+simm onward. Emits one DataWrite per slot. 47 => { if ra != 0 && let Some(base) = reg_hi[ra] { let mut addr_w = base.wrapping_add(simm as u32); for _slot in (rd as u32)..32 { if is_in_ranges(addr_w, &data_ranges) { data_annotations.insert(abs_addr, (addr_w, XrefKind::DataWrite)); xrefs.entry(addr_w).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataWrite, addr_mode: Some(AddrMode::Multiword), }); labels.entry(addr_w).or_insert_with(|| format!("dat_{addr_w:08X}")); } addr_w = addr_w.wrapping_add(4); } } } // X-form: opcode 31 — indexed loads/stores, atomic ops, dcbz. // We can't statically resolve `rA + rB` without tracking rB // too; we record an xref ONLY when rB is also a known // constant (rare) OR when rB is r0 (which encodes as zero). // Falls through to the generic-clobber arm afterwards via // the explicit reg_hi update. 31 => { let xo = (instr >> 1) & 0x3FF; let rb = ((instr >> 11) & 0x1F) as usize; let resolve_rab = |reg_hi: &[Option; 32]| -> Option { let a = if ra == 0 { Some(0u32) } else { reg_hi[ra] }; let b = if rb == 0 { Some(0u32) } else { reg_hi[rb] }; match (a, b) { (Some(av), Some(bv)) => Some(av.wrapping_add(bv)), _ => None, } }; let mode_for_xo = |xo: u32| -> Option<(AddrMode, XrefKind)> { match xo { // Atomic store-conditional 150 => Some((AddrMode::Atomic, XrefKind::DataWrite)), // stwcx. 214 => Some((AddrMode::Atomic, XrefKind::DataWrite)), // stdcx. // Byte-reverse stores 662 => Some((AddrMode::XFormByteRev, XrefKind::DataWrite)), // stwbrx 918 => Some((AddrMode::XFormByteRev, XrefKind::DataWrite)), // sthbrx // Byte-reverse loads 534 => Some((AddrMode::XFormByteRev, XrefKind::DataRead)), // lwbrx 790 => Some((AddrMode::XFormByteRev, XrefKind::DataRead)), // lhbrx // dcbz — cache-line zero (32-byte clear). Treat as a write. 1014 => Some((AddrMode::DCBZ, XrefKind::DataWrite)), // Plain X-form indexed stores (the common ones) 151 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stwx 215 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stbx 407 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // sthx 183 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stwux 247 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stbux 439 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // sthux 149 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stdx 181 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stdux // Plain X-form indexed loads 23 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lwzx 87 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lbzx 279 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhzx 343 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhax 55 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lwzux 119 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lbzux 311 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhzux 375 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhaux 21 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // ldx 53 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // ldux // AltiVec/VMX (opcode 31) loads & stores. Element // variants store one byte/halfword/word; full // `stvx` stores 16 bytes. Address resolution still // requires both rA and rB constant — common only // in static-table setup loops. 231 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvx 487 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvxl 135 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvebx 167 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvehx 199 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stvewx // AltiVec/VMX loads — same XO range, kind=read. 103 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvx 359 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvxl 7 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvebx 39 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvehx 71 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lvewx _ => None, } }; if let Some((addr_mode, kind)) = mode_for_xo(xo) && let Some(data_addr) = resolve_rab(®_hi) && is_in_ranges(data_addr, &data_ranges) { data_annotations.insert(abs_addr, (data_addr, kind)); xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind, addr_mode: Some(addr_mode), }); labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}")); } // Fall through: any X-form op may write rD; invalidate. reg_hi[rd] = None; } // Any other instruction writing to rD: invalidate _ => { // Conservatively invalidate for instructions that modify rD // (most ALU ops, loads, etc.) if opcode != 18 && opcode != 16 && opcode != 17 { // skip branch/sc reg_hi[rd] = None; } } } addr += 4; } } let elapsed_ms = started.elapsed().as_millis() as f64; metrics::histogram!("analysis.phase_ms", "phase" => "xrefs").record(elapsed_ms); let total_xrefs: usize = xrefs.values().map(|v| v.len()).sum(); tracing::info!( labels = labels.len(), xrefs = total_xrefs, data_annotations = data_annotations.len(), elapsed_ms, "xref analysis complete" ); XrefResult { labels, xrefs, data_annotations } } fn collect_branch_target(instr: u32, addr: u32, labels: &mut HashMap, xrefs: &mut XrefMap) { let op = (instr >> 26) & 0x3F; match op { 18 => { // I-form: b/bl/ba/bla let li = sign_ext26(instr & 0x03FFFFFC); let aa = instr & 2 != 0; let lk = instr & 1 != 0; let target = if aa { li as u32 } else { addr.wrapping_add(li as u32) }; labels.entry(target).or_insert_with(|| format!("loc_{target:08X}")); let kind = if lk { XrefKind::Call } else { XrefKind::Jump }; xrefs.entry(target).or_default().push(Xref { source: addr, kind, addr_mode: None }); } 16 => { // B-form: bc/bcl let bd = sign_ext16(instr & 0xFFFC); let aa = instr & 2 != 0; let target = if aa { bd as u32 } else { addr.wrapping_add(bd as u32) }; labels.entry(target).or_insert_with(|| format!("loc_{target:08X}")); xrefs.entry(target).or_default().push(Xref { source: addr, kind: XrefKind::Branch, addr_mode: None }); } _ => {} } } fn sign_ext16(val: u32) -> i32 { ((val << 16) as i32) >> 16 } fn sign_ext26(val: u32) -> i32 { ((val << 6) as i32) >> 6 } fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool { ranges.iter().any(|&(start, end)| addr >= start && addr < end) } /// Find which section a data address falls in. pub fn section_for_addr(addr: u32, sections: &[PeSection], image_base: u32) -> Option<&str> { for s in sections { let start = image_base + s.virtual_address; let end = start + s.virtual_size; if addr >= start && addr < end { return Some(&s.name); } } None } /// Resolve a source address to "function_name+0xNN" or just "0xADDR". pub fn resolve_source_label( addr: u32, func_analysis: &FuncAnalysis, labels: &HashMap, ) -> String { // Direct label hit? if let Some(lbl) = labels.get(&addr) { return lbl.clone(); } // Find the containing function (largest start <= addr) if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back() && let Some(func_label) = labels.get(&func_start) { let offset = addr - func_start; return format!("{func_label}+0x{offset:X}"); } format!("0x{addr:08X}") } #[cfg(test)] mod tests { use super::*; #[test] fn addr_mode_tags_are_distinct() { let modes = [ AddrMode::DForm, AddrMode::LisAddi, AddrMode::LisOri, AddrMode::Multiword, AddrMode::XFormIndexed, AddrMode::XFormByteRev, AddrMode::Atomic, AddrMode::DCBZ, ]; let tags: std::collections::HashSet<&str> = modes.iter().map(|m| m.tag()).collect(); assert_eq!(tags.len(), modes.len(), "every AddrMode variant must have a unique tag"); } #[test] fn xref_struct_carries_addr_mode_for_data_edges() { let x = Xref { source: 0x1234, kind: XrefKind::DataWrite, addr_mode: Some(AddrMode::DForm) }; assert_eq!(x.addr_mode.unwrap().tag(), "d_form"); } #[test] fn xref_struct_addr_mode_is_none_for_call_edges() { let x = Xref { source: 0x1234, kind: XrefKind::Call, addr_mode: None }; assert!(x.addr_mode.is_none()); } }