xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks
The old src/ppc.rs, which re-implemented PPC formatting, collapses into a 30-line shim that delegates to xenia-cpu's single-source-of-truth disasm. A new disasm.rs wraps the shared iterator and feeds enriched items (analysis context: function membership, xrefs, mnemonics) into pluggable sinks.

The sinks are split into text.rs (objdump-like output), json.rs (JSONL stream matching the new xenia dis --json mode), and duckdb.rs (the analysis DB ingest). db.rs is restructured into ingest_instructions + write_analysis_results so that a run can stop after raw ingest, and a new target_hex column lands on the instructions table. sql_views.rs adds five additive views layered on top of the raw tables.

Tests: assert-based JSON-fixture goldens (disasm_goldens) and a PRAGMA-table_info schema golden (db_schema_golden) covering all ingested tables and the SQL views.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -184,12 +184,14 @@ fn find_saverestore_stubs(
|
||||
|
||||
// ── Main analysis ──────────────────────────────────────────────────────────
|
||||
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))]
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
entry_point: u32,
|
||||
code_sections: &[(u32, u32, u32)], // (va_start, va_size, flags)
|
||||
) -> FuncAnalysis {
|
||||
let started = std::time::Instant::now();
|
||||
let code_ranges: Vec<(u32, u32)> = code_sections.iter()
|
||||
.map(|(va, sz, _)| (image_base + va, image_base + va + sz))
|
||||
.collect();
|
||||
@@ -197,10 +199,10 @@ pub fn analyze(
|
||||
// 1. Find save/restore stubs
|
||||
let (save_base, restore_base) = find_saverestore_stubs(pe, image_base, &code_ranges);
|
||||
if let Some(sb) = save_base {
|
||||
eprintln!("[func] __savegprlr stub at 0x{sb:08X}");
|
||||
tracing::debug!(addr = format_args!("{:#010x}", sb), "__savegprlr stub");
|
||||
}
|
||||
if let Some(rb) = restore_base {
|
||||
eprintln!("[func] __restgprlr stub at 0x{rb:08X}");
|
||||
tracing::debug!(addr = format_args!("{:#010x}", rb), "__restgprlr stub");
|
||||
}
|
||||
|
||||
// Set of addresses in the save/restore region (to exclude from function detection)
|
||||
@@ -221,18 +223,17 @@ pub fn analyze(
|
||||
for &(start, end) in &code_ranges {
|
||||
let mut addr = start;
|
||||
while addr < end {
|
||||
if let Some(instr) = read_instr(pe, addr, image_base) {
|
||||
if let Some(target) = bl_target(instr, addr) {
|
||||
if let Some(instr) = read_instr(pe, addr, image_base)
|
||||
&& let Some(target) = bl_target(instr, addr) {
|
||||
// Don't count calls into save/restore stubs as function entries
|
||||
if !saverestore_addrs.contains(&target) {
|
||||
call_targets.insert(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
addr += 4;
|
||||
}
|
||||
}
|
||||
eprintln!("[func] {} bl targets (candidate functions)", call_targets.len());
|
||||
tracing::debug!(candidates = call_targets.len(), "bl targets collected");
|
||||
|
||||
// 3. For each candidate, detect prologue and walk to epilogue
|
||||
let mut functions: BTreeMap<u32, FuncInfo> = BTreeMap::new();
|
||||
@@ -267,7 +268,13 @@ pub fn analyze(
|
||||
});
|
||||
}
|
||||
|
||||
eprintln!("[func] {} functions detected", functions.len());
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "functions").record(elapsed_ms);
|
||||
tracing::info!(
|
||||
functions = functions.len(),
|
||||
elapsed_ms,
|
||||
"function detection complete"
|
||||
);
|
||||
|
||||
FuncAnalysis {
|
||||
functions,
|
||||
@@ -302,15 +309,13 @@ fn analyze_function(
|
||||
let instr1 = read_instr(pe, func_addr + 4, image_base).unwrap_or(0);
|
||||
|
||||
// Check if next is bl to save stub
|
||||
if let Some(target) = bl_target(instr1, func_addr + 4) {
|
||||
if let Some(sb) = save_base {
|
||||
if target >= sb && target < sb + 18 * 4 {
|
||||
if let Some(target) = bl_target(instr1, func_addr + 4)
|
||||
&& let Some(sb) = save_base
|
||||
&& target >= sb && target < sb + 18 * 4 {
|
||||
let idx = (target - sb) / 4;
|
||||
saved_gprs = 18 - idx;
|
||||
prologue_len = 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Next should be stwu r1, -N(r1)
|
||||
let stwu_instr = read_instr(pe, func_addr + prologue_len, image_base).unwrap_or(0);
|
||||
@@ -356,14 +361,12 @@ fn analyze_function(
|
||||
}
|
||||
|
||||
// Epilogue: b __restgprlr_NN (tail branch into restore stub)
|
||||
if let Some(target) = b_target(instr, addr) {
|
||||
if let Some(rb) = restore_base {
|
||||
if target >= rb && target < rb + 18 * 4 {
|
||||
if let Some(target) = b_target(instr, addr)
|
||||
&& let Some(rb) = restore_base
|
||||
&& target >= rb && target < rb + 18 * 4 {
|
||||
end_addr = addr + 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Epilogue: bctr (indirect tail call — end of function)
|
||||
if is_bctr(instr) {
|
||||
@@ -407,24 +410,22 @@ impl FuncAnalysis {
|
||||
for (&addr, fi) in &self.functions {
|
||||
if fi.is_saverestore {
|
||||
// Label the block start, plus individual register entry points
|
||||
if let Some(sb) = self.save_gpr_base {
|
||||
if addr == sb {
|
||||
if let Some(sb) = self.save_gpr_base
|
||||
&& addr == sb {
|
||||
for i in 0u32..18 {
|
||||
let reg = 14 + i;
|
||||
labels.insert(sb + i * 4, format!("__savegprlr_{reg}"));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if let Some(rb) = self.restore_gpr_base {
|
||||
if addr == rb {
|
||||
if let Some(rb) = self.restore_gpr_base
|
||||
&& addr == rb {
|
||||
for i in 0u32..18 {
|
||||
let reg = 14 + i;
|
||||
labels.insert(rb + i * 4, format!("__restgprlr_{reg}"));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
labels.insert(addr, format!("sub_{addr:08X}"));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user