xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks

The old src/ppc.rs that re-implemented PPC formatting collapses into
a 30-line shim that delegates to xenia-cpu's single-source-of-truth
disasm. A new disasm.rs wraps the shared iterator and feeds enriched
items (analysis context: function membership, xrefs, mnemonics) into
pluggable sinks.

The sinks are split into three modules: text.rs (objdump-like output),
json.rs (a JSONL stream matching the new xenia dis --json mode), and
duckdb.rs (the analysis DB ingest). db.rs is restructured into
ingest_instructions and write_analysis_results so that a run can stop
after the raw ingest, and the instructions table gains a new target_hex
column. sql_views.rs adds five additive views layered on top of the raw
tables.

Tests: assert-based JSON-fixture goldens (disasm_goldens) and a
PRAGMA-table_info schema golden (db_schema_golden) covering all
ingested tables and the SQL views.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:28:06 +02:00
parent c36cca14f9
commit 45e15d7885
15 changed files with 1194 additions and 1757 deletions

View File

@@ -184,12 +184,14 @@ fn find_saverestore_stubs(
// ── Main analysis ──────────────────────────────────────────────────────────
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))]
pub fn analyze(
pe: &[u8],
image_base: u32,
entry_point: u32,
code_sections: &[(u32, u32, u32)], // (va_start, va_size, flags)
) -> FuncAnalysis {
let started = std::time::Instant::now();
let code_ranges: Vec<(u32, u32)> = code_sections.iter()
.map(|(va, sz, _)| (image_base + va, image_base + va + sz))
.collect();
@@ -197,10 +199,10 @@ pub fn analyze(
// 1. Find save/restore stubs
let (save_base, restore_base) = find_saverestore_stubs(pe, image_base, &code_ranges);
if let Some(sb) = save_base {
eprintln!("[func] __savegprlr stub at 0x{sb:08X}");
tracing::debug!(addr = format_args!("{:#010x}", sb), "__savegprlr stub");
}
if let Some(rb) = restore_base {
eprintln!("[func] __restgprlr stub at 0x{rb:08X}");
tracing::debug!(addr = format_args!("{:#010x}", rb), "__restgprlr stub");
}
// Set of addresses in the save/restore region (to exclude from function detection)
@@ -221,18 +223,17 @@ pub fn analyze(
for &(start, end) in &code_ranges {
let mut addr = start;
while addr < end {
if let Some(instr) = read_instr(pe, addr, image_base) {
if let Some(target) = bl_target(instr, addr) {
if let Some(instr) = read_instr(pe, addr, image_base)
&& let Some(target) = bl_target(instr, addr) {
// Don't count calls into save/restore stubs as function entries
if !saverestore_addrs.contains(&target) {
call_targets.insert(target);
}
}
}
addr += 4;
}
}
eprintln!("[func] {} bl targets (candidate functions)", call_targets.len());
tracing::debug!(candidates = call_targets.len(), "bl targets collected");
// 3. For each candidate, detect prologue and walk to epilogue
let mut functions: BTreeMap<u32, FuncInfo> = BTreeMap::new();
@@ -267,7 +268,13 @@ pub fn analyze(
});
}
eprintln!("[func] {} functions detected", functions.len());
let elapsed_ms = started.elapsed().as_millis() as f64;
metrics::histogram!("analysis.phase_ms", "phase" => "functions").record(elapsed_ms);
tracing::info!(
functions = functions.len(),
elapsed_ms,
"function detection complete"
);
FuncAnalysis {
functions,
@@ -302,15 +309,13 @@ fn analyze_function(
let instr1 = read_instr(pe, func_addr + 4, image_base).unwrap_or(0);
// Check if next is bl to save stub
if let Some(target) = bl_target(instr1, func_addr + 4) {
if let Some(sb) = save_base {
if target >= sb && target < sb + 18 * 4 {
if let Some(target) = bl_target(instr1, func_addr + 4)
&& let Some(sb) = save_base
&& target >= sb && target < sb + 18 * 4 {
let idx = (target - sb) / 4;
saved_gprs = 18 - idx;
prologue_len = 8;
}
}
}
// Next should be stwu r1, -N(r1)
let stwu_instr = read_instr(pe, func_addr + prologue_len, image_base).unwrap_or(0);
@@ -356,14 +361,12 @@ fn analyze_function(
}
// Epilogue: b __restgprlr_NN (tail branch into restore stub)
if let Some(target) = b_target(instr, addr) {
if let Some(rb) = restore_base {
if target >= rb && target < rb + 18 * 4 {
if let Some(target) = b_target(instr, addr)
&& let Some(rb) = restore_base
&& target >= rb && target < rb + 18 * 4 {
end_addr = addr + 4;
break;
}
}
}
// Epilogue: bctr (indirect tail call — end of function)
if is_bctr(instr) {
@@ -407,24 +410,22 @@ impl FuncAnalysis {
for (&addr, fi) in &self.functions {
if fi.is_saverestore {
// Label the block start, plus individual register entry points
if let Some(sb) = self.save_gpr_base {
if addr == sb {
if let Some(sb) = self.save_gpr_base
&& addr == sb {
for i in 0u32..18 {
let reg = 14 + i;
labels.insert(sb + i * 4, format!("__savegprlr_{reg}"));
}
continue;
}
}
if let Some(rb) = self.restore_gpr_base {
if addr == rb {
if let Some(rb) = self.restore_gpr_base
&& addr == rb {
for i in 0u32..18 {
let reg = 14 + i;
labels.insert(rb + i * 4, format!("__restgprlr_{reg}"));
}
continue;
}
}
}
labels.insert(addr, format!("sub_{addr:08X}"));
}