xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks

The old src/ppc.rs that re-implemented PPC formatting collapses into
a 30-line shim that delegates to xenia-cpu's single-source-of-truth
disasm. A new disasm.rs wraps the shared iterator and feeds enriched
items (analysis context: function membership, xrefs, mnemonics) into
pluggable sinks.

The sinks are split across three modules: text.rs (objdump-like output),
json.rs (a JSONL stream matching the new xenia dis --json mode), and
duckdb.rs (the analysis DB ingest). db.rs is restructured into
ingest_instructions +
write_analysis_results so a run can stop after raw ingest, and a new
target_hex column lands on the instructions table. sql_views.rs adds
five additive views layered on top of the raw tables.

Tests: assert-based JSON-fixture goldens (disasm_goldens) and a
PRAGMA-table_info schema golden (db_schema_golden) covering all
ingested tables and the SQL views.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:28:06 +02:00
parent c36cca14f9
commit 45e15d7885
15 changed files with 1194 additions and 1757 deletions

View File

@@ -53,6 +53,7 @@ pub struct XrefResult {
}
/// Perform full cross-reference analysis on a PE image.
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))]
pub fn analyze_xrefs(
pe: &[u8],
image_base: u32,
@@ -61,6 +62,7 @@ pub fn analyze_xrefs(
func_analysis: &FuncAnalysis,
import_map: &HashMap<u32, String>,
) -> XrefResult {
let started = std::time::Instant::now();
let func_labels = func_analysis.generate_labels();
let mut labels: HashMap<u32, String> = func_labels;
labels.insert(entry_point, "entry_point".to_string());
@@ -124,7 +126,7 @@ pub fn analyze_xrefs(
let rd = ((instr >> 21) & 0x1F) as usize;
let ra = ((instr >> 16) & 0x1F) as usize;
let simm = ((instr & 0xFFFF) as i16) as i32;
let uimm = (instr & 0xFFFF) as u32;
let uimm = instr & 0xFFFF;
// Reset tracking on function boundaries (prologue = mfspr rN, LR)
if opcode == 31 {
@@ -181,8 +183,8 @@ pub fn analyze_xrefs(
}
// Load instructions: lwz, lbz, lhz, lha, lfs, lfd, lwzu, etc.
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 46 | 48 | 49 | 50 | 51 => {
if ra != 0 {
if let Some(base) = reg_hi[ra] {
if ra != 0
&& let Some(base) = reg_hi[ra] {
let data_addr = base.wrapping_add(simm as u32);
if is_in_ranges(data_addr, &data_ranges) {
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRead));
@@ -190,14 +192,13 @@ pub fn analyze_xrefs(
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
}
}
}
// Load into rD may clobber the tracked value
reg_hi[rd] = None;
}
// Store instructions: stw, stb, sth, stfs, stfd, stwu, etc.
36 | 37 | 38 | 39 | 44 | 45 | 47 | 52 | 53 | 54 | 55 => {
if ra != 0 {
if let Some(base) = reg_hi[ra] {
if ra != 0
&& let Some(base) = reg_hi[ra] {
let data_addr = base.wrapping_add(simm as u32);
if is_in_ranges(data_addr, &data_ranges) {
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataWrite));
@@ -205,7 +206,6 @@ pub fn analyze_xrefs(
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
}
}
}
}
// Any other instruction writing to rD: invalidate
_ => {
@@ -221,6 +221,17 @@ pub fn analyze_xrefs(
}
}
let elapsed_ms = started.elapsed().as_millis() as f64;
metrics::histogram!("analysis.phase_ms", "phase" => "xrefs").record(elapsed_ms);
let total_xrefs: usize = xrefs.values().map(|v| v.len()).sum();
tracing::info!(
labels = labels.len(),
xrefs = total_xrefs,
data_annotations = data_annotations.len(),
elapsed_ms,
"xref analysis complete"
);
XrefResult { labels, xrefs, data_annotations }
}
@@ -262,7 +273,7 @@ fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool {
}
/// Find which section a data address falls in.
pub fn section_for_addr<'a>(addr: u32, sections: &'a [PeSection], image_base: u32) -> Option<&'a str> {
pub fn section_for_addr(addr: u32, sections: &[PeSection], image_base: u32) -> Option<&str> {
for s in sections {
let start = image_base + s.virtual_address;
let end = start + s.virtual_size;
@@ -285,12 +296,11 @@ pub fn resolve_source_label(
}
// Find the containing function (largest start <= addr)
if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back() {
if let Some(func_label) = labels.get(&func_start) {
if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back()
&& let Some(func_label) = labels.get(&func_start) {
let offset = addr - func_start;
return format!("{func_label}+0x{offset:X}");
}
}
format!("0x{addr:08X}")
}