xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks

The old src/ppc.rs that re-implemented PPC formatting collapses into
a 30-line shim that delegates to xenia-cpu's single-source-of-truth
disasm. A new disasm.rs wraps the shared iterator and feeds enriched
items (analysis context: function membership, xrefs, mnemonics) into
pluggable sinks.

Sinks split: text.rs (objdump-like output), json.rs (JSONL stream
matching the new xenia dis --json mode), duckdb.rs (the analysis DB
ingest). db.rs is restructured into ingest_instructions +
write_analysis_results so a run can stop after raw ingest, and a new
target_hex column lands on the instructions table. sql_views.rs adds
five additive views layered on top of the raw tables.

Tests: assert-based JSON-fixture goldens (disasm_goldens) and a
PRAGMA-table_info schema golden (db_schema_golden) covering all
ingested tables and the SQL views.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:28:06 +02:00
parent c36cca14f9
commit 45e15d7885
15 changed files with 1194 additions and 1757 deletions

View File

@@ -6,8 +6,10 @@ use std::io::Write;
use xenia_xex::header::ImportLibrary;
use xenia_xex::pe::PeSection;
use crate::disasm::enrich_section;
use crate::func::FuncAnalysis;
use crate::xref::{XrefKind, Xref, XrefMap, section_for_addr, resolve_source_label};
use crate::sinks::text::write_instr_line;
use crate::xref::{XrefKind, Xref, XrefMap, resolve_source_label};
/// Metadata passed to the formatter (avoids exposing full Xex2Header internals).
pub struct DisasmInfo<'a> {
@@ -88,11 +90,14 @@ pub fn write_asm(
writeln!(out)?;
let mut in_function = false;
let mut addr = va_start;
while addr < va_end {
let abs_addr = info.image_base + addr;
let off = (addr - va_start) as usize + file_start;
if off + 4 > pe.len() { break; }
let abs_start = info.image_base + va_start;
let abs_end = info.image_base + va_end;
let items = enrich_section(
pe, info.image_base, &section.name, abs_start, abs_end, func_analysis, labels,
);
for ri in items {
let abs_addr = ri.item.addr;
// Function start? Emit separator + header
if let Some(fi) = func_analysis.get(abs_addr) {
@@ -126,7 +131,6 @@ pub fn write_asm(
writeln!(out, "; FUNCTION: {lbl}{detail_str}")?;
}
// Xrefs for function entry
if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) {
for line in &xref_lines {
writeln!(out, "{line}")?;
@@ -141,7 +145,6 @@ pub fn write_asm(
if let Some(lbl) = labels.get(&abs_addr) {
if !func_analysis.is_function_start(abs_addr) {
writeln!(out)?;
// Xrefs for local labels
if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) {
for line in &xref_lines {
writeln!(out, "{line}")?;
@@ -159,37 +162,8 @@ pub fn write_asm(
writeln!(out, " ; IMPORT: {imp_name}")?;
}
let instr = u32::from_be_bytes([
pe[off], pe[off+1], pe[off+2], pe[off+3]
]);
let decoded = crate::ppc::disasm(instr, abs_addr);
let disasm_text = decoded.display().to_string();
// Annotate branch targets with label names
let mut annotated = annotate_branch(&disasm_text, labels);
// Annotate data references
if let Some(&(data_addr, kind)) = data_annotations.get(&abs_addr) {
let tag = match kind {
XrefKind::DataRead => "[R]",
XrefKind::DataWrite => "[W]",
_ => "[&]",
};
let sec = section_for_addr(data_addr, info.sections, info.image_base)
.unwrap_or("?");
let data_lbl = labels.get(&data_addr)
.map(|s| format!(" = {s}"))
.unwrap_or_default();
if !annotated.contains("; ->") {
annotated = format!("{annotated:<40} ; {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
} else {
annotated = format!("{annotated} {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
}
}
writeln!(out, " {:08X}: {:08X} {}", abs_addr, instr, annotated)?;
addr += 4;
let data_annot = data_annotations.get(&abs_addr).copied();
write_instr_line(out, &ri, labels, info.sections, info.image_base, data_annot)?;
}
if in_function {
writeln!(out, "; end function")?;
@@ -298,21 +272,3 @@ fn format_xrefs(
Some(lines)
}
/// Appends a `; -> <label>` comment to a disassembly line when the first
/// `0x`-prefixed literal in it names a labelled address.
///
/// Only the first `0x` occurrence is examined. The run of ASCII hex digits
/// that follows must be exactly eight characters long; it is then parsed as a
/// `u32` and looked up in `labels`. On a hit the text is left-aligned in a
/// 40-column field and followed by ` ; -> ` plus the label. In every other
/// case the input is returned as an owned, unmodified copy.
fn annotate_branch(disasm: &str, labels: &HashMap<u32, String>) -> String {
    let resolved = disasm
        .find("0x")
        // Skip the two-byte "0x" prefix itself.
        .map(|marker| &disasm[marker + 2..])
        // Keep only the leading run of hex digits (possibly the whole tail).
        .map(|tail| {
            let run = tail
                .find(|ch: char| !ch.is_ascii_hexdigit())
                .unwrap_or(tail.len());
            &tail[..run]
        })
        // Shorter or longer literals are never looked up.
        .filter(|digits| digits.len() == 8)
        .and_then(|digits| u32::from_str_radix(digits, 16).ok())
        .and_then(|target| labels.get(&target));

    match resolved {
        Some(name) => format!("{disasm:<40} ; -> {name}"),
        None => disasm.to_string(),
    }
}