xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks
The old src/ppc.rs that re-implemented PPC formatting collapses into a 30-line shim that delegates to xenia-cpu's single-source-of-truth disasm. A new disasm.rs wraps the shared iterator and feeds enriched items (analysis context: function membership, xrefs, mnemonics) into pluggable sinks. Sinks split: text.rs (objdump-like output), json.rs (JSONL stream matching the new xenia dis --json mode), duckdb.rs (the analysis DB ingest). db.rs is restructured into ingest_instructions + write_analysis_results so a run can stop after raw ingest, and a new target_hex column lands on the instructions table. sql_views.rs adds five additive views layered on top of the raw tables. Tests: assert-based JSON-fixture goldens (disasm_goldens) and a PRAGMA-table_info schema golden (db_schema_golden) covering all ingested tables and the SQL views. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
58
crates/xenia-analysis/src/sinks/text.rs
Normal file
58
crates/xenia-analysis/src/sinks/text.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
//! Text sink — renders one .asm instruction line with optional
|
||||
//! branch-target / data-ref annotations.
|
||||
//!
|
||||
//! The full `write_asm` orchestration (section headers, function prologue
|
||||
//! info, xref comment blocks, hex-dump of data sections) stays in
|
||||
//! [`crate::formatter`]; this sink only owns the per-instruction line.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io::{self, Write};
|
||||
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
use crate::disasm::RichDisasmItem;
|
||||
use crate::xref::{XrefKind, section_for_addr};
|
||||
|
||||
/// Render one instruction line:
|
||||
/// ` 82000000: 60000000 nop`
|
||||
/// ` 82000004: 4800FFFC bl 0x82000000 ; -> entry_point`
|
||||
/// ` 82000010: 812A0000 lwz r9, 0(r10) ; [R] 0x828A0000 (.rdata) = dat_…`
|
||||
pub fn write_instr_line<W: Write + ?Sized>(
|
||||
out: &mut W,
|
||||
item: &RichDisasmItem<'_>,
|
||||
labels: &HashMap<u32, String>,
|
||||
sections: &[PeSection],
|
||||
image_base: u32,
|
||||
data_annotation: Option<(u32, XrefKind)>,
|
||||
) -> io::Result<()> {
|
||||
let disasm_text = item.item.text.display();
|
||||
|
||||
// Branch-target → label annotation. Uses the structured `branch_target`
|
||||
// field (cleaner than the legacy "find 0x in disasm string" regex).
|
||||
let mut annotated = match item.item.text.branch_target {
|
||||
Some(target) => match labels.get(&target) {
|
||||
Some(lbl) => format!("{disasm_text:<40} ; -> {lbl}"),
|
||||
None => disasm_text.to_string(),
|
||||
},
|
||||
None => disasm_text.to_string(),
|
||||
};
|
||||
|
||||
if let Some((data_addr, kind)) = data_annotation {
|
||||
let tag = match kind {
|
||||
XrefKind::DataRead => "[R]",
|
||||
XrefKind::DataWrite => "[W]",
|
||||
_ => "[&]",
|
||||
};
|
||||
let sec = section_for_addr(data_addr, sections, image_base).unwrap_or("?");
|
||||
let data_lbl = labels.get(&data_addr)
|
||||
.map(|s| format!(" = {s}"))
|
||||
.unwrap_or_default();
|
||||
if !annotated.contains("; ->") {
|
||||
annotated = format!("{annotated:<40} ; {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
|
||||
} else {
|
||||
annotated = format!("{annotated} {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, " {:08X}: {:08X} {}", item.item.addr, item.item.raw, annotated)
|
||||
}
|
||||
Reference in New Issue
Block a user