xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks
The old src/ppc.rs, which re-implemented PPC formatting, collapses into a 30-line shim that delegates to xenia-cpu's single-source-of-truth disasm. A new disasm.rs wraps the shared iterator and feeds enriched items (analysis context: function membership, xrefs, mnemonics) into pluggable sinks.

The sinks are split into text.rs (objdump-like output), json.rs (JSONL stream matching the new xenia dis --json mode), and duckdb.rs (the analysis DB ingest). db.rs is restructured into ingest_instructions + write_analysis_results so that a run can stop after raw ingest, and a new target_hex column is added to the instructions table. sql_views.rs adds five additive views layered on top of the raw tables.

Tests: assert-based JSON-fixture goldens (disasm_goldens) and a PRAGMA-table_info schema golden (db_schema_golden) covering all ingested tables and the SQL views.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -53,6 +53,7 @@ pub struct XrefResult {
|
||||
}
|
||||
|
||||
/// Perform full cross-reference analysis on a PE image.
|
||||
#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))]
|
||||
pub fn analyze_xrefs(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
@@ -61,6 +62,7 @@ pub fn analyze_xrefs(
|
||||
func_analysis: &FuncAnalysis,
|
||||
import_map: &HashMap<u32, String>,
|
||||
) -> XrefResult {
|
||||
let started = std::time::Instant::now();
|
||||
let func_labels = func_analysis.generate_labels();
|
||||
let mut labels: HashMap<u32, String> = func_labels;
|
||||
labels.insert(entry_point, "entry_point".to_string());
|
||||
@@ -124,7 +126,7 @@ pub fn analyze_xrefs(
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
let simm = ((instr & 0xFFFF) as i16) as i32;
|
||||
let uimm = (instr & 0xFFFF) as u32;
|
||||
let uimm = instr & 0xFFFF;
|
||||
|
||||
// Reset tracking on function boundaries (prologue = mfspr rN, LR)
|
||||
if opcode == 31 {
|
||||
@@ -181,8 +183,8 @@ pub fn analyze_xrefs(
|
||||
}
|
||||
// Load instructions: lwz, lbz, lhz, lha, lfs, lfd, lwzu, etc.
|
||||
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 46 | 48 | 49 | 50 | 51 => {
|
||||
if ra != 0 {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRead));
|
||||
@@ -190,14 +192,13 @@ pub fn analyze_xrefs(
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Load into rD may clobber the tracked value
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// Store instructions: stw, stb, sth, stfs, stfd, stwu, etc.
|
||||
36 | 37 | 38 | 39 | 44 | 45 | 47 | 52 | 53 | 54 | 55 => {
|
||||
if ra != 0 {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataWrite));
|
||||
@@ -205,7 +206,6 @@ pub fn analyze_xrefs(
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Any other instruction writing to rD: invalidate
|
||||
_ => {
|
||||
@@ -221,6 +221,17 @@ pub fn analyze_xrefs(
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed_ms = started.elapsed().as_millis() as f64;
|
||||
metrics::histogram!("analysis.phase_ms", "phase" => "xrefs").record(elapsed_ms);
|
||||
let total_xrefs: usize = xrefs.values().map(|v| v.len()).sum();
|
||||
tracing::info!(
|
||||
labels = labels.len(),
|
||||
xrefs = total_xrefs,
|
||||
data_annotations = data_annotations.len(),
|
||||
elapsed_ms,
|
||||
"xref analysis complete"
|
||||
);
|
||||
|
||||
XrefResult { labels, xrefs, data_annotations }
|
||||
}
|
||||
|
||||
@@ -262,7 +273,7 @@ fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool {
|
||||
}
|
||||
|
||||
/// Find which section a data address falls in.
|
||||
pub fn section_for_addr<'a>(addr: u32, sections: &'a [PeSection], image_base: u32) -> Option<&'a str> {
|
||||
pub fn section_for_addr(addr: u32, sections: &[PeSection], image_base: u32) -> Option<&str> {
|
||||
for s in sections {
|
||||
let start = image_base + s.virtual_address;
|
||||
let end = start + s.virtual_size;
|
||||
@@ -285,12 +296,11 @@ pub fn resolve_source_label(
|
||||
}
|
||||
|
||||
// Find the containing function (largest start <= addr)
|
||||
if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back() {
|
||||
if let Some(func_label) = labels.get(&func_start) {
|
||||
if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back()
|
||||
&& let Some(func_label) = labels.get(&func_start) {
|
||||
let offset = addr - func_start;
|
||||
return format!("{func_label}+0x{offset:X}");
|
||||
}
|
||||
}
|
||||
|
||||
format!("0x{addr:08X}")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user