xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks
The old src/ppc.rs that re-implemented PPC formatting collapses into a 30-line shim that delegates to xenia-cpu's single-source-of-truth disasm. A new disasm.rs wraps the shared iterator and feeds enriched items (analysis context: function membership, xrefs, mnemonics) into pluggable sinks. Sinks split: text.rs (objdump-like output), json.rs (JSONL stream matching the new xenia dis --json mode), duckdb.rs (the analysis DB ingest). db.rs is restructured into ingest_instructions + write_analysis_results so a run can stop after raw ingest, and a new target_hex column lands on the instructions table. sql_views.rs adds five additive views layered on top of the raw tables. Tests: assert-based JSON-fixture goldens (disasm_goldens) and a PRAGMA-table_info schema golden (db_schema_golden) covering all ingested tables and the SQL views. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
244
crates/xenia-analysis/tests/db_schema_golden.rs
Normal file
244
crates/xenia-analysis/tests/db_schema_golden.rs
Normal file
@@ -0,0 +1,244 @@
|
||||
//! DB schema golden — locks the column layout (names + types) of every
|
||||
//! table written by `DbWriter`. A schema change here without a fixture
|
||||
//! update fails the test, forcing a conscious decision before downstream
|
||||
//! query consumers break.
|
||||
//!
|
||||
//! The fixture is constructed in-process (no XEX/ISO needed): a small
|
||||
//! synthetic PE-shaped byte slice with one `.text` section of 4
|
||||
//! instructions, plus an empty import-library list and one detected
|
||||
//! function.
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::io::Write;
|
||||
|
||||
use duckdb::Connection;
|
||||
|
||||
use xenia_analysis::DbWriter;
|
||||
use xenia_analysis::formatter::DisasmInfo;
|
||||
use xenia_analysis::func::{FuncAnalysis, FuncInfo};
|
||||
use xenia_analysis::xref::XrefMap;
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
/// Build a 16-byte `.text` section: 4 instructions (mflr / nop / blr / nop).
|
||||
fn synthetic_pe() -> (Vec<u8>, Vec<PeSection>, Vec<xenia_xex::header::ImportLibrary>) {
|
||||
// VA layout: image_base + 0x1000 = .text start (so RVA = 0x1000).
|
||||
// The DB writer expects pe[rva] to hold the byte at that RVA, so the
|
||||
// buffer must be at least 0x1000 + section_size bytes long.
|
||||
const RVA: usize = 0x1000;
|
||||
const TEXT: [u32; 4] = [
|
||||
// mfspr r12, LR (a.k.a. mflr r12) — opcode 31, xo 339, spr 8 (LR).
|
||||
// Encoded with spr halves swapped per the ISA: spr_field = (8<<5).
|
||||
(31u32 << 26) | (12 << 21) | ((8 << 5) << 11) | (339 << 1),
|
||||
0x60000000, // nop (ori r0, r0, 0)
|
||||
(19u32 << 26) | (20 << 21) | (16 << 1), // blr (bclr 20, 0)
|
||||
0x60000000, // nop
|
||||
];
|
||||
|
||||
let mut pe = vec![0u8; RVA + 16];
|
||||
for (i, &word) in TEXT.iter().enumerate() {
|
||||
pe[RVA + i * 4..RVA + i * 4 + 4].copy_from_slice(&word.to_be_bytes());
|
||||
}
|
||||
|
||||
let sections = vec![PeSection {
|
||||
name: ".text".to_string(),
|
||||
virtual_address: 0x1000,
|
||||
virtual_size: 16,
|
||||
raw_offset: 0x1000,
|
||||
raw_size: 16,
|
||||
flags: 0x60000020, // CODE | EXECUTE | READ
|
||||
}];
|
||||
|
||||
let import_libraries = vec![]; // No imports in the fixture.
|
||||
(pe, sections, import_libraries)
|
||||
}
|
||||
|
||||
fn synthetic_func_analysis(image_base: u32) -> FuncAnalysis {
|
||||
// Single function covering all four .text instructions.
|
||||
let entry = image_base + 0x1000;
|
||||
let mut functions = BTreeMap::new();
|
||||
functions.insert(
|
||||
entry,
|
||||
FuncInfo {
|
||||
start: entry,
|
||||
end: entry + 16,
|
||||
frame_size: 0,
|
||||
saved_gprs: 0,
|
||||
is_leaf: true,
|
||||
is_saverestore: false,
|
||||
},
|
||||
);
|
||||
FuncAnalysis {
|
||||
functions,
|
||||
save_gpr_base: None,
|
||||
restore_gpr_base: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes the synthetic fixture through every `DbWriter` stage into a
/// temporary DuckDB file, then checks that:
///
/// * each table's `PRAGMA table_info` column list (name + type, in order)
///   matches the expected layout below,
/// * the `instructions` table holds the 4 fixture rows, none of which has a
///   `target_hex`,
/// * all five SQL views exist.
///
/// Schema mismatches are collected and reported together so a single run
/// shows every drifted column.
#[test]
fn db_schema_matches_expected_columns() {
    let (pe, sections, libs) = synthetic_pe();
    let image_base = 0x82000000u32;
    let entry = image_base + 0x1000;

    let info = DisasmInfo {
        image_base,
        entry_point: entry,
        original_pe_name: Some("synthetic.exe"),
        title_id: Some(0xDEADBEEF),
        media_id: Some(0xCAFEF00D),
        sections: &sections,
        import_libraries: &libs,
    };

    let func_analysis = synthetic_func_analysis(image_base);
    let mut labels: HashMap<u32, String> = HashMap::new();
    labels.insert(entry, "entry_point".to_string());
    let xrefs: XrefMap = XrefMap::new();

    let tmp = std::env::temp_dir().join("xenia_rs_schema_golden.duckdb");
    // Best-effort removal of a file left behind by an earlier failed run.
    let _ = std::fs::remove_file(&tmp);

    // Scope the writer so it is dropped before the file is reopened below.
    {
        let mut w = DbWriter::open_fresh(&tmp).expect("open fresh DB");
        w.write_base(&info).expect("write_base");
        w.ingest_instructions(&pe, &info, &func_analysis, &labels)
            .expect("ingest_instructions");
        w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs)
            .expect("write_analysis_results");
        w.create_sql_views().expect("create_sql_views");
    }

    let conn = Connection::open(&tmp).expect("reopen DB");

    // Lock the column layout per table. Pairs are (name, type).
    let expected: &[(&str, &[(&str, &str)])] = &[
        ("metadata", &[
            ("key", "VARCHAR"),
            ("value", "VARCHAR"),
        ]),
        ("sections", &[
            ("name", "VARCHAR"),
            ("virtual_address", "BIGINT"),
            ("virtual_size", "BIGINT"),
            ("raw_offset", "BIGINT"),
            ("raw_size", "BIGINT"),
            ("flags", "BIGINT"),
            ("is_code", "BOOLEAN"),
        ]),
        ("imports", &[
            ("library", "VARCHAR"),
            ("ordinal", "BIGINT"),
            ("name", "VARCHAR"),
            ("record_type", "BIGINT"),
            ("address", "BIGINT"),
        ]),
        ("instructions", &[
            ("address", "BIGINT"),
            ("raw", "BIGINT"),
            ("mnemonic", "VARCHAR"),
            ("operands", "VARCHAR"),
            ("disasm", "VARCHAR"),
            ("ext_mnemonic", "VARCHAR"),
            ("ext_operands", "VARCHAR"),
            ("ext_disasm", "VARCHAR"),
            ("target_hex", "BIGINT"),
            ("section", "VARCHAR"),
            ("function", "BIGINT"),
            ("label", "VARCHAR"),
        ]),
        ("functions", &[
            ("address", "BIGINT"),
            ("name", "VARCHAR"),
            ("end_address", "BIGINT"),
            ("frame_size", "BIGINT"),
            ("saved_gprs", "BIGINT"),
            ("is_leaf", "BOOLEAN"),
            ("is_saverestore", "BOOLEAN"),
        ]),
        ("labels", &[
            ("address", "BIGINT"),
            ("name", "VARCHAR"),
            ("kind", "VARCHAR"),
        ]),
        ("xrefs", &[
            ("source", "BIGINT"),
            ("target", "BIGINT"),
            ("kind", "VARCHAR"),
            ("instruction", "VARCHAR"),
            ("source_func", "BIGINT"),
            ("source_label", "VARCHAR"),
            ("target_label", "VARCHAR"),
        ]),
    ];

    let mut errs: Vec<String> = Vec::new();
    for (table, cols) in expected {
        let mut stmt = conn
            .prepare(&format!("PRAGMA table_info('{}')", table))
            .unwrap_or_else(|e| panic!("prepare PRAGMA for {table}: {e}"));
        // Result columns 1 and 2 of `PRAGMA table_info` are read as the
        // column name and the column type.
        let rows: Vec<(String, String)> = stmt
            .query_map([], |row| {
                let name: String = row.get(1)?;
                let ty: String = row.get(2)?;
                Ok((name, ty))
            })
            .expect("query")
            .map(|r| r.unwrap())
            .collect();

        if rows.len() != cols.len() {
            writeln!(
                std::io::stderr(),
                "{table}: column count mismatch (got {}, expected {})",
                rows.len(),
                cols.len()
            ).ok();
            errs.push(format!("{table}: count {} vs {}", rows.len(), cols.len()));
        }

        // Compare the shared prefix position by position.
        for (i, (got, expected_col)) in rows.iter().zip(cols.iter()).enumerate() {
            if got.0 != expected_col.0 || got.1 != expected_col.1 {
                errs.push(format!(
                    "{table} col {i}: got ({}, {}) expected ({}, {})",
                    got.0, got.1, expected_col.0, expected_col.1
                ));
            }
        }

        // `zip` stops at the shorter side, so name whatever falls past it:
        // columns present in the DB but not expected, and vice versa.
        for (i, (name, ty)) in rows.iter().enumerate().skip(cols.len()) {
            errs.push(format!("{table} col {i}: unexpected column ({name}, {ty})"));
        }
        for (i, (name, ty)) in cols.iter().enumerate().skip(rows.len()) {
            errs.push(format!("{table} col {i}: missing column ({name}, {ty})"));
        }
    }

    assert!(errs.is_empty(), "schema drift detected:\n  {}", errs.join("\n  "));

    // Verify row counts in the populated tables.
    let n_instr: i64 = conn
        .query_row("SELECT COUNT(*) FROM instructions", [], |r| r.get(0))
        .unwrap();
    assert_eq!(n_instr, 4, "expected 4 instruction rows from the synthetic PE");

    // The synthetic mflr should produce target_hex = NULL, blr likewise (indirect).
    let n_with_target: i64 = conn
        .query_row("SELECT COUNT(target_hex) FROM instructions", [], |r| r.get(0))
        .unwrap();
    assert_eq!(n_with_target, 0, "indirect-only fixture should have no direct branch targets");

    // SQL views must be queryable. The `_` in SQL LIKE is a single-char
    // wildcard, so we list the names explicitly rather than `LIKE 'v_%'`
    // (which also matches DuckDB's built-in `views` system view).
    let expected_views = [
        "v_branch_xrefs",
        "v_call_graph",
        "v_function_first_instruction",
        "v_imports_called",
        "v_reachability_from_entry",
    ];
    for v in expected_views {
        let exists: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM duckdb_views() WHERE view_name = ?",
                [v],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(exists, 1, "missing SQL view: {v}");
    }

    // Close the connection before deleting the file it has open; the
    // removal itself stays best-effort.
    drop(conn);
    let _ = std::fs::remove_file(&tmp);
}
|
||||
123
crates/xenia-analysis/tests/disasm_goldens.rs
Normal file
123
crates/xenia-analysis/tests/disasm_goldens.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
//! Analysis-side goldens: every row in the xenia-cpu fixtures must
|
||||
//! round-trip cleanly through the [`xenia_analysis::ppc`] shim. This
|
||||
//! pins the shim's behaviour to the canonical `xenia_cpu::disasm::format`
|
||||
//! output so that any future refactor of the shim layer surfaces here.
|
||||
//!
|
||||
//! Loads the same JSON fixtures committed under
|
||||
//! `crates/xenia-cpu/tests/golden/`. No separate analysis-side fixture
|
||||
//! files — the cpu canon is the source of truth.
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GoldenRow {
|
||||
label: String,
|
||||
raw: String,
|
||||
addr: String,
|
||||
mnemonic: String,
|
||||
operands: String,
|
||||
#[serde(default)]
|
||||
ext_mnemonic: Option<String>,
|
||||
#[serde(default)]
|
||||
ext_operands: Option<String>,
|
||||
#[serde(default)]
|
||||
branch_target: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GoldenFile {
|
||||
rows: Vec<GoldenRow>,
|
||||
}
|
||||
|
||||
fn cpu_fixture(name: &str) -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("..")
|
||||
.join("xenia-cpu")
|
||||
.join("tests")
|
||||
.join("golden")
|
||||
.join(name)
|
||||
}
|
||||
|
||||
/// Parse a hexadecimal `u32`, accepting an optional single `0x` / `0X`
/// prefix.
///
/// # Panics
///
/// Panics with `"hex u32"` if the remaining text is not a valid base-16
/// `u32`.
fn parse_hex(s: &str) -> u32 {
    // Strip at most one prefix; an input without one is parsed as-is.
    let digits = match s.strip_prefix("0x") {
        Some(rest) => rest,
        None => s.strip_prefix("0X").unwrap_or(s),
    };
    u32::from_str_radix(digits, 16).expect("hex u32")
}
|
||||
|
||||
/// Verify the shim's `Decoded { base, ext }` mirrors the canonical fields
|
||||
/// from `xenia_cpu::disasm::format` for every fixture row.
|
||||
fn check_fixture(fixture_name: &str) {
|
||||
let path = cpu_fixture(fixture_name);
|
||||
assert!(
|
||||
path.exists(),
|
||||
"missing fixture {} — run `cargo test -p xenia-cpu --test disasm_goldens` to (re)generate it",
|
||||
path.display()
|
||||
);
|
||||
let src = std::fs::read_to_string(&path).unwrap();
|
||||
let golden: GoldenFile = serde_json::from_str(&src).unwrap();
|
||||
|
||||
for row in &golden.rows {
|
||||
let raw = parse_hex(&row.raw);
|
||||
let addr = parse_hex(&row.addr);
|
||||
|
||||
let canonical =
|
||||
xenia_cpu::disasm::format(&xenia_cpu::decode(raw, addr));
|
||||
let shim = xenia_analysis::ppc::disasm(raw, addr);
|
||||
|
||||
assert_eq!(
|
||||
shim.base, canonical.disasm,
|
||||
"shim.base drifted for {} (raw={})",
|
||||
row.label, row.raw,
|
||||
);
|
||||
assert_eq!(
|
||||
shim.ext, canonical.ext_disasm,
|
||||
"shim.ext drifted for {} (raw={})",
|
||||
row.label, row.raw,
|
||||
);
|
||||
|
||||
// Also pin against the fixture's structured fields — guards against
|
||||
// someone changing the cpu canon without regenerating the fixture.
|
||||
assert_eq!(canonical.mnemonic, row.mnemonic, "mnemonic drift: {}", row.label);
|
||||
assert_eq!(canonical.operands, row.operands, "operands drift: {}", row.label);
|
||||
assert_eq!(canonical.ext_mnemonic, row.ext_mnemonic, "ext_mnemonic drift: {}", row.label);
|
||||
assert_eq!(canonical.ext_operands, row.ext_operands, "ext_operands drift: {}", row.label);
|
||||
|
||||
let target_str = canonical.branch_target.map(|t| format!("0x{t:08X}"));
|
||||
assert_eq!(target_str, row.branch_target, "branch_target drift: {}", row.label);
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the shim-vs-canon comparison over the base-mnemonic fixture.
#[test]
fn analysis_shim_matches_base_mnemonics() {
    const FIXTURE: &str = "base_mnemonics.json";
    check_fixture(FIXTURE);
}
|
||||
|
||||
/// Runs the shim-vs-canon comparison over the extended-mnemonic fixture.
#[test]
fn analysis_shim_matches_extended_mnemonics() {
    const FIXTURE: &str = "extended_mnemonics.json";
    check_fixture(FIXTURE);
}
|
||||
|
||||
/// Runs the shim-vs-canon comparison over the VMX128 register fixture.
#[test]
fn analysis_shim_matches_vmx128_registers() {
    const FIXTURE: &str = "vmx128_registers.json";
    check_fixture(FIXTURE);
}
|
||||
|
||||
/// Spot-check the shim's `display()` contract: it yields the extended form
/// when one exists and otherwise falls back to the base form. `formatter.rs`
/// and the .asm output depend on this.
#[test]
fn shim_display_prefers_extended() {
    // 0x60000000 encodes `ori r0, r0, 0`, whose extended form is "nop".
    let nop = xenia_analysis::ppc::disasm(0x60000000, 0);
    assert_eq!(nop.display(), "nop");

    // `addi r3, r1, 16` has no extended spelling, so display() must hand
    // back the base text unchanged.
    let addi_word = (14u32 << 26) | (3 << 21) | (1 << 16) | 16;
    let addi = xenia_analysis::ppc::disasm(addi_word, 0);
    assert!(
        addi.ext.is_none(),
        "addi r3, r1, 16 has no extended form (only addi r3, r0, … → li)"
    );
    assert_eq!(addi.display(), addi.base);
}
|
||||
Reference in New Issue
Block a user