xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks
The old src/ppc.rs that re-implemented PPC formatting collapses into a 30-line shim that delegates to xenia-cpu's single-source-of-truth disasm. A new disasm.rs wraps the shared iterator and feeds enriched items (analysis context: function membership, xrefs, mnemonics) into pluggable sinks. Sinks split: text.rs (objdump-like output), json.rs (JSONL stream matching the new xenia dis --json mode), duckdb.rs (the analysis DB ingest). db.rs is restructured into ingest_instructions + write_analysis_results so a run can stop after raw ingest, and a new target_hex column lands on the instructions table. sql_views.rs adds five additive views layered on top of the raw tables. Tests: assert-based JSON-fixture goldens (disasm_goldens) and a PRAGMA-table_info schema golden (db_schema_golden) covering all ingested tables and the SQL views. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
141
crates/xenia-analysis/src/sql_views.rs
Normal file
141
crates/xenia-analysis/src/sql_views.rs
Normal file
@@ -0,0 +1,141 @@
|
||||
//! Additive SQL views over the Phase-3 ingest tables.
//!
//! These views are created when `--analyze=sql` or `--analyze=both` is set.
//! They are *not* a replacement for the Rust passes ([`crate::xref`],
//! [`crate::func`]) — those still own data-ref resolution and prologue
//! pattern matching. The views cover the cleanly-relational parts:
//!
//! - branch xrefs (self-join on `instructions.target_hex`)
//! - call graph + reachability (recursive CTE over `xrefs`)
//! - convenience joins (function-first-instruction, imports-called)
//!
//! All views are read-only and stable across re-creation: dropping and
//! recreating the database via [`crate::db::DbWriter::open_fresh`] re-runs
//! these definitions.
//!
//! ## Cross-check semantics
//!
//! `v_branch_xrefs` is intended to produce *exactly* the same `(source,
//! target, kind)` tuples as the Rust `xref.rs` first pass — given the same
//! input image. [`crate::db::DbWriter::cross_check_branch_xrefs`] queries
//! the symmetric difference and returns the row counts; both should be
//! zero. A non-zero count means the formatter's `mnemonic` column or the
//! kind-classification CASE drifted out of agreement with `xref.rs`, and
//! is worth a one-line warning at log time.
|
||||
|
||||
/// `(view_name, CREATE VIEW … SQL)` pairs in the order they must run.
|
||||
/// Later views may depend on earlier ones (e.g. `v_call_graph` reads
|
||||
/// `xrefs`, which is the Rust-pass table; `v_branch_xrefs` is independent).
|
||||
pub const ALL_VIEWS: &[(&str, &str)] = &[
|
||||
("v_branch_xrefs", V_BRANCH_XREFS),
|
||||
("v_call_graph", V_CALL_GRAPH),
|
||||
("v_reachability_from_entry", V_REACHABILITY_FROM_ENTRY),
|
||||
("v_function_first_instruction", V_FUNCTION_FIRST_INSTRUCTION),
|
||||
("v_imports_called", V_IMPORTS_CALLED),
|
||||
];
|
||||
|
||||
/// Branch cross-references derived purely from `instructions.target_hex`.
///
/// Mirrors the kind classification in [`crate::xref::collect_branch_target`]
/// and the short tags returned by [`crate::xref::XrefKind::tag`] (which are
/// what `xrefs.kind` actually stores):
/// - I-form (`b`/`bl`/`ba`/`bla`): `bl`/`bla` → `"call"`, `b`/`ba` → `"j"`
/// - B-form (`bc`/`bcl`/`bca`/`bcla`): always → `"br"`
///
/// Any other mnemonic that carries a non-NULL `target_hex` also lands on
/// `"br"` through the `ELSE` arm; the explicit B-form arm is therefore
/// redundant and is kept only so the SQL reads like the list above.
///
/// Indirect branches (`bclr`/`bcctr`) leave `target_hex` NULL and are
/// excluded from this view by design.
///
/// Output columns: `source` (instruction address), `target`, `kind`,
/// `instruction` (the mnemonic) and `source_func` (the `function` column of
/// the source instruction).
const V_BRANCH_XREFS: &str = "
CREATE OR REPLACE VIEW v_branch_xrefs AS
SELECT
    address AS source,
    target_hex AS target,
    CASE
        WHEN mnemonic IN ('bl', 'bla') THEN 'call'
        WHEN mnemonic IN ('b', 'ba') THEN 'j'
        WHEN mnemonic IN ('bc', 'bcl', 'bca', 'bcla') THEN 'br'
        ELSE 'br'
    END AS kind,
    mnemonic AS instruction,
    function AS source_func
FROM instructions
WHERE target_hex IS NOT NULL;
";
|
||||
|
||||
/// Call-graph edges resolved against function names.
///
/// Reads from `xrefs` (the Rust-pass table) — this is the canonical source
/// for *all* edge kinds, including indirect/data; SQL can't reconstruct the
/// data-ref edges cleanly because they require register tracking. For pure
/// branch edges, `v_branch_xrefs` produces equivalent rows directly from
/// `instructions`.
///
/// Only rows with `kind = 'call'` are kept. Note the asymmetry between the
/// caller columns: `caller_addr` is `xrefs.source` (the address of the
/// calling instruction), whereas `caller_name` is looked up through
/// `xrefs.source_func`. Both name columns come from `LEFT JOIN`s, so they
/// are NULL when no `functions` row matches.
const V_CALL_GRAPH: &str = "
CREATE OR REPLACE VIEW v_call_graph AS
SELECT
    x.source AS caller_addr,
    cf.name AS caller_name,
    x.target AS callee_addr,
    tf.name AS callee_name,
    x.kind AS edge_kind
FROM xrefs x
LEFT JOIN functions cf ON cf.address = x.source_func
LEFT JOIN functions tf ON tf.address = x.target
WHERE x.kind = 'call';
";
|
||||
|
||||
/// Transitive function-level reachability from the entry point over
/// call/jump/branch edges. Useful for finding dead code
/// (`SELECT address FROM functions
///   WHERE address NOT IN (SELECT addr FROM v_reachability_from_entry)`)
/// and for scoping analysis to the live subset.
///
/// Seeds from the function containing the `entry_point` label and walks
/// the recursive closure: a reachable function's instructions branch into
/// the functions enclosing the branch targets, which are then reachable
/// in turn. `UNION` (not `UNION ALL`) deduplicates to handle call-graph
/// cycles (recursive functions, mutually-recursive pairs).
///
/// Rows whose `function` is NULL are filtered out in both the seed and the
/// recursive step, so `addr` is never NULL and the `NOT IN` example above
/// behaves as expected. If no instruction sits at a label named
/// `entry_point`, the seed is empty and the view returns no rows.
const V_REACHABILITY_FROM_ENTRY: &str = "
CREATE OR REPLACE VIEW v_reachability_from_entry AS
WITH RECURSIVE reach(fn) AS (
    SELECT i.function FROM instructions i
    JOIN labels l ON l.address = i.address
    WHERE l.name = 'entry_point' AND i.function IS NOT NULL
    UNION
    SELECT tgt.function FROM xrefs x
    JOIN instructions src ON src.address = x.source
    JOIN instructions tgt ON tgt.address = x.target
    JOIN reach r ON src.function = r.fn
    WHERE x.kind IN ('call', 'j', 'br')
      AND tgt.function IS NOT NULL
)
SELECT fn AS addr FROM reach;
";
|
||||
|
||||
/// Convenience join: each function's first decoded instruction. Useful for
/// quickly inspecting prologue patterns without computing offsets manually.
///
/// "First" means the `instructions` row whose `address` equals the
/// function's `address`. This is an inner join, so a function with no
/// instruction row at its own address does not appear in the view.
const V_FUNCTION_FIRST_INSTRUCTION: &str = "
CREATE OR REPLACE VIEW v_function_first_instruction AS
SELECT
    f.address AS function_addr,
    f.name AS function_name,
    i.raw AS first_raw,
    i.disasm AS first_disasm,
    i.ext_disasm AS first_ext_disasm
FROM functions f
JOIN instructions i ON i.address = f.address;
";
|
||||
|
||||
/// Kernel/library imports called by each function. Joins xrefs (call
/// edges) against the labels table to surface import names.
///
/// The view yields one row per `kind = 'call'` xref whose target address
/// carries a label of `kind = 'import'`; rows are not deduplicated or
/// grouped, so a function calling the same import from several sites
/// appears once per site. Use `SELECT DISTINCT` or `GROUP BY` on top of the
/// view for a per-function summary. `function_name` is NULL when
/// `source_func` has no matching `functions` row.
const V_IMPORTS_CALLED: &str = "
CREATE OR REPLACE VIEW v_imports_called AS
SELECT
    x.source_func AS function_addr,
    f.name AS function_name,
    x.target AS import_addr,
    l.name AS import_name
FROM xrefs x
JOIN labels l ON l.address = x.target
LEFT JOIN functions f ON f.address = x.source_func
WHERE x.kind = 'call'
  AND l.kind = 'import';
";
|
||||
Reference in New Issue
Block a user