//! Additive SQL views over the Phase-3 ingest tables.
//!
//! These views are created when `--analyze=sql` or `--analyze=both` is set.
//! They are *not* a replacement for the Rust passes ([`crate::xref`],
//! [`crate::func`]) — those still own data-ref resolution and prologue
//! pattern matching. The views cover the cleanly-relational parts:
//!
//! - branch xrefs (filter over `instructions` rows whose `target_hex` is set)
//! - call graph + reachability (recursive CTE over `xrefs`)
//! - convenience joins (function-first-instruction, imports-called)
//!
//! All views are read-only and stable across re-creation: dropping and
//! recreating the database via [`crate::db::DbWriter::open_fresh`] re-runs
//! these definitions.
//!
//! ## Cross-check semantics
//!
//! `v_branch_xrefs` is intended to produce *exactly* the same `(source,
//! target, kind)` tuples as the Rust `xref.rs` first pass — given the same
//! input image. [`crate::db::DbWriter::cross_check_branch_xrefs`] queries
//! the symmetric difference and returns the row counts; both should be
//! zero. A non-zero count means the formatter's `mnemonic` column or the
//! kind-classification CASE drifted out of agreement with `xref.rs`, and
//! is worth a one-line warning at log time.
//!
//! ## Layout of the SQL literals
//!
//! Each statement is written across several source lines joined with a
//! trailing `\`. Rust drops the backslash, the newline and the next line's
//! leading whitespace, so every constant is a single-line string whose
//! tokens are separated by exactly one space.

/// `(view_name, CREATE VIEW … SQL)` pairs in the order they must run.
///
/// Later views may depend on earlier ones, so the order is part of the
/// contract. As currently written every view reads only base tables
/// (`instructions`, `xrefs`, `functions`, `labels`) and none reads another
/// view: `v_call_graph` reads `xrefs`, which is the Rust-pass table, and
/// `v_branch_xrefs` is independent of it.
pub const ALL_VIEWS: &[(&str, &str)] = &[
    ("v_branch_xrefs", V_BRANCH_XREFS),
    ("v_call_graph", V_CALL_GRAPH),
    ("v_reachability_from_entry", V_REACHABILITY_FROM_ENTRY),
    ("v_function_first_instruction", V_FUNCTION_FIRST_INSTRUCTION),
    ("v_imports_called", V_IMPORTS_CALLED),
];

/// Branch cross-references derived purely from `instructions.target_hex`.
///
/// Mirrors the kind classification in [`crate::xref::collect_branch_target`]
/// and the short tags returned by [`crate::xref::XrefKind::tag`] (which are
/// what `xrefs.kind` actually stores):
/// - I-form (`b`/`bl`/`ba`/`bla`): `bl`/`bla` → `"call"`, `b`/`ba` → `"j"`
/// - B-form (`bc`/`bcl`/`bca`/`bcla`): always → `"br"`
///
/// Indirect branches (`bclr`/`bcctr`) leave `target_hex` NULL and are
/// excluded from this view by design.
///
/// The `ELSE 'br'` arm means any other mnemonic that carries a non-NULL
/// `target_hex` is also reported as `"br"`; the explicit B-form arm is kept
/// for readability even though it yields the same tag.
const V_BRANCH_XREFS: &str = " \
    CREATE OR REPLACE VIEW v_branch_xrefs AS \
    SELECT \
        address AS source, \
        target_hex AS target, \
        CASE \
            WHEN mnemonic IN ('bl', 'bla') THEN 'call' \
            WHEN mnemonic IN ('b', 'ba') THEN 'j' \
            WHEN mnemonic IN ('bc', 'bcl', 'bca', 'bcla') THEN 'br' \
            ELSE 'br' \
        END AS kind, \
        mnemonic AS instruction, \
        function AS source_func \
    FROM instructions \
    WHERE target_hex IS NOT NULL; \
    ";

/// Call-graph edges resolved against function names.
///
/// Reads from `xrefs` (the Rust-pass table) — this is the canonical source
/// for *all* edge kinds, including indirect/data; SQL can't reconstruct the
/// data-ref edges cleanly because they require register tracking. For pure
/// branch edges, `v_branch_xrefs` produces equivalent rows directly from
/// `instructions`.
///
/// Only rows with `kind = 'call'` are kept, so `edge_kind` is always
/// `'call'`. Both name lookups are `LEFT JOIN`s: `caller_name` and
/// `callee_name` are NULL when no `functions` row matches the address.
const V_CALL_GRAPH: &str = " \
    CREATE OR REPLACE VIEW v_call_graph AS \
    SELECT \
        x.source AS caller_addr, \
        cf.name AS caller_name, \
        x.target AS callee_addr, \
        tf.name AS callee_name, \
        x.kind AS edge_kind \
    FROM xrefs x \
    LEFT JOIN functions cf ON cf.address = x.source_func \
    LEFT JOIN functions tf ON tf.address = x.target \
    WHERE x.kind = 'call'; \
    ";

/// Transitive function-level reachability from the entry point over
/// call/jump/branch edges. Useful for finding dead code
/// (`SELECT address FROM functions
///   WHERE address NOT IN (SELECT addr FROM v_reachability_from_entry)`)
/// and for scoping analysis to the live subset.
///
/// Seeds from the function containing the `entry_point` label and walks
/// the recursive closure: a reachable function's instructions branch into
/// the functions enclosing the branch targets, which are then reachable
/// in turn. `UNION` (not `UNION ALL`) deduplicates to handle call-graph
/// cycles (recursive functions, mutually-recursive pairs).
///
/// Both arms filter out NULL `function` values, so `addr` is never NULL
/// and the `NOT IN` query above is not tripped up by three-valued logic.
const V_REACHABILITY_FROM_ENTRY: &str = " \
    CREATE OR REPLACE VIEW v_reachability_from_entry AS \
    WITH RECURSIVE reach(fn) AS ( \
        SELECT i.function \
        FROM instructions i \
        JOIN labels l ON l.address = i.address \
        WHERE l.name = 'entry_point' AND i.function IS NOT NULL \
        UNION \
        SELECT tgt.function \
        FROM xrefs x \
        JOIN instructions src ON src.address = x.source \
        JOIN instructions tgt ON tgt.address = x.target \
        JOIN reach r ON src.function = r.fn \
        WHERE x.kind IN ('call', 'j', 'br') \
        AND tgt.function IS NOT NULL \
    ) \
    SELECT fn AS addr FROM reach; \
    ";

/// Convenience join: each function's first decoded instruction. Useful for
/// quickly inspecting prologue patterns without computing offsets manually.
///
/// This is an inner join on `instructions.address = functions.address`, so
/// a function with no instruction row at its own address is omitted.
const V_FUNCTION_FIRST_INSTRUCTION: &str = " \
    CREATE OR REPLACE VIEW v_function_first_instruction AS \
    SELECT \
        f.address AS function_addr, \
        f.name AS function_name, \
        i.raw AS first_raw, \
        i.disasm AS first_disasm, \
        i.ext_disasm AS first_ext_disasm \
    FROM functions f \
    JOIN instructions i ON i.address = f.address; \
    ";

/// Which kernel/library imports each function calls. Joins xrefs (call
/// edges) against the labels table to surface import names.
///
/// No `DISTINCT` or `GROUP BY` is applied: the view yields one row per
/// matching `xrefs` row, so a function can appear several times for the
/// same import. `function_name` is NULL when `source_func` has no matching
/// `functions` row (`LEFT JOIN`).
const V_IMPORTS_CALLED: &str = " \
    CREATE OR REPLACE VIEW v_imports_called AS \
    SELECT \
        x.source_func AS function_addr, \
        f.name AS function_name, \
        x.target AS import_addr, \
        l.name AS import_name \
    FROM xrefs x \
    JOIN labels l ON l.address = x.target \
    LEFT JOIN functions f ON f.address = x.source_func \
    WHERE x.kind = 'call' AND l.kind = 'import'; \
    ";