Files
xenia-rs/crates/xenia-analysis/tests/db_schema_golden.rs
MechaCat02 38d8871e8d M6: addr_mode column on xrefs + extended store/load classes
Adds finer-grained addressing-mode classification to every data xref row
plus new dispatch for instruction families not previously emitted:
- New `xrefs.addr_mode VARCHAR NULL` column. NULL for control-flow edges
  (call / ind_call / j / br); one of d_form / lis_addi / lis_ori /
  multiword / x_form_indexed / x_form_byterev / atomic / dcbz for data
  edges. Index idx_xrefs_addr_mode.
- New `xenia_analysis::xref::AddrMode` enum + Xref::addr_mode field.
- Opcode 46/47 (lmw/stmw) expand to one xref per slot — D-form multi-word
  load/store now resolves all (32-rS) consecutive addresses.
- Opcode 31 X-form dispatch — stwx/stbx/sthx/stwux/stbux/sthux/stdx/stdux,
  lwzx/lbzx/lhzx/lhax/lwzux/lbzux/lhzux/lhaux/ldx/ldux,
  stwcx./stdcx. (atomic),
  stwbrx/sthbrx/lwbrx/lhbrx (byte-reverse),
  dcbz (cache-line clear).
- X-form rows are emitted ONLY when both rA and rB resolve to known
  constants (rare but present); the dominant runtime-indexed pattern
  remains correctly skipped.

Sylpheed yield (regen on master + merge):
- 442 newly-detected x_form_indexed reads (lwzx/lhzx into static tables).
- 40 newly-detected atomic writes (stwcx./stdcx. with resolvable address).
- 28,834 lis_addi refs, 18,485 d_form reads, 3,288 d_form writes — every
  pre-existing data row now tagged.
- 0 multiword / dcbz / byterev (these instructions exist but aren't on
  lis+addi-tracked code paths).

Tests 633→636 (+3 xref unit tests covering AddrMode tag uniqueness,
data-edge addr_mode round-trip, control-edge None invariant). Schema
golden updated (xrefs gains addr_mode column).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 21:38:47 +02:00

295 lines
10 KiB
Rust

//! DB schema golden — locks the column layout (names + types) of every
//! table written by `DbWriter`. A schema change here without a fixture
//! update fails the test, forcing a conscious decision before downstream
//! query consumers break.
//!
//! The fixture is constructed in-process (no XEX/ISO needed): a small
//! synthetic PE-shaped byte slice with one `.text` section of 4
//! instructions, plus an empty import-library list and one detected
//! function.
use std::collections::{BTreeMap, HashMap};
use std::io::Write;
use duckdb::Connection;
use xenia_analysis::DbWriter;
use xenia_analysis::formatter::DisasmInfo;
use xenia_analysis::func::{FuncAnalysis, FuncInfo};
use xenia_analysis::xref::XrefMap;
use xenia_xex::pe::PeSection;
/// Assemble the in-memory fixture image.
///
/// Returns a tuple of:
/// * a zero-filled byte buffer whose last 16 bytes (starting at offset
///   `0x1000`) hold four big-endian instruction words: mflr / nop / blr / nop;
/// * a single `.text` section descriptor covering exactly those 16 bytes;
/// * an empty import-library list.
fn synthetic_pe() -> (Vec<u8>, Vec<PeSection>, Vec<xenia_xex::header::ImportLibrary>) {
    // `.text` begins at image_base + 0x1000, i.e. RVA 0x1000. The DB writer
    // expects pe[rva] to be the byte stored at that RVA, so the buffer has to
    // span the leading 0x1000 bytes as well as the section itself.
    const RVA: usize = 0x1000;
    const TEXT: [u32; 4] = [
        // mflr r12 == mfspr r12, LR: primary opcode 31, extended opcode 339,
        // SPR 8. The ISA stores the two 5-bit SPR halves swapped, hence the
        // SPR field value (8 << 5).
        (31u32 << 26) | (12 << 21) | ((8 << 5) << 11) | (339 << 1),
        // nop, encoded as `ori r0, r0, 0`.
        0x60000000,
        // blr, encoded as `bclr 20, 0`.
        (19u32 << 26) | (20 << 21) | (16 << 1),
        // nop
        0x60000000,
    ];

    let mut pe = vec![0u8; RVA + 16];
    // Everything from RVA onward is exactly four 4-byte slots; fill each one
    // with the big-endian encoding of the matching instruction word.
    for (slot, word) in pe[RVA..].chunks_exact_mut(4).zip(TEXT.iter()) {
        slot.copy_from_slice(&word.to_be_bytes());
    }

    let text_section = PeSection {
        name: ".text".to_string(),
        virtual_address: 0x1000,
        virtual_size: 16,
        raw_offset: 0x1000,
        raw_size: 16,
        flags: 0x60000020, // CODE | EXECUTE | READ
    };

    // The fixture deliberately carries no imports.
    (pe, vec![text_section], Vec::new())
}
/// Build a `FuncAnalysis` containing exactly one function record.
///
/// The function starts at `image_base + 0x1000` and ends 16 bytes later, so
/// it covers all four instructions of the fixture's `.text` section. It is
/// recorded as a leaf with no stack frame, no saved GPRs and no pdata
/// information; the save/restore bases and the pdata entry list are empty.
fn synthetic_func_analysis(image_base: u32) -> FuncAnalysis {
    let start = image_base + 0x1000;
    let sole_function = FuncInfo {
        start,
        end: start + 16,
        frame_size: 0,
        saved_gprs: 0,
        is_leaf: true,
        is_saverestore: false,
        pdata_validated: false,
        pdata_length: None,
    };

    FuncAnalysis {
        // Keyed by the function's start address.
        functions: BTreeMap::from([(start, sole_function)]),
        save_gpr_base: None,
        restore_gpr_base: None,
        pdata_entries: Vec::new(),
    }
}
/// Golden test for the database schema.
///
/// Writes the synthetic fixture through `DbWriter` into a scratch DuckDB
/// file, reopens it, and checks:
/// 1. every listed table has exactly the expected `(column name, type)`
///    sequence, in order;
/// 2. the `instructions` table holds 4 rows, none with a non-NULL
///    `target_hex`;
/// 3. every expected SQL view exists.
///
/// The scratch file is removed when the test finishes, whether it passes or
/// panics on a failed assertion.
#[test]
fn db_schema_matches_expected_columns() {
    // Removes the scratch database on drop (best effort). Because it is
    // declared before the connection below, the connection is dropped first
    // during normal return and during unwinding, so the file is already
    // closed when it is deleted.
    struct TempDb(std::path::PathBuf);
    impl Drop for TempDb {
        fn drop(&mut self) {
            let _ = std::fs::remove_file(&self.0);
        }
    }

    let (pe, sections, libs) = synthetic_pe();
    let image_base = 0x82000000u32;
    let entry = image_base + 0x1000;
    let info = DisasmInfo {
        image_base,
        entry_point: entry,
        original_pe_name: Some("synthetic.exe"),
        title_id: Some(0xDEADBEEF),
        media_id: Some(0xCAFEF00D),
        sections: &sections,
        import_libraries: &libs,
    };
    let func_analysis = synthetic_func_analysis(image_base);
    let mut labels: HashMap<u32, String> = HashMap::new();
    labels.insert(entry, "entry_point".to_string());
    let xrefs: XrefMap = XrefMap::new();

    // Include the process id in the file name so concurrent runs of this test
    // binary do not clobber each other's database.
    let tmp = TempDb(std::env::temp_dir().join(format!(
        "xenia_rs_schema_golden_{}.duckdb",
        std::process::id()
    )));
    // Clear any stale file left at this path by an earlier aborted run.
    let _ = std::fs::remove_file(&tmp.0);

    {
        // Scoped so the writer is dropped before the file is reopened below.
        let mut w = DbWriter::open_fresh(&tmp.0).expect("open fresh DB");
        w.write_base(&info).expect("write_base");
        w.ingest_instructions(&pe, &info, &func_analysis, &labels)
            .expect("ingest_instructions");
        w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs, &[], &[])
            .expect("write_analysis_results");
        w.create_sql_views().expect("create_sql_views");
    }

    let conn = Connection::open(&tmp.0).expect("reopen DB");

    // Lock the column layout per table. Pairs are (name, type).
    let expected: &[(&str, &[(&str, &str)])] = &[
        ("metadata", &[
            ("key", "VARCHAR"),
            ("value", "VARCHAR"),
        ]),
        ("sections", &[
            ("name", "VARCHAR"),
            ("virtual_address", "BIGINT"),
            ("virtual_size", "BIGINT"),
            ("raw_offset", "BIGINT"),
            ("raw_size", "BIGINT"),
            ("flags", "BIGINT"),
            ("is_code", "BOOLEAN"),
        ]),
        ("imports", &[
            ("library", "VARCHAR"),
            ("ordinal", "BIGINT"),
            ("name", "VARCHAR"),
            ("record_type", "BIGINT"),
            ("address", "BIGINT"),
        ]),
        ("instructions", &[
            ("address", "BIGINT"),
            ("raw", "BIGINT"),
            ("mnemonic", "VARCHAR"),
            ("operands", "VARCHAR"),
            ("disasm", "VARCHAR"),
            ("ext_mnemonic", "VARCHAR"),
            ("ext_operands", "VARCHAR"),
            ("ext_disasm", "VARCHAR"),
            ("target_hex", "BIGINT"),
            ("section", "VARCHAR"),
            ("function", "BIGINT"),
            ("label", "VARCHAR"),
        ]),
        ("functions", &[
            ("address", "BIGINT"),
            ("name", "VARCHAR"),
            ("end_address", "BIGINT"),
            ("frame_size", "BIGINT"),
            ("saved_gprs", "BIGINT"),
            ("is_leaf", "BOOLEAN"),
            ("is_saverestore", "BOOLEAN"),
            ("pdata_validated", "BOOLEAN"),
            ("pdata_length", "BIGINT"),
        ]),
        ("pdata_entries", &[
            ("begin_address", "BIGINT"),
            ("end_address", "BIGINT"),
            ("function_length", "BIGINT"),
            ("prolog_length", "BIGINT"),
            ("flags", "BIGINT"),
        ]),
        ("labels", &[
            ("address", "BIGINT"),
            ("name", "VARCHAR"),
            ("kind", "VARCHAR"),
        ]),
        ("demangled_names", &[
            ("address", "BIGINT"),
            ("mangled", "VARCHAR"),
            ("raw_demangled", "VARCHAR"),
            ("namespace_path", "VARCHAR"),
            ("class_name", "VARCHAR"),
            ("method_name", "VARCHAR"),
            ("params_signature", "VARCHAR"),
        ]),
        ("vtables", &[
            ("address", "BIGINT"),
            ("length", "BIGINT"),
            ("col_address", "BIGINT"),
            ("class_name", "VARCHAR"),
            ("rtti_present", "BOOLEAN"),
            ("base_classes_json", "VARCHAR"),
        ]),
        ("methods", &[
            ("vtable_address", "BIGINT"),
            ("slot", "BIGINT"),
            ("function_address", "BIGINT"),
            ("mangled_name", "VARCHAR"),
            ("demangled_name", "VARCHAR"),
        ]),
        ("classes", &[
            ("name", "VARCHAR"),
            ("vtable_address", "BIGINT"),
            ("rtti_present", "BOOLEAN"),
            ("base_classes_json", "VARCHAR"),
        ]),
        ("strings", &[
            ("address", "BIGINT"),
            ("encoding", "VARCHAR"),
            ("length", "BIGINT"),
            ("content", "VARCHAR"),
        ]),
        ("xrefs", &[
            ("source", "BIGINT"),
            ("target", "BIGINT"),
            ("kind", "VARCHAR"),
            ("addr_mode", "VARCHAR"),
            ("instruction", "VARCHAR"),
            ("source_func", "BIGINT"),
            ("source_label", "VARCHAR"),
            ("target_label", "VARCHAR"),
        ]),
    ];

    // Collect every discrepancy across all tables before failing, so a single
    // run reports the complete drift rather than only the first mismatch.
    let mut errs: Vec<String> = Vec::new();
    for (table, cols) in expected {
        let mut stmt = conn
            .prepare(&format!("PRAGMA table_info('{}')", table))
            .unwrap_or_else(|e| panic!("prepare PRAGMA for {table}: {e}"));
        // Result columns 1 and 2 of `PRAGMA table_info` are read as the
        // column name and its declared type.
        let rows: Vec<(String, String)> = stmt
            .query_map([], |row| {
                let name: String = row.get(1)?;
                let ty: String = row.get(2)?;
                Ok((name, ty))
            })
            .unwrap_or_else(|e| panic!("query PRAGMA for {table}: {e}"))
            .collect::<Result<Vec<(String, String)>, _>>()
            .unwrap_or_else(|e| panic!("read PRAGMA row for {table}: {e}"));

        if rows.len() != cols.len() {
            writeln!(
                std::io::stderr(),
                "{table}: column count mismatch (got {}, expected {})",
                rows.len(),
                cols.len()
            )
            .ok();
            // The pairwise comparison below stops at the shorter list, so name
            // the actual columns here to make added/removed ones visible.
            let actual: Vec<&str> = rows.iter().map(|(name, _)| name.as_str()).collect();
            errs.push(format!(
                "{table}: count {} vs {} (actual columns: {})",
                rows.len(),
                cols.len(),
                actual.join(", ")
            ));
        }

        for (i, (got, expected_col)) in rows.iter().zip(cols.iter()).enumerate() {
            if got.0 != expected_col.0 || got.1 != expected_col.1 {
                errs.push(format!(
                    "{table} col {i}: got ({}, {}) expected ({}, {})",
                    got.0, got.1, expected_col.0, expected_col.1
                ));
            }
        }
    }
    assert!(errs.is_empty(), "schema drift detected:\n  {}", errs.join("\n  "));

    // Verify row counts in the populated tables.
    let n_instr: i64 = conn
        .query_row("SELECT COUNT(*) FROM instructions", [], |r| r.get(0))
        .unwrap();
    assert_eq!(n_instr, 4, "expected 4 instruction rows from the synthetic PE");

    // The synthetic mflr should produce target_hex = NULL, blr likewise
    // (indirect). COUNT(column) counts only non-NULL values.
    let n_with_target: i64 = conn
        .query_row("SELECT COUNT(target_hex) FROM instructions", [], |r| r.get(0))
        .unwrap();
    assert_eq!(n_with_target, 0, "indirect-only fixture should have no direct branch targets");

    // SQL views must be queryable. The `_` in SQL LIKE is a single-char
    // wildcard, so we list the names explicitly rather than `LIKE 'v_%'`
    // (which also matches DuckDB's built-in `views` system view).
    let expected_views = [
        "v_branch_xrefs",
        "v_call_graph",
        "v_function_first_instruction",
        "v_imports_called",
        "v_indirect_reachability_from_entry",
        "v_reachability_from_entry",
    ];
    for v in expected_views {
        let exists: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM duckdb_views() WHERE view_name = ?",
                [v],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(exists, 1, "missing SQL view: {v}");
    }
    // No explicit cleanup here: `conn` is dropped first, then the `TempDb`
    // guard deletes the scratch file.
}