Closes the four remaining deferred follow-up items in one bundle. All four
are smaller-scope and additive; lockstep determinism unaffected
(analyzer-only changes).

## M9.5 — __CxxFrameHandler scope-table parsing

- New `xenia_analysis::eh_scope` module. Magic-scans .rdata for the three
  documented MSVC FuncInfo signatures (0x19930520/21/22) on 4-byte
  alignment. Each match is parsed as the documented struct (BE u32 fields),
  with sanity caps on max_state / n_try_blocks / pointer validity.
- Walks pUnwindMap (UnwindMapEntry, 8 bytes) and pTryBlockMap
  (TryBlockMapEntry, 20 bytes) into one row each.
- New tables eh_funcinfo, eh_unwind_map, eh_try_blocks.
- Sylpheed yield: 2,588 FuncInfo (all version 0x19930522) / 10,019 unwind
  entries / 315 try-blocks.

## M11.5 — Static-init driver chain detection

- New `xenia_analysis::static_init` module. Walks every function looking
  for the canonical _initterm loop: lwz cursor; mtctr; bcctrl; addi cursor,
  cursor, 4 bounded by a compare against another constant register.
  Extracts (array_start, array_end) and reads the array.
- Reuses `function_pointer_arrays` table — drivers' arrays land with
  kind='static_init' (replacing M11's prologue-heuristic output where the
  structurally-grounded pattern fires).
- Sylpheed yield: 0 drivers detected — the binary's static-init structure
  does not match the canonical CRT loop. Infrastructure ready; future M11.6
  can relax.

## VMX vector-store xrefs (M6 follow-up)

- Adds AltiVec/VMX X-form load/store XOs to the M6 opcode-31 dispatch:
  lvx/lvxl/lvebx/lvehx/lvewx (reads) and stvx/stvxl/stvebx/stvehx/stvewx
  (writes), all addr_mode='x_form_indexed'. Static resolution still
  requires both rA and rB constant.
- Sylpheed yield: 110 newly-detected stvx writes.

## Shift_JIS + UTF-8 localised-string detection (M7 follow-up)

- Extends `xenia_analysis::strings::analyze` with scan_shift_jis (JIS X 0208
  lead/trail byte ranges + half-width katakana pass-through) and scan_utf8
  (2- and 3-byte sequences). At least one multi-byte unit required so
  pure-ASCII strings aren't double-counted.
- SJIS bytes rendered as \xHH escapes for diagnostic readability; full
  SJIS→UTF-8 decoding deferred.
- Sylpheed yield: 790 Shift_JIS strings (Japanese debug + UI text) + 39
  UTF-8.

## Tests

- +2 EH (parses_minimal_funcinfo_v0, rejects_bogus_max_state)
- +2 static_init (detects_canonical_initterm_loop,
  rejects_function_without_pattern)
- +2 strings (detects_shift_jis_string, detects_utf8_multibyte_string)

Tests 649→655 (+6 unit tests). DB schema golden + write_analysis_results
signature updated for new EH parameter.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
363 lines
12 KiB
Rust
363 lines
12 KiB
Rust
//! DB schema golden — locks the column layout (names + types) of every
//! table written by `DbWriter`. A schema change here without a fixture
//! update fails the test, forcing a conscious decision before downstream
//! query consumers break.
//!
//! The fixture is constructed in-process (no XEX/ISO needed): a small
//! synthetic PE-shaped byte slice with one `.text` section of 4
//! instructions, plus an empty import-library list and one detected
//! function.
|
|
|
|
use std::collections::{BTreeMap, HashMap};
|
|
use std::io::Write;
|
|
|
|
use duckdb::Connection;
|
|
|
|
use xenia_analysis::DbWriter;
|
|
use xenia_analysis::formatter::DisasmInfo;
|
|
use xenia_analysis::func::{FuncAnalysis, FuncInfo};
|
|
use xenia_analysis::xref::XrefMap;
|
|
use xenia_xex::pe::PeSection;
|
|
|
|
/// Build a 16-byte `.text` section: 4 instructions (mflr / nop / blr / nop).
|
|
fn synthetic_pe() -> (Vec<u8>, Vec<PeSection>, Vec<xenia_xex::header::ImportLibrary>) {
|
|
// VA layout: image_base + 0x1000 = .text start (so RVA = 0x1000).
|
|
// The DB writer expects pe[rva] to hold the byte at that RVA, so the
|
|
// buffer must be at least 0x1000 + section_size bytes long.
|
|
const RVA: usize = 0x1000;
|
|
const TEXT: [u32; 4] = [
|
|
// mfspr r12, LR (a.k.a. mflr r12) — opcode 31, xo 339, spr 8 (LR).
|
|
// Encoded with spr halves swapped per the ISA: spr_field = (8<<5).
|
|
(31u32 << 26) | (12 << 21) | ((8 << 5) << 11) | (339 << 1),
|
|
0x60000000, // nop (ori r0, r0, 0)
|
|
(19u32 << 26) | (20 << 21) | (16 << 1), // blr (bclr 20, 0)
|
|
0x60000000, // nop
|
|
];
|
|
|
|
let mut pe = vec![0u8; RVA + 16];
|
|
for (i, &word) in TEXT.iter().enumerate() {
|
|
pe[RVA + i * 4..RVA + i * 4 + 4].copy_from_slice(&word.to_be_bytes());
|
|
}
|
|
|
|
let sections = vec![PeSection {
|
|
name: ".text".to_string(),
|
|
virtual_address: 0x1000,
|
|
virtual_size: 16,
|
|
raw_offset: 0x1000,
|
|
raw_size: 16,
|
|
flags: 0x60000020, // CODE | EXECUTE | READ
|
|
}];
|
|
|
|
let import_libraries = vec![]; // No imports in the fixture.
|
|
(pe, sections, import_libraries)
|
|
}
|
|
|
|
fn synthetic_func_analysis(image_base: u32) -> FuncAnalysis {
|
|
// Single function covering all four .text instructions.
|
|
let entry = image_base + 0x1000;
|
|
let mut functions = BTreeMap::new();
|
|
functions.insert(
|
|
entry,
|
|
FuncInfo {
|
|
start: entry,
|
|
end: entry + 16,
|
|
frame_size: 0,
|
|
saved_gprs: 0,
|
|
is_leaf: true,
|
|
is_saverestore: false,
|
|
pdata_validated: false,
|
|
pdata_length: None,
|
|
has_eh: false,
|
|
},
|
|
);
|
|
FuncAnalysis {
|
|
functions,
|
|
save_gpr_base: None,
|
|
restore_gpr_base: None,
|
|
pdata_entries: Vec::new(),
|
|
}
|
|
}
|
|
|
|
/// Schema golden test.
///
/// Writes the synthetic fixture through `DbWriter` into a fresh DuckDB file
/// in the system temp directory, reopens that file, and compares the
/// `PRAGMA table_info` output of every listed table (column name and type,
/// in order) against the expected layout below. All mismatches are
/// collected and reported together in a single assertion.
///
/// Afterwards it also checks that the fixture produced four instruction
/// rows, that none of them has a `target_hex`, and that each expected SQL
/// view exists exactly once.
#[test]
fn db_schema_matches_expected_columns() {
    let (pe, sections, libs) = synthetic_pe();
    let image_base = 0x82000000u32;
    let entry = image_base + 0x1000;

    let info = DisasmInfo {
        image_base,
        entry_point: entry,
        original_pe_name: Some("synthetic.exe"),
        title_id: Some(0xDEADBEEF),
        media_id: Some(0xCAFEF00D),
        sections: &sections,
        import_libraries: &libs,
    };

    let func_analysis = synthetic_func_analysis(image_base);
    let mut labels: HashMap<u32, String> = HashMap::new();
    labels.insert(entry, "entry_point".to_string());
    let xrefs: XrefMap = XrefMap::new();

    // Start from a clean slate: a leftover file from an earlier run is
    // removed first (the result is ignored because the file may not exist).
    let tmp = std::env::temp_dir().join("xenia_rs_schema_golden.duckdb");
    let _ = std::fs::remove_file(&tmp);

    // Scope the writer so it is dropped before the file is reopened below.
    {
        let mut w = DbWriter::open_fresh(&tmp).expect("open fresh DB");
        w.write_base(&info).expect("write_base");
        w.ingest_instructions(&pe, &info, &func_analysis, &labels)
            .expect("ingest_instructions");
        w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs, &[], &[], &[], None, &[])
            .expect("write_analysis_results");
        w.create_sql_views().expect("create_sql_views");
    }

    let conn = Connection::open(&tmp).expect("reopen DB");

    // Lock the column layout per table. Pairs are (name, type).
    let expected: &[(&str, &[(&str, &str)])] = &[
        ("metadata", &[
            ("key", "VARCHAR"),
            ("value", "VARCHAR"),
        ]),
        ("sections", &[
            ("name", "VARCHAR"),
            ("virtual_address", "BIGINT"),
            ("virtual_size", "BIGINT"),
            ("raw_offset", "BIGINT"),
            ("raw_size", "BIGINT"),
            ("flags", "BIGINT"),
            ("is_code", "BOOLEAN"),
        ]),
        ("imports", &[
            ("library", "VARCHAR"),
            ("ordinal", "BIGINT"),
            ("name", "VARCHAR"),
            ("record_type", "BIGINT"),
            ("address", "BIGINT"),
        ]),
        ("instructions", &[
            ("address", "BIGINT"),
            ("raw", "BIGINT"),
            ("mnemonic", "VARCHAR"),
            ("operands", "VARCHAR"),
            ("disasm", "VARCHAR"),
            ("ext_mnemonic", "VARCHAR"),
            ("ext_operands", "VARCHAR"),
            ("ext_disasm", "VARCHAR"),
            ("target_hex", "BIGINT"),
            ("section", "VARCHAR"),
            ("function", "BIGINT"),
            ("label", "VARCHAR"),
        ]),
        ("functions", &[
            ("address", "BIGINT"),
            ("name", "VARCHAR"),
            ("end_address", "BIGINT"),
            ("frame_size", "BIGINT"),
            ("saved_gprs", "BIGINT"),
            ("is_leaf", "BOOLEAN"),
            ("is_saverestore", "BOOLEAN"),
            ("pdata_validated", "BOOLEAN"),
            ("pdata_length", "BIGINT"),
            ("has_eh", "BOOLEAN"),
        ]),
        ("pdata_entries", &[
            ("begin_address", "BIGINT"),
            ("end_address", "BIGINT"),
            ("function_length", "BIGINT"),
            ("prolog_length", "BIGINT"),
            ("flags", "BIGINT"),
        ]),
        ("labels", &[
            ("address", "BIGINT"),
            ("name", "VARCHAR"),
            ("kind", "VARCHAR"),
        ]),
        ("demangled_names", &[
            ("address", "BIGINT"),
            ("mangled", "VARCHAR"),
            ("raw_demangled", "VARCHAR"),
            ("namespace_path", "VARCHAR"),
            ("class_name", "VARCHAR"),
            ("method_name", "VARCHAR"),
            ("params_signature", "VARCHAR"),
        ]),
        ("vtables", &[
            ("address", "BIGINT"),
            ("length", "BIGINT"),
            ("col_address", "BIGINT"),
            ("class_name", "VARCHAR"),
            ("rtti_present", "BOOLEAN"),
            ("base_classes_json", "VARCHAR"),
        ]),
        ("methods", &[
            ("vtable_address", "BIGINT"),
            ("slot", "BIGINT"),
            ("function_address", "BIGINT"),
            ("mangled_name", "VARCHAR"),
            ("demangled_name", "VARCHAR"),
        ]),
        ("classes", &[
            ("name", "VARCHAR"),
            ("vtable_address", "BIGINT"),
            ("rtti_present", "BOOLEAN"),
            ("base_classes_json", "VARCHAR"),
        ]),
        ("strings", &[
            ("address", "BIGINT"),
            ("encoding", "VARCHAR"),
            ("length", "BIGINT"),
            ("content", "VARCHAR"),
        ]),
        ("tls_info", &[
            ("raw_data_start", "BIGINT"),
            ("raw_data_end", "BIGINT"),
            ("index_address", "BIGINT"),
            ("callback_array", "BIGINT"),
            ("zero_fill_size", "BIGINT"),
            ("characteristics", "BIGINT"),
        ]),
        ("tls_callbacks", &[
            ("slot", "BIGINT"),
            ("address", "BIGINT"),
        ]),
        ("function_pointer_arrays", &[
            ("address", "BIGINT"),
            ("length", "BIGINT"),
            ("kind", "VARCHAR"),
        ]),
        ("function_pointer_array_entries", &[
            ("array_address", "BIGINT"),
            ("slot", "BIGINT"),
            ("function_address", "BIGINT"),
        ]),
        ("indirect_dispatch_sites", &[
            ("dispatch_pc", "BIGINT"),
            ("vptr_offset", "BIGINT"),
            ("slot", "BIGINT"),
            ("candidate_count", "BIGINT"),
        ]),
        ("indirect_dispatch_candidates", &[
            ("dispatch_pc", "BIGINT"),
            ("vtable_address", "BIGINT"),
            ("method_address", "BIGINT"),
        ]),
        ("vptr_writes", &[
            ("writer_pc", "BIGINT"),
            ("vtable_address", "BIGINT"),
            ("vptr_offset", "BIGINT"),
            ("writer_function", "BIGINT"),
        ]),
        ("eh_funcinfo", &[
            ("address", "BIGINT"),
            ("magic", "BIGINT"),
            ("max_state", "BIGINT"),
            ("p_unwind_map", "BIGINT"),
            ("n_try_blocks", "BIGINT"),
            ("p_try_block_map", "BIGINT"),
            ("n_ip_map_entries", "BIGINT"),
            ("p_ip_to_state_map", "BIGINT"),
            ("p_es_type_list", "BIGINT"),
            ("eh_flags", "BIGINT"),
        ]),
        ("eh_unwind_map", &[
            ("funcinfo_address", "BIGINT"),
            ("state_index", "BIGINT"),
            ("to_state", "BIGINT"),
            ("action_pc", "BIGINT"),
        ]),
        ("eh_try_blocks", &[
            ("funcinfo_address", "BIGINT"),
            ("try_index", "BIGINT"),
            ("try_low", "BIGINT"),
            ("try_high", "BIGINT"),
            ("catch_high", "BIGINT"),
            ("n_catches", "BIGINT"),
            ("p_handler_array", "BIGINT"),
        ]),
        ("xrefs", &[
            ("source", "BIGINT"),
            ("target", "BIGINT"),
            ("kind", "VARCHAR"),
            ("addr_mode", "VARCHAR"),
            ("instruction", "VARCHAR"),
            ("source_func", "BIGINT"),
            ("source_label", "VARCHAR"),
            ("target_label", "VARCHAR"),
        ]),
    ];

    // Collect every discrepancy instead of failing on the first one, so a
    // single run reports the full extent of the drift.
    let mut errs: Vec<String> = Vec::new();
    for (table, cols) in expected {
        let mut stmt = conn
            .prepare(&format!("PRAGMA table_info('{}')", table))
            .unwrap_or_else(|e| panic!("prepare PRAGMA for {table}: {e}"));
        // Result columns 1 and 2 are read as the column's name and type.
        let rows: Vec<(String, String)> = stmt
            .query_map([], |row| {
                let name: String = row.get(1)?;
                let ty: String = row.get(2)?;
                Ok((name, ty))
            })
            .expect("query")
            .map(|r| r.unwrap())
            .collect();

        // `zip` below stops at the shorter side, so a length difference has
        // to be reported separately or extra/missing columns would be missed.
        if rows.len() != cols.len() {
            writeln!(
                std::io::stderr(),
                "{table}: column count mismatch (got {}, expected {})",
                rows.len(),
                cols.len()
            ).ok();
            errs.push(format!("{table}: count {} vs {}", rows.len(), cols.len()));
        }
        for (i, (got, expected_col)) in rows.iter().zip(cols.iter()).enumerate() {
            if got.0 != expected_col.0 || got.1 != expected_col.1 {
                errs.push(format!(
                    "{table} col {i}: got ({}, {}) expected ({}, {})",
                    got.0, got.1, expected_col.0, expected_col.1
                ));
            }
        }
    }

    assert!(errs.is_empty(), "schema drift detected:\n {}", errs.join("\n "));

    // Verify row counts in the populated tables.
    let n_instr: i64 = conn
        .query_row("SELECT COUNT(*) FROM instructions", [], |r| r.get(0))
        .unwrap();
    assert_eq!(n_instr, 4, "expected 4 instruction rows from the synthetic PE");

    // The synthetic mflr should produce target_hex = NULL, blr likewise (indirect).
    // COUNT(target_hex) counts only non-NULL values, so it must be zero.
    let n_with_target: i64 = conn
        .query_row("SELECT COUNT(target_hex) FROM instructions", [], |r| r.get(0))
        .unwrap();
    assert_eq!(n_with_target, 0, "indirect-only fixture should have no direct branch targets");

    // SQL views must be queryable. The `_` in SQL LIKE is a single-char
    // wildcard, so we list the names explicitly rather than `LIKE 'v_%'`
    // (which also matches DuckDB's built-in `views` system view).
    let expected_views = [
        "v_branch_xrefs",
        "v_call_graph",
        "v_function_first_instruction",
        "v_imports_called",
        "v_indirect_reachability_from_entry",
        "v_reachability_from_entry",
    ];
    for v in expected_views {
        let exists: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM duckdb_views() WHERE view_name = ?",
                [v],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(exists, 1, "missing SQL view: {v}");
    }

    // Best-effort cleanup; only reached when every assertion above passed.
    let _ = std::fs::remove_file(&tmp);
}
|