M8+M9+M10+M11+M12: LOW-tier milestones — funcptr-arrays, EH flag, TLS, lr-trace

Five LOW-priority milestones bundled. Total ~700 LOC across 11 files.

## M9 — has_eh derived from pdata.flags exception bit
- New `functions.has_eh BOOLEAN NOT NULL` column. Derived from M1's
  already-parsed `pdata.flags` (bit 31 of the packed word — the
  exception-handler-present flag, distinct from bit 30 which is the
  always-1 32-bit-code flag). Index idx_functions_has_eh.
- Sylpheed: 2,975 of 23,073 pdata-validated functions have EH (12.9%).

## M10 — .tls section / IMAGE_TLS_DIRECTORY32 parser
- New `xenia_xex::tls::parse_tls` parses the directory + zero-terminated
  callback array. Returns None when the binary has no .tls section.
- New `tls_info` (singleton row) + `tls_callbacks(slot, address)` tables.
- New `DbWriter::write_tls()` no-ops on None.
- Sylpheed has no .tls section → 0 rows; infra ready for binaries with
  __declspec(thread).

## M8 + M11 — function_pointer_arrays (dispatch tables + static initialisers)
- New `xenia_analysis::funcptr_arrays::analyze` widens M3's vtable scan:
  detects runs of ≥2 function pointers in .rdata and classifies each as
  `vtable` (M3 re-emit), `dispatch_table` (M8), or `static_init` (M11)
  via a constructor-prologue heuristic (mfspr + small stwu).
- New tables `function_pointer_arrays(address PK, length, kind)` and
  `function_pointer_array_entries(array_address, slot, function_address)`.
- Sylpheed: 722 vtables + 388 dispatch_tables = 1,110 arrays / 6,347 slots.
  0 static_init detected (Sylpheed's ctors don't all match the
  conservative heuristic; M11.5 future work can chain via the entry-
  point's static-init driver).

## M12 — --lr-trace runtime canary-diff harness
- New CLI `exec --lr-trace=PC[,PC,...]` and `--lr-trace-out=PATH` flags.
  Symbolic resolution (Class::method, Class::*) via M4 lookup. Env vars
  XENIA_LR_TRACE / XENIA_LR_TRACE_OUT also work.
- New `KernelState::lr_trace_pcs` + `lr_trace_writer` + helper
  `fire_lr_trace_if_match(hw_id)` invoked from the per-instr probe slot.
- JSONL output: pc/tid/hw/cycle/r3/r4/r5/r6/lr — superset of what
  xenia-canary's --log_lr_on_pc patch emits, with a cycle counter for
  cross-run reproducibility. Diff-friendly via `jq`.
- Lockstep digest unaffected: smoke test on entry-point PC fires once
  with cycle=0/lr=BCBCBCBC/all-GPR-zero (correct initial state).

Tests 636→640 (+2 TLS tests, +2 funcptr_arrays tests). Schema golden
updated for new tables + has_eh column. Lockstep determinism preserved
(instructions=2000005 ×2 reruns identical).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-08 22:29:35 +02:00
parent 85d1603124
commit 5af792c9fc
11 changed files with 852 additions and 16 deletions

View File

@@ -306,7 +306,8 @@ impl DbWriter {
///
/// `vtables` is the M3 result; pass an empty slice when the caller has
/// not run the vtable scan (the tables are still created, just empty).
/// `strings` is the M7 result; same convention.
/// `strings` is the M7 result; same convention. `funcptr_arrays` is the
/// M8/M11 result.
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
pub fn write_analysis_results(
&mut self,
@@ -317,6 +318,7 @@ impl DbWriter {
xrefs: &XrefMap,
vtables: &[crate::vtables::Vtable],
strings: &[crate::strings::DetectedString],
funcptr_arrays: &[crate::funcptr_arrays::FuncPtrArray],
) -> anyhow::Result<()> {
self.conn.execute_batch("
CREATE TABLE functions (
@@ -328,7 +330,8 @@ impl DbWriter {
is_leaf BOOLEAN NOT NULL, -- true if the function has no outgoing calls
is_saverestore BOOLEAN NOT NULL, -- true if __savegprlr_* / __restgprlr_* stub
pdata_validated BOOLEAN NOT NULL, -- true if .pdata RUNTIME_FUNCTION exists at this VA
pdata_length BIGINT -- length in bytes per .pdata; NULL if no pdata entry
pdata_length BIGINT, -- length in bytes per .pdata; NULL if no pdata entry
has_eh BOOLEAN NOT NULL -- M9: pdata exception-flag bit set; function has C++ EH/SEH
);
CREATE TABLE pdata_entries (
@@ -377,6 +380,33 @@ impl DbWriter {
content VARCHAR NOT NULL -- UTF-8 representation of the string
);
CREATE TABLE tls_info (
raw_data_start BIGINT NOT NULL, -- VA of TLS template start
raw_data_end BIGINT NOT NULL, -- VA one-past-end of TLS template
index_address BIGINT NOT NULL, -- VA of u32 the loader writes the assigned slot index into
callback_array BIGINT NOT NULL, -- VA of zero-terminated callback array (0 if none)
zero_fill_size BIGINT NOT NULL, -- bytes of zero-fill appended after raw template
characteristics BIGINT NOT NULL -- IMAGE_TLS_DIRECTORY characteristics flags
);
CREATE TABLE tls_callbacks (
slot BIGINT PRIMARY KEY, -- 0-based index in the callback array
address BIGINT NOT NULL -- VA of callback function
);
CREATE TABLE function_pointer_arrays (
address BIGINT PRIMARY KEY, -- absolute VA of the array's first slot
length BIGINT NOT NULL, -- number of slots
kind VARCHAR NOT NULL -- 'vtable' (M3) | 'dispatch_table' (M8) | 'static_init' (M11)
);
CREATE TABLE function_pointer_array_entries (
array_address BIGINT NOT NULL, -- FK to function_pointer_arrays.address
slot BIGINT NOT NULL, -- 0-based slot index
function_address BIGINT NOT NULL, -- VA of the function this slot points at
PRIMARY KEY (array_address, slot)
);
CREATE TABLE demangled_names (
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
@@ -406,11 +436,13 @@ impl DbWriter {
insert_vtables(&self.conn, vtables, pe, info.image_base)?;
insert_methods_and_classes(&self.conn, vtables, labels)?;
insert_strings(&self.conn, strings)?;
insert_funcptr_arrays(&self.conn, funcptr_arrays)?;
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
let indices = [
("idx_functions_name", "CREATE INDEX idx_functions_name ON functions(name)"),
("idx_functions_pdata_validated", "CREATE INDEX idx_functions_pdata_validated ON functions(pdata_validated)"),
("idx_functions_has_eh", "CREATE INDEX idx_functions_has_eh ON functions(has_eh)"),
("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"),
("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"),
("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
@@ -420,6 +452,8 @@ impl DbWriter {
("idx_classes_rtti", "CREATE INDEX idx_classes_rtti ON classes(rtti_present)"),
("idx_strings_encoding", "CREATE INDEX idx_strings_encoding ON strings(encoding)"),
("idx_xrefs_addr_mode", "CREATE INDEX idx_xrefs_addr_mode ON xrefs(addr_mode)"),
("idx_fparrays_kind", "CREATE INDEX idx_fparrays_kind ON function_pointer_arrays(kind)"),
("idx_fpentries_function", "CREATE INDEX idx_fpentries_function ON function_pointer_array_entries(function_address)"),
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
@@ -448,7 +482,39 @@ impl DbWriter {
xrefs: &XrefMap,
) -> anyhow::Result<()> {
self.ingest_instructions(pe, info, func_analysis, labels)?;
self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[], &[])?;
self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[], &[], &[])?;
Ok(())
}
/// M10 — persist the parsed `.tls` data: one `tls_info` row carrying the
/// directory fields, followed by one `tls_callbacks` row per element of
/// `tls.callbacks`, keyed by its 0-based position in that collection.
///
/// Returns `Ok(())` without issuing any statement when `tls` is `None`.
/// Any error from executing or preparing a statement is propagated.
#[tracing::instrument(skip_all, name = "db.write_tls")]
pub fn write_tls(
    &mut self,
    tls: Option<&xenia_xex::tls::TlsInfo>,
) -> anyhow::Result<()> {
    let info = match tls {
        Some(info) => info,
        None => return Ok(()),
    };

    // Convert every directory field to the BIGINT column representation
    // up front so the parameter list below mirrors the column list.
    let raw_data_start = info.raw_data_start as i64;
    let raw_data_end = info.raw_data_end as i64;
    let index_address = info.index_address as i64;
    let callback_array = info.callback_array as i64;
    let zero_fill_size = info.zero_fill_size as i64;
    let characteristics = info.characteristics as i64;
    self.conn.execute(
        "INSERT INTO tls_info (raw_data_start, raw_data_end, index_address,
callback_array, zero_fill_size, characteristics)
VALUES (?, ?, ?, ?, ?, ?)",
        params![
            raw_data_start,
            raw_data_end,
            index_address,
            callback_array,
            zero_fill_size,
            characteristics,
        ],
    )?;

    let callback_count = info.callbacks.len();
    let mut insert_callback = self.conn.prepare(
        "INSERT INTO tls_callbacks (slot, address) VALUES (?, ?)"
    )?;
    // `slot` counts up from 0 in step with the callbacks' iteration order.
    for (slot, callback) in (0i64..).zip(info.callbacks.iter()) {
        insert_callback.execute(params![slot, callback.address as i64])?;
    }

    // Only the callback rows are reported; the count is the number of
    // callbacks supplied by the caller.
    metrics::counter!("db.rows", "table" => "tls_callbacks").increment(callback_count as u64);
    tracing::info!(rows = callback_count, table = "tls_callbacks", "tls write complete");
    Ok(())
}
@@ -755,8 +821,8 @@ fn insert_functions(
let mut stmt = conn.prepare(
"INSERT INTO functions
(address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore,
pdata_validated, pdata_length)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
pdata_validated, pdata_length, has_eh)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
)?;
for (&addr, fi) in &func_analysis.functions {
let name = labels.get(&addr)
@@ -772,6 +838,7 @@ fn insert_functions(
fi.is_saverestore,
fi.pdata_validated,
fi.pdata_length.map(|n| n as i64),
fi.has_eh,
])?;
}
Ok(())
@@ -884,6 +951,37 @@ fn insert_strings(
Ok(())
}
/// Persists detected function-pointer arrays and their slot entries.
///
/// For each element of `arrays` this attempts one insert into
/// `function_pointer_arrays` (address, length, kind) and one insert into
/// `function_pointer_array_entries` per element of its `entries`, using the
/// element's position as the 0-based slot. Both statements carry
/// `ON CONFLICT DO NOTHING`, so a conflicting row is skipped instead of
/// failing the whole write.
///
/// When `arrays` is empty the function returns immediately: no statement is
/// prepared and no metric or log line is emitted.
///
/// The `db.rows` counters and the summary log line report only rows for
/// which `execute` returned a non-zero count. Entry rows are now counted
/// the same way array rows already were; previously every attempted entry
/// insert was counted, so skipped rows inflated the entry total.
///
/// # Errors
///
/// Propagates any error from preparing or executing either statement.
fn insert_funcptr_arrays(
    conn: &Connection,
    arrays: &[crate::funcptr_arrays::FuncPtrArray],
) -> anyhow::Result<()> {
    if arrays.is_empty() {
        return Ok(());
    }
    let mut stmt_arr = conn.prepare(
        "INSERT INTO function_pointer_arrays (address, length, kind) VALUES (?, ?, ?)
ON CONFLICT DO NOTHING"
    )?;
    let mut stmt_ent = conn.prepare(
        "INSERT INTO function_pointer_array_entries (array_address, slot, function_address)
VALUES (?, ?, ?) ON CONFLICT DO NOTHING"
    )?;
    let mut n_arr = 0u64;
    let mut n_ent = 0u64;
    for array in arrays {
        let array_address = array.address as i64;
        // A zero result means the row was skipped by ON CONFLICT DO NOTHING.
        if stmt_arr.execute(params![array_address, array.length as i64, array.kind])? > 0 {
            n_arr += 1;
        }
        for (slot, &fn_va) in array.entries.iter().enumerate() {
            // Count an entry only when it was really written, so the
            // metric stays in step with the table contents.
            if stmt_ent.execute(params![array_address, slot as i64, fn_va as i64])? > 0 {
                n_ent += 1;
            }
        }
    }
    metrics::counter!("db.rows", "table" => "function_pointer_arrays").increment(n_arr);
    metrics::counter!("db.rows", "table" => "function_pointer_array_entries").increment(n_ent);
    tracing::info!(arrays = n_arr, entries = n_ent, "function-pointer arrays insert complete");
    Ok(())
}
fn insert_demangled_from_labels(
conn: &Connection,
labels: &HashMap<u32, String>,