M8+M9+M10+M11+M12: LOW-tier milestones — funcptr-arrays, EH flag, TLS, lr-trace

Five LOW-priority milestones bundled. Total ~700 LOC across 11 files.

## M9 — has_eh derived from pdata.flags exception bit
- New `functions.has_eh BOOLEAN NOT NULL` column. Derived from M1's
  already-parsed `pdata.flags` (bit 31 of the packed word — the
  exception-handler-present flag, distinct from bit 30 which is the
  always-1 32-bit-code flag). Adds index `idx_functions_has_eh`.
- Sylpheed: 2,975 of 23,073 pdata-validated functions have EH (12.9%).

## M10 — .tls section / IMAGE_TLS_DIRECTORY32 parser
- New `xenia_xex::tls::parse_tls` parses the directory + zero-terminated
  callback array. Returns None when the binary has no .tls section.
- New `tls_info` (singleton row) + `tls_callbacks(slot, address)` tables.
- New `DbWriter::write_tls()` no-ops on None.
- Sylpheed has no .tls section → 0 rows; infra ready for binaries with
  __declspec(thread).

## M8 + M11 — function_pointer_arrays (dispatch tables + static initialisers)
- New `xenia_analysis::funcptr_arrays::analyze` widens M3's vtable scan:
  detects runs of ≥2 function pointers in .rdata and classifies each as
  `vtable` (M3 re-emit), `dispatch_table` (M8), or `static_init` (M11)
  via a constructor-prologue heuristic (mfspr + small stwu).
- New tables `function_pointer_arrays(address PK, length, kind)` and
  `function_pointer_array_entries(array_address, slot, function_address)`.
- Sylpheed: 722 vtables + 388 dispatch_tables = 1,110 arrays / 6,347 slots.
  0 static_init detected (Sylpheed's ctors don't all match the
  conservative heuristic; M11.5 future work can chain via the entry-
  point's static-init driver).

## M12 — --lr-trace runtime canary-diff harness
- New CLI `exec --lr-trace=PC[,PC,...]` and `--lr-trace-out=PATH` flags.
  Symbolic resolution (Class::method, Class::*) via M4 lookup. Env vars
  XENIA_LR_TRACE / XENIA_LR_TRACE_OUT also work.
- New `KernelState::lr_trace_pcs` + `lr_trace_writer` + helper
  `fire_lr_trace_if_match(hw_id)` invoked from the per-instr probe slot.
- JSONL output: pc/tid/hw/cycle/r3/r4/r5/r6/lr — superset of what
  xenia-canary's --log_lr_on_pc patch emits, with a cycle counter for
  cross-run reproducibility. Diff-friendly via `jq`.
- Lockstep digest unaffected: smoke test on entry-point PC fires once
  with cycle=0/lr=BCBCBCBC/all-GPR-zero (correct initial state).

Tests 636→640 (+2 TLS tests, +2 funcptr_arrays tests). Schema golden
updated for new tables + has_eh column. Lockstep determinism preserved
(instructions=2000005 ×2 reruns identical).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-08 22:29:35 +02:00
parent 85d1603124
commit 5af792c9fc
11 changed files with 852 additions and 16 deletions

View File

@@ -230,6 +230,18 @@ enum Commands {
/// Default: `sylpheed.db` next to the .iso file when present.
#[arg(long)]
probe_db: Option<String>,
/// M12 — comma-separated PCs to capture as JSONL records on every
/// fire. Designed to diff against xenia-canary's `--log_lr_on_pc`
/// patch. Each record carries pc/tid/hw/cycle/r3/r4/r5/r6/lr.
/// Symbolic resolution (`Class::method`) is supported via M4 and
/// reads `--probe-db`. Settable via `XENIA_LR_TRACE`.
/// Read-only; lockstep digest unaffected.
#[arg(long)]
lr_trace: Option<String>,
/// M12 — write `--lr-trace` JSONL to this file (one record per
/// line). Stdout when omitted.
#[arg(long)]
lr_trace_out: Option<String>,
},
/// Browse XISO disc image contents
Browse {
@@ -391,6 +403,8 @@ fn main() -> Result<()> {
mem_watch,
dump_section,
probe_db,
lr_trace,
lr_trace_out,
} => cmd_exec(
&path,
max_instructions,
@@ -415,6 +429,8 @@ fn main() -> Result<()> {
mem_watch.as_deref(),
dump_section.as_deref(),
probe_db.as_deref(),
lr_trace.as_deref(),
lr_trace_out.as_deref(),
),
Commands::Browse { path } => cmd_browse(&path),
Commands::Info { path } => cmd_info(&path),
@@ -644,6 +660,8 @@ fn cmd_exec(
mem_watch: Option<&str>,
dump_section: Option<&str>,
probe_db: Option<&str>,
lr_trace: Option<&str>,
lr_trace_out: Option<&str>,
) -> Result<()> {
cmd_exec_inner(
path,
@@ -669,6 +687,8 @@ fn cmd_exec(
mem_watch,
dump_section,
probe_db,
lr_trace,
lr_trace_out,
None,
None,
false,
@@ -713,6 +733,8 @@ fn cmd_check(
None, // mem_watch — same
None, // dump_section — same
None, // probe_db — same
None, // lr_trace — same
None, // lr_trace_out — same
out,
expect,
stable_digest,
@@ -743,6 +765,8 @@ fn cmd_exec_inner(
mem_watch: Option<&str>,
dump_section: Option<&str>,
probe_db: Option<&str>,
lr_trace: Option<&str>,
lr_trace_out: Option<&str>,
digest_out: Option<&str>,
digest_expect: Option<&str>,
stable_digest: bool,
@@ -1080,6 +1104,50 @@ fn cmd_exec_inner(
}
}
// M12 — LR trace (canary-diff). Tokens use the same grammar as --pc-probe.
//
// Source of the PC list: the `--lr-trace` CLI value wins whenever it is
// present; otherwise a non-empty `XENIA_LR_TRACE` environment variable is
// used. An empty environment variable counts as "not set".
let lr_trace_spec: Option<String> = lr_trace.map(String::from).or_else(|| {
    std::env::var("XENIA_LR_TRACE")
        .ok()
        .filter(|v| !v.is_empty())
});
if let Some(spec) = lr_trace_spec {
    // Resolve every non-blank, comma-separated token to one or more PCs
    // and arm them. A token that fails to resolve aborts the command.
    let tokens = spec.split(',').filter_map(|raw| {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            None
        } else {
            Some(trimmed)
        }
    });
    for token in tokens {
        let resolved =
            xenia_analysis::lookup::resolve_probe_token(probe_db_path.as_deref(), token)
                .map_err(|e| anyhow::anyhow!("--lr-trace {token:?}: {e}"))?;
        for pc in resolved {
            kernel.lr_trace_pcs.insert(pc);
        }
    }

    // Output sink: `--lr-trace-out`, falling back to a non-empty
    // `XENIA_LR_TRACE_OUT`. This is only consulted when a trace list was
    // supplied above. With neither set, no file writer is installed.
    let sink_path: Option<String> = lr_trace_out.map(String::from).or_else(|| {
        std::env::var("XENIA_LR_TRACE_OUT")
            .ok()
            .filter(|v| !v.is_empty())
    });
    if let Some(p) = sink_path {
        let file = std::fs::File::create(&p)
            .map_err(|e| anyhow::anyhow!("--lr-trace-out {p:?}: {e}"))?;
        kernel.lr_trace_writer = Some(std::sync::Mutex::new(file));
    }

    // Report what was armed, in ascending address order so the log line is
    // stable from run to run.
    if !quiet && !kernel.lr_trace_pcs.is_empty() {
        let mut armed: Vec<u32> = kernel.lr_trace_pcs.iter().copied().collect();
        armed.sort_unstable();
        let rendered = armed
            .iter()
            .map(|p| format!("{p:#010x}"))
            .collect::<Vec<String>>()
            .join(", ");
        let sink = if kernel.lr_trace_writer.is_some() {
            "file"
        } else {
            "stdout"
        };
        tracing::info!(
            "lr-trace armed: {} ({}); sink={}",
            kernel.lr_trace_pcs.len(),
            rendered,
            sink,
        );
    }
}
// Diagnostic. Parse `--dump-addr=0x828F3D08,...` (or
// `XENIA_DUMP_ADDR=...`) into `kernel.dump_addrs`. The contents
// are dumped at end-of-run by `dump_thread_diagnostic`. Pure
@@ -2131,6 +2199,7 @@ fn worker_prologue(
// the helper, no overhead on the hot path.
kernel.fire_ctor_probe_if_match(hw_id, mem);
kernel.fire_branch_probe_if_match(hw_id);
kernel.fire_lr_trace_if_match(hw_id);
if mem.has_mem_watch() {
let ctx = kernel.scheduler.ctx(hw_id);
@@ -4129,6 +4198,26 @@ fn cmd_dis(
let strings = xenia_analysis::strings::analyze(&pe_image, base, &sections);
info!(strings = strings.len(), "string scan complete");
// M10 — parse the .tls directory. The parser yields `None` when the image
// has no .tls section; either outcome is logged.
let tls_info = xenia_xex::tls::parse_tls(&pe_image, base, &sections);
match tls_info.as_ref() {
    Some(t) => {
        info!(callbacks = t.callbacks.len(), "tls directory parsed");
    }
    None => {
        info!("no .tls section present");
    }
}
// M8 + M11 — generic function-pointer-array scan over `.rdata`. The result
// re-emits the M3 vtables alongside dispatch tables and static-init tables.
let fparrays = xenia_analysis::funcptr_arrays::analyze(
    &pe_image,
    base,
    &sections,
    &function_starts,
    &vtables,
);
// Per-kind tallies for the summary log line.
let n_dispatch = fparrays
    .iter()
    .filter(|arr| arr.kind == "dispatch_table")
    .count();
let n_static_init = fparrays
    .iter()
    .filter(|arr| arr.kind == "static_init")
    .count();
info!(
    funcptr_arrays = fparrays.len(),
    dispatch_tables = n_dispatch,
    static_inits = n_static_init,
    "function-pointer array scan complete",
);
// Build DisasmInfo
let disasm_info = xenia_analysis::formatter::DisasmInfo {
image_base: base,
@@ -4154,7 +4243,9 @@ fn cmd_dis(
&xref_result.xrefs,
&vtables,
&strings,
&fparrays,
)?;
w.write_tls(tls_info.as_ref())?;
if matches!(analyze, AnalyzeMode::Sql | AnalyzeMode::Both) {
w.create_sql_views()?;
info!(db = %db, "SQL views created");