xenia-analysis: unify disasm via xenia-cpu, split ingest/analyze, add sinks

The old src/ppc.rs that re-implemented PPC formatting collapses into
a 30-line shim that delegates to xenia-cpu's single-source-of-truth
disasm. A new disasm.rs wraps the shared iterator and feeds enriched
items (analysis context: function membership, xrefs, mnemonics) into
pluggable sinks.

Sinks split: text.rs (objdump-like output), json.rs (JSONL stream
matching the new xenia dis --json mode), duckdb.rs (the analysis DB
ingest). db.rs is restructured into ingest_instructions +
write_analysis_results so a run can stop after raw ingest, and a new
target_hex column lands on the instructions table. sql_views.rs adds
five additive views layered on top of the raw tables.

Tests: assert-based JSON-fixture goldens (disasm_goldens) and a
PRAGMA-table_info schema golden (db_schema_golden) covering all
ingested tables and the SQL views.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-01 16:28:06 +02:00
parent c36cca14f9
commit 45e15d7885
15 changed files with 1194 additions and 1757 deletions

View File

@@ -1,4 +1,4 @@
//! SQLite database writer for xenia-rs.
//! DuckDB writer for xenia-rs.
//!
//! Layered, streaming writes shared by `extract`, `dis`, and `exec`.
//! Each command's output is a superset of the previous:
@@ -6,19 +6,119 @@
//! - `dis --db` -> base + disasm tables (functions, labels, instructions, xrefs)
//! - `exec --db` -> base + disasm + opt-in trace tables (exec_trace, import_calls, branch_trace)
//!
//! Performance: streaming commits every 100k rows, no end-of-run ANALYZE,
//! progress messages before each index build.
//! Bulk inserts use the DuckDB Appender API, which bypasses the SQL layer and
//! writes directly to columnar storage — no transaction batching required.
//!
//! Trace kind values for `branch_trace.kind`:
//! - "call" : any branch with LK set (raw & 1 == 1)
//! - "return" : bclrx without LK
//! - "jump" : bcctrx without LK
//! - "branch" : bx/bcx without LK
//! - `"call"` : any branch with LK set (raw & 1 == 1)
//! - `"return"` : bclrx without LK
//! - `"jump"` : bcctrx without LK
//! - `"branch"` : bx/bcx without LK
//!
//! # Schema
//!
//! ## `metadata`
//! Key-value table. One row per XEX header field. Values are strings.
//!
//! | key | value format | meaning |
//! |--------------------|------------------|----------------------------------------------------|
//! | `image_base` | `"0xXXXXXXXX"` | Virtual address where the PE image is mapped |
//! | `entry_point` | `"0xXXXXXXXX"` | Absolute VA of the XEX entry point |
//! | `original_pe_name` | string | Original PE filename from XEX optional headers |
//! | `title_id` | `"0xXXXXXXXX"` | Xbox 360 Title ID (identifies the game) |
//! | `media_id` | `"0xXXXXXXXX"` | Disc/media ID (identifies the specific disc build) |
//!
//! ## `sections`
//! One row per PE section (`.text`, `.data`, etc.).
//! - `name` — PE section name
//! - `virtual_address` — RVA relative to `image_base` where the section is mapped in memory
//! - `virtual_size` — Size in memory; may exceed `raw_size` due to BSS zero-fill
//! - `raw_offset` — Byte offset of section data within the XEX/PE file
//! - `raw_size` — Size of section data on disk
//! - `flags` — `IMAGE_SCN_*` characteristics bit field
//! - `is_code` — `true` if `IMAGE_SCN_CNT_CODE` is set
//!
//! ## `imports`
//! One row per import record from the XEX import descriptor table.
//! - `library` — Module name (e.g. `xboxkrnl.exe`, `xam.xex`)
//! - `ordinal` — Numeric ordinal identifying the export within the library
//! - `name` — Resolved human-readable symbol name; `NULL` if not in symbol table
//! - `record_type` — XEX import record type: `0` = function thunk, `1` = variable
//! - `address` — Absolute VA of the import thunk or variable in the binary
//!
//! ## `functions`
//! One row per detected function (from prologue analysis).
//! - `address` — Absolute VA of the function entry point (PK)
//! - `name` — Symbol name, or `sub_XXXXXXXX` if unresolved
//! - `end_address` — Absolute VA of last instruction + 4 (exclusive end)
//! - `frame_size` — Stack frame size in bytes (from prologue)
//! - `saved_gprs` — Bitmask of GPRs saved in prologue (bit N set ⇒ rN is saved)
//! - `is_leaf` — `true` if the function has no outgoing calls (no branch with LK set, e.g. `bl`; a plain `blr` is a return, not a call)
//! - `is_saverestore` — `true` if this is a `__savegprlr_*`/`__restgprlr_*` compiler stub
//!
//! ## `labels`
//! One row per named address; superset of functions.
//! - `address` — Absolute VA (PK)
//! - `name` — Symbol name
//! - `kind` — One of: `function`, `import`, `saverestore`, `local`, `data`, `other`
//!
//! ## `instructions`
//! One row per disassembled instruction.
//! - `address` — Absolute VA (PK)
//! - `raw` — 4-byte big-endian instruction word as integer
//! - `mnemonic` — Base mnemonic (e.g. `stw`, `bl`, `cmpwi`)
//! - `operands` — Operand string from base disassembly
//! - `disasm` — Full base disassembly string (`mnemonic + " " + operands`)
//! - `ext_mnemonic` — Simplified mnemonic (e.g. `mr` for `or rX,rY,rY`); `NULL` if none
//! - `ext_operands` — Operands for the extended form; `NULL` if none
//! - `ext_disasm` — Full extended disassembly string; `NULL` if none
//! - `target_hex` — Resolved absolute branch target for `b`/`bc` (and link/AA variants); `NULL` for indirect or non-branch instructions. SQL views (`v_branch_xrefs`) self-join on this column.
//! - `section` — Name of the PE section containing this instruction
//! - `function` — VA of the enclosing function; `NULL` if not inside a detected function
//! - `label` — Label name at this address; `NULL` if none
//!
//! ## `xrefs`
//! One row per cross-reference edge (call, jump, data access).
//! - `source` — Absolute VA of the instruction making the reference
//! - `target` — Absolute VA of the referenced destination
//! - `kind` — Reference type as the short tag from [`crate::xref::XrefKind::tag`]:
//! `call`, `j` (jump), `br` (branch), `read` (data_read),
//! `write` (data_write), `ref` (data_ref).
//! Note: this is a different convention from `branch_trace.kind`,
//! which uses the long names (`call` / `return` / `jump` / `branch`).
//! - `instruction` — Mnemonic of the source instruction; `NULL` if address is not in binary
//! - `source_func` — VA of the function containing `source`; `NULL` if unknown
//! - `source_label` — Label at `source`; `NULL` if none
//! - `target_label` — Label at `target`; `NULL` if none
//!
//! ## `exec_trace` *(opt-in: `--trace-instructions`)*
//! One row per executed instruction.
//! - `address` — Absolute VA of the instruction
//! - `cycle` — Monotonic instruction counter (execution order)
//! - `r3`, `r4`, `lr`, `sp` — Snapshot of key GPRs at time of execution
//!
//! ## `import_calls` *(opt-in: `--trace-imports`)*
//! One row per intercepted kernel/import call.
//! - `address` — VA of the import thunk
//! - `cycle` — Instruction counter at point of interception
//! - `module` — Library name (e.g. `xboxkrnl.exe`)
//! - `ordinal` — Numeric ordinal within the module
//! - `name` — Resolved symbol name
//! - `arg_r3`–`arg_r6` — First four call arguments (PowerPC ABI: r3–r6)
//! - `return_value` — Value in r3 after the call returns
//!
//! ## `branch_trace` *(opt-in: `--trace-branches`)*
//! One row per taken branch.
//! - `cycle` — Instruction counter
//! - `source` — VA of the branch instruction
//! - `target` — VA of the branch destination
//! - `kind` — `call`, `return`, `jump`, or `branch` (see "Trace kind values" near the top of this module doc)
//! - `lr` — Link register value at time of branch
use std::collections::HashMap;
use std::path::Path;
use rusqlite::{Connection, params};
use duckdb::{Connection, params};
use crate::func::FuncAnalysis;
use crate::xref::{XrefMap, resolve_source_label};
@@ -26,12 +126,9 @@ use crate::formatter::DisasmInfo;
const DEFAULT_BATCH_SIZE: u64 = 100_000;
/// Number of rows per DB commit / trace buffer flush.
/// Configurable via the `XENIA_DB_BATCH_SIZE` env var (default 100_000).
/// Used for:
/// - `instructions` and `xrefs` streaming commits in `write_disasm`
/// - `exec_trace` and `branch_trace` buffer thresholds during exec
/// (`import_calls` always flushes at 1000 — low volume, not worth scaling.)
/// Rows per trace buffer flush. Configurable via `XENIA_DB_BATCH_SIZE` env var (default 100_000).
/// Applies to `exec_trace` and `branch_trace` buffer thresholds.
/// `import_calls` always flushes at 1000 — low volume, not worth scaling.
fn batch_size() -> u64 {
use std::sync::OnceLock;
static CACHED: OnceLock<u64> = OnceLock::new();
@@ -94,12 +191,6 @@ impl DbWriter {
std::fs::remove_file(path)?;
}
let conn = Connection::open(path)?;
conn.execute_batch("
PRAGMA journal_mode = OFF;
PRAGMA synchronous = OFF;
PRAGMA locking_mode = EXCLUSIVE;
PRAGMA temp_store = MEMORY;
")?;
let cap = batch_size() as usize;
Ok(Self {
conn,
@@ -118,29 +209,30 @@ impl DbWriter {
// ── Base layer (written by extract/dis/exec) ─────────────────────────────
/// Write metadata, sections, imports tables and their indices.
#[tracing::instrument(skip_all, name = "db.write_base")]
pub fn write_base(&mut self, info: &DisasmInfo) -> anyhow::Result<()> {
self.conn.execute_batch("
CREATE TABLE metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
key VARCHAR PRIMARY KEY, -- header field name
value VARCHAR NOT NULL -- hex-formatted or plain string value
);
CREATE TABLE sections (
name TEXT NOT NULL,
virtual_address INTEGER NOT NULL,
virtual_size INTEGER NOT NULL,
raw_offset INTEGER NOT NULL,
raw_size INTEGER NOT NULL,
flags INTEGER NOT NULL,
is_code BOOLEAN NOT NULL
name VARCHAR NOT NULL, -- PE section name (e.g. .text, .rdata)
virtual_address BIGINT NOT NULL, -- RVA relative to image_base
virtual_size BIGINT NOT NULL, -- size in memory; may exceed raw_size (BSS)
raw_offset BIGINT NOT NULL, -- byte offset of section data in the file
raw_size BIGINT NOT NULL, -- size of section data on disk
flags BIGINT NOT NULL, -- IMAGE_SCN_* characteristics bit field
is_code BOOLEAN NOT NULL -- true if IMAGE_SCN_CNT_CODE is set
);
CREATE TABLE imports (
library TEXT NOT NULL,
ordinal INTEGER NOT NULL,
name TEXT,
record_type INTEGER NOT NULL,
address INTEGER NOT NULL
library VARCHAR NOT NULL, -- module name (e.g. xboxkrnl.exe, xam.xex)
ordinal BIGINT NOT NULL, -- ordinal identifying the export within the library
name VARCHAR, -- resolved symbol name; NULL if not in symbol table
record_type BIGINT NOT NULL, -- 0 = function thunk, 1 = variable
address BIGINT NOT NULL -- absolute VA of the thunk or variable
);
")?;
@@ -150,15 +242,69 @@ impl DbWriter {
self.conn.execute_batch("
CREATE INDEX idx_imports_library ON imports(library);
CREATE INDEX idx_imports_name ON imports(name);
CREATE INDEX idx_imports_name ON imports(name);
")?;
Ok(())
}
// ── Disasm layer (written by dis/exec) ───────────────────────────────────
/// Write functions, labels, instructions, xrefs tables and indices.
pub fn write_disasm(
/// Raw-ingest half of the Phase-3 split: creates the `instructions` table,
/// loads it, then builds its secondary indices.
///
/// The rows are written by `insert_instructions_streaming`; the six
/// `idx_instructions_*` indices are created only after that load returns,
/// one statement at a time. `functions`, `labels` and `xrefs` are not
/// touched here — those belong to [`Self::write_analysis_results`].
///
/// `func_analysis` and `labels` are forwarded to the loader so that every
/// row can carry its `function` and `label` columns.
///
/// # Errors
/// Returns the first error raised by the table DDL, the row load, or any
/// index build; later steps are not attempted.
#[tracing::instrument(skip_all, name = "db.ingest_instructions")]
pub fn ingest_instructions(
    &mut self,
    pe: &[u8],
    info: &DisasmInfo,
    func_analysis: &FuncAnalysis,
    labels: &HashMap<u32, String>,
) -> anyhow::Result<()> {
    // Table DDL, kept verbatim (including the inline SQL column comments).
    const CREATE_INSTRUCTIONS: &str = "
CREATE TABLE instructions (
address BIGINT PRIMARY KEY, -- absolute VA
raw BIGINT NOT NULL, -- 4-byte big-endian instruction word as integer
mnemonic VARCHAR NOT NULL, -- base mnemonic (e.g. stw, bl, cmpwi)
operands VARCHAR NOT NULL, -- operand string from base disassembly
disasm VARCHAR NOT NULL, -- full base disassembly (mnemonic + operands)
ext_mnemonic VARCHAR, -- simplified mnemonic (e.g. mr); NULL if none
ext_operands VARCHAR, -- operands for the extended form; NULL if none
ext_disasm VARCHAR, -- full extended disassembly string; NULL if none
target_hex BIGINT, -- resolved absolute target for direct branches; NULL for indirect/non-branch
section VARCHAR NOT NULL, -- PE section name containing this instruction
function BIGINT, -- VA of the enclosing function; NULL if unknown
label VARCHAR -- label at this address; NULL if none
);
";

    // (index name for logging, statement that creates it)
    const INSTRUCTION_INDICES: [(&str, &str); 6] = [
        ("idx_instructions_function", "CREATE INDEX idx_instructions_function ON instructions(function)"),
        ("idx_instructions_mnemonic", "CREATE INDEX idx_instructions_mnemonic ON instructions(mnemonic)"),
        ("idx_instructions_ext_mnemonic", "CREATE INDEX idx_instructions_ext_mnemonic ON instructions(ext_mnemonic)"),
        ("idx_instructions_section", "CREATE INDEX idx_instructions_section ON instructions(section)"),
        ("idx_instructions_label", "CREATE INDEX idx_instructions_label ON instructions(label)"),
        ("idx_instructions_target_hex", "CREATE INDEX idx_instructions_target_hex ON instructions(target_hex)"),
    ];

    self.conn.execute_batch(CREATE_INSTRUCTIONS)?;

    // Load first, index afterwards.
    insert_instructions_streaming(&self.conn, pe, info, func_analysis, labels)?;

    for (index_name, ddl) in INSTRUCTION_INDICES {
        tracing::debug!(index = index_name, "creating instructions index");
        self.conn.execute_batch(ddl)?;
    }
    Ok(())
}
/// Phase-3 analyze pass — writes the Rust-pass-derived tables
/// (`functions`, `labels`, `xrefs`) and their indices. Always executes
/// in `--analyze=rust` and `--analyze=both` modes; skipped only when
/// the caller deliberately chooses a Rust-free DB layout.
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
pub fn write_analysis_results(
&mut self,
pe: &[u8],
info: &DisasmInfo,
@@ -168,74 +314,111 @@ impl DbWriter {
) -> anyhow::Result<()> {
self.conn.execute_batch("
CREATE TABLE functions (
address INTEGER PRIMARY KEY,
name TEXT NOT NULL,
end_address INTEGER NOT NULL,
frame_size INTEGER NOT NULL,
saved_gprs INTEGER NOT NULL,
is_leaf BOOLEAN NOT NULL,
is_saverestore BOOLEAN NOT NULL
address BIGINT PRIMARY KEY, -- absolute VA of entry point
name VARCHAR NOT NULL, -- symbol name, or sub_XXXXXXXX if unresolved
end_address BIGINT NOT NULL, -- VA of last instruction + 4 (exclusive end)
frame_size BIGINT NOT NULL, -- stack frame size in bytes (from prologue)
saved_gprs BIGINT NOT NULL, -- bitmask of GPRs saved in prologue (bit N = rN)
is_leaf BOOLEAN NOT NULL, -- true if the function has no outgoing calls
is_saverestore BOOLEAN NOT NULL -- true if __savegprlr_* / __restgprlr_* stub
);
CREATE TABLE labels (
address INTEGER PRIMARY KEY,
name TEXT NOT NULL,
kind TEXT NOT NULL
);
CREATE TABLE instructions (
address INTEGER PRIMARY KEY,
raw INTEGER NOT NULL,
mnemonic TEXT NOT NULL,
operands TEXT NOT NULL,
disasm TEXT NOT NULL,
ext_mnemonic TEXT,
ext_operands TEXT,
ext_disasm TEXT,
section TEXT NOT NULL,
function INTEGER,
label TEXT
address BIGINT PRIMARY KEY, -- absolute VA
name VARCHAR NOT NULL, -- symbol name
kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other
);
CREATE TABLE xrefs (
source INTEGER NOT NULL,
target INTEGER NOT NULL,
kind TEXT NOT NULL,
instruction TEXT,
source_func INTEGER,
source_label TEXT,
target_label TEXT
source BIGINT NOT NULL, -- VA of the referencing instruction
target BIGINT NOT NULL, -- VA of the referenced destination
kind VARCHAR NOT NULL, -- call | j | br | read | write | ref (short XrefKind tags)
instruction VARCHAR, -- mnemonic of source instruction; NULL if not in binary
source_func BIGINT, -- VA of the function containing source; NULL if unknown
source_label VARCHAR, -- label at source; NULL if none
target_label VARCHAR -- label at target; NULL if none
);
")?;
insert_functions(&self.conn, func_analysis, labels)?;
insert_labels(&self.conn, labels)?;
insert_instructions_streaming(&self.conn, pe, info, func_analysis, labels)?;
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
let indices = [
("idx_functions_name", "CREATE INDEX idx_functions_name ON functions(name)"),
("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"),
("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"),
("idx_instructions_function", "CREATE INDEX idx_instructions_function ON instructions(function)"),
("idx_instructions_mnemonic", "CREATE INDEX idx_instructions_mnemonic ON instructions(mnemonic)"),
("idx_instructions_ext_mnemonic","CREATE INDEX idx_instructions_ext_mnemonic ON instructions(ext_mnemonic)"),
("idx_instructions_section", "CREATE INDEX idx_instructions_section ON instructions(section)"),
("idx_instructions_label", "CREATE INDEX idx_instructions_label ON instructions(label)"),
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
("idx_xrefs_kind", "CREATE INDEX idx_xrefs_kind ON xrefs(kind)"),
("idx_xrefs_instruction", "CREATE INDEX idx_xrefs_instruction ON xrefs(instruction)"),
("idx_xrefs_target_label", "CREATE INDEX idx_xrefs_target_label ON xrefs(target_label)"),
("idx_functions_name", "CREATE INDEX idx_functions_name ON functions(name)"),
("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"),
("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"),
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
("idx_xrefs_kind", "CREATE INDEX idx_xrefs_kind ON xrefs(kind)"),
("idx_xrefs_instruction", "CREATE INDEX idx_xrefs_instruction ON xrefs(instruction)"),
("idx_xrefs_target_label", "CREATE INDEX idx_xrefs_target_label ON xrefs(target_label)"),
];
for (name, sql) in indices {
eprintln!("[db] creating {name}...");
tracing::debug!(index = name, "creating analysis index");
self.conn.execute_batch(sql)?;
}
Ok(())
}
/// One-shot convenience entry point kept for callers written against the
/// pre-Phase-3 API.
///
/// Runs [`Self::ingest_instructions`] and then
/// [`Self::write_analysis_results`] with the same inputs. If the ingest
/// step fails, its error is returned and the analysis step is not run.
#[tracing::instrument(skip_all, name = "db.write_disasm")]
pub fn write_disasm(
    &mut self,
    pe: &[u8],
    info: &DisasmInfo,
    func_analysis: &FuncAnalysis,
    labels: &HashMap<u32, String>,
    xrefs: &XrefMap,
) -> anyhow::Result<()> {
    self.ingest_instructions(pe, info, func_analysis, labels)?;
    // The second phase's result is the overall result.
    self.write_analysis_results(pe, info, func_analysis, labels, xrefs)
}
/// SQL-views layer of Phase 3: executes every definition listed in
/// [`crate::sql_views::ALL_VIEWS`], in the order that list yields them.
///
/// The views are additive and read-only, layered over `instructions`
/// (and optionally `xrefs`/`functions`/`labels`). Intended for runs where
/// `--analyze=sql` or `--analyze=both` is in effect.
///
/// # Errors
/// Stops at, and returns, the first view definition that fails to execute.
#[tracing::instrument(skip_all, name = "db.create_sql_views")]
pub fn create_sql_views(&mut self) -> anyhow::Result<()> {
    for (view_name, view_sql) in crate::sql_views::ALL_VIEWS {
        tracing::debug!(view = view_name, "creating SQL view");
        self.conn.execute_batch(view_sql)?;
    }
    Ok(())
}
/// Compares the branch edges exposed by the `v_branch_xrefs` view with the
/// Rust-pass `xrefs` table and reports how many rows each side has that
/// the other lacks.
///
/// Returns `(sql_only, rust_only)`:
/// - `sql_only`  — rows of `v_branch_xrefs` with no `xrefs` row matching on
///   `(source, target, kind)`;
/// - `rust_only` — `xrefs` rows of kind `call`, `j` or `br` with no matching
///   `v_branch_xrefs` row.
///
/// Both counts are expected to be zero. A non-zero value signals drift
/// between the formatter's `mnemonic` column and `xref.rs`'s opcode
/// classification; reporting it (as a warning) is left to the caller.
///
/// # Errors
/// Returns the error from whichever count query fails first.
#[tracing::instrument(skip_all, name = "db.cross_check_branch_xrefs")]
pub fn cross_check_branch_xrefs(&self) -> anyhow::Result<(u64, u64)> {
    // Anti-join: view rows without a counterpart in `xrefs`.
    const SQL_ONLY_QUERY: &str =
        "SELECT COUNT(*) FROM v_branch_xrefs vb \
         LEFT JOIN xrefs x \
         ON x.source = vb.source AND x.target = vb.target AND x.kind = vb.kind \
         WHERE x.source IS NULL";
    // Anti-join in the other direction, restricted to branch-type kinds.
    const RUST_ONLY_QUERY: &str =
        "SELECT COUNT(*) FROM xrefs x \
         LEFT JOIN v_branch_xrefs vb \
         ON vb.source = x.source AND vb.target = x.target AND vb.kind = x.kind \
         WHERE x.kind IN ('call','j','br') AND vb.source IS NULL";

    // Runs a single-value COUNT query and widens the result to u64.
    let count_rows = |query: &str| -> anyhow::Result<u64> {
        let rows: i64 = self.conn.query_row(query, [], |row| row.get(0))?;
        Ok(rows as u64)
    };

    let sql_only = count_rows(SQL_ONLY_QUERY)?;
    let rust_only = count_rows(RUST_ONLY_QUERY)?;
    Ok((sql_only, rust_only))
}
// ── Trace layer (written by exec when flags enabled) ─────────────────────
/// Create the opt-in trace tables. No-op if all flags are false.
@@ -251,49 +434,43 @@ impl DbWriter {
if trace_instructions {
self.conn.execute_batch("
CREATE TABLE IF NOT EXISTS exec_trace (
id INTEGER PRIMARY KEY,
address INTEGER NOT NULL,
cycle INTEGER NOT NULL,
r3 INTEGER NOT NULL,
r4 INTEGER NOT NULL,
lr INTEGER NOT NULL,
sp INTEGER NOT NULL
CREATE TABLE exec_trace (
address BIGINT NOT NULL, -- absolute VA of the instruction
cycle BIGINT NOT NULL, -- monotonic instruction counter (execution order)
r3 BIGINT NOT NULL, -- r3 at time of execution
r4 BIGINT NOT NULL, -- r4 at time of execution
lr BIGINT NOT NULL, -- link register
sp BIGINT NOT NULL -- stack pointer
);
DELETE FROM exec_trace;
")?;
}
if trace_imports {
self.conn.execute_batch("
CREATE TABLE IF NOT EXISTS import_calls (
id INTEGER PRIMARY KEY,
address INTEGER NOT NULL,
cycle INTEGER NOT NULL,
module TEXT NOT NULL,
ordinal INTEGER NOT NULL,
name TEXT NOT NULL,
arg_r3 INTEGER NOT NULL,
arg_r4 INTEGER NOT NULL,
arg_r5 INTEGER NOT NULL,
arg_r6 INTEGER NOT NULL,
return_value INTEGER NOT NULL
CREATE TABLE import_calls (
address BIGINT NOT NULL, -- VA of the import thunk
cycle BIGINT NOT NULL, -- instruction counter at interception
module VARCHAR NOT NULL, -- library name (e.g. xboxkrnl.exe)
ordinal BIGINT NOT NULL, -- ordinal within the module
name VARCHAR NOT NULL, -- resolved symbol name
arg_r3 BIGINT NOT NULL, -- first argument (r3)
arg_r4 BIGINT NOT NULL, -- second argument (r4)
arg_r5 BIGINT NOT NULL, -- third argument (r5)
arg_r6 BIGINT NOT NULL, -- fourth argument (r6)
return_value BIGINT NOT NULL -- r3 after the call returns
);
DELETE FROM import_calls;
")?;
}
if trace_branches {
self.conn.execute_batch("
CREATE TABLE IF NOT EXISTS branch_trace (
id INTEGER PRIMARY KEY,
cycle INTEGER NOT NULL,
source INTEGER NOT NULL,
target INTEGER NOT NULL,
kind TEXT NOT NULL,
lr INTEGER NOT NULL
CREATE TABLE branch_trace (
cycle BIGINT NOT NULL, -- instruction counter
source BIGINT NOT NULL, -- VA of the branch instruction
target BIGINT NOT NULL, -- VA of the branch destination
kind VARCHAR NOT NULL, -- call | return | jump | branch
lr BIGINT NOT NULL -- link register at time of branch
);
DELETE FROM branch_trace;
")?;
}
@@ -326,109 +503,99 @@ impl DbWriter {
fn flush_exec(&mut self) {
if self.exec_buffer.is_empty() { return; }
let tx = self.conn.unchecked_transaction().unwrap();
{
let mut stmt = tx.prepare_cached(
"INSERT INTO exec_trace (address, cycle, r3, r4, lr, sp) VALUES (?1, ?2, ?3, ?4, ?5, ?6)"
).unwrap();
for e in &self.exec_buffer {
stmt.execute(params![
e.address as i64,
e.cycle as i64,
e.r3 as i64,
e.r4 as i64,
e.lr as i64,
e.sp as i64,
]).ok();
}
let mut appender = self.conn.appender("exec_trace").unwrap();
for e in &self.exec_buffer {
appender.append_row(params![
e.address as i64,
e.cycle as i64,
e.r3 as i64,
e.r4 as i64,
e.lr as i64,
e.sp as i64,
]).ok();
}
tx.commit().ok();
appender.flush().ok();
self.exec_count += self.exec_buffer.len() as u64;
self.exec_buffer.clear();
}
fn flush_imports(&mut self) {
if self.import_buffer.is_empty() { return; }
let tx = self.conn.unchecked_transaction().unwrap();
{
let mut stmt = tx.prepare_cached(
"INSERT INTO import_calls (address, cycle, module, ordinal, name, arg_r3, arg_r4, arg_r5, arg_r6, return_value)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)"
).unwrap();
for e in &self.import_buffer {
stmt.execute(params![
e.address as i64,
e.cycle as i64,
e.module,
e.ordinal as i64,
e.name,
e.arg_r3 as i64,
e.arg_r4 as i64,
e.arg_r5 as i64,
e.arg_r6 as i64,
e.return_value as i64,
]).ok();
}
let mut appender = self.conn.appender("import_calls").unwrap();
for e in &self.import_buffer {
appender.append_row(params![
e.address as i64,
e.cycle as i64,
e.module.as_str(),
e.ordinal as i64,
e.name.as_str(),
e.arg_r3 as i64,
e.arg_r4 as i64,
e.arg_r5 as i64,
e.arg_r6 as i64,
e.return_value as i64,
]).ok();
}
tx.commit().ok();
appender.flush().ok();
self.import_count += self.import_buffer.len() as u64;
self.import_buffer.clear();
}
fn flush_branches(&mut self) {
if self.branch_buffer.is_empty() { return; }
let tx = self.conn.unchecked_transaction().unwrap();
{
let mut stmt = tx.prepare_cached(
"INSERT INTO branch_trace (cycle, source, target, kind, lr) VALUES (?1, ?2, ?3, ?4, ?5)"
).unwrap();
for e in &self.branch_buffer {
stmt.execute(params![
e.cycle as i64,
e.source as i64,
e.target as i64,
e.kind,
e.lr as i64,
]).ok();
}
let mut appender = self.conn.appender("branch_trace").unwrap();
for e in &self.branch_buffer {
appender.append_row(params![
e.cycle as i64,
e.source as i64,
e.target as i64,
e.kind,
e.lr as i64,
]).ok();
}
tx.commit().ok();
appender.flush().ok();
self.branch_count += self.branch_buffer.len() as u64;
self.branch_buffer.clear();
}
/// Flush remaining trace buffers and create their indices.
#[tracing::instrument(skip_all, name = "db.finalize_traces")]
pub fn finalize_traces(&mut self) -> anyhow::Result<()> {
self.flush_exec();
self.flush_imports();
self.flush_branches();
if self.trace_instructions {
eprintln!("[db] creating idx_exec_trace_address...");
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_exec_trace_address ON exec_trace(address);")?;
eprintln!("[db] creating idx_exec_trace_cycle...");
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_exec_trace_cycle ON exec_trace(cycle);")?;
tracing::debug!("creating idx_exec_trace_address");
self.conn.execute_batch("CREATE INDEX idx_exec_trace_address ON exec_trace(address);")?;
tracing::debug!("creating idx_exec_trace_cycle");
self.conn.execute_batch("CREATE INDEX idx_exec_trace_cycle ON exec_trace(cycle);")?;
}
if self.trace_imports {
eprintln!("[db] creating idx_import_calls_name...");
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_import_calls_name ON import_calls(name);")?;
eprintln!("[db] creating idx_import_calls_cycle...");
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_import_calls_cycle ON import_calls(cycle);")?;
tracing::debug!("creating idx_import_calls_name");
self.conn.execute_batch("CREATE INDEX idx_import_calls_name ON import_calls(name);")?;
tracing::debug!("creating idx_import_calls_cycle");
self.conn.execute_batch("CREATE INDEX idx_import_calls_cycle ON import_calls(cycle);")?;
}
if self.trace_branches {
eprintln!("[db] creating idx_branch_trace_source...");
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_source ON branch_trace(source);")?;
eprintln!("[db] creating idx_branch_trace_target...");
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_target ON branch_trace(target);")?;
eprintln!("[db] creating idx_branch_trace_kind...");
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_kind ON branch_trace(kind);")?;
eprintln!("[db] creating idx_branch_trace_cycle...");
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_cycle ON branch_trace(cycle);")?;
tracing::debug!("creating idx_branch_trace_source");
self.conn.execute_batch("CREATE INDEX idx_branch_trace_source ON branch_trace(source);")?;
tracing::debug!("creating idx_branch_trace_target");
self.conn.execute_batch("CREATE INDEX idx_branch_trace_target ON branch_trace(target);")?;
tracing::debug!("creating idx_branch_trace_kind");
self.conn.execute_batch("CREATE INDEX idx_branch_trace_kind ON branch_trace(kind);")?;
tracing::debug!("creating idx_branch_trace_cycle");
self.conn.execute_batch("CREATE INDEX idx_branch_trace_cycle ON branch_trace(cycle);")?;
}
eprintln!(
"[db] trace totals: {} instructions, {} imports, {} branches",
self.exec_count, self.import_count, self.branch_count
metrics::counter!("db.rows", "table" => "exec_trace").increment(self.exec_count);
metrics::counter!("db.rows", "table" => "import_calls").increment(self.import_count);
metrics::counter!("db.rows", "table" => "branch_trace").increment(self.branch_count);
tracing::info!(
instructions = self.exec_count,
imports = self.import_count,
branches = self.branch_count,
"trace totals"
);
Ok(())
}
@@ -453,7 +620,7 @@ pub fn write_db(
// ── Helpers ────────────────────────────────────────────────────────────────
fn insert_metadata(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> {
let mut stmt = conn.prepare("INSERT INTO metadata (key, value) VALUES (?1, ?2)")?;
let mut stmt = conn.prepare("INSERT INTO metadata (key, value) VALUES (?, ?)")?;
stmt.execute(params!["image_base", format!("0x{:08X}", info.image_base)])?;
stmt.execute(params!["entry_point", format!("0x{:08X}", info.entry_point)])?;
if let Some(name) = info.original_pe_name {
@@ -471,7 +638,7 @@ fn insert_metadata(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> {
fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) -> anyhow::Result<()> {
let mut stmt = conn.prepare(
"INSERT INTO sections (name, virtual_address, virtual_size, raw_offset, raw_size, flags, is_code)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"
VALUES (?, ?, ?, ?, ?, ?, ?)"
)?;
for s in sections {
stmt.execute(params![
@@ -481,7 +648,7 @@ fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) ->
s.raw_offset as i64,
s.raw_size as i64,
s.flags as i64,
s.is_code() as i32,
s.is_code(),
])?;
}
Ok(())
@@ -490,7 +657,7 @@ fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) ->
fn insert_imports(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> {
let mut stmt = conn.prepare(
"INSERT INTO imports (library, ordinal, name, record_type, address)
VALUES (?1, ?2, ?3, ?4, ?5)"
VALUES (?, ?, ?, ?, ?)"
)?;
for lib in info.import_libraries {
for imp in &lib.imports {
@@ -514,7 +681,7 @@ fn insert_functions(
) -> anyhow::Result<()> {
let mut stmt = conn.prepare(
"INSERT INTO functions (address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"
VALUES (?, ?, ?, ?, ?, ?, ?)"
)?;
for (&addr, fi) in &func_analysis.functions {
let name = labels.get(&addr)
@@ -526,8 +693,8 @@ fn insert_functions(
fi.end as i64,
fi.frame_size as i64,
fi.saved_gprs as i64,
fi.is_leaf as i32,
fi.is_saverestore as i32,
fi.is_leaf,
fi.is_saverestore,
])?;
}
Ok(())
@@ -538,7 +705,7 @@ fn insert_labels(
labels: &HashMap<u32, String>,
) -> anyhow::Result<()> {
let mut stmt = conn.prepare(
"INSERT OR IGNORE INTO labels (address, name, kind) VALUES (?1, ?2, ?3)"
"INSERT INTO labels (address, name, kind) VALUES (?, ?, ?) ON CONFLICT DO NOTHING"
)?;
for (&addr, name) in labels {
let kind = if name.starts_with("sub_") || name == "entry_point" {
@@ -566,78 +733,22 @@ fn insert_instructions_streaming(
func_analysis: &FuncAnalysis,
labels: &HashMap<u32, String>,
) -> anyhow::Result<()> {
let mut tx = conn.unchecked_transaction()?;
let mut count: u64 = 0;
let mut since_commit: u64 = 0;
let mut appender = conn.appender("instructions")?;
let mut total: u64 = 0;
for section in info.sections {
if !section.is_code() { continue; }
let va_start = section.virtual_address;
let va_end = va_start + section.virtual_size;
let file_start = section.virtual_address as usize;
let mut current_func: Option<u32> = None;
let mut addr = va_start;
while addr < va_end {
let abs_addr = info.image_base + addr;
let off = (addr - va_start) as usize + file_start;
if off + 4 > pe.len() { break; }
if func_analysis.is_function_start(abs_addr) {
current_func = Some(abs_addr);
}
let instr = u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]]);
let decoded = crate::ppc::disasm(instr, abs_addr);
let (mnemonic, operands) = split_disasm(&decoded.base);
let (ext_mnemonic, ext_operands, ext_disasm): (Option<&str>, Option<&str>, Option<&str>) =
match &decoded.ext {
Some(ext) => {
let (em, eo) = split_disasm(ext);
(Some(em), Some(eo), Some(ext.as_str()))
}
None => (None, None, None),
};
let label = labels.get(&abs_addr).map(|s| s.as_str());
{
let mut stmt = tx.prepare_cached(
"INSERT INTO instructions (address, raw, mnemonic, operands, disasm, ext_mnemonic, ext_operands, ext_disasm, section, function, label)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)"
)?;
stmt.execute(params![
abs_addr as i64,
instr as i64,
mnemonic,
operands,
decoded.base,
ext_mnemonic,
ext_operands,
ext_disasm,
section.name,
current_func.map(|a| a as i64),
label,
])?;
}
count += 1;
since_commit += 1;
addr += 4;
if since_commit >= batch_size() {
tx.commit()?;
eprintln!("[db] instructions: {count} committed");
tx = conn.unchecked_transaction()?;
since_commit = 0;
}
}
let va_start = info.image_base + section.virtual_address;
let va_end = info.image_base + section.virtual_address + section.virtual_size;
let items = crate::disasm::enrich_section(
pe, info.image_base, &section.name, va_start, va_end, func_analysis, labels,
);
total += crate::sinks::duckdb::append_instructions(&mut appender, items)?;
}
tx.commit()?;
eprintln!("[db] inserted {count} instructions");
appender.flush()?;
metrics::counter!("db.rows", "table" => "instructions").increment(total);
tracing::info!(rows = total, table = "instructions", "bulk insert complete");
Ok(())
}
@@ -649,9 +760,8 @@ fn insert_xrefs_streaming(
func_analysis: &FuncAnalysis,
labels: &HashMap<u32, String>,
) -> anyhow::Result<()> {
let mut tx = conn.unchecked_transaction()?;
let mut appender = conn.appender("xrefs")?;
let mut count: u64 = 0;
let mut since_commit: u64 = 0;
for (&target, refs) in xrefs {
let target_label = labels.get(&target).map(|s| s.as_str());
@@ -663,10 +773,11 @@ fn insert_xrefs_streaming(
let off = xref.source.wrapping_sub(image_base) as usize;
if off + 4 <= pe.len() {
let raw = u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]]);
let decoded = crate::ppc::disasm(raw, xref.source);
let display = decoded.display().to_string();
let (mnem, _) = split_disasm(&display);
Some(mnem.to_string())
let d = xenia_cpu::decode(raw, xref.source);
let t = xenia_cpu::disasm::format(&d);
// Prefer the simplified mnemonic when present (matches what
// a human reading the .asm file sees for that line).
Some(t.ext_mnemonic.unwrap_or(t.mnemonic))
} else {
None
}
@@ -681,47 +792,22 @@ fn insert_xrefs_streaming(
xref.source, func_analysis, labels,
);
{
let mut stmt = tx.prepare_cached(
"INSERT INTO xrefs (source, target, kind, instruction, source_func, source_label, target_label)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"
)?;
stmt.execute(params![
xref.source as i64,
target as i64,
kind,
instruction,
source_func,
source_label,
target_label,
])?;
}
appender.append_row(params![
xref.source as i64,
target as i64,
kind,
instruction.as_deref(),
source_func,
source_label.as_str(),
target_label,
])?;
count += 1;
since_commit += 1;
if since_commit >= batch_size() {
tx.commit()?;
eprintln!("[db] xrefs: {count} committed");
tx = conn.unchecked_transaction()?;
since_commit = 0;
}
}
}
tx.commit()?;
eprintln!("[db] inserted {count} xrefs");
appender.flush()?;
metrics::counter!("db.rows", "table" => "xrefs").increment(count);
tracing::info!(rows = count, table = "xrefs", "bulk insert complete");
Ok(())
}
/// Break a disassembly line into `(mnemonic, operands)`.
///
/// Leading and trailing whitespace of the input is ignored. The mnemonic is
/// everything up to the first whitespace character; the operands are the
/// remainder with its leading whitespace removed. A line with no interior
/// whitespace yields the whole trimmed line as the mnemonic and `""` as the
/// operands.
fn split_disasm(disasm: &str) -> (&str, &str) {
    let line = disasm.trim();
    match line.split_once(char::is_whitespace) {
        Some((mnemonic, rest)) => (mnemonic, rest.trim_start()),
        None => (line, ""),
    }
}