//! DuckDB writer for xenia-rs. //! //! Layered, streaming writes shared by `extract`, `dis`, and `exec`. //! Each command's output is a superset of the previous: //! - `extract --db` -> base tables (metadata, sections, imports) //! - `dis --db` -> base + disasm tables (functions, labels, instructions, xrefs) //! - `exec --db` -> base + disasm + opt-in trace tables (exec_trace, import_calls, branch_trace) //! //! Bulk inserts use the DuckDB Appender API, which bypasses the SQL layer and //! writes directly to columnar storage — no transaction batching required. //! //! Trace kind values for `branch_trace.kind`: //! - `"call"` : any branch with LK set (raw & 1 == 1) //! - `"return"` : bclrx without LK //! - `"jump"` : bcctrx without LK //! - `"branch"` : bx/bcx without LK //! //! # Schema //! //! ## `metadata` //! Key-value table. One row per XEX header field. Values are strings. //! //! | key | value format | meaning | //! |--------------------|------------------|----------------------------------------------------| //! | `image_base` | `"0xXXXXXXXX"` | Virtual address where the PE image is mapped | //! | `entry_point` | `"0xXXXXXXXX"` | Absolute VA of the XEX entry point | //! | `original_pe_name` | string | Original PE filename from XEX optional headers | //! | `title_id` | `"0xXXXXXXXX"` | Xbox 360 Title ID (identifies the game) | //! | `media_id` | `"0xXXXXXXXX"` | Disc/media ID (identifies the specific disc build) | //! //! ## `sections` //! One row per PE section (`.text`, `.data`, etc.). //! - `name` — PE section name //! - `virtual_address` — RVA relative to `image_base` where the section is mapped in memory //! - `virtual_size` — Size in memory; may exceed `raw_size` due to BSS zero-fill //! - `raw_offset` — Byte offset of section data within the XEX/PE file //! - `raw_size` — Size of section data on disk //! - `flags` — `IMAGE_SCN_*` characteristics bit field //! - `is_code` — `true` if `IMAGE_SCN_CNT_CODE` is set //! //! ## `imports` //! 
One row per import record from the XEX import descriptor table. //! - `library` — Module name (e.g. `xboxkrnl.exe`, `xam.xex`) //! - `ordinal` — Numeric ordinal identifying the export within the library //! - `name` — Resolved human-readable symbol name; `NULL` if not in symbol table //! - `record_type` — XEX import record type: `0` = function thunk, `1` = variable //! - `address` — Absolute VA of the import thunk or variable in the binary //! //! ## `functions` //! One row per detected function (from prologue analysis). //! - `address` — Absolute VA of the function entry point (PK) //! - `name` — Symbol name, or `sub_XXXXXXXX` if unresolved //! - `end_address` — Absolute VA of last instruction + 4 (exclusive end) //! - `frame_size` — Stack frame size in bytes (from prologue) //! - `saved_gprs` — Bitmask of GPRs saved in prologue (bit N set ⇒ rN is saved) //! - `is_leaf` — `true` if the function has no outgoing calls (no `bl`/`blr`) //! - `is_saverestore` — `true` if this is a `__savegprlr_*`/`__restgprlr_*` compiler stub //! //! ## `labels` //! One row per named address; superset of functions. //! - `address` — Absolute VA (PK) //! - `name` — Symbol name //! - `kind` — One of: `function`, `import`, `saverestore`, `local`, `data`, `other` //! //! ## `instructions` //! One row per disassembled instruction. //! - `address` — Absolute VA (PK) //! - `raw` — 4-byte big-endian instruction word as integer //! - `mnemonic` — Base mnemonic (e.g. `stw`, `bl`, `cmpwi`) //! - `operands` — Operand string from base disassembly //! - `disasm` — Full base disassembly string (`mnemonic + " " + operands`) //! - `ext_mnemonic` — Simplified mnemonic (e.g. `mr` for `or rX,rY,rY`); `NULL` if none //! - `ext_operands` — Operands for the extended form; `NULL` if none //! - `ext_disasm` — Full extended disassembly string; `NULL` if none //! - `target_hex` — Resolved absolute branch target for `b`/`bc` (and link/AA variants); `NULL` for indirect or non-branch instructions. 
SQL views (`v_branch_xrefs`) self-join on this column. //! - `section` — Name of the PE section containing this instruction //! - `function` — VA of the enclosing function; `NULL` if not inside a detected function //! - `label` — Label name at this address; `NULL` if none //! //! ## `xrefs` //! One row per cross-reference edge (call, jump, data access). //! - `source` — Absolute VA of the instruction making the reference //! - `target` — Absolute VA of the referenced destination //! - `kind` — Reference type as the short tag from [`crate::xref::XrefKind::tag`]: //! `call`, `j` (jump), `br` (branch), `read` (data_read), //! `write` (data_write), `ref` (data_ref). //! Note: this is a different convention from `branch_trace.kind`, //! which uses the long names (`call` / `return` / `jump` / `branch`). //! - `instruction` — Mnemonic of the source instruction; `NULL` if address is not in binary //! - `source_func` — VA of the function containing `source`; `NULL` if unknown //! - `source_label` — Label at `source`; `NULL` if none //! - `target_label` — Label at `target`; `NULL` if none //! //! ## `exec_trace` *(opt-in: `--trace-instructions`)* //! One row per executed instruction. //! - `address` — Absolute VA of the instruction //! - `cycle` — Monotonic instruction counter (execution order) //! - `r3`, `r4`, `lr`, `sp` — Snapshot of key GPRs at time of execution //! //! ## `import_calls` *(opt-in: `--trace-imports`)* //! One row per intercepted kernel/import call. //! - `address` — VA of the import thunk //! - `cycle` — Instruction counter at point of interception //! - `module` — Library name (e.g. `xboxkrnl.exe`) //! - `ordinal` — Numeric ordinal within the module //! - `name` — Resolved symbol name //! - `arg_r3`–`arg_r6` — First four call arguments (PowerPC ABI: r3–r6) //! - `return_value` — Value in r3 after the call returns //! //! ## `branch_trace` *(opt-in: `--trace-branches`)* //! One row per taken branch. //! - `cycle` — Instruction counter //! 
- `source` — VA of the branch instruction //! - `target` — VA of the branch destination //! - `kind` — `call`, `return`, `jump`, or `branch` (see top-level doc) //! - `lr` — Link register value at time of branch use std::collections::HashMap; use std::path::Path; use duckdb::{Connection, params}; use crate::func::FuncAnalysis; use crate::xref::{XrefMap, resolve_source_label}; use crate::formatter::DisasmInfo; const DEFAULT_BATCH_SIZE: u64 = 100_000; /// Rows per trace buffer flush. Configurable via `XENIA_DB_BATCH_SIZE` env var (default 100_000). /// Applies to `exec_trace` and `branch_trace` buffer thresholds. /// `import_calls` always flushes at 1000 — low volume, not worth scaling. fn batch_size() -> u64 { use std::sync::OnceLock; static CACHED: OnceLock = OnceLock::new(); *CACHED.get_or_init(|| { std::env::var("XENIA_DB_BATCH_SIZE") .ok() .and_then(|s| s.parse::().ok()) .filter(|&n| n > 0) .unwrap_or(DEFAULT_BATCH_SIZE) }) } pub struct ExecTraceEntry { pub address: u32, pub cycle: u64, pub r3: u64, pub r4: u64, pub lr: u64, pub sp: u64, } pub struct ImportCallEntry { pub address: u32, pub cycle: u64, pub module: String, pub ordinal: u16, pub name: String, pub arg_r3: u64, pub arg_r4: u64, pub arg_r5: u64, pub arg_r6: u64, pub return_value: u64, } pub struct BranchTraceEntry { pub source: u32, pub target: u32, pub cycle: u64, pub kind: &'static str, pub lr: u64, } pub struct DbWriter { conn: Connection, exec_buffer: Vec, import_buffer: Vec, branch_buffer: Vec, exec_count: u64, import_count: u64, branch_count: u64, trace_instructions: bool, trace_imports: bool, trace_branches: bool, } impl DbWriter { /// Open a fresh database at `path`, removing any existing file first. 
pub fn open_fresh(path: &Path) -> anyhow::Result { if path.exists() { std::fs::remove_file(path)?; } let conn = Connection::open(path)?; let cap = batch_size() as usize; Ok(Self { conn, exec_buffer: Vec::with_capacity(cap), import_buffer: Vec::with_capacity(1024), branch_buffer: Vec::with_capacity(cap), exec_count: 0, import_count: 0, branch_count: 0, trace_instructions: false, trace_imports: false, trace_branches: false, }) } // ── Base layer (written by extract/dis/exec) ───────────────────────────── /// Write metadata, sections, imports tables and their indices. #[tracing::instrument(skip_all, name = "db.write_base")] pub fn write_base(&mut self, info: &DisasmInfo) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE metadata ( key VARCHAR PRIMARY KEY, -- header field name value VARCHAR NOT NULL -- hex-formatted or plain string value ); CREATE TABLE sections ( name VARCHAR NOT NULL, -- PE section name (e.g. .text, .rdata) virtual_address BIGINT NOT NULL, -- RVA relative to image_base virtual_size BIGINT NOT NULL, -- size in memory; may exceed raw_size (BSS) raw_offset BIGINT NOT NULL, -- byte offset of section data in the file raw_size BIGINT NOT NULL, -- size of section data on disk flags BIGINT NOT NULL, -- IMAGE_SCN_* characteristics bit field is_code BOOLEAN NOT NULL -- true if IMAGE_SCN_CNT_CODE is set ); CREATE TABLE imports ( library VARCHAR NOT NULL, -- module name (e.g. 
xboxkrnl.exe, xam.xex) ordinal BIGINT NOT NULL, -- ordinal identifying the export within the library name VARCHAR, -- resolved symbol name; NULL if not in symbol table record_type BIGINT NOT NULL, -- 0 = function thunk, 1 = variable address BIGINT NOT NULL -- absolute VA of the thunk or variable ); ")?; insert_metadata(&self.conn, info)?; insert_sections(&self.conn, info.sections)?; insert_imports(&self.conn, info)?; self.conn.execute_batch(" CREATE INDEX idx_imports_library ON imports(library); CREATE INDEX idx_imports_name ON imports(name); ")?; Ok(()) } // ── Disasm layer (written by dis/exec) ─────────────────────────────────── /// Phase-3 ingest pass — purely mechanical disasm rows. Creates the /// `instructions` table (and its indices) and streams every code-section /// instruction through the iterator + DuckDB sink. Does NOT touch /// `functions` / `labels` / `xrefs` — that's [`Self::write_analysis_results`]. /// /// `func_analysis` and `labels` are still required at this layer because /// each row carries the rolling-window `function` and `label` columns for /// downstream queries. #[tracing::instrument(skip_all, name = "db.ingest_instructions")] pub fn ingest_instructions( &mut self, pe: &[u8], info: &DisasmInfo, func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE instructions ( address BIGINT PRIMARY KEY, -- absolute VA raw BIGINT NOT NULL, -- 4-byte big-endian instruction word as integer mnemonic VARCHAR NOT NULL, -- base mnemonic (e.g. stw, bl, cmpwi) operands VARCHAR NOT NULL, -- operand string from base disassembly disasm VARCHAR NOT NULL, -- full base disassembly (mnemonic + operands) ext_mnemonic VARCHAR, -- simplified mnemonic (e.g. 
mr); NULL if none ext_operands VARCHAR, -- operands for the extended form; NULL if none ext_disasm VARCHAR, -- full extended disassembly string; NULL if none target_hex BIGINT, -- resolved absolute target for direct branches; NULL for indirect/non-branch section VARCHAR NOT NULL, -- PE section name containing this instruction function BIGINT, -- VA of the enclosing function; NULL if unknown label VARCHAR -- label at this address; NULL if none ); ")?; insert_instructions_streaming(&self.conn, pe, info, func_analysis, labels)?; let indices = [ ("idx_instructions_function", "CREATE INDEX idx_instructions_function ON instructions(function)"), ("idx_instructions_mnemonic", "CREATE INDEX idx_instructions_mnemonic ON instructions(mnemonic)"), ("idx_instructions_ext_mnemonic", "CREATE INDEX idx_instructions_ext_mnemonic ON instructions(ext_mnemonic)"), ("idx_instructions_section", "CREATE INDEX idx_instructions_section ON instructions(section)"), ("idx_instructions_label", "CREATE INDEX idx_instructions_label ON instructions(label)"), ("idx_instructions_target_hex", "CREATE INDEX idx_instructions_target_hex ON instructions(target_hex)"), ]; for (name, sql) in indices { tracing::debug!(index = name, "creating instructions index"); self.conn.execute_batch(sql)?; } Ok(()) } /// Phase-3 analyze pass — writes the Rust-pass-derived tables /// (`functions`, `labels`, `xrefs`) and their indices. Always executes /// in `--analyze=rust` and `--analyze=both` modes; skipped only when /// the caller deliberately chooses a Rust-free DB layout. /// /// `vtables` is the M3 result; pass an empty slice when the caller has /// not run the vtable scan (the tables are still created, just empty). /// `strings` is the M7 result; same convention. `funcptr_arrays` is the /// M8/M11 result. `typed_ind` is the M5.5 result. `eh_records` is the /// M9.5 result. 
// NOTE(review): the type arguments of `labels` were missing from the source
// text; `<u32, String>` (VA -> label name) is reconstructed from how the map
// is used elsewhere in this file — confirm against callers.
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
pub fn write_analysis_results(
    &mut self,
    pe: &[u8],
    info: &DisasmInfo,
    func_analysis: &FuncAnalysis,
    labels: &HashMap<u32, String>,
    xrefs: &XrefMap,
    vtables: &[crate::vtables::Vtable],
    strings: &[crate::strings::DetectedString],
    funcptr_arrays: &[crate::funcptr_arrays::FuncPtrArray],
    typed_ind: Option<&crate::ind_dispatch_typed::TypedIndirectResult>,
    eh_records: &[crate::eh_scope::EhFuncInfo],
) -> anyhow::Result<()> {
    // All analysis tables are created up front, including those that stay
    // empty when the corresponding analysis result was not supplied.
    self.conn.execute_batch("
        CREATE TABLE functions (
            address         BIGINT PRIMARY KEY,  -- absolute VA of entry point
            name            VARCHAR NOT NULL,    -- symbol name, or sub_XXXXXXXX if unresolved
            end_address     BIGINT  NOT NULL,    -- VA of last instruction + 4 (exclusive end)
            frame_size      BIGINT  NOT NULL,    -- stack frame size in bytes (from prologue)
            saved_gprs      BIGINT  NOT NULL,    -- bitmask of GPRs saved in prologue (bit N = rN)
            is_leaf         BOOLEAN NOT NULL,    -- true if the function has no outgoing calls
            is_saverestore  BOOLEAN NOT NULL,    -- true if __savegprlr_* / __restgprlr_* stub
            pdata_validated BOOLEAN NOT NULL,    -- true if .pdata RUNTIME_FUNCTION exists at this VA
            pdata_length    BIGINT,              -- length in bytes per .pdata; NULL if no pdata entry
            has_eh          BOOLEAN NOT NULL     -- M9: pdata exception-flag bit set; function has C++ EH/SEH
        );
        CREATE TABLE pdata_entries (
            begin_address   BIGINT PRIMARY KEY,  -- absolute VA of function start (RUNTIME_FUNCTION.BeginAddress)
            end_address     BIGINT NOT NULL,     -- begin_address + function_length (exclusive)
            function_length BIGINT NOT NULL,     -- function size in bytes
            prolog_length   BIGINT NOT NULL,     -- prolog size in bytes
            flags           BIGINT NOT NULL      -- raw 2-bit flags (bit 1=32-bit-code, bit 0=exception)
        );
        CREATE TABLE labels (
            address BIGINT PRIMARY KEY,  -- absolute VA
            name    VARCHAR NOT NULL,    -- symbol name
            kind    VARCHAR NOT NULL     -- function | import | saverestore | local | data | other
        );
        CREATE TABLE vtables (
            address           BIGINT PRIMARY KEY,  -- absolute VA of vtable[0]
            length            BIGINT  NOT NULL,    -- number of method slots
            col_address       BIGINT,              -- VA of CompleteObjectLocator (NULL when no RTTI)
            class_name        VARCHAR NOT NULL,    -- demangled class name OR ANON_Class_ when stripped
            rtti_present      BOOLEAN NOT NULL,    -- true when COL → TypeDescriptor walk succeeded
            base_classes_json VARCHAR              -- JSON array of base class names (NULL if none / parse failure)
        );
        CREATE TABLE methods (
            vtable_address   BIGINT NOT NULL,  -- vtable this slot belongs to
            slot             BIGINT NOT NULL,  -- 0-based slot index
            function_address BIGINT NOT NULL,  -- VA of the function this slot points at
            mangled_name     VARCHAR,          -- raw label name when mangled (?...)
            demangled_name   VARCHAR,          -- LLVM-style demangled output
            PRIMARY KEY (vtable_address, slot)
        );
        CREATE TABLE classes (
            name              VARCHAR PRIMARY KEY,  -- class name (demangled or ANON_*)
            vtable_address    BIGINT  NOT NULL,     -- representative vtable (first detected)
            rtti_present      BOOLEAN NOT NULL,
            base_classes_json VARCHAR               -- JSON of base class names (NULL when stripped)
        );
        CREATE TABLE strings (
            address  BIGINT PRIMARY KEY,  -- absolute VA of first byte
            encoding VARCHAR NOT NULL,    -- 'ascii' or 'utf16le'
            length   BIGINT  NOT NULL,    -- length in bytes (excluding NUL terminator)
            content  VARCHAR NOT NULL     -- UTF-8 representation of the string
        );
        CREATE TABLE tls_info (
            raw_data_start  BIGINT NOT NULL,  -- VA of TLS template start
            raw_data_end    BIGINT NOT NULL,  -- VA one-past-end of TLS template
            index_address   BIGINT NOT NULL,  -- VA of u32 the loader writes the assigned slot index into
            callback_array  BIGINT NOT NULL,  -- VA of zero-terminated callback array (0 if none)
            zero_fill_size  BIGINT NOT NULL,  -- bytes of zero-fill appended after raw template
            characteristics BIGINT NOT NULL   -- IMAGE_TLS_DIRECTORY characteristics flags
        );
        CREATE TABLE tls_callbacks (
            slot    BIGINT PRIMARY KEY,  -- 0-based index in the callback array
            address BIGINT NOT NULL      -- VA of callback function
        );
        CREATE TABLE function_pointer_arrays (
            address BIGINT PRIMARY KEY,  -- absolute VA of the array's first slot
            length  BIGINT  NOT NULL,    -- number of slots
            kind    VARCHAR NOT NULL     -- 'vtable' (M3) | 'dispatch_table' (M8) | 'static_init' (M11)
        );
        CREATE TABLE function_pointer_array_entries (
            array_address    BIGINT NOT NULL,  -- FK to function_pointer_arrays.address
            slot             BIGINT NOT NULL,  -- 0-based slot index
            function_address BIGINT NOT NULL,  -- VA of the function this slot points at
            PRIMARY KEY (array_address, slot)
        );
        -- M5.5 — typed indirect-dispatch resolutions. Each row is one
        -- bcctrl site that matched the canonical lwz vt, off(this);
        -- lwz fn, slot(vt); mtctr; bcctrl pattern. candidate_count > 1
        -- means the analysis could not pick a single class; downstream
        -- queries should treat such rows as reachability-only.
        CREATE TABLE indirect_dispatch_sites (
            dispatch_pc     BIGINT PRIMARY KEY,
            vptr_offset     BIGINT NOT NULL,
            slot            BIGINT NOT NULL,
            candidate_count BIGINT NOT NULL
        );
        -- M5.5 — one row per (dispatch site × candidate vtable). The
        -- ind_call xref edges in the `xrefs` table are derived from
        -- this; this view lets you join back to vtable / method info.
        CREATE TABLE indirect_dispatch_candidates (
            dispatch_pc    BIGINT NOT NULL,
            vtable_address BIGINT NOT NULL,
            method_address BIGINT NOT NULL,
            PRIMARY KEY (dispatch_pc, vtable_address)
        );
        -- M5.5 — every detected `stw rVtable, vptr_off(rThis)` writer
        -- found in any function. Useful for diagnosing why a class
        -- has (or does not have) coverage in the dispatch resolver.
        CREATE TABLE vptr_writes (
            writer_pc       BIGINT NOT NULL,
            vtable_address  BIGINT NOT NULL,
            vptr_offset     BIGINT NOT NULL,
            writer_function BIGINT NOT NULL,
            PRIMARY KEY (writer_pc, vtable_address, vptr_offset)
        );
        -- M9.5 — MSVC __CxxFrameHandler scope-table records found by
        -- magic-number scan in .rdata.
        CREATE TABLE eh_funcinfo (
            address           BIGINT PRIMARY KEY,
            magic             BIGINT NOT NULL,  -- 0x19930520/21/22
            max_state         BIGINT NOT NULL,
            p_unwind_map      BIGINT NOT NULL,
            n_try_blocks      BIGINT NOT NULL,
            p_try_block_map   BIGINT NOT NULL,
            n_ip_map_entries  BIGINT NOT NULL,
            p_ip_to_state_map BIGINT NOT NULL,
            p_es_type_list    BIGINT,
            eh_flags          BIGINT
        );
        CREATE TABLE eh_unwind_map (
            funcinfo_address BIGINT NOT NULL,  -- FK to eh_funcinfo.address
            state_index      BIGINT NOT NULL,
            to_state         BIGINT NOT NULL,
            action_pc        BIGINT NOT NULL,
            PRIMARY KEY (funcinfo_address, state_index)
        );
        CREATE TABLE eh_try_blocks (
            funcinfo_address BIGINT NOT NULL,  -- FK to eh_funcinfo.address
            try_index        BIGINT NOT NULL,
            try_low          BIGINT NOT NULL,
            try_high         BIGINT NOT NULL,
            catch_high       BIGINT NOT NULL,
            n_catches        BIGINT NOT NULL,
            p_handler_array  BIGINT NOT NULL,
            PRIMARY KEY (funcinfo_address, try_index)
        );
        CREATE TABLE demangled_names (
            address          BIGINT,            -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
            mangled          VARCHAR NOT NULL,  -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
            raw_demangled    VARCHAR NOT NULL,  -- LLVM-style demangled output (or mangled string on parse failure)
            namespace_path   VARCHAR,           -- e.g. xe::apu (NULL = global / parser failure)
            class_name       VARCHAR,           -- e.g. AudioSystem (NULL = free function / parser failure)
            method_name      VARCHAR,           -- e.g. Setup (NULL on parser failure)
            params_signature VARCHAR            -- contents of the outermost (...) (NULL = not a function)
        );
        CREATE TABLE xrefs (
            source       BIGINT  NOT NULL,  -- VA of the referencing instruction
            target       BIGINT  NOT NULL,  -- VA of the referenced destination
            kind         VARCHAR NOT NULL,  -- call | ind_call | j | br | read | write | ref
            addr_mode    VARCHAR,           -- M6 sub-classification of how source computes target (NULL for control-flow)
            instruction  VARCHAR,           -- mnemonic of source instruction; NULL if not in binary
            source_func  BIGINT,            -- VA of the function containing source; NULL if unknown
            source_label VARCHAR,           -- label at source; NULL if none
            target_label VARCHAR            -- label at target; NULL if none
        );
    ")?;

    insert_functions(&self.conn, func_analysis, labels)?;
    insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?;
    insert_labels(&self.conn, labels)?;
    insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
    insert_vtables(&self.conn, vtables, pe, info.image_base)?;
    insert_methods_and_classes(&self.conn, vtables, labels)?;
    insert_strings(&self.conn, strings)?;
    insert_funcptr_arrays(&self.conn, funcptr_arrays)?;
    // The typed indirect-dispatch tables stay empty when no result is supplied.
    if let Some(t) = typed_ind {
        insert_typed_ind_dispatch(&self.conn, t)?;
    }
    insert_eh_records(&self.conn, eh_records)?;
    insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;

    // Indices are built after the rows are loaded, one statement at a time
    // so each creation is traced individually.
    let indices = [
        ("idx_functions_name", "CREATE INDEX idx_functions_name ON functions(name)"),
        ("idx_functions_pdata_validated", "CREATE INDEX idx_functions_pdata_validated ON functions(pdata_validated)"),
        ("idx_functions_has_eh", "CREATE INDEX idx_functions_has_eh ON functions(has_eh)"),
        ("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"),
        ("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"),
        ("idx_demangled_address", "CREATE INDEX idx_demangled_address ON demangled_names(address)"),
        ("idx_demangled_class", "CREATE INDEX idx_demangled_class ON demangled_names(class_name)"),
        ("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
        ("idx_methods_function", "CREATE INDEX idx_methods_function ON methods(function_address)"),
        ("idx_classes_rtti", "CREATE INDEX idx_classes_rtti ON classes(rtti_present)"),
        ("idx_strings_encoding", "CREATE INDEX idx_strings_encoding ON strings(encoding)"),
        ("idx_xrefs_addr_mode", "CREATE INDEX idx_xrefs_addr_mode ON xrefs(addr_mode)"),
        ("idx_fparrays_kind", "CREATE INDEX idx_fparrays_kind ON function_pointer_arrays(kind)"),
        ("idx_fpentries_function", "CREATE INDEX idx_fpentries_function ON function_pointer_array_entries(function_address)"),
        ("idx_indcand_method", "CREATE INDEX idx_indcand_method ON indirect_dispatch_candidates(method_address)"),
        ("idx_indcand_vtable", "CREATE INDEX idx_indcand_vtable ON indirect_dispatch_candidates(vtable_address)"),
        ("idx_indsites_offset_slot", "CREATE INDEX idx_indsites_offset_slot ON indirect_dispatch_sites(vptr_offset, slot)"),
        ("idx_vptrw_vtable", "CREATE INDEX idx_vptrw_vtable ON vptr_writes(vtable_address)"),
        ("idx_vptrw_offset", "CREATE INDEX idx_vptrw_offset ON vptr_writes(vptr_offset)"),
        ("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
        ("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
        ("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
        ("idx_xrefs_kind", "CREATE INDEX idx_xrefs_kind ON xrefs(kind)"),
        ("idx_xrefs_instruction", "CREATE INDEX idx_xrefs_instruction ON xrefs(instruction)"),
        ("idx_xrefs_target_label", "CREATE INDEX idx_xrefs_target_label ON xrefs(target_label)"),
    ];
    for (name, sql) in indices {
        tracing::debug!(index = name, "creating analysis index");
        self.conn.execute_batch(sql)?;
    }
    Ok(())
}

/// Back-compat wrapper for callers that want the full pre-Phase-3
/// "everything in one shot" behaviour. Equivalent to
/// `ingest_instructions` + `write_analysis_results` with no M3 vtables /
/// M7 strings.
// NOTE(review): the type arguments of `labels` were missing from the source
// text; `<u32, String>` (VA -> label name) is reconstructed from how the map
// is used elsewhere in this file — confirm against callers.
#[tracing::instrument(skip_all, name = "db.write_disasm")]
pub fn write_disasm(
    &mut self,
    pe: &[u8],
    info: &DisasmInfo,
    func_analysis: &FuncAnalysis,
    labels: &HashMap<u32, String>,
    xrefs: &XrefMap,
) -> anyhow::Result<()> {
    self.ingest_instructions(pe, info, func_analysis, labels)?;
    // Every optional analysis input is passed empty / `None`; the
    // corresponding tables are still created.
    self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[], &[], &[], None, &[])?;
    Ok(())
}

/// M10 — write the parsed `.tls` directory + callback array. No-op
/// when `tls` is `None` (binary has no `.tls` section).
///
/// Inserts one `tls_info` row and one `tls_callbacks` row per callback,
/// keyed by the callback's position in `callbacks`.
#[tracing::instrument(skip_all, name = "db.write_tls")]
pub fn write_tls(
    &mut self,
    tls: Option<&xenia_xex::tls::TlsInfo>,
) -> anyhow::Result<()> {
    let Some(t) = tls else {
        return Ok(());
    };

    self.conn.execute(
        "INSERT INTO tls_info (raw_data_start, raw_data_end, index_address, callback_array, zero_fill_size, characteristics) VALUES (?, ?, ?, ?, ?, ?)",
        params![
            t.raw_data_start as i64,
            t.raw_data_end as i64,
            t.index_address as i64,
            t.callback_array as i64,
            t.zero_fill_size as i64,
            t.characteristics as i64,
        ],
    )?;

    let mut stmt = self.conn.prepare(
        "INSERT INTO tls_callbacks (slot, address) VALUES (?, ?)"
    )?;
    for (i, cb) in t.callbacks.iter().enumerate() {
        stmt.execute(params![i as i64, cb.address as i64])?;
    }

    metrics::counter!("db.rows", "table" => "tls_callbacks").increment(t.callbacks.len() as u64);
    tracing::info!(rows = t.callbacks.len(), table = "tls_callbacks", "tls write complete");
    Ok(())
}

/// Phase-3 SQL-views layer — defines additive read-only views over
/// `instructions` (and optionally `xrefs`/`functions`/`labels`).
/// See [`crate::sql_views`] for the SQL definitions.
///
/// Called when `--analyze=sql` or `--analyze=both` is in effect.
#[tracing::instrument(skip_all, name = "db.create_sql_views")]
pub fn create_sql_views(&mut self) -> anyhow::Result<()> {
    // Views are created in the order `ALL_VIEWS` lists them; the first
    // failing definition aborts the rest.
    for (view_name, view_sql) in crate::sql_views::ALL_VIEWS {
        tracing::debug!(view = view_name, "creating SQL view");
        self.conn.execute_batch(view_sql)?;
    }
    Ok(())
}

/// Cross-check: count branch xrefs found by the SQL view that are absent
/// from the Rust-pass `xrefs` table (and vice versa). Returns
/// `(sql_only, rust_only)` row counts. Both should be zero — the two
/// surfaces produce identical edges by construction. A non-zero count
/// signals drift between the formatter's `mnemonic` column and
/// `xref.rs`'s opcode classification, and is logged as a warning by the
/// caller.
#[tracing::instrument(skip_all, name = "db.cross_check_branch_xrefs")]
pub fn cross_check_branch_xrefs(&self) -> anyhow::Result<(u64, u64)> {
    // Edges present in the view but missing from `xrefs`.
    const SQL_ONLY: &str = "SELECT COUNT(*) FROM v_branch_xrefs vb \
                            LEFT JOIN xrefs x \
                            ON x.source = vb.source AND x.target = vb.target AND x.kind = vb.kind \
                            WHERE x.source IS NULL";
    // Control-flow edges present in `xrefs` but missing from the view.
    const RUST_ONLY: &str = "SELECT COUNT(*) FROM xrefs x \
                             LEFT JOIN v_branch_xrefs vb \
                             ON vb.source = x.source AND vb.target = x.target AND vb.kind = x.kind \
                             WHERE x.kind IN ('call','j','br') AND vb.source IS NULL";

    // Runs an anti-join COUNT(*) query and returns its single value.
    let count_rows = |query: &str| -> anyhow::Result<u64> {
        let n: i64 = self.conn.query_row(query, [], |row| row.get(0))?;
        Ok(n as u64)
    };

    let missing_from_rust = count_rows(SQL_ONLY)?;
    let missing_from_sql = count_rows(RUST_ONLY)?;
    Ok((missing_from_rust, missing_from_sql))
}

// ── Trace layer (written by exec when flags enabled) ─────────────────────

/// Create the opt-in trace tables. No-op if all flags are false.
pub fn prepare_trace_tables(
    &mut self,
    trace_instructions: bool,
    trace_imports: bool,
    trace_branches: bool,
) -> anyhow::Result<()> {
    // Remember the selection: the `log_*` methods and `finalize_traces`
    // consult these flags.
    self.trace_instructions = trace_instructions;
    self.trace_imports = trace_imports;
    self.trace_branches = trace_branches;

    if trace_instructions {
        self.conn.execute_batch("
            CREATE TABLE exec_trace (
                address BIGINT NOT NULL,  -- absolute VA of the instruction
                cycle   BIGINT NOT NULL,  -- monotonic instruction counter (execution order)
                r3      BIGINT NOT NULL,  -- r3 at time of execution
                r4      BIGINT NOT NULL,  -- r4 at time of execution
                lr      BIGINT NOT NULL,  -- link register
                sp      BIGINT NOT NULL   -- stack pointer
            );
        ")?;
    }
    if trace_imports {
        self.conn.execute_batch("
            CREATE TABLE import_calls (
                address      BIGINT  NOT NULL,  -- VA of the import thunk
                cycle        BIGINT  NOT NULL,  -- instruction counter at interception
                module       VARCHAR NOT NULL,  -- library name (e.g. xboxkrnl.exe)
                ordinal      BIGINT  NOT NULL,  -- ordinal within the module
                name         VARCHAR NOT NULL,  -- resolved symbol name
                arg_r3       BIGINT  NOT NULL,  -- first argument (r3)
                arg_r4       BIGINT  NOT NULL,  -- second argument (r4)
                arg_r5       BIGINT  NOT NULL,  -- third argument (r5)
                arg_r6       BIGINT  NOT NULL,  -- fourth argument (r6)
                return_value BIGINT  NOT NULL   -- r3 after the call returns
            );
        ")?;
    }
    if trace_branches {
        self.conn.execute_batch("
            CREATE TABLE branch_trace (
                cycle  BIGINT  NOT NULL,  -- instruction counter
                source BIGINT  NOT NULL,  -- VA of the branch instruction
                target BIGINT  NOT NULL,  -- VA of the branch destination
                kind   VARCHAR NOT NULL,  -- call | return | jump | branch
                lr     BIGINT  NOT NULL   -- link register at time of branch
            );
        ")?;
    }
    Ok(())
}

/// Buffer one executed-instruction row; ignored unless instruction tracing
/// is enabled. The buffer is flushed once it holds `batch_size()` rows.
pub fn log_instruction(&mut self, entry: ExecTraceEntry) {
    if !self.trace_instructions {
        return;
    }
    self.exec_buffer.push(entry);
    if self.exec_buffer.len() as u64 >= batch_size() {
        self.flush_exec();
    }
}

/// Buffer one import-call row; ignored unless import tracing is enabled.
/// The buffer is flushed once it holds 1000 rows.
pub fn log_import_call(&mut self, entry: ImportCallEntry) {
    // Fixed threshold: import calls are low volume compared to the other traces.
    const IMPORT_FLUSH_THRESHOLD: usize = 1000;

    if !self.trace_imports {
        return;
    }
    self.import_buffer.push(entry);
    if self.import_buffer.len() >= IMPORT_FLUSH_THRESHOLD {
        self.flush_imports();
    }
}

/// Buffer one taken-branch row; ignored unless branch tracing is enabled.
/// The buffer is flushed once it holds `batch_size()` rows.
pub fn log_branch(&mut self, entry: BranchTraceEntry) {
    if !self.trace_branches {
        return;
    }
    self.branch_buffer.push(entry);
    if self.branch_buffer.len() as u64 >= batch_size() {
        self.flush_branches();
    }
}

/// Append the buffered `exec_trace` rows and empty the buffer.
///
/// Failures are logged rather than propagated or turned into a panic, and
/// only rows that were actually appended and flushed are added to
/// `exec_count`. Rows that could not be written are dropped so the buffer
/// cannot grow without bound.
fn flush_exec(&mut self) {
    if self.exec_buffer.is_empty() {
        return;
    }
    let total = self.exec_buffer.len() as u64;
    let written = match self.conn.appender("exec_trace") {
        Ok(mut appender) => {
            let mut failed = 0u64;
            for e in &self.exec_buffer {
                let appended = appender.append_row(params![
                    e.address as i64,
                    e.cycle as i64,
                    e.r3 as i64,
                    e.r4 as i64,
                    e.lr as i64,
                    e.sp as i64,
                ]);
                if let Err(err) = appended {
                    // Log only the first failure of a batch to avoid flooding the log.
                    if failed == 0 {
                        tracing::error!(table = "exec_trace", error = %err, "failed to append trace row");
                    }
                    failed += 1;
                }
            }
            match appender.flush() {
                Ok(_) => total - failed,
                Err(err) => {
                    tracing::error!(table = "exec_trace", error = %err, "failed to flush trace appender");
                    0
                }
            }
        }
        Err(err) => {
            tracing::error!(table = "exec_trace", error = %err, "failed to open trace appender");
            0
        }
    };
    if written < total {
        tracing::warn!(table = "exec_trace", dropped = total - written, "trace rows dropped");
    }
    self.exec_count += written;
    self.exec_buffer.clear();
}

/// Append the buffered `import_calls` rows and empty the buffer.
///
/// Same error policy as `flush_exec`: failures are logged, only written
/// rows are added to `import_count`, and unwritten rows are dropped.
fn flush_imports(&mut self) {
    if self.import_buffer.is_empty() {
        return;
    }
    let total = self.import_buffer.len() as u64;
    let written = match self.conn.appender("import_calls") {
        Ok(mut appender) => {
            let mut failed = 0u64;
            for e in &self.import_buffer {
                let appended = appender.append_row(params![
                    e.address as i64,
                    e.cycle as i64,
                    e.module.as_str(),
                    e.ordinal as i64,
                    e.name.as_str(),
                    e.arg_r3 as i64,
                    e.arg_r4 as i64,
                    e.arg_r5 as i64,
                    e.arg_r6 as i64,
                    e.return_value as i64,
                ]);
                if let Err(err) = appended {
                    if failed == 0 {
                        tracing::error!(table = "import_calls", error = %err, "failed to append trace row");
                    }
                    failed += 1;
                }
            }
            match appender.flush() {
                Ok(_) => total - failed,
                Err(err) => {
                    tracing::error!(table = "import_calls", error = %err, "failed to flush trace appender");
                    0
                }
            }
        }
        Err(err) => {
            tracing::error!(table = "import_calls", error = %err, "failed to open trace appender");
            0
        }
    };
    if written < total {
        tracing::warn!(table = "import_calls", dropped = total - written, "trace rows dropped");
    }
    self.import_count += written;
    self.import_buffer.clear();
}

/// Append the buffered `branch_trace` rows and empty the buffer.
///
/// Same error policy as `flush_exec`: failures are logged, only written
/// rows are added to `branch_count`, and unwritten rows are dropped.
fn flush_branches(&mut self) {
    if self.branch_buffer.is_empty() {
        return;
    }
    let total = self.branch_buffer.len() as u64;
    let written = match self.conn.appender("branch_trace") {
        Ok(mut appender) => {
            let mut failed = 0u64;
            for e in &self.branch_buffer {
                let appended = appender.append_row(params![
                    e.cycle as i64,
                    e.source as i64,
                    e.target as i64,
                    e.kind,
                    e.lr as i64,
                ]);
                if let Err(err) = appended {
                    if failed == 0 {
                        tracing::error!(table = "branch_trace", error = %err, "failed to append trace row");
                    }
                    failed += 1;
                }
            }
            match appender.flush() {
                Ok(_) => total - failed,
                Err(err) => {
                    tracing::error!(table = "branch_trace", error = %err, "failed to flush trace appender");
                    0
                }
            }
        }
        Err(err) => {
            tracing::error!(table = "branch_trace", error = %err, "failed to open trace appender");
            0
        }
    };
    if written < total {
        tracing::warn!(table = "branch_trace", dropped = total - written, "trace rows dropped");
    }
    self.branch_count += written;
    self.branch_buffer.clear();
}

/// Flush remaining trace buffers and create their indices.
#[tracing::instrument(skip_all, name = "db.finalize_traces")]
pub fn finalize_traces(&mut self) -> anyhow::Result<()> {
    // Drain whatever is still buffered before any index is built.
    self.flush_exec();
    self.flush_imports();
    self.flush_branches();

    // Each enabled trace table gets its indices, created one statement at
    // a time in the listed order.
    if self.trace_instructions {
        for (index, ddl) in [
            ("idx_exec_trace_address", "CREATE INDEX idx_exec_trace_address ON exec_trace(address);"),
            ("idx_exec_trace_cycle", "CREATE INDEX idx_exec_trace_cycle ON exec_trace(cycle);"),
        ] {
            tracing::debug!("creating {index}");
            self.conn.execute_batch(ddl)?;
        }
    }
    if self.trace_imports {
        for (index, ddl) in [
            ("idx_import_calls_name", "CREATE INDEX idx_import_calls_name ON import_calls(name);"),
            ("idx_import_calls_cycle", "CREATE INDEX idx_import_calls_cycle ON import_calls(cycle);"),
        ] {
            tracing::debug!("creating {index}");
            self.conn.execute_batch(ddl)?;
        }
    }
    if self.trace_branches {
        for (index, ddl) in [
            ("idx_branch_trace_source", "CREATE INDEX idx_branch_trace_source ON branch_trace(source);"),
            ("idx_branch_trace_target", "CREATE INDEX idx_branch_trace_target ON branch_trace(target);"),
            ("idx_branch_trace_kind", "CREATE INDEX idx_branch_trace_kind ON branch_trace(kind);"),
            ("idx_branch_trace_cycle", "CREATE INDEX idx_branch_trace_cycle ON branch_trace(cycle);"),
        ] {
            tracing::debug!("creating {index}");
            self.conn.execute_batch(ddl)?;
        }
    }

    // Totals are reported for all three tables, enabled or not.
    metrics::counter!("db.rows", "table" => "exec_trace").increment(self.exec_count);
    metrics::counter!("db.rows", "table" => "import_calls").increment(self.import_count);
    metrics::counter!("db.rows", "table" => "branch_trace").increment(self.branch_count);
    tracing::info!(
        instructions = self.exec_count,
        imports = self.import_count,
        branches = self.branch_count,
        "trace totals"
    );
    Ok(())
}
}

/// Backwards-compatible wrapper that writes the full base + disasm layers.
pub fn write_db( path: &Path, pe: &[u8], info: &DisasmInfo, func_analysis: &FuncAnalysis, labels: &HashMap, _import_map: &HashMap, xrefs: &XrefMap, ) -> anyhow::Result<()> { let mut w = DbWriter::open_fresh(path)?; w.write_base(info)?; w.write_disasm(pe, info, func_analysis, labels, xrefs)?; Ok(()) } // ── Helpers ──────────────────────────────────────────────────────────────── fn insert_metadata(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> { let mut stmt = conn.prepare("INSERT INTO metadata (key, value) VALUES (?, ?)")?; stmt.execute(params!["image_base", format!("0x{:08X}", info.image_base)])?; stmt.execute(params!["entry_point", format!("0x{:08X}", info.entry_point)])?; if let Some(name) = info.original_pe_name { stmt.execute(params!["original_pe_name", name])?; } if let Some(title_id) = info.title_id { stmt.execute(params!["title_id", format!("0x{:08X}", title_id)])?; } if let Some(media_id) = info.media_id { stmt.execute(params!["media_id", format!("0x{:08X}", media_id)])?; } Ok(()) } fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO sections (name, virtual_address, virtual_size, raw_offset, raw_size, flags, is_code) VALUES (?, ?, ?, ?, ?, ?, ?)" )?; for s in sections { stmt.execute(params![ s.name, s.virtual_address as i64, s.virtual_size as i64, s.raw_offset as i64, s.raw_size as i64, s.flags as i64, s.is_code(), ])?; } Ok(()) } fn insert_imports(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO imports (library, ordinal, name, record_type, address) VALUES (?, ?, ?, ?, ?)" )?; for lib in info.import_libraries { for imp in &lib.imports { let resolved = crate::resolve_ordinal(&lib.name, imp.ordinal); stmt.execute(params![ lib.name, imp.ordinal as i64, resolved, imp.record_type as i64, imp.address as i64, ])?; } } Ok(()) } fn insert_functions( conn: &Connection, func_analysis: &FuncAnalysis, labels: 
&HashMap, ) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO functions (address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore, pdata_validated, pdata_length, has_eh) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" )?; for (&addr, fi) in &func_analysis.functions { let name = labels.get(&addr) .cloned() .unwrap_or_else(|| format!("sub_{addr:08X}")); stmt.execute(params![ addr as i64, name, fi.end as i64, fi.frame_size as i64, fi.saved_gprs as i64, fi.is_leaf, fi.is_saverestore, fi.pdata_validated, fi.pdata_length.map(|n| n as i64), fi.has_eh, ])?; } Ok(()) } fn insert_vtables( conn: &Connection, vtables: &[crate::vtables::Vtable], _pe: &[u8], _image_base: u32, ) -> anyhow::Result<()> { if vtables.is_empty() { return Ok(()); } let mut stmt = conn.prepare( "INSERT INTO vtables (address, length, col_address, class_name, rtti_present, base_classes_json) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT DO NOTHING" )?; let mut count = 0u64; for v in vtables { stmt.execute(params![ v.address as i64, v.length as i64, v.col_address.map(|a| a as i64), v.class_name.as_str(), v.rtti_present, v.base_classes_json.as_deref(), ])?; count += 1; } metrics::counter!("db.rows", "table" => "vtables").increment(count); tracing::info!(rows = count, table = "vtables", "bulk insert complete"); Ok(()) } fn insert_methods_and_classes( conn: &Connection, vtables: &[crate::vtables::Vtable], labels: &HashMap, ) -> anyhow::Result<()> { if vtables.is_empty() { return Ok(()); } // methods rows let methods = crate::vtables::methods_table(vtables, labels); if !methods.is_empty() { let mut stmt = conn.prepare( "INSERT INTO methods (vtable_address, slot, function_address, mangled_name, demangled_name) VALUES (?, ?, ?, ?, ?) 
ON CONFLICT DO NOTHING" )?; for (vt_addr, slot, fn_addr, mangled, demangled) in &methods { stmt.execute(params![ *vt_addr as i64, *slot as i64, *fn_addr as i64, mangled.as_deref(), demangled.as_deref(), ])?; } metrics::counter!("db.rows", "table" => "methods").increment(methods.len() as u64); tracing::info!(rows = methods.len(), table = "methods", "bulk insert complete"); } // classes rows (deduped by class_name, first-detected wins) let classes = crate::vtables::classes_table(vtables); if !classes.is_empty() { let mut stmt = conn.prepare( "INSERT INTO classes (name, vtable_address, rtti_present, base_classes_json) VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" )?; for (name, vt_addr, rtti, bases) in &classes { stmt.execute(params![ name.as_str(), *vt_addr as i64, *rtti, bases.as_deref(), ])?; } metrics::counter!("db.rows", "table" => "classes").increment(classes.len() as u64); tracing::info!(rows = classes.len(), table = "classes", "bulk insert complete"); } Ok(()) } fn insert_strings( conn: &Connection, strings: &[crate::strings::DetectedString], ) -> anyhow::Result<()> { if strings.is_empty() { return Ok(()); } let mut stmt = conn.prepare( "INSERT INTO strings (address, encoding, length, content) VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" )?; let mut count = 0u64; for s in strings { stmt.execute(params![ s.address as i64, s.encoding, s.length as i64, s.content.as_str(), ])?; count += 1; } metrics::counter!("db.rows", "table" => "strings").increment(count); tracing::info!(rows = count, table = "strings", "bulk insert complete"); Ok(()) } fn insert_eh_records( conn: &Connection, records: &[crate::eh_scope::EhFuncInfo], ) -> anyhow::Result<()> { if records.is_empty() { return Ok(()); } let mut stmt_fi = conn.prepare( "INSERT INTO eh_funcinfo (address, magic, max_state, p_unwind_map, n_try_blocks, p_try_block_map, n_ip_map_entries, p_ip_to_state_map, p_es_type_list, eh_flags) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
ON CONFLICT DO NOTHING" )?; let mut stmt_unwind = conn.prepare( "INSERT INTO eh_unwind_map (funcinfo_address, state_index, to_state, action_pc) VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" )?; let mut stmt_try = conn.prepare( "INSERT INTO eh_try_blocks (funcinfo_address, try_index, try_low, try_high, catch_high, n_catches, p_handler_array) VALUES (?, ?, ?, ?, ?, ?, ?) ON CONFLICT DO NOTHING" )?; let mut n_fi = 0u64; let mut n_unwind = 0u64; let mut n_try = 0u64; for r in records { stmt_fi.execute(params![ r.address as i64, r.magic as i64, r.max_state as i64, r.p_unwind_map as i64, r.n_try_blocks as i64, r.p_try_block_map as i64, r.n_ip_map_entries as i64, r.p_ip_to_state_map as i64, r.p_es_type_list.map(|p| p as i64), r.eh_flags.map(|f| f as i64), ])?; n_fi += 1; for (i, e) in r.unwind_map.iter().enumerate() { stmt_unwind.execute(params![ r.address as i64, i as i64, e.to_state as i64, e.action_pc as i64, ])?; n_unwind += 1; } for (i, t) in r.try_blocks.iter().enumerate() { stmt_try.execute(params![ r.address as i64, i as i64, t.try_low as i64, t.try_high as i64, t.catch_high as i64, t.n_catches as i64, t.p_handler_array as i64, ])?; n_try += 1; } } metrics::counter!("db.rows", "table" => "eh_funcinfo").increment(n_fi); metrics::counter!("db.rows", "table" => "eh_unwind_map").increment(n_unwind); metrics::counter!("db.rows", "table" => "eh_try_blocks").increment(n_try); tracing::info!( funcinfo = n_fi, unwind = n_unwind, try_blocks = n_try, "EH scope-table insert complete" ); Ok(()) } fn insert_typed_ind_dispatch( conn: &Connection, t: &crate::ind_dispatch_typed::TypedIndirectResult, ) -> anyhow::Result<()> { if !t.dispatches.is_empty() { let mut stmt_site = conn.prepare( "INSERT INTO indirect_dispatch_sites (dispatch_pc, vptr_offset, slot, candidate_count) VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" )?; let mut stmt_cand = conn.prepare( "INSERT INTO indirect_dispatch_candidates (dispatch_pc, vtable_address, method_address) VALUES (?, ?, ?) 
ON CONFLICT DO NOTHING" )?; let mut n_sites = 0u64; let mut n_cand = 0u64; for d in &t.dispatches { stmt_site.execute(params![ d.dispatch_pc as i64, d.vptr_offset as i64, d.slot as i64, d.candidate_vtables.len() as i64, ])?; n_sites += 1; for (vt, m) in d.candidate_vtables.iter().zip(d.method_pcs.iter()) { stmt_cand.execute(params![ d.dispatch_pc as i64, *vt as i64, *m as i64, ])?; n_cand += 1; } } metrics::counter!("db.rows", "table" => "indirect_dispatch_sites").increment(n_sites); metrics::counter!("db.rows", "table" => "indirect_dispatch_candidates").increment(n_cand); tracing::info!(sites = n_sites, candidates = n_cand, "typed indirect-dispatch insert complete"); } if !t.vptr_writes.is_empty() { let mut stmt = conn.prepare( "INSERT INTO vptr_writes (writer_pc, vtable_address, vptr_offset, writer_function) VALUES (?, ?, ?, ?) ON CONFLICT DO NOTHING" )?; let mut n = 0u64; for w in &t.vptr_writes { stmt.execute(params![ w.writer_pc as i64, w.vtable_addr as i64, w.vptr_offset as i64, w.writer_function as i64, ])?; n += 1; } metrics::counter!("db.rows", "table" => "vptr_writes").increment(n); tracing::info!(rows = n, "vptr_writes insert complete"); } Ok(()) } fn insert_funcptr_arrays( conn: &Connection, arrays: &[crate::funcptr_arrays::FuncPtrArray], ) -> anyhow::Result<()> { if arrays.is_empty() { return Ok(()); } let mut stmt_arr = conn.prepare( "INSERT INTO function_pointer_arrays (address, length, kind) VALUES (?, ?, ?) ON CONFLICT DO NOTHING" )?; let mut stmt_ent = conn.prepare( "INSERT INTO function_pointer_array_entries (array_address, slot, function_address) VALUES (?, ?, ?) 
ON CONFLICT DO NOTHING" )?; let mut n_arr = 0u64; let mut n_ent = 0u64; for a in arrays { let inserted = stmt_arr.execute(params![ a.address as i64, a.length as i64, a.kind, ])?; if inserted > 0 { n_arr += 1; } for (i, &fn_va) in a.entries.iter().enumerate() { stmt_ent.execute(params![a.address as i64, i as i64, fn_va as i64])?; n_ent += 1; } } metrics::counter!("db.rows", "table" => "function_pointer_arrays").increment(n_arr); metrics::counter!("db.rows", "table" => "function_pointer_array_entries").increment(n_ent); tracing::info!(arrays = n_arr, entries = n_ent, "function-pointer arrays insert complete"); Ok(()) } fn insert_demangled_from_labels( conn: &Connection, labels: &HashMap, import_libraries: &[xenia_xex::header::ImportLibrary], ) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO demangled_names (address, mangled, raw_demangled, namespace_path, class_name, method_name, params_signature) VALUES (?, ?, ?, ?, ?, ?, ?)" )?; let mut count = 0u64; for (&addr, name) in labels { // The label table holds raw symbol names (`?...@...`). Imports come // wrapped as `__imp__`; strip the `__imp__` prefix to // recover any mangled inner name (rare for kernel imports but // defensive). For now, skip imports entirely — they're handled below // via `import_libraries`. if name.starts_with("__imp_") { continue; } if let Some(d) = crate::demangle::demangle(name) { stmt.execute(params![ addr as i64, d.mangled, d.raw_demangled, d.namespace_path, d.class_name, d.method_name, d.params_signature, ])?; count += 1; } } // Defensive: also demangle any import name that happens to be mangled. 
for lib in import_libraries { for imp in &lib.imports { let resolved = crate::resolve_ordinal(&lib.name, imp.ordinal); if let Some(name) = resolved && let Some(d) = crate::demangle::demangle(name) { stmt.execute(params![ imp.address as i64, d.mangled, d.raw_demangled, d.namespace_path, d.class_name, d.method_name, d.params_signature, ])?; count += 1; } } } metrics::counter!("db.rows", "table" => "demangled_names").increment(count); tracing::info!(rows = count, table = "demangled_names", "demangler complete"); Ok(()) } fn insert_pdata_entries( conn: &Connection, entries: &[xenia_xex::pdata::PdataEntry], ) -> anyhow::Result<()> { if entries.is_empty() { return Ok(()); } let mut stmt = conn.prepare( "INSERT INTO pdata_entries (begin_address, end_address, function_length, prolog_length, flags) VALUES (?, ?, ?, ?, ?) ON CONFLICT DO NOTHING" )?; for e in entries { stmt.execute(params![ e.begin_address as i64, e.end_address() as i64, e.function_length as i64, e.prolog_length as i64, e.flags as i64, ])?; } Ok(()) } fn insert_labels( conn: &Connection, labels: &HashMap, ) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO labels (address, name, kind) VALUES (?, ?, ?) 
ON CONFLICT DO NOTHING" )?; for (&addr, name) in labels { let kind = if name.starts_with("sub_") || name == "entry_point" { "function" } else if name.starts_with("__imp_") { "import" } else if name.starts_with("__savegprlr_") || name.starts_with("__restgprlr_") { "saverestore" } else if name.starts_with("loc_") { "local" } else if name.starts_with("dat_") { "data" } else { "other" }; stmt.execute(params![addr as i64, name, kind])?; } Ok(()) } fn insert_instructions_streaming( conn: &Connection, pe: &[u8], info: &DisasmInfo, func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { let mut appender = conn.appender("instructions")?; let mut total: u64 = 0; for section in info.sections { if !section.is_code() { continue; } let va_start = info.image_base + section.virtual_address; let va_end = info.image_base + section.virtual_address + section.virtual_size; let items = crate::disasm::enrich_section( pe, info.image_base, §ion.name, va_start, va_end, func_analysis, labels, ); total += crate::sinks::duckdb::append_instructions(&mut appender, items)?; } appender.flush()?; metrics::counter!("db.rows", "table" => "instructions").increment(total); tracing::info!(rows = total, table = "instructions", "bulk insert complete"); Ok(()) } fn insert_xrefs_streaming( conn: &Connection, xrefs: &XrefMap, pe: &[u8], image_base: u32, func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { let mut appender = conn.appender("xrefs")?; let mut count: u64 = 0; for (&target, refs) in xrefs { let target_label = labels.get(&target).map(|s| s.as_str()); for xref in refs { let kind = xref.kind.db_tag(); let instruction: Option = { let off = xref.source.wrapping_sub(image_base) as usize; if off + 4 <= pe.len() { let raw = u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]]); let d = xenia_cpu::decode(raw, xref.source); let t = xenia_cpu::disasm::format(&d); // Prefer the simplified mnemonic when present (matches what // a human reading the .asm file sees 
for that line). Some(t.ext_mnemonic.unwrap_or(t.mnemonic)) } else { None } }; let source_func = func_analysis.functions .range(..=xref.source) .next_back() .map(|(&a, _)| a as i64); let source_label = resolve_source_label( xref.source, func_analysis, labels, ); let addr_mode = xref.addr_mode.map(|m| m.tag()); appender.append_row(params![ xref.source as i64, target as i64, kind, addr_mode, instruction.as_deref(), source_func, source_label.as_str(), target_label, ])?; count += 1; } } appender.flush()?; metrics::counter!("db.rows", "table" => "xrefs").increment(count); tracing::info!(rows = count, table = "xrefs", "bulk insert complete"); Ok(()) }