//! DuckDB writer for xenia-rs. //! //! Layered, streaming writes shared by `extract`, `dis`, and `exec`. //! Each command's output is a superset of the previous: //! - `extract --db` -> base tables (metadata, sections, imports) //! - `dis --db` -> base + disasm tables (functions, labels, instructions, xrefs) //! - `exec --db` -> base + disasm + opt-in trace tables (exec_trace, import_calls, branch_trace) //! //! Bulk inserts use the DuckDB Appender API, which bypasses the SQL layer and //! writes directly to columnar storage — no transaction batching required. //! //! Trace kind values for `branch_trace.kind`: //! - `"call"` : any branch with LK set (raw & 1 == 1) //! - `"return"` : bclrx without LK //! - `"jump"` : bcctrx without LK //! - `"branch"` : bx/bcx without LK //! //! # Schema //! //! ## `metadata` //! Key-value table. One row per XEX header field. Values are strings. //! //! | key | value format | meaning | //! |--------------------|------------------|----------------------------------------------------| //! | `image_base` | `"0xXXXXXXXX"` | Virtual address where the PE image is mapped | //! | `entry_point` | `"0xXXXXXXXX"` | Absolute VA of the XEX entry point | //! | `original_pe_name` | string | Original PE filename from XEX optional headers | //! | `title_id` | `"0xXXXXXXXX"` | Xbox 360 Title ID (identifies the game) | //! | `media_id` | `"0xXXXXXXXX"` | Disc/media ID (identifies the specific disc build) | //! //! ## `sections` //! One row per PE section (`.text`, `.data`, etc.). //! - `name` — PE section name //! - `virtual_address` — RVA relative to `image_base` where the section is mapped in memory //! - `virtual_size` — Size in memory; may exceed `raw_size` due to BSS zero-fill //! - `raw_offset` — Byte offset of section data within the XEX/PE file //! - `raw_size` — Size of section data on disk //! - `flags` — `IMAGE_SCN_*` characteristics bit field //! - `is_code` — `true` if `IMAGE_SCN_CNT_CODE` is set //! //! ## `imports` //! One row per import record from the XEX import descriptor table. //! - `library` — Module name (e.g. `xboxkrnl.exe`, `xam.xex`) //! - `ordinal` — Numeric ordinal identifying the export within the library //! - `name` — Resolved human-readable symbol name; `NULL` if not in symbol table //! - `record_type` — XEX import record type: `0` = function thunk, `1` = variable //! - `address` — Absolute VA of the import thunk or variable in the binary //! //! ## `functions` //! One row per detected function (from prologue analysis). //! - `address` — Absolute VA of the function entry point (PK) //! - `name` — Symbol name, or `sub_XXXXXXXX` if unresolved //! - `end_address` — Absolute VA of last instruction + 4 (exclusive end) //! - `frame_size` — Stack frame size in bytes (from prologue) //! - `saved_gprs` — Bitmask of GPRs saved in prologue (bit N set ⇒ rN is saved) //! - `is_leaf` — `true` if the function has no outgoing calls (no `bl`/`blr`) //! - `is_saverestore` — `true` if this is a `__savegprlr_*`/`__restgprlr_*` compiler stub //! //! ## `labels` //! One row per named address; superset of functions. //! - `address` — Absolute VA (PK) //! - `name` — Symbol name //! - `kind` — One of: `function`, `import`, `saverestore`, `local`, `data`, `other` //! //! ## `instructions` //! One row per disassembled instruction. //! - `address` — Absolute VA (PK) //! - `raw` — 4-byte big-endian instruction word as integer //! - `mnemonic` — Base mnemonic (e.g. `stw`, `bl`, `cmpwi`) //! - `operands` — Operand string from base disassembly //! - `disasm` — Full base disassembly string (`mnemonic + " " + operands`) //! - `ext_mnemonic` — Simplified mnemonic (e.g. `mr` for `or rX,rY,rY`); `NULL` if none //! - `ext_operands` — Operands for the extended form; `NULL` if none //! - `ext_disasm` — Full extended disassembly string; `NULL` if none //! - `target_hex` — Resolved absolute branch target for `b`/`bc` (and link/AA variants); `NULL` for indirect or non-branch instructions. SQL views (`v_branch_xrefs`) self-join on this column. //! - `section` — Name of the PE section containing this instruction //! - `function` — VA of the enclosing function; `NULL` if not inside a detected function //! - `label` — Label name at this address; `NULL` if none //! //! ## `xrefs` //! One row per cross-reference edge (call, jump, data access). //! - `source` — Absolute VA of the instruction making the reference //! - `target` — Absolute VA of the referenced destination //! - `kind` — Reference type as the short tag from [`crate::xref::XrefKind::tag`]: //! `call`, `j` (jump), `br` (branch), `read` (data_read), //! `write` (data_write), `ref` (data_ref). //! Note: this is a different convention from `branch_trace.kind`, //! which uses the long names (`call` / `return` / `jump` / `branch`). //! - `instruction` — Mnemonic of the source instruction; `NULL` if address is not in binary //! - `source_func` — VA of the function containing `source`; `NULL` if unknown //! - `source_label` — Label at `source`; `NULL` if none //! - `target_label` — Label at `target`; `NULL` if none //! //! ## `exec_trace` *(opt-in: `--trace-instructions`)* //! One row per executed instruction. //! - `address` — Absolute VA of the instruction //! - `cycle` — Monotonic instruction counter (execution order) //! - `r3`, `r4`, `lr`, `sp` — Snapshot of key GPRs at time of execution //! //! ## `import_calls` *(opt-in: `--trace-imports`)* //! One row per intercepted kernel/import call. //! - `address` — VA of the import thunk //! - `cycle` — Instruction counter at point of interception //! - `module` — Library name (e.g. `xboxkrnl.exe`) //! - `ordinal` — Numeric ordinal within the module //! - `name` — Resolved symbol name //! - `arg_r3`–`arg_r6` — First four call arguments (PowerPC ABI: r3–r6) //! - `return_value` — Value in r3 after the call returns //! //! ## `branch_trace` *(opt-in: `--trace-branches`)* //! One row per taken branch. //! - `cycle` — Instruction counter //! - `source` — VA of the branch instruction //! - `target` — VA of the branch destination //! - `kind` — `call`, `return`, `jump`, or `branch` (see top-level doc) //! - `lr` — Link register value at time of branch use std::collections::HashMap; use std::path::Path; use duckdb::{Connection, params}; use crate::func::FuncAnalysis; use crate::xref::{XrefMap, resolve_source_label}; use crate::formatter::DisasmInfo; const DEFAULT_BATCH_SIZE: u64 = 100_000; /// Rows per trace buffer flush. Configurable via `XENIA_DB_BATCH_SIZE` env var (default 100_000). /// Applies to `exec_trace` and `branch_trace` buffer thresholds. /// `import_calls` always flushes at 1000 — low volume, not worth scaling. fn batch_size() -> u64 { use std::sync::OnceLock; static CACHED: OnceLock = OnceLock::new(); *CACHED.get_or_init(|| { std::env::var("XENIA_DB_BATCH_SIZE") .ok() .and_then(|s| s.parse::().ok()) .filter(|&n| n > 0) .unwrap_or(DEFAULT_BATCH_SIZE) }) } pub struct ExecTraceEntry { pub address: u32, pub cycle: u64, pub r3: u64, pub r4: u64, pub lr: u64, pub sp: u64, } pub struct ImportCallEntry { pub address: u32, pub cycle: u64, pub module: String, pub ordinal: u16, pub name: String, pub arg_r3: u64, pub arg_r4: u64, pub arg_r5: u64, pub arg_r6: u64, pub return_value: u64, } pub struct BranchTraceEntry { pub source: u32, pub target: u32, pub cycle: u64, pub kind: &'static str, pub lr: u64, } pub struct DbWriter { conn: Connection, exec_buffer: Vec, import_buffer: Vec, branch_buffer: Vec, exec_count: u64, import_count: u64, branch_count: u64, trace_instructions: bool, trace_imports: bool, trace_branches: bool, } impl DbWriter { /// Open a fresh database at `path`, removing any existing file first. pub fn open_fresh(path: &Path) -> anyhow::Result { if path.exists() { std::fs::remove_file(path)?; } let conn = Connection::open(path)?; let cap = batch_size() as usize; Ok(Self { conn, exec_buffer: Vec::with_capacity(cap), import_buffer: Vec::with_capacity(1024), branch_buffer: Vec::with_capacity(cap), exec_count: 0, import_count: 0, branch_count: 0, trace_instructions: false, trace_imports: false, trace_branches: false, }) } // ── Base layer (written by extract/dis/exec) ───────────────────────────── /// Write metadata, sections, imports tables and their indices. #[tracing::instrument(skip_all, name = "db.write_base")] pub fn write_base(&mut self, info: &DisasmInfo) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE metadata ( key VARCHAR PRIMARY KEY, -- header field name value VARCHAR NOT NULL -- hex-formatted or plain string value ); CREATE TABLE sections ( name VARCHAR NOT NULL, -- PE section name (e.g. .text, .rdata) virtual_address BIGINT NOT NULL, -- RVA relative to image_base virtual_size BIGINT NOT NULL, -- size in memory; may exceed raw_size (BSS) raw_offset BIGINT NOT NULL, -- byte offset of section data in the file raw_size BIGINT NOT NULL, -- size of section data on disk flags BIGINT NOT NULL, -- IMAGE_SCN_* characteristics bit field is_code BOOLEAN NOT NULL -- true if IMAGE_SCN_CNT_CODE is set ); CREATE TABLE imports ( library VARCHAR NOT NULL, -- module name (e.g. xboxkrnl.exe, xam.xex) ordinal BIGINT NOT NULL, -- ordinal identifying the export within the library name VARCHAR, -- resolved symbol name; NULL if not in symbol table record_type BIGINT NOT NULL, -- 0 = function thunk, 1 = variable address BIGINT NOT NULL -- absolute VA of the thunk or variable ); ")?; insert_metadata(&self.conn, info)?; insert_sections(&self.conn, info.sections)?; insert_imports(&self.conn, info)?; self.conn.execute_batch(" CREATE INDEX idx_imports_library ON imports(library); CREATE INDEX idx_imports_name ON imports(name); ")?; Ok(()) } // ── Disasm layer (written by dis/exec) ─────────────────────────────────── /// Phase-3 ingest pass — purely mechanical disasm rows. Creates the /// `instructions` table (and its indices) and streams every code-section /// instruction through the iterator + DuckDB sink. Does NOT touch /// `functions` / `labels` / `xrefs` — that's [`Self::write_analysis_results`]. /// /// `func_analysis` and `labels` are still required at this layer because /// each row carries the rolling-window `function` and `label` columns for /// downstream queries. #[tracing::instrument(skip_all, name = "db.ingest_instructions")] pub fn ingest_instructions( &mut self, pe: &[u8], info: &DisasmInfo, func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE instructions ( address BIGINT PRIMARY KEY, -- absolute VA raw BIGINT NOT NULL, -- 4-byte big-endian instruction word as integer mnemonic VARCHAR NOT NULL, -- base mnemonic (e.g. stw, bl, cmpwi) operands VARCHAR NOT NULL, -- operand string from base disassembly disasm VARCHAR NOT NULL, -- full base disassembly (mnemonic + operands) ext_mnemonic VARCHAR, -- simplified mnemonic (e.g. mr); NULL if none ext_operands VARCHAR, -- operands for the extended form; NULL if none ext_disasm VARCHAR, -- full extended disassembly string; NULL if none target_hex BIGINT, -- resolved absolute target for direct branches; NULL for indirect/non-branch section VARCHAR NOT NULL, -- PE section name containing this instruction function BIGINT, -- VA of the enclosing function; NULL if unknown label VARCHAR -- label at this address; NULL if none ); ")?; insert_instructions_streaming(&self.conn, pe, info, func_analysis, labels)?; let indices = [ ("idx_instructions_function", "CREATE INDEX idx_instructions_function ON instructions(function)"), ("idx_instructions_mnemonic", "CREATE INDEX idx_instructions_mnemonic ON instructions(mnemonic)"), ("idx_instructions_ext_mnemonic", "CREATE INDEX idx_instructions_ext_mnemonic ON instructions(ext_mnemonic)"), ("idx_instructions_section", "CREATE INDEX idx_instructions_section ON instructions(section)"), ("idx_instructions_label", "CREATE INDEX idx_instructions_label ON instructions(label)"), ("idx_instructions_target_hex", "CREATE INDEX idx_instructions_target_hex ON instructions(target_hex)"), ]; for (name, sql) in indices { tracing::debug!(index = name, "creating instructions index"); self.conn.execute_batch(sql)?; } Ok(()) } /// Phase-3 analyze pass — writes the Rust-pass-derived tables /// (`functions`, `labels`, `xrefs`) and their indices. Always executes /// in `--analyze=rust` and `--analyze=both` modes; skipped only when /// the caller deliberately chooses a Rust-free DB layout. #[tracing::instrument(skip_all, name = "db.write_analysis_results")] pub fn write_analysis_results( &mut self, pe: &[u8], info: &DisasmInfo, func_analysis: &FuncAnalysis, labels: &HashMap, xrefs: &XrefMap, ) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE functions ( address BIGINT PRIMARY KEY, -- absolute VA of entry point name VARCHAR NOT NULL, -- symbol name, or sub_XXXXXXXX if unresolved end_address BIGINT NOT NULL, -- VA of last instruction + 4 (exclusive end) frame_size BIGINT NOT NULL, -- stack frame size in bytes (from prologue) saved_gprs BIGINT NOT NULL, -- bitmask of GPRs saved in prologue (bit N = rN) is_leaf BOOLEAN NOT NULL, -- true if the function has no outgoing calls is_saverestore BOOLEAN NOT NULL -- true if __savegprlr_* / __restgprlr_* stub ); CREATE TABLE labels ( address BIGINT PRIMARY KEY, -- absolute VA name VARCHAR NOT NULL, -- symbol name kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other ); CREATE TABLE xrefs ( source BIGINT NOT NULL, -- VA of the referencing instruction target BIGINT NOT NULL, -- VA of the referenced destination kind VARCHAR NOT NULL, -- call | jump | branch | data_read | data_write | data_ref instruction VARCHAR, -- mnemonic of source instruction; NULL if not in binary source_func BIGINT, -- VA of the function containing source; NULL if unknown source_label VARCHAR, -- label at source; NULL if none target_label VARCHAR -- label at target; NULL if none ); ")?; insert_functions(&self.conn, func_analysis, labels)?; insert_labels(&self.conn, labels)?; insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?; let indices = [ ("idx_functions_name", "CREATE INDEX idx_functions_name ON functions(name)"), ("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"), ("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"), ("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"), ("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"), ("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"), ("idx_xrefs_kind", "CREATE INDEX idx_xrefs_kind ON xrefs(kind)"), ("idx_xrefs_instruction", "CREATE INDEX idx_xrefs_instruction ON xrefs(instruction)"), ("idx_xrefs_target_label", "CREATE INDEX idx_xrefs_target_label ON xrefs(target_label)"), ]; for (name, sql) in indices { tracing::debug!(index = name, "creating analysis index"); self.conn.execute_batch(sql)?; } Ok(()) } /// Back-compat wrapper for callers that want the full pre-Phase-3 /// "everything in one shot" behaviour. Equivalent to /// `ingest_instructions` + `write_analysis_results`. #[tracing::instrument(skip_all, name = "db.write_disasm")] pub fn write_disasm( &mut self, pe: &[u8], info: &DisasmInfo, func_analysis: &FuncAnalysis, labels: &HashMap, xrefs: &XrefMap, ) -> anyhow::Result<()> { self.ingest_instructions(pe, info, func_analysis, labels)?; self.write_analysis_results(pe, info, func_analysis, labels, xrefs)?; Ok(()) } /// Phase-3 SQL-views layer — defines additive read-only views over /// `instructions` (and optionally `xrefs`/`functions`/`labels`). /// See [`crate::sql_views`] for the SQL definitions. /// /// Called when `--analyze=sql` or `--analyze=both` is in effect. #[tracing::instrument(skip_all, name = "db.create_sql_views")] pub fn create_sql_views(&mut self) -> anyhow::Result<()> { for (name, sql) in crate::sql_views::ALL_VIEWS { tracing::debug!(view = name, "creating SQL view"); self.conn.execute_batch(sql)?; } Ok(()) } /// Cross-check: count branch xrefs found by the SQL view that are absent /// from the Rust-pass `xrefs` table (and vice versa). Returns /// `(sql_only, rust_only)` row counts. Both should be zero — the two /// surfaces produce identical edges by construction. A non-zero count /// signals drift between the formatter's `mnemonic` column and /// `xref.rs`'s opcode classification, and is logged as a warning by the /// caller. #[tracing::instrument(skip_all, name = "db.cross_check_branch_xrefs")] pub fn cross_check_branch_xrefs(&self) -> anyhow::Result<(u64, u64)> { let sql_only: i64 = self.conn.query_row( "SELECT COUNT(*) FROM v_branch_xrefs vb \ LEFT JOIN xrefs x \ ON x.source = vb.source AND x.target = vb.target AND x.kind = vb.kind \ WHERE x.source IS NULL", [], |row| row.get(0) )?; let rust_only: i64 = self.conn.query_row( "SELECT COUNT(*) FROM xrefs x \ LEFT JOIN v_branch_xrefs vb \ ON vb.source = x.source AND vb.target = x.target AND vb.kind = x.kind \ WHERE x.kind IN ('call','j','br') AND vb.source IS NULL", [], |row| row.get(0) )?; Ok((sql_only as u64, rust_only as u64)) } // ── Trace layer (written by exec when flags enabled) ───────────────────── /// Create the opt-in trace tables. No-op if all flags are false. pub fn prepare_trace_tables( &mut self, trace_instructions: bool, trace_imports: bool, trace_branches: bool, ) -> anyhow::Result<()> { self.trace_instructions = trace_instructions; self.trace_imports = trace_imports; self.trace_branches = trace_branches; if trace_instructions { self.conn.execute_batch(" CREATE TABLE exec_trace ( address BIGINT NOT NULL, -- absolute VA of the instruction cycle BIGINT NOT NULL, -- monotonic instruction counter (execution order) r3 BIGINT NOT NULL, -- r3 at time of execution r4 BIGINT NOT NULL, -- r4 at time of execution lr BIGINT NOT NULL, -- link register sp BIGINT NOT NULL -- stack pointer ); ")?; } if trace_imports { self.conn.execute_batch(" CREATE TABLE import_calls ( address BIGINT NOT NULL, -- VA of the import thunk cycle BIGINT NOT NULL, -- instruction counter at interception module VARCHAR NOT NULL, -- library name (e.g. xboxkrnl.exe) ordinal BIGINT NOT NULL, -- ordinal within the module name VARCHAR NOT NULL, -- resolved symbol name arg_r3 BIGINT NOT NULL, -- first argument (r3) arg_r4 BIGINT NOT NULL, -- second argument (r4) arg_r5 BIGINT NOT NULL, -- third argument (r5) arg_r6 BIGINT NOT NULL, -- fourth argument (r6) return_value BIGINT NOT NULL -- r3 after the call returns ); ")?; } if trace_branches { self.conn.execute_batch(" CREATE TABLE branch_trace ( cycle BIGINT NOT NULL, -- instruction counter source BIGINT NOT NULL, -- VA of the branch instruction target BIGINT NOT NULL, -- VA of the branch destination kind VARCHAR NOT NULL, -- call | return | jump | branch lr BIGINT NOT NULL -- link register at time of branch ); ")?; } Ok(()) } pub fn log_instruction(&mut self, entry: ExecTraceEntry) { if !self.trace_instructions { return; } self.exec_buffer.push(entry); if self.exec_buffer.len() as u64 >= batch_size() { self.flush_exec(); } } pub fn log_import_call(&mut self, entry: ImportCallEntry) { if !self.trace_imports { return; } self.import_buffer.push(entry); if self.import_buffer.len() >= 1000 { self.flush_imports(); } } pub fn log_branch(&mut self, entry: BranchTraceEntry) { if !self.trace_branches { return; } self.branch_buffer.push(entry); if self.branch_buffer.len() as u64 >= batch_size() { self.flush_branches(); } } fn flush_exec(&mut self) { if self.exec_buffer.is_empty() { return; } let mut appender = self.conn.appender("exec_trace").unwrap(); for e in &self.exec_buffer { appender.append_row(params![ e.address as i64, e.cycle as i64, e.r3 as i64, e.r4 as i64, e.lr as i64, e.sp as i64, ]).ok(); } appender.flush().ok(); self.exec_count += self.exec_buffer.len() as u64; self.exec_buffer.clear(); } fn flush_imports(&mut self) { if self.import_buffer.is_empty() { return; } let mut appender = self.conn.appender("import_calls").unwrap(); for e in &self.import_buffer { appender.append_row(params![ e.address as i64, e.cycle as i64, e.module.as_str(), e.ordinal as i64, e.name.as_str(), e.arg_r3 as i64, e.arg_r4 as i64, e.arg_r5 as i64, e.arg_r6 as i64, e.return_value as i64, ]).ok(); } appender.flush().ok(); self.import_count += self.import_buffer.len() as u64; self.import_buffer.clear(); } fn flush_branches(&mut self) { if self.branch_buffer.is_empty() { return; } let mut appender = self.conn.appender("branch_trace").unwrap(); for e in &self.branch_buffer { appender.append_row(params![ e.cycle as i64, e.source as i64, e.target as i64, e.kind, e.lr as i64, ]).ok(); } appender.flush().ok(); self.branch_count += self.branch_buffer.len() as u64; self.branch_buffer.clear(); } /// Flush remaining trace buffers and create their indices. #[tracing::instrument(skip_all, name = "db.finalize_traces")] pub fn finalize_traces(&mut self) -> anyhow::Result<()> { self.flush_exec(); self.flush_imports(); self.flush_branches(); if self.trace_instructions { tracing::debug!("creating idx_exec_trace_address"); self.conn.execute_batch("CREATE INDEX idx_exec_trace_address ON exec_trace(address);")?; tracing::debug!("creating idx_exec_trace_cycle"); self.conn.execute_batch("CREATE INDEX idx_exec_trace_cycle ON exec_trace(cycle);")?; } if self.trace_imports { tracing::debug!("creating idx_import_calls_name"); self.conn.execute_batch("CREATE INDEX idx_import_calls_name ON import_calls(name);")?; tracing::debug!("creating idx_import_calls_cycle"); self.conn.execute_batch("CREATE INDEX idx_import_calls_cycle ON import_calls(cycle);")?; } if self.trace_branches { tracing::debug!("creating idx_branch_trace_source"); self.conn.execute_batch("CREATE INDEX idx_branch_trace_source ON branch_trace(source);")?; tracing::debug!("creating idx_branch_trace_target"); self.conn.execute_batch("CREATE INDEX idx_branch_trace_target ON branch_trace(target);")?; tracing::debug!("creating idx_branch_trace_kind"); self.conn.execute_batch("CREATE INDEX idx_branch_trace_kind ON branch_trace(kind);")?; tracing::debug!("creating idx_branch_trace_cycle"); self.conn.execute_batch("CREATE INDEX idx_branch_trace_cycle ON branch_trace(cycle);")?; } metrics::counter!("db.rows", "table" => "exec_trace").increment(self.exec_count); metrics::counter!("db.rows", "table" => "import_calls").increment(self.import_count); metrics::counter!("db.rows", "table" => "branch_trace").increment(self.branch_count); tracing::info!( instructions = self.exec_count, imports = self.import_count, branches = self.branch_count, "trace totals" ); Ok(()) } } /// Backwards-compatible wrapper that writes the full base + disasm layers. pub fn write_db( path: &Path, pe: &[u8], info: &DisasmInfo, func_analysis: &FuncAnalysis, labels: &HashMap, _import_map: &HashMap, xrefs: &XrefMap, ) -> anyhow::Result<()> { let mut w = DbWriter::open_fresh(path)?; w.write_base(info)?; w.write_disasm(pe, info, func_analysis, labels, xrefs)?; Ok(()) } // ── Helpers ──────────────────────────────────────────────────────────────── fn insert_metadata(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> { let mut stmt = conn.prepare("INSERT INTO metadata (key, value) VALUES (?, ?)")?; stmt.execute(params!["image_base", format!("0x{:08X}", info.image_base)])?; stmt.execute(params!["entry_point", format!("0x{:08X}", info.entry_point)])?; if let Some(name) = info.original_pe_name { stmt.execute(params!["original_pe_name", name])?; } if let Some(title_id) = info.title_id { stmt.execute(params!["title_id", format!("0x{:08X}", title_id)])?; } if let Some(media_id) = info.media_id { stmt.execute(params!["media_id", format!("0x{:08X}", media_id)])?; } Ok(()) } fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO sections (name, virtual_address, virtual_size, raw_offset, raw_size, flags, is_code) VALUES (?, ?, ?, ?, ?, ?, ?)" )?; for s in sections { stmt.execute(params![ s.name, s.virtual_address as i64, s.virtual_size as i64, s.raw_offset as i64, s.raw_size as i64, s.flags as i64, s.is_code(), ])?; } Ok(()) } fn insert_imports(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO imports (library, ordinal, name, record_type, address) VALUES (?, ?, ?, ?, ?)" )?; for lib in info.import_libraries { for imp in &lib.imports { let resolved = crate::resolve_ordinal(&lib.name, imp.ordinal); stmt.execute(params![ lib.name, imp.ordinal as i64, resolved, imp.record_type as i64, imp.address as i64, ])?; } } Ok(()) } fn insert_functions( conn: &Connection, func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO functions (address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore) VALUES (?, ?, ?, ?, ?, ?, ?)" )?; for (&addr, fi) in &func_analysis.functions { let name = labels.get(&addr) .cloned() .unwrap_or_else(|| format!("sub_{addr:08X}")); stmt.execute(params![ addr as i64, name, fi.end as i64, fi.frame_size as i64, fi.saved_gprs as i64, fi.is_leaf, fi.is_saverestore, ])?; } Ok(()) } fn insert_labels( conn: &Connection, labels: &HashMap, ) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO labels (address, name, kind) VALUES (?, ?, ?) ON CONFLICT DO NOTHING" )?; for (&addr, name) in labels { let kind = if name.starts_with("sub_") || name == "entry_point" { "function" } else if name.starts_with("__imp_") { "import" } else if name.starts_with("__savegprlr_") || name.starts_with("__restgprlr_") { "saverestore" } else if name.starts_with("loc_") { "local" } else if name.starts_with("dat_") { "data" } else { "other" }; stmt.execute(params![addr as i64, name, kind])?; } Ok(()) } fn insert_instructions_streaming( conn: &Connection, pe: &[u8], info: &DisasmInfo, func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { let mut appender = conn.appender("instructions")?; let mut total: u64 = 0; for section in info.sections { if !section.is_code() { continue; } let va_start = info.image_base + section.virtual_address; let va_end = info.image_base + section.virtual_address + section.virtual_size; let items = crate::disasm::enrich_section( pe, info.image_base, §ion.name, va_start, va_end, func_analysis, labels, ); total += crate::sinks::duckdb::append_instructions(&mut appender, items)?; } appender.flush()?; metrics::counter!("db.rows", "table" => "instructions").increment(total); tracing::info!(rows = total, table = "instructions", "bulk insert complete"); Ok(()) } fn insert_xrefs_streaming( conn: &Connection, xrefs: &XrefMap, pe: &[u8], image_base: u32, func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { let mut appender = conn.appender("xrefs")?; let mut count: u64 = 0; for (&target, refs) in xrefs { let target_label = labels.get(&target).map(|s| s.as_str()); for xref in refs { let kind = xref.kind.db_tag(); let instruction: Option = { let off = xref.source.wrapping_sub(image_base) as usize; if off + 4 <= pe.len() { let raw = u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]]); let d = xenia_cpu::decode(raw, xref.source); let t = xenia_cpu::disasm::format(&d); // Prefer the simplified mnemonic when present (matches what // a human reading the .asm file sees for that line). Some(t.ext_mnemonic.unwrap_or(t.mnemonic)) } else { None } }; let source_func = func_analysis.functions .range(..=xref.source) .next_back() .map(|(&a, _)| a as i64); let source_label = resolve_source_label( xref.source, func_analysis, labels, ); appender.append_row(params![ xref.source as i64, target as i64, kind, instruction.as_deref(), source_func, source_label.as_str(), target_label, ])?; count += 1; } } appender.flush()?; metrics::counter!("db.rows", "table" => "xrefs").increment(count); tracing::info!(rows = count, table = "xrefs", "bulk insert complete"); Ok(()) }