diff --git a/crates/xenia-analysis/Cargo.toml b/crates/xenia-analysis/Cargo.toml index 942542a..d8c9fc2 100644 --- a/crates/xenia-analysis/Cargo.toml +++ b/crates/xenia-analysis/Cargo.toml @@ -7,7 +7,10 @@ build = "build.rs" [dependencies] xenia-xex = { workspace = true } +xenia-cpu = { workspace = true } serde = { workspace = true } +serde_json = { workspace = true } anyhow = { workspace = true } tracing = { workspace = true } -rusqlite = { workspace = true } +metrics = { workspace = true } +duckdb = { workspace = true } diff --git a/crates/xenia-analysis/src/db.rs b/crates/xenia-analysis/src/db.rs index 5a6c464..4c1c6df 100644 --- a/crates/xenia-analysis/src/db.rs +++ b/crates/xenia-analysis/src/db.rs @@ -1,4 +1,4 @@ -//! SQLite database writer for xenia-rs. +//! DuckDB writer for xenia-rs. //! //! Layered, streaming writes shared by `extract`, `dis`, and `exec`. //! Each command's output is a superset of the previous: @@ -6,19 +6,119 @@ //! - `dis --db` -> base + disasm tables (functions, labels, instructions, xrefs) //! - `exec --db` -> base + disasm + opt-in trace tables (exec_trace, import_calls, branch_trace) //! -//! Performance: streaming commits every 100k rows, no end-of-run ANALYZE, -//! progress messages before each index build. +//! Bulk inserts use the DuckDB Appender API, which bypasses the SQL layer and +//! writes directly to columnar storage — no transaction batching required. //! //! Trace kind values for `branch_trace.kind`: -//! - "call" : any branch with LK set (raw & 1 == 1) -//! - "return" : bclrx without LK -//! - "jump" : bcctrx without LK -//! - "branch" : bx/bcx without LK +//! - `"call"` : any branch with LK set (raw & 1 == 1) +//! - `"return"` : bclrx without LK +//! - `"jump"` : bcctrx without LK +//! - `"branch"` : bx/bcx without LK +//! +//! # Schema +//! +//! ## `metadata` +//! Key-value table. One row per XEX header field. Values are strings. +//! +//! | key | value format | meaning | +//! 
|--------------------|------------------|----------------------------------------------------| +//! | `image_base` | `"0xXXXXXXXX"` | Virtual address where the PE image is mapped | +//! | `entry_point` | `"0xXXXXXXXX"` | Absolute VA of the XEX entry point | +//! | `original_pe_name` | string | Original PE filename from XEX optional headers | +//! | `title_id` | `"0xXXXXXXXX"` | Xbox 360 Title ID (identifies the game) | +//! | `media_id` | `"0xXXXXXXXX"` | Disc/media ID (identifies the specific disc build) | +//! +//! ## `sections` +//! One row per PE section (`.text`, `.data`, etc.). +//! - `name` — PE section name +//! - `virtual_address` — RVA relative to `image_base` where the section is mapped in memory +//! - `virtual_size` — Size in memory; may exceed `raw_size` due to BSS zero-fill +//! - `raw_offset` — Byte offset of section data within the XEX/PE file +//! - `raw_size` — Size of section data on disk +//! - `flags` — `IMAGE_SCN_*` characteristics bit field +//! - `is_code` — `true` if `IMAGE_SCN_CNT_CODE` is set +//! +//! ## `imports` +//! One row per import record from the XEX import descriptor table. +//! - `library` — Module name (e.g. `xboxkrnl.exe`, `xam.xex`) +//! - `ordinal` — Numeric ordinal identifying the export within the library +//! - `name` — Resolved human-readable symbol name; `NULL` if not in symbol table +//! - `record_type` — XEX import record type: `0` = function thunk, `1` = variable +//! - `address` — Absolute VA of the import thunk or variable in the binary +//! +//! ## `functions` +//! One row per detected function (from prologue analysis). +//! - `address` — Absolute VA of the function entry point (PK) +//! - `name` — Symbol name, or `sub_XXXXXXXX` if unresolved +//! - `end_address` — Absolute VA of last instruction + 4 (exclusive end) +//! - `frame_size` — Stack frame size in bytes (from prologue) +//! - `saved_gprs` — Bitmask of GPRs saved in prologue (bit N set ⇒ rN is saved) +//! 
- `is_leaf` — `true` if the function has no outgoing calls (no `bl`/`blr`) +//! - `is_saverestore` — `true` if this is a `__savegprlr_*`/`__restgprlr_*` compiler stub +//! +//! ## `labels` +//! One row per named address; superset of functions. +//! - `address` — Absolute VA (PK) +//! - `name` — Symbol name +//! - `kind` — One of: `function`, `import`, `saverestore`, `local`, `data`, `other` +//! +//! ## `instructions` +//! One row per disassembled instruction. +//! - `address` — Absolute VA (PK) +//! - `raw` — 4-byte big-endian instruction word as integer +//! - `mnemonic` — Base mnemonic (e.g. `stw`, `bl`, `cmpwi`) +//! - `operands` — Operand string from base disassembly +//! - `disasm` — Full base disassembly string (`mnemonic + " " + operands`) +//! - `ext_mnemonic` — Simplified mnemonic (e.g. `mr` for `or rX,rY,rY`); `NULL` if none +//! - `ext_operands` — Operands for the extended form; `NULL` if none +//! - `ext_disasm` — Full extended disassembly string; `NULL` if none +//! - `target_hex` — Resolved absolute branch target for `b`/`bc` (and link/AA variants); `NULL` for indirect or non-branch instructions. SQL views (`v_branch_xrefs`) self-join on this column. +//! - `section` — Name of the PE section containing this instruction +//! - `function` — VA of the enclosing function; `NULL` if not inside a detected function +//! - `label` — Label name at this address; `NULL` if none +//! +//! ## `xrefs` +//! One row per cross-reference edge (call, jump, data access). +//! - `source` — Absolute VA of the instruction making the reference +//! - `target` — Absolute VA of the referenced destination +//! - `kind` — Reference type as the short tag from [`crate::xref::XrefKind::tag`]: +//! `call`, `j` (jump), `br` (branch), `read` (data_read), +//! `write` (data_write), `ref` (data_ref). +//! Note: this is a different convention from `branch_trace.kind`, +//! which uses the long names (`call` / `return` / `jump` / `branch`). +//! 
- `instruction` — Mnemonic of the source instruction; `NULL` if address is not in binary +//! - `source_func` — VA of the function containing `source`; `NULL` if unknown +//! - `source_label` — Label at `source`; `NULL` if none +//! - `target_label` — Label at `target`; `NULL` if none +//! +//! ## `exec_trace` *(opt-in: `--trace-instructions`)* +//! One row per executed instruction. +//! - `address` — Absolute VA of the instruction +//! - `cycle` — Monotonic instruction counter (execution order) +//! - `r3`, `r4`, `lr`, `sp` — Snapshot of key GPRs at time of execution +//! +//! ## `import_calls` *(opt-in: `--trace-imports`)* +//! One row per intercepted kernel/import call. +//! - `address` — VA of the import thunk +//! - `cycle` — Instruction counter at point of interception +//! - `module` — Library name (e.g. `xboxkrnl.exe`) +//! - `ordinal` — Numeric ordinal within the module +//! - `name` — Resolved symbol name +//! - `arg_r3`–`arg_r6` — First four call arguments (PowerPC ABI: r3–r6) +//! - `return_value` — Value in r3 after the call returns +//! +//! ## `branch_trace` *(opt-in: `--trace-branches`)* +//! One row per taken branch. +//! - `cycle` — Instruction counter +//! - `source` — VA of the branch instruction +//! - `target` — VA of the branch destination +//! - `kind` — `call`, `return`, `jump`, or `branch` (see top-level doc) +//! - `lr` — Link register value at time of branch use std::collections::HashMap; use std::path::Path; -use rusqlite::{Connection, params}; +use duckdb::{Connection, params}; use crate::func::FuncAnalysis; use crate::xref::{XrefMap, resolve_source_label}; @@ -26,12 +126,9 @@ use crate::formatter::DisasmInfo; const DEFAULT_BATCH_SIZE: u64 = 100_000; -/// Number of rows per DB commit / trace buffer flush. -/// Configurable via the `XENIA_DB_BATCH_SIZE` env var (default 100_000). 
-/// Used for: -/// - `instructions` and `xrefs` streaming commits in `write_disasm` -/// - `exec_trace` and `branch_trace` buffer thresholds during exec -/// (`import_calls` always flushes at 1000 — low volume, not worth scaling.) +/// Rows per trace buffer flush. Configurable via `XENIA_DB_BATCH_SIZE` env var (default 100_000). +/// Applies to `exec_trace` and `branch_trace` buffer thresholds. +/// `import_calls` always flushes at 1000 — low volume, not worth scaling. fn batch_size() -> u64 { use std::sync::OnceLock; static CACHED: OnceLock = OnceLock::new(); @@ -94,12 +191,6 @@ impl DbWriter { std::fs::remove_file(path)?; } let conn = Connection::open(path)?; - conn.execute_batch(" - PRAGMA journal_mode = OFF; - PRAGMA synchronous = OFF; - PRAGMA locking_mode = EXCLUSIVE; - PRAGMA temp_store = MEMORY; - ")?; let cap = batch_size() as usize; Ok(Self { conn, @@ -118,29 +209,30 @@ impl DbWriter { // ── Base layer (written by extract/dis/exec) ───────────────────────────── /// Write metadata, sections, imports tables and their indices. + #[tracing::instrument(skip_all, name = "db.write_base")] pub fn write_base(&mut self, info: &DisasmInfo) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE metadata ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL + key VARCHAR PRIMARY KEY, -- header field name + value VARCHAR NOT NULL -- hex-formatted or plain string value ); CREATE TABLE sections ( - name TEXT NOT NULL, - virtual_address INTEGER NOT NULL, - virtual_size INTEGER NOT NULL, - raw_offset INTEGER NOT NULL, - raw_size INTEGER NOT NULL, - flags INTEGER NOT NULL, - is_code BOOLEAN NOT NULL + name VARCHAR NOT NULL, -- PE section name (e.g. 
.text, .rdata) + virtual_address BIGINT NOT NULL, -- RVA relative to image_base + virtual_size BIGINT NOT NULL, -- size in memory; may exceed raw_size (BSS) + raw_offset BIGINT NOT NULL, -- byte offset of section data in the file + raw_size BIGINT NOT NULL, -- size of section data on disk + flags BIGINT NOT NULL, -- IMAGE_SCN_* characteristics bit field + is_code BOOLEAN NOT NULL -- true if IMAGE_SCN_CNT_CODE is set ); CREATE TABLE imports ( - library TEXT NOT NULL, - ordinal INTEGER NOT NULL, - name TEXT, - record_type INTEGER NOT NULL, - address INTEGER NOT NULL + library VARCHAR NOT NULL, -- module name (e.g. xboxkrnl.exe, xam.xex) + ordinal BIGINT NOT NULL, -- ordinal identifying the export within the library + name VARCHAR, -- resolved symbol name; NULL if not in symbol table + record_type BIGINT NOT NULL, -- 0 = function thunk, 1 = variable + address BIGINT NOT NULL -- absolute VA of the thunk or variable ); ")?; @@ -150,15 +242,69 @@ impl DbWriter { self.conn.execute_batch(" CREATE INDEX idx_imports_library ON imports(library); - CREATE INDEX idx_imports_name ON imports(name); + CREATE INDEX idx_imports_name ON imports(name); ")?; Ok(()) } // ── Disasm layer (written by dis/exec) ─────────────────────────────────── - /// Write functions, labels, instructions, xrefs tables and indices. - pub fn write_disasm( + /// Phase-3 ingest pass — purely mechanical disasm rows. Creates the + /// `instructions` table (and its indices) and streams every code-section + /// instruction through the iterator + DuckDB sink. Does NOT touch + /// `functions` / `labels` / `xrefs` — that's [`Self::write_analysis_results`]. + /// + /// `func_analysis` and `labels` are still required at this layer because + /// each row carries the rolling-window `function` and `label` columns for + /// downstream queries. 
+ #[tracing::instrument(skip_all, name = "db.ingest_instructions")] + pub fn ingest_instructions( + &mut self, + pe: &[u8], + info: &DisasmInfo, + func_analysis: &FuncAnalysis, + labels: &HashMap, + ) -> anyhow::Result<()> { + self.conn.execute_batch(" + CREATE TABLE instructions ( + address BIGINT PRIMARY KEY, -- absolute VA + raw BIGINT NOT NULL, -- 4-byte big-endian instruction word as integer + mnemonic VARCHAR NOT NULL, -- base mnemonic (e.g. stw, bl, cmpwi) + operands VARCHAR NOT NULL, -- operand string from base disassembly + disasm VARCHAR NOT NULL, -- full base disassembly (mnemonic + operands) + ext_mnemonic VARCHAR, -- simplified mnemonic (e.g. mr); NULL if none + ext_operands VARCHAR, -- operands for the extended form; NULL if none + ext_disasm VARCHAR, -- full extended disassembly string; NULL if none + target_hex BIGINT, -- resolved absolute target for direct branches; NULL for indirect/non-branch + section VARCHAR NOT NULL, -- PE section name containing this instruction + function BIGINT, -- VA of the enclosing function; NULL if unknown + label VARCHAR -- label at this address; NULL if none + ); + ")?; + + insert_instructions_streaming(&self.conn, pe, info, func_analysis, labels)?; + + let indices = [ + ("idx_instructions_function", "CREATE INDEX idx_instructions_function ON instructions(function)"), + ("idx_instructions_mnemonic", "CREATE INDEX idx_instructions_mnemonic ON instructions(mnemonic)"), + ("idx_instructions_ext_mnemonic", "CREATE INDEX idx_instructions_ext_mnemonic ON instructions(ext_mnemonic)"), + ("idx_instructions_section", "CREATE INDEX idx_instructions_section ON instructions(section)"), + ("idx_instructions_label", "CREATE INDEX idx_instructions_label ON instructions(label)"), + ("idx_instructions_target_hex", "CREATE INDEX idx_instructions_target_hex ON instructions(target_hex)"), + ]; + for (name, sql) in indices { + tracing::debug!(index = name, "creating instructions index"); + self.conn.execute_batch(sql)?; + } + Ok(()) + } + 
+ /// Phase-3 analyze pass — writes the Rust-pass-derived tables + /// (`functions`, `labels`, `xrefs`) and their indices. Always executes + /// in `--analyze=rust` and `--analyze=both` modes; skipped only when + /// the caller deliberately chooses a Rust-free DB layout. + #[tracing::instrument(skip_all, name = "db.write_analysis_results")] + pub fn write_analysis_results( &mut self, pe: &[u8], info: &DisasmInfo, @@ -168,74 +314,111 @@ impl DbWriter { ) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE functions ( - address INTEGER PRIMARY KEY, - name TEXT NOT NULL, - end_address INTEGER NOT NULL, - frame_size INTEGER NOT NULL, - saved_gprs INTEGER NOT NULL, - is_leaf BOOLEAN NOT NULL, - is_saverestore BOOLEAN NOT NULL + address BIGINT PRIMARY KEY, -- absolute VA of entry point + name VARCHAR NOT NULL, -- symbol name, or sub_XXXXXXXX if unresolved + end_address BIGINT NOT NULL, -- VA of last instruction + 4 (exclusive end) + frame_size BIGINT NOT NULL, -- stack frame size in bytes (from prologue) + saved_gprs BIGINT NOT NULL, -- bitmask of GPRs saved in prologue (bit N = rN) + is_leaf BOOLEAN NOT NULL, -- true if the function has no outgoing calls + is_saverestore BOOLEAN NOT NULL -- true if __savegprlr_* / __restgprlr_* stub ); CREATE TABLE labels ( - address INTEGER PRIMARY KEY, - name TEXT NOT NULL, - kind TEXT NOT NULL - ); - - CREATE TABLE instructions ( - address INTEGER PRIMARY KEY, - raw INTEGER NOT NULL, - mnemonic TEXT NOT NULL, - operands TEXT NOT NULL, - disasm TEXT NOT NULL, - ext_mnemonic TEXT, - ext_operands TEXT, - ext_disasm TEXT, - section TEXT NOT NULL, - function INTEGER, - label TEXT + address BIGINT PRIMARY KEY, -- absolute VA + name VARCHAR NOT NULL, -- symbol name + kind VARCHAR NOT NULL -- function | import | saverestore | local | data | other ); CREATE TABLE xrefs ( - source INTEGER NOT NULL, - target INTEGER NOT NULL, - kind TEXT NOT NULL, - instruction TEXT, - source_func INTEGER, - source_label TEXT, - target_label TEXT + 
source BIGINT NOT NULL, -- VA of the referencing instruction + target BIGINT NOT NULL, -- VA of the referenced destination + kind VARCHAR NOT NULL, -- call | j | br | read | write | ref (short tags from XrefKind::tag) + instruction VARCHAR, -- mnemonic of source instruction; NULL if not in binary + source_func BIGINT, -- VA of the function containing source; NULL if unknown + source_label VARCHAR, -- label at source; NULL if none + target_label VARCHAR -- label at target; NULL if none ); ")?; insert_functions(&self.conn, func_analysis, labels)?; insert_labels(&self.conn, labels)?; - insert_instructions_streaming(&self.conn, pe, info, func_analysis, labels)?; insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?; let indices = [ - ("idx_functions_name", "CREATE INDEX idx_functions_name ON functions(name)"), - ("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"), - ("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"), - ("idx_instructions_function", "CREATE INDEX idx_instructions_function ON instructions(function)"), - ("idx_instructions_mnemonic", "CREATE INDEX idx_instructions_mnemonic ON instructions(mnemonic)"), - ("idx_instructions_ext_mnemonic","CREATE INDEX idx_instructions_ext_mnemonic ON instructions(ext_mnemonic)"), - ("idx_instructions_section", "CREATE INDEX idx_instructions_section ON instructions(section)"), - ("idx_instructions_label", "CREATE INDEX idx_instructions_label ON instructions(label)"), - ("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"), - ("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"), - ("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"), - ("idx_xrefs_kind", "CREATE INDEX idx_xrefs_kind ON xrefs(kind)"), - ("idx_xrefs_instruction", "CREATE INDEX idx_xrefs_instruction ON xrefs(instruction)"), - ("idx_xrefs_target_label", "CREATE INDEX idx_xrefs_target_label ON xrefs(target_label)"), + ("idx_functions_name", 
"CREATE INDEX idx_functions_name ON functions(name)"), + ("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"), + ("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"), + ("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"), + ("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"), + ("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"), + ("idx_xrefs_kind", "CREATE INDEX idx_xrefs_kind ON xrefs(kind)"), + ("idx_xrefs_instruction", "CREATE INDEX idx_xrefs_instruction ON xrefs(instruction)"), + ("idx_xrefs_target_label", "CREATE INDEX idx_xrefs_target_label ON xrefs(target_label)"), ]; for (name, sql) in indices { - eprintln!("[db] creating {name}..."); + tracing::debug!(index = name, "creating analysis index"); self.conn.execute_batch(sql)?; } Ok(()) } + /// Back-compat wrapper for callers that want the full pre-Phase-3 + /// "everything in one shot" behaviour. Equivalent to + /// `ingest_instructions` + `write_analysis_results`. + #[tracing::instrument(skip_all, name = "db.write_disasm")] + pub fn write_disasm( + &mut self, + pe: &[u8], + info: &DisasmInfo, + func_analysis: &FuncAnalysis, + labels: &HashMap, + xrefs: &XrefMap, + ) -> anyhow::Result<()> { + self.ingest_instructions(pe, info, func_analysis, labels)?; + self.write_analysis_results(pe, info, func_analysis, labels, xrefs)?; + Ok(()) + } + + /// Phase-3 SQL-views layer — defines additive read-only views over + /// `instructions` (and optionally `xrefs`/`functions`/`labels`). + /// See [`crate::sql_views`] for the SQL definitions. + /// + /// Called when `--analyze=sql` or `--analyze=both` is in effect. 
+ #[tracing::instrument(skip_all, name = "db.create_sql_views")] + pub fn create_sql_views(&mut self) -> anyhow::Result<()> { + for (name, sql) in crate::sql_views::ALL_VIEWS { + tracing::debug!(view = name, "creating SQL view"); + self.conn.execute_batch(sql)?; + } + Ok(()) + } + + /// Cross-check: count branch xrefs found by the SQL view that are absent + /// from the Rust-pass `xrefs` table (and vice versa). Returns + /// `(sql_only, rust_only)` row counts. Both should be zero — the two + /// surfaces produce identical edges by construction. A non-zero count + /// signals drift between the formatter's `mnemonic` column and + /// `xref.rs`'s opcode classification, and is logged as a warning by the + /// caller. + #[tracing::instrument(skip_all, name = "db.cross_check_branch_xrefs")] + pub fn cross_check_branch_xrefs(&self) -> anyhow::Result<(u64, u64)> { + let sql_only: i64 = self.conn.query_row( + "SELECT COUNT(*) FROM v_branch_xrefs vb \ + LEFT JOIN xrefs x \ + ON x.source = vb.source AND x.target = vb.target AND x.kind = vb.kind \ + WHERE x.source IS NULL", + [], |row| row.get(0) + )?; + let rust_only: i64 = self.conn.query_row( + "SELECT COUNT(*) FROM xrefs x \ + LEFT JOIN v_branch_xrefs vb \ + ON vb.source = x.source AND vb.target = x.target AND vb.kind = x.kind \ + WHERE x.kind IN ('call','j','br') AND vb.source IS NULL", + [], |row| row.get(0) + )?; + Ok((sql_only as u64, rust_only as u64)) + } + // ── Trace layer (written by exec when flags enabled) ───────────────────── /// Create the opt-in trace tables. No-op if all flags are false. 
@@ -251,49 +434,43 @@ impl DbWriter { if trace_instructions { self.conn.execute_batch(" - CREATE TABLE IF NOT EXISTS exec_trace ( - id INTEGER PRIMARY KEY, - address INTEGER NOT NULL, - cycle INTEGER NOT NULL, - r3 INTEGER NOT NULL, - r4 INTEGER NOT NULL, - lr INTEGER NOT NULL, - sp INTEGER NOT NULL + CREATE TABLE exec_trace ( + address BIGINT NOT NULL, -- absolute VA of the instruction + cycle BIGINT NOT NULL, -- monotonic instruction counter (execution order) + r3 BIGINT NOT NULL, -- r3 at time of execution + r4 BIGINT NOT NULL, -- r4 at time of execution + lr BIGINT NOT NULL, -- link register + sp BIGINT NOT NULL -- stack pointer ); - DELETE FROM exec_trace; ")?; } if trace_imports { self.conn.execute_batch(" - CREATE TABLE IF NOT EXISTS import_calls ( - id INTEGER PRIMARY KEY, - address INTEGER NOT NULL, - cycle INTEGER NOT NULL, - module TEXT NOT NULL, - ordinal INTEGER NOT NULL, - name TEXT NOT NULL, - arg_r3 INTEGER NOT NULL, - arg_r4 INTEGER NOT NULL, - arg_r5 INTEGER NOT NULL, - arg_r6 INTEGER NOT NULL, - return_value INTEGER NOT NULL + CREATE TABLE import_calls ( + address BIGINT NOT NULL, -- VA of the import thunk + cycle BIGINT NOT NULL, -- instruction counter at interception + module VARCHAR NOT NULL, -- library name (e.g. 
xboxkrnl.exe) + ordinal BIGINT NOT NULL, -- ordinal within the module + name VARCHAR NOT NULL, -- resolved symbol name + arg_r3 BIGINT NOT NULL, -- first argument (r3) + arg_r4 BIGINT NOT NULL, -- second argument (r4) + arg_r5 BIGINT NOT NULL, -- third argument (r5) + arg_r6 BIGINT NOT NULL, -- fourth argument (r6) + return_value BIGINT NOT NULL -- r3 after the call returns ); - DELETE FROM import_calls; ")?; } if trace_branches { self.conn.execute_batch(" - CREATE TABLE IF NOT EXISTS branch_trace ( - id INTEGER PRIMARY KEY, - cycle INTEGER NOT NULL, - source INTEGER NOT NULL, - target INTEGER NOT NULL, - kind TEXT NOT NULL, - lr INTEGER NOT NULL + CREATE TABLE branch_trace ( + cycle BIGINT NOT NULL, -- instruction counter + source BIGINT NOT NULL, -- VA of the branch instruction + target BIGINT NOT NULL, -- VA of the branch destination + kind VARCHAR NOT NULL, -- call | return | jump | branch + lr BIGINT NOT NULL -- link register at time of branch ); - DELETE FROM branch_trace; ")?; } @@ -326,109 +503,99 @@ impl DbWriter { fn flush_exec(&mut self) { if self.exec_buffer.is_empty() { return; } - let tx = self.conn.unchecked_transaction().unwrap(); - { - let mut stmt = tx.prepare_cached( - "INSERT INTO exec_trace (address, cycle, r3, r4, lr, sp) VALUES (?1, ?2, ?3, ?4, ?5, ?6)" - ).unwrap(); - for e in &self.exec_buffer { - stmt.execute(params![ - e.address as i64, - e.cycle as i64, - e.r3 as i64, - e.r4 as i64, - e.lr as i64, - e.sp as i64, - ]).ok(); - } + let mut appender = self.conn.appender("exec_trace").unwrap(); + for e in &self.exec_buffer { + appender.append_row(params![ + e.address as i64, + e.cycle as i64, + e.r3 as i64, + e.r4 as i64, + e.lr as i64, + e.sp as i64, + ]).ok(); } - tx.commit().ok(); + appender.flush().ok(); self.exec_count += self.exec_buffer.len() as u64; self.exec_buffer.clear(); } fn flush_imports(&mut self) { if self.import_buffer.is_empty() { return; } - let tx = self.conn.unchecked_transaction().unwrap(); - { - let mut stmt = 
tx.prepare_cached( - "INSERT INTO import_calls (address, cycle, module, ordinal, name, arg_r3, arg_r4, arg_r5, arg_r6, return_value) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)" - ).unwrap(); - for e in &self.import_buffer { - stmt.execute(params![ - e.address as i64, - e.cycle as i64, - e.module, - e.ordinal as i64, - e.name, - e.arg_r3 as i64, - e.arg_r4 as i64, - e.arg_r5 as i64, - e.arg_r6 as i64, - e.return_value as i64, - ]).ok(); - } + let mut appender = self.conn.appender("import_calls").unwrap(); + for e in &self.import_buffer { + appender.append_row(params![ + e.address as i64, + e.cycle as i64, + e.module.as_str(), + e.ordinal as i64, + e.name.as_str(), + e.arg_r3 as i64, + e.arg_r4 as i64, + e.arg_r5 as i64, + e.arg_r6 as i64, + e.return_value as i64, + ]).ok(); } - tx.commit().ok(); + appender.flush().ok(); self.import_count += self.import_buffer.len() as u64; self.import_buffer.clear(); } fn flush_branches(&mut self) { if self.branch_buffer.is_empty() { return; } - let tx = self.conn.unchecked_transaction().unwrap(); - { - let mut stmt = tx.prepare_cached( - "INSERT INTO branch_trace (cycle, source, target, kind, lr) VALUES (?1, ?2, ?3, ?4, ?5)" - ).unwrap(); - for e in &self.branch_buffer { - stmt.execute(params![ - e.cycle as i64, - e.source as i64, - e.target as i64, - e.kind, - e.lr as i64, - ]).ok(); - } + let mut appender = self.conn.appender("branch_trace").unwrap(); + for e in &self.branch_buffer { + appender.append_row(params![ + e.cycle as i64, + e.source as i64, + e.target as i64, + e.kind, + e.lr as i64, + ]).ok(); } - tx.commit().ok(); + appender.flush().ok(); self.branch_count += self.branch_buffer.len() as u64; self.branch_buffer.clear(); } /// Flush remaining trace buffers and create their indices. 
+ #[tracing::instrument(skip_all, name = "db.finalize_traces")] pub fn finalize_traces(&mut self) -> anyhow::Result<()> { self.flush_exec(); self.flush_imports(); self.flush_branches(); if self.trace_instructions { - eprintln!("[db] creating idx_exec_trace_address..."); - self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_exec_trace_address ON exec_trace(address);")?; - eprintln!("[db] creating idx_exec_trace_cycle..."); - self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_exec_trace_cycle ON exec_trace(cycle);")?; + tracing::debug!("creating idx_exec_trace_address"); + self.conn.execute_batch("CREATE INDEX idx_exec_trace_address ON exec_trace(address);")?; + tracing::debug!("creating idx_exec_trace_cycle"); + self.conn.execute_batch("CREATE INDEX idx_exec_trace_cycle ON exec_trace(cycle);")?; } if self.trace_imports { - eprintln!("[db] creating idx_import_calls_name..."); - self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_import_calls_name ON import_calls(name);")?; - eprintln!("[db] creating idx_import_calls_cycle..."); - self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_import_calls_cycle ON import_calls(cycle);")?; + tracing::debug!("creating idx_import_calls_name"); + self.conn.execute_batch("CREATE INDEX idx_import_calls_name ON import_calls(name);")?; + tracing::debug!("creating idx_import_calls_cycle"); + self.conn.execute_batch("CREATE INDEX idx_import_calls_cycle ON import_calls(cycle);")?; } if self.trace_branches { - eprintln!("[db] creating idx_branch_trace_source..."); - self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_source ON branch_trace(source);")?; - eprintln!("[db] creating idx_branch_trace_target..."); - self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_target ON branch_trace(target);")?; - eprintln!("[db] creating idx_branch_trace_kind..."); - self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_kind ON branch_trace(kind);")?; - eprintln!("[db] creating 
idx_branch_trace_cycle..."); - self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_cycle ON branch_trace(cycle);")?; + tracing::debug!("creating idx_branch_trace_source"); + self.conn.execute_batch("CREATE INDEX idx_branch_trace_source ON branch_trace(source);")?; + tracing::debug!("creating idx_branch_trace_target"); + self.conn.execute_batch("CREATE INDEX idx_branch_trace_target ON branch_trace(target);")?; + tracing::debug!("creating idx_branch_trace_kind"); + self.conn.execute_batch("CREATE INDEX idx_branch_trace_kind ON branch_trace(kind);")?; + tracing::debug!("creating idx_branch_trace_cycle"); + self.conn.execute_batch("CREATE INDEX idx_branch_trace_cycle ON branch_trace(cycle);")?; } - eprintln!( - "[db] trace totals: {} instructions, {} imports, {} branches", - self.exec_count, self.import_count, self.branch_count + metrics::counter!("db.rows", "table" => "exec_trace").increment(self.exec_count); + metrics::counter!("db.rows", "table" => "import_calls").increment(self.import_count); + metrics::counter!("db.rows", "table" => "branch_trace").increment(self.branch_count); + tracing::info!( + instructions = self.exec_count, + imports = self.import_count, + branches = self.branch_count, + "trace totals" ); Ok(()) } @@ -453,7 +620,7 @@ pub fn write_db( // ── Helpers ──────────────────────────────────────────────────────────────── fn insert_metadata(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> { - let mut stmt = conn.prepare("INSERT INTO metadata (key, value) VALUES (?1, ?2)")?; + let mut stmt = conn.prepare("INSERT INTO metadata (key, value) VALUES (?, ?)")?; stmt.execute(params!["image_base", format!("0x{:08X}", info.image_base)])?; stmt.execute(params!["entry_point", format!("0x{:08X}", info.entry_point)])?; if let Some(name) = info.original_pe_name { @@ -471,7 +638,7 @@ fn insert_metadata(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> { fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) 
-> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO sections (name, virtual_address, virtual_size, raw_offset, raw_size, flags, is_code) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)" + VALUES (?, ?, ?, ?, ?, ?, ?)" )?; for s in sections { stmt.execute(params![ @@ -481,7 +648,7 @@ fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) -> s.raw_offset as i64, s.raw_size as i64, s.flags as i64, - s.is_code() as i32, + s.is_code(), ])?; } Ok(()) @@ -490,7 +657,7 @@ fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) -> fn insert_imports(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO imports (library, ordinal, name, record_type, address) - VALUES (?1, ?2, ?3, ?4, ?5)" + VALUES (?, ?, ?, ?, ?)" )?; for lib in info.import_libraries { for imp in &lib.imports { @@ -514,7 +681,7 @@ fn insert_functions( ) -> anyhow::Result<()> { let mut stmt = conn.prepare( "INSERT INTO functions (address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)" + VALUES (?, ?, ?, ?, ?, ?, ?)" )?; for (&addr, fi) in &func_analysis.functions { let name = labels.get(&addr) @@ -526,8 +693,8 @@ fn insert_functions( fi.end as i64, fi.frame_size as i64, fi.saved_gprs as i64, - fi.is_leaf as i32, - fi.is_saverestore as i32, + fi.is_leaf, + fi.is_saverestore, ])?; } Ok(()) @@ -538,7 +705,7 @@ fn insert_labels( labels: &HashMap, ) -> anyhow::Result<()> { let mut stmt = conn.prepare( - "INSERT OR IGNORE INTO labels (address, name, kind) VALUES (?1, ?2, ?3)" + "INSERT INTO labels (address, name, kind) VALUES (?, ?, ?) 
ON CONFLICT DO NOTHING" )?; for (&addr, name) in labels { let kind = if name.starts_with("sub_") || name == "entry_point" { @@ -566,78 +733,22 @@ fn insert_instructions_streaming( func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { - let mut tx = conn.unchecked_transaction()?; - let mut count: u64 = 0; - let mut since_commit: u64 = 0; + let mut appender = conn.appender("instructions")?; + let mut total: u64 = 0; for section in info.sections { if !section.is_code() { continue; } - - let va_start = section.virtual_address; - let va_end = va_start + section.virtual_size; - let file_start = section.virtual_address as usize; - - let mut current_func: Option = None; - let mut addr = va_start; - - while addr < va_end { - let abs_addr = info.image_base + addr; - let off = (addr - va_start) as usize + file_start; - if off + 4 > pe.len() { break; } - - if func_analysis.is_function_start(abs_addr) { - current_func = Some(abs_addr); - } - - let instr = u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]]); - let decoded = crate::ppc::disasm(instr, abs_addr); - let (mnemonic, operands) = split_disasm(&decoded.base); - - let (ext_mnemonic, ext_operands, ext_disasm): (Option<&str>, Option<&str>, Option<&str>) = - match &decoded.ext { - Some(ext) => { - let (em, eo) = split_disasm(ext); - (Some(em), Some(eo), Some(ext.as_str())) - } - None => (None, None, None), - }; - let label = labels.get(&abs_addr).map(|s| s.as_str()); - - { - let mut stmt = tx.prepare_cached( - "INSERT INTO instructions (address, raw, mnemonic, operands, disasm, ext_mnemonic, ext_operands, ext_disasm, section, function, label) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)" - )?; - stmt.execute(params![ - abs_addr as i64, - instr as i64, - mnemonic, - operands, - decoded.base, - ext_mnemonic, - ext_operands, - ext_disasm, - section.name, - current_func.map(|a| a as i64), - label, - ])?; - } - - count += 1; - since_commit += 1; - addr += 4; - - if since_commit >= 
batch_size() { - tx.commit()?; - eprintln!("[db] instructions: {count} committed"); - tx = conn.unchecked_transaction()?; - since_commit = 0; - } - } + let va_start = info.image_base + section.virtual_address; + let va_end = info.image_base + section.virtual_address + section.virtual_size; + let items = crate::disasm::enrich_section( + pe, info.image_base, §ion.name, va_start, va_end, func_analysis, labels, + ); + total += crate::sinks::duckdb::append_instructions(&mut appender, items)?; } - tx.commit()?; - eprintln!("[db] inserted {count} instructions"); + appender.flush()?; + metrics::counter!("db.rows", "table" => "instructions").increment(total); + tracing::info!(rows = total, table = "instructions", "bulk insert complete"); Ok(()) } @@ -649,9 +760,8 @@ fn insert_xrefs_streaming( func_analysis: &FuncAnalysis, labels: &HashMap, ) -> anyhow::Result<()> { - let mut tx = conn.unchecked_transaction()?; + let mut appender = conn.appender("xrefs")?; let mut count: u64 = 0; - let mut since_commit: u64 = 0; for (&target, refs) in xrefs { let target_label = labels.get(&target).map(|s| s.as_str()); @@ -663,10 +773,11 @@ fn insert_xrefs_streaming( let off = xref.source.wrapping_sub(image_base) as usize; if off + 4 <= pe.len() { let raw = u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]]); - let decoded = crate::ppc::disasm(raw, xref.source); - let display = decoded.display().to_string(); - let (mnem, _) = split_disasm(&display); - Some(mnem.to_string()) + let d = xenia_cpu::decode(raw, xref.source); + let t = xenia_cpu::disasm::format(&d); + // Prefer the simplified mnemonic when present (matches what + // a human reading the .asm file sees for that line). 
+ Some(t.ext_mnemonic.unwrap_or(t.mnemonic)) } else { None } @@ -681,47 +792,22 @@ fn insert_xrefs_streaming( xref.source, func_analysis, labels, ); - { - let mut stmt = tx.prepare_cached( - "INSERT INTO xrefs (source, target, kind, instruction, source_func, source_label, target_label) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)" - )?; - stmt.execute(params![ - xref.source as i64, - target as i64, - kind, - instruction, - source_func, - source_label, - target_label, - ])?; - } + appender.append_row(params![ + xref.source as i64, + target as i64, + kind, + instruction.as_deref(), + source_func, + source_label.as_str(), + target_label, + ])?; count += 1; - since_commit += 1; - - if since_commit >= batch_size() { - tx.commit()?; - eprintln!("[db] xrefs: {count} committed"); - tx = conn.unchecked_transaction()?; - since_commit = 0; - } } } - tx.commit()?; - eprintln!("[db] inserted {count} xrefs"); + appender.flush()?; + metrics::counter!("db.rows", "table" => "xrefs").increment(count); + tracing::info!(rows = count, table = "xrefs", "bulk insert complete"); Ok(()) } - -/// Split "mnemonic operands" into (mnemonic, operands). -fn split_disasm(disasm: &str) -> (&str, &str) { - let trimmed = disasm.trim(); - if let Some(pos) = trimmed.find(|c: char| c.is_whitespace()) { - let mnemonic = &trimmed[..pos]; - let operands = trimmed[pos..].trim_start(); - (mnemonic, operands) - } else { - (trimmed, "") - } -} diff --git a/crates/xenia-analysis/src/disasm.rs b/crates/xenia-analysis/src/disasm.rs new file mode 100644 index 0000000..d1a2761 --- /dev/null +++ b/crates/xenia-analysis/src/disasm.rs @@ -0,0 +1,51 @@ +//! Analysis-side enrichment over [`xenia_cpu::disasm::iter_disasm`]. +//! +//! Turns a stream of decoder-only [`xenia_cpu::disasm::DisasmItem`]s into a +//! stream of [`RichDisasmItem`]s carrying section name + enclosing function + +//! label name. The three sinks in [`crate::sinks`] (text, JSON, DuckDB) all +//! consume `RichDisasmItem`. 
+ +use std::collections::HashMap; + +use xenia_cpu::disasm::DisasmItem; + +use crate::func::FuncAnalysis; + +/// `DisasmItem` plus the analysis context (section/function/label). +#[derive(Debug, Clone)] +pub struct RichDisasmItem<'a> { + pub item: DisasmItem, + pub section: &'a str, + pub function: Option, + pub label: Option<&'a str>, +} + +/// Walk one code section, yielding rich items annotated with section name, +/// rolling-window enclosing function, and label-at-address. +/// +/// The `function` field tracks the most recent function-start the iterator +/// has crossed — matching the legacy `current_func` behaviour in +/// `db.rs::insert_instructions_streaming`. +pub fn enrich_section<'a>( + image: &'a [u8], + image_base: u32, + section_name: &'a str, + va_start: u32, + va_end: u32, + func_analysis: &'a FuncAnalysis, + labels: &'a HashMap, +) -> impl Iterator> + 'a { + let mut current_func: Option = None; + xenia_cpu::disasm::iter_disasm(image, image_base, va_start, va_end).map(move |item| { + if func_analysis.is_function_start(item.addr) { + current_func = Some(item.addr); + } + let label = labels.get(&item.addr).map(|s| s.as_str()); + RichDisasmItem { + item, + section: section_name, + function: current_func, + label, + } + }) +} diff --git a/crates/xenia-analysis/src/formatter.rs b/crates/xenia-analysis/src/formatter.rs index fac4e07..f0746ec 100644 --- a/crates/xenia-analysis/src/formatter.rs +++ b/crates/xenia-analysis/src/formatter.rs @@ -6,8 +6,10 @@ use std::io::Write; use xenia_xex::header::ImportLibrary; use xenia_xex::pe::PeSection; +use crate::disasm::enrich_section; use crate::func::FuncAnalysis; -use crate::xref::{XrefKind, Xref, XrefMap, section_for_addr, resolve_source_label}; +use crate::sinks::text::write_instr_line; +use crate::xref::{XrefKind, Xref, XrefMap, resolve_source_label}; /// Metadata passed to the formatter (avoids exposing full Xex2Header internals). 
pub struct DisasmInfo<'a> { @@ -88,11 +90,14 @@ pub fn write_asm( writeln!(out)?; let mut in_function = false; - let mut addr = va_start; - while addr < va_end { - let abs_addr = info.image_base + addr; - let off = (addr - va_start) as usize + file_start; - if off + 4 > pe.len() { break; } + let abs_start = info.image_base + va_start; + let abs_end = info.image_base + va_end; + + let items = enrich_section( + pe, info.image_base, §ion.name, abs_start, abs_end, func_analysis, labels, + ); + for ri in items { + let abs_addr = ri.item.addr; // Function start? Emit separator + header if let Some(fi) = func_analysis.get(abs_addr) { @@ -126,7 +131,6 @@ pub fn write_asm( writeln!(out, "; FUNCTION: {lbl}{detail_str}")?; } - // Xrefs for function entry if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) { for line in &xref_lines { writeln!(out, "{line}")?; @@ -141,7 +145,6 @@ pub fn write_asm( if let Some(lbl) = labels.get(&abs_addr) { if !func_analysis.is_function_start(abs_addr) { writeln!(out)?; - // Xrefs for local labels if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) { for line in &xref_lines { writeln!(out, "{line}")?; @@ -159,37 +162,8 @@ pub fn write_asm( writeln!(out, " ; IMPORT: {imp_name}")?; } - let instr = u32::from_be_bytes([ - pe[off], pe[off+1], pe[off+2], pe[off+3] - ]); - - let decoded = crate::ppc::disasm(instr, abs_addr); - let disasm_text = decoded.display().to_string(); - - // Annotate branch targets with label names - let mut annotated = annotate_branch(&disasm_text, labels); - - // Annotate data references - if let Some(&(data_addr, kind)) = data_annotations.get(&abs_addr) { - let tag = match kind { - XrefKind::DataRead => "[R]", - XrefKind::DataWrite => "[W]", - _ => "[&]", - }; - let sec = section_for_addr(data_addr, info.sections, info.image_base) - .unwrap_or("?"); - let data_lbl = labels.get(&data_addr) - .map(|s| format!(" = {s}")) - .unwrap_or_default(); - if !annotated.contains("; 
->") { - annotated = format!("{annotated:<40} ; {tag} 0x{data_addr:08X} ({sec}){data_lbl}"); - } else { - annotated = format!("{annotated} {tag} 0x{data_addr:08X} ({sec}){data_lbl}"); - } - } - - writeln!(out, " {:08X}: {:08X} {}", abs_addr, instr, annotated)?; - addr += 4; + let data_annot = data_annotations.get(&abs_addr).copied(); + write_instr_line(out, &ri, labels, info.sections, info.image_base, data_annot)?; } if in_function { writeln!(out, "; end function")?; @@ -298,21 +272,3 @@ fn format_xrefs( Some(lines) } - -fn annotate_branch(disasm: &str, labels: &HashMap) -> String { - if let Some(pos) = disasm.find("0x") { - let hex_start = pos + 2; - let hex_end = disasm[hex_start..].find(|c: char| !c.is_ascii_hexdigit()) - .map(|i| hex_start + i) - .unwrap_or(disasm.len()); - let hex_str = &disasm[hex_start..hex_end]; - if hex_str.len() == 8 { - if let Ok(addr) = u32::from_str_radix(hex_str, 16) { - if let Some(lbl) = labels.get(&addr) { - return format!("{disasm:<40} ; -> {lbl}"); - } - } - } - } - disasm.to_string() -} diff --git a/crates/xenia-analysis/src/func.rs b/crates/xenia-analysis/src/func.rs index b004d8e..16a07d0 100644 --- a/crates/xenia-analysis/src/func.rs +++ b/crates/xenia-analysis/src/func.rs @@ -184,12 +184,14 @@ fn find_saverestore_stubs( // ── Main analysis ────────────────────────────────────────────────────────── +#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))] pub fn analyze( pe: &[u8], image_base: u32, entry_point: u32, code_sections: &[(u32, u32, u32)], // (va_start, va_size, flags) ) -> FuncAnalysis { + let started = std::time::Instant::now(); let code_ranges: Vec<(u32, u32)> = code_sections.iter() .map(|(va, sz, _)| (image_base + va, image_base + va + sz)) .collect(); @@ -197,10 +199,10 @@ pub fn analyze( // 1. 
Find save/restore stubs let (save_base, restore_base) = find_saverestore_stubs(pe, image_base, &code_ranges); if let Some(sb) = save_base { - eprintln!("[func] __savegprlr stub at 0x{sb:08X}"); + tracing::debug!(addr = format_args!("{:#010x}", sb), "__savegprlr stub"); } if let Some(rb) = restore_base { - eprintln!("[func] __restgprlr stub at 0x{rb:08X}"); + tracing::debug!(addr = format_args!("{:#010x}", rb), "__restgprlr stub"); } // Set of addresses in the save/restore region (to exclude from function detection) @@ -221,18 +223,17 @@ pub fn analyze( for &(start, end) in &code_ranges { let mut addr = start; while addr < end { - if let Some(instr) = read_instr(pe, addr, image_base) { - if let Some(target) = bl_target(instr, addr) { + if let Some(instr) = read_instr(pe, addr, image_base) + && let Some(target) = bl_target(instr, addr) { // Don't count calls into save/restore stubs as function entries if !saverestore_addrs.contains(&target) { call_targets.insert(target); } } - } addr += 4; } } - eprintln!("[func] {} bl targets (candidate functions)", call_targets.len()); + tracing::debug!(candidates = call_targets.len(), "bl targets collected"); // 3. 
For each candidate, detect prologue and walk to epilogue let mut functions: BTreeMap = BTreeMap::new(); @@ -267,7 +268,13 @@ pub fn analyze( }); } - eprintln!("[func] {} functions detected", functions.len()); + let elapsed_ms = started.elapsed().as_millis() as f64; + metrics::histogram!("analysis.phase_ms", "phase" => "functions").record(elapsed_ms); + tracing::info!( + functions = functions.len(), + elapsed_ms, + "function detection complete" + ); FuncAnalysis { functions, @@ -302,15 +309,13 @@ fn analyze_function( let instr1 = read_instr(pe, func_addr + 4, image_base).unwrap_or(0); // Check if next is bl to save stub - if let Some(target) = bl_target(instr1, func_addr + 4) { - if let Some(sb) = save_base { - if target >= sb && target < sb + 18 * 4 { + if let Some(target) = bl_target(instr1, func_addr + 4) + && let Some(sb) = save_base + && target >= sb && target < sb + 18 * 4 { let idx = (target - sb) / 4; saved_gprs = 18 - idx; prologue_len = 8; } - } - } // Next should be stwu r1, -N(r1) let stwu_instr = read_instr(pe, func_addr + prologue_len, image_base).unwrap_or(0); @@ -356,14 +361,12 @@ fn analyze_function( } // Epilogue: b __restgprlr_NN (tail branch into restore stub) - if let Some(target) = b_target(instr, addr) { - if let Some(rb) = restore_base { - if target >= rb && target < rb + 18 * 4 { + if let Some(target) = b_target(instr, addr) + && let Some(rb) = restore_base + && target >= rb && target < rb + 18 * 4 { end_addr = addr + 4; break; } - } - } // Epilogue: bctr (indirect tail call — end of function) if is_bctr(instr) { @@ -407,24 +410,22 @@ impl FuncAnalysis { for (&addr, fi) in &self.functions { if fi.is_saverestore { // Label the block start, plus individual register entry points - if let Some(sb) = self.save_gpr_base { - if addr == sb { + if let Some(sb) = self.save_gpr_base + && addr == sb { for i in 0u32..18 { let reg = 14 + i; labels.insert(sb + i * 4, format!("__savegprlr_{reg}")); } continue; } - } - if let Some(rb) = self.restore_gpr_base 
{ - if addr == rb { + if let Some(rb) = self.restore_gpr_base + && addr == rb { for i in 0u32..18 { let reg = 14 + i; labels.insert(rb + i * 4, format!("__restgprlr_{reg}")); } continue; } - } } labels.insert(addr, format!("sub_{addr:08X}")); } diff --git a/crates/xenia-analysis/src/lib.rs b/crates/xenia-analysis/src/lib.rs index 6828ff3..e3f6639 100644 --- a/crates/xenia-analysis/src/lib.rs +++ b/crates/xenia-analysis/src/lib.rs @@ -2,9 +2,13 @@ pub mod ppc; pub mod func; pub mod xref; pub mod db; +pub mod disasm; pub mod formatter; +pub mod sinks; +pub mod sql_views; mod ordinals; pub use ordinals::resolve_ordinal; pub use xref::{XrefKind, Xref, XrefMap, resolve_source_label}; pub use db::{DbWriter, ExecTraceEntry, ImportCallEntry, BranchTraceEntry}; +pub use disasm::{RichDisasmItem, enrich_section}; diff --git a/crates/xenia-analysis/src/ppc.rs b/crates/xenia-analysis/src/ppc.rs index d8f9761..2f7beda 100644 --- a/crates/xenia-analysis/src/ppc.rs +++ b/crates/xenia-analysis/src/ppc.rs @@ -1,4 +1,11 @@ -//! PowerPC (big-endian, 32-bit) disassembler for Xbox 360 Xenon. +//! Back-compat shim. The full PPC disassembler now lives in +//! [`xenia_cpu::disasm`] (single source of truth, sitting on top of the +//! canonical decoder). This module preserves the legacy `Decoded { base, ext }` +//! surface so existing call sites keep compiling while the analysis crate +//! migrates to `DisasmText` directly. + +use xenia_cpu::decoder::decode; +use xenia_cpu::disasm::format; /// Decoded instruction carrying both base and (optional) extended mnemonic forms. pub struct Decoded { @@ -7,1370 +14,15 @@ pub struct Decoded { } impl Decoded { - fn base_only(s: String) -> Self { Self { base: s, ext: None } } - fn with_ext(base: String, ext: String) -> Self { Self { base, ext: Some(ext) } } /// Returns the preferred display form (extended if available, else base). 
- pub fn display(&self) -> &str { self.ext.as_deref().unwrap_or(&self.base) } + pub fn display(&self) -> &str { + self.ext.as_deref().unwrap_or(&self.base) + } } /// Disassemble one 32-bit big-endian PowerPC instruction. pub fn disasm(instr: u32, addr: u32) -> Decoded { - let op = (instr >> 26) & 0x3F; - match op { - 2 => decode_tdi(instr), - 3 => decode_twi(instr), - 4 => decode_op4(instr), - 5 => decode_op5(instr), - 6 => decode_op6(instr), - 7 => decode_d_mul("mulli", instr), - 8 => decode_d_sub("subfic", instr), - 10 => decode_cmp_imm("cmpli", instr, false), - 11 => decode_cmp_imm("cmpi", instr, true), - 12 => decode_d_add("addic", instr), - 13 => decode_d_add("addic.", instr), - 14 => decode_addi(instr), - 15 => decode_addis(instr), - 16 => decode_bc(instr, addr), - 17 => Decoded::base_only("sc".to_string()), - 18 => decode_b(instr, addr), - 19 => decode_op19(instr), - 20 => decode_rlwimi(instr), - 21 => decode_rlwinm(instr), - 23 => decode_rlwnm(instr), - 24 => decode_ori(instr), - 25 => decode_oris(instr), - 26 => decode_d_logic("xori", instr), - 27 => decode_d_logic("xoris", instr), - 28 => decode_d_logic("andi.", instr), - 29 => decode_d_logic("andis.", instr), - 30 => decode_op30(instr), - 31 => decode_op31(instr), - 32 => decode_ls("lwz", instr), - 33 => decode_ls("lwzu", instr), - 34 => decode_ls("lbz", instr), - 35 => decode_ls("lbzu", instr), - 36 => decode_ls("stw", instr), - 37 => decode_ls("stwu", instr), - 38 => decode_ls("stb", instr), - 39 => decode_ls("stbu", instr), - 40 => decode_ls("lhz", instr), - 41 => decode_ls("lhzu", instr), - 42 => decode_ls("lha", instr), - 43 => decode_ls("lhau", instr), - 44 => decode_ls("sth", instr), - 45 => decode_ls("sthu", instr), - 46 => decode_ls("lmw", instr), - 47 => decode_ls("stmw", instr), - 48 => decode_ls("lfs", instr), - 49 => decode_ls("lfsu", instr), - 50 => decode_ls("lfd", instr), - 51 => decode_ls("lfdu", instr), - 52 => decode_ls("stfs", instr), - 53 => decode_ls("stfsu", instr), - 54 => 
decode_ls("stfd", instr), - 55 => decode_ls("stfdu", instr), - 58 => decode_ds_form(instr), - 59 => decode_op59(instr), - 62 => decode_ds_store(instr), - 63 => decode_op63(instr), - _ => Decoded::base_only(format!(".long 0x{instr:08X}")), - } -} - -// ── Register names ────────────────────────────────────────────────────────── - -fn gpr(r: u32) -> String { format!("r{r}") } -fn fpr(r: u32) -> String { format!("f{r}") } -fn crb(b: u32) -> String { - let cr = b / 4; - let bit = b % 4; - let bit_name = ["lt", "gt", "eq", "so"][bit as usize]; - if cr == 0 { bit_name.to_string() } else { format!("4*cr{cr}+{bit_name}") } -} - -fn spr_name(spr: u32) -> String { - match spr { - 1 => "XER".into(), - 8 => "LR".into(), - 9 => "CTR".into(), - _ => format!("spr{spr}"), - } -} - -fn vr(r: u32) -> String { format!("v{r}") } - -// ── Field extraction ──────────────────────────────────────────────────────── - -fn bits(instr: u32, hi: u32, lo: u32) -> u32 { - (instr >> (31 - hi)) & ((1 << (hi - lo + 1)) - 1) -} - -fn sign_ext(val: u32, bits: u32) -> i32 { - let shift = 32 - bits; - ((val << shift) as i32) >> shift -} - -// ── VMX128 extended register extraction ───────────────────────────────────── -// Xbox 360 VMX128 uses 7-bit vector registers (v0-v127) split across -// non-contiguous bit positions in the instruction word. - -fn vd128(instr: u32) -> u32 { - bits(instr, 10, 6) | (bits(instr, 29, 28) << 5) -} -fn va128(instr: u32) -> u32 { - bits(instr, 15, 11) | (bits(instr, 26, 26) << 5) | (bits(instr, 21, 21) << 6) -} -fn vb128(instr: u32) -> u32 { - bits(instr, 20, 16) | (bits(instr, 31, 30) << 5) -} - -// ── Shared helpers ────────────────────────────────────────────────────────── - -/// Map trap TO field to condition suffix: 16→"lt", 4→"eq", etc. 
-fn trap_cond(to: u32) -> Option<&'static str> { - match to { - 16 => Some("lt"), - 4 => Some("eq"), - 8 => Some("gt"), - 12 => Some("ge"), - 20 => Some("le"), - 24 => Some("ne"), - 31 => Some(""), // unconditional - _ => None, - } -} - -/// Decode BO/BI into a condition suffix and CR prefix for simplified branches. -/// Returns Some((cond_suffix, cr_prefix)) e.g. ("eq", "") or ("lt", "cr2, "). -fn cond_branch_ext(bo: u32, bi: u32) -> Option<(&'static str, String)> { - let cond_true = bo & 0x08 != 0; - let no_cond = bo & 0x10 != 0; - let decr = bo & 0x04 == 0; - if no_cond || decr { return None; } - - let cr_field = bi / 4; - let cr_bit = bi % 4; - let cond_name = match (cr_bit, cond_true) { - (0, true) => "lt", (0, false) => "ge", - (1, true) => "gt", (1, false) => "le", - (2, true) => "eq", (2, false) => "ne", - (3, true) => "so", (3, false) => "ns", - _ => return None, - }; - let cr = if cr_field == 0 { String::new() } else { format!("cr{cr_field}, ") }; - Some((cond_name, cr)) -} - -// ── D-form: addi / li / subi ──────────────────────────────────────────────── - -fn decode_addi(instr: u32) -> Decoded { - let rt = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let imm = sign_ext(instr & 0xFFFF, 16); - let base = format!("addi {}, {}, {}", gpr(rt), gpr(ra), imm); - if ra == 0 { - Decoded::with_ext(base, format!("li {}, {}", gpr(rt), imm)) - } else if imm < 0 { - Decoded::with_ext(base, format!("subi {}, {}, {}", gpr(rt), gpr(ra), -imm)) - } else { - Decoded::base_only(base) - } -} - -fn decode_addis(instr: u32) -> Decoded { - let rt = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let imm = sign_ext(instr & 0xFFFF, 16); - let base = format!("addis {}, {}, 0x{:X}", gpr(rt), gpr(ra), imm as u16 as u32); - if ra == 0 { - Decoded::with_ext(base, format!("lis {}, 0x{:X}", gpr(rt), imm as u16 as u32)) - } else if imm < 0 { - Decoded::with_ext(base, format!("subis {}, {}, 0x{:X}", gpr(rt), gpr(ra), (-imm) as u16 as u32)) - } else { - 
Decoded::base_only(base) - } -} - -fn decode_d_add(mnem: &str, instr: u32) -> Decoded { - let rt = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let imm = sign_ext(instr & 0xFFFF, 16); - let base = format!("{mnem:<8}{}, {}, {}", gpr(rt), gpr(ra), imm); - if imm < 0 { - // addic → subic, addic. → subic. - let ext_mnem = mnem.replace("addic", "subic"); - Decoded::with_ext(base, format!("{ext_mnem:<8}{}, {}, {}", gpr(rt), gpr(ra), -imm)) - } else { - Decoded::base_only(base) - } -} - -fn decode_d_sub(mnem: &str, instr: u32) -> Decoded { - let rt = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let imm = sign_ext(instr & 0xFFFF, 16); - Decoded::base_only(format!("{mnem:<8}{}, {}, {}", gpr(rt), gpr(ra), imm)) -} - -fn decode_d_mul(mnem: &str, instr: u32) -> Decoded { - let rt = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let imm = sign_ext(instr & 0xFFFF, 16); - Decoded::base_only(format!("{mnem:<8}{}, {}, {}", gpr(rt), gpr(ra), imm)) -} - -fn decode_tdi(instr: u32) -> Decoded { - let to = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let imm = sign_ext(instr & 0xFFFF, 16); - let base = format!("tdi {}, {}, {}", to, gpr(ra), imm); - if let Some(cond) = trap_cond(to) { - if cond.is_empty() { - Decoded::base_only(base) - } else { - Decoded::with_ext(base, format!("td{}i{: Decoded { - let to = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let imm = sign_ext(instr & 0xFFFF, 16); - let base = format!("twi {}, {}, {}", to, gpr(ra), imm); - if let Some(cond) = trap_cond(to) { - if cond.is_empty() { - // TO=31 unconditional: no immediate form makes sense; keep base - Decoded::base_only(base) - } else { - Decoded::with_ext(base, format!("tw{}i{: Decoded { - let rs = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let uimm = instr & 0xFFFF; - let base = format!("ori {}, {}, 0x{:X}", gpr(ra), gpr(rs), uimm); - if rs == 0 && ra == 0 && uimm == 0 { - Decoded::with_ext(base, "nop".to_string()) - } else { - Decoded::base_only(base) - } -} 
- -fn decode_oris(instr: u32) -> Decoded { - let rs = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let uimm = instr & 0xFFFF; - Decoded::base_only(format!("oris {}, {}, 0x{:X}", gpr(ra), gpr(rs), uimm)) -} - -fn decode_d_logic(mnem: &str, instr: u32) -> Decoded { - let rs = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let uimm = instr & 0xFFFF; - Decoded::base_only(format!("{mnem:<8}{}, {}, 0x{:X}", gpr(ra), gpr(rs), uimm)) -} - -// ── Compare immediate ─────────────────────────────────────────────────────── - -fn decode_cmp_imm(mnem: &str, instr: u32, signed: bool) -> Decoded { - let bf = bits(instr, 8, 6); - let l_bit = bits(instr, 10, 10); - let ra = bits(instr, 15, 11); - let imm = if signed { - format!("{}", sign_ext(instr & 0xFFFF, 16)) - } else { - format!("0x{:X}", instr & 0xFFFF) - }; - let cr = if bf == 0 { String::new() } else { format!("cr{bf}, ") }; - let base = format!("{mnem:<8}{cr}{l_bit}, {}, {}", gpr(ra), imm); - - // Extended: cmpi → cmpwi/cmpdi, cmpli → cmplwi/cmpldi - let size = if l_bit == 0 { "w" } else { "d" }; - let ext_mnem = if mnem == "cmpi" { - format!("cmp{size}i") - } else { - format!("cmpl{size}i") - }; - let ext = format!("{ext_mnem:<8}{cr}{}, {}", gpr(ra), imm); - Decoded::with_ext(base, ext) -} - -// ── Load/store D-form ─────────────────────────────────────────────────────── - -fn decode_ls(mnem: &str, instr: u32) -> Decoded { - let rt = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let d = sign_ext(instr & 0xFFFF, 16); - let rn = if mnem.starts_with("lf") || mnem.starts_with("stf") { fpr(rt) } else { gpr(rt) }; - Decoded::base_only(format!("{mnem:<8}{}, {}({})", rn, d, gpr(ra))) -} - -// ── DS-form (ld/ldu/lwa) ─────────────────────────────────────────────────── - -fn decode_ds_form(instr: u32) -> Decoded { - let rt = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let ds = sign_ext(instr & 0xFFFC, 16); - let xo = instr & 3; - let mnem = match xo { 0 => "ld", 1 => "ldu", 2 => "lwa", _ => "ld?" 
}; - Decoded::base_only(format!("{mnem:<8}{}, {}({})", gpr(rt), ds, gpr(ra))) -} - -fn decode_ds_store(instr: u32) -> Decoded { - let rs = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let ds = sign_ext(instr & 0xFFFC, 16); - let xo = instr & 3; - let mnem = match xo { 0 => "std", 1 => "stdu", _ => "std?" }; - Decoded::base_only(format!("{mnem:<8}{}, {}({})", gpr(rs), ds, gpr(ra))) -} - -// ── I-form: b / bl / ba / bla ─────────────────────────────────────────────── - -fn decode_b(instr: u32, addr: u32) -> Decoded { - let li = sign_ext(instr & 0x03FFFFFC, 26); - let aa = instr & 2 != 0; - let lk = instr & 1 != 0; - let target = if aa { li as u32 } else { addr.wrapping_add(li as u32) }; - let mnem = match (aa, lk) { - (false, false) => "b", - (false, true) => "bl", - (true, false) => "ba", - (true, true) => "bla", - }; - Decoded::base_only(format!("{mnem:<8}0x{target:08X}")) -} - -// ── B-form: bc / bcl ──────────────────────────────────────────────────────── - -fn decode_bc(instr: u32, addr: u32) -> Decoded { - let bo = bits(instr, 10, 6); - let bi = bits(instr, 15, 11); - let bd = sign_ext(instr & 0xFFFC, 16); - let aa = instr & 2 != 0; - let lk = instr & 1 != 0; - let target = if aa { bd as u32 } else { addr.wrapping_add(bd as u32) }; - - let a = if aa { "a" } else { "" }; - let l = if lk { "l" } else { "" }; - let base = format!("bc{a}{l:<5}{bo}, {}, 0x{target:08X}", crb(bi)); - - // Simplified mnemonics - let cr_field = bi / 4; - let cr_bit = bi % 4; - let decr = bo & 0x04 == 0; - let uncond = bo & 0x10 != 0; // branch always if set - - if uncond && !decr { - return Decoded::with_ext(base, format!("b{a}{l:<7}0x{target:08X}")); - } - - let cond_true = bo & 0x08 != 0; - let cond_name = match (cr_bit, cond_true) { - (0, true) => "lt", (0, false) => "ge", - (1, true) => "gt", (1, false) => "le", - (2, true) => "eq", (2, false) => "ne", - (3, true) => "so", (3, false) => "ns", - _ => "??", - }; - - let cr = if cr_field == 0 { String::new() } else { 
format!("cr{cr_field}, ") }; - - let ext = if decr { - let z = if bo & 0x02 != 0 { "z" } else { "nz" }; - format!("bd{z}{cond_name}{a}{l:<4}{cr}0x{target:08X}") - } else { - format!("b{cond_name}{a}{l:<6}{cr}0x{target:08X}") - }; - Decoded::with_ext(base, ext) -} - -// ── Opcode 19 (XL-form): blr, bctr, crand, etc ───────────────────────────── - -fn decode_op19(instr: u32) -> Decoded { - let xo = bits(instr, 30, 21); - let bo = bits(instr, 10, 6); - let bi = bits(instr, 15, 11); - let lk = instr & 1 != 0; - let l = if lk { "l" } else { "" }; - - match xo { - 16 => { // bclr - let base = format!("bclr{l:<4}{bo}, {}", crb(bi)); - if bo == 20 && bi == 0 { - let ext = if lk { "blrl" } else { "blr" }; - return Decoded::with_ext(base, ext.to_string()); - } - // Simplified conditional: beqlr, bnelr, etc. - if let Some((cond, cr)) = cond_branch_ext(bo, bi) { - let cr_no_comma = cr.trim_end_matches(", "); - if cr_no_comma.is_empty() { - return Decoded::with_ext(base, format!("b{cond}lr{l}")); - } else { - return Decoded::with_ext(base, format!("b{cond}lr{l:<4}{cr_no_comma}")); - } - } - // bdnzlr / bdzlr - let decr = bo & 0x04 == 0; - let uncond = bo & 0x10 != 0; - if decr && uncond { - let z = if bo & 0x02 != 0 { "z" } else { "nz" }; - return Decoded::with_ext(base, format!("bd{z}lr{l}")); - } - Decoded::base_only(base) - } - 528 => { // bcctr - let base = format!("bcctr{l:<3}{bo}, {}", crb(bi)); - if bo == 20 && bi == 0 { - let ext = if lk { "bctrl" } else { "bctr" }; - return Decoded::with_ext(base, ext.to_string()); - } - if let Some((cond, cr)) = cond_branch_ext(bo, bi) { - let cr_no_comma = cr.trim_end_matches(", "); - if cr_no_comma.is_empty() { - return Decoded::with_ext(base, format!("b{cond}ctr{l}")); - } else { - return Decoded::with_ext(base, format!("b{cond}ctr{l:<3}{cr_no_comma}")); - } - } - Decoded::base_only(base) - } - 0 => Decoded::base_only(format!("mcrf cr{}, cr{}", bits(instr, 8, 6), bits(instr, 13, 11))), - 150 => Decoded::base_only("isync".into()), - 
50 => Decoded::base_only("rfi".into()), - // CR logical operations with simplified forms - 33 => { // crnor - let bt = bits(instr, 10, 6); let ba = bits(instr, 15, 11); let bb = bits(instr, 20, 16); - let base = format!("crnor {}, {}, {}", crb(bt), crb(ba), crb(bb)); - if ba == bb { - Decoded::with_ext(base, format!("crnot {}, {}", crb(bt), crb(ba))) - } else { Decoded::base_only(base) } - } - 193 => { // crxor - let bt = bits(instr, 10, 6); let ba = bits(instr, 15, 11); let bb = bits(instr, 20, 16); - let base = format!("crxor {}, {}, {}", crb(bt), crb(ba), crb(bb)); - if bt == ba && ba == bb { - Decoded::with_ext(base, format!("crclr {}", crb(bt))) - } else { Decoded::base_only(base) } - } - 289 => { // creqv - let bt = bits(instr, 10, 6); let ba = bits(instr, 15, 11); let bb = bits(instr, 20, 16); - let base = format!("creqv {}, {}, {}", crb(bt), crb(ba), crb(bb)); - if bt == ba && ba == bb { - Decoded::with_ext(base, format!("crset {}", crb(bt))) - } else { Decoded::base_only(base) } - } - 449 => { // cror - let bt = bits(instr, 10, 6); let ba = bits(instr, 15, 11); let bb = bits(instr, 20, 16); - let base = format!("cror {}, {}, {}", crb(bt), crb(ba), crb(bb)); - if ba == bb { - Decoded::with_ext(base, format!("crmove {}, {}", crb(bt), crb(ba))) - } else { Decoded::base_only(base) } - } - 129 => Decoded::base_only(format!("crandc {}, {}, {}", crb(bits(instr, 10, 6)), crb(bits(instr, 15, 11)), crb(bits(instr, 20, 16)))), - 225 => Decoded::base_only(format!("crnand {}, {}, {}", crb(bits(instr, 10, 6)), crb(bits(instr, 15, 11)), crb(bits(instr, 20, 16)))), - 257 => Decoded::base_only(format!("crand {}, {}, {}", crb(bits(instr, 10, 6)), crb(bits(instr, 15, 11)), crb(bits(instr, 20, 16)))), - 417 => Decoded::base_only(format!("crorc {}, {}, {}", crb(bits(instr, 10, 6)), crb(bits(instr, 15, 11)), crb(bits(instr, 20, 16)))), - _ => Decoded::base_only(format!(".long 0x{instr:08X} ; op19 xo={xo}")), - } -} - -// ── Rotate/mask instructions 
──────────────────────────────────────────────── - -fn decode_rlwimi(instr: u32) -> Decoded { - let rs = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let sh = bits(instr, 20, 16); - let mb = bits(instr, 25, 21); - let me = bits(instr, 30, 26); - let rc = if instr & 1 != 0 { "." } else { "" }; - let base = format!("rlwimi{rc:<2}{}, {}, {}, {}, {}", gpr(ra), gpr(rs), sh, mb, me); - // inslwi rA,rS,n,b = rlwimi rA,rS,32-b,b,b+n-1 → sh=32-b, n=me-mb+1 - if mb <= me && sh == (32u32.wrapping_sub(mb)) % 32 && sh != 31u32.wrapping_sub(me) { - let n = me - mb + 1; - let b = mb; - return Decoded::with_ext(base, format!("inslwi{rc:<2}{}, {}, {}, {}", gpr(ra), gpr(rs), n, b)); - } - // insrwi rA,rS,n,b = rlwimi rA,rS,32-(b+n),b,b+n-1 → sh=32-(me+1)=31-me, n=me-mb+1 - if mb <= me && sh == 31u32.wrapping_sub(me) % 32 { - let n = me - mb + 1; - let b = mb; - return Decoded::with_ext(base, format!("insrwi{rc:<2}{}, {}, {}, {}", gpr(ra), gpr(rs), n, b)); - } - Decoded::base_only(base) -} - -fn decode_rlwinm(instr: u32) -> Decoded { - let rs = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let sh = bits(instr, 20, 16); - let mb = bits(instr, 25, 21); - let me = bits(instr, 30, 26); - let rc = if instr & 1 != 0 { "." 
} else { "" }; - let base = format!("rlwinm{rc:<2}{}, {}, {}, {}, {}", gpr(ra), gpr(rs), sh, mb, me); - - // Priority-ordered simplified forms: - // slwi: shift left word immediate - if sh > 0 && mb == 0 && me == 31 - sh { - return Decoded::with_ext(base, format!("slwi{rc:<4}{}, {}, {}", gpr(ra), gpr(rs), sh)); - } - // srwi: shift right word immediate - if sh > 0 && me == 31 && sh + mb == 32 { - return Decoded::with_ext(base, format!("srwi{rc:<4}{}, {}, {}", gpr(ra), gpr(rs), 32 - sh)); - } - // rotlwi: rotate left word immediate (full 32-bit rotate) - if sh > 0 && mb == 0 && me == 31 { - return Decoded::with_ext(base, format!("rotlwi{rc:<2}{}, {}, {}", gpr(ra), gpr(rs), sh)); - } - // clrlwi: clear left n bits → rlwinm rA,rS,0,n,31 - if sh == 0 && me == 31 && mb > 0 { - return Decoded::with_ext(base, format!("clrlwi{rc:<2}{}, {}, {}", gpr(ra), gpr(rs), mb)); - } - // clrrwi: clear right n bits → rlwinm rA,rS,0,0,31-n - if sh == 0 && mb == 0 && me < 31 { - return Decoded::with_ext(base, format!("clrrwi{rc:<2}{}, {}, {}", gpr(ra), gpr(rs), 31 - me)); - } - // extlwi: extract and left-justify → rlwinm rA,rS,b,0,n-1 - if mb == 0 && sh > 0 && me < 31 { - let n = me + 1; - return Decoded::with_ext(base, format!("extlwi{rc:<2}{}, {}, {}, {}", gpr(ra), gpr(rs), n, sh)); - } - // extrwi: extract and right-justify → rlwinm rA,rS,b+n,32-n,31 - if me == 31 && mb > 0 && sh > 0 { - let n = 32 - mb; - let b = sh.wrapping_sub(n) % 32; - return Decoded::with_ext(base, format!("extrwi{rc:<2}{}, {}, {}, {}", gpr(ra), gpr(rs), n, b)); - } - Decoded::base_only(base) -} - -fn decode_rlwnm(instr: u32) -> Decoded { - let rs = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let rb = bits(instr, 20, 16); - let mb = bits(instr, 25, 21); - let me = bits(instr, 30, 26); - let rc = if instr & 1 != 0 { "." 
} else { "" }; - let base = format!("rlwnm{rc:<3}{}, {}, {}, {}, {}", gpr(ra), gpr(rs), gpr(rb), mb, me); - // rotlw: full rotate by register - if mb == 0 && me == 31 { - return Decoded::with_ext(base, format!("rotlw{rc:<3}{}, {}, {}", gpr(ra), gpr(rs), gpr(rb))); - } - Decoded::base_only(base) -} - -// ── Opcode 30 (MD/MDS-form: 64-bit rotate) ───────────────────────────────── - -fn decode_op30(instr: u32) -> Decoded { - let rs = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let rc = if instr & 1 != 0 { "." } else { "" }; - let sh = bits(instr, 20, 16) | (bits(instr, 30, 30) << 5); - let mb_me = bits(instr, 25, 21) | (bits(instr, 26, 26) << 5); - - let md_xo = bits(instr, 29, 27); // MD-form: bits 27-29 - - match md_xo { - 0 => { // rldicl: Rotate Left Doubleword Immediate then Clear Left - let mb = mb_me; - let base = format!("rldicl{rc:<2}{}, {}, {}, {}", gpr(ra), gpr(rs), sh, mb); - if sh == 0 && mb > 0 { - return Decoded::with_ext(base, format!("clrldi{rc:<2}{}, {}, {}", gpr(ra), gpr(rs), mb)); - } - if mb > 0 && sh == (64u32.wrapping_sub(mb)) & 63 { - return Decoded::with_ext(base, format!("srdi{rc:<4}{}, {}, {}", gpr(ra), gpr(rs), mb)); - } - if sh > 0 && mb == 0 { - return Decoded::with_ext(base, format!("rotldi{rc:<2}{}, {}, {}", gpr(ra), gpr(rs), sh)); - } - return Decoded::base_only(base); - } - 1 => { // rldicr: Rotate Left Doubleword Immediate then Clear Right - let me = mb_me; - let base = format!("rldicr{rc:<2}{}, {}, {}, {}", gpr(ra), gpr(rs), sh, me); - if sh > 0 && me == (63u32.wrapping_sub(sh)) & 63 { - return Decoded::with_ext(base, format!("sldi{rc:<4}{}, {}, {}", gpr(ra), gpr(rs), sh)); - } - if sh == 0 && me < 63 { - return Decoded::with_ext(base, format!("clrrdi{rc:<2}{}, {}, {}", gpr(ra), gpr(rs), 63 - me)); - } - return Decoded::base_only(base); - } - 2 => { // rldic: Rotate Left Doubleword Immediate then Clear - let mb = mb_me; - return Decoded::base_only(format!("rldic{rc:<3}{}, {}, {}, {}", gpr(ra), gpr(rs), sh, mb)); - } - 3 => 
{ // rldimi: Rotate Left Doubleword Immediate then Mask Insert - let mb = mb_me; - let base = format!("rldimi{rc:<2}{}, {}, {}, {}", gpr(ra), gpr(rs), sh, mb); - if mb > 0 { - let n = (64u32.wrapping_sub(sh).wrapping_sub(mb)) & 63; - if n > 0 { - return Decoded::with_ext(base, format!("insrdi{rc:<2}{}, {}, {}, {}", gpr(ra), gpr(rs), n, mb)); - } - } - return Decoded::base_only(base); - } - _ => {} // Fall through to MDS-form - } - - // MDS-form: bits 27-30 - let mds_xo = bits(instr, 30, 27); - let rb = bits(instr, 20, 16); - match mds_xo { - 8 => { // rldcl: Rotate Left Doubleword then Clear Left - let mb = mb_me; - let base = format!("rldcl{rc:<3}{}, {}, {}, {}", gpr(ra), gpr(rs), gpr(rb), mb); - if mb == 0 { - return Decoded::with_ext(base, format!("rotld{rc:<3}{}, {}, {}", gpr(ra), gpr(rs), gpr(rb))); - } - return Decoded::base_only(base); - } - 9 => { // rldcr: Rotate Left Doubleword then Clear Right - let me = mb_me; - return Decoded::base_only(format!("rldcr{rc:<3}{}, {}, {}, {}", gpr(ra), gpr(rs), gpr(rb), me)); - } - _ => {} - } - - Decoded::base_only(format!(".long 0x{instr:08X} ; op30")) -} - -// ── Opcode 31 (X/XO/XFX forms) ───────────────────────────────────────────── - -fn decode_op31(instr: u32) -> Decoded { - let xo_10 = bits(instr, 30, 21); // bits 21-30 - let xo_9 = bits(instr, 30, 22); // bits 22-30 - let rt = bits(instr, 10, 6); - let ra = bits(instr, 15, 11); - let rb = bits(instr, 20, 16); - let rc = if instr & 1 != 0 { "." 
} else { "" }; - let oe = bits(instr, 21, 21) != 0; - - // XO-form (bits 22-30) - match xo_9 { - 266 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("add{o}{rc:<5}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 10 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("addc{o}{rc:<4}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 138 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("adde{o}{rc:<4}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 234 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("addme{o}{rc:<3}{}, {}", gpr(rt), gpr(ra))); } - 202 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("addze{o}{rc:<3}{}, {}", gpr(rt), gpr(ra))); } - // subf rD, rA, rB = rD = rB - rA → ext: sub rD, rB, rA (natural operand order) - 40 => { - let o = if oe {"o"} else {""}; - let base = format!("subf{o}{rc:<4}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb)); - let ext = format!("sub{o}{rc:<5}{}, {}, {}", gpr(rt), gpr(rb), gpr(ra)); - return Decoded::with_ext(base, ext); - } - 8 => { - let o = if oe {"o"} else {""}; - let base = format!("subfc{o}{rc:<3}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb)); - let ext = format!("subc{o}{rc:<4}{}, {}, {}", gpr(rt), gpr(rb), gpr(ra)); - return Decoded::with_ext(base, ext); - } - 136 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("subfe{o}{rc:<3}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 232 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("subfme{o}{rc:<2}{}, {}", gpr(rt), gpr(ra))); } - 200 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("subfze{o}{rc:<2}{}, {}", gpr(rt), gpr(ra))); } - 104 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("neg{o}{rc:<5}{}, {}", gpr(rt), gpr(ra))); } - 235 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("mullw{o}{rc:<3}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 75 => { let o = if oe {"o"} else {""}; return 
Decoded::base_only(format!("mulhw{o}{rc:<3}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 11 => { return Decoded::base_only(format!("mulhwu{rc:<2}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 491 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("divw{o}{rc:<4}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 459 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("divwu{o}{rc:<3}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - // 64-bit multiply/divide - 233 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("mulld{o}{rc:<3}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 73 => { return Decoded::base_only(format!("mulhd{rc:<4}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 9 => { return Decoded::base_only(format!("mulhdu{rc:<2}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 489 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("divd{o}{rc:<4}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - 457 => { let o = if oe {"o"} else {""}; return Decoded::base_only(format!("divdu{o}{rc:<3}{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))); } - _ => {} - } - - // X-form (bits 21-30) - match xo_10 { - // Trap word (register form) - 4 => { - let to = rt; // TO field in bits 6-10 - let base = format!("tw {}, {}, {}", to, gpr(ra), gpr(rb)); - if to == 31 && ra == 0 && rb == 0 { - return Decoded::with_ext(base, "trap".to_string()); - } - if let Some(cond) = trap_cond(to) { - if !cond.is_empty() { - return Decoded::with_ext(base, format!("tw{cond:<6}{}, {}", gpr(ra), gpr(rb))); - } - } - return Decoded::base_only(base); - } - // Compare — with cmpw/cmpd extended forms - 0 => { - let bf = bits(instr, 8, 6); - let l_bit = bits(instr, 10, 10); - let cr = if bf == 0 { String::new() } else { format!("cr{bf}, ") }; - let base = format!("cmp {cr}{l_bit}, {}, {}", gpr(ra), gpr(rb)); - let size = if l_bit == 0 { "w" } else { "d" }; - let ext = format!("cmp{size:<5}{cr}{}, {}", gpr(ra), gpr(rb)); - return Decoded::with_ext(base, ext); - } - 32 
=> { - let bf = bits(instr, 8, 6); - let l_bit = bits(instr, 10, 10); - let cr = if bf == 0 { String::new() } else { format!("cr{bf}, ") }; - let base = format!("cmpl {cr}{l_bit}, {}, {}", gpr(ra), gpr(rb)); - let size = if l_bit == 0 { "w" } else { "d" }; - let ext = format!("cmpl{size:<4}{cr}{}, {}", gpr(ra), gpr(rb)); - return Decoded::with_ext(base, ext); - } - // Logic — with mr/not extended forms - 28 => { - let base = format!("and{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb)); - if rt == rb { return Decoded::with_ext(base, format!("mr{rc:<6}{}, {}", gpr(ra), gpr(rt))); } - return Decoded::base_only(base); - } - 60 => return Decoded::base_only(format!("andc{rc:<4}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 444 => { - let base = format!("or{rc:<6}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb)); - if rt == rb { return Decoded::with_ext(base, format!("mr{rc:<6}{}, {}", gpr(ra), gpr(rt))); } - return Decoded::base_only(base); - } - 412 => return Decoded::base_only(format!("orc{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 316 => return Decoded::base_only(format!("xor{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 476 => return Decoded::base_only(format!("nand{rc:<4}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 124 => { - let base = format!("nor{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb)); - if rt == rb { return Decoded::with_ext(base, format!("not{rc:<5}{}, {}", gpr(ra), gpr(rt))); } - return Decoded::base_only(base); - } - 284 => return Decoded::base_only(format!("eqv{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - // Extend - 954 => return Decoded::base_only(format!("extsb{rc:<3}{}, {}", gpr(ra), gpr(rt))), - 922 => return Decoded::base_only(format!("extsh{rc:<3}{}, {}", gpr(ra), gpr(rt))), - 986 => return Decoded::base_only(format!("extsw{rc:<3}{}, {}", gpr(ra), gpr(rt))), - 26 => return Decoded::base_only(format!("cntlzw{rc:<2}{}, {}", gpr(ra), gpr(rt))), - 58 => return Decoded::base_only(format!("cntlzd{rc:<2}{}, {}", gpr(ra), gpr(rt))), - // Shift (32-bit 
and 64-bit) - 24 => return Decoded::base_only(format!("slw{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 536 => return Decoded::base_only(format!("srw{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 792 => return Decoded::base_only(format!("sraw{rc:<4}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 824 => { let sh = bits(instr, 20, 16); return Decoded::base_only(format!("srawi{rc:<3}{}, {}, {}", gpr(ra), gpr(rt), sh)); } - 27 => return Decoded::base_only(format!("sld{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 539 => return Decoded::base_only(format!("srd{rc:<5}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - 794 => return Decoded::base_only(format!("srad{rc:<4}{}, {}, {}", gpr(ra), gpr(rt), gpr(rb))), - // Load indexed - 23 => return Decoded::base_only(format!("lwzx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 55 => return Decoded::base_only(format!("lwzux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 87 => return Decoded::base_only(format!("lbzx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 119 => return Decoded::base_only(format!("lbzux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 279 => return Decoded::base_only(format!("lhzx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 311 => return Decoded::base_only(format!("lhzux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 343 => return Decoded::base_only(format!("lhax {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 375 => return Decoded::base_only(format!("lhaux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 534 => return Decoded::base_only(format!("lwbrx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 790 => return Decoded::base_only(format!("lhbrx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 20 => return Decoded::base_only(format!("lwarx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 84 => return Decoded::base_only(format!("ldarx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - // 64-bit load indexed - 21 => return Decoded::base_only(format!("ldx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 53 => return Decoded::base_only(format!("ldux {}, {}, 
{}", gpr(rt), gpr(ra), gpr(rb))), - 341 => return Decoded::base_only(format!("lwax {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 373 => return Decoded::base_only(format!("lwaux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 532 => return Decoded::base_only(format!("ldbrx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 533 => return Decoded::base_only(format!("lswx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 597 => return Decoded::base_only(format!("lswi {}, {}, {}", gpr(rt), gpr(ra), rb)), - // Store indexed - 151 => return Decoded::base_only(format!("stwx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 183 => return Decoded::base_only(format!("stwux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 215 => return Decoded::base_only(format!("stbx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 247 => return Decoded::base_only(format!("stbux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 407 => return Decoded::base_only(format!("sthx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 439 => return Decoded::base_only(format!("sthux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 662 => return Decoded::base_only(format!("stwbrx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 918 => return Decoded::base_only(format!("sthbrx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 150 => return Decoded::base_only(format!("stwcx. {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - // 64-bit store indexed - 149 => return Decoded::base_only(format!("stdx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 181 => return Decoded::base_only(format!("stdux {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 214 => return Decoded::base_only(format!("stdcx. 
{}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 660 => return Decoded::base_only(format!("stdbrx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 661 => return Decoded::base_only(format!("stswx {}, {}, {}", gpr(rt), gpr(ra), gpr(rb))), - 725 => return Decoded::base_only(format!("stswi {}, {}, {}", gpr(rt), gpr(ra), rb)), - // FP load/store indexed - 535 => return Decoded::base_only(format!("lfsx {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - 567 => return Decoded::base_only(format!("lfsux {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - 599 => return Decoded::base_only(format!("lfdx {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - 631 => return Decoded::base_only(format!("lfdux {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - 663 => return Decoded::base_only(format!("stfsx {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - 695 => return Decoded::base_only(format!("stfsux {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - 727 => return Decoded::base_only(format!("stfdx {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - 759 => return Decoded::base_only(format!("stfdux {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - 983 => return Decoded::base_only(format!("stfiwx {}, {}, {}", fpr(rt), gpr(ra), gpr(rb))), - // Trap doubleword (register form) - 68 => { - let to = rt; - let base = format!("td {}, {}, {}", to, gpr(ra), gpr(rb)); - if to == 31 && ra == 0 && rb == 0 { - return Decoded::with_ext(base, "trap".to_string()); - } - if let Some(cond) = trap_cond(to) { - if !cond.is_empty() { - return Decoded::with_ext(base, format!("td{cond:<6}{}, {}", gpr(ra), gpr(rb))); - } - } - return Decoded::base_only(base); - } - // AltiVec load indexed (standard 5-bit vr0-vr31) - 6 => return Decoded::base_only(format!("lvsl {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 38 => return Decoded::base_only(format!("lvsr {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 7 => return Decoded::base_only(format!("lvebx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 39 => return Decoded::base_only(format!("lvehx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 71 => 
return Decoded::base_only(format!("lvewx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 103 => return Decoded::base_only(format!("lvx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 359 => return Decoded::base_only(format!("lvxl {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 519 => return Decoded::base_only(format!("lvlx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 551 => return Decoded::base_only(format!("lvrx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 775 => return Decoded::base_only(format!("lvlxl {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 807 => return Decoded::base_only(format!("lvrxl {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - // AltiVec store indexed - 135 => return Decoded::base_only(format!("stvebx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 167 => return Decoded::base_only(format!("stvehx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 199 => return Decoded::base_only(format!("stvewx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 231 => return Decoded::base_only(format!("stvx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 487 => return Decoded::base_only(format!("stvxl {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 647 => return Decoded::base_only(format!("stvlx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 679 => return Decoded::base_only(format!("stvrx {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 903 => return Decoded::base_only(format!("stvlxl {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - 935 => return Decoded::base_only(format!("stvrxl {}, {}, {}", vr(rt), gpr(ra), gpr(rb))), - // MSR - 83 => return Decoded::base_only(format!("mfmsr {}", gpr(rt))), - 146 => return Decoded::base_only(format!("mtmsr {}", gpr(rt))), - 178 => return Decoded::base_only(format!("mtmsrd {}", gpr(rt))), - // Time base - 371 => { - let tbr = (bits(instr, 20, 16) << 5) | bits(instr, 15, 11); - let base = format!("mftb {}, {}", gpr(rt), tbr); - return match tbr { - 268 => Decoded::with_ext(base, format!("mftb {}", gpr(rt))), - 269 => Decoded::with_ext(base, format!("mftbu {}", gpr(rt))), - _ => Decoded::base_only(base), 
- }; - } - // Condition register XER - 512 => { - let crd = bits(instr, 8, 6); - return Decoded::base_only(format!("mcrxr cr{crd}")); - } - // SPR — with mflr/mfctr/mfxer / mtlr/mtctr/mtxer extended forms - 339 => { - let spr_raw = (bits(instr, 20, 16) << 5) | bits(instr, 15, 11); - let base = format!("mfspr {}, {}", gpr(rt), spr_name(spr_raw)); - let ext = match spr_raw { - 8 => Some(format!("mflr {}", gpr(rt))), - 9 => Some(format!("mfctr {}", gpr(rt))), - 1 => Some(format!("mfxer {}", gpr(rt))), - _ => None, - }; - return match ext { - Some(e) => Decoded::with_ext(base, e), - None => Decoded::base_only(base), - }; - } - 467 => { - let spr_raw = (bits(instr, 20, 16) << 5) | bits(instr, 15, 11); - let base = format!("mtspr {}, {}", spr_name(spr_raw), gpr(rt)); - let ext = match spr_raw { - 8 => Some(format!("mtlr {}", gpr(rt))), - 9 => Some(format!("mtctr {}", gpr(rt))), - 1 => Some(format!("mtxer {}", gpr(rt))), - _ => None, - }; - return match ext { - Some(e) => Decoded::with_ext(base, e), - None => Decoded::base_only(base), - }; - } - 19 => return Decoded::base_only(format!("mfcr {}", gpr(rt))), - 144 => { - let fxm = bits(instr, 19, 12); - let base = format!("mtcrf 0x{:02X}, {}", fxm, gpr(rt)); - if fxm == 0xFF { - return Decoded::with_ext(base, format!("mtcr {}", gpr(rt))); - } - return Decoded::base_only(base); - } - // Cache/sync — with lwsync extended form - 598 => { - let l_field = bits(instr, 10, 9); - let base = format!("sync {}", l_field); - if l_field == 0 { - return Decoded::with_ext(base, "sync".to_string()); - } else if l_field == 1 { - return Decoded::with_ext(base, "lwsync".to_string()); - } - return Decoded::base_only(base); - } - 854 => return Decoded::base_only("eieio".into()), - 86 => return Decoded::base_only(format!("dcbf {}, {}", gpr(ra), gpr(rb))), - 54 => return Decoded::base_only(format!("dcbst {}, {}", gpr(ra), gpr(rb))), - 278 => return Decoded::base_only(format!("dcbt {}, {}", gpr(ra), gpr(rb))), - 246 => return 
Decoded::base_only(format!("dcbtst {}, {}", gpr(ra), gpr(rb))), - 1014 => { - if rt == 1 { - return Decoded::base_only(format!("dcbz128 {}, {}", gpr(ra), gpr(rb))); - } - return Decoded::base_only(format!("dcbz {}, {}", gpr(ra), gpr(rb))); - } - 470 => return Decoded::base_only(format!("dcbi {}, {}", gpr(ra), gpr(rb))), - 982 => return Decoded::base_only(format!("icbi {}, {}", gpr(ra), gpr(rb))), - 566 => return Decoded::base_only("tlbsync".into()), - _ => {} - } - - // XS-form: sradi (bits 21-29, bit 30 is sh[5]) - let xo_9b = bits(instr, 29, 21); - if xo_9b == 413 { - let sh = bits(instr, 20, 16) | (bits(instr, 30, 30) << 5); - return Decoded::base_only(format!("sradi{rc:<3}{}, {}, {}", gpr(ra), gpr(rt), sh)); - } - - Decoded::base_only(format!(".long 0x{instr:08X} ; op31 xo10={xo_10} xo9={xo_9}")) -} - -// ── Opcode 59 (FP single-precision) ──────────────────────────────────────── - -fn decode_op59(instr: u32) -> Decoded { - let xo = bits(instr, 30, 26); - let frt = bits(instr, 10, 6); - let fra = bits(instr, 15, 11); - let frb = bits(instr, 20, 16); - let frc = bits(instr, 25, 21); - let rc = if instr & 1 != 0 { "." 
} else { "" }; - - Decoded::base_only(match xo { - 18 => format!("fdivs{rc:<3}{}, {}, {}", fpr(frt), fpr(fra), fpr(frb)), - 20 => format!("fsubs{rc:<3}{}, {}, {}", fpr(frt), fpr(fra), fpr(frb)), - 21 => format!("fadds{rc:<3}{}, {}, {}", fpr(frt), fpr(fra), fpr(frb)), - 22 => format!("fsqrts{rc:<2}{}, {}", fpr(frt), fpr(frb)), - 24 => format!("fres{rc:<4}{}, {}", fpr(frt), fpr(frb)), - 25 => format!("fmuls{rc:<3}{}, {}, {}", fpr(frt), fpr(fra), fpr(frc)), - 28 => format!("fmsubs{rc:<2}{}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb)), - 29 => format!("fmadds{rc:<2}{}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb)), - 30 => format!("fnmsubs{rc} {}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb)), - 31 => format!("fnmadds{rc} {}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb)), - _ => format!(".long 0x{instr:08X} ; op59 xo={xo}"), - }) -} - -// ── Opcode 63 (FP double-precision / X-form FP) ──────────────────────────── - -fn decode_op63(instr: u32) -> Decoded { - let xo_a = bits(instr, 30, 26); // A-form xo (bits 26-30) - let xo_x = bits(instr, 30, 21); // X-form xo (bits 21-30) - let frt = bits(instr, 10, 6); - let fra = bits(instr, 15, 11); - let frb = bits(instr, 20, 16); - let frc = bits(instr, 25, 21); - let rc = if instr & 1 != 0 { "." 
} else { "" }; - - // A-form first - match xo_a { - 18 => return Decoded::base_only(format!("fdiv{rc:<4}{}, {}, {}", fpr(frt), fpr(fra), fpr(frb))), - 20 => return Decoded::base_only(format!("fsub{rc:<4}{}, {}, {}", fpr(frt), fpr(fra), fpr(frb))), - 21 => return Decoded::base_only(format!("fadd{rc:<4}{}, {}, {}", fpr(frt), fpr(fra), fpr(frb))), - 22 => return Decoded::base_only(format!("fsqrt{rc:<3}{}, {}", fpr(frt), fpr(frb))), - 23 => return Decoded::base_only(format!("fsel{rc:<4}{}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb))), - 25 => return Decoded::base_only(format!("fmul{rc:<4}{}, {}, {}", fpr(frt), fpr(fra), fpr(frc))), - 26 => return Decoded::base_only(format!("frsqrte{rc} {}, {}", fpr(frt), fpr(frb))), - 28 => return Decoded::base_only(format!("fmsub{rc:<3}{}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb))), - 29 => return Decoded::base_only(format!("fmadd{rc:<3}{}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb))), - 30 => return Decoded::base_only(format!("fnmsub{rc:<2}{}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb))), - 31 => return Decoded::base_only(format!("fnmadd{rc:<2}{}, {}, {}, {}", fpr(frt), fpr(fra), fpr(frc), fpr(frb))), - _ => {} - } - - // X-form - match xo_x { - 0 => { let bf = bits(instr, 8, 6); return Decoded::base_only(format!("fcmpu cr{bf}, {}, {}", fpr(fra), fpr(frb))); } - 32 => { let bf = bits(instr, 8, 6); return Decoded::base_only(format!("fcmpo cr{bf}, {}, {}", fpr(fra), fpr(frb))); } - 12 => return Decoded::base_only(format!("frsp{rc:<4}{}, {}", fpr(frt), fpr(frb))), - 14 => return Decoded::base_only(format!("fctiw{rc:<3}{}, {}", fpr(frt), fpr(frb))), - 15 => return Decoded::base_only(format!("fctiwz{rc:<2}{}, {}", fpr(frt), fpr(frb))), - 40 => return Decoded::base_only(format!("fneg{rc:<4}{}, {}", fpr(frt), fpr(frb))), - 72 => return Decoded::base_only(format!("fmr{rc:<5}{}, {}", fpr(frt), fpr(frb))), - 136 => return Decoded::base_only(format!("fnabs{rc:<3}{}, {}", fpr(frt), fpr(frb))), - 264 => 
return Decoded::base_only(format!("fabs{rc:<4}{}, {}", fpr(frt), fpr(frb))), - 583 => return Decoded::base_only(format!("mffs{rc:<4}{}", fpr(frt))), - 711 => return Decoded::base_only(format!("mtfsf 0x{:02X}, {}", bits(instr, 17, 10), fpr(frb))), - // 64-bit FP conversions - 814 => return Decoded::base_only(format!("fctid{rc:<3}{}, {}", fpr(frt), fpr(frb))), - 815 => return Decoded::base_only(format!("fctidz{rc:<2}{}, {}", fpr(frt), fpr(frb))), - 846 => return Decoded::base_only(format!("fcfid{rc:<3}{}, {}", fpr(frt), fpr(frb))), - // FPSCR bit manipulation - 38 => return Decoded::base_only(format!("mtfsb1{rc:<2}{}", bits(instr, 10, 6))), - 70 => return Decoded::base_only(format!("mtfsb0{rc:<2}{}", bits(instr, 10, 6))), - 134 => { - let bf = bits(instr, 8, 6); - let imm = bits(instr, 19, 16); - return Decoded::base_only(format!("mtfsfi{rc:<2}cr{bf}, {}", imm)); - } - 64 => { - let bf = bits(instr, 8, 6); - let bfa = bits(instr, 13, 11); - return Decoded::base_only(format!("mcrfs cr{bf}, cr{bfa}")); - } - _ => {} - } - - Decoded::base_only(format!(".long 0x{instr:08X} ; op63 xo_a={xo_a} xo_x={xo_x}")) -} - -// ── Opcode 4 (AltiVec / VMX128 load-store) ───────────────────────────────── - -fn decode_op4(instr: u32) -> Decoded { - let vd = bits(instr, 10, 6); - let va = bits(instr, 15, 11); - let vb = bits(instr, 20, 16); - - // 1. 
VMX128 load/store (VX128_1 form): key = (bits 21-27 << 4) | bits 30-31 - let vmx_ls = (bits(instr, 27, 21) << 4) | bits(instr, 31, 30); - let vd128_val = vd128(instr); - let vmx_ls_name = match vmx_ls { - 0b00000000011 => Some("lvsl128"), - 0b00001000011 => Some("lvsr128"), - 0b00010000011 => Some("lvewx128"), - 0b00011000011 => Some("lvx128"), - 0b01011000011 => Some("lvxl128"), - 0b10000000011 => Some("lvlx128"), - 0b10001000011 => Some("lvrx128"), - 0b11000000011 => Some("lvlxl128"), - 0b11001000011 => Some("lvrxl128"), - 0b00110000011 => Some("stvewx128"), - 0b00111000011 => Some("stvx128"), - 0b01111000011 => Some("stvxl128"), - 0b10100000011 => Some("stvlx128"), - 0b10101000011 => Some("stvrx128"), - 0b11100000011 => Some("stvlxl128"), - 0b11101000011 => Some("stvrxl128"), - _ => None, - }; - if let Some(mnem) = vmx_ls_name { - return Decoded::base_only(format!("{mnem:<12}{}, {}, {}", vr(vd128_val), gpr(va), gpr(vb))); - } - - // 2. VX-form: bits 21-31 (11-bit key) - let vx = bits(instr, 31, 21); - - // 3-operand: VD, VA, VB - let vx_3op = match vx { - 0 => Some("vaddubm"), 2 => Some("vmaxub"), 4 => Some("vrlb"), - 8 => Some("vmuloub"), 10 => Some("vaddfp"), 12 => Some("vmrghb"), - 14 => Some("vpkuhum"), 64 => Some("vadduhm"), 66 => Some("vmaxuh"), - 68 => Some("vrlh"), 72 => Some("vmulouh"), 74 => Some("vsubfp"), - 76 => Some("vmrghh"), 78 => Some("vpkuwum"), 128 => Some("vadduwm"), - 130 => Some("vmaxuw"), 132 => Some("vrlw"), 140 => Some("vmrghw"), - 142 => Some("vpkuhus"), 206 => Some("vpkuwus"), - 258 => Some("vmaxsb"), 260 => Some("vslb"), 264 => Some("vmulosb"), - 268 => Some("vmrglb"), 270 => Some("vpkshus"), - 322 => Some("vmaxsh"), 324 => Some("vslh"), 328 => Some("vmulosh"), - 332 => Some("vmrglh"), 334 => Some("vpkswus"), - 384 => Some("vaddcuw"), 386 => Some("vmaxsw"), 388 => Some("vslw"), - 396 => Some("vmrglw"), 398 => Some("vpkshss"), - 452 => Some("vsl"), 462 => Some("vpkswss"), - 512 => Some("vaddubs"), 514 => Some("vminub"), 516 => 
Some("vsrb"), - 520 => Some("vmuleub"), - 576 => Some("vadduhs"), 578 => Some("vminuh"), 580 => Some("vsrh"), - 584 => Some("vmuleuh"), - 640 => Some("vadduws"), 642 => Some("vminuw"), 644 => Some("vsrw"), - 708 => Some("vsr"), - 768 => Some("vaddsbs"), 770 => Some("vminsb"), 772 => Some("vsrab"), - 776 => Some("vmulesb"), 782 => Some("vpkpx"), - 832 => Some("vaddshs"), 834 => Some("vminsh"), 836 => Some("vsrah"), - 840 => Some("vmulesh"), - 896 => Some("vaddsws"), 898 => Some("vminsw"), 900 => Some("vsraw"), - 1024 => Some("vsububm"), 1026 => Some("vavgub"), 1028 => Some("vand"), - 1034 => Some("vmaxfp"), 1036 => Some("vslo"), - 1088 => Some("vsubuhm"), 1090 => Some("vavguh"), 1092 => Some("vandc"), - 1098 => Some("vminfp"), 1100 => Some("vsro"), - 1152 => Some("vsubuwm"), 1154 => Some("vavguw"), 1156 => Some("vor"), - 1220 => Some("vxor"), - 1282 => Some("vavgsb"), 1284 => Some("vnor"), - 1346 => Some("vavgsh"), - 1408 => Some("vsubcuw"), 1410 => Some("vavgsw"), - 1536 => Some("vsububs"), 1544 => Some("vsum4ubs"), - 1600 => Some("vsubuhs"), 1608 => Some("vsum4shs"), - 1664 => Some("vsubuws"), 1672 => Some("vsum2sws"), - 1792 => Some("vsubsbs"), 1800 => Some("vsum4sbs"), - 1856 => Some("vsubshs"), - 1920 => Some("vsubsws"), 1928 => Some("vsumsws"), - _ => None, - }; - if let Some(mnem) = vx_3op { - return Decoded::base_only(format!("{mnem:<8}{}, {}, {}", vr(vd), vr(va), vr(vb))); - } - - // Unary: VD, VB (VA field unused) - let vx_unary = match vx { - 266 => Some("vrefp"), 330 => Some("vrsqrtefp"), 394 => Some("vexptefp"), - 458 => Some("vlogefp"), 522 => Some("vrfin"), 586 => Some("vrfiz"), - 650 => Some("vrfip"), 714 => Some("vrfim"), - 526 => Some("vupkhsb"), 590 => Some("vupkhsh"), - 654 => Some("vupklsb"), 718 => Some("vupklsh"), - 846 => Some("vupkhpx"), 974 => Some("vupklpx"), - _ => None, - }; - if let Some(mnem) = vx_unary { - return Decoded::base_only(format!("{mnem:<8}{}, {}", vr(vd), vr(vb))); - } - - // VD, VB, UIMM (VA field = UIMM) - let vx_uimm = 
match vx { - 524 => Some("vspltb"), 588 => Some("vsplth"), 652 => Some("vspltw"), - 778 => Some("vcfux"), 842 => Some("vcfsx"), - 906 => Some("vctuxs"), 970 => Some("vctsxs"), - _ => None, - }; - if let Some(mnem) = vx_uimm { - return Decoded::base_only(format!("{mnem:<8}{}, {}, {}", vr(vd), vr(vb), va)); - } - - // VD, SIMM (VA field = sign-extended 5-bit immediate) - match vx { - 780 => { - let simm = sign_ext(va, 5); - return Decoded::base_only(format!("vspltisb {}, {}", vr(vd), simm)); - } - 844 => { - let simm = sign_ext(va, 5); - return Decoded::base_only(format!("vspltish {}, {}", vr(vd), simm)); - } - 908 => { - let simm = sign_ext(va, 5); - return Decoded::base_only(format!("vspltisw {}, {}", vr(vd), simm)); - } - // Special - 1540 => return Decoded::base_only(format!("mfvscr {}", vr(vd))), - 1604 => return Decoded::base_only(format!("mtvscr {}", vr(vb))), - _ => {} - } - - // 3. VC-form (compare): bits 22-31 (10-bit key), Rc in bit 21 - let vc = bits(instr, 31, 22); - let vc_rc = if bits(instr, 21, 21) != 0 { "." } else { "" }; - let vc_cmp = match vc { - 6 => Some("vcmpequb"), 70 => Some("vcmpequh"), 134 => Some("vcmpequw"), - 198 => Some("vcmpeqfp"), 454 => Some("vcmpgefp"), - 518 => Some("vcmpgtub"), 582 => Some("vcmpgtuh"), 646 => Some("vcmpgtuw"), - 710 => Some("vcmpgtfp"), - 774 => Some("vcmpgtsb"), 838 => Some("vcmpgtsh"), 902 => Some("vcmpgtsw"), - 966 => Some("vcmpbfp"), - _ => None, - }; - if let Some(mnem) = vc_cmp { - let full = format!("{mnem}{vc_rc}"); - return Decoded::base_only(format!("{full:<12}{}, {}, {}", vr(vd), vr(va), vr(vb))); - } - - // 4. 
VA-form: bits 26-31 (6-bit key), 4-operand VD, VA, VB, VC - let va_key = bits(instr, 31, 26); - let vc_reg = bits(instr, 25, 21); - - let va_4op = match va_key { - 32 => Some("vmhaddshs"), 33 => Some("vmhraddshs"), - 34 => Some("vmladduhm"), - 36 => Some("vmsumubm"), 37 => Some("vmsummbm"), - 38 => Some("vmsumuhm"), 39 => Some("vmsumuhs"), - 40 => Some("vmsumshm"), 41 => Some("vmsumshs"), - 42 => Some("vsel"), 43 => Some("vperm"), - _ => None, - }; - if let Some(mnem) = va_4op { - return Decoded::base_only(format!("{mnem:<12}{}, {}, {}, {}", vr(vd), vr(va), vr(vb), vr(vc_reg))); - } - - match va_key { - 44 => { // vsldoi VD, VA, VB, SH - let sh = bits(instr, 25, 22); - return Decoded::base_only(format!("vsldoi {}, {}, {}, {}", vr(vd), vr(va), vr(vb), sh)); - } - 46 => return Decoded::base_only(format!("vmaddfp {}, {}, {}, {}", vr(vd), vr(va), vr(vc_reg), vr(vb))), - 47 => return Decoded::base_only(format!("vnmsubfp {}, {}, {}, {}", vr(vd), vr(va), vr(vc_reg), vr(vb))), - _ => {} - } - - // 5. 
vsldoi128: bit 27 == 1 (VX128_5 form) - if bits(instr, 27, 27) == 1 { - let vd_128 = vd128(instr); - let va_128 = va128(instr); - let vb_128 = vb128(instr); - let sh = bits(instr, 25, 22); - return Decoded::base_only(format!("vsldoi128 {}, {}, {}, {}", vr(vd_128), vr(va_128), vr(vb_128), sh)); - } - - Decoded::base_only(format!(".long 0x{instr:08X} ; op4")) -} - -// ── Opcode 5 (VMX128 operations) ─────────────────────────────────────────── - -fn decode_op5(instr: u32) -> Decoded { - let vd = vd128(instr); - let va = va128(instr); - let vb = vb128(instr); - let vc = bits(instr, 25, 23); // 3-bit VC field - - // Table 1: vperm128 — key = (bit 22 << 5) | bit 27 - let key1 = (bits(instr, 22, 22) << 5) | bits(instr, 27, 27); - if key1 == 0 { - return Decoded::base_only(format!("vperm128 {}, {}, {}, {}", vr(vd), vr(va), vr(vb), vc)); - } - - // Table 2: key = (bits 22-25 << 2) | bit 27 - let key2 = (bits(instr, 25, 22) << 2) | bits(instr, 27, 27); - - // 3-operand VD, VA, VB - let op5_3 = match key2 { - 0b000001 => Some("vaddfp128"), - 0b000101 => Some("vsubfp128"), - 0b001001 => Some("vmulfp128"), - 0b011001 => Some("vmsum3fp128"), - 0b011101 => Some("vmsum4fp128"), - 0b100000 => Some("vpkshss128"), - 0b100100 => Some("vpkshus128"), - 0b101000 => Some("vpkswss128"), - 0b101100 => Some("vpkswus128"), - 0b110000 => Some("vpkuhum128"), - 0b110100 => Some("vpkuhus128"), - 0b111000 => Some("vpkuwum128"), - 0b111100 => Some("vpkuwus128"), - 0b100001 => Some("vand128"), - 0b100101 => Some("vandc128"), - 0b101001 => Some("vnor128"), - 0b101101 => Some("vor128"), - 0b110001 => Some("vxor128"), - 0b110101 => Some("vsel128"), - 0b111001 => Some("vslo128"), - 0b111101 => Some("vsro128"), - _ => None, - }; - if let Some(mnem) = op5_3 { - return Decoded::base_only(format!("{mnem:<12}{}, {}, {}", vr(vd), vr(va), vr(vb))); - } - - // 4-operand VD, VA, VB, VC (3-bit) - let op5_4 = match key2 { - 0b001101 => Some("vmaddfp128"), - 0b010001 => Some("vmaddcfp128"), - 0b010101 => 
Some("vnmsubfp128"), - _ => None, - }; - if let Some(mnem) = op5_4 { - return Decoded::base_only(format!("{mnem:<12}{}, {}, {}, {}", vr(vd), vr(va), vr(vb), vc)); - } - - Decoded::base_only(format!(".long 0x{instr:08X} ; op5")) -} - -// ── Opcode 6 (VMX128 special operations) ─────────────────────────────────── - -fn decode_op6(instr: u32) -> Decoded { - let vd = vd128(instr); - let vb = vb128(instr); - - // Table 1: vpermwi128 (kVX128_P form) - let key1 = (bits(instr, 22, 21) << 5) | bits(instr, 27, 26); - if key1 == 0b0100001 { - let uimm = bits(instr, 15, 11) | (bits(instr, 25, 23) << 5); - return Decoded::base_only(format!("vpermwi128 {}, {}, 0x{:X}", vr(vd), vr(vb), uimm)); - } - - // Table 2: vpkd3d128, vrlimi128 (kVX128_4 form) - let key2 = (bits(instr, 23, 21) << 4) | bits(instr, 27, 26); - match key2 { - 0b1100001 => { - let imm = bits(instr, 15, 11); - let z = bits(instr, 25, 24); - return Decoded::base_only(format!("vpkd3d128 {}, {}, {}, {}", vr(vd), vr(vb), imm, z)); - } - 0b1110001 => { - let imm = bits(instr, 15, 11); - let z = bits(instr, 25, 24); - return Decoded::base_only(format!("vrlimi128 {}, {}, {}, {}", vr(vd), vr(vb), imm, z)); - } - _ => {} - } - - // Table 3: kVX128_3 form (key = bits 21-27) - let key3 = bits(instr, 27, 21); - let uimm3 = bits(instr, 15, 11); - match key3 { - // Unary: VD, VB - 0b0110011 => return Decoded::base_only(format!("vrfim128 {}, {}", vr(vd), vr(vb))), - 0b0110111 => return Decoded::base_only(format!("vrfin128 {}, {}", vr(vd), vr(vb))), - 0b0111011 => return Decoded::base_only(format!("vrfip128 {}, {}", vr(vd), vr(vb))), - 0b0111111 => return Decoded::base_only(format!("vrfiz128 {}, {}", vr(vd), vr(vb))), - 0b1100011 => return Decoded::base_only(format!("vrefp128 {}, {}", vr(vd), vr(vb))), - 0b1100111 => return Decoded::base_only(format!("vrsqrtefp128 {}, {}", vr(vd), vr(vb))), - 0b1101011 => return Decoded::base_only(format!("vexptefp128 {}, {}", vr(vd), vr(vb))), - 0b1101111 => return 
Decoded::base_only(format!("vlogefp128 {}, {}", vr(vd), vr(vb))), - // VD, VB, UIMM - 0b0100011 => return Decoded::base_only(format!("vcfpsxws128 {}, {}, {}", vr(vd), vr(vb), uimm3)), - 0b0100111 => return Decoded::base_only(format!("vcfpuxws128 {}, {}, {}", vr(vd), vr(vb), uimm3)), - 0b0101011 => return Decoded::base_only(format!("vcsxwfp128 {}, {}, {}", vr(vd), vr(vb), uimm3)), - 0b0101111 => return Decoded::base_only(format!("vcuxwfp128 {}, {}, {}", vr(vd), vr(vb), uimm3)), - 0b1110011 => return Decoded::base_only(format!("vspltw128 {}, {}, {}", vr(vd), vr(vb), uimm3)), - 0b1111111 => return Decoded::base_only(format!("vupkd3d128 {}, {}, {}", vr(vd), vr(vb), uimm3)), - // VD, SIMM - 0b1110111 => { - let simm = sign_ext(uimm3, 5); - return Decoded::base_only(format!("vspltisw128 {}, {}", vr(vd), simm)); - } - _ => {} - } - - // Table 4: Compare (kVX128_R form) - // key = (bits 22-24 << 3) | bit 27 - let key4 = (bits(instr, 24, 22) << 3) | bits(instr, 27, 27); - let va = va128(instr); - let rc6 = if bits(instr, 25, 25) != 0 { "." 
} else { "" }; - let cmp_name = match key4 { - 0b000000 => Some("vcmpeqfp128"), - 0b001000 => Some("vcmpgefp128"), - 0b010000 => Some("vcmpgtfp128"), - 0b011000 => Some("vcmpbfp128"), - 0b100000 => Some("vcmpequw128"), - _ => None, - }; - if let Some(mnem) = cmp_name { - let full = format!("{mnem}{rc6}"); - return Decoded::base_only(format!("{full:<14}{}, {}, {}", vr(vd), vr(va), vr(vb))); - } - - // Table 5: Shift/rotate/misc (kVX128 form) - // key = (bits 22-25 << 2) | bit 27 - let key5 = (bits(instr, 25, 22) << 2) | bits(instr, 27, 27); - let shift_name = match key5 { - 0b000101 => Some("vrlw128"), - 0b001101 => Some("vslw128"), - 0b010101 => Some("vsraw128"), - 0b011101 => Some("vsrw128"), - 0b101000 => Some("vmaxfp128"), - 0b101100 => Some("vminfp128"), - 0b110000 => Some("vmrghw128"), - 0b110100 => Some("vmrglw128"), - 0b111000 => Some("vupkhsb128"), - 0b111100 => Some("vupklsb128"), - _ => None, - }; - if let Some(mnem) = shift_name { - return Decoded::base_only(format!("{mnem:<12}{}, {}, {}", vr(vd), vr(va), vr(vb))); - } - - Decoded::base_only(format!(".long 0x{instr:08X} ; op6")) + let d = decode(instr, addr); + let t = format(&d); + Decoded { base: t.disasm, ext: t.ext_disasm } } diff --git a/crates/xenia-analysis/src/sinks/duckdb.rs b/crates/xenia-analysis/src/sinks/duckdb.rs new file mode 100644 index 0000000..1d20b95 --- /dev/null +++ b/crates/xenia-analysis/src/sinks/duckdb.rs @@ -0,0 +1,37 @@ +//! DuckDB sink — appends rich disasm items to the `instructions` table. +//! +//! Column layout matches [`crate::db`]: address, raw, mnemonic, operands, +//! disasm, ext_mnemonic, ext_operands, ext_disasm, target_hex, section, function, label. + +use duckdb::{Appender, params}; + +use crate::disasm::RichDisasmItem; + +/// Append every item to the appender. Returns the number of rows written. +/// Does NOT flush — the caller decides when to flush, since multiple +/// section iterators typically share one appender. 
+pub fn append_instructions<'a>( + appender: &mut Appender<'_>, + items: impl IntoIterator>, +) -> duckdb::Result { + let mut count: u64 = 0; + for ri in items { + let t = &ri.item.text; + appender.append_row(params![ + ri.item.addr as i64, + ri.item.raw as i64, + t.mnemonic.as_str(), + t.operands.as_str(), + t.disasm.as_str(), + t.ext_mnemonic.as_deref(), + t.ext_operands.as_deref(), + t.ext_disasm.as_deref(), + t.branch_target.map(|t| t as i64), + ri.section, + ri.function.map(|f| f as i64), + ri.label, + ])?; + count += 1; + } + Ok(count) +} diff --git a/crates/xenia-analysis/src/sinks/json.rs b/crates/xenia-analysis/src/sinks/json.rs new file mode 100644 index 0000000..2af660e --- /dev/null +++ b/crates/xenia-analysis/src/sinks/json.rs @@ -0,0 +1,63 @@ +//! JSON Lines sink — one structured row per line, constant memory. +//! +//! Suited for piping into `jq`, importing into pandas / DuckDB's +//! `read_json_auto`, or feeding downstream tooling that expects a +//! line-delimited stream rather than a single megaobject. + +use std::io::{self, Write}; + +use serde::Serialize; + +use crate::disasm::RichDisasmItem; + +#[derive(Serialize)] +struct JsonRow<'a> { + addr: u32, + raw: u32, + mnemonic: &'a str, + operands: &'a str, + disasm: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + ext_mnemonic: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + ext_operands: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + ext_disasm: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + branch_target: Option, + section: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + function: Option, + #[serde(skip_serializing_if = "Option::is_none")] + label: Option<&'a str>, +} + +/// Write each item as a single JSON object on its own line. Returns the +/// number of rows written. 
+pub fn write_jsonl<'a, W: Write>( + out: &mut W, + items: impl IntoIterator>, +) -> io::Result { + let mut count: u64 = 0; + for ri in items { + let t = &ri.item.text; + let row = JsonRow { + addr: ri.item.addr, + raw: ri.item.raw, + mnemonic: &t.mnemonic, + operands: &t.operands, + disasm: &t.disasm, + ext_mnemonic: t.ext_mnemonic.as_deref(), + ext_operands: t.ext_operands.as_deref(), + ext_disasm: t.ext_disasm.as_deref(), + branch_target: t.branch_target, + section: ri.section, + function: ri.function, + label: ri.label, + }; + serde_json::to_writer(&mut *out, &row)?; + out.write_all(b"\n")?; + count += 1; + } + Ok(count) +} diff --git a/crates/xenia-analysis/src/sinks/mod.rs b/crates/xenia-analysis/src/sinks/mod.rs new file mode 100644 index 0000000..fdc7afe --- /dev/null +++ b/crates/xenia-analysis/src/sinks/mod.rs @@ -0,0 +1,8 @@ +//! Output sinks for [`crate::disasm::RichDisasmItem`] streams. +//! +//! Each sink consumes the same iterator shape and writes to a different +//! medium: human-readable .asm text, JSON Lines, or DuckDB rows. + +pub mod duckdb; +pub mod json; +pub mod text; diff --git a/crates/xenia-analysis/src/sinks/text.rs b/crates/xenia-analysis/src/sinks/text.rs new file mode 100644 index 0000000..008f6e4 --- /dev/null +++ b/crates/xenia-analysis/src/sinks/text.rs @@ -0,0 +1,58 @@ +//! Text sink — renders one .asm instruction line with optional +//! branch-target / data-ref annotations. +//! +//! The full `write_asm` orchestration (section headers, function prologue +//! info, xref comment blocks, hex-dump of data sections) stays in +//! [`crate::formatter`]; this sink only owns the per-instruction line. 
+ +use std::collections::HashMap; +use std::io::{self, Write}; + +use xenia_xex::pe::PeSection; + +use crate::disasm::RichDisasmItem; +use crate::xref::{XrefKind, section_for_addr}; + +/// Render one instruction line: +/// ` 82000000: 60000000 nop` +/// ` 82000004: 4800FFFC bl 0x82000000 ; -> entry_point` +/// ` 82000010: 812A0000 lwz r9, 0(r10) ; [R] 0x828A0000 (.rdata) = dat_…` +pub fn write_instr_line( + out: &mut W, + item: &RichDisasmItem<'_>, + labels: &HashMap, + sections: &[PeSection], + image_base: u32, + data_annotation: Option<(u32, XrefKind)>, +) -> io::Result<()> { + let disasm_text = item.item.text.display(); + + // Branch-target → label annotation. Uses the structured `branch_target` + // field (cleaner than the legacy "find 0x in disasm string" regex). + let mut annotated = match item.item.text.branch_target { + Some(target) => match labels.get(&target) { + Some(lbl) => format!("{disasm_text:<40} ; -> {lbl}"), + None => disasm_text.to_string(), + }, + None => disasm_text.to_string(), + }; + + if let Some((data_addr, kind)) = data_annotation { + let tag = match kind { + XrefKind::DataRead => "[R]", + XrefKind::DataWrite => "[W]", + _ => "[&]", + }; + let sec = section_for_addr(data_addr, sections, image_base).unwrap_or("?"); + let data_lbl = labels.get(&data_addr) + .map(|s| format!(" = {s}")) + .unwrap_or_default(); + if !annotated.contains("; ->") { + annotated = format!("{annotated:<40} ; {tag} 0x{data_addr:08X} ({sec}){data_lbl}"); + } else { + annotated = format!("{annotated} {tag} 0x{data_addr:08X} ({sec}){data_lbl}"); + } + } + + writeln!(out, " {:08X}: {:08X} {}", item.item.addr, item.item.raw, annotated) +} diff --git a/crates/xenia-analysis/src/sql_views.rs b/crates/xenia-analysis/src/sql_views.rs new file mode 100644 index 0000000..a2ad3d4 --- /dev/null +++ b/crates/xenia-analysis/src/sql_views.rs @@ -0,0 +1,141 @@ +//! Additive SQL views over the Phase-3 ingest tables. +//! +//! 
These views are created when `--analyze=sql` or `--analyze=both` is set. +//! They are *not* a replacement for the Rust passes ([`crate::xref`], +//! [`crate::func`]) — those still own data-ref resolution and prologue +//! pattern matching. The views cover the cleanly-relational parts: +//! +//! - branch xrefs (filter on `instructions.target_hex`) +//! - call graph (join over `xrefs`) + reachability (recursive CTE over `xrefs`) +//! - convenience joins (function-first-instruction, imports-called) +//! +//! All views are read-only and stable across re-creation: dropping and +//! recreating the database via [`crate::db::DbWriter::open_fresh`] re-runs +//! these definitions. +//! +//! ## Cross-check semantics +//! +//! `v_branch_xrefs` is intended to produce *exactly* the same `(source, +//! target, kind)` tuples as the Rust `xref.rs` first pass — given the same +//! input image. [`crate::db::DbWriter::cross_check_branch_xrefs`] queries +//! the symmetric difference and returns the row counts; both should be +//! zero. A non-zero count means the formatter's `mnemonic` column or the +//! kind-classification CASE drifted out of agreement with `xref.rs`, and +//! is worth a one-line warning at log time. + +/// `(view_name, CREATE VIEW … SQL)` pairs in the order they must run. +/// Later views may depend on earlier ones, though none currently does (`v_call_graph` +/// reads `xrefs`, which is the Rust-pass table; `v_branch_xrefs` reads only `instructions`). +pub const ALL_VIEWS: &[(&str, &str)] = &[ + ("v_branch_xrefs", V_BRANCH_XREFS), + ("v_call_graph", V_CALL_GRAPH), + ("v_reachability_from_entry", V_REACHABILITY_FROM_ENTRY), + ("v_function_first_instruction", V_FUNCTION_FIRST_INSTRUCTION), + ("v_imports_called", V_IMPORTS_CALLED), +]; + +/// Branch cross-references derived purely from `instructions.target_hex`. 
+/// +/// Mirrors the kind classification in [`crate::xref::collect_branch_target`] +/// and the short tags returned by [`crate::xref::XrefKind::tag`] (which are +/// what `xrefs.kind` actually stores): +/// - I-form (`b`/`bl`/`ba`/`bla`): `bl`/`bla` → `"call"`, `b`/`ba` → `"j"` +/// - B-form (`bc`/`bcl`/`bca`/`bcla`): always → `"br"` +/// +/// Indirect branches (`bclr`/`bcctr`) leave `target_hex` NULL and are +/// excluded from this view by design. +const V_BRANCH_XREFS: &str = " +CREATE OR REPLACE VIEW v_branch_xrefs AS +SELECT + address AS source, + target_hex AS target, + CASE + WHEN mnemonic IN ('bl', 'bla') THEN 'call' + WHEN mnemonic IN ('b', 'ba') THEN 'j' + WHEN mnemonic IN ('bc', 'bcl', 'bca', 'bcla') THEN 'br' + ELSE 'br' + END AS kind, + mnemonic AS instruction, + function AS source_func +FROM instructions +WHERE target_hex IS NOT NULL; +"; + +/// Call-graph edges resolved against function names. +/// +/// Reads from `xrefs` (the Rust-pass table) — this is the canonical source +/// for *all* edge kinds, including indirect/data; SQL can't reconstruct the +/// data-ref edges cleanly because they require register tracking. For pure +/// branch edges, `v_branch_xrefs` produces equivalent rows directly from +/// `instructions`. +const V_CALL_GRAPH: &str = " +CREATE OR REPLACE VIEW v_call_graph AS +SELECT + x.source AS caller_addr, + cf.name AS caller_name, + x.target AS callee_addr, + tf.name AS callee_name, + x.kind AS edge_kind +FROM xrefs x +LEFT JOIN functions cf ON cf.address = x.source_func +LEFT JOIN functions tf ON tf.address = x.target +WHERE x.kind = 'call'; +"; + +/// Transitive function-level reachability from the entry point over +/// call/jump/branch edges. Useful for finding dead code +/// (`SELECT address FROM functions +/// WHERE address NOT IN (SELECT addr FROM v_reachability_from_entry)`) +/// and for scoping analysis to the live subset. 
+/// +/// Seeds from the function containing the `entry_point` label and walks +/// the recursive closure: a reachable function's instructions branch into +/// the functions enclosing the branch targets, which are then reachable +/// in turn. `UNION` (not `UNION ALL`) deduplicates to handle call-graph +/// cycles (recursive functions, mutually-recursive pairs). +const V_REACHABILITY_FROM_ENTRY: &str = " +CREATE OR REPLACE VIEW v_reachability_from_entry AS +WITH RECURSIVE reach(fn) AS ( + SELECT i.function FROM instructions i + JOIN labels l ON l.address = i.address + WHERE l.name = 'entry_point' AND i.function IS NOT NULL + UNION + SELECT tgt.function FROM xrefs x + JOIN instructions src ON src.address = x.source + JOIN instructions tgt ON tgt.address = x.target + JOIN reach r ON src.function = r.fn + WHERE x.kind IN ('call', 'j', 'br') + AND tgt.function IS NOT NULL +) +SELECT fn AS addr FROM reach; +"; + +/// Convenience join: each function's first decoded instruction. Useful for +/// quickly inspecting prologue patterns without computing offsets manually. +const V_FUNCTION_FIRST_INSTRUCTION: &str = " +CREATE OR REPLACE VIEW v_function_first_instruction AS +SELECT + f.address AS function_addr, + f.name AS function_name, + i.raw AS first_raw, + i.disasm AS first_disasm, + i.ext_disasm AS first_ext_disasm +FROM functions f +JOIN instructions i ON i.address = f.address; +"; + +/// Per-function summary of which kernel/library imports it calls. Joins +/// xrefs (call edges) against the labels table to surface import names. 
+const V_IMPORTS_CALLED: &str = " +CREATE OR REPLACE VIEW v_imports_called AS +SELECT + x.source_func AS function_addr, + f.name AS function_name, + x.target AS import_addr, + l.name AS import_name +FROM xrefs x +JOIN labels l ON l.address = x.target +LEFT JOIN functions f ON f.address = x.source_func +WHERE x.kind = 'call' + AND l.kind = 'import'; +"; diff --git a/crates/xenia-analysis/src/xref.rs b/crates/xenia-analysis/src/xref.rs index d5028ce..170be17 100644 --- a/crates/xenia-analysis/src/xref.rs +++ b/crates/xenia-analysis/src/xref.rs @@ -53,6 +53,7 @@ pub struct XrefResult { } /// Perform full cross-reference analysis on a PE image. +#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point)))] pub fn analyze_xrefs( pe: &[u8], image_base: u32, @@ -61,6 +62,7 @@ pub fn analyze_xrefs( func_analysis: &FuncAnalysis, import_map: &HashMap, ) -> XrefResult { + let started = std::time::Instant::now(); let func_labels = func_analysis.generate_labels(); let mut labels: HashMap = func_labels; labels.insert(entry_point, "entry_point".to_string()); @@ -124,7 +126,7 @@ pub fn analyze_xrefs( let rd = ((instr >> 21) & 0x1F) as usize; let ra = ((instr >> 16) & 0x1F) as usize; let simm = ((instr & 0xFFFF) as i16) as i32; - let uimm = (instr & 0xFFFF) as u32; + let uimm = instr & 0xFFFF; // Reset tracking on function boundaries (prologue = mfspr rN, LR) if opcode == 31 { @@ -181,8 +183,8 @@ pub fn analyze_xrefs( } // Load instructions: lwz, lbz, lhz, lha, lfs, lfd, lwzu, etc. 
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 46 | 48 | 49 | 50 | 51 => { - if ra != 0 { - if let Some(base) = reg_hi[ra] { + if ra != 0 + && let Some(base) = reg_hi[ra] { let data_addr = base.wrapping_add(simm as u32); if is_in_ranges(data_addr, &data_ranges) { data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRead)); @@ -190,14 +192,13 @@ pub fn analyze_xrefs( labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}")); } } - } // Load into rD may clobber the tracked value reg_hi[rd] = None; } // Store instructions: stw, stb, sth, stfs, stfd, stwu, etc. 36 | 37 | 38 | 39 | 44 | 45 | 47 | 52 | 53 | 54 | 55 => { - if ra != 0 { - if let Some(base) = reg_hi[ra] { + if ra != 0 + && let Some(base) = reg_hi[ra] { let data_addr = base.wrapping_add(simm as u32); if is_in_ranges(data_addr, &data_ranges) { data_annotations.insert(abs_addr, (data_addr, XrefKind::DataWrite)); @@ -205,7 +206,6 @@ pub fn analyze_xrefs( labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}")); } } - } } // Any other instruction writing to rD: invalidate _ => { @@ -221,6 +221,17 @@ pub fn analyze_xrefs( } } + let elapsed_ms = started.elapsed().as_millis() as f64; + metrics::histogram!("analysis.phase_ms", "phase" => "xrefs").record(elapsed_ms); + let total_xrefs: usize = xrefs.values().map(|v| v.len()).sum(); + tracing::info!( + labels = labels.len(), + xrefs = total_xrefs, + data_annotations = data_annotations.len(), + elapsed_ms, + "xref analysis complete" + ); + XrefResult { labels, xrefs, data_annotations } } @@ -262,7 +273,7 @@ fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool { } /// Find which section a data address falls in. 
-pub fn section_for_addr<'a>(addr: u32, sections: &'a [PeSection], image_base: u32) -> Option<&'a str> { +pub fn section_for_addr(addr: u32, sections: &[PeSection], image_base: u32) -> Option<&str> { for s in sections { let start = image_base + s.virtual_address; let end = start + s.virtual_size; @@ -285,12 +296,11 @@ pub fn resolve_source_label( } // Find the containing function (largest start <= addr) - if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back() { - if let Some(func_label) = labels.get(&func_start) { + if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back() + && let Some(func_label) = labels.get(&func_start) { let offset = addr - func_start; return format!("{func_label}+0x{offset:X}"); } - } format!("0x{addr:08X}") } diff --git a/crates/xenia-analysis/tests/db_schema_golden.rs b/crates/xenia-analysis/tests/db_schema_golden.rs new file mode 100644 index 0000000..6bb2b45 --- /dev/null +++ b/crates/xenia-analysis/tests/db_schema_golden.rs @@ -0,0 +1,244 @@ +//! DB schema golden — locks the column layout (names + types) of every +//! table written by `DbWriter`. A schema change here without a fixture +//! update fails the test, forcing a conscious decision before downstream +//! query consumers break. +//! +//! The fixture is constructed in-process (no XEX/ISO needed): a small +//! synthetic PE-shaped byte slice with one `.text` section of 4 +//! instructions, plus an empty import-library list and one detected +//! function. + +use std::collections::{BTreeMap, HashMap}; +use std::io::Write; + +use duckdb::Connection; + +use xenia_analysis::DbWriter; +use xenia_analysis::formatter::DisasmInfo; +use xenia_analysis::func::{FuncAnalysis, FuncInfo}; +use xenia_analysis::xref::XrefMap; +use xenia_xex::pe::PeSection; + +/// Build a 16-byte `.text` section: 4 instructions (mflr / nop / blr / nop). +fn synthetic_pe() -> (Vec, Vec, Vec) { + // VA layout: image_base + 0x1000 = .text start (so RVA = 0x1000). 
+ // The DB writer expects pe[rva] to hold the byte at that RVA, so the + // buffer must be at least 0x1000 + section_size bytes long. + const RVA: usize = 0x1000; + const TEXT: [u32; 4] = [ + // mfspr r12, LR (a.k.a. mflr r12) — opcode 31, xo 339, spr 8 (LR). + // Encoded with spr halves swapped per the ISA: spr_field = (8<<5). + (31u32 << 26) | (12 << 21) | ((8 << 5) << 11) | (339 << 1), + 0x60000000, // nop (ori r0, r0, 0) + (19u32 << 26) | (20 << 21) | (16 << 1), // blr (bclr 20, 0) + 0x60000000, // nop + ]; + + let mut pe = vec![0u8; RVA + 16]; + for (i, &word) in TEXT.iter().enumerate() { + pe[RVA + i * 4..RVA + i * 4 + 4].copy_from_slice(&word.to_be_bytes()); + } + + let sections = vec![PeSection { + name: ".text".to_string(), + virtual_address: 0x1000, + virtual_size: 16, + raw_offset: 0x1000, + raw_size: 16, + flags: 0x60000020, // CODE | EXECUTE | READ + }]; + + let import_libraries = vec![]; // No imports in the fixture. + (pe, sections, import_libraries) +} + +fn synthetic_func_analysis(image_base: u32) -> FuncAnalysis { + // Single function covering all four .text instructions. 
+ let entry = image_base + 0x1000; + let mut functions = BTreeMap::new(); + functions.insert( + entry, + FuncInfo { + start: entry, + end: entry + 16, + frame_size: 0, + saved_gprs: 0, + is_leaf: true, + is_saverestore: false, + }, + ); + FuncAnalysis { + functions, + save_gpr_base: None, + restore_gpr_base: None, + } +} + +#[test] +fn db_schema_matches_expected_columns() { + let (pe, sections, libs) = synthetic_pe(); + let image_base = 0x82000000u32; + let entry = image_base + 0x1000; + + let info = DisasmInfo { + image_base, + entry_point: entry, + original_pe_name: Some("synthetic.exe"), + title_id: Some(0xDEADBEEF), + media_id: Some(0xCAFEF00D), + sections: &sections, + import_libraries: &libs, + }; + + let func_analysis = synthetic_func_analysis(image_base); + let mut labels: HashMap = HashMap::new(); + labels.insert(entry, "entry_point".to_string()); + let xrefs: XrefMap = XrefMap::new(); + + let tmp = std::env::temp_dir().join("xenia_rs_schema_golden.duckdb"); + let _ = std::fs::remove_file(&tmp); + + { + let mut w = DbWriter::open_fresh(&tmp).expect("open fresh DB"); + w.write_base(&info).expect("write_base"); + w.ingest_instructions(&pe, &info, &func_analysis, &labels) + .expect("ingest_instructions"); + w.write_analysis_results(&pe, &info, &func_analysis, &labels, &xrefs) + .expect("write_analysis_results"); + w.create_sql_views().expect("create_sql_views"); + } + + let conn = Connection::open(&tmp).expect("reopen DB"); + + // Lock the column layout per table. Pairs are (name, type). 
+ let expected: &[(&str, &[(&str, &str)])] = &[ + ("metadata", &[ + ("key", "VARCHAR"), + ("value", "VARCHAR"), + ]), + ("sections", &[ + ("name", "VARCHAR"), + ("virtual_address", "BIGINT"), + ("virtual_size", "BIGINT"), + ("raw_offset", "BIGINT"), + ("raw_size", "BIGINT"), + ("flags", "BIGINT"), + ("is_code", "BOOLEAN"), + ]), + ("imports", &[ + ("library", "VARCHAR"), + ("ordinal", "BIGINT"), + ("name", "VARCHAR"), + ("record_type", "BIGINT"), + ("address", "BIGINT"), + ]), + ("instructions", &[ + ("address", "BIGINT"), + ("raw", "BIGINT"), + ("mnemonic", "VARCHAR"), + ("operands", "VARCHAR"), + ("disasm", "VARCHAR"), + ("ext_mnemonic", "VARCHAR"), + ("ext_operands", "VARCHAR"), + ("ext_disasm", "VARCHAR"), + ("target_hex", "BIGINT"), + ("section", "VARCHAR"), + ("function", "BIGINT"), + ("label", "VARCHAR"), + ]), + ("functions", &[ + ("address", "BIGINT"), + ("name", "VARCHAR"), + ("end_address", "BIGINT"), + ("frame_size", "BIGINT"), + ("saved_gprs", "BIGINT"), + ("is_leaf", "BOOLEAN"), + ("is_saverestore", "BOOLEAN"), + ]), + ("labels", &[ + ("address", "BIGINT"), + ("name", "VARCHAR"), + ("kind", "VARCHAR"), + ]), + ("xrefs", &[ + ("source", "BIGINT"), + ("target", "BIGINT"), + ("kind", "VARCHAR"), + ("instruction", "VARCHAR"), + ("source_func", "BIGINT"), + ("source_label", "VARCHAR"), + ("target_label", "VARCHAR"), + ]), + ]; + + let mut errs: Vec = Vec::new(); + for (table, cols) in expected { + let mut stmt = conn + .prepare(&format!("PRAGMA table_info('{}')", table)) + .unwrap_or_else(|e| panic!("prepare PRAGMA for {table}: {e}")); + let rows: Vec<(String, String)> = stmt + .query_map([], |row| { + let name: String = row.get(1)?; + let ty: String = row.get(2)?; + Ok((name, ty)) + }) + .expect("query") + .map(|r| r.unwrap()) + .collect(); + + if rows.len() != cols.len() { + writeln!( + std::io::stderr(), + "{table}: column count mismatch (got {}, expected {})", + rows.len(), + cols.len() + ).ok(); + errs.push(format!("{table}: count {} vs {}", 
rows.len(), cols.len())); + } + for (i, (got, expected_col)) in rows.iter().zip(cols.iter()).enumerate() { + if got.0 != expected_col.0 || got.1 != expected_col.1 { + errs.push(format!( + "{table} col {i}: got ({}, {}) expected ({}, {})", + got.0, got.1, expected_col.0, expected_col.1 + )); + } + } + } + + assert!(errs.is_empty(), "schema drift detected:\n {}", errs.join("\n ")); + + // Verify row counts in the populated tables. + let n_instr: i64 = conn + .query_row("SELECT COUNT(*) FROM instructions", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n_instr, 4, "expected 4 instruction rows from the synthetic PE"); + + // The synthetic mflr should produce target_hex = NULL, blr likewise (indirect). + let n_with_target: i64 = conn + .query_row("SELECT COUNT(target_hex) FROM instructions", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n_with_target, 0, "indirect-only fixture should have no direct branch targets"); + + // SQL views must exist in the catalog. The `_` in SQL LIKE is a single-char + // wildcard, so we list the names explicitly rather than `LIKE 'v_%'` + // (which also matches DuckDB's built-in `views` system view). + let expected_views = [ + "v_branch_xrefs", + "v_call_graph", + "v_function_first_instruction", + "v_imports_called", + "v_reachability_from_entry", + ]; + for v in expected_views { + let exists: i64 = conn + .query_row( + "SELECT COUNT(*) FROM duckdb_views() WHERE view_name = ?", + [v], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(exists, 1, "missing SQL view: {v}"); + } + + let _ = std::fs::remove_file(&tmp); +} diff --git a/crates/xenia-analysis/tests/disasm_goldens.rs b/crates/xenia-analysis/tests/disasm_goldens.rs new file mode 100644 index 0000000..67970d0 --- /dev/null +++ b/crates/xenia-analysis/tests/disasm_goldens.rs @@ -0,0 +1,123 @@ +//! Analysis-side goldens: every row in the xenia-cpu fixtures must +//! round-trip cleanly through the [`xenia_analysis::ppc`] shim. This +//! 
pins the shim's behaviour to the canonical `xenia_cpu::disasm::format` +//! output so that any future refactor of the shim layer surfaces here. +//! +//! Loads the same JSON fixtures committed under +//! `crates/xenia-cpu/tests/golden/`. No separate analysis-side fixture +//! files — the cpu canon is the source of truth. + +use std::path::PathBuf; + +use serde::Deserialize; + +#[derive(Debug, Deserialize)] +struct GoldenRow { + label: String, + raw: String, + addr: String, + mnemonic: String, + operands: String, + #[serde(default)] + ext_mnemonic: Option, + #[serde(default)] + ext_operands: Option, + #[serde(default)] + branch_target: Option, +} + +#[derive(Debug, Deserialize)] +struct GoldenFile { + rows: Vec, +} + +fn cpu_fixture(name: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("xenia-cpu") + .join("tests") + .join("golden") + .join(name) +} + +fn parse_hex(s: &str) -> u32 { + let trimmed = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")).unwrap_or(s); + u32::from_str_radix(trimmed, 16).expect("hex u32") +} + +/// Verify the shim's `Decoded { base, ext }` mirrors the canonical fields +/// from `xenia_cpu::disasm::format` for every fixture row. 
+fn check_fixture(fixture_name: &str) { + let path = cpu_fixture(fixture_name); + assert!( + path.exists(), + "missing fixture {} — run `cargo test -p xenia-cpu --test disasm_goldens` to (re)generate it", + path.display() + ); + let src = std::fs::read_to_string(&path).unwrap(); + let golden: GoldenFile = serde_json::from_str(&src).unwrap(); + + for row in &golden.rows { + let raw = parse_hex(&row.raw); + let addr = parse_hex(&row.addr); + + let canonical = + xenia_cpu::disasm::format(&xenia_cpu::decode(raw, addr)); + let shim = xenia_analysis::ppc::disasm(raw, addr); + + assert_eq!( + shim.base, canonical.disasm, + "shim.base drifted for {} (raw={})", + row.label, row.raw, + ); + assert_eq!( + shim.ext, canonical.ext_disasm, + "shim.ext drifted for {} (raw={})", + row.label, row.raw, + ); + + // Also pin against the fixture's structured fields — guards against + // someone changing the cpu canon without regenerating the fixture. + assert_eq!(canonical.mnemonic, row.mnemonic, "mnemonic drift: {}", row.label); + assert_eq!(canonical.operands, row.operands, "operands drift: {}", row.label); + assert_eq!(canonical.ext_mnemonic, row.ext_mnemonic, "ext_mnemonic drift: {}", row.label); + assert_eq!(canonical.ext_operands, row.ext_operands, "ext_operands drift: {}", row.label); + + let target_str = canonical.branch_target.map(|t| format!("0x{t:08X}")); + assert_eq!(target_str, row.branch_target, "branch_target drift: {}", row.label); + } +} + +#[test] +fn analysis_shim_matches_base_mnemonics() { + check_fixture("base_mnemonics.json"); +} + +#[test] +fn analysis_shim_matches_extended_mnemonics() { + check_fixture("extended_mnemonics.json"); +} + +#[test] +fn analysis_shim_matches_vmx128_registers() { + check_fixture("vmx128_registers.json"); +} + +/// Spot-check that the shim's `display()` returns the extended form when +/// present and falls back to the base otherwise. This is the contract +/// `formatter.rs` and the .asm output rely on. 
+#[test] +fn shim_display_prefers_extended() { + // ori r0, r0, 0 → base "ori r0, r0, 0x0", ext "nop" + let d = xenia_analysis::ppc::disasm(0x60000000, 0); + assert_eq!(d.display(), "nop"); + + // addi r3, r1, 16 → no extended form, display falls back to base + let raw = (14u32 << 26) | (3 << 21) | (1 << 16) | 16; + let d = xenia_analysis::ppc::disasm(raw, 0); + assert!( + d.ext.is_none(), + "addi r3, r1, 16 has no extended form (only addi r3, r0, … → li)" + ); + assert_eq!(d.display(), d.base); +}