Initial commit: xenia-rs workspace for Xbox 360 RE
Rust reimplementation of the xenia Xbox 360 emulator targeting reverse-engineering and preservation, initially scoped to Project Sylpheed. Includes: - XEX2 loader (LZX decompression, AES decryption, PE parsing) - XISO / XGD2 disc image VFS - PPC interpreter with 200+ opcodes and VMX128 decoding - Static analyzer: functions, cross-references, labels, asm + SQLite output - HLE kernel covering the xboxkrnl/xam subset used by Sylpheed init - Debugger with in-memory and SQLite-backed execution tracing - `xenia-rs` CLI with extract/dis/exec commands that produce cumulative, superset SQLite databases and opt-in instruction/import/branch traces Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
727
crates/xenia-analysis/src/db.rs
Normal file
727
crates/xenia-analysis/src/db.rs
Normal file
@@ -0,0 +1,727 @@
|
||||
//! SQLite database writer for xenia-rs.
|
||||
//!
|
||||
//! Layered, streaming writes shared by `extract`, `dis`, and `exec`.
|
||||
//! Each command's output is a superset of the previous:
|
||||
//! - `extract --db` -> base tables (metadata, sections, imports)
|
||||
//! - `dis --db` -> base + disasm tables (functions, labels, instructions, xrefs)
|
||||
//! - `exec --db` -> base + disasm + opt-in trace tables (exec_trace, import_calls, branch_trace)
|
||||
//!
|
||||
//! Performance: streaming commits every 100k rows, no end-of-run ANALYZE,
|
||||
//! progress messages before each index build.
|
||||
//!
|
||||
//! Trace kind values for `branch_trace.kind`:
|
||||
//! - "call" : any branch with LK set (raw & 1 == 1)
|
||||
//! - "return" : bclrx without LK
|
||||
//! - "jump" : bcctrx without LK
|
||||
//! - "branch" : bx/bcx without LK
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use rusqlite::{Connection, params};
|
||||
|
||||
use crate::func::FuncAnalysis;
|
||||
use crate::xref::{XrefMap, resolve_source_label};
|
||||
use crate::formatter::DisasmInfo;
|
||||
|
||||
/// Fallback batch size used when `XENIA_DB_BATCH_SIZE` is unset or unusable.
const DEFAULT_BATCH_SIZE: u64 = 100_000;

/// Rows written per DB commit, and entries held per trace buffer before a flush.
///
/// The value comes from the `XENIA_DB_BATCH_SIZE` environment variable; if the
/// variable is missing, does not parse as a `u64`, or is zero, the default of
/// 100_000 is used instead. The variable is read once and the result is cached
/// for the rest of the process.
///
/// Consumers:
///   - streaming commits of `instructions` and `xrefs` in `write_disasm`
///   - flush thresholds of the `exec_trace` and `branch_trace` buffers
///     (`import_calls` always flushes at 1000 — low volume, not worth scaling.)
fn batch_size() -> u64 {
    use std::sync::OnceLock;

    static RESOLVED: OnceLock<u64> = OnceLock::new();

    /// Read and validate the environment override, falling back to the default.
    fn from_environment() -> u64 {
        match std::env::var("XENIA_DB_BATCH_SIZE") {
            Ok(raw) => match raw.parse::<u64>() {
                Ok(rows) if rows > 0 => rows,
                _ => DEFAULT_BATCH_SIZE,
            },
            Err(_) => DEFAULT_BATCH_SIZE,
        }
    }

    *RESOLVED.get_or_init(from_environment)
}
|
||||
|
||||
/// One buffered row for the `exec_trace` table.
///
/// Each field is written to the column of the same name when the writer
/// flushes its instruction-trace buffer. The type is plain data, so it derives
/// the usual value-type traits to make entries printable, comparable and
/// cheap to copy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ExecTraceEntry {
    /// Stored in `exec_trace.address` (presumably the guest address of the
    /// traced instruction — confirm against the producer).
    pub address: u32,
    /// Stored in `exec_trace.cycle`.
    pub cycle: u64,
    /// Stored in `exec_trace.r3`.
    pub r3: u64,
    /// Stored in `exec_trace.r4`.
    pub r4: u64,
    /// Stored in `exec_trace.lr`.
    pub lr: u64,
    /// Stored in `exec_trace.sp`.
    pub sp: u64,
}
|
||||
|
||||
/// One buffered row for the `import_calls` table.
///
/// Each field is written to the column of the same name when the writer
/// flushes its import-call buffer. Derives the usual value-type traits so
/// entries can be printed, cloned and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImportCallEntry {
    /// Stored in `import_calls.address`.
    pub address: u32,
    /// Stored in `import_calls.cycle`.
    pub cycle: u64,
    /// Stored in `import_calls.module`.
    pub module: String,
    /// Stored in `import_calls.ordinal`.
    pub ordinal: u16,
    /// Stored in `import_calls.name`.
    pub name: String,
    /// Stored in `import_calls.arg_r3`.
    pub arg_r3: u64,
    /// Stored in `import_calls.arg_r4`.
    pub arg_r4: u64,
    /// Stored in `import_calls.arg_r5`.
    pub arg_r5: u64,
    /// Stored in `import_calls.arg_r6`.
    pub arg_r6: u64,
    /// Stored in `import_calls.return_value`.
    pub return_value: u64,
}
|
||||
|
||||
/// One buffered row for the `branch_trace` table.
///
/// Each field is written to the column of the same name when the writer
/// flushes its branch-trace buffer. Derives the usual value-type traits so
/// entries can be printed, copied and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BranchTraceEntry {
    /// Stored in `branch_trace.source`.
    pub source: u32,
    /// Stored in `branch_trace.target`.
    pub target: u32,
    /// Stored in `branch_trace.cycle`.
    pub cycle: u64,
    /// Stored in `branch_trace.kind`; the module documentation lists the
    /// values "call", "return", "jump" and "branch".
    pub kind: &'static str,
    /// Stored in `branch_trace.lr`.
    pub lr: u64,
}
|
||||
|
||||
pub struct DbWriter {
|
||||
conn: Connection,
|
||||
exec_buffer: Vec<ExecTraceEntry>,
|
||||
import_buffer: Vec<ImportCallEntry>,
|
||||
branch_buffer: Vec<BranchTraceEntry>,
|
||||
exec_count: u64,
|
||||
import_count: u64,
|
||||
branch_count: u64,
|
||||
trace_instructions: bool,
|
||||
trace_imports: bool,
|
||||
trace_branches: bool,
|
||||
}
|
||||
|
||||
impl DbWriter {
|
||||
/// Open a fresh database at `path`, removing any existing file first.
|
||||
pub fn open_fresh(path: &Path) -> anyhow::Result<Self> {
|
||||
if path.exists() {
|
||||
std::fs::remove_file(path)?;
|
||||
}
|
||||
let conn = Connection::open(path)?;
|
||||
conn.execute_batch("
|
||||
PRAGMA journal_mode = OFF;
|
||||
PRAGMA synchronous = OFF;
|
||||
PRAGMA locking_mode = EXCLUSIVE;
|
||||
PRAGMA temp_store = MEMORY;
|
||||
")?;
|
||||
let cap = batch_size() as usize;
|
||||
Ok(Self {
|
||||
conn,
|
||||
exec_buffer: Vec::with_capacity(cap),
|
||||
import_buffer: Vec::with_capacity(1024),
|
||||
branch_buffer: Vec::with_capacity(cap),
|
||||
exec_count: 0,
|
||||
import_count: 0,
|
||||
branch_count: 0,
|
||||
trace_instructions: false,
|
||||
trace_imports: false,
|
||||
trace_branches: false,
|
||||
})
|
||||
}
|
||||
|
||||
// ── Base layer (written by extract/dis/exec) ─────────────────────────────
|
||||
|
||||
/// Write metadata, sections, imports tables and their indices.
|
||||
pub fn write_base(&mut self, info: &DisasmInfo) -> anyhow::Result<()> {
|
||||
self.conn.execute_batch("
|
||||
CREATE TABLE metadata (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE sections (
|
||||
name TEXT NOT NULL,
|
||||
virtual_address INTEGER NOT NULL,
|
||||
virtual_size INTEGER NOT NULL,
|
||||
raw_offset INTEGER NOT NULL,
|
||||
raw_size INTEGER NOT NULL,
|
||||
flags INTEGER NOT NULL,
|
||||
is_code BOOLEAN NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE imports (
|
||||
library TEXT NOT NULL,
|
||||
ordinal INTEGER NOT NULL,
|
||||
name TEXT,
|
||||
record_type INTEGER NOT NULL,
|
||||
address INTEGER NOT NULL
|
||||
);
|
||||
")?;
|
||||
|
||||
insert_metadata(&self.conn, info)?;
|
||||
insert_sections(&self.conn, info.sections)?;
|
||||
insert_imports(&self.conn, info)?;
|
||||
|
||||
self.conn.execute_batch("
|
||||
CREATE INDEX idx_imports_library ON imports(library);
|
||||
CREATE INDEX idx_imports_name ON imports(name);
|
||||
")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ── Disasm layer (written by dis/exec) ───────────────────────────────────
|
||||
|
||||
/// Write functions, labels, instructions, xrefs tables and indices.
|
||||
pub fn write_disasm(
|
||||
&mut self,
|
||||
pe: &[u8],
|
||||
info: &DisasmInfo,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
xrefs: &XrefMap,
|
||||
) -> anyhow::Result<()> {
|
||||
self.conn.execute_batch("
|
||||
CREATE TABLE functions (
|
||||
address INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
end_address INTEGER NOT NULL,
|
||||
frame_size INTEGER NOT NULL,
|
||||
saved_gprs INTEGER NOT NULL,
|
||||
is_leaf BOOLEAN NOT NULL,
|
||||
is_saverestore BOOLEAN NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE labels (
|
||||
address INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
kind TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE instructions (
|
||||
address INTEGER PRIMARY KEY,
|
||||
raw INTEGER NOT NULL,
|
||||
mnemonic TEXT NOT NULL,
|
||||
operands TEXT NOT NULL,
|
||||
disasm TEXT NOT NULL,
|
||||
ext_mnemonic TEXT,
|
||||
ext_operands TEXT,
|
||||
ext_disasm TEXT,
|
||||
section TEXT NOT NULL,
|
||||
function INTEGER,
|
||||
label TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE xrefs (
|
||||
source INTEGER NOT NULL,
|
||||
target INTEGER NOT NULL,
|
||||
kind TEXT NOT NULL,
|
||||
instruction TEXT,
|
||||
source_func INTEGER,
|
||||
source_label TEXT,
|
||||
target_label TEXT
|
||||
);
|
||||
")?;
|
||||
|
||||
insert_functions(&self.conn, func_analysis, labels)?;
|
||||
insert_labels(&self.conn, labels)?;
|
||||
insert_instructions_streaming(&self.conn, pe, info, func_analysis, labels)?;
|
||||
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
|
||||
|
||||
let indices = [
|
||||
("idx_functions_name", "CREATE INDEX idx_functions_name ON functions(name)"),
|
||||
("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"),
|
||||
("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"),
|
||||
("idx_instructions_function", "CREATE INDEX idx_instructions_function ON instructions(function)"),
|
||||
("idx_instructions_mnemonic", "CREATE INDEX idx_instructions_mnemonic ON instructions(mnemonic)"),
|
||||
("idx_instructions_ext_mnemonic","CREATE INDEX idx_instructions_ext_mnemonic ON instructions(ext_mnemonic)"),
|
||||
("idx_instructions_section", "CREATE INDEX idx_instructions_section ON instructions(section)"),
|
||||
("idx_instructions_label", "CREATE INDEX idx_instructions_label ON instructions(label)"),
|
||||
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
|
||||
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
|
||||
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
|
||||
("idx_xrefs_kind", "CREATE INDEX idx_xrefs_kind ON xrefs(kind)"),
|
||||
("idx_xrefs_instruction", "CREATE INDEX idx_xrefs_instruction ON xrefs(instruction)"),
|
||||
("idx_xrefs_target_label", "CREATE INDEX idx_xrefs_target_label ON xrefs(target_label)"),
|
||||
];
|
||||
for (name, sql) in indices {
|
||||
eprintln!("[db] creating {name}...");
|
||||
self.conn.execute_batch(sql)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ── Trace layer (written by exec when flags enabled) ─────────────────────
|
||||
|
||||
/// Create the opt-in trace tables. No-op if all flags are false.
|
||||
pub fn prepare_trace_tables(
|
||||
&mut self,
|
||||
trace_instructions: bool,
|
||||
trace_imports: bool,
|
||||
trace_branches: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
self.trace_instructions = trace_instructions;
|
||||
self.trace_imports = trace_imports;
|
||||
self.trace_branches = trace_branches;
|
||||
|
||||
if trace_instructions {
|
||||
self.conn.execute_batch("
|
||||
CREATE TABLE IF NOT EXISTS exec_trace (
|
||||
id INTEGER PRIMARY KEY,
|
||||
address INTEGER NOT NULL,
|
||||
cycle INTEGER NOT NULL,
|
||||
r3 INTEGER NOT NULL,
|
||||
r4 INTEGER NOT NULL,
|
||||
lr INTEGER NOT NULL,
|
||||
sp INTEGER NOT NULL
|
||||
);
|
||||
DELETE FROM exec_trace;
|
||||
")?;
|
||||
}
|
||||
|
||||
if trace_imports {
|
||||
self.conn.execute_batch("
|
||||
CREATE TABLE IF NOT EXISTS import_calls (
|
||||
id INTEGER PRIMARY KEY,
|
||||
address INTEGER NOT NULL,
|
||||
cycle INTEGER NOT NULL,
|
||||
module TEXT NOT NULL,
|
||||
ordinal INTEGER NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
arg_r3 INTEGER NOT NULL,
|
||||
arg_r4 INTEGER NOT NULL,
|
||||
arg_r5 INTEGER NOT NULL,
|
||||
arg_r6 INTEGER NOT NULL,
|
||||
return_value INTEGER NOT NULL
|
||||
);
|
||||
DELETE FROM import_calls;
|
||||
")?;
|
||||
}
|
||||
|
||||
if trace_branches {
|
||||
self.conn.execute_batch("
|
||||
CREATE TABLE IF NOT EXISTS branch_trace (
|
||||
id INTEGER PRIMARY KEY,
|
||||
cycle INTEGER NOT NULL,
|
||||
source INTEGER NOT NULL,
|
||||
target INTEGER NOT NULL,
|
||||
kind TEXT NOT NULL,
|
||||
lr INTEGER NOT NULL
|
||||
);
|
||||
DELETE FROM branch_trace;
|
||||
")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn log_instruction(&mut self, entry: ExecTraceEntry) {
|
||||
if !self.trace_instructions { return; }
|
||||
self.exec_buffer.push(entry);
|
||||
if self.exec_buffer.len() as u64 >= batch_size() {
|
||||
self.flush_exec();
|
||||
}
|
||||
}
|
||||
|
||||
/// Queue one import-call row, flushing once 1000 rows are buffered.
///
/// The entry is dropped when import tracing is disabled.
pub fn log_import_call(&mut self, entry: ImportCallEntry) {
    if self.trace_imports {
        self.import_buffer.push(entry);
        let buffered = self.import_buffer.len();
        if buffered >= 1000 {
            self.flush_imports();
        }
    }
}
|
||||
|
||||
pub fn log_branch(&mut self, entry: BranchTraceEntry) {
|
||||
if !self.trace_branches { return; }
|
||||
self.branch_buffer.push(entry);
|
||||
if self.branch_buffer.len() as u64 >= batch_size() {
|
||||
self.flush_branches();
|
||||
}
|
||||
}
|
||||
|
||||
fn flush_exec(&mut self) {
|
||||
if self.exec_buffer.is_empty() { return; }
|
||||
let tx = self.conn.unchecked_transaction().unwrap();
|
||||
{
|
||||
let mut stmt = tx.prepare_cached(
|
||||
"INSERT INTO exec_trace (address, cycle, r3, r4, lr, sp) VALUES (?1, ?2, ?3, ?4, ?5, ?6)"
|
||||
).unwrap();
|
||||
for e in &self.exec_buffer {
|
||||
stmt.execute(params![
|
||||
e.address as i64,
|
||||
e.cycle as i64,
|
||||
e.r3 as i64,
|
||||
e.r4 as i64,
|
||||
e.lr as i64,
|
||||
e.sp as i64,
|
||||
]).ok();
|
||||
}
|
||||
}
|
||||
tx.commit().ok();
|
||||
self.exec_count += self.exec_buffer.len() as u64;
|
||||
self.exec_buffer.clear();
|
||||
}
|
||||
|
||||
fn flush_imports(&mut self) {
|
||||
if self.import_buffer.is_empty() { return; }
|
||||
let tx = self.conn.unchecked_transaction().unwrap();
|
||||
{
|
||||
let mut stmt = tx.prepare_cached(
|
||||
"INSERT INTO import_calls (address, cycle, module, ordinal, name, arg_r3, arg_r4, arg_r5, arg_r6, return_value)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)"
|
||||
).unwrap();
|
||||
for e in &self.import_buffer {
|
||||
stmt.execute(params![
|
||||
e.address as i64,
|
||||
e.cycle as i64,
|
||||
e.module,
|
||||
e.ordinal as i64,
|
||||
e.name,
|
||||
e.arg_r3 as i64,
|
||||
e.arg_r4 as i64,
|
||||
e.arg_r5 as i64,
|
||||
e.arg_r6 as i64,
|
||||
e.return_value as i64,
|
||||
]).ok();
|
||||
}
|
||||
}
|
||||
tx.commit().ok();
|
||||
self.import_count += self.import_buffer.len() as u64;
|
||||
self.import_buffer.clear();
|
||||
}
|
||||
|
||||
fn flush_branches(&mut self) {
|
||||
if self.branch_buffer.is_empty() { return; }
|
||||
let tx = self.conn.unchecked_transaction().unwrap();
|
||||
{
|
||||
let mut stmt = tx.prepare_cached(
|
||||
"INSERT INTO branch_trace (cycle, source, target, kind, lr) VALUES (?1, ?2, ?3, ?4, ?5)"
|
||||
).unwrap();
|
||||
for e in &self.branch_buffer {
|
||||
stmt.execute(params![
|
||||
e.cycle as i64,
|
||||
e.source as i64,
|
||||
e.target as i64,
|
||||
e.kind,
|
||||
e.lr as i64,
|
||||
]).ok();
|
||||
}
|
||||
}
|
||||
tx.commit().ok();
|
||||
self.branch_count += self.branch_buffer.len() as u64;
|
||||
self.branch_buffer.clear();
|
||||
}
|
||||
|
||||
/// Flush remaining trace buffers and create their indices.
|
||||
pub fn finalize_traces(&mut self) -> anyhow::Result<()> {
|
||||
self.flush_exec();
|
||||
self.flush_imports();
|
||||
self.flush_branches();
|
||||
|
||||
if self.trace_instructions {
|
||||
eprintln!("[db] creating idx_exec_trace_address...");
|
||||
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_exec_trace_address ON exec_trace(address);")?;
|
||||
eprintln!("[db] creating idx_exec_trace_cycle...");
|
||||
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_exec_trace_cycle ON exec_trace(cycle);")?;
|
||||
}
|
||||
if self.trace_imports {
|
||||
eprintln!("[db] creating idx_import_calls_name...");
|
||||
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_import_calls_name ON import_calls(name);")?;
|
||||
eprintln!("[db] creating idx_import_calls_cycle...");
|
||||
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_import_calls_cycle ON import_calls(cycle);")?;
|
||||
}
|
||||
if self.trace_branches {
|
||||
eprintln!("[db] creating idx_branch_trace_source...");
|
||||
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_source ON branch_trace(source);")?;
|
||||
eprintln!("[db] creating idx_branch_trace_target...");
|
||||
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_target ON branch_trace(target);")?;
|
||||
eprintln!("[db] creating idx_branch_trace_kind...");
|
||||
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_kind ON branch_trace(kind);")?;
|
||||
eprintln!("[db] creating idx_branch_trace_cycle...");
|
||||
self.conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_branch_trace_cycle ON branch_trace(cycle);")?;
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
"[db] trace totals: {} instructions, {} imports, {} branches",
|
||||
self.exec_count, self.import_count, self.branch_count
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Backwards-compatible wrapper that writes the full base + disasm layers.
|
||||
pub fn write_db(
|
||||
path: &Path,
|
||||
pe: &[u8],
|
||||
info: &DisasmInfo,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
_import_map: &HashMap<u32, String>,
|
||||
xrefs: &XrefMap,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut w = DbWriter::open_fresh(path)?;
|
||||
w.write_base(info)?;
|
||||
w.write_disasm(pe, info, func_analysis, labels, xrefs)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
fn insert_metadata(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> {
|
||||
let mut stmt = conn.prepare("INSERT INTO metadata (key, value) VALUES (?1, ?2)")?;
|
||||
stmt.execute(params!["image_base", format!("0x{:08X}", info.image_base)])?;
|
||||
stmt.execute(params!["entry_point", format!("0x{:08X}", info.entry_point)])?;
|
||||
if let Some(name) = info.original_pe_name {
|
||||
stmt.execute(params!["original_pe_name", name])?;
|
||||
}
|
||||
if let Some(title_id) = info.title_id {
|
||||
stmt.execute(params!["title_id", format!("0x{:08X}", title_id)])?;
|
||||
}
|
||||
if let Some(media_id) = info.media_id {
|
||||
stmt.execute(params!["media_id", format!("0x{:08X}", media_id)])?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_sections(conn: &Connection, sections: &[xenia_xex::pe::PeSection]) -> anyhow::Result<()> {
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO sections (name, virtual_address, virtual_size, raw_offset, raw_size, flags, is_code)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"
|
||||
)?;
|
||||
for s in sections {
|
||||
stmt.execute(params![
|
||||
s.name,
|
||||
s.virtual_address as i64,
|
||||
s.virtual_size as i64,
|
||||
s.raw_offset as i64,
|
||||
s.raw_size as i64,
|
||||
s.flags as i64,
|
||||
s.is_code() as i32,
|
||||
])?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_imports(conn: &Connection, info: &DisasmInfo) -> anyhow::Result<()> {
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO imports (library, ordinal, name, record_type, address)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5)"
|
||||
)?;
|
||||
for lib in info.import_libraries {
|
||||
for imp in &lib.imports {
|
||||
let resolved = crate::resolve_ordinal(&lib.name, imp.ordinal);
|
||||
stmt.execute(params![
|
||||
lib.name,
|
||||
imp.ordinal as i64,
|
||||
resolved,
|
||||
imp.record_type as i64,
|
||||
imp.address as i64,
|
||||
])?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_functions(
|
||||
conn: &Connection,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO functions (address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"
|
||||
)?;
|
||||
for (&addr, fi) in &func_analysis.functions {
|
||||
let name = labels.get(&addr)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("sub_{addr:08X}"));
|
||||
stmt.execute(params![
|
||||
addr as i64,
|
||||
name,
|
||||
fi.end as i64,
|
||||
fi.frame_size as i64,
|
||||
fi.saved_gprs as i64,
|
||||
fi.is_leaf as i32,
|
||||
fi.is_saverestore as i32,
|
||||
])?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_labels(
|
||||
conn: &Connection,
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT OR IGNORE INTO labels (address, name, kind) VALUES (?1, ?2, ?3)"
|
||||
)?;
|
||||
for (&addr, name) in labels {
|
||||
let kind = if name.starts_with("sub_") || name == "entry_point" {
|
||||
"function"
|
||||
} else if name.starts_with("__imp_") {
|
||||
"import"
|
||||
} else if name.starts_with("__savegprlr_") || name.starts_with("__restgprlr_") {
|
||||
"saverestore"
|
||||
} else if name.starts_with("loc_") {
|
||||
"local"
|
||||
} else if name.starts_with("dat_") {
|
||||
"data"
|
||||
} else {
|
||||
"other"
|
||||
};
|
||||
stmt.execute(params![addr as i64, name, kind])?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_instructions_streaming(
|
||||
conn: &Connection,
|
||||
pe: &[u8],
|
||||
info: &DisasmInfo,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut tx = conn.unchecked_transaction()?;
|
||||
let mut count: u64 = 0;
|
||||
let mut since_commit: u64 = 0;
|
||||
|
||||
for section in info.sections {
|
||||
if !section.is_code() { continue; }
|
||||
|
||||
let va_start = section.virtual_address;
|
||||
let va_end = va_start + section.virtual_size;
|
||||
let file_start = section.virtual_address as usize;
|
||||
|
||||
let mut current_func: Option<u32> = None;
|
||||
let mut addr = va_start;
|
||||
|
||||
while addr < va_end {
|
||||
let abs_addr = info.image_base + addr;
|
||||
let off = (addr - va_start) as usize + file_start;
|
||||
if off + 4 > pe.len() { break; }
|
||||
|
||||
if func_analysis.is_function_start(abs_addr) {
|
||||
current_func = Some(abs_addr);
|
||||
}
|
||||
|
||||
let instr = u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]]);
|
||||
let decoded = crate::ppc::disasm(instr, abs_addr);
|
||||
let (mnemonic, operands) = split_disasm(&decoded.base);
|
||||
|
||||
let (ext_mnemonic, ext_operands, ext_disasm): (Option<&str>, Option<&str>, Option<&str>) =
|
||||
match &decoded.ext {
|
||||
Some(ext) => {
|
||||
let (em, eo) = split_disasm(ext);
|
||||
(Some(em), Some(eo), Some(ext.as_str()))
|
||||
}
|
||||
None => (None, None, None),
|
||||
};
|
||||
let label = labels.get(&abs_addr).map(|s| s.as_str());
|
||||
|
||||
{
|
||||
let mut stmt = tx.prepare_cached(
|
||||
"INSERT INTO instructions (address, raw, mnemonic, operands, disasm, ext_mnemonic, ext_operands, ext_disasm, section, function, label)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)"
|
||||
)?;
|
||||
stmt.execute(params![
|
||||
abs_addr as i64,
|
||||
instr as i64,
|
||||
mnemonic,
|
||||
operands,
|
||||
decoded.base,
|
||||
ext_mnemonic,
|
||||
ext_operands,
|
||||
ext_disasm,
|
||||
section.name,
|
||||
current_func.map(|a| a as i64),
|
||||
label,
|
||||
])?;
|
||||
}
|
||||
|
||||
count += 1;
|
||||
since_commit += 1;
|
||||
addr += 4;
|
||||
|
||||
if since_commit >= batch_size() {
|
||||
tx.commit()?;
|
||||
eprintln!("[db] instructions: {count} committed");
|
||||
tx = conn.unchecked_transaction()?;
|
||||
since_commit = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tx.commit()?;
|
||||
eprintln!("[db] inserted {count} instructions");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_xrefs_streaming(
|
||||
conn: &Connection,
|
||||
xrefs: &XrefMap,
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut tx = conn.unchecked_transaction()?;
|
||||
let mut count: u64 = 0;
|
||||
let mut since_commit: u64 = 0;
|
||||
|
||||
for (&target, refs) in xrefs {
|
||||
let target_label = labels.get(&target).map(|s| s.as_str());
|
||||
|
||||
for xref in refs {
|
||||
let kind = xref.kind.db_tag();
|
||||
|
||||
let instruction: Option<String> = {
|
||||
let off = xref.source.wrapping_sub(image_base) as usize;
|
||||
if off + 4 <= pe.len() {
|
||||
let raw = u32::from_be_bytes([pe[off], pe[off+1], pe[off+2], pe[off+3]]);
|
||||
let decoded = crate::ppc::disasm(raw, xref.source);
|
||||
let display = decoded.display().to_string();
|
||||
let (mnem, _) = split_disasm(&display);
|
||||
Some(mnem.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let source_func = func_analysis.functions
|
||||
.range(..=xref.source)
|
||||
.next_back()
|
||||
.map(|(&a, _)| a as i64);
|
||||
|
||||
let source_label = resolve_source_label(
|
||||
xref.source, func_analysis, labels,
|
||||
);
|
||||
|
||||
{
|
||||
let mut stmt = tx.prepare_cached(
|
||||
"INSERT INTO xrefs (source, target, kind, instruction, source_func, source_label, target_label)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"
|
||||
)?;
|
||||
stmt.execute(params![
|
||||
xref.source as i64,
|
||||
target as i64,
|
||||
kind,
|
||||
instruction,
|
||||
source_func,
|
||||
source_label,
|
||||
target_label,
|
||||
])?;
|
||||
}
|
||||
|
||||
count += 1;
|
||||
since_commit += 1;
|
||||
|
||||
if since_commit >= batch_size() {
|
||||
tx.commit()?;
|
||||
eprintln!("[db] xrefs: {count} committed");
|
||||
tx = conn.unchecked_transaction()?;
|
||||
since_commit = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tx.commit()?;
|
||||
eprintln!("[db] inserted {count} xrefs");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Split "mnemonic operands" into (mnemonic, operands).
|
||||
fn split_disasm(disasm: &str) -> (&str, &str) {
|
||||
let trimmed = disasm.trim();
|
||||
if let Some(pos) = trimmed.find(|c: char| c.is_whitespace()) {
|
||||
let mnemonic = &trimmed[..pos];
|
||||
let operands = trimmed[pos..].trim_start();
|
||||
(mnemonic, operands)
|
||||
} else {
|
||||
(trimmed, "")
|
||||
}
|
||||
}
|
||||
318
crates/xenia-analysis/src/formatter.rs
Normal file
318
crates/xenia-analysis/src/formatter.rs
Normal file
@@ -0,0 +1,318 @@
|
||||
//! Assembly text output formatter for Xbox 360 disassembly.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
|
||||
use xenia_xex::header::ImportLibrary;
|
||||
use xenia_xex::pe::PeSection;
|
||||
|
||||
use crate::func::FuncAnalysis;
|
||||
use crate::xref::{XrefKind, Xref, XrefMap, section_for_addr, resolve_source_label};
|
||||
|
||||
/// Metadata passed to the formatter (avoids exposing full Xex2Header internals).
|
||||
pub struct DisasmInfo<'a> {
|
||||
pub image_base: u32,
|
||||
pub entry_point: u32,
|
||||
pub original_pe_name: Option<&'a str>,
|
||||
pub title_id: Option<u32>,
|
||||
pub media_id: Option<u32>,
|
||||
pub sections: &'a [PeSection],
|
||||
pub import_libraries: &'a [ImportLibrary],
|
||||
}
|
||||
|
||||
/// Write full disassembly to the output stream.
|
||||
pub fn write_asm(
|
||||
out: &mut dyn Write,
|
||||
pe: &[u8],
|
||||
info: &DisasmInfo,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
import_map: &HashMap<u32, String>,
|
||||
xrefs: &XrefMap,
|
||||
data_annotations: &HashMap<u32, (u32, XrefKind)>,
|
||||
) -> anyhow::Result<()> {
|
||||
// Header
|
||||
writeln!(out, "; ============================================================================")?;
|
||||
writeln!(out, "; Xbox 360 Disassembly — generated by xenia-rs")?;
|
||||
if let Some(name) = info.original_pe_name {
|
||||
writeln!(out, "; Original PE: {name}")?;
|
||||
}
|
||||
if let (Some(title_id), Some(media_id)) = (info.title_id, info.media_id) {
|
||||
writeln!(out, "; Title ID: 0x{title_id:08X} Media ID: 0x{media_id:08X}")?;
|
||||
}
|
||||
writeln!(out, "; Image base: 0x{:08X} Entry point: 0x{:08X}", info.image_base, info.entry_point)?;
|
||||
writeln!(out, "; Functions detected: {}", func_analysis.functions.len())?;
|
||||
writeln!(out, "; ============================================================================")?;
|
||||
writeln!(out)?;
|
||||
|
||||
// Import declarations
|
||||
if !info.import_libraries.is_empty() {
|
||||
writeln!(out, "; ── Imports ─────────────────────────────────────────────────────────────────")?;
|
||||
for lib in info.import_libraries {
|
||||
writeln!(out, "; Library: {}", lib.name)?;
|
||||
for imp in &lib.imports {
|
||||
let resolved = crate::resolve_ordinal(&lib.name, imp.ordinal);
|
||||
let name = resolved.unwrap_or("???");
|
||||
let kind = if imp.record_type == 1 { "thunk" } else { "var" };
|
||||
writeln!(out, "; [{kind}] 0x{:08X} ordinal 0x{:04X} = {}", imp.address, imp.ordinal, name)?;
|
||||
}
|
||||
}
|
||||
writeln!(out)?;
|
||||
}
|
||||
|
||||
// Disassemble each section
|
||||
for section in info.sections {
|
||||
writeln!(out, "; ── Section: {:8} VA=0x{:08X} Size=0x{:08X} Flags=0x{:08X} ──",
|
||||
section.name, section.virtual_address, section.virtual_size, section.flags)?;
|
||||
|
||||
let va_start = section.virtual_address;
|
||||
let va_end = va_start + section.virtual_size;
|
||||
let file_start = section.virtual_address as usize;
|
||||
|
||||
// Pre-sort data labels in this section for break-at-label hex dump
|
||||
let section_labels_sorted: Vec<u32> = if !section.is_code() {
|
||||
let sec_start = info.image_base + va_start;
|
||||
let sec_end = info.image_base + va_end;
|
||||
let mut addrs: Vec<u32> = labels.keys()
|
||||
.filter(|&&a| a >= sec_start && a < sec_end)
|
||||
.copied()
|
||||
.collect();
|
||||
addrs.sort();
|
||||
addrs
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
if section.is_code() {
|
||||
writeln!(out, ".text")?;
|
||||
writeln!(out)?;
|
||||
|
||||
let mut in_function = false;
|
||||
let mut addr = va_start;
|
||||
while addr < va_end {
|
||||
let abs_addr = info.image_base + addr;
|
||||
let off = (addr - va_start) as usize + file_start;
|
||||
if off + 4 > pe.len() { break; }
|
||||
|
||||
// Function start? Emit separator + header
|
||||
if let Some(fi) = func_analysis.get(abs_addr) {
|
||||
if in_function {
|
||||
writeln!(out, "; end function")?;
|
||||
}
|
||||
writeln!(out)?;
|
||||
writeln!(out, "; ──────────────────────────────────────────────────────────────────────────")?;
|
||||
|
||||
let lbl = labels.get(&abs_addr).cloned()
|
||||
.unwrap_or_else(|| format!("sub_{abs_addr:08X}"));
|
||||
|
||||
if fi.is_saverestore {
|
||||
writeln!(out, "; FUNCTION: {lbl} (save/restore GPR helper)")?;
|
||||
} else if fi.is_leaf {
|
||||
writeln!(out, "; FUNCTION: {lbl} (leaf)")?;
|
||||
} else {
|
||||
let mut details = Vec::new();
|
||||
if fi.frame_size > 0 {
|
||||
details.push(format!("frame={}", fi.frame_size));
|
||||
}
|
||||
if fi.saved_gprs > 0 {
|
||||
let first_reg = 32 - fi.saved_gprs;
|
||||
details.push(format!("saves r{first_reg}-r31"));
|
||||
}
|
||||
let detail_str = if details.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" ({})", details.join(", "))
|
||||
};
|
||||
writeln!(out, "; FUNCTION: {lbl}{detail_str}")?;
|
||||
}
|
||||
|
||||
// Xrefs for function entry
|
||||
if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) {
|
||||
for line in &xref_lines {
|
||||
writeln!(out, "{line}")?;
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, "; ──────────────────────────────────────────────────────────────────────────")?;
|
||||
in_function = true;
|
||||
}
|
||||
|
||||
// Label
|
||||
if let Some(lbl) = labels.get(&abs_addr) {
|
||||
if !func_analysis.is_function_start(abs_addr) {
|
||||
writeln!(out)?;
|
||||
// Xrefs for local labels
|
||||
if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) {
|
||||
for line in &xref_lines {
|
||||
writeln!(out, "{line}")?;
|
||||
}
|
||||
}
|
||||
writeln!(out, "{lbl}:")?;
|
||||
} else {
|
||||
writeln!(out)?;
|
||||
writeln!(out, "{lbl}:")?;
|
||||
}
|
||||
}
|
||||
|
||||
// Import thunk annotation
|
||||
if let Some(imp_name) = import_map.get(&abs_addr) {
|
||||
writeln!(out, " ; IMPORT: {imp_name}")?;
|
||||
}
|
||||
|
||||
let instr = u32::from_be_bytes([
|
||||
pe[off], pe[off+1], pe[off+2], pe[off+3]
|
||||
]);
|
||||
|
||||
let decoded = crate::ppc::disasm(instr, abs_addr);
|
||||
let disasm_text = decoded.display().to_string();
|
||||
|
||||
// Annotate branch targets with label names
|
||||
let mut annotated = annotate_branch(&disasm_text, labels);
|
||||
|
||||
// Annotate data references
|
||||
if let Some(&(data_addr, kind)) = data_annotations.get(&abs_addr) {
|
||||
let tag = match kind {
|
||||
XrefKind::DataRead => "[R]",
|
||||
XrefKind::DataWrite => "[W]",
|
||||
_ => "[&]",
|
||||
};
|
||||
let sec = section_for_addr(data_addr, info.sections, info.image_base)
|
||||
.unwrap_or("?");
|
||||
let data_lbl = labels.get(&data_addr)
|
||||
.map(|s| format!(" = {s}"))
|
||||
.unwrap_or_default();
|
||||
if !annotated.contains("; ->") {
|
||||
annotated = format!("{annotated:<40} ; {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
|
||||
} else {
|
||||
annotated = format!("{annotated} {tag} 0x{data_addr:08X} ({sec}){data_lbl}");
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, " {:08X}: {:08X} {}", abs_addr, instr, annotated)?;
|
||||
addr += 4;
|
||||
}
|
||||
if in_function {
|
||||
writeln!(out, "; end function")?;
|
||||
}
|
||||
} else {
|
||||
// Data section: hex dump
|
||||
writeln!(out, ".data")?;
|
||||
writeln!(out)?;
|
||||
|
||||
let mut addr = va_start;
|
||||
while addr < va_end {
|
||||
let abs_addr = info.image_base + addr;
|
||||
let off = (addr - va_start) as usize + file_start;
|
||||
|
||||
if let Some(lbl) = labels.get(&abs_addr) {
|
||||
writeln!(out)?;
|
||||
// Xrefs for data labels
|
||||
if let Some(xref_lines) = format_xrefs(abs_addr, xrefs, func_analysis, labels) {
|
||||
for line in &xref_lines {
|
||||
writeln!(out, "{line}")?;
|
||||
}
|
||||
}
|
||||
writeln!(out, "{lbl}:")?;
|
||||
}
|
||||
|
||||
// Emit up to 16 bytes per line, but break at label boundaries
|
||||
let mut line_end = std::cmp::min(addr + 16, va_end);
|
||||
for &lbl_addr in §ion_labels_sorted {
|
||||
let lbl_va = lbl_addr - info.image_base;
|
||||
if lbl_va > addr && lbl_va < line_end {
|
||||
line_end = lbl_va;
|
||||
break;
|
||||
}
|
||||
if lbl_va >= line_end { break; }
|
||||
}
|
||||
let byte_count = (line_end - addr) as usize;
|
||||
if off + byte_count > pe.len() { break; }
|
||||
|
||||
write!(out, " {:08X}: ", abs_addr)?;
|
||||
for i in 0..byte_count {
|
||||
write!(out, "{:02X}", pe[off + i])?;
|
||||
if i % 4 == 3 { write!(out, " ")?; }
|
||||
}
|
||||
// ASCII representation
|
||||
let pad = (16 - byte_count) * 2 + (16 - byte_count) / 4;
|
||||
write!(out, "{:>width$} |", "", width = pad)?;
|
||||
for i in 0..byte_count {
|
||||
let b = pe[off + i];
|
||||
let ch = if b.is_ascii_graphic() || b == b' ' { b as char } else { '.' };
|
||||
write!(out, "{ch}")?;
|
||||
}
|
||||
writeln!(out, "|")?;
|
||||
|
||||
addr = line_end;
|
||||
}
|
||||
}
|
||||
writeln!(out)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const XREF_DISPLAY_LIMIT: usize = 8;
|
||||
|
||||
fn format_xrefs(
|
||||
target: u32,
|
||||
xrefs: &XrefMap,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> Option<Vec<String>> {
|
||||
let refs = xrefs.get(&target)?;
|
||||
if refs.is_empty() { return None; }
|
||||
|
||||
let mut sorted: Vec<Xref> = refs.clone();
|
||||
sorted.sort();
|
||||
sorted.dedup();
|
||||
|
||||
let total = sorted.len();
|
||||
let mut lines = Vec::new();
|
||||
|
||||
let calls = sorted.iter().filter(|x| x.kind == XrefKind::Call).count();
|
||||
let jumps = sorted.iter().filter(|x| x.kind == XrefKind::Jump).count();
|
||||
let branches = sorted.iter().filter(|x| x.kind == XrefKind::Branch).count();
|
||||
let reads = sorted.iter().filter(|x| x.kind == XrefKind::DataRead).count();
|
||||
let writes = sorted.iter().filter(|x| x.kind == XrefKind::DataWrite).count();
|
||||
let data_refs = sorted.iter().filter(|x| x.kind == XrefKind::DataRef).count();
|
||||
|
||||
let mut summary_parts = Vec::new();
|
||||
if calls > 0 { summary_parts.push(format!("{calls} call{}", if calls != 1 { "s" } else { "" })); }
|
||||
if jumps > 0 { summary_parts.push(format!("{jumps} jump{}", if jumps != 1 { "s" } else { "" })); }
|
||||
if branches > 0 { summary_parts.push(format!("{branches} branch{}", if branches != 1 { "es" } else { "" })); }
|
||||
if reads > 0 { summary_parts.push(format!("{reads} read{}", if reads != 1 { "s" } else { "" })); }
|
||||
if writes > 0 { summary_parts.push(format!("{writes} write{}", if writes != 1 { "s" } else { "" })); }
|
||||
if data_refs > 0 { summary_parts.push(format!("{data_refs} ref{}", if data_refs != 1 { "s" } else { "" })); }
|
||||
|
||||
lines.push(format!("; XREF: {} ({})", summary_parts.join(", "), total));
|
||||
|
||||
for (i, xref) in sorted.iter().enumerate() {
|
||||
if i >= XREF_DISPLAY_LIMIT {
|
||||
lines.push(format!("; ... and {} more", total - XREF_DISPLAY_LIMIT));
|
||||
break;
|
||||
}
|
||||
let source_label = resolve_source_label(xref.source, func_analysis, labels);
|
||||
lines.push(format!("; {} from {}", xref.kind.tag(), source_label));
|
||||
}
|
||||
|
||||
Some(lines)
|
||||
}
|
||||
|
||||
/// Append a `; -> label` annotation to a disassembly line whose first `0x`
/// literal is exactly eight hex digits and names a known label.
///
/// Only the first `0x` occurrence is examined; any other line is returned
/// unchanged.
fn annotate_branch(disasm: &str, labels: &HashMap<u32, String>) -> String {
    let label = disasm.find("0x").and_then(|prefix_at| {
        let digits = &disasm[prefix_at + 2..];
        let digit_count = digits
            .find(|c: char| !c.is_ascii_hexdigit())
            .unwrap_or(digits.len());
        if digit_count != 8 {
            return None;
        }
        let target = u32::from_str_radix(&digits[..digit_count], 16).ok()?;
        labels.get(&target)
    });

    match label {
        Some(lbl) => format!("{disasm:<40} ; -> {lbl}"),
        None => disasm.to_string(),
    }
}
|
||||
444
crates/xenia-analysis/src/func.rs
Normal file
444
crates/xenia-analysis/src/func.rs
Normal file
@@ -0,0 +1,444 @@
|
||||
//! Function boundary detection via PPC prologue/epilogue pattern matching.
|
||||
//!
|
||||
//! Strategy (multi-pass):
|
||||
//! 1. Identify all `bl` (branch-and-link) targets — these are call destinations,
|
||||
//! hence very likely function entry points.
|
||||
//! 2. Scan the save/restore GPR helper region and label it.
|
||||
//! 3. For each candidate entry, look for prologue patterns:
|
||||
//! a) `mfspr rN, LR` (typically r0 or r12)
|
||||
//! b) `bl __savegprlr_NN` (call into save stub)
|
||||
//! c) `stwu r1, -N(r1)` (allocate stack frame)
|
||||
//! If a prologue is confirmed, record the function and its stack frame size.
|
||||
//! 4. Walk forward from each function entry to find the epilogue:
|
||||
//! a) `blr` (return)
|
||||
//! b) `b __restgprlr_NN` (tail-branch into restore stub which returns)
|
||||
//! Mark the function's end address.
|
||||
//! 5. Detect leaf functions: `bl` targets that lack a prologue but eventually `blr`.
|
||||
|
||||
use std::collections::{HashMap, HashSet, BTreeMap};
|
||||
|
||||
/// Information about a detected function.
///
/// Produced by `analyze_function` for ordinary functions and built directly
/// by `analyze` for the save/restore GPR helper blocks.
#[derive(Debug, Clone)]
pub struct FuncInfo {
    /// Absolute start address.
    pub start: u32,
    /// Absolute end address (exclusive — one past last instruction).
    pub end: u32,
    /// Stack frame size in bytes, taken from the `stwu r1, -N(r1)` prologue
    /// instruction (0 if no such instruction was recognized).
    pub frame_size: u32,
    /// Number of saved GPRs (via __savegprlr helper), 0 if unknown.
    pub saved_gprs: u32,
    /// True when the entry does not start with `mfspr rN, LR`, i.e. no LR
    /// save was detected. This is derived from the prologue shape only: the
    /// body is not scanned for `bl`, and a `stwu`-only prologue still counts
    /// as a leaf.
    pub is_leaf: bool,
    /// True if this is a save/restore GPR helper stub.
    pub is_saverestore: bool,
}
|
||||
|
||||
/// Result of the function analysis pass.
pub struct FuncAnalysis {
    /// address → FuncInfo for every detected function, sorted by address.
    pub functions: BTreeMap<u32, FuncInfo>,
    /// Absolute address of the first instruction of the `__savegprlr` block
    /// (the `std r14, ...(r1)` entry), or `None` if the block was not found.
    pub save_gpr_base: Option<u32>,
    /// Absolute address of the first instruction of the `__restgprlr` block
    /// (the `ld r14, ...(r1)` entry), or `None` if the block was not found.
    pub restore_gpr_base: Option<u32>,
}
|
||||
|
||||
// ── Instruction field helpers ──────────────────────────────────────────────
|
||||
|
||||
/// Primary opcode: the six most significant bits of the instruction word.
fn op(instr: u32) -> u32 {
    // A logical right shift by 26 leaves exactly six bits, so no mask is needed.
    instr >> 26
}
|
||||
/// Extract an instruction field using big-endian (IBM) bit numbering, where
/// bit 0 is the most significant bit and bit 31 the least significant.
///
/// `lo` is the field's first (most significant) bit and `hi` its last (least
/// significant) bit, so `bits(instr, 10, 6)` is the 5-bit rD/rS field.
/// Requires `lo <= hi <= 31`.
fn bits(instr: u32, hi: u32, lo: u32) -> u32 {
    debug_assert!(lo <= hi && hi < 32, "invalid bit range {lo}..={hi}");
    let width = hi - lo + 1;
    // Build the mask by shifting all-ones right: unlike `(1 << width) - 1`
    // this stays in range when the field spans the whole 32-bit word.
    (instr >> (31 - hi)) & (u32::MAX >> (32 - width))
}
|
||||
|
||||
/// Recognize `mfspr rD, LR` and return the destination register number `rD`.
fn is_mfspr_lr(instr: u32) -> Option<u32> {
    // Fixed fields of the encoding: primary opcode 31, the SPR field for LR
    // (SPR 8) and extended opcode 339. rD and the last bit are not checked.
    const FIXED_FIELDS: u32 = 0xFC1F_FFFE;
    const MFSPR_LR: u32 = 0x7C08_02A6;

    if (instr & FIXED_FIELDS) == MFSPR_LR {
        Some((instr >> 21) & 0x1F)
    } else {
        None
    }
}
|
||||
|
||||
/// Recognize `mtspr LR, rS` (any source register).
#[allow(dead_code)]
fn is_mtspr_lr(instr: u32) -> bool {
    // Fixed fields of the encoding: primary opcode 31, the SPR field for LR
    // (SPR 8) and extended opcode 467. rS and the last bit are not checked.
    const FIXED_FIELDS: u32 = 0xFC1F_FFFE;
    const MTSPR_LR: u32 = 0x7C08_03A6;

    (instr & FIXED_FIELDS) == MTSPR_LR
}
|
||||
|
||||
/// Recognize `stwu r1, d(r1)` and return the sign-extended displacement `d`.
///
/// A negative displacement is a stack frame allocation.
fn is_stwu_r1(instr: u32) -> Option<i32> {
    // Upper halfword holds opcode 37 with rS = 1 and rA = 1.
    const STWU_R1_R1: u32 = 0x9421_0000;

    if (instr & 0xFFFF_0000) != STWU_R1_R1 {
        return None;
    }
    // Lower halfword is the signed 16-bit displacement.
    Some(i32::from(instr as u16 as i16))
}
|
||||
|
||||
/// True for the exact encoding of an unconditional `blr`.
fn is_blr(instr: u32) -> bool {
    const BLR: u32 = 0x4E80_0020;
    matches!(instr, BLR)
}
|
||||
|
||||
/// True for the exact encoding of an unconditional `bctr`.
fn is_bctr(instr: u32) -> bool {
    const BCTR: u32 = 0x4E80_0420;
    matches!(instr, BCTR)
}
|
||||
|
||||
/// Recognize a relative `bl` (opcode 18, LK = 1, AA = 0).
///
/// Returns the raw displacement field (`instr & 0x03FFFFFC`). The value is
/// NOT sign-extended; pass it through `sign_ext26` to get the signed offset.
fn is_bl(instr: u32) -> Option<u32> {
    // Opcode 18 in the top six bits; AA (bit 1) clear, LK (bit 0) set.
    const OPCODE_AA_LK: u32 = 0xFC00_0003;
    const RELATIVE_BL: u32 = 0x4800_0001;

    if (instr & OPCODE_AA_LK) == RELATIVE_BL {
        Some(instr & 0x03FF_FFFC)
    } else {
        None
    }
}
|
||||
|
||||
/// Recognize a relative `b` (opcode 18, LK = 0, AA = 0).
///
/// Returns the raw displacement field (`instr & 0x03FFFFFC`), not yet
/// sign-extended.
fn is_b(instr: u32) -> Option<u32> {
    // Opcode 18 in the top six bits; both AA (bit 1) and LK (bit 0) clear.
    const OPCODE_AA_LK: u32 = 0xFC00_0003;
    const RELATIVE_B: u32 = 0x4800_0000;

    if (instr & OPCODE_AA_LK) == RELATIVE_B {
        Some(instr & 0x03FF_FFFC)
    } else {
        None
    }
}
|
||||
|
||||
/// Sign-extend the low 26 bits of `val` to an `i32`; the upper six bits of
/// the input are ignored.
fn sign_ext26(val: u32) -> i32 {
    const SIGN_BIT: u32 = 0x0200_0000;
    const UPPER_BITS: u32 = 0xFC00_0000;

    if val & SIGN_BIT != 0 {
        (val | UPPER_BITS) as i32
    } else {
        (val & !UPPER_BITS) as i32
    }
}
|
||||
|
||||
fn bl_target(instr: u32, addr: u32) -> Option<u32> {
|
||||
is_bl(instr).map(|off| addr.wrapping_add(sign_ext26(off) as u32))
|
||||
}
|
||||
|
||||
fn b_target(instr: u32, addr: u32) -> Option<u32> {
|
||||
is_b(instr).map(|off| addr.wrapping_add(sign_ext26(off) as u32))
|
||||
}
|
||||
|
||||
// ── Read instruction from PE ───────────────────────────────────────────────
|
||||
|
||||
/// Read the big-endian 32-bit word stored at absolute address `abs_addr`.
///
/// `pe` is indexed by `abs_addr - image_base`. Returns `None` when the four
/// bytes are not fully inside `pe`; this includes addresses below
/// `image_base`, whose wrapped offset is rejected by the bounds check.
fn read_instr(pe: &[u8], abs_addr: u32, image_base: u32) -> Option<u32> {
    let start = abs_addr.wrapping_sub(image_base) as usize;
    // checked_add: on 32-bit hosts a wrapped offset near usize::MAX would
    // otherwise overflow before the bounds check gets a chance to reject it.
    let end = start.checked_add(4)?;
    let word = pe.get(start..end)?;
    Some(u32::from_be_bytes([word[0], word[1], word[2], word[3]]))
}
|
||||
|
||||
// ── Detect the save/restore GPR helper stubs ───────────────────────────────
|
||||
//
|
||||
// These are a well-known pattern emitted by the Xbox 360 linker.
|
||||
// Save block: a cascade of `std rN, offset(r1)` for r14..r31 + `stw r12, -8(r1)` + `blr`
|
||||
// Restore: a cascade of `ld rN, offset(r1)` for r14..r31 + `lwz r12, -8(r1)` + `mtspr LR, r12` + `blr`
|
||||
//
|
||||
// We detect the save block by finding 18 consecutive `std rN, ...(r1)` instructions
|
||||
// for r14 through r31.
|
||||
|
||||
fn find_saverestore_stubs(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
code_ranges: &[(u32, u32)], // (abs_start, abs_end)
|
||||
) -> (Option<u32>, Option<u32>) {
|
||||
let mut save_base = None;
|
||||
let mut restore_base = None;
|
||||
|
||||
for &(start, end) in code_ranges {
|
||||
let mut addr = start;
|
||||
while addr + 4 * 18 < end {
|
||||
// Check if this is `std r14, ...(r1)` — opcode 62 (std), rS=14, rA=1
|
||||
let instr = match read_instr(pe, addr, image_base) { Some(i) => i, None => { addr += 4; continue; } };
|
||||
if op(instr) == 62 && bits(instr, 10, 6) == 14 && bits(instr, 15, 11) == 1 && (instr & 3) == 0 {
|
||||
// Verify it's a cascade: r14, r15, ..., r31
|
||||
let mut ok = true;
|
||||
for i in 0u32..18 {
|
||||
let check = match read_instr(pe, addr + i * 4, image_base) { Some(c) => c, None => { ok = false; break; } };
|
||||
if op(check) != 62 || bits(check, 10, 6) != 14 + i || bits(check, 15, 11) != 1 {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ok {
|
||||
save_base = Some(addr);
|
||||
// Restore block typically follows the save block
|
||||
// After save: stw r12, -8(r1) + blr, then restore starts
|
||||
let after_save = addr + 18 * 4 + 8; // skip stw r12 + blr
|
||||
let check = read_instr(pe, after_save, image_base);
|
||||
if let Some(c) = check {
|
||||
// Should be `ld r14, ...(r1)` — opcode 58, rT=14, rA=1
|
||||
if op(c) == 58 && bits(c, 10, 6) == 14 && bits(c, 15, 11) == 1 {
|
||||
restore_base = Some(after_save);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
addr += 4;
|
||||
}
|
||||
if save_base.is_some() { break; }
|
||||
}
|
||||
|
||||
(save_base, restore_base)
|
||||
}
|
||||
|
||||
// ── Main analysis ──────────────────────────────────────────────────────────
|
||||
|
||||
pub fn analyze(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
entry_point: u32,
|
||||
code_sections: &[(u32, u32, u32)], // (va_start, va_size, flags)
|
||||
) -> FuncAnalysis {
|
||||
let code_ranges: Vec<(u32, u32)> = code_sections.iter()
|
||||
.map(|(va, sz, _)| (image_base + va, image_base + va + sz))
|
||||
.collect();
|
||||
|
||||
// 1. Find save/restore stubs
|
||||
let (save_base, restore_base) = find_saverestore_stubs(pe, image_base, &code_ranges);
|
||||
if let Some(sb) = save_base {
|
||||
eprintln!("[func] __savegprlr stub at 0x{sb:08X}");
|
||||
}
|
||||
if let Some(rb) = restore_base {
|
||||
eprintln!("[func] __restgprlr stub at 0x{rb:08X}");
|
||||
}
|
||||
|
||||
// Set of addresses in the save/restore region (to exclude from function detection)
|
||||
let mut saverestore_addrs: HashSet<u32> = HashSet::new();
|
||||
if let Some(sb) = save_base {
|
||||
// Save block: 18 std + stw + blr = 20 instructions
|
||||
for i in 0..20 { saverestore_addrs.insert(sb + i * 4); }
|
||||
}
|
||||
if let Some(rb) = restore_base {
|
||||
// Restore block: 18 ld + lwz + mtspr + blr = 21 instructions
|
||||
for i in 0..21 { saverestore_addrs.insert(rb + i * 4); }
|
||||
}
|
||||
|
||||
// 2. Collect all bl targets as candidate function entries
|
||||
let mut call_targets: HashSet<u32> = HashSet::new();
|
||||
call_targets.insert(entry_point);
|
||||
|
||||
for &(start, end) in &code_ranges {
|
||||
let mut addr = start;
|
||||
while addr < end {
|
||||
if let Some(instr) = read_instr(pe, addr, image_base) {
|
||||
if let Some(target) = bl_target(instr, addr) {
|
||||
// Don't count calls into save/restore stubs as function entries
|
||||
if !saverestore_addrs.contains(&target) {
|
||||
call_targets.insert(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
addr += 4;
|
||||
}
|
||||
}
|
||||
eprintln!("[func] {} bl targets (candidate functions)", call_targets.len());
|
||||
|
||||
// 3. For each candidate, detect prologue and walk to epilogue
|
||||
let mut functions: BTreeMap<u32, FuncInfo> = BTreeMap::new();
|
||||
|
||||
for &func_addr in &call_targets {
|
||||
if let Some(fi) = analyze_function(pe, image_base, func_addr, &code_ranges, save_base, restore_base) {
|
||||
functions.insert(func_addr, fi);
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Label save/restore stubs as special functions — one entry for the whole block
|
||||
if let Some(sb) = save_base {
|
||||
// The save block is one cascade: entry at each rN, falls through to blr
|
||||
// Treat as a single function with the first entry point
|
||||
functions.insert(sb, FuncInfo {
|
||||
start: sb,
|
||||
end: sb + 20 * 4, // 18 std + stw r12 + blr
|
||||
frame_size: 0,
|
||||
saved_gprs: 18,
|
||||
is_leaf: true,
|
||||
is_saverestore: true,
|
||||
});
|
||||
}
|
||||
if let Some(rb) = restore_base {
|
||||
functions.insert(rb, FuncInfo {
|
||||
start: rb,
|
||||
end: rb + 21 * 4, // 18 ld + lwz r12 + mtspr LR + blr
|
||||
frame_size: 0,
|
||||
saved_gprs: 18,
|
||||
is_leaf: true,
|
||||
is_saverestore: true,
|
||||
});
|
||||
}
|
||||
|
||||
eprintln!("[func] {} functions detected", functions.len());
|
||||
|
||||
FuncAnalysis {
|
||||
functions,
|
||||
save_gpr_base: save_base,
|
||||
restore_gpr_base: restore_base,
|
||||
}
|
||||
}
|
||||
|
||||
/// Analyze a single function starting at `func_addr`.
|
||||
fn analyze_function(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
func_addr: u32,
|
||||
code_ranges: &[(u32, u32)],
|
||||
save_base: Option<u32>,
|
||||
restore_base: Option<u32>,
|
||||
) -> Option<FuncInfo> {
|
||||
// Verify the address is within a code section
|
||||
let in_code = code_ranges.iter().any(|&(s, e)| func_addr >= s && func_addr < e);
|
||||
if !in_code { return None; }
|
||||
|
||||
let instr0 = read_instr(pe, func_addr, image_base)?;
|
||||
|
||||
let mut frame_size: u32 = 0;
|
||||
let mut saved_gprs: u32 = 0;
|
||||
let mut is_leaf = false;
|
||||
let mut prologue_len: u32 = 0;
|
||||
|
||||
// Pattern A: mfspr rN, LR [+ bl __savegprlr_NN] + stwu r1, -N(r1)
|
||||
if let Some(_lr_reg) = is_mfspr_lr(instr0) {
|
||||
prologue_len = 4;
|
||||
let instr1 = read_instr(pe, func_addr + 4, image_base).unwrap_or(0);
|
||||
|
||||
// Check if next is bl to save stub
|
||||
if let Some(target) = bl_target(instr1, func_addr + 4) {
|
||||
if let Some(sb) = save_base {
|
||||
if target >= sb && target < sb + 18 * 4 {
|
||||
let idx = (target - sb) / 4;
|
||||
saved_gprs = 18 - idx;
|
||||
prologue_len = 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Next should be stwu r1, -N(r1)
|
||||
let stwu_instr = read_instr(pe, func_addr + prologue_len, image_base).unwrap_or(0);
|
||||
if let Some(d) = is_stwu_r1(stwu_instr) {
|
||||
frame_size = (-d) as u32;
|
||||
prologue_len += 4;
|
||||
}
|
||||
}
|
||||
// Pattern B: stwu r1, -N(r1) without mfspr (rare but possible for leaf-ish functions)
|
||||
else if let Some(d) = is_stwu_r1(instr0) {
|
||||
frame_size = (-d) as u32;
|
||||
prologue_len = 4;
|
||||
is_leaf = true; // no LR save = likely leaf (or uses CTR)
|
||||
}
|
||||
// Pattern C: no prologue — leaf function, just code until blr
|
||||
else {
|
||||
is_leaf = true;
|
||||
}
|
||||
|
||||
// Walk forward to find the end of the function
|
||||
let max_range = code_ranges.iter()
|
||||
.find(|&&(s, e)| func_addr >= s && func_addr < e)
|
||||
.map(|&(_, e)| e)
|
||||
.unwrap_or(func_addr + 0x100000);
|
||||
|
||||
let mut end_addr = func_addr + 4;
|
||||
let mut addr = func_addr + prologue_len;
|
||||
let scan_limit = std::cmp::min(addr + 0x100000, max_range); // 1MB max function
|
||||
|
||||
while addr < scan_limit {
|
||||
let instr = match read_instr(pe, addr, image_base) {
|
||||
Some(i) => i,
|
||||
None => break,
|
||||
};
|
||||
|
||||
// Epilogue: blr
|
||||
if is_blr(instr) {
|
||||
end_addr = addr + 4;
|
||||
// Check if the instruction after blr looks like padding or another function
|
||||
// Sometimes there's trailing data after blr; we stop at the first blr
|
||||
// that isn't inside a branch-over pattern
|
||||
break;
|
||||
}
|
||||
|
||||
// Epilogue: b __restgprlr_NN (tail branch into restore stub)
|
||||
if let Some(target) = b_target(instr, addr) {
|
||||
if let Some(rb) = restore_base {
|
||||
if target >= rb && target < rb + 18 * 4 {
|
||||
end_addr = addr + 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Epilogue: bctr (indirect tail call — end of function)
|
||||
if is_bctr(instr) {
|
||||
end_addr = addr + 4;
|
||||
break;
|
||||
}
|
||||
|
||||
addr += 4;
|
||||
}
|
||||
|
||||
// If we didn't find any epilogue within a reasonable range, still emit
|
||||
// the function but mark end at the scan point
|
||||
if end_addr <= func_addr + 4 && prologue_len > 0 {
|
||||
end_addr = addr;
|
||||
}
|
||||
|
||||
// Don't emit zero-size "functions" for addresses that are just data
|
||||
if end_addr <= func_addr + 4 && prologue_len == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(FuncInfo {
|
||||
start: func_addr,
|
||||
end: end_addr,
|
||||
frame_size,
|
||||
saved_gprs,
|
||||
is_leaf,
|
||||
is_saverestore: false,
|
||||
})
|
||||
}
|
||||
|
||||
// ── Label generation ───────────────────────────────────────────────────────
|
||||
|
||||
impl FuncAnalysis {
|
||||
/// Generate labels for all detected functions.
|
||||
/// Call targets with confirmed prologues get `sub_XXXXXXXX`.
|
||||
/// Save/restore entries get `__savegprlr_NN` / `__restgprlr_NN`.
|
||||
pub fn generate_labels(&self) -> HashMap<u32, String> {
|
||||
let mut labels = HashMap::new();
|
||||
|
||||
for (&addr, fi) in &self.functions {
|
||||
if fi.is_saverestore {
|
||||
// Label the block start, plus individual register entry points
|
||||
if let Some(sb) = self.save_gpr_base {
|
||||
if addr == sb {
|
||||
for i in 0u32..18 {
|
||||
let reg = 14 + i;
|
||||
labels.insert(sb + i * 4, format!("__savegprlr_{reg}"));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if let Some(rb) = self.restore_gpr_base {
|
||||
if addr == rb {
|
||||
for i in 0u32..18 {
|
||||
let reg = 14 + i;
|
||||
labels.insert(rb + i * 4, format!("__restgprlr_{reg}"));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
labels.insert(addr, format!("sub_{addr:08X}"));
|
||||
}
|
||||
|
||||
labels
|
||||
}
|
||||
|
||||
/// Returns true if `addr` is the start of a detected function.
|
||||
pub fn is_function_start(&self, addr: u32) -> bool {
|
||||
self.functions.contains_key(&addr)
|
||||
}
|
||||
|
||||
/// Get info for the function starting at `addr`.
|
||||
pub fn get(&self, addr: u32) -> Option<&FuncInfo> {
|
||||
self.functions.get(&addr)
|
||||
}
|
||||
}
|
||||
10
crates/xenia-analysis/src/lib.rs
Normal file
10
crates/xenia-analysis/src/lib.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
pub mod ppc;
|
||||
pub mod func;
|
||||
pub mod xref;
|
||||
pub mod db;
|
||||
pub mod formatter;
|
||||
|
||||
mod ordinals;
|
||||
pub use ordinals::resolve_ordinal;
|
||||
pub use xref::{XrefKind, Xref, XrefMap, resolve_source_label};
|
||||
pub use db::{DbWriter, ExecTraceEntry, ImportCallEntry, BranchTraceEntry};
|
||||
1
crates/xenia-analysis/src/ordinals.rs
Normal file
1
crates/xenia-analysis/src/ordinals.rs
Normal file
@@ -0,0 +1 @@
|
||||
include!(concat!(env!("OUT_DIR"), "/ordinals.rs"));
|
||||
1376
crates/xenia-analysis/src/ppc.rs
Normal file
1376
crates/xenia-analysis/src/ppc.rs
Normal file
File diff suppressed because it is too large
Load Diff
296
crates/xenia-analysis/src/xref.rs
Normal file
296
crates/xenia-analysis/src/xref.rs
Normal file
@@ -0,0 +1,296 @@
|
||||
//! Cross-reference analysis for Xbox 360 PE images.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use xenia_xex::pe::PeSection;
|
||||
use crate::func::FuncAnalysis;
|
||||
|
||||
// ── Cross-reference types ────────────────────────────────────────────────
|
||||
|
||||
/// Kind of a cross-reference.
///
/// The variant order defines the derived ordering, which is what sorting a
/// list of `Xref`s falls back to for entries with the same source.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum XrefKind {
    /// `bl` — branch with the link bit set.
    Call,
    /// `b` — unconditional branch without link.
    Jump,
    /// `bc` / `bXX` — conditional branch.
    Branch,
    /// lwz, lbz, lhz, lha, lfs, lfd, etc. from a resolved address.
    DataRead,
    /// stw, stb, sth, stfs, stfd, etc. to a resolved address.
    DataWrite,
    /// Address computed via lis+addi/ori but not directly loaded/stored.
    DataRef,
}

impl XrefKind {
    /// Short lowercase tag used in disassembly comments.
    pub fn tag(self) -> &'static str {
        match self {
            XrefKind::Call => "call",
            XrefKind::Jump => "j",
            XrefKind::Branch => "br",
            XrefKind::DataRead => "read",
            XrefKind::DataWrite => "write",
            XrefKind::DataRef => "ref",
        }
    }

    /// True for the three data-reference kinds (read, write, ref).
    pub fn is_data(self) -> bool {
        matches!(self, XrefKind::DataRead | XrefKind::DataWrite | XrefKind::DataRef)
    }

    /// Tag stored in the database; currently identical to [`XrefKind::tag`].
    pub fn db_tag(self) -> &'static str {
        self.tag()
    }
}
|
||||
|
||||
/// A single cross-reference: one instruction referring to some target.
///
/// The target address is not stored here; it is the key under which the
/// reference is filed in an `XrefMap`. The derived ordering compares
/// `source` first and `kind` second (field declaration order).
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Xref {
    /// Absolute address of the referencing instruction.
    pub source: u32,
    /// How the instruction refers to the target.
    pub kind: XrefKind,
}
|
||||
|
||||
pub type XrefMap = HashMap<u32, Vec<Xref>>;
|
||||
|
||||
/// Result of cross-reference analysis.
pub struct XrefResult {
    /// Absolute address → label name. Contains the function labels, the
    /// `entry_point` label, `__imp_*` labels for import thunks, `loc_*`
    /// labels for branch targets and `dat_*` labels for resolved data
    /// addresses.
    pub labels: HashMap<u32, String>,
    /// Target address → every reference made to it.
    pub xrefs: XrefMap,
    /// Instruction address → (resolved data address, reference kind) for
    /// instructions whose data operand could be resolved.
    pub data_annotations: HashMap<u32, (u32, XrefKind)>,
}
|
||||
|
||||
/// Perform full cross-reference analysis on a PE image.
|
||||
pub fn analyze_xrefs(
|
||||
pe: &[u8],
|
||||
image_base: u32,
|
||||
entry_point: u32,
|
||||
sections: &[PeSection],
|
||||
func_analysis: &FuncAnalysis,
|
||||
import_map: &HashMap<u32, String>,
|
||||
) -> XrefResult {
|
||||
let func_labels = func_analysis.generate_labels();
|
||||
let mut labels: HashMap<u32, String> = func_labels;
|
||||
labels.insert(entry_point, "entry_point".to_string());
|
||||
|
||||
// Add import thunks as labels
|
||||
for (addr, name) in import_map {
|
||||
labels.insert(*addr, format!("__imp_{}", name.replace("::", "_")));
|
||||
}
|
||||
|
||||
// First pass: collect branch targets + cross-references from code sections
|
||||
let mut xrefs: XrefMap = HashMap::new();
|
||||
|
||||
for section in sections {
|
||||
if !section.is_code() { continue; }
|
||||
let va_start = section.virtual_address;
|
||||
let va_end = va_start + section.virtual_size;
|
||||
let file_start = section.virtual_address as usize;
|
||||
|
||||
let mut addr = va_start;
|
||||
while addr < va_end {
|
||||
let abs_addr = image_base + addr;
|
||||
let off = (addr - va_start) as usize + file_start;
|
||||
if off + 4 > pe.len() { break; }
|
||||
let instr = u32::from_be_bytes([
|
||||
pe[off], pe[off+1], pe[off+2], pe[off+3]
|
||||
]);
|
||||
|
||||
collect_branch_target(instr, abs_addr, &mut labels, &mut xrefs);
|
||||
addr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass: resolve data references via lis+load/store pattern matching
|
||||
let mut data_annotations: HashMap<u32, (u32, XrefKind)> = HashMap::new();
|
||||
|
||||
// Build set of valid data address ranges for filtering false positives
|
||||
let data_ranges: Vec<(u32, u32)> = sections.iter()
|
||||
.map(|s| (image_base + s.virtual_address,
|
||||
image_base + s.virtual_address + s.virtual_size))
|
||||
.collect();
|
||||
|
||||
for section in sections {
|
||||
if !section.is_code() { continue; }
|
||||
let va_start = section.virtual_address;
|
||||
let va_end = va_start + section.virtual_size;
|
||||
let file_start = section.virtual_address as usize;
|
||||
|
||||
// Register state: track lis results. reg_hi[r] = Some(high_16_bits << 16)
|
||||
let mut reg_hi: [Option<u32>; 32] = [None; 32];
|
||||
|
||||
let mut addr = va_start;
|
||||
while addr < va_end {
|
||||
let abs_addr = image_base + addr;
|
||||
let off = (addr - va_start) as usize + file_start;
|
||||
if off + 4 > pe.len() { break; }
|
||||
let instr = u32::from_be_bytes([
|
||||
pe[off], pe[off+1], pe[off+2], pe[off+3]
|
||||
]);
|
||||
|
||||
let opcode = (instr >> 26) & 0x3F;
|
||||
let rd = ((instr >> 21) & 0x1F) as usize;
|
||||
let ra = ((instr >> 16) & 0x1F) as usize;
|
||||
let simm = ((instr & 0xFFFF) as i16) as i32;
|
||||
let uimm = (instr & 0xFFFF) as u32;
|
||||
|
||||
// Reset tracking on function boundaries (prologue = mfspr rN, LR)
|
||||
if opcode == 31 {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
if xo == 339 { // mfspr
|
||||
let spr = (((instr >> 16) & 0x1F) << 5) | ((instr >> 11) & 0x1F);
|
||||
if spr == 8 { // LR
|
||||
reg_hi = [None; 32];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match opcode {
|
||||
// lis rD, IMM (encoded as addis rD, r0, IMM)
|
||||
15 if ra == 0 => {
|
||||
reg_hi[rd] = Some(uimm << 16);
|
||||
}
|
||||
// addis rD, rA, IMM (rA != 0) — if rA has known lis, update
|
||||
15 if ra != 0 => {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
reg_hi[rd] = Some(base.wrapping_add(uimm << 16));
|
||||
} else {
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
}
|
||||
// addi rD, rA, IMM — compute full address if rA has known lis
|
||||
14 if ra != 0 => {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef });
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
reg_hi[rd] = Some(data_addr); // propagate for chained access
|
||||
} else {
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
}
|
||||
// ori rA, rS, UIMM — compute full address
|
||||
24 => {
|
||||
let rs = rd; // source is bits 21-25 for ori
|
||||
if let Some(base) = reg_hi[rs] {
|
||||
let data_addr = base | uimm;
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef });
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
reg_hi[ra] = Some(data_addr);
|
||||
} else {
|
||||
reg_hi[ra] = None;
|
||||
}
|
||||
}
|
||||
// Load instructions: lwz, lbz, lhz, lha, lfs, lfd, lwzu, etc.
|
||||
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 46 | 48 | 49 | 50 | 51 => {
|
||||
if ra != 0 {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRead));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRead });
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Load into rD may clobber the tracked value
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// Store instructions: stw, stb, sth, stfs, stfd, stwu, etc.
|
||||
36 | 37 | 38 | 39 | 44 | 45 | 47 | 52 | 53 | 54 | 55 => {
|
||||
if ra != 0 {
|
||||
if let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataWrite));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataWrite });
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Any other instruction writing to rD: invalidate
|
||||
_ => {
|
||||
// Conservatively invalidate for instructions that modify rD
|
||||
// (most ALU ops, loads, etc.)
|
||||
if opcode != 18 && opcode != 16 && opcode != 17 { // skip branch/sc
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
addr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
XrefResult { labels, xrefs, data_annotations }
|
||||
}
|
||||
|
||||
fn collect_branch_target(instr: u32, addr: u32, labels: &mut HashMap<u32, String>, xrefs: &mut XrefMap) {
|
||||
let op = (instr >> 26) & 0x3F;
|
||||
match op {
|
||||
18 => {
|
||||
// I-form: b/bl/ba/bla
|
||||
let li = sign_ext26(instr & 0x03FFFFFC);
|
||||
let aa = instr & 2 != 0;
|
||||
let lk = instr & 1 != 0;
|
||||
let target = if aa { li as u32 } else { addr.wrapping_add(li as u32) };
|
||||
labels.entry(target).or_insert_with(|| format!("loc_{target:08X}"));
|
||||
let kind = if lk { XrefKind::Call } else { XrefKind::Jump };
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind });
|
||||
}
|
||||
16 => {
|
||||
// B-form: bc/bcl
|
||||
let bd = sign_ext16(instr & 0xFFFC);
|
||||
let aa = instr & 2 != 0;
|
||||
let target = if aa { bd as u32 } else { addr.wrapping_add(bd as u32) };
|
||||
labels.entry(target).or_insert_with(|| format!("loc_{target:08X}"));
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind: XrefKind::Branch });
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Sign-extend the low 16 bits of `val` to an `i32`; the upper half of the
/// input is ignored.
fn sign_ext16(val: u32) -> i32 {
    i32::from(val as u16 as i16)
}
|
||||
|
||||
/// Sign-extend the low 26 bits of `val` to an `i32`; the upper six bits of
/// the input are ignored.
fn sign_ext26(val: u32) -> i32 {
    const SIGN_BIT: u32 = 0x0200_0000;
    let field = val & 0x03FF_FFFF;
    // Flipping the sign bit and subtracting its weight maps the field
    // [0, 2^26) onto [-2^25, 2^25).
    (field ^ SIGN_BIT) as i32 - SIGN_BIT as i32
}
|
||||
|
||||
/// True when `addr` lies inside at least one half-open `(start, end)` range.
fn is_in_ranges(addr: u32, ranges: &[(u32, u32)]) -> bool {
    ranges
        .iter()
        .any(|&(start, end)| (start..end).contains(&addr))
}
|
||||
|
||||
/// Find which section a data address falls in.
|
||||
pub fn section_for_addr<'a>(addr: u32, sections: &'a [PeSection], image_base: u32) -> Option<&'a str> {
|
||||
for s in sections {
|
||||
let start = image_base + s.virtual_address;
|
||||
let end = start + s.virtual_size;
|
||||
if addr >= start && addr < end {
|
||||
return Some(&s.name);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Resolve a source address to "function_name+0xNN" or just "0xADDR".
|
||||
pub fn resolve_source_label(
|
||||
addr: u32,
|
||||
func_analysis: &FuncAnalysis,
|
||||
labels: &HashMap<u32, String>,
|
||||
) -> String {
|
||||
// Direct label hit?
|
||||
if let Some(lbl) = labels.get(&addr) {
|
||||
return lbl.clone();
|
||||
}
|
||||
|
||||
// Find the containing function (largest start <= addr)
|
||||
if let Some((&func_start, _fi)) = func_analysis.functions.range(..=addr).next_back() {
|
||||
if let Some(func_label) = labels.get(&func_start) {
|
||||
let offset = addr - func_start;
|
||||
return format!("{func_label}+0x{offset:X}");
|
||||
}
|
||||
}
|
||||
|
||||
format!("0x{addr:08X}")
|
||||
}
|
||||
Reference in New Issue
Block a user