diff --git a/crates/xenia-analysis/SCHEMA.md b/crates/xenia-analysis/SCHEMA.md new file mode 100644 index 0000000..65e707c --- /dev/null +++ b/crates/xenia-analysis/SCHEMA.md @@ -0,0 +1,106 @@ +# `xenia-analysis` schema reference + +Authoritative documentation for the DuckDB tables and SQL views produced by +`xenia-rs dis --db sylpheed.db`. Track schema changes here alongside any +update to the `db_schema_golden` test fixture. + +The base + disasm tables (`metadata`, `sections`, `imports`, `functions`, +`labels`, `instructions`, `xrefs`, opt-in `exec_trace` / `import_calls` / +`branch_trace`) are documented inline in `src/db.rs` doc comment. This file +collects layered analysis additions and forward-work notes. + +--- + +## Layer M1 — `.pdata` boundary correction (landed) + +### Schema additions +- `functions.pdata_validated BOOLEAN NOT NULL` — `true` when the row's + `address` matches a `RUNTIME_FUNCTION.BeginAddress` from `.pdata`. Linker + ground truth. +- `functions.pdata_length BIGINT NULL` — `function_length` (bytes) from the + matching pdata entry; `NULL` when the row is prologue-only. +- New table `pdata_entries(begin_address BIGINT PRIMARY KEY, end_address + BIGINT, function_length BIGINT, prolog_length BIGINT, flags BIGINT)` — every + parsed `.pdata` `RUNTIME_FUNCTION` entry (raw, before any merge with + prologue analysis). +- Index `idx_functions_pdata_validated` on `functions(pdata_validated)`. + +### What this layer does +- Parses `.pdata` 8-byte `RUNTIME_FUNCTION` entries (PowerPC PE32 layout): + word 0 `BeginAddress` (absolute VA), word 1 packed + `{prolog_length:8, function_length:22, flags:2}`, both big-endian. +- Unions pdata `BeginAddress` values into the function-candidate set fed to + the prologue walker, so functions our prologue heuristic missed still get + rows. 
+- When pdata supplies a longer `function_length` than the prologue walk + found, extends `end_address` to the pdata-implied end (catches mis-split + where the walker stopped at an early `blr`). +- After the walker, performs a forward pass that trims `function.end` to the + next start when they overlap (catches mis-merge where one row spanned two + prologues — the audit-031 `sub_824D23B0` / `sub_824D29F0` case). + +### What this layer does NOT do +- Does not adjust prolog-derived `frame_size` / `saved_gprs` from `.pdata`'s + `prolog_length` field — those remain prologue-only inferences. +- Does not classify functions further than the existing `is_leaf` / + `is_saverestore` columns. Class membership is M3. +- Does not detect functions whose entries are missing from BOTH `.pdata` + and the bl-target scan (extremely rare; would require executable-byte + linear sweep). + +### Reference docs +- Microsoft PE32+ exception data spec for PowerPC RUNTIME_FUNCTION. +- xenia-canary `src/xenia/cpu/xex_module.cc:1570-1587` — canary's reference + parser (extracts `BeginAddress` only; we additionally decode word 1). + +### Validation queries +```sql +-- All pdata entries found +SELECT COUNT(*) FROM pdata_entries; -- ~23073 for Sylpheed +-- Functions cross-validated against pdata +SELECT COUNT(*) FROM functions WHERE pdata_validated; +-- Functions detected ONLY by prologue (orphans of pdata) +SELECT COUNT(*) FROM functions WHERE NOT pdata_validated; +-- Pdata orphans NOT yet in functions (should be 0 after this layer) +SELECT COUNT(*) FROM pdata_entries p +LEFT JOIN functions f ON f.address = p.begin_address +WHERE f.address IS NULL; +-- Audit-031 mis-merge resolved: 0x824D29F0 should have its own row +SELECT name FROM functions WHERE address = 2186095088; -- 0x824D29F0 +``` + +--- + +## Layer M2 — MSVC C++ name demangler (planned) + +Adds `demangled_names(address, mangled, namespace_path, class_name, +method_name, params_signature, raw_demangled)`. 
Populates from any label / +import / RTTI string starting with `?`. Falls back to `raw_demangled = mangled` +when the parser cannot decode (e.g. exotic templates). See +`crates/xenia-analysis/src/demangle.rs` (when landed). + +## Layer M3 — Vtable + RTTI detection (planned) + +Adds `vtables`, `methods`, `classes` tables. Heuristic vtable scan over +`.rdata` + `.data`, optional MSVC RTTI `CompleteObjectLocator → TypeDescriptor` +walk, anonymous-class fallback when RTTI is stripped. See +`crates/xenia-analysis/src/vtables.rs` (when landed). + +## Layer M4 — Class-aware probe targeting (planned) + +CLI extension only — no schema changes. `--pc-probe=Class::method` and +`--pc-probe-class=ClassName` resolve via M3's tables. See +`crates/xenia-analysis/src/lookup.rs` (when landed). + +--- + +## Forward work (M5–M12, not yet landed) + +- **M5** — indirect-dispatch reachability via vtable+CTR dataflow. +- **M6** — extended `xrefs.kind='write'` for indexed/byte-reverse/multiword/VMX/DCBZ/atomic stores with `addr_mode` column. +- **M7** — `.rdata` ASCII / UTF-16 string pool detection cross-referenced with PCs. +- **M8** — dispatch-table heuristics beyond vtables (e.g. function-pointer arrays in `.data`). +- **M9** — `__CxxFrameHandler` exception scope-table parsing. +- **M10** — `.tls` section / TLS slot tracking. +- **M11** — `__xc_a` / `__xc_z` static-initializer driver detection. +- **M12** — comparative-PC-trace mode for canary diff (runtime side, not analyzer). 
diff --git a/crates/xenia-analysis/src/db.rs b/crates/xenia-analysis/src/db.rs index 4c1c6df..02fab6a 100644 --- a/crates/xenia-analysis/src/db.rs +++ b/crates/xenia-analysis/src/db.rs @@ -314,13 +314,23 @@ impl DbWriter { ) -> anyhow::Result<()> { self.conn.execute_batch(" CREATE TABLE functions ( - address BIGINT PRIMARY KEY, -- absolute VA of entry point - name VARCHAR NOT NULL, -- symbol name, or sub_XXXXXXXX if unresolved - end_address BIGINT NOT NULL, -- VA of last instruction + 4 (exclusive end) - frame_size BIGINT NOT NULL, -- stack frame size in bytes (from prologue) - saved_gprs BIGINT NOT NULL, -- bitmask of GPRs saved in prologue (bit N = rN) - is_leaf BOOLEAN NOT NULL, -- true if the function has no outgoing calls - is_saverestore BOOLEAN NOT NULL -- true if __savegprlr_* / __restgprlr_* stub + address BIGINT PRIMARY KEY, -- absolute VA of entry point + name VARCHAR NOT NULL, -- symbol name, or sub_XXXXXXXX if unresolved + end_address BIGINT NOT NULL, -- VA of last instruction + 4 (exclusive end) + frame_size BIGINT NOT NULL, -- stack frame size in bytes (from prologue) + saved_gprs BIGINT NOT NULL, -- bitmask of GPRs saved in prologue (bit N = rN) + is_leaf BOOLEAN NOT NULL, -- true if the function has no outgoing calls + is_saverestore BOOLEAN NOT NULL, -- true if __savegprlr_* / __restgprlr_* stub + pdata_validated BOOLEAN NOT NULL, -- true if .pdata RUNTIME_FUNCTION exists at this VA + pdata_length BIGINT -- length in bytes per .pdata; NULL if no pdata entry + ); + + CREATE TABLE pdata_entries ( + begin_address BIGINT PRIMARY KEY, -- absolute VA of function start (RUNTIME_FUNCTION.BeginAddress) + end_address BIGINT NOT NULL, -- begin_address + function_length (exclusive) + function_length BIGINT NOT NULL, -- function size in bytes + prolog_length BIGINT NOT NULL, -- prolog size in bytes + flags BIGINT NOT NULL -- raw 2-bit flags (bit 0=32-bit-code, bit 1=exception) ); CREATE TABLE labels ( @@ -341,11 +351,13 @@ impl DbWriter { ")?; 
insert_functions(&self.conn, func_analysis, labels)?; + insert_pdata_entries(&self.conn, &func_analysis.pdata_entries)?; insert_labels(&self.conn, labels)?; insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?; let indices = [ ("idx_functions_name", "CREATE INDEX idx_functions_name ON functions(name)"), + ("idx_functions_pdata_validated", "CREATE INDEX idx_functions_pdata_validated ON functions(pdata_validated)"), ("idx_labels_kind", "CREATE INDEX idx_labels_kind ON labels(kind)"), ("idx_labels_name", "CREATE INDEX idx_labels_name ON labels(name)"), ("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"), @@ -680,8 +692,10 @@ fn insert_functions( labels: &HashMap, ) -> anyhow::Result<()> { let mut stmt = conn.prepare( - "INSERT INTO functions (address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore) - VALUES (?, ?, ?, ?, ?, ?, ?)" + "INSERT INTO functions + (address, name, end_address, frame_size, saved_gprs, is_leaf, is_saverestore, + pdata_validated, pdata_length) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)" )?; for (&addr, fi) in &func_analysis.functions { let name = labels.get(&addr) @@ -695,6 +709,33 @@ fn insert_functions( fi.saved_gprs as i64, fi.is_leaf, fi.is_saverestore, + fi.pdata_validated, + fi.pdata_length.map(|n| n as i64), + ])?; + } + Ok(()) +} + +fn insert_pdata_entries( + conn: &Connection, + entries: &[xenia_xex::pdata::PdataEntry], +) -> anyhow::Result<()> { + if entries.is_empty() { + return Ok(()); + } + let mut stmt = conn.prepare( + "INSERT INTO pdata_entries + (begin_address, end_address, function_length, prolog_length, flags) + VALUES (?, ?, ?, ?, ?) 
+ ON CONFLICT DO NOTHING" + )?; + for e in entries { + stmt.execute(params![ + e.begin_address as i64, + e.end_address() as i64, + e.function_length as i64, + e.prolog_length as i64, + e.flags as i64, ])?; } Ok(()) diff --git a/crates/xenia-analysis/src/func.rs b/crates/xenia-analysis/src/func.rs index 16a07d0..0790007 100644 --- a/crates/xenia-analysis/src/func.rs +++ b/crates/xenia-analysis/src/func.rs @@ -32,6 +32,13 @@ pub struct FuncInfo { pub is_leaf: bool, /// True if this is a save/restore GPR helper stub. pub is_saverestore: bool, + /// True if `.pdata` has a RUNTIME_FUNCTION whose `BeginAddress` matches `start`. + /// Authoritative ground truth from the linker; rows without this flag are + /// prologue-detected only and may carry boundary errors. + pub pdata_validated: bool, + /// Function size in bytes per `.pdata`'s `function_length` field, if known. + /// Absent (None) when this row is prologue-only. + pub pdata_length: Option, } /// Result of the function analysis pass. @@ -42,6 +49,9 @@ pub struct FuncAnalysis { pub save_gpr_base: Option, /// Addresses in the restore-GPR region (start of __restgprlr block). pub restore_gpr_base: Option, + /// Raw `.pdata` entries from the binary, in original order. Empty when no + /// `.pdata` was supplied. Mirrored into the DB as `pdata_entries`. + pub pdata_entries: Vec, } // ── Instruction field helpers ────────────────────────────────────────────── @@ -190,6 +200,29 @@ pub fn analyze( image_base: u32, entry_point: u32, code_sections: &[(u32, u32, u32)], // (va_start, va_size, flags) +) -> FuncAnalysis { + analyze_with_pdata(pe, image_base, entry_point, code_sections, &[]) +} + +/// Same as [`analyze`] but also unions `.pdata` `RUNTIME_FUNCTION` entries +/// into the candidate set. Each surviving function whose start matches a pdata +/// `BeginAddress` carries `pdata_validated = true` and `pdata_length` set to +/// the linker-supplied length, whether or not it agrees with the prologue walk. 
+/// +/// Pdata entries that have no prologue match (orphans) are still emitted, +/// using the linker-supplied length to bound the function. +/// +/// What this layer does NOT do: +/// - Does not edit the `prolog_length` we'd derive from prologue analysis; +/// `frame_size` and `saved_gprs` remain best-effort prologue inferences. +/// - Does not infer base/derived call edges — that's M3+M5. +#[tracing::instrument(skip_all, fields(image_base = format_args!("{:#010x}", image_base), entry_point = format_args!("{:#010x}", entry_point), pdata_entries = pdata.len()))] +pub fn analyze_with_pdata( + pe: &[u8], + image_base: u32, + entry_point: u32, + code_sections: &[(u32, u32, u32)], + pdata: &[xenia_xex::pdata::PdataEntry], ) -> FuncAnalysis { let started = std::time::Instant::now(); let code_ranges: Vec<(u32, u32)> = code_sections.iter() @@ -216,7 +249,8 @@ pub fn analyze( for i in 0..21 { saverestore_addrs.insert(rb + i * 4); } } - // 2. Collect all bl targets as candidate function entries + // 2. Collect all bl targets as candidate function entries. + // Union: bl targets ∪ pdata BeginAddresses ∪ entry_point. let mut call_targets: HashSet = HashSet::new(); call_targets.insert(entry_point); @@ -233,14 +267,58 @@ pub fn analyze( addr += 4; } } - tracing::debug!(candidates = call_targets.len(), "bl targets collected"); - // 3. For each candidate, detect prologue and walk to epilogue + // Index pdata by begin_address for O(1) prologue → length lookup. + let pdata_by_begin: HashMap = + pdata.iter().map(|e| (e.begin_address, e)).collect(); + for e in pdata { + if !saverestore_addrs.contains(&e.begin_address) { + call_targets.insert(e.begin_address); + } + } + tracing::debug!( + candidates = call_targets.len(), + pdata_entries = pdata.len(), + "function candidates (bl ∪ pdata)" + ); + + // 3. For each candidate, detect prologue and walk to epilogue. 
Pdata + // metadata is layered on after the prologue walk so a missing prologue + // still yields an entry when pdata covers it. let mut functions: BTreeMap = BTreeMap::new(); for &func_addr in &call_targets { - if let Some(fi) = analyze_function(pe, image_base, func_addr, &code_ranges, save_base, restore_base) { + let pdata_entry = pdata_by_begin.get(&func_addr).copied(); + + if let Some(mut fi) = analyze_function( + pe, image_base, func_addr, &code_ranges, save_base, restore_base, + ) { + if let Some(p) = pdata_entry { + fi.pdata_validated = true; + fi.pdata_length = Some(p.function_length); + // If the prologue walk ended too early, trust pdata's length. + let pdata_end = p.begin_address.wrapping_add(p.function_length); + if pdata_end > fi.end { + fi.end = pdata_end; + } + } functions.insert(func_addr, fi); + } else if let Some(p) = pdata_entry { + // Orphan: pdata claims a function here but no prologue matched. + // Emit a synthetic entry so the row exists for downstream queries. + functions.insert( + func_addr, + FuncInfo { + start: func_addr, + end: p.begin_address.wrapping_add(p.function_length), + frame_size: 0, + saved_gprs: 0, + is_leaf: false, + is_saverestore: false, + pdata_validated: true, + pdata_length: Some(p.function_length), + }, + ); } } @@ -255,6 +333,8 @@ pub fn analyze( saved_gprs: 18, is_leaf: true, is_saverestore: true, + pdata_validated: pdata_by_begin.contains_key(&sb), + pdata_length: pdata_by_begin.get(&sb).map(|p| p.function_length), }); } if let Some(rb) = restore_base { @@ -265,13 +345,33 @@ pub fn analyze( saved_gprs: 18, is_leaf: true, is_saverestore: true, + pdata_validated: pdata_by_begin.contains_key(&rb), + pdata_length: pdata_by_begin.get(&rb).map(|p| p.function_length), }); } + // 5. Fix up `end_address` collisions: if function A's `end` overlaps + // function B's `start` (B > A), trim A. This catches mis-merged + // prologue walks where pdata revealed an interleaved second prologue. + // We do this in a single forward pass. 
+ let starts: Vec = functions.keys().copied().collect(); + for i in 0..starts.len().saturating_sub(1) { + let cur = starts[i]; + let next = starts[i + 1]; + if let Some(fi) = functions.get_mut(&cur) + && fi.end > next + { + fi.end = next; + } + } + let elapsed_ms = started.elapsed().as_millis() as f64; metrics::histogram!("analysis.phase_ms", "phase" => "functions").record(elapsed_ms); + let pdata_validated_count = functions.values().filter(|f| f.pdata_validated).count(); tracing::info!( functions = functions.len(), + pdata_entries = pdata.len(), + pdata_validated = pdata_validated_count, elapsed_ms, "function detection complete" ); @@ -280,6 +380,7 @@ pub fn analyze( functions, save_gpr_base: save_base, restore_gpr_base: restore_base, + pdata_entries: pdata.to_vec(), } } @@ -395,6 +496,8 @@ fn analyze_function( saved_gprs, is_leaf, is_saverestore: false, + pdata_validated: false, + pdata_length: None, }) } diff --git a/crates/xenia-analysis/tests/db_schema_golden.rs b/crates/xenia-analysis/tests/db_schema_golden.rs index 6bb2b45..821a548 100644 --- a/crates/xenia-analysis/tests/db_schema_golden.rs +++ b/crates/xenia-analysis/tests/db_schema_golden.rs @@ -65,12 +65,15 @@ fn synthetic_func_analysis(image_base: u32) -> FuncAnalysis { saved_gprs: 0, is_leaf: true, is_saverestore: false, + pdata_validated: false, + pdata_length: None, }, ); FuncAnalysis { functions, save_gpr_base: None, restore_gpr_base: None, + pdata_entries: Vec::new(), } } @@ -154,6 +157,15 @@ fn db_schema_matches_expected_columns() { ("saved_gprs", "BIGINT"), ("is_leaf", "BOOLEAN"), ("is_saverestore", "BOOLEAN"), + ("pdata_validated", "BOOLEAN"), + ("pdata_length", "BIGINT"), + ]), + ("pdata_entries", &[ + ("begin_address", "BIGINT"), + ("end_address", "BIGINT"), + ("function_length", "BIGINT"), + ("prolog_length", "BIGINT"), + ("flags", "BIGINT"), ]), ("labels", &[ ("address", "BIGINT"), diff --git a/crates/xenia-app/src/main.rs b/crates/xenia-app/src/main.rs index 50cecbb..8e6ef0e 100644 --- 
a/crates/xenia-app/src/main.rs +++ b/crates/xenia-app/src/main.rs @@ -4024,13 +4024,21 @@ fn cmd_dis( } info!(thunks = import_map.len(), "resolved import thunks"); - // Function analysis + // Function analysis (with .pdata-validated boundaries when present) let code_sections: Vec<(u32, u32, u32)> = sections.iter() .filter(|s| s.is_code()) .map(|s| (s.virtual_address, s.virtual_size, s.flags)) .collect(); - let func_analysis = xenia_analysis::func::analyze(&pe_image, base, entry, &code_sections); - info!(functions = func_analysis.functions.len(), "function detection complete"); + let pdata_entries = xenia_xex::pdata::parse_pdata(&pe_image, base, §ions); + info!(pdata_entries = pdata_entries.len(), "parsed .pdata RUNTIME_FUNCTION entries"); + let func_analysis = xenia_analysis::func::analyze_with_pdata( + &pe_image, base, entry, &code_sections, &pdata_entries, + ); + info!( + functions = func_analysis.functions.len(), + pdata_validated = func_analysis.functions.values().filter(|f| f.pdata_validated).count(), + "function detection complete", + ); // Cross-reference analysis let xref_result = xenia_analysis::xref::analyze_xrefs( diff --git a/crates/xenia-xex/src/lib.rs b/crates/xenia-xex/src/lib.rs index 755a618..c6490f9 100644 --- a/crates/xenia-xex/src/lib.rs +++ b/crates/xenia-xex/src/lib.rs @@ -2,5 +2,6 @@ pub mod header; pub mod loader; pub mod lzx; pub mod pe; +pub mod pdata; pub use header::Xex2Header; diff --git a/crates/xenia-xex/src/pdata.rs b/crates/xenia-xex/src/pdata.rs new file mode 100644 index 0000000..8b78466 --- /dev/null +++ b/crates/xenia-xex/src/pdata.rs @@ -0,0 +1,216 @@ +//! PE32 `.pdata` exception data parser for PowerPC Xbox 360 binaries. +//! +//! Each `RUNTIME_FUNCTION` entry is 8 bytes, big-endian on disk: +//! ```text +//! word 0: BeginAddress (absolute VA, not RVA — Xbox 360 convention) +//! word 1: packed metadata (read as a single big-endian u32; MSVC +//! bit-field layout packs LSB-first): +//! bits 0.. 
7 (low 8) : prolog_length (instruction count, dwords) +//! bits 8..29 (mid 22): function_length (instruction count, dwords) +//! bit 30 : 32-bit code flag (always 1 on PPC) +//! bit 31 : exception-handler-present flag +//! ``` +//! +//! Reference: Microsoft PE32+ exception data spec (PowerPC RUNTIME_FUNCTION); +//! xenia-canary `src/xenia/cpu/xex_module.cc:1570-1587` (canary only reads +//! `BeginAddress`; the metadata layout above is the authoritative spec). +//! +//! `BeginAddress = 0` terminates the table early in some images (canary breaks +//! on this; we mirror). + +use crate::pe::PeSection; + +/// One parsed `RUNTIME_FUNCTION` entry. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PdataEntry { + /// Absolute VA of the function's first instruction. + pub begin_address: u32, + /// Function size in bytes (function_length_dwords * 4). + pub function_length: u32, + /// Prolog size in bytes (prolog_length_dwords * 4). + pub prolog_length: u32, + /// Raw 2-bit flags from the packed word (bit 0 = 32-bit-code, bit 1 = exception). + pub flags: u8, +} + +impl PdataEntry { + /// One-past-the-last instruction (exclusive). + pub fn end_address(&self) -> u32 { + self.begin_address.wrapping_add(self.function_length) + } +} + +/// Parse the `.pdata` section out of a decompressed PE image. +/// +/// `pe` is the full image buffer (image_base-relative); `image_base` and the +/// `.pdata` section descriptor come from `xenia_xex::pe::parse_sections`. +/// Returns an empty vec if no `.pdata` section is present or it falls outside +/// the buffer — never an error (the caller already validated the section list). 
+pub fn parse_pdata(pe: &[u8], image_base: u32, sections: &[PeSection]) -> Vec { + let pdata = match sections.iter().find(|s| s.name == ".pdata") { + Some(s) => s, + None => return Vec::new(), + }; + + let off = pdata.virtual_address as usize; + let len = pdata.virtual_size as usize; + if off.saturating_add(len) > pe.len() { + return Vec::new(); + } + + // Each entry is 8 bytes; truncate any partial trailing entry. + let n_entries = len / 8; + let mut out = Vec::with_capacity(n_entries); + + for i in 0..n_entries { + let p = off + i * 8; + let begin = u32::from_be_bytes([pe[p], pe[p + 1], pe[p + 2], pe[p + 3]]); + let meta = u32::from_be_bytes([pe[p + 4], pe[p + 5], pe[p + 6], pe[p + 7]]); + + // Sentinel: BeginAddress=0 marks early termination (canary `xex_module.cc:1583`). + if begin == 0 { + break; + } + + let prolog_dwords = meta & 0xFF; + let function_dwords = (meta >> 8) & 0x003F_FFFF; + let flags = ((meta >> 30) & 0x3) as u8; + + out.push(PdataEntry { + begin_address: begin, + function_length: function_dwords * 4, + prolog_length: prolog_dwords * 4, + flags, + }); + } + + // Sanity: drop any entry whose begin_address falls outside the image bounds. + // Image high water = image_base + the largest virtual_address+virtual_size. + let high = sections + .iter() + .map(|s| image_base.wrapping_add(s.virtual_address).wrapping_add(s.virtual_size)) + .max() + .unwrap_or(u32::MAX); + out.retain(|e| e.begin_address >= image_base && e.begin_address < high); + + out +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::pe::PeSection; + + fn mk_pe(image_base: u32, text_va: u32, text_size: u32, pdata: &[(u32, u32)]) -> (Vec, Vec) { + // Build a synthetic PE image with .text and .pdata. + // Layout: pdata at RVA 0x1000, .text at RVA 0x2000. 
+ let pdata_rva = 0x1000u32; + let pdata_size = (pdata.len() * 8) as u32; + let total = (text_va + text_size).max(pdata_rva + pdata_size) as usize; + let mut buf = vec![0u8; total]; + + for (i, &(begin, packed)) in pdata.iter().enumerate() { + let p = pdata_rva as usize + i * 8; + buf[p..p + 4].copy_from_slice(&begin.to_be_bytes()); + buf[p + 4..p + 8].copy_from_slice(&packed.to_be_bytes()); + } + + let sections = vec![ + PeSection { + name: ".pdata".into(), + virtual_address: pdata_rva, + virtual_size: pdata_size, + raw_offset: pdata_rva, + raw_size: pdata_size, + flags: 0x4000_0040, // INITIALIZED_DATA | READ + }, + PeSection { + name: ".text".into(), + virtual_address: text_va, + virtual_size: text_size, + raw_offset: text_va, + raw_size: text_size, + flags: 0x6000_0020, // CODE | EXECUTE | READ + }, + ]; + let _ = image_base; // image_base only matters for high-water bound + (buf, sections) + } + + /// Pack metadata in the on-disk layout: prolog in low 8 bits, function + /// in next 22, flags in top 2. + fn pack(prolog_dwords: u32, function_dwords: u32, flags: u32) -> u32 { + ((flags & 0x3) << 30) | ((function_dwords & 0x3F_FFFF) << 8) | (prolog_dwords & 0xFF) + } + + #[test] + fn parses_simple_pdata() { + // function at 0x82001000, 32 bytes long (8 dwords), 8-dword prolog (32 bytes). 
+ let packed = pack(8, 8, 0b01); // 32-bit-code flag set + let (pe, sections) = mk_pe(0x8200_0000, 0x2000, 0x100, &[(0x8200_1000, packed)]); + let entries = parse_pdata(&pe, 0x8200_0000, §ions); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].begin_address, 0x8200_1000); + assert_eq!(entries[0].prolog_length, 32); + assert_eq!(entries[0].function_length, 32); + assert_eq!(entries[0].flags, 0b01); + assert_eq!(entries[0].end_address(), 0x8200_1020); + } + + #[test] + fn stops_on_zero_sentinel() { + let packed = pack(4, 4, 0b01); + let entries = vec![ + (0x8200_1000, packed), + (0u32, 0u32), // sentinel + (0x8200_2000, packed), + ]; + let (pe, sections) = mk_pe(0x8200_0000, 0x2000, 0x4000, &entries); + let parsed = parse_pdata(&pe, 0x8200_0000, §ions); + assert_eq!(parsed.len(), 1); + assert_eq!(parsed[0].begin_address, 0x8200_1000); + } + + #[test] + fn drops_out_of_range_entries() { + let packed = pack(4, 4, 0b01); + let entries = vec![ + (0x8200_1000, packed), + (0x4000_0000, packed), // outside image — drop + ]; + let (pe, sections) = mk_pe(0x8200_0000, 0x2000, 0x100, &entries); + let parsed = parse_pdata(&pe, 0x8200_0000, §ions); + assert_eq!(parsed.len(), 1); + } + + #[test] + fn decodes_real_world_layout() { + // Mimics a real-world entry: function_length 306 dwords (1224 bytes), + // 0 prolog dwords, 32-bit-code flag set. Verify the bit-packed value + // round-trips correctly through parse_pdata. 
+ let packed = pack(0, 306, 0b01); + let begin = 0x8200_2000u32; // inside the synthetic .text region + let (pe, sections) = mk_pe(0x8200_0000, 0x2000, 0x1000, &[(begin, packed)]); + let entries = parse_pdata(&pe, 0x8200_0000, §ions); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].function_length, 306 * 4); + assert_eq!(entries[0].prolog_length, 0); + assert_eq!(entries[0].flags, 0b01); + assert_eq!(entries[0].end_address(), begin + 1224); + } + + #[test] + fn returns_empty_when_no_pdata_section() { + let sections = vec![PeSection { + name: ".text".into(), + virtual_address: 0x1000, + virtual_size: 0x100, + raw_offset: 0x1000, + raw_size: 0x100, + flags: 0x6000_0020, + }]; + let pe = vec![0u8; 0x2000]; + assert!(parse_pdata(&pe, 0x8200_0000, §ions).is_empty()); + } +}