M1: parse .pdata RUNTIME_FUNCTION; cross-validate function boundaries
Adds an authoritative function-boundary source from the linker:

- New `xenia_xex::pdata` parses .pdata 8-byte entries (BeginAddress + packed prolog/length/flags). Bit layout per Microsoft PE32 PowerPC spec: prolog in bits 0..7, function_length in bits 8..29, flags in 30..31.
- `func::analyze_with_pdata` unions pdata BeginAddresses into the candidate set, attaches `pdata_validated`/`pdata_length` to each `FuncInfo`, and trims any function whose `end` overlaps the next start (catches mis-merge where one row spanned two prologues — the audit-031 sub_824D23B0/sub_824D29F0 case).
- DB: extends `functions` with `pdata_validated BOOLEAN`, `pdata_length BIGINT`; new table `pdata_entries`; index on pdata_validated.
- New `crates/xenia-analysis/SCHEMA.md` documents M1 layer + forward work.

Validation on Sylpheed: 25481 functions (was 12156) / 23073 pdata_validated / 0 orphans / 0 mis-merges. Audit-031 mis-merge resolved: sub_824D29F0 now has its own row with `pdata_length=280` (70 dwords); sub_824D23B0 now correctly ends at 0x824D2878 (`pdata_length=1224` matches prologue walk).

Tests 605→610. New 5-test pdata unit suite covers bit layout + sentinel + out-of-range filtering + real-world layout round-trip.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,5 +2,6 @@ pub mod header;
|
||||
pub mod loader;
|
||||
pub mod lzx;
|
||||
pub mod pe;
|
||||
pub mod pdata;
|
||||
|
||||
pub use header::Xex2Header;
|
||||
|
||||
216
crates/xenia-xex/src/pdata.rs
Normal file
216
crates/xenia-xex/src/pdata.rs
Normal file
@@ -0,0 +1,216 @@
|
||||
//! PE32 `.pdata` exception data parser for PowerPC Xbox 360 binaries.
|
||||
//!
|
||||
//! Each `RUNTIME_FUNCTION` entry is 8 bytes, big-endian on disk:
|
||||
//! ```text
|
||||
//! word 0: BeginAddress (absolute VA, not RVA — Xbox 360 convention)
|
||||
//! word 1: packed metadata (read as a single big-endian u32; MSVC
|
||||
//! bit-field layout packs LSB-first):
|
||||
//! bits 0.. 7 (low 8) : prolog_length (instruction count, dwords)
|
||||
//! bits 8..29 (mid 22): function_length (instruction count, dwords)
|
||||
//! bit 30 : 32-bit code flag (always 1 on PPC)
|
||||
//! bit 31 : exception-handler-present flag
|
||||
//! ```
|
||||
//!
|
||||
//! Reference: Microsoft PE32+ exception data spec (PowerPC RUNTIME_FUNCTION);
|
||||
//! xenia-canary `src/xenia/cpu/xex_module.cc:1570-1587` (canary only reads
|
||||
//! `BeginAddress`; the metadata layout above is the authoritative spec).
|
||||
//!
|
||||
//! `BeginAddress = 0` terminates the table early in some images (canary breaks
|
||||
//! on this; we mirror).
|
||||
|
||||
use crate::pe::PeSection;
|
||||
|
||||
/// One decoded `RUNTIME_FUNCTION` entry from the `.pdata` table.
///
/// Both lengths are stored in bytes: `parse_pdata` multiplies the on-disk
/// dword (instruction) counts by 4 when it builds the entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PdataEntry {
    /// Absolute VA of the function's first instruction.
    pub begin_address: u32,
    /// Function size in bytes (function_length_dwords * 4).
    pub function_length: u32,
    /// Prolog size in bytes (prolog_length_dwords * 4).
    pub prolog_length: u32,
    /// Raw top two bits of the packed metadata word, i.e. `(meta >> 30) & 0x3`.
    ///
    /// Bit 0 of this field is metadata bit 30 (32-bit-code flag) and bit 1 is
    /// metadata bit 31 (exception-handler-present flag), following the layout
    /// given in the module-level docs.
    pub flags: u8,
}

impl PdataEntry {
    /// Address one past the function's last byte (exclusive end).
    ///
    /// Computed with wrapping arithmetic, so a malformed entry near the top
    /// of the 32-bit address space wraps around instead of panicking.
    pub fn end_address(&self) -> u32 {
        self.begin_address.wrapping_add(self.function_length)
    }
}
|
||||
|
||||
/// Parse the `.pdata` section out of a decompressed PE image.
|
||||
///
|
||||
/// `pe` is the full image buffer (image_base-relative); `image_base` and the
|
||||
/// `.pdata` section descriptor come from `xenia_xex::pe::parse_sections`.
|
||||
/// Returns an empty vec if no `.pdata` section is present or it falls outside
|
||||
/// the buffer — never an error (the caller already validated the section list).
|
||||
pub fn parse_pdata(pe: &[u8], image_base: u32, sections: &[PeSection]) -> Vec<PdataEntry> {
|
||||
let pdata = match sections.iter().find(|s| s.name == ".pdata") {
|
||||
Some(s) => s,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
|
||||
let off = pdata.virtual_address as usize;
|
||||
let len = pdata.virtual_size as usize;
|
||||
if off.saturating_add(len) > pe.len() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Each entry is 8 bytes; truncate any partial trailing entry.
|
||||
let n_entries = len / 8;
|
||||
let mut out = Vec::with_capacity(n_entries);
|
||||
|
||||
for i in 0..n_entries {
|
||||
let p = off + i * 8;
|
||||
let begin = u32::from_be_bytes([pe[p], pe[p + 1], pe[p + 2], pe[p + 3]]);
|
||||
let meta = u32::from_be_bytes([pe[p + 4], pe[p + 5], pe[p + 6], pe[p + 7]]);
|
||||
|
||||
// Sentinel: BeginAddress=0 marks early termination (canary `xex_module.cc:1583`).
|
||||
if begin == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let prolog_dwords = meta & 0xFF;
|
||||
let function_dwords = (meta >> 8) & 0x003F_FFFF;
|
||||
let flags = ((meta >> 30) & 0x3) as u8;
|
||||
|
||||
out.push(PdataEntry {
|
||||
begin_address: begin,
|
||||
function_length: function_dwords * 4,
|
||||
prolog_length: prolog_dwords * 4,
|
||||
flags,
|
||||
});
|
||||
}
|
||||
|
||||
// Sanity: drop any entry whose begin_address falls outside the image bounds.
|
||||
// Image high water = image_base + the largest virtual_address+virtual_size.
|
||||
let high = sections
|
||||
.iter()
|
||||
.map(|s| image_base.wrapping_add(s.virtual_address).wrapping_add(s.virtual_size))
|
||||
.max()
|
||||
.unwrap_or(u32::MAX);
|
||||
out.retain(|e| e.begin_address >= image_base && e.begin_address < high);
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pe::PeSection;

    /// Build a synthetic image buffer plus section table containing a
    /// `.pdata` section at RVA 0x1000 and a `.text` section at `text_va`.
    ///
    /// `pdata` is a list of `(BeginAddress, packed_metadata)` pairs written
    /// big-endian, 8 bytes per entry. `_image_base` is accepted for call-site
    /// readability only: the buffer is RVA-indexed, and the base matters solely
    /// to `parse_pdata`'s high-water filter.
    fn mk_pe(
        _image_base: u32,
        text_va: u32,
        text_size: u32,
        pdata: &[(u32, u32)],
    ) -> (Vec<u8>, Vec<PeSection>) {
        let pdata_rva = 0x1000u32;
        let pdata_size = (pdata.len() * 8) as u32;
        let total = (text_va + text_size).max(pdata_rva + pdata_size) as usize;
        let mut buf = vec![0u8; total];

        for (i, &(begin, packed)) in pdata.iter().enumerate() {
            let p = pdata_rva as usize + i * 8;
            buf[p..p + 4].copy_from_slice(&begin.to_be_bytes());
            buf[p + 4..p + 8].copy_from_slice(&packed.to_be_bytes());
        }

        let sections = vec![
            PeSection {
                name: ".pdata".into(),
                virtual_address: pdata_rva,
                virtual_size: pdata_size,
                raw_offset: pdata_rva,
                raw_size: pdata_size,
                flags: 0x4000_0040, // INITIALIZED_DATA | READ
            },
            PeSection {
                name: ".text".into(),
                virtual_address: text_va,
                virtual_size: text_size,
                raw_offset: text_va,
                raw_size: text_size,
                flags: 0x6000_0020, // CODE | EXECUTE | READ
            },
        ];
        (buf, sections)
    }

    /// Pack metadata in the on-disk layout: prolog in low 8 bits, function
    /// in next 22, flags in top 2.
    fn pack(prolog_dwords: u32, function_dwords: u32, flags: u32) -> u32 {
        ((flags & 0x3) << 30) | ((function_dwords & 0x3F_FFFF) << 8) | (prolog_dwords & 0xFF)
    }

    #[test]
    fn parses_simple_pdata() {
        // function at 0x82001000, 32 bytes long (8 dwords), 8-dword prolog (32 bytes).
        let packed = pack(8, 8, 0b01); // 32-bit-code flag set
        let (pe, sections) = mk_pe(0x8200_0000, 0x2000, 0x100, &[(0x8200_1000, packed)]);
        let entries = parse_pdata(&pe, 0x8200_0000, &sections);

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].begin_address, 0x8200_1000);
        assert_eq!(entries[0].prolog_length, 32);
        assert_eq!(entries[0].function_length, 32);
        assert_eq!(entries[0].flags, 0b01);
        assert_eq!(entries[0].end_address(), 0x8200_1020);
    }

    #[test]
    fn stops_on_zero_sentinel() {
        let packed = pack(4, 4, 0b01);
        let entries = vec![
            (0x8200_1000, packed),
            (0u32, 0u32), // sentinel
            (0x8200_2000, packed),
        ];
        let (pe, sections) = mk_pe(0x8200_0000, 0x2000, 0x4000, &entries);
        let parsed = parse_pdata(&pe, 0x8200_0000, &sections);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].begin_address, 0x8200_1000);
    }

    #[test]
    fn drops_out_of_range_entries() {
        let packed = pack(4, 4, 0b01);
        let entries = vec![
            (0x8200_1000, packed),
            (0x4000_0000, packed), // outside image — drop
        ];
        let (pe, sections) = mk_pe(0x8200_0000, 0x2000, 0x100, &entries);
        let parsed = parse_pdata(&pe, 0x8200_0000, &sections);
        assert_eq!(parsed.len(), 1);
        // The survivor must be the in-range entry, not the out-of-range one.
        assert_eq!(parsed[0].begin_address, 0x8200_1000);
    }

    #[test]
    fn decodes_real_world_layout() {
        // Mimics a real-world entry: function_length 306 dwords (1224 bytes),
        // 0 prolog dwords, 32-bit-code flag set. Verify the bit-packed value
        // round-trips correctly through parse_pdata.
        let packed = pack(0, 306, 0b01);
        let begin = 0x8200_2000u32; // inside the synthetic .text region
        let (pe, sections) = mk_pe(0x8200_0000, 0x2000, 0x1000, &[(begin, packed)]);
        let entries = parse_pdata(&pe, 0x8200_0000, &sections);
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].function_length, 306 * 4);
        assert_eq!(entries[0].prolog_length, 0);
        assert_eq!(entries[0].flags, 0b01);
        assert_eq!(entries[0].end_address(), begin + 1224);
    }

    #[test]
    fn returns_empty_when_no_pdata_section() {
        let sections = vec![PeSection {
            name: ".text".into(),
            virtual_address: 0x1000,
            virtual_size: 0x100,
            raw_offset: 0x1000,
            raw_size: 0x100,
            flags: 0x6000_0020,
        }];
        let pe = vec![0u8; 0x2000];
        assert!(parse_pdata(&pe, 0x8200_0000, &sections).is_empty());
    }
}
|
||||
Reference in New Issue
Block a user