feat: add PE image decompression and extraction pipeline (M5)

Implement full decrypt + decompress pipeline for XEX2 PE extraction:
- decompress.rs: None, Basic (zero-fill), and Normal (LZX) decompression
- extract.rs: orchestrates decryption then decompression
- Wire up CLI extract command to write PE files
- LZX decompression via lzxd crate with per-frame chunk processing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-03-28 21:48:23 +01:00
parent ac24488444
commit c665868b1b
7 changed files with 376 additions and 13 deletions

187
src/decompress.rs Normal file
View File

@@ -0,0 +1,187 @@
//! Decompression routines for XEX2 PE image payloads.
//!
//! Supports three compression modes:
//! - None: raw data copy
//! - Basic: block-based data copy with zero-fill gaps
//! - Normal: de-blocking + LZX frame-by-frame decompression
use crate::error::{Result, Xex2Error};
use crate::optional::{BasicCompressionBlock, CompressedBlockInfo};
use crate::util::{read_u16_be, read_u32_be};
/// Produces an owned copy of an uncompressed payload.
///
/// With no compression in effect the payload bytes already are the image, so
/// the input is duplicated unchanged into a freshly allocated buffer.
pub fn decompress_none(data: &[u8]) -> Vec<u8> {
    let mut image = Vec::with_capacity(data.len());
    image.extend_from_slice(data);
    image
}
/// Decompresses basic (zero-fill) compressed data.
///
/// Blocks are processed in order. For each block, `data_size` bytes are copied
/// from the current read position in `data`, then `zero_size` zero bytes are
/// appended to the output. The read position only advances by `data_size`.
///
/// # Errors
///
/// Returns [`Xex2Error::DecompressionFailed`] when:
/// - the block sizes add up to more than `usize` can represent,
/// - the output buffer cannot be allocated, or
/// - a block's data range extends past the end of `data`.
pub fn decompress_basic(data: &[u8], blocks: &[BasicCompressionBlock]) -> Result<Vec<u8>> {
    // Sum with overflow checks: the sizes come straight from the file header
    // and must not be able to wrap the arithmetic below.
    let total_size = blocks
        .iter()
        .try_fold(0usize, |acc, b| {
            acc.checked_add(b.data_size as usize)?
                .checked_add(b.zero_size as usize)
        })
        .ok_or_else(|| {
            Xex2Error::DecompressionFailed(
                "basic compression blocks describe an image too large to address".into(),
            )
        })?;

    // Reserve fallibly so an absurd declared size becomes an error rather than
    // an allocation panic/abort.
    let mut output: Vec<u8> = Vec::new();
    output.try_reserve_exact(total_size).map_err(|e| {
        Xex2Error::DecompressionFailed(format!(
            "cannot allocate {total_size} bytes for decompressed image: {e}"
        ))
    })?;

    let mut src_offset = 0usize;
    for block in blocks {
        let ds = block.data_size as usize;
        let zs = block.zero_size as usize;

        // `checked_add` + `get` covers both "range past end of data" and
        // "offset arithmetic overflowed" without any panicking index.
        let chunk = src_offset
            .checked_add(ds)
            .and_then(|end| data.get(src_offset..end))
            .ok_or_else(|| {
                Xex2Error::DecompressionFailed(format!(
                    "basic block reads past end of data: offset {src_offset} + size {ds} > {}",
                    data.len()
                ))
            })?;

        output.extend_from_slice(chunk);
        // Cannot overflow: the running length is bounded by `total_size`.
        output.resize(output.len() + zs, 0);
        src_offset += ds;
    }
    Ok(output)
}
/// Decompresses normal (LZX) compressed data.
///
/// The payload is a chain of blocks. Every block starts with a 24-byte header
/// (a big-endian `u32` size of the *next* block followed by a 20-byte hash),
/// and the rest of the block is a sequence of chunks, each prefixed by a
/// big-endian `u16` length. A zero chunk length ends the block early. Each
/// chunk is fed to the LZX decoder as one frame producing at most
/// `lzxd::MAX_CHUNK_SIZE` bytes. Decoding stops once `image_size` bytes have
/// been produced or a block size of zero terminates the chain.
///
/// The per-block hashes are skipped; they are not verified here.
///
/// # Errors
///
/// Returns [`Xex2Error::DecompressionFailed`] when:
/// - `window_size` is not one of the supported LZX window sizes,
/// - the output buffer cannot be allocated,
/// - a block extends past `data` or is smaller than its own header,
/// - a chunk extends past its block, or
/// - the LZX decoder rejects a frame.
///
/// Errors from the big-endian read helpers are propagated unchanged.
pub fn decompress_normal(
    data: &[u8],
    window_size: u32,
    first_block: &CompressedBlockInfo,
    image_size: u32,
) -> Result<Vec<u8>> {
    // Bytes occupied by the "next block" size (4) and hash (20) at the start
    // of every block.
    const BLOCK_HEADER_SIZE: usize = 24;

    let ws = match window_size {
        0x8000 => lzxd::WindowSize::KB32,
        0x10000 => lzxd::WindowSize::KB64,
        0x20000 => lzxd::WindowSize::KB128,
        0x40000 => lzxd::WindowSize::KB256,
        other => {
            return Err(Xex2Error::DecompressionFailed(format!(
                "unsupported LZX window size: 0x{other:X}"
            )));
        }
    };
    let mut decoder = lzxd::Lzxd::new(ws);

    // `image_size` comes from the file; reserve fallibly so a bogus value is
    // reported as an error instead of panicking or aborting in the allocator.
    let mut output: Vec<u8> = Vec::new();
    output.try_reserve_exact(image_size as usize).map_err(|e| {
        Xex2Error::DecompressionFailed(format!(
            "cannot allocate {image_size} bytes for decompressed image: {e}"
        ))
    })?;

    let mut remaining = image_size as usize;
    let mut source_offset = 0usize;
    // Only the size of the current block is needed to walk the chain.
    let mut block_size = first_block.block_size as usize;

    while block_size != 0 && remaining > 0 {
        let block_end = source_offset
            .checked_add(block_size)
            .filter(|&end| end <= data.len())
            .ok_or_else(|| {
                Xex2Error::DecompressionFailed(format!(
                    "block at offset {source_offset} extends past data (block_size={}, data_len={})",
                    block_size,
                    data.len()
                ))
            })?;

        // A block that cannot even hold its header is malformed; rejecting it
        // here keeps every header access below inside the block.
        if block_size < BLOCK_HEADER_SIZE {
            return Err(Xex2Error::DecompressionFailed(format!(
                "block at offset {source_offset} is smaller than its {BLOCK_HEADER_SIZE}-byte header (block_size={block_size})"
            )));
        }

        // First header field: size of the block that follows this one.
        let next_block_size = read_u32_be(data, source_offset)? as usize;

        // Skip past the header (size + hash) to the first chunk.
        let mut chunk_offset = source_offset + BLOCK_HEADER_SIZE;

        // Process each compressed chunk (= one LZX frame).
        while chunk_offset < block_end && remaining > 0 {
            let chunk_size = read_u16_be(data, chunk_offset)? as usize;
            chunk_offset += 2;
            if chunk_size == 0 {
                // Zero length marks the end of the chunks in this block.
                break;
            }
            if chunk_offset + chunk_size > block_end {
                return Err(Xex2Error::DecompressionFailed(format!(
                    "chunk at offset {chunk_offset} extends past block end {block_end}"
                )));
            }

            // Each chunk decompresses to up to 32KB (MAX_CHUNK_SIZE); the last
            // frame is clipped to what is still missing from the image.
            let frame_output_size = remaining.min(lzxd::MAX_CHUNK_SIZE);
            let compressed_chunk = &data[chunk_offset..chunk_offset + chunk_size];
            let decompressed = decoder
                .decompress_next(compressed_chunk, frame_output_size)
                .map_err(|e| Xex2Error::DecompressionFailed(format!("LZX error: {e}")))?;
            output.extend_from_slice(decompressed);
            remaining -= decompressed.len();
            chunk_offset += chunk_size;
        }

        // Advance to the next block in the chain.
        source_offset = block_end;
        block_size = next_block_size;
    }
    Ok(output)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The uncompressed path must hand back the input bytes unchanged.
    #[test]
    fn test_decompress_none() {
        let input = vec![1, 2, 3, 4, 5];
        assert_eq!(decompress_none(&input), input);
    }

    /// Two blocks: data is consumed sequentially and each block's zero run is
    /// appended right after its data.
    #[test]
    fn test_decompress_basic_simple() {
        let source = [0xAA, 0xBB, 0xCC, 0xDD];
        let layout = [
            BasicCompressionBlock {
                data_size: 2,
                zero_size: 3,
            },
            BasicCompressionBlock {
                data_size: 2,
                zero_size: 1,
            },
        ];
        let expected: Vec<u8> = vec![0xAA, 0xBB, 0, 0, 0, 0xCC, 0xDD, 0];
        let image = decompress_basic(&source, &layout).unwrap();
        assert_eq!(image, expected);
    }

    /// No data and no blocks yields an empty image.
    #[test]
    fn test_decompress_basic_empty() {
        let image = decompress_basic(&[], &[]).unwrap();
        assert_eq!(image.len(), 0);
    }

    /// A block with only a zero run needs no source bytes at all.
    #[test]
    fn test_decompress_basic_zero_only() {
        let layout = [BasicCompressionBlock {
            data_size: 0,
            zero_size: 10,
        }];
        let image = decompress_basic(&[], &layout).unwrap();
        assert_eq!(image, vec![0u8; 10]);
    }

    /// A block asking for more data than is available must be rejected.
    #[test]
    fn test_decompress_basic_overflow() {
        let source = [0xAA];
        let layout = [BasicCompressionBlock {
            data_size: 100,
            zero_size: 0,
        }];
        let outcome = decompress_basic(&source, &layout);
        assert!(outcome.is_err());
    }
}

68
src/extract.rs Normal file
View File

@@ -0,0 +1,68 @@
//! PE image extraction pipeline: decrypt → decompress → raw PE bytes.
use crate::crypto;
use crate::decompress;
use crate::error::{Result, Xex2Error};
use crate::optional::{CompressionInfo, CompressionType, EncryptionType};
use crate::Xex2File;
/// Extracts the PE image from a parsed XEX2 file.
///
/// `data` is the full XEX2 file. The payload is everything from
/// `xex.header.header_size` to the end of `data`. It is copied, decrypted in
/// place when the file format info declares `EncryptionType::Normal`, and then
/// decompressed according to the declared compression info.
///
/// # Errors
///
/// Returns [`Xex2Error::DecompressionFailed`] when:
/// - the FILE_FORMAT_INFO header is missing,
/// - the file uses delta compression (unsupported), or
/// - `header_size` points past the end of `data`.
///
/// Any error from the selected decompression routine is propagated unchanged.
pub fn extract_pe_image(data: &[u8], xex: &Xex2File) -> Result<Vec<u8>> {
    let fmt = xex
        .optional_headers
        .file_format_info
        .as_ref()
        .ok_or_else(|| {
            Xex2Error::DecompressionFailed("missing FILE_FORMAT_INFO header".into())
        })?;

    let delta_unsupported =
        || Xex2Error::DecompressionFailed("delta compression is not supported".into());

    // Reject delta-compressed files before copying or decrypting anything;
    // there is no point doing that work for a format that cannot be handled.
    if fmt.compression_type == CompressionType::Delta {
        return Err(delta_unsupported());
    }

    let payload_offset = xex.header.header_size as usize;
    // `get` yields `None` exactly when the offset lies beyond the file.
    let raw_payload = data.get(payload_offset..).ok_or_else(|| {
        Xex2Error::DecompressionFailed(format!(
            "header_size (0x{:X}) exceeds file size (0x{:X})",
            payload_offset,
            data.len()
        ))
    })?;

    // Copy payload so we can decrypt in-place
    let mut payload = raw_payload.to_vec();

    // Step 1: Decrypt if needed
    if fmt.encryption_type == EncryptionType::Normal {
        let session_key = crypto::derive_session_key(&xex.security_info.aes_key);
        crypto::decrypt_in_place(&session_key, &mut payload);
    }

    // Step 2: Decompress based on compression type
    let pe_image = match &fmt.compression_info {
        // The owned payload already is the image; returning it avoids copying
        // the whole buffer a second time.
        CompressionInfo::None => payload,
        CompressionInfo::Basic { blocks } => decompress::decompress_basic(&payload, blocks)?,
        CompressionInfo::Normal {
            window_size,
            first_block,
        } => decompress::decompress_normal(
            &payload,
            *window_size,
            first_block,
            xex.security_info.image_size,
        )?,
        CompressionInfo::Delta => return Err(delta_unsupported()),
    };

    Ok(pe_image)
}

View File

@@ -7,8 +7,10 @@
//! structured information from XEX2 files.
pub mod crypto;
pub mod decompress;
pub mod display;
pub mod error;
pub mod extract;
pub mod header;
pub mod optional;
pub mod security;

View File

@@ -45,15 +45,28 @@ fn cmd_inspect(path: &PathBuf) {
}
fn cmd_extract(path: &PathBuf, output: Option<PathBuf>) {
let _output_path = output.unwrap_or_else(|| path.with_extension("exe"));
let output_path = output.unwrap_or_else(|| path.with_extension("exe"));
let data = read_file(path);
let _xex = parse_xex(&data);
let xex = parse_xex(&data);
// TODO(M5): decrypt + decompress pipeline
// TODO(M6): verify PE and write to output_path
eprintln!("Error: extraction not yet implemented (coming in M5/M6)");
process::exit(1);
let pe_image = match xex2tractor::extract::extract_pe_image(&data, &xex) {
Ok(img) => img,
Err(e) => {
eprintln!("Error extracting PE image: {e}");
process::exit(1);
}
};
// TODO(M6): verify PE headers before writing
if let Err(e) = std::fs::write(&output_path, &pe_image) {
eprintln!("Error writing {}: {e}", output_path.display());
process::exit(1);
}
println!("Extracted PE image to {}", output_path.display());
println!(" Input: {} ({} bytes)", path.display(), data.len());
println!(" Output: {} ({} bytes)", output_path.display(), pe_image.len());
}
fn read_file(path: &PathBuf) -> Vec<u8> {