From c665868b1b18f10f40a192e0e9a730a78d1c27e0 Mon Sep 17 00:00:00 2001
From: MechaCat02
Date: Sat, 28 Mar 2026 21:48:23 +0100
Subject: [PATCH] feat: add PE image decompression and extraction pipeline (M5)

Implement full decrypt + decompress pipeline for XEX2 PE extraction:
- decompress.rs: None, Basic (zero-fill), and Normal (LZX) decompression
- extract.rs: orchestrates decryption then decompression
- Wire up CLI extract command to write PE files
- LZX decompression via lzxd crate with per-frame chunk processing

Co-Authored-By: Claude Opus 4.6
---
 Cargo.lock           |   9 +-
 Cargo.toml           |   3 +-
 src/decompress.rs    | 245 +++++++++++++++++++++++++++++++++++++++++++
 src/extract.rs       |  74 +++++++++++++
 src/lib.rs           |   2 +
 src/main.rs          |  25 +++--
 tests/integration.rs |  95 ++++++++++++++++-
 7 files changed, 440 insertions(+), 13 deletions(-)
 create mode 100644 src/decompress.rs
 create mode 100644 src/extract.rs

diff --git a/Cargo.lock b/Cargo.lock
index 2d35860..8ff55cc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -200,6 +200,12 @@ version = "0.2.183"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
 
+[[package]]
+name = "lzxd"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b29dffab797218e12e4df08ef5d15ab9efca2504038b1b32b9b32fc844b39c9"
+
 [[package]]
 name = "once_cell_polyfill"
 version = "1.70.2"
@@ -282,9 +288,10 @@ dependencies = [
 
 [[package]]
 name = "xex2tractor"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "aes",
  "cbc",
  "clap",
+ "lzxd",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index a5961cf..ef00f96 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "xex2tractor"
-version = "0.4.0"
+version = "0.5.0"
 edition = "2024"
 description = "A tool for extracting and inspecting Xbox 360 XEX2 executable files"
 license = "MIT"
@@ -9,3 +9,4 @@ license = "MIT"
 aes = "0.8.4"
 cbc = "0.1.2"
 clap = { version = "4.6.0", features = ["derive"] }
+lzxd = "0.2.6"
diff --git a/src/decompress.rs b/src/decompress.rs
new file mode 100644
index 0000000..534131e
--- /dev/null
+++ b/src/decompress.rs
@@ -0,0 +1,245 @@
+//! Decompression routines for XEX2 PE image payloads.
+//!
+//! Supports three compression modes:
+//! - None: raw data copy
+//! - Basic: block-based data copy with zero-fill gaps
+//! - Normal: de-blocking + LZX frame-by-frame decompression
+
+use crate::error::{Result, Xex2Error};
+use crate::optional::{BasicCompressionBlock, CompressedBlockInfo};
+use crate::util::{read_u16_be, read_u32_be};
+
+/// Size of the header at the start of every normal-compression block: the
+/// 4-byte big-endian size of the next block followed by its 20-byte hash.
+const BLOCK_HEADER_SIZE: usize = 24;
+
+/// Size of the big-endian length prefix in front of every compressed chunk.
+const CHUNK_SIZE_FIELD: usize = 2;
+
+/// Returns the payload data as-is (no compression).
+pub fn decompress_none(data: &[u8]) -> Vec<u8> {
+    data.to_vec()
+}
+
+/// Decompresses basic (zero-fill) compressed data.
+///
+/// Each block specifies `data_size` bytes to copy from the source, followed by
+/// `zero_size` bytes of zeros.
+///
+/// # Errors
+///
+/// Returns [`Xex2Error::DecompressionFailed`] if a block asks for more source
+/// bytes than `data` still holds.
+pub fn decompress_basic(data: &[u8], blocks: &[BasicCompressionBlock]) -> Result<Vec<u8>> {
+    let total_size: u64 = blocks
+        .iter()
+        .map(|b| b.data_size as u64 + b.zero_size as u64)
+        .sum();
+    let mut output = Vec::with_capacity(total_size as usize);
+    let mut src_offset = 0usize;
+
+    for block in blocks {
+        let ds = block.data_size as usize;
+        let zs = block.zero_size as usize;
+
+        // `checked_add` + `get` reject out-of-range blocks without a panic
+        // path, even where `usize` is only 32 bits wide.
+        let chunk = src_offset
+            .checked_add(ds)
+            .and_then(|end| data.get(src_offset..end))
+            .ok_or_else(|| {
+                Xex2Error::DecompressionFailed(format!(
+                    "basic block reads past end of data: offset {src_offset} + size {ds} > {}",
+                    data.len()
+                ))
+            })?;
+
+        output.extend_from_slice(chunk);
+        output.resize(output.len() + zs, 0);
+        src_offset += ds;
+    }
+
+    Ok(output)
+}
+
+/// Decompresses normal (LZX) compressed data.
+///
+/// Walks the chained block structure, extracting compressed LZX frames, then
+/// decompresses each frame using the lzxd crate. Each 2-byte chunk_size within
+/// a block corresponds to one LZX frame of up to 32KB uncompressed output.
+///
+/// Only `first_block.block_size` is used; block hashes are not verified here.
+///
+/// # Errors
+///
+/// Returns [`Xex2Error::DecompressionFailed`] if `window_size` is unsupported,
+/// a block or chunk extends past its container, a block is too small to hold
+/// its own header, or the LZX decoder rejects a frame.
+pub fn decompress_normal(
+    data: &[u8],
+    window_size: u32,
+    first_block: &CompressedBlockInfo,
+    image_size: u32,
+) -> Result<Vec<u8>> {
+    let ws = match window_size {
+        0x8000 => lzxd::WindowSize::KB32,
+        0x10000 => lzxd::WindowSize::KB64,
+        0x20000 => lzxd::WindowSize::KB128,
+        0x40000 => lzxd::WindowSize::KB256,
+        other => {
+            return Err(Xex2Error::DecompressionFailed(format!(
+                "unsupported LZX window size: 0x{other:X}"
+            )));
+        }
+    };
+
+    let mut decoder = lzxd::Lzxd::new(ws);
+    let mut output = Vec::with_capacity(image_size as usize);
+    let mut remaining = image_size as usize;
+    let mut source_offset = 0usize;
+    let mut block_size = first_block.block_size as usize;
+
+    while block_size != 0 && remaining > 0 {
+        // `source_offset` never exceeds `data.len()`, so this cannot underflow.
+        if block_size > data.len() - source_offset {
+            return Err(Xex2Error::DecompressionFailed(format!(
+                "block at offset {source_offset} extends past data (block_size={block_size}, data_len={})",
+                data.len()
+            )));
+        }
+
+        // A block shorter than its header used to panic when the hash was
+        // sliced out; report it as corrupt input instead.
+        if block_size < BLOCK_HEADER_SIZE {
+            return Err(Xex2Error::DecompressionFailed(format!(
+                "block at offset {source_offset} is smaller than its header (block_size={block_size})"
+            )));
+        }
+
+        let block_end = source_offset + block_size;
+
+        // The header describes the *next* block; only its size is needed to
+        // walk the chain.
+        let next_block_size = read_u32_be(data, source_offset)? as usize;
+
+        // Skip past the 24-byte block header
+        let mut chunk_offset = source_offset + BLOCK_HEADER_SIZE;
+
+        // Process each compressed chunk (= one LZX frame). Requiring room for
+        // the whole length prefix keeps the read inside the current block.
+        while chunk_offset + CHUNK_SIZE_FIELD <= block_end && remaining > 0 {
+            let chunk_size = read_u16_be(data, chunk_offset)? as usize;
+            chunk_offset += CHUNK_SIZE_FIELD;
+
+            if chunk_size == 0 {
+                break;
+            }
+
+            if chunk_size > block_end - chunk_offset {
+                return Err(Xex2Error::DecompressionFailed(format!(
+                    "chunk at offset {chunk_offset} extends past block end {block_end}"
+                )));
+            }
+
+            // Each chunk decompresses to up to 32KB (MAX_CHUNK_SIZE)
+            let frame_output_size = remaining.min(lzxd::MAX_CHUNK_SIZE);
+            let compressed_chunk = &data[chunk_offset..chunk_offset + chunk_size];
+
+            let decompressed = decoder
+                .decompress_next(compressed_chunk, frame_output_size)
+                .map_err(|e| Xex2Error::DecompressionFailed(format!("LZX error: {e}")))?;
+
+            output.extend_from_slice(decompressed);
+            remaining = remaining.saturating_sub(decompressed.len());
+            chunk_offset += chunk_size;
+        }
+
+        // Advance to next block
+        source_offset = block_end;
+        block_size = next_block_size;
+    }
+
+    Ok(output)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_decompress_none() {
+        let data = vec![1, 2, 3, 4, 5];
+        let result = decompress_none(&data);
+        assert_eq!(result, data);
+    }
+
+    #[test]
+    fn test_decompress_basic_simple() {
+        let data = vec![0xAA, 0xBB, 0xCC, 0xDD];
+        let blocks = vec![
+            BasicCompressionBlock {
+                data_size: 2,
+                zero_size: 3,
+            },
+            BasicCompressionBlock {
+                data_size: 2,
+                zero_size: 1,
+            },
+        ];
+        let result = decompress_basic(&data, &blocks).unwrap();
+        assert_eq!(result, vec![0xAA, 0xBB, 0, 0, 0, 0xCC, 0xDD, 0]);
+    }
+
+    #[test]
+    fn test_decompress_basic_empty() {
+        let result = decompress_basic(&[], &[]).unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[test]
+    fn test_decompress_basic_zero_only() {
+        let blocks = vec![BasicCompressionBlock {
+            data_size: 0,
+            zero_size: 10,
+        }];
+        let result = decompress_basic(&[], &blocks).unwrap();
+        assert_eq!(result, vec![0u8; 10]);
+    }
+
+    #[test]
+    fn test_decompress_basic_overflow() {
+        let data = vec![0xAA];
+        let blocks = vec![BasicCompressionBlock {
+            data_size: 100,
+            zero_size: 0,
+        }];
+        assert!(decompress_basic(&data, &blocks).is_err());
+    }
+
+    #[test]
+    fn test_decompress_normal_unsupported_window() {
+        let first = CompressedBlockInfo {
+            block_size: 0,
+            block_hash: [0u8; 20],
+        };
+        assert!(decompress_normal(&[], 0x1234, &first, 0).is_err());
+    }
+
+    #[test]
+    fn test_decompress_normal_block_past_end() {
+        let first = CompressedBlockInfo {
+            block_size: 64,
+            block_hash: [0u8; 20],
+        };
+        assert!(decompress_normal(&[0u8; 32], 0x8000, &first, 16).is_err());
+    }
+
+    #[test]
+    fn test_decompress_normal_block_smaller_than_header() {
+        let first = CompressedBlockInfo {
+            block_size: 8,
+            block_hash: [0u8; 20],
+        };
+        assert!(decompress_normal(&[0u8; 8], 0x8000, &first, 16).is_err());
+    }
+}
diff --git a/src/extract.rs b/src/extract.rs
new file mode 100644
index 0000000..abc204f
--- /dev/null
+++ b/src/extract.rs
@@ -0,0 +1,74 @@
+//! PE image extraction pipeline: decrypt → decompress → raw PE bytes.
+
+use crate::crypto;
+use crate::decompress;
+use crate::error::{Result, Xex2Error};
+use crate::optional::{CompressionInfo, CompressionType, EncryptionType};
+use crate::Xex2File;
+
+/// Extracts the PE image from a parsed XEX2 file.
+///
+/// Reads the encrypted/compressed payload from `data` (the full XEX2 file),
+/// decrypts it if needed, decompresses it based on the file format info, and
+/// returns the raw PE image bytes.
+///
+/// # Errors
+///
+/// Returns [`Xex2Error::DecompressionFailed`] if the FILE_FORMAT_INFO header
+/// is missing, `header_size` lies beyond the end of `data`, the image uses
+/// delta compression, or decompression itself fails.
+pub fn extract_pe_image(data: &[u8], xex: &Xex2File) -> Result<Vec<u8>> {
+    let fmt = xex
+        .optional_headers
+        .file_format_info
+        .as_ref()
+        .ok_or_else(|| {
+            Xex2Error::DecompressionFailed("missing FILE_FORMAT_INFO header".into())
+        })?;
+
+    let payload_offset = xex.header.header_size as usize;
+    let raw_payload = data.get(payload_offset..).ok_or_else(|| {
+        Xex2Error::DecompressionFailed(format!(
+            "header_size (0x{:X}) exceeds file size (0x{:X})",
+            payload_offset,
+            data.len()
+        ))
+    })?;
+
+    // Delta images cannot be extracted; fail before copying and decrypting
+    // the payload instead of after decompressing it.
+    if fmt.compression_type == CompressionType::Delta {
+        return Err(delta_unsupported());
+    }
+
+    // Copy payload so we can decrypt in-place
+    let mut payload = raw_payload.to_vec();
+
+    // Step 1: Decrypt if needed
+    if fmt.encryption_type == EncryptionType::Normal {
+        let session_key = crypto::derive_session_key(&xex.security_info.aes_key);
+        crypto::decrypt_in_place(&session_key, &mut payload);
+    }
+
+    // Step 2: Decompress based on compression type
+    match &fmt.compression_info {
+        // `payload` is already an owned copy, so return it without copying again.
+        CompressionInfo::None => Ok(payload),
+        CompressionInfo::Basic { blocks } => decompress::decompress_basic(&payload, blocks),
+        CompressionInfo::Normal {
+            window_size,
+            first_block,
+        } => decompress::decompress_normal(
+            &payload,
+            *window_size,
+            first_block,
+            xex.security_info.image_size,
+        ),
+        CompressionInfo::Delta => Err(delta_unsupported()),
+    }
+}
+
+/// Builds the error reported for delta-compressed images.
+fn delta_unsupported() -> Xex2Error {
+    Xex2Error::DecompressionFailed("delta compression is not supported".into())
+}
diff --git a/src/lib.rs b/src/lib.rs
index b7cb39e..fffdc46 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,8 +7,10 @@
 //! structured information from XEX2 files.
 
 pub mod crypto;
+pub mod decompress;
 pub mod display;
 pub mod error;
+pub mod extract;
 pub mod header;
 pub mod optional;
 pub mod security;
diff --git a/src/main.rs b/src/main.rs
index 8fb9f00..25a82d0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -45,15 +45,28 @@ fn cmd_inspect(path: &PathBuf) {
 }
 
 fn cmd_extract(path: &PathBuf, output: Option<PathBuf>) {
-    let _output_path = output.unwrap_or_else(|| path.with_extension("exe"));
+    let output_path = output.unwrap_or_else(|| path.with_extension("exe"));
 
     let data = read_file(path);
-    let _xex = parse_xex(&data);
+    let xex = parse_xex(&data);
 
-    // TODO(M5): decrypt + decompress pipeline
-    // TODO(M6): verify PE and write to output_path
-    eprintln!("Error: extraction not yet implemented (coming in M5/M6)");
-    process::exit(1);
+    let pe_image = match xex2tractor::extract::extract_pe_image(&data, &xex) {
+        Ok(img) => img,
+        Err(e) => {
+            eprintln!("Error extracting PE image: {e}");
+            process::exit(1);
+        }
+    };
+
+    // TODO(M6): verify PE headers before writing
+    if let Err(e) = std::fs::write(&output_path, &pe_image) {
+        eprintln!("Error writing {}: {e}", output_path.display());
+        process::exit(1);
+    }
+
+    println!("Extracted PE image to {}", output_path.display());
+    println!(" Input: {} ({} bytes)", path.display(), data.len());
+    println!(" Output: {} ({} bytes)", output_path.display(), pe_image.len());
 }
 
 fn read_file(path: &PathBuf) -> Vec<u8> {
diff --git a/tests/integration.rs b/tests/integration.rs
index 9728a5a..21e2838 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -1,4 +1,5 @@
 use xex2tractor::crypto;
+use xex2tractor::extract;
 use xex2tractor::header::{ModuleFlags, XEX2_MAGIC};
 use xex2tractor::optional::{CompressionInfo, CompressionType, EncryptionType, SystemFlags};
 use xex2tractor::security::{ImageFlags, MediaFlags, RegionFlags};
@@ -311,16 +312,100 @@ fn test_cli_inspect_missing_file() {
     assert!(!output.status.success());
 }
 
+// ── Extraction tests ─────────────────────────────────────────────────────────
+
 #[test]
-fn test_cli_extract_not_yet_implemented() {
+fn test_extract_pe_image() {
+    let data = sample_data();
+    let xex = xex2tractor::parse(&data).unwrap();
+
+    let pe_image = extract::extract_pe_image(&data, &xex).unwrap();
+
+    // Output size should match security_info.image_size
+    assert_eq!(pe_image.len(), xex.security_info.image_size as usize);
+}
+
+#[test]
+fn test_extract_pe_starts_with_mz() {
+    let data = sample_data();
+    let xex = xex2tractor::parse(&data).unwrap();
+
+    let pe_image = extract::extract_pe_image(&data, &xex).unwrap();
+
+    // PE image must start with MZ signature (0x4D5A)
+    assert_eq!(pe_image[0], 0x4D, "first byte should be 'M'");
+    assert_eq!(pe_image[1], 0x5A, "second byte should be 'Z'");
+}
+
+#[test]
+fn test_extract_pe_has_valid_pe_header() {
+    let data = sample_data();
+    let xex = xex2tractor::parse(&data).unwrap();
+
+    let pe_image = extract::extract_pe_image(&data, &xex).unwrap();
+
+    // Read e_lfanew from DOS header (offset 0x3C, little-endian per PE spec)
+    let e_lfanew = u32::from_le_bytes([
+        pe_image[0x3C],
+        pe_image[0x3D],
+        pe_image[0x3E],
+        pe_image[0x3F],
+    ]) as usize;
+
+    // PE signature at e_lfanew: "PE\0\0"
+    assert_eq!(&pe_image[e_lfanew..e_lfanew + 4], b"PE\0\0");
+
+    // Machine type at e_lfanew + 4: 0x01F2 (IMAGE_FILE_MACHINE_POWERPCBE, little-endian)
+    let machine = u16::from_le_bytes([pe_image[e_lfanew + 4], pe_image[e_lfanew + 5]]);
+    assert_eq!(machine, 0x01F2, "machine should be POWERPCBE");
+}
+
+#[test]
+fn test_cli_extract_writes_file() {
     let path = format!("{}/tests/data/default.xex", env!("CARGO_MANIFEST_DIR"));
+    let output_path = format!("{}/target/test_extract_output.exe", env!("CARGO_MANIFEST_DIR"));
+
+    // Clean up any previous test output
+    let _ = std::fs::remove_file(&output_path);
+
+    let output = std::process::Command::new(env!("CARGO_BIN_EXE_xex2tractor"))
+        .args(["extract", &path, &output_path])
+        .output()
+        .expect("failed to run xex2tractor");
+
+    assert!(output.status.success(), "CLI extract should succeed");
+
+    // Verify output file exists and starts with MZ
+    let extracted = std::fs::read(&output_path).expect("should be able to read extracted file");
+    assert!(extracted.len() > 2);
+    assert_eq!(extracted[0], 0x4D); // 'M'
+    assert_eq!(extracted[1], 0x5A); // 'Z'
+
+    // Clean up
+    let _ = std::fs::remove_file(&output_path);
+}
+
+#[test]
+fn test_cli_extract_default_output_path() {
+    let path = format!("{}/tests/data/default.xex", env!("CARGO_MANIFEST_DIR"));
+    let expected_output = format!("{}/tests/data/default.exe", env!("CARGO_MANIFEST_DIR"));
+
+    // Clean up any previous test output
+    let _ = std::fs::remove_file(&expected_output);
+
     let output = std::process::Command::new(env!("CARGO_BIN_EXE_xex2tractor"))
         .args(["extract", &path])
         .output()
         .expect("failed to run xex2tractor");
 
-    // Extract should fail with "not yet implemented" for now
-    assert!(!output.status.success());
-    let stderr = String::from_utf8_lossy(&output.stderr);
-    assert!(stderr.contains("not yet implemented"));
+    assert!(output.status.success(), "CLI extract should succeed");
+
+    // Verify default output path was used
+    assert!(
+        std::fs::metadata(&expected_output).is_ok(),
+        "default output file should exist"
+    );
+
+    // Clean up
+    let _ = std::fs::remove_file(&expected_output);
 }