#!/usr/bin/env python3
"""Phase C — first byte-diff between two engines' loaded XEX images.

Inputs:
  --canary PATH     canary's image.bin (loaded XEX bytes)
  --ours PATH       our engine's image.bin
  --pe PATH         third-party reference: extracted .pe (xex-extract output)
  --xex-json PATH   xex.json metadata (used for section names AND import-slot
                    canonicalization — record_type=0 slots are 4 bytes,
                    record_type=1 thunks are 16 bytes)
  --image-base HEX  guest VA base of image (default 0x82000000)
  --out PATH        optional path the report is also written to (UTF-8)

Passes:
  1) Raw byte-diff. Reports first diff between canary and ours.
  2) Canonicalized byte-diff. Masks XEX import slots (legitimate
     engine-specific runtime patches per tripstone #2) and reports the first
     remaining diff — that's the candidate REAL divergence.
  3) Cross-check of each canonicalized engine image against the .pe.

If the canonical pass shows no remaining diffs, the engines load the XEX into
byte-identical state and the original sha256 mismatch is fully explained by
import patches.

Exit status: 0 when the canonicalized images match, 1 otherwise.
"""

import argparse
import hashlib
import json
import struct
import sys

PE_SLOT_RT0 = 4   # variable-import slot is 4 bytes (one BE u32)
PE_SLOT_RT1 = 16  # thunk slot is 16 bytes (lis+li+mtctr+bctr or shim)


def collect_import_ranges(xex_meta: dict) -> list:
    """Return list of (start_va, end_va) covering every XEX import slot.

    Reads ``xex_meta["import_libraries"][*]["imports"][*]`` and uses each
    entry's ``address`` and ``record_type``.  End addresses are exclusive.
    Entries whose ``record_type`` is neither 0 nor 1 contribute no range.
    """
    slot_sizes = {0: PE_SLOT_RT0, 1: PE_SLOT_RT1}
    ranges = []
    for lib in xex_meta["import_libraries"]:
        for imp in lib["imports"]:
            size = slot_sizes.get(imp["record_type"])
            if size is None:
                continue
            addr = imp["address"]
            ranges.append((addr, addr + size))
    return ranges


def merge_ranges(ranges: list) -> list:
    """Sort half-open (start, end) ranges and coalesce overlapping/adjacent ones.

    Returns a new list of tuples; the input list is not modified.
    """
    if not ranges:
        return []
    ordered = sorted(ranges)
    merged = [list(ordered[0])]
    for start, end in ordered[1:]:
        last = merged[-1]
        if start <= last[1]:
            # Overlaps or touches the previous range: extend it.
            last[1] = max(last[1], end)
        else:
            merged.append([start, end])
    return [tuple(pair) for pair in merged]


def canonicalize(image: bytes, ranges_va: list, image_base: int) -> bytes:
    """Return a copy of image with import-slot byte ranges replaced by 0xCD.

    0xCD is the fill byte the MSVC debug heap uses for freshly allocated,
    uninitialized memory; it is used here purely as a conspicuous sentinel so
    that any masked byte leaking into a report is easy to spot.

    ``ranges_va`` holds half-open (start_va, end_va) pairs; they are turned
    into image offsets by subtracting ``image_base``.  A range that does not
    lie entirely inside the image is skipped as a whole (no partial masking).
    """
    buf = bytearray(image)
    for start_va, end_va in ranges_va:
        start = start_va - image_base
        end = end_va - image_base
        if start < 0 or end > len(buf):
            continue
        buf[start:end] = b"\xCD" * (end - start)
    return bytes(buf)


def find_first_diff(a: bytes, b: bytes) -> int:
    """Return the offset of the first differing byte between a and b.

    Returns -1 when the buffers are identical.  When one buffer is a strict
    prefix of the other, returns the length of the shorter one — callers must
    not assume that offset is a valid index into both buffers.
    """
    n = min(len(a), len(b))
    block = 1 << 16
    # Compare whole blocks first (fast), then pinpoint inside the bad block.
    for off in range(0, n, block):
        end = min(off + block, n)
        if a[off:end] != b[off:end]:
            for i in range(off, end):
                if a[i] != b[i]:
                    return i
    if len(a) != len(b):
        return n
    return -1


def find_diff_runs(a: bytes, b: bytes, max_runs: int = 16) -> list:
    """Return up to max_runs half-open (start, end) runs of differing bytes.

    Only the common prefix (``min(len(a), len(b))`` bytes) is examined.
    """
    n = min(len(a), len(b))
    runs = []
    i = 0
    while i < n and len(runs) < max_runs:
        if a[i] != b[i]:
            j = i
            while j < n and a[j] != b[j]:
                j += 1
            runs.append((i, j))
            i = j
        else:
            i += 1
    return runs


def classify_offset(off: int, sections: list) -> str:
    """Describe which section an image offset falls into.

    ``sections`` is expected sorted by ``virtual_address``; each entry supplies
    ``name``, ``virtual_address`` and ``virtual_size``.  ``off`` is compared
    directly against ``virtual_address`` — presumably both are relative to the
    image base; verify against the xex.json producer.
    """
    for s in sections:
        vstart = s["virtual_address"]
        vend = vstart + s["virtual_size"]
        if vstart <= off < vend:
            return f'{s["name"]} (off=+{off - vstart:#x} into section)'
    if sections and off < sections[0]["virtual_address"]:
        return f'PE header (before first section va=0x{sections[0]["virtual_address"]:x})'
    return 'unmapped (past last section)'


def hex_context(buf: bytes, off: int, radius: int = 16) -> str:
    """Return space-separated hex of the bytes within radius of off.

    The window is clamped to the buffer, so it may be shorter than
    ``2 * radius + 1`` bytes, or empty when off lies far past the end.
    """
    lo = max(0, off - radius)
    hi = min(len(buf), off + radius + 1)
    return " ".join(f"{b:02x}" for b in buf[lo:hi])


def sha256_hex(data: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of data."""
    return hashlib.sha256(data).hexdigest()


def _read_bytes(path: str) -> bytes:
    """Read a whole binary file, closing the handle afterwards."""
    with open(path, "rb") as fh:
        return fh.read()


def _byte_at(buf: bytes, off: int):
    """Return buf[off], or None when off lies outside buf."""
    return buf[off] if 0 <= off < len(buf) else None


def _fmt_byte(value) -> str:
    """Format a byte value as 0xNN, or <EOF> when the byte does not exist."""
    return "<EOF>" if value is None else f"0x{value:02x}"


def _count_diff_bytes(a: bytes, b: bytes) -> int:
    """Count differing bytes over the common prefix of a and b."""
    return sum(x != y for x, y in zip(a, b))


def main(argv=None) -> int:
    """Build the Phase C report, print it, and optionally write it to --out.

    ``argv`` is the argument list to parse; None (the default) makes argparse
    fall back to ``sys.argv[1:]``.  Returns 0 when the canonicalized canary
    and ours images are identical, 1 otherwise.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--canary", required=True)
    ap.add_argument("--ours", required=True)
    ap.add_argument("--pe", required=True)
    ap.add_argument("--xex-json", required=True)
    ap.add_argument("--image-base", default="0x82000000")
    ap.add_argument("--out", help="optional report path")
    args = ap.parse_args(argv)

    image_base = int(args.image_base, 16)
    canary = _read_bytes(args.canary)
    ours = _read_bytes(args.ours)
    pe = _read_bytes(args.pe)
    with open(args.xex_json, encoding="utf-8") as fh:
        meta = json.load(fh)

    sections_sorted = sorted(meta["sections"], key=lambda s: s["virtual_address"])
    import_ranges_va = merge_ranges(collect_import_ranges(meta))
    mask_bytes = sum(e - s for s, e in import_ranges_va)

    report = []
    p = report.append
    p("# Phase C — first byte-diff report")
    p("")
    p(f"- canary image.bin: {args.canary} ({len(canary)} bytes)")
    p(f"- ours image.bin: {args.ours} ({len(ours)} bytes)")
    p(f"- pe reference: {args.pe} ({len(pe)} bytes)")
    p(f"- image_base: {args.image_base}")
    p(f"- import-slot ranges (merged): {len(import_ranges_va)}, "
      f"bytes={mask_bytes}")
    p("")
    p("## Raw byte hashes")
    p(f"- canary sha256: `{sha256_hex(canary)}`")
    p(f"- ours sha256: `{sha256_hex(ours)}`")
    p(f"- pe sha256: `{sha256_hex(pe)}`")
    p("")

    # ---- Pass 1: raw diff ----
    p("## Pass 1 — raw byte-diff (uncanonicalized)")
    p("")
    first = find_first_diff(canary, ours)
    if first == -1:
        p("- canary == ours ✅ (no raw diff)")
    else:
        va = image_base + first
        p(f"- first byte-diff at off=0x{first:08x} VA=0x{va:08x}")
        p(f"- classification: {classify_offset(first, sections_sorted)}")
        # The offset can equal the length of the shorter image when the two
        # differ only in size, so the bytes are fetched with a bounds check.
        p(f"- canary byte: {_fmt_byte(_byte_at(canary, first))}")
        p(f"- ours byte: {_fmt_byte(_byte_at(ours, first))}")
        if first >= min(len(canary), len(ours)):
            p(f"- note: images differ in length "
              f"(canary={len(canary)}, ours={len(ours)}); "
              f"common prefix is identical")
        if first < len(pe):
            p(f"- pe ref byte: 0x{pe[first]:02x}")
        p("")
        p(f"context canary: `{hex_context(canary, first)}`")
        p(f"context ours : `{hex_context(ours, first)}`")
        p(f"context pe : `{hex_context(pe, first)}`")
    p("")

    # ---- Pass 2: canonicalized diff ----
    can_canon = canonicalize(canary, import_ranges_va, image_base)
    ours_canon = canonicalize(ours, import_ranges_va, image_base)
    pe_canon = canonicalize(pe, import_ranges_va, image_base)
    p("## Pass 2 — canonicalized (import slots masked to 0xCD)")
    p("")
    p(f"- canary canonical sha256: `{sha256_hex(can_canon)}`")
    p(f"- ours canonical sha256: `{sha256_hex(ours_canon)}`")
    p(f"- pe canonical sha256: `{sha256_hex(pe_canon)}`")
    p("")
    first_canon = find_first_diff(can_canon, ours_canon)
    if first_canon == -1:
        p("- **canary == ours canonical match ✅**")
        p("- the image_loaded_sha256 mismatch is **fully explained** by "
          "legitimate engine-specific import-thunk patches.")
        p("- NO real engine divergence at this layer.")
    else:
        va = image_base + first_canon
        canary_byte = _byte_at(can_canon, first_canon)
        ours_byte = _byte_at(ours_canon, first_canon)
        p(f"- first canonical byte-diff at off=0x{first_canon:08x} VA=0x{va:08x}")
        p(f"- classification: {classify_offset(first_canon, sections_sorted)}")
        p(f"- canary byte: {_fmt_byte(canary_byte)}")
        p(f"- ours byte: {_fmt_byte(ours_byte)}")
        if first_canon >= min(len(can_canon), len(ours_canon)):
            p(f"- note: images differ in length "
              f"(canary={len(can_canon)}, ours={len(ours_canon)}); "
              f"common prefix is identical")
        if first_canon < len(pe_canon):
            pb = pe_canon[first_canon]
            p(f"- pe ref byte: 0x{pb:02x}")
            # A missing byte (None) never equals the reference byte.
            cmw = canary_byte == pb
            omw = ours_byte == pb
            if cmw and not omw:
                p("- verdict: **ours is wrong** at this byte (canary == .pe)")
            elif omw and not cmw:
                p("- verdict: **canary is wrong** at this byte (ours == .pe)")
            else:
                p("- verdict: neither matches .pe — possible relocation patch or .pe stale")
    p("")

    # Cross-check vs .pe
    p("## Pass 3 — engine vs .pe ground truth (canonicalized)")
    p("")
    first_c_vs_pe = find_first_diff(can_canon, pe_canon)
    first_o_vs_pe = find_first_diff(ours_canon, pe_canon)
    if first_c_vs_pe == -1:
        p("- canary canonical == pe canonical ✅")
    else:
        p(f"- canary != pe first at off=0x{first_c_vs_pe:08x} VA=0x{image_base + first_c_vs_pe:08x} "
          f"({classify_offset(first_c_vs_pe, sections_sorted)})")
    if first_o_vs_pe == -1:
        p("- ours canonical == pe canonical ✅")
    else:
        p(f"- ours != pe first at off=0x{first_o_vs_pe:08x} VA=0x{image_base + first_o_vs_pe:08x} "
          f"({classify_offset(first_o_vs_pe, sections_sorted)})")
    p("")

    # Summary (counts cover the common prefix only; see the length notes above)
    raw_diff_count = _count_diff_bytes(canary, ours)
    canon_diff_count = _count_diff_bytes(can_canon, ours_canon)
    p("## Summary")
    p("")
    p(f"- bytes differing raw: {raw_diff_count}")
    p(f"- bytes differing canonical: {canon_diff_count}")
    p(f"- import-slot mask bytes: "
      f"{mask_bytes}")

    text = "\n".join(report)
    if args.out:
        # The report contains non-ASCII characters, so the encoding is pinned
        # rather than left to the platform default.
        with open(args.out, "w", encoding="utf-8") as fh:
            fh.write(text + "\n")
    print(text)
    return 0 if first_canon == -1 else 1


if __name__ == "__main__":
    sys.exit(main())