Files
xenia-rs/audit-runs/phase-c-first-divergence/first-diff.py
MechaCat02 ef93a4fa14 handoff: VSync/event-wedge fixes + iterate 2.A–2.BC research notes
Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity +
  related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the
iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps
(.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as
regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-05 07:19:08 +02:00

260 lines
9.1 KiB
Python

#!/usr/bin/env python3
"""Phase C — first byte-diff between two engines' loaded XEX images.
Inputs:
--canary PATH canary's image.bin (loaded XEX bytes)
--ours PATH ours's image.bin
--pe PATH third-party reference: extracted .pe (xex-extract output)
--xex-json PATH xex.json metadata (used for section names AND import-slot
canonicalization — record_type=0 slots are 4 bytes,
record_type=1 thunks are 16 bytes)
--image-base HEX guest VA base of image (default 0x82000000)
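Three passes:
1) Raw byte-diff. Reports first diff between canary and ours.
2) Canonicalized byte-diff. Masks XEX import slots (legitimate
engine-specific runtime patches per tripstone #2) and reports the
first remaining diff — that's the candidate REAL divergence.
3) Cross-check. Compares each engine's canonicalized image against the
canonicalized .pe reference and reports the first diff for each.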
If canonical pass shows no remaining diffs, the engines load the XEX
into byte-identical state and the original sha256 mismatch is fully
explained by import patches.
"""
import argparse
import json
import struct
import sys
# Byte widths of the two XEX import-slot kinds, selected by an import's
# record_type (0 or 1). They size the address ranges that get masked out
# before the canonicalized comparison.
PE_SLOT_RT0 = 4 # variable-import slot is 4 bytes (one BE u32)
PE_SLOT_RT1 = 16 # thunk slot is 16 bytes (lis+li+mtctr+bctr or shim)
def collect_import_ranges(xex_meta: dict) -> list:
    """Gather the guest-VA span of every import slot listed in the metadata.

    Walks ``xex_meta["import_libraries"]`` and, for each entry under a
    library's ``"imports"``, emits ``(address, address + width)`` where the
    width is chosen by ``record_type`` (0 -> PE_SLOT_RT0, 1 -> PE_SLOT_RT1).
    Entries with any other record type contribute nothing.

    Returns:
        A list of half-open ``(start_va, end_va)`` tuples in metadata order
        (unsorted, possibly overlapping).
    """
    spans = []
    for library in xex_meta["import_libraries"]:
        for entry in library["imports"]:
            start_va = entry["address"]
            kind = entry["record_type"]
            if kind == 0:
                width = PE_SLOT_RT0
            elif kind == 1:
                width = PE_SLOT_RT1
            else:
                # Unknown record types are not import slots we know how to size.
                continue
            spans.append((start_va, start_va + width))
    return spans
def merge_ranges(ranges: list) -> list:
    """Coalesce overlapping or touching half-open ranges.

    The input pairs are sorted, then folded left to right: a range whose
    start is at or before the end of the previous merged range extends that
    range; otherwise it starts a new one.

    Returns:
        A sorted list of ``(start, end)`` tuples with no overlaps; an empty
        list when *ranges* is empty.
    """
    coalesced = []
    for start, end in sorted(ranges):
        if coalesced and start <= coalesced[-1][1]:
            prev_start, prev_end = coalesced[-1]
            if end > prev_end:
                coalesced[-1] = (prev_start, end)
        else:
            coalesced.append((start, end))
    return coalesced
def canonicalize(image: bytes, ranges_va: list, image_base: int) -> bytes:
    """Return a copy of image with import-slot byte ranges replaced by 0xCD.

    0xCD is the fill byte the MSVC debug runtime uses for freshly allocated
    (uninitialized) heap memory; it is used here purely as a recognisable
    sentinel so that any leakage of masked bytes is visible in a hex dump.

    Args:
        image: raw image bytes; offset 0 corresponds to guest VA *image_base*.
        ranges_va: iterable of half-open ``(start_va, end_va)`` guest-VA pairs.
        image_base: guest VA of the first byte of *image*.

    Returns:
        A new ``bytes`` object of the same length as *image*.

    Ranges are clamped to the image bounds, so a range that only partially
    overlaps the image still has its in-bounds part masked. This keeps images
    of different lengths masked consistently over their common prefix. Ranges
    entirely outside the image (or empty ranges) are ignored.
    """
    buf = bytearray(image)
    size = len(buf)
    for sva, eva in ranges_va:
        s = max(sva - image_base, 0)
        e = min(eva - image_base, size)
        if s < e:
            # Same-length slice assignment: masks in one step, never resizes.
            buf[s:e] = b"\xCD" * (e - s)
    return bytes(buf)
def find_first_diff(a: bytes, b: bytes) -> int:
    """Locate the first index at which *a* and *b* disagree.

    The common prefix is scanned in 64 KiB chunks so that equal chunks are
    rejected with one slice comparison; only a mismatching chunk is walked
    element by element.

    Returns:
        The index of the first differing element; the length of the shorter
        input when one is a strict prefix of the other; ``-1`` when the two
        inputs are identical.
    """
    common = min(len(a), len(b))
    chunk = 0x10000
    start = 0
    while start < common:
        stop = min(start + chunk, common)
        lhs, rhs = a[start:stop], b[start:stop]
        if lhs != rhs:
            for delta, (x, y) in enumerate(zip(lhs, rhs)):
                if x != y:
                    return start + delta
        start = stop
    return -1 if len(a) == len(b) else common
def find_diff_runs(a: bytes, b: bytes, max_runs: int = 16) -> list:
    """List maximal runs of consecutive differing positions.

    Only the common prefix (the shorter input's length) is examined, and
    scanning stops once *max_runs* runs have been collected.

    Returns:
        A list of half-open ``(start, end)`` index pairs, in ascending order.
    """
    limit = min(len(a), len(b))
    found = []
    pos = 0
    while pos < limit and len(found) < max_runs:
        if a[pos] == b[pos]:
            pos += 1
            continue
        # Start of a run: extend until the inputs agree again or we run out.
        run_start = pos
        pos += 1
        while pos < limit and a[pos] != b[pos]:
            pos += 1
        found.append((run_start, pos))
    return found
def classify_offset(off: int, sections: list) -> str:
    """Describe where an image offset falls relative to the PE sections.

    *off* is compared directly against each section's ``virtual_address``
    and ``virtual_size``, so it must be expressed in the same (image-relative)
    address space as those fields.

    Args:
        off: offset to classify.
        sections: list of dicts with ``name``, ``virtual_address`` and
            ``virtual_size`` keys; ordering does not matter.

    Returns:
        A human-readable label: the containing section plus the offset into
        it, ``PE header`` for offsets below the lowest section, ``unmapped
        (gap between sections)`` for holes between sections, or ``unmapped
        (past last section)`` otherwise (including when *sections* is empty).
    """
    for s in sections:
        vstart = s["virtual_address"]
        vend = vstart + s["virtual_size"]
        if vstart <= off < vend:
            return f'{s["name"]} (off=+{off - vstart:#x} into section)'
    if not sections:
        return "unmapped (past last section)"
    first_va = min(s["virtual_address"] for s in sections)
    if off < first_va:
        return f"PE header (before first section va=0x{first_va:x})"
    last_end = max(s["virtual_address"] + s["virtual_size"] for s in sections)
    if off < last_end:
        # Not inside any section, yet below the highest section end: a hole.
        return "unmapped (gap between sections)"
    return "unmapped (past last section)"
def hex_context(buf: bytes, off: int, radius: int = 16) -> str:
    """Render the bytes surrounding *off* as space-separated lowercase hex.

    The window spans ``radius`` bytes on each side of *off* (inclusive of the
    byte at *off* itself) and is clipped to the bounds of *buf*; an offset
    beyond the buffer yields an empty string.
    """
    window_start = max(0, off - radius)
    window_end = min(len(buf), off + radius + 1)
    window = buf[window_start:window_end]
    return " ".join(map("{:02x}".format, window))
def sha256_hex(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as a lowercase hex string."""
    # Imported locally, as in the rest of this script, to keep hashlib out of
    # the module-level import list.
    from hashlib import sha256

    digest = sha256(data)
    return digest.hexdigest()
def _byte_at(buf: bytes, off: int) -> str:
    """Format ``buf[off]`` as ``0xNN``, or a placeholder when off is out of range."""
    if 0 <= off < len(buf):
        return f"0x{buf[off]:02x}"
    return "<past end of image>"


def main() -> int:
    """Build and print the Phase C byte-diff report.

    Reads the canary/ours/.pe images and the XEX metadata named on the
    command line, then emits a Markdown report covering: raw hashes, the
    first raw diff (Pass 1), the first diff after masking import slots
    (Pass 2), each engine against the .pe reference (Pass 3), and a summary
    of differing byte counts. The report goes to stdout and, when ``--out``
    is given, to that file as UTF-8.

    Returns:
        0 when the canonicalized canary and ours images are identical,
        1 otherwise (used as the process exit status).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--canary", required=True)
    ap.add_argument("--ours", required=True)
    ap.add_argument("--pe", required=True)
    ap.add_argument("--xex-json", required=True)
    ap.add_argument("--image-base", default="0x82000000")
    ap.add_argument("--out", help="optional report path")
    args = ap.parse_args()

    image_base = int(args.image_base, 16)
    with open(args.canary, "rb") as fh:
        canary = fh.read()
    with open(args.ours, "rb") as fh:
        ours = fh.read()
    with open(args.pe, "rb") as fh:
        pe = fh.read()
    # Binary mode lets json detect the encoding itself instead of relying on
    # the locale default.
    with open(args.xex_json, "rb") as fh:
        meta = json.load(fh)

    sections_sorted = sorted(meta["sections"], key=lambda s: s["virtual_address"])
    import_ranges_va = merge_ranges(collect_import_ranges(meta))
    mask_bytes = sum(e - s for s, e in import_ranges_va)

    report = []
    p = report.append
    p("# Phase C — first byte-diff report")
    p("")
    p(f"- canary image.bin: {args.canary} ({len(canary)} bytes)")
    p(f"- ours image.bin: {args.ours} ({len(ours)} bytes)")
    p(f"- pe reference: {args.pe} ({len(pe)} bytes)")
    p(f"- image_base: {args.image_base}")
    p(f"- import-slot ranges (merged): {len(import_ranges_va)}, "
      f"bytes={mask_bytes}")
    p("")
    p("## Raw byte hashes")
    p(f"- canary sha256: `{sha256_hex(canary)}`")
    p(f"- ours sha256: `{sha256_hex(ours)}`")
    p(f"- pe sha256: `{sha256_hex(pe)}`")
    p("")

    # ---- Pass 1: raw diff ----
    p("## Pass 1 — raw byte-diff (uncanonicalized)")
    p("")
    first = find_first_diff(canary, ours)
    if first == -1:
        p("- canary == ours ✅ (no raw diff)")
    else:
        va = image_base + first
        p(f"- first byte-diff at off=0x{first:08x} VA=0x{va:08x}")
        p(f"- classification: {classify_offset(first, sections_sorted)}")
        # When one image is a strict prefix of the other, `first` equals the
        # shorter length, so the byte may not exist on one side.
        p(f"- canary byte: {_byte_at(canary, first)}")
        p(f"- ours byte: {_byte_at(ours, first)}")
        if first < len(pe):
            p(f"- pe ref byte: 0x{pe[first]:02x}")
        p("")
        p(f"context canary: `{hex_context(canary, first)}`")
        p(f"context ours : `{hex_context(ours, first)}`")
        p(f"context pe : `{hex_context(pe, first)}`")
    p("")

    # ---- Pass 2: canonicalized diff ----
    can_canon = canonicalize(canary, import_ranges_va, image_base)
    ours_canon = canonicalize(ours, import_ranges_va, image_base)
    pe_canon = canonicalize(pe, import_ranges_va, image_base)
    p("## Pass 2 — canonicalized (import slots masked to 0xCD)")
    p("")
    p(f"- canary canonical sha256: `{sha256_hex(can_canon)}`")
    p(f"- ours canonical sha256: `{sha256_hex(ours_canon)}`")
    p(f"- pe canonical sha256: `{sha256_hex(pe_canon)}`")
    p("")
    first_canon = find_first_diff(can_canon, ours_canon)
    if first_canon == -1:
        p("- **canary == ours canonical match ✅**")
        p("- the image_loaded_sha256 mismatch is **fully explained** by "
          "legitimate engine-specific import-thunk patches.")
        p("- NO real engine divergence at this layer.")
    else:
        va = image_base + first_canon
        p(f"- first canonical byte-diff at off=0x{first_canon:08x} VA=0x{va:08x}")
        p(f"- classification: {classify_offset(first_canon, sections_sorted)}")
        p(f"- canary byte: {_byte_at(can_canon, first_canon)}")
        p(f"- ours byte: {_byte_at(ours_canon, first_canon)}")
        if first_canon < len(pe_canon):
            pb = pe_canon[first_canon]
            p(f"- pe ref byte: 0x{pb:02x}")
            # A side whose image ends before this offset cannot match .pe.
            cmw = first_canon < len(can_canon) and can_canon[first_canon] == pb
            omw = first_canon < len(ours_canon) and ours_canon[first_canon] == pb
            if cmw and not omw:
                p("- verdict: **ours is wrong** at this byte (canary == .pe)")
            elif omw and not cmw:
                p("- verdict: **canary is wrong** at this byte (ours == .pe)")
            else:
                p("- verdict: neither matches .pe — possible relocation patch or .pe stale")
    p("")

    # Cross-check vs .pe
    p("## Pass 3 — engine vs .pe ground truth (canonicalized)")
    p("")
    first_c_vs_pe = find_first_diff(can_canon, pe_canon)
    first_o_vs_pe = find_first_diff(ours_canon, pe_canon)
    if first_c_vs_pe == -1:
        p("- canary canonical == pe canonical ✅")
    else:
        p(f"- canary != pe first at off=0x{first_c_vs_pe:08x} VA=0x{image_base + first_c_vs_pe:08x} "
          f"({classify_offset(first_c_vs_pe, sections_sorted)})")
    if first_o_vs_pe == -1:
        p("- ours canonical == pe canonical ✅")
    else:
        p(f"- ours != pe first at off=0x{first_o_vs_pe:08x} VA=0x{image_base + first_o_vs_pe:08x} "
          f"({classify_offset(first_o_vs_pe, sections_sorted)})")
    p("")

    # Summary: counts cover the common prefix only (zip stops at the shorter).
    raw_diff_count = sum(1 for x, y in zip(canary, ours) if x != y)
    canon_diff_count = sum(1 for x, y in zip(can_canon, ours_canon) if x != y)
    p("## Summary")
    p("")
    p(f"- bytes differing raw: {raw_diff_count}")
    p(f"- bytes differing canonical: {canon_diff_count}")
    p(f"- import-slot mask bytes: "
      f"{mask_bytes}")

    text = "\n".join(report)
    if args.out:
        # The report contains non-ASCII characters (em dashes, check marks),
        # so the encoding must not depend on the platform default.
        with open(args.out, "w", encoding="utf-8") as fh:
            fh.write(text + "\n")
    print(text)
    return 0 if (first_canon == -1) else 1
# Script entry point: run the report and propagate its status as the exit code.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)