Files
xenia-rs/audit-runs/audit-027-v40-mem-diff/diff_v40.py
MechaCat02 8e709b0a24 chore: track audit-runs summary artifacts (md/csv/diff/txt/json/etc)
Snapshot of every non-log artifact under audit-runs/ from audits 003
through 058: findings.md per audit, comparison CSVs, probe diffs,
schema docs, register-dump txts, lr-trace JSONL streams, the saved
canary patch diffs, etc. ~284 files / ~52 MB total.

Excluded (per .gitignore): probe stdout/stderr/log streams (the raw
firehose), guest-memory dumps under audit-026/027/029 (4.5 GB of
.bin files; *.bin pattern added to .gitignore this commit).

Also adds the orphan audit-058-sub825070F0-activation directory that
a subagent accidentally created at project-root instead of
under xenia-rs/audit-runs/; relocated to its proper home.

Purpose: cross-machine continuity. With these summaries committed,
a fresh clone gives the next session the full per-audit context
(findings + tables + cascade predictions) without depending on a
local-only working tree.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 21:36:41 +02:00

180 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""Comprehensive dword-level diff of canary's v40 vs ours.
For every 4-byte-aligned dword (read as big-endian) in [0x40000000, 0x7F000000):
- canary_dw = canary[i*4..i*4+4] interpreted as BE u32
- ours_dw = ours [i*4..i*4+4] interpreted as BE u32
Records:
CASE A (primary): canary_dw in 0x82000000..0x82A00000 (game-code addr) AND ours_dw != canary_dw
CASE B (inverse): ours_dw in 0x82000000..0x82A00000 AND canary_dw != ours_dw
Produces:
diff.txt full sorted A-list (truncated for very large outputs)
diff-b.txt inverse B-list
histogram.txt count of A-list entries per 0x1000-aligned bucket of the canary PC value
l1-hits.txt specific renderer cluster L1 PC hits (from cluster_l1_pcs.txt)
tables.txt runs of >=4 consecutive dwords with canary-PC where ours differs
anchors.txt diff entries within +/-0x100 of named anchor addresses (e.g. 0x40BA9A80)
pages.txt which 64K pages contain divergences and how many
"""
import struct
import sys
import os
from collections import defaultdict
# Guest address window covered by each dump: [0x40000000, 0x7F000000).
V40_BASE = 0x4000_0000
V40_LEN = 0x7F00_0000 - V40_BASE  # 0x3F000000 bytes per dump

# Half-open range of dword values treated as game-code addresses ("PCs").
PC_LO = 0x8200_0000
PC_HI = 0x82A0_0000

# Addresses whose +/-0x100 neighbourhood is reported in anchors.txt,
# mapped to a human-readable label.
NAMED_ANCHORS = {
    0x40BA_9A80: "audit-016 listener struct (heap)",
}
def _load_cluster_pcs(path):
    """Parse the optional cluster L1 PC list into an ``{address: name}`` dict.

    Each non-blank line is ``<hex address> [name]``; when the name is missing
    it defaults to ``sub_<ADDRESS>``. Lines starting with ``#`` are skipped.
    A missing file yields an empty dict.
    """
    cluster_pcs = {}
    if not os.path.exists(path):
        return cluster_pcs
    with open(path) as f:
        for ln in f:
            ln = ln.strip()
            if not ln or ln.startswith("#"):
                continue
            parts = ln.split()
            addr = int(parts[0], 16)
            cluster_pcs[addr] = parts[1] if len(parts) > 1 else f"sub_{addr:08X}"
    return cluster_pcs


def _read_dump(path):
    """Read a whole memory dump and check that it is exactly V40_LEN bytes.

    Raises:
        ValueError: if the file size differs from V40_LEN. An explicit raise
            is used instead of ``assert`` so the check survives ``python -O``.
    """
    with open(path, "rb") as f:
        data = f.read()
    if len(data) != V40_LEN:
        raise ValueError(
            f"{path}: expected {V40_LEN:#x} bytes, got {len(data):#x}"
        )
    return data


def _scan_divergences(canary, ours, page_size):
    """Collect the dwords where the two dumps differ and one side holds a PC.

    Returns ``(a_list, b_list, page_a_count)``:
      * a_list -- ``(addr, canary_dw, ours_dw)`` where canary_dw is in
        [PC_LO, PC_HI) and ours differs (case A);
      * b_list -- same tuple shape where ours_dw is in [PC_LO, PC_HI) and
        canary differs (case B);
      * page_a_count -- case-A count keyed by page-aligned address.

    Both lists come out in ascending address order.
    """
    a_list = []
    b_list = []
    page_a_count = defaultdict(int)
    unpack_from = struct.Struct(">I").unpack_from
    page_mask = ~(page_size - 1)
    for page_off in range(0, V40_LEN, page_size):
        page_end = min(page_off + page_size, V40_LEN)
        # Comparing a whole page as bytes runs at C speed; a page that is
        # identical contains no differing dword, so skipping it cannot change
        # the result and avoids unpacking its dwords one at a time.
        if canary[page_off:page_end] == ours[page_off:page_end]:
            continue
        for i in range(page_off, page_end, 4):
            cdw = unpack_from(canary, i)[0]
            odw = unpack_from(ours, i)[0]
            if cdw == odw:
                continue
            addr = V40_BASE + i
            if PC_LO <= cdw < PC_HI:
                a_list.append((addr, cdw, odw))
                page_a_count[addr & page_mask] += 1
            if PC_LO <= odw < PC_HI:
                b_list.append((addr, cdw, odw))
    return a_list, b_list, page_a_count


def _find_runs(a_list, min_len=4):
    """Group A-list entries into runs of address-consecutive dwords.

    Returns a list of ``(base_addr, length, zero_count, entries)`` for every
    run of at least ``min_len`` entries, longest run first. ``zero_count`` is
    the number of entries in the run whose "ours" value is 0.
    """
    ordered = sorted(a_list, key=lambda e: e[0])
    runs = []
    j = 0
    while j < len(ordered):
        start = j
        # Extend the run while the next entry sits exactly one dword further.
        while j + 1 < len(ordered) and ordered[j + 1][0] == ordered[j][0] + 4:
            j += 1
        if j - start + 1 >= min_len:
            entries = ordered[start:j + 1]
            zero_count = sum(1 for _a, _c, o in entries if o == 0)
            runs.append((entries[0][0], len(entries), zero_count, entries))
        j += 1
    # Stable sort: runs of equal length keep their ascending-address order.
    runs.sort(key=lambda r: -r[1])
    return runs


def _fmt_entry(addr, c, o):
    """Format one diff entry as ``addr=... canary=... ours=...`` (no newline)."""
    return f"addr={addr:#010x} canary={c:#010x} ours={o:#010x}"


def main():
    """Diff canary-v40.bin against ours-v40.bin and write the report files.

    Both dumps are read from the directory containing this script and must be
    exactly V40_LEN bytes long. Writes diff.txt, diff-b.txt, histogram.txt,
    l1-hits.txt, tables.txt, pages.txt and anchors.txt into the same
    directory, and prints a short summary to stdout.

    Raises:
        ValueError: if either dump is not V40_LEN bytes long.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    canary_path = os.path.join(here, "canary-v40.bin")
    ours_path = os.path.join(here, "ours-v40.bin")

    # Load cluster L1 PCs (full set)
    cluster_pcs = _load_cluster_pcs(os.path.join(here, "cluster_l1_pcs.txt"))
    # Hand-curated narrow set (for emphasis if cluster_pcs is empty)
    narrow_pcs = {
        0x822919C8: "sub_822919C8",
        0x82293448: "sub_82293448",
        0x82288028: "sub_82288028",
        0x82292D80: "sub_82292d80",
        0x822851E0: "sub_822851e0",
        0x82286BC8: "sub_82286bc8",
    }

    canary = _read_dump(canary_path)
    ours = _read_dump(ours_path)

    page_size = 65536
    a_list, b_list, page_a_count = _scan_divergences(canary, ours, page_size)
    # The raw dumps are not needed past this point; drop them to lower peak
    # memory while the reports are being built.
    del canary, ours

    print(f"[i] case A divergences (canary has PC, ours differs): {len(a_list)}")
    print(f"[i] case B divergences (ours has PC, canary differs): {len(b_list)}")

    LIMIT = 200000  # cap diff.txt / diff-b.txt
    with open(os.path.join(here, "diff.txt"), "w") as f:
        f.write(f"# A-list: {len(a_list)} entries -- canary has 0x82xxxxxx PC, ours differs\n")
        f.write(f"# (truncated to first {LIMIT} if larger)\n")
        f.writelines(_fmt_entry(addr, c, o) + "\n" for addr, c, o in a_list[:LIMIT])
    with open(os.path.join(here, "diff-b.txt"), "w") as f:
        f.write(f"# B-list: {len(b_list)} entries -- ours has 0x82xxxxxx PC, canary differs\n")
        # The B-list is capped the same way as the A-list, so say so.
        f.write(f"# (truncated to first {LIMIT} if larger)\n")
        f.writelines(_fmt_entry(addr, c, o) + "\n" for addr, c, o in b_list[:LIMIT])

    # Histogram by canary PC value (0x1000-aligned)
    bucket = defaultdict(int)
    for _addr, c, _o in a_list:
        bucket[c & ~0xFFF] += 1
    sorted_b = sorted(bucket.items(), key=lambda x: -x[1])
    with open(os.path.join(here, "histogram.txt"), "w") as f:
        f.write("# canary PC value bucket (0x1000-aligned) -> count of A-list entries\n")
        for k, v in sorted_b:
            f.write(f"{k:#010x} {v}\n")
    print("[i] top 20 PC buckets (canary value):")
    for k, v in sorted_b[:20]:
        print(f" {k:#010x} {v}")

    # L1 PC hits (full 116 + narrow)
    l1_hits = [
        (addr, c, o, cluster_pcs[c]) for addr, c, o in a_list if c in cluster_pcs
    ]
    narrow_hits = [
        (addr, c, o, narrow_pcs[c]) for addr, c, o in a_list if c in narrow_pcs
    ]
    with open(os.path.join(here, "l1-hits.txt"), "w") as f:
        f.write(f"# Renderer cluster L1 PC hits in canary's v40 (broad set, count={len(l1_hits)})\n")
        for addr, c, o, name in l1_hits:
            f.write(f"{_fmt_entry(addr, c, o)} // {name}\n")
        f.write(f"\n# Narrow hand-picked subset (count={len(narrow_hits)})\n")
        for addr, c, o, name in narrow_hits:
            f.write(f"{_fmt_entry(addr, c, o)} // {name}\n")
    print(f"[i] L1 PC hits (broad 116-fn cluster): {len(l1_hits)}")
    print(f"[i] L1 PC hits (narrow 6-fn picks): {len(narrow_hits)}")
    for addr, c, _o, name in l1_hits[:30]:
        print(f" addr={addr:#010x} canary={c:#010x} // {name}")

    # Table detection: runs of 4+ consecutive 4-byte dwords where canary
    # has any 0x82xxxxxx and ours differs.
    runs = _find_runs(a_list, min_len=4)
    with open(os.path.join(here, "tables.txt"), "w") as f:
        f.write(f"# Consecutive A-list runs (>=4 dwords): {len(runs)} runs\n\n")
        for base, length, zeros, entries in runs[:120]:
            f.write(f"=== run base={base:#010x} length={length} zeros_in_ours={zeros} ===\n")
            for addr, c, o in entries[:64]:
                f.write(f" +{addr-base:#06x}: canary={c:#010x} ours={o:#010x}\n")
            if length > 64:
                f.write(f" ... and {length-64} more\n")
            f.write("\n")
    print(f"[i] table-shaped runs (>=4 consecutive A-list dwords): {len(runs)}")
    for base, length, zeros, _ in runs[:12]:
        print(f" base={base:#010x} length={length} zeros={zeros}")

    # Pages with divergences
    page_sorted = sorted(page_a_count.items(), key=lambda x: -x[1])
    with open(os.path.join(here, "pages.txt"), "w") as f:
        f.write(f"# 64K pages with A-list divergences (count={len(page_sorted)})\n")
        for pg, cnt in page_sorted:
            f.write(f"page={pg:#010x} count={cnt}\n")
    print(f"[i] pages with divergences: {len(page_sorted)}")
    for pg, cnt in page_sorted[:10]:
        print(f" page={pg:#010x} divergent_dwords={cnt}")

    # Anchor neighborhood
    with open(os.path.join(here, "anchors.txt"), "w") as f:
        f.write("# Diff entries within +/-0x100 of named anchor addresses\n\n")
        for anchor, name in NAMED_ANCHORS.items():
            f.write(f"=== {anchor:#010x} ({name}) ===\n")
            for addr, c, o in a_list:
                if abs(addr - anchor) <= 0x100:
                    f.write(f" {_fmt_entry(addr, c, o)}\n")
            f.write("\n")
# Run the diff only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()