#!/usr/bin/env python3
"""Comprehensive dword-level diff of canary's v80 vs ours.

For every 4-byte BE-aligned dword in [0x80000000, 0x90000000):
  - canary_dw = canary[i*4..i*4+4] interpreted as BE u32
  - ours_dw   = ours  [i*4..i*4+4] interpreted as BE u32

Records:
  CASE A (primary): canary_dw in 0x82000000..0x82A00000 (game-code addr) AND ours_dw != canary_dw
  CASE B (inverse): ours_dw in 0x82000000..0x82A00000 AND canary_dw != ours_dw

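Produces (all written next to this script):
  diff.txt        full A-list, in ascending address order
  diff-b.txt      inverse B-list (smaller, often empty)
  histogram.txt   count of A-list entries per 0x1000-aligned canary PC value
  l1-hits.txt     A-list entries whose canary value is one of the L1_PCS entries
  tables.txt      runs of >=4 address-consecutive A-list dwords (longest first)
  anchors.txt     A-list entries within +/-0x100 of each NAMED_ANCHORS address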
"""
import struct
import sys
import os
from collections import defaultdict

# Address window covered by each dump file: [V80_BASE, V80_BASE + V80_LEN).
V80_BASE = 0x8000_0000
V80_LEN = 0x1000_0000

# Half-open range [PC_LO, PC_HI) of dword values treated as game-code addresses.
PC_LO = 0x8200_0000
PC_HI = 0x82A0_0000

# Canary dword values that get an explicit, named entry in l1-hits.txt.
L1_PCS = {
    0x8229_19C8: "sub_822919C8",
    0x8229_3448: "sub_82293448",
    0x8228_8028: "sub_82288028",
    0x8229_2D80: "sub_82292d80",
    0x8228_51E0: "sub_822851e0",
    0x8228_6BC8: "sub_82286bc8",
    # Audio-path entries from AUDIT-025, flagged alongside the renderer ones.
    0x8200_6CF4: "audio_system_vtable_0x82006CF4",  # unlikely in v80 but worth logging
    0x824D_23B0: "sub_824D23B0_audio_KeSetEvent",
}

# Addresses whose +/-0x100 neighbourhood is reported in anchors.txt.
# Insertion order is the order of the sections in that report.
NAMED_ANCHORS = {
    0x828F_3D08: "0x100c dispatcher",
    0x828F_3EC0: "0x1004 dispatcher",
    0x828F_4070: "0x15e4 worker singleton",
    0x828F_4838: "audit-023 listener struct",
    0x828A_3230: "audio buffer-completion semaphore",
    0x828A_3254: "audit-025 audio wait target",
    # NOTE: this address is below V80_BASE, so no diffed dword can fall within
    # +/-0x100 of it; its anchors.txt section is always just the heading.
    0x40BA_9A80: "audit-016 listener struct (heap)",
}

def _read_dump(path):
    """Read one dump file and verify it is exactly V80_LEN bytes long.

    Raises ValueError on a size mismatch (an explicit check rather than an
    ``assert``, so it still runs under ``python -O``) and OSError if the file
    cannot be read.
    """
    with open(path, "rb") as f:
        data = f.read()
    if len(data) != V80_LEN:
        raise ValueError(
            f"{path}: expected {V80_LEN:#x} bytes, got {len(data):#x}"
        )
    return data


def _collect_divergences(canary, ours):
    """Return ``(a_list, b_list)`` of differing big-endian dwords.

    Each entry is ``(addr, canary_dw, ours_dw)``.  The A-list holds entries
    whose canary value lies in [PC_LO, PC_HI); the B-list holds entries whose
    ours value does.  A dword can appear in both.  Both lists come out in
    ascending address order.  Both buffers must have the same length, a
    multiple of 4.
    """
    a_list = []
    b_list = []
    be_u32 = struct.Struct(">I")
    # iter_unpack walks each buffer once instead of issuing two
    # unpack_from() calls per dword.
    pairs = zip(be_u32.iter_unpack(canary), be_u32.iter_unpack(ours))
    for index, ((cdw,), (odw,)) in enumerate(pairs):
        if cdw == odw:
            continue
        entry = (V80_BASE + index * 4, cdw, odw)
        if PC_LO <= cdw < PC_HI:
            a_list.append(entry)
        if PC_LO <= odw < PC_HI:
            b_list.append(entry)
    return a_list, b_list


def _consecutive_runs(a_list, min_len=4):
    """Group A-list entries into runs of address-consecutive dwords.

    Returns ``(base_addr, length, zeros_in_ours, entries)`` tuples for every
    run of at least ``min_len`` entries, longest run first (ties keep
    ascending address order).  ``zeros_in_ours`` counts entries whose ours
    value is 0.
    """
    ordered = sorted(a_list, key=lambda e: e[0])
    runs = []
    start = 0
    for pos in range(1, len(ordered) + 1):
        # Keep extending the run while the next entry is exactly 4 bytes on.
        if pos < len(ordered) and ordered[pos][0] == ordered[pos - 1][0] + 4:
            continue
        if pos - start >= min_len:
            entries = ordered[start:pos]
            zero_count = sum(1 for _a, _c, o in entries if o == 0)
            runs.append((entries[0][0], len(entries), zero_count, entries))
        start = pos
    runs.sort(key=lambda r: -r[1])
    return runs


def _write_entries(path, header, entries):
    """Write ``header`` followed by one ``addr/canary/ours`` line per entry."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(header)
        f.writelines(
            f"addr={addr:#010x}  canary={c:#010x}  ours={o:#010x}\n"
            for addr, c, o in entries
        )


def main():
    """Diff canary-v80.bin against ours-v80.bin and write the report files.

    Both dumps are read from the directory containing this script, and the
    reports (diff.txt, diff-b.txt, histogram.txt, l1-hits.txt, tables.txt,
    anchors.txt) are written to that same directory.  Summary lines are
    printed to stdout.

    Raises:
        ValueError: if either dump is not exactly V80_LEN bytes long.
        OSError: if a dump cannot be read or a report cannot be written.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    canary = _read_dump(os.path.join(here, "canary-v80.bin"))
    ours = _read_dump(os.path.join(here, "ours-v80.bin"))

    # a_list: canary has PC, ours different; b_list: ours has PC, canary different
    a_list, b_list = _collect_divergences(canary, ours)
    # The raw dumps are no longer needed; drop them before building reports.
    del canary, ours

    print(f"[i] case A divergences (canary has PC, ours differs): {len(a_list)}")
    print(f"[i] case B divergences (ours has PC, canary differs): {len(b_list)}")

    # Reports contain non-ASCII characters, so every output file is opened
    # with an explicit UTF-8 encoding rather than the locale default.
    _write_entries(
        os.path.join(here, "diff.txt"),
        f"# A-list: {len(a_list)} entries — canary has 0x82xxxxxx PC, ours differs\n",
        a_list,
    )
    _write_entries(
        os.path.join(here, "diff-b.txt"),
        f"# B-list: {len(b_list)} entries — ours has 0x82xxxxxx PC, canary differs\n",
        b_list,
    )

    # Histogram by canary PC value (0x1000-aligned), most frequent first.
    bucket = defaultdict(int)
    for _addr, c, _o in a_list:
        bucket[c & ~0xFFF] += 1
    sorted_b = sorted(bucket.items(), key=lambda x: -x[1])
    with open(os.path.join(here, "histogram.txt"), "w", encoding="utf-8") as f:
        f.write("# canary PC value bucket (0x1000-aligned) -> count of A-list entries\n")
        for k, v in sorted_b:
            f.write(f"{k:#010x}  {v}\n")
    print("[i] top 10 PC buckets (canary value):")
    for k, v in sorted_b[:10]:
        print(f"    {k:#010x}  {v}")

    # L1 PC explicit hits: A-list entries whose canary value is a key of L1_PCS.
    l1_hits = [(addr, c, o, L1_PCS[c]) for addr, c, o in a_list if c in L1_PCS]
    with open(os.path.join(here, "l1-hits.txt"), "w", encoding="utf-8") as f:
        f.write(f"# Renderer cluster L1 PC hits in canary's v80 (count={len(l1_hits)})\n")
        for addr, c, o, name in l1_hits:
            f.write(f"addr={addr:#010x}  canary={c:#010x}  ours={o:#010x}  // {name}\n")
    print(f"[i] L1 PC hits: {len(l1_hits)}")
    for addr, c, o, name in l1_hits[:20]:
        print(f"    addr={addr:#010x}  canary={c:#010x}  // {name}")

    # Table detection: runs of 4+ address-consecutive A-list dwords.  The
    # ours value is not filtered; the number of zero ours values per run is
    # only counted and reported.  At most 80 runs and 32 entries per run are
    # written out.
    runs = _consecutive_runs(a_list)
    with open(os.path.join(here, "tables.txt"), "w", encoding="utf-8") as f:
        f.write(f"# Consecutive A-list runs (>=4 dwords): {len(runs)} runs\n\n")
        for base, length, zeros, entries in runs[:80]:
            f.write(f"=== run base={base:#010x} length={length} zeros_in_ours={zeros} ===\n")
            for addr, c, o in entries[:32]:
                f.write(f"  +{addr-base:#06x}: canary={c:#010x}  ours={o:#010x}\n")
            if length > 32:
                f.write(f"  ... and {length-32} more\n")
            f.write("\n")
    print(f"[i] table-shaped runs (>=4 consecutive A-list dwords): {len(runs)}")
    for base, length, zeros, _ in runs[:8]:
        print(f"    base={base:#010x}  length={length}  zeros={zeros}")

    # Anchor-address neighborhood reports: one section per anchor, in
    # NAMED_ANCHORS order, listing A-list entries at most 0x100 bytes away.
    with open(os.path.join(here, "anchors.txt"), "w", encoding="utf-8") as f:
        f.write("# Diff entries within ±0x100 of named anchor addresses\n\n")
        for anchor, name in NAMED_ANCHORS.items():
            f.write(f"=== {anchor:#010x} ({name}) ===\n")
            for addr, c, o in a_list:
                if abs(addr - anchor) <= 0x100:
                    f.write(f"  addr={addr:#010x}  canary={c:#010x}  ours={o:#010x}\n")
            f.write("\n")
# Run the diff only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()