xenia-rs/audit-runs/audit-029-physical-mem-diff/diff_physical.py

#!/usr/bin/env python3
"""One-sided PC enumeration for canary's physical heap.

Our impl has NO separate physical-memory region (architectural difference,
exposed by audit-027 -- MmAllocatePhysicalMemoryEx folds into the v40 bump
allocator at 0x40000000+). Both 0xA0000000 and 0xE0000000 alias dumps from
our impl yielded 0 committed pages, as does flat 0x00000000.

Therefore: every 0x82xxxxxx PC in canary-physical.bin is automatically a
divergence vs our impl (we have zeros there). This script enumerates them.

Outputs:
  diff.txt           (canary_phys_addr, pc) pairs, capped at the first 200000
  histogram.txt      occurrence count per 0x1000-aligned PC bucket
  l1-hits.txt        renderer-cluster L1 PC hits (broad + narrow)
  audit017-hits.txt  audit-017 chain PC hits
  v40table-hits.txt  the 18 PCs from our v40 table at 0x40211900
  tables.txt         consecutive PC runs (>=4 dwords), 200 longest
  pages.txt          PC count per 4K canary-physical page
  pc-summary.txt     PC count per 64K-aligned canary-physical region
"""
import struct
import os
from collections import defaultdict

PHYS_LEN = 0x20000000  # expected size in bytes of canary-physical.bin (512 MiB)
PC_LO = 0x82000000     # inclusive lower bound of the dword values counted as PCs
PC_HI = 0x82A00000     # exclusive upper bound of the dword values counted as PCs

DIFF_LIMIT = 200000    # maximum number of pairs written to diff.txt
PAGE_SIZE = 4096       # granularity of the per-page counts in pages.txt

# audit-017 chain PCs
AUDIT017_CHAIN = {
    0x82184318: "sub_82184318",
    0x82184374: "0x82184374 (writer)",
    0x82187768: "sub_82187768",
    0x82187dd0: "sub_82187DD0",
    0x82183ca8: "sub_82183CA8",
    0x822919c8: "sub_822919C8",
    0x82186760: "sub_82186760",
    0x821c88d0: "sub_821C88D0",
}

# Narrow audit-009 cluster L1 set
NARROW_L1 = {
    0x822919C8: "sub_822919C8",
    0x82293448: "sub_82293448",
    0x82288028: "sub_82288028",
    0x82292D80: "sub_82292d80",
    0x822851E0: "sub_822851e0",
    0x82286BC8: "sub_82286bc8",
}

# 18 PCs in our v40 table at 0x40211900 (cross-reference)
V40_TABLE_PCS = {
    0x82183ae8, 0x82187e38, 0x8218cf10, 0x82191b18,
    0x821958c8, 0x82197448, 0x82199600, 0x82199ab0,
    0x821a3a50, 0x821ac770, 0x821b0378, 0x821b41f0,
    0x821b7178, 0x821ba1c8, 0x821bd470, 0x821bfad0,
    0x821c0288, 0x821c09d8,
}


def _load_cluster_pcs(path):
    """Parse a cluster list file: one ``<hex pc> [name]`` entry per line.

    Blank lines and lines whose first token starts with ``#`` are skipped.
    Entries without a name get ``sub_<PC>`` (upper-case hex) as their label.
    """
    pcs = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if not parts or parts[0].startswith("#"):
                continue
            pc = int(parts[0], 16)
            pcs[pc] = parts[1] if len(parts) > 1 else f"sub_{pc:08X}"
    return pcs


def _scan_pc_dwords(data, lo, hi):
    """Return ``[(offset, value)]`` for each aligned big-endian dword in [lo, hi).

    ``len(data)`` must be a multiple of 4. The result is in ascending offset
    order because the buffer is walked front to back.
    """
    return [
        (idx * 4, dw)
        for idx, (dw,) in enumerate(struct.iter_unpack(">I", data))
        if lo <= dw < hi
    ]


def _named_hits(hits, names):
    """Return ``(offset, pc, label)`` for every hit whose pc is a key of *names*."""
    return [(paddr, pc, names[pc]) for paddr, pc in hits if pc in names]


def _find_runs(hits, min_len=4):
    """Group offset-sorted hits into runs of adjacent dwords.

    Returns ``(base_offset, length, entries)`` for each run of at least
    *min_len* hits whose offsets increase by exactly 4, in ascending base order.
    """
    runs = []
    start = 0
    for k in range(1, len(hits) + 1):
        # A run ends at the end of the list or where the next hit is not adjacent.
        if k == len(hits) or hits[k][0] != hits[k - 1][0] + 4:
            if k - start >= min_len:
                runs.append((hits[start][0], k - start, hits[start:k]))
            start = k
    return runs


def _by_count_desc(counts):
    """Return ``counts.items()`` sorted by descending count (ties keep insertion order)."""
    return sorted(counts.items(), key=lambda kv: -kv[1])


def _write_named(f, named):
    """Write one ``phys=... pc=... // label`` line per named hit."""
    for paddr, pc, name in named:
        f.write(f"phys={paddr:#010x}  pc={pc:#010x}  // {name}\n")


def main(here=None, phys_len=PHYS_LEN):
    """Enumerate PC-valued dwords in ``canary-physical.bin`` and write the reports.

    Args:
        here: Directory holding ``canary-physical.bin`` (and optionally
            ``cluster_l1_pcs.txt``); the report files are written there too.
            Defaults to the directory containing this script.
        phys_len: Required size of the dump in bytes. Defaults to ``PHYS_LEN``.

    Raises:
        ValueError: If *phys_len* is not a multiple of 4 or the dump's size
            differs from *phys_len*.
        FileNotFoundError: If ``canary-physical.bin`` does not exist.
    """
    if here is None:
        here = os.path.dirname(os.path.abspath(__file__))
    if phys_len % 4:
        raise ValueError(f"phys_len must be a multiple of 4, got {phys_len:#x}")

    with open(os.path.join(here, "canary-physical.bin"), "rb") as f:
        canary = f.read()
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if len(canary) != phys_len:
        raise ValueError(
            f"canary-physical.bin is {len(canary):#x} bytes, expected {phys_len:#x}"
        )

    # Load broad cluster L1 set (optional input).
    cl1 = os.path.join(here, "cluster_l1_pcs.txt")
    if os.path.exists(cl1):
        cluster_pcs = _load_cluster_pcs(cl1)
    else:
        cluster_pcs = {}
        print(f"[!] {cl1} not found; broad L1 set is empty")

    hits = _scan_pc_dwords(canary, PC_LO, PC_HI)  # (phys_addr, pc), ascending
    del canary  # the raw dump is no longer needed; only the hit list is used below

    page_count = defaultdict(int)
    bucket = defaultdict(int)
    for paddr, pc in hits:
        page_count[paddr & ~(PAGE_SIZE - 1)] += 1
        bucket[pc & ~0xFFF] += 1

    print(f"[i] total 0x82xxxxxx PC dwords on canary physical heap: {len(hits)}")

    with open(os.path.join(here, "diff.txt"), "w") as f:
        f.write(f"# {len(hits)} 0x82xxxxxx PC dwords on canary's physical heap\n")
        f.write("# (ours has no committed pages in this region, so all are divergent)\n")
        f.write(f"# (truncated to first {DIFF_LIMIT} if larger)\n")
        for paddr, pc in hits[:DIFF_LIMIT]:
            f.write(f"phys={paddr:#010x}  pc={pc:#010x}\n")

    sorted_b = _by_count_desc(bucket)
    with open(os.path.join(here, "histogram.txt"), "w") as f:
        f.write("# canary PC value bucket (0x1000-aligned) -> occurrence count on physical heap\n")
        for k, v in sorted_b:
            f.write(f"{k:#010x}  {v}\n")
    print("[i] top 25 PC buckets (canary physical-heap occurrences):")
    for k, v in sorted_b[:25]:
        print(f"    {k:#010x}  {v}")

    # Cluster L1 hits
    l1_hits_broad = _named_hits(hits, cluster_pcs)
    l1_hits_narrow = _named_hits(hits, NARROW_L1)
    with open(os.path.join(here, "l1-hits.txt"), "w") as f:
        f.write(f"# Renderer cluster L1 PC hits in canary's physical heap (broad set, count={len(l1_hits_broad)})\n")
        _write_named(f, l1_hits_broad)
        f.write(f"\n# Narrow hand-picked subset (count={len(l1_hits_narrow)})\n")
        _write_named(f, l1_hits_narrow)
    # Sizes come from the sets actually in use, so the labels cannot go stale.
    print(f"[i] L1 PC hits (broad {len(cluster_pcs)}-fn cluster): {len(l1_hits_broad)}")
    print(f"[i] L1 PC hits (narrow {len(NARROW_L1)}-fn picks):    {len(l1_hits_narrow)}")
    for paddr, pc, name in l1_hits_broad[:30]:
        print(f"    phys={paddr:#010x}  pc={pc:#010x}  // {name}")

    # audit-017 chain hits
    a17_hits = _named_hits(hits, AUDIT017_CHAIN)
    with open(os.path.join(here, "audit017-hits.txt"), "w") as f:
        f.write(f"# audit-017 chain PC hits (count={len(a17_hits)})\n")
        _write_named(f, a17_hits)
    print(f"[i] audit-017 chain PC hits: {len(a17_hits)}")
    for paddr, pc, name in a17_hits[:30]:
        print(f"    phys={paddr:#010x}  pc={pc:#010x}  // {name}")

    # v40 table cross-reference
    v40_hits = [(paddr, pc) for paddr, pc in hits if pc in V40_TABLE_PCS]
    with open(os.path.join(here, "v40table-hits.txt"), "w") as f:
        f.write(f"# Hits where canary stores one of the {len(V40_TABLE_PCS)} PCs from our v40 table at 0x40211900\n")
        f.write("# (audit-027 hypothesis: this table belongs on physical heap in canary)\n")
        f.write(f"# count={len(v40_hits)}\n")
        for paddr, pc in v40_hits:
            f.write(f"phys={paddr:#010x}  pc={pc:#010x}\n")
    print(f"[i] v40-table PC hits on canary physical: {len(v40_hits)}")
    if v40_hits:
        print("    sample addrs:")
        for paddr, pc in v40_hits[:20]:
            print(f"      phys={paddr:#010x}  pc={pc:#010x}")

    # Tables: consecutive PC runs >= 4 dwords. `hits` is already in ascending
    # address order, so no extra sort is needed before grouping.
    runs = _find_runs(hits, min_len=4)
    runs.sort(key=lambda r: -r[1])  # longest first; stable for equal lengths
    with open(os.path.join(here, "tables.txt"), "w") as f:
        f.write(f"# Consecutive PC dword runs (>=4 dwords): {len(runs)} runs\n\n")
        for base, length, entries in runs[:200]:
            f.write(f"=== run base={base:#010x} length={length} ===\n")
            for paddr, pc in entries[:64]:
                f.write(f"  +{paddr-base:#06x}: pc={pc:#010x}\n")
            if length > 64:
                f.write(f"  ... and {length-64} more\n")
            f.write("\n")
    print(f"[i] table-shaped runs (>=4 consecutive PC dwords on canary physical): {len(runs)}")
    for base, length, _ in runs[:15]:
        print(f"    base={base:#010x}  length={length}")

    # Pages with PC content
    page_sorted = _by_count_desc(page_count)
    with open(os.path.join(here, "pages.txt"), "w") as f:
        f.write(f"# 4K pages with PC dwords on canary's physical heap (count={len(page_sorted)})\n")
        for pg, cnt in page_sorted:
            f.write(f"page={pg:#010x}  pc_count={cnt}\n")
    print(f"[i] 4K pages containing PCs: {len(page_sorted)}")

    # Larger 64K page-region summary
    region_count = defaultdict(int)
    for paddr, _ in hits:
        region_count[paddr & ~0xFFFF] += 1
    region_sorted = _by_count_desc(region_count)  # sorted once, used twice
    with open(os.path.join(here, "pc-summary.txt"), "w") as f:
        f.write("# 64K-aligned region PC density on canary's physical heap\n")
        for region, cnt in region_sorted:
            f.write(f"region={region:#010x}  pc_count={cnt}\n")
    print(f"[i] 64K-aligned regions with PCs: {len(region_count)}")
    for r, c in region_sorted[:15]:
        print(f"    region={r:#010x}  pc_count={c}")

# Run the enumeration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()