xenia-rs/audit-runs/audit-029-physical-mem-diff/extract_physical.py

#!/usr/bin/env python3
"""Extract canary's physical heap (5th heap) as a flat 512MB binary.

Walks the canary Memory::Save dump in heap order (v00, v40, v80, v90, physical),
skipping past the first four, then for `physical` writes each committed 4096-byte
page to its file offset (page_idx * 4096). Uncommitted pages stay zero.

Per memory.cc:222-242:
  v00      base 0x00000000 size 0x40000000 pgsz 4096   -> 262144 pages
  v40      base 0x40000000 size 0x3F000000 pgsz 65536  ->  16128 pages
  v80      base 0x80000000 size 0x10000000 pgsz 65536  ->   4096 pages
  v90      base 0x90000000 size 0x10000000 pgsz 4096   ->  65536 pages
  physical base 0x00000000 size 0x20000000 pgsz 4096   -> 131072 pages

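Each page is preceded by an 8-byte little-endian header qword; the page state
is in bits 60-61 of that qword (per audit-022 empirical finding). A payload of
exactly one page (4096 or 65536 bytes, matching the heap's page size) follows
only when the commit bit is set, i.e. (state & K_COMMIT) == 2. Otherwise
(state == 0, or the commit bit clear as in the reserved-but-not-committed
case) no payload follows and the next page header starts immediately.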

NOTE: This file format is reverse-engineered. The audit-026/027 scripts
implicitly handled the reserved-but-not-committed case by checking
`state != 0 and (state & K_COMMIT)`. We mirror the same logic here.
"""
import struct
import sys
import os

# Heap table, listed in the order the heaps are walked in the dump.
# Each entry is (name, base address, heap size in bytes, page size in bytes);
# the page count of a heap is size // page size.
HEAPS = [
    ("v00000000", 0x0000_0000, 0x4000_0000, 4 << 10),   # 4 KiB pages
    ("v40000000", 0x4000_0000, 0x3F00_0000, 64 << 10),  # 64 KiB pages
    ("v80000000", 0x8000_0000, 0x1000_0000, 64 << 10),  # 64 KiB pages
    ("v90000000", 0x9000_0000, 0x1000_0000, 4 << 10),   # 4 KiB pages
    ("physical",  0x0000_0000, 0x2000_0000, 4 << 10),   # 4 KiB pages
]
# Bit of the 2-bit page state that is tested to decide whether a page payload
# follows the page header.
K_COMMIT = 1 << 1

def main():
    """Extract the "physical" heap of a canary memory dump into a flat file.

    Command line (both arguments are optional):
        argv[1]  path of the dump to read; defaults to the hard-coded
                 audit-024a canary-memory.dump path below.
        argv[2]  path of the output file; defaults to canary-physical.bin in
                 the directory of this script.

    The dump is walked heap by heap in HEAPS order. Every page has an 8-byte
    little-endian header whose bits 60-61 hold the page state; a page-sized
    payload follows the header only when the K_COMMIT bit of the state is
    set. Payloads of the "physical" heap are copied to offset
    page_idx * page_size of the output buffer, pages without a payload stay
    zero, and payloads of all other heaps are skipped over.

    Raises:
        struct.error: if the dump ends in the middle of a page header or a
            page payload. The message names the heap, page index and file
            offset. Without the explicit payload check a short final page
            would silently shrink the output buffer instead of failing.
    """
    src = sys.argv[1] if len(sys.argv) > 1 else \
        "/home/fabi/RE Project Sylpheed/xenia-rs/audit-runs/audit-024a-canary-diff/canary-memory.dump"
    out = sys.argv[2] if len(sys.argv) > 2 else \
        os.path.join(os.path.dirname(__file__), "canary-physical.bin")
    with open(src, "rb") as f:
        data = f.read()
    total = len(data)
    print(f"[i] dump size: {len(data)} bytes ({len(data)/1024/1024:.1f} MiB)")

    # Zero-copy view for payload slices and a precompiled header format:
    # the loop below runs once per page, several hundred thousand times.
    view = memoryview(data)
    header = struct.Struct("<Q")

    cursor = 0
    out_buf = None
    for name, _base, size, page_size in HEAPS:
        page_count = size // page_size
        committed = 0
        is_physical = name == "physical"
        if is_physical:
            # Pre-zeroed so that pages without a payload read back as zero.
            out_buf = bytearray(size)
        for i in range(page_count):
            if cursor + header.size > total:
                raise struct.error(
                    f"{src}: dump truncated in page header at offset "
                    f"{cursor:#x} (heap {name}, page {i} of {page_count})"
                )
            qword = header.unpack_from(data, cursor)[0]
            cursor += header.size
            state = (qword >> 60) & 0x3
            # A set commit bit implies state != 0, so this single test is
            # equivalent to `state != 0 and (state & K_COMMIT)`.
            if not state & K_COMMIT:
                continue
            end = cursor + page_size
            if end > total:
                # A short slice assigned into a bytearray slice would shrink
                # the buffer and shift every later page; fail loudly instead.
                raise struct.error(
                    f"{src}: dump truncated in page payload at offset "
                    f"{cursor:#x} (heap {name}, page {i} of {page_count}, "
                    f"need {page_size} bytes, have {total - cursor})"
                )
            if is_physical:
                out_buf[i * page_size:(i + 1) * page_size] = view[cursor:end]
            cursor = end
            committed += 1
        print(f"[i] {name}: pages={page_count} committed={committed}")
    print(f"[i] total parsed: {cursor:#x} (file size: {len(data):#x})")
    with open(out, "wb") as f:
        f.write(out_buf)
    print(f"[i] wrote {len(out_buf)} bytes to {out}")

# Run the extraction only when this file is executed as a script, so that
# importing the module (e.g. to reuse HEAPS) has no side effects.
if __name__ == "__main__":
    main()