xenia-rs/audit-runs/audit-023-canary-diff/parse_dump.py

#!/usr/bin/env python3
"""
Parse canary's Memory::Save dump.

Format (canary memory.cc Memory::Save -> heaps_.<heap>.Save iterates pages):
  for each heap in [v00000000, v40000000, v80000000, v90000000, physical]:
    for each page (heap_size / page_size pages):
      uint64_t qword (LE host-endian)
      if state != 0 and (state bits) & kMemoryAllocationCommit (=2):
        page_size raw bytes (host-endian, but guest PPC big-endian written through guest stores)

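PageEntry bitfield (LE qword). The bit ranges below are the naive
declaration-order packing and do NOT match the dump: parse() found
empirically that state sits at bits 60-61, so the other ranges should be
treated as unverified as well.
  base_address          : 20 (naive: bits 0-19)
  region_page_count     : 20 (naive: bits 20-39)
  allocation_protect    : 4  (naive: bits 40-43)
  current_protect       : 4  (naive: bits 44-47)
  state                 : 2  (naive: bits 48-49; observed: bits 60-61) -- 0x1 reserve, 0x2 commit
  reserved              : 14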
"""

import struct
import sys

# Heap table, in the order the heaps appear in the dump file.
# Each entry is (name, guest base address, heap size in bytes, page size in bytes).
HEAPS = [
    ("v00000000", 0x00000000, 0x40000000, 4 * 1024),
    ("v40000000", 0x40000000, 0x3F000000, 64 * 1024),
    ("v80000000", 0x80000000, 0x10000000, 64 * 1024),
    ("v90000000", 0x90000000, 0x10000000, 4 * 1024),
    # logical name, separate space
    ("physical", 0x00000000, 0x20000000, 4 * 1024),
]

# Bit tested against a page's 2-bit state field to decide whether the page's
# raw bytes follow its header in the dump.
K_COMMIT = 1 << 1

def parse(path):
    """Walk the dump at ``path`` and index every page of every heap.

    The whole file is read into memory, then each heap listed in ``HEAPS`` is
    walked in order: one little-endian 8-byte header per page, followed by
    ``page_size`` raw bytes when the page's state has the commit bit set.

    Returns:
        ``(heap_index, data)`` where ``data`` is the raw file content and
        ``heap_index`` maps heap name to a dict with keys ``base``, ``size``,
        ``page_size``, ``page_count``, ``start_offset``, ``end_offset``,
        ``committed_pages`` and ``pages``. ``pages`` maps page index to
        ``(qword, offset)``; ``offset`` is the file offset of the page bytes,
        or ``None`` when no complete page payload is present in the file
        (page not committed, or payload cut off by a truncated dump).

    If the file ends early, parsing stops at that point: the heap being
    walked is still finalised (``end_offset`` / ``committed_pages`` set),
    pages after the cut are absent from ``pages``, and heaps that were never
    reached are absent from ``heap_index``.
    """
    with open(path, "rb") as f:
        data = f.read()
    total = len(data)
    print(f"[i] dump size: {len(data)} bytes ({len(data)/1024/1024:.1f} MiB)")

    read_header = struct.Struct("<Q").unpack_from  # compiled once for the hot loop
    cursor = 0
    heap_index = {}
    for name, base, size, page_size in HEAPS:
        page_count = size // page_size
        print(f"[i] heap {name} base={base:#010x} size={size:#x} pages={page_count} ps={page_size}")
        pages = {}  # page_idx -> (qword, page_data_offset_or_None)
        heap_meta = {
            "base": base, "size": size, "page_size": page_size,
            "page_count": page_count,
            "pages": pages,
            "start_offset": cursor,
        }
        heap_index[name] = heap_meta
        committed = 0
        truncated = False
        for i in range(page_count):
            if cursor + 8 > total:
                print(f"[!] truncated reading page header heap={name} page={i} cursor={cursor}")
                truncated = True
                break
            qword = read_header(data, cursor)[0]
            cursor += 8
            # Empirically: gcc/clang lays out PageEntry bitfields with
            # state at bits 60-61 (not 48-49 as a naive declaration-order
            # mapping would predict). Determined by walking the dump and
            # confirming cursor lands exactly at file size.
            state = (qword >> 60) & 0x3
            if not (state & K_COMMIT):
                pages[i] = (qword, None)
                continue
            if cursor + page_size > total:
                # The payload is cut off: keep the header but do not publish
                # an offset, otherwise readers would silently get short data.
                print(f"[!] truncated reading page bytes heap={name} page={i}")
                pages[i] = (qword, None)
                truncated = True
                break
            pages[i] = (qword, cursor)
            cursor += page_size
            committed += 1
        # Finalise the heap record on every path so its schema is consistent.
        heap_meta["end_offset"] = cursor
        heap_meta["committed_pages"] = committed
        if truncated:
            return heap_index, data
        print(f"[i]   committed_pages={committed} cursor_after={cursor:#x}")
    print(f"[i] total parsed bytes: {cursor:#x}; remaining tail: {len(data)-cursor}")
    return heap_index, data

def read_addr(heap_index, data, guest_addr, length):
    """Read ``length`` bytes starting at ``guest_addr`` from the canary memory dump.

    The heap is picked by base/size among the four virtual heaps; the read may
    span several consecutive pages of that heap.

    Args:
        heap_index: mapping of heap name to heap metadata as built by parse().
        data: raw dump bytes that the page offsets in ``heap_index`` refer to.
        guest_addr: guest address of the first byte to read.
        length: number of bytes to read; a value <= 0 yields ``b""``.

    Returns:
        A 5-tuple ``(payload, heap_name, page_idx, qword, status)``:

        * ``(bytes, heap, None, first_page_qword, "committed")`` on success;
        * ``(None, heap, idx, qword, "uncommitted")`` when the first page has
          no bytes in the dump;
        * ``(None, heap, idx, qword, "uncommitted (mid-read)")`` when a later
          page has no bytes in the dump;
        * ``(None, heap, idx, None, "not in dump")`` or
          ``"not in dump (mid-read)"`` when the page has no entry at all
          (dump truncated before it, or the read runs past the heap's end);
        * ``(None, heap, idx, qword, "truncated")`` when the file ends inside
          the page's bytes;
        * ``(None, None, None, None, "no heap")`` when no indexed heap covers
          ``guest_addr`` (including heaps missing from a truncated dump).
    """
    for name in ("v00000000", "v40000000", "v80000000", "v90000000"):
        meta = heap_index.get(name)
        if meta is None:
            # parse() stops at the point of truncation, so later heaps may be
            # missing from the index entirely.
            continue
        base = meta["base"]
        if not (base <= guest_addr < base + meta["size"]):
            continue

        page_size = meta["page_size"]
        pages = meta["pages"]
        page_idx, page_off_in = divmod(guest_addr - base, page_size)

        first = pages.get(page_idx)
        if first is None:
            return None, name, page_idx, None, "not in dump"
        qword, first_off = first
        if first_off is None:
            return None, name, page_idx, qword, "uncommitted"

        # collect bytes spanning multiple pages if needed
        out = bytearray()
        remaining = length
        while remaining > 0:
            entry = pages.get(page_idx)
            if entry is None:
                return None, name, page_idx, None, "not in dump (mid-read)"
            qword2, off2 = entry
            if off2 is None:
                return None, name, page_idx, qword2, "uncommitted (mid-read)"
            take = min(remaining, page_size - page_off_in)
            start = off2 + page_off_in
            chunk = data[start:start + take]
            if len(chunk) != take:
                # Slicing past the end of the file would silently shorten the
                # result; report it instead.
                return None, name, page_idx, qword2, "truncated"
            out += chunk
            remaining -= take
            page_idx += 1
            page_off_in = 0
        return bytes(out), name, None, qword, "committed"
    return None, None, None, None, "no heap"

def main():
    """Command-line entry point: parse a dump and hex-dump guest addresses.

    Usage: ``parse_dump.py <dump_path> [addr1[,addr2,...]] [length]``.
    Addresses and length accept any base prefix understood by ``int(x, 0)``.
    Without an address the default ``0x828F4070`` is used; without a length,
    ``0x100`` bytes are shown. Exits with status 1 when no dump path is given.
    """
    if len(sys.argv) < 2:
        print("usage: parse_dump.py <dump_path> [addr1[,addr2,...]] [length]")
        sys.exit(1)
    path = sys.argv[1]
    addr_args = sys.argv[2] if len(sys.argv) > 2 else "0x828F4070"
    length = int(sys.argv[3], 0) if len(sys.argv) > 3 else 0x100
    # Skip empty items so a stray or trailing comma does not abort the run.
    addrs = [int(a, 0) for a in addr_args.split(",") if a.strip()]

    heap_index, data = parse(path)
    print()
    for addr in addrs:
        res, heap_name, page_idx, qword, status = read_addr(heap_index, data, addr, length)
        print(f"=== addr {addr:#010x} (heap={heap_name}) status={status} ===")
        if res is None:
            # read_addr returns qword=None when no heap covers the address;
            # None does not support the '#x' format spec, so guard it.
            qword_text = "None" if qword is None else f"{qword:#x}"
            print(f"  qword={qword_text} page_idx={page_idx}")
            continue
        # Pretty-print: 16 bytes per row, big-endian word interpretation
        for i in range(0, len(res), 16):
            row = res[i:i+16]
            hexpart = " ".join(f"{b:02x}" for b in row)
            # Only complete 4-byte groups are decoded as words.
            words = [
                f"{struct.unpack_from('>I', row, w)[0]:08x}"
                for w in range(0, len(row) - 3, 4)
            ]
            print(f"  +{i:04x}: {hexpart:<48} | {' '.join(words)}")
        print()


if __name__ == "__main__":
    main()