Files
xenia-rs/audit-runs/audit-023-canary-diff/parse_dump.py
MechaCat02 8e709b0a24 chore: track audit-runs summary artifacts (md/csv/diff/txt/json/etc)
Snapshot of every non-log artifact under audit-runs/ from audits 003
through 058: findings.md per audit, comparison CSVs, probe diffs,
schema docs, register-dump txts, lr-trace JSONL streams, the saved
canary patch diffs, etc. ~284 files / ~52 MB total.

Excluded (per .gitignore): probe stdout/stderr/log streams (the raw
firehose), guest-memory dumps under audit-026/027/029 (4.5 GB of
.bin files; *.bin pattern added to .gitignore this commit).

Also adds the orphan audit-058-sub825070F0-activation directory that
a subagent accidentally created at project-root instead of
under xenia-rs/audit-runs/; relocated to its proper home.

Purpose: cross-machine continuity. With these summaries committed,
a fresh clone gives the next session the full per-audit context
(findings + tables + cascade predictions) without dependence on
local-only working tree.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 21:36:41 +02:00

143 lines
5.8 KiB
Python

#!/usr/bin/env python3
"""
Parse canary's Memory::Save dump.
Format (canary memory.cc Memory::Save -> heaps_.<heap>.Save iterates pages):
for each heap in [v00000000, v40000000, v80000000, v90000000, physical]:
for each page (heap_size / page_size pages):
uint64_t qword (LE host-endian)
if state != 0 and (state bits) & kMemoryAllocationCommit (=2):
page_size raw bytes (host-endian, but guest PPC big-endian written through guest stores)
PageEntry bitfield (LE qword):
base_address : 20 (bits 0-19)
region_page_count : 20 (bits 20-39)
allocation_protect : 4 (bits 40-43)
current_protect : 4 (bits 44-47)
state : 2 (bits 48-49 by naive declaration order; parse() reads it from bits 60-61, the position observed in real dumps) -- 0x1 reserve, 0x2 commit
reserved : 14
"""
import struct
import sys
# Heap layout walked by parse(), in the order the heaps appear in the dump.
# Each entry is (name, guest_base, heap_size_bytes, page_size_bytes).
HEAPS = [
    ("v00000000", 0x00000000, 0x40000000, 4096),
    ("v40000000", 0x40000000, 0x3F000000, 65536),
    ("v80000000", 0x80000000, 0x10000000, 65536),
    ("v90000000", 0x90000000, 0x10000000, 4096),
    ("physical", 0x00000000, 0x20000000, 4096),  # logical name, separate space
]
# Bit of the PageEntry state field that marks a page as committed; only
# committed pages are followed by page_size raw bytes in the dump.
K_COMMIT = 0x2


def parse(path, heaps=None):
    """Walk a canary memory dump and index every page entry it contains.

    The file is read fully into memory, then each heap is walked page by
    page: an 8-byte little-endian entry, followed by the page's raw bytes
    when the entry's state has the commit bit set.

    Args:
        path: Path of the dump file.
        heaps: Optional iterable of (name, base, size, page_size) tuples in
            file order. Defaults to the module-level HEAPS table.

    Returns:
        A (heap_index, data) tuple. data is the raw file content.
        heap_index maps heap name to a dict with the keys:
          base, size, page_size, page_count -- copied/derived from the layout
          pages            -- page_idx -> (qword, file_offset_of_bytes_or_None)
          start_offset     -- file offset where this heap's entries begin
          end_offset       -- file offset just past the parsed part of the
                              heap (clamped to the file size)
          committed_pages  -- number of entries recorded with page bytes
                              (includes a final, partially present page)
          truncated        -- True when the file ended inside this heap
        When the dump is truncated, parsing stops at that heap; later heaps
        are absent from heap_index.
    """
    layout = HEAPS if heaps is None else heaps
    with open(path, "rb") as f:
        data = f.read()
    total = len(data)
    print(f"[i] dump size: {len(data)} bytes ({len(data)/1024/1024:.1f} MiB)")
    cursor = 0
    heap_index = {}
    for name, base, size, page_size in layout:
        page_count = size // page_size
        print(f"[i] heap {name} base={base:#010x} size={size:#x} pages={page_count} ps={page_size}")
        pages = {}  # page_idx -> (qword, page_data_offset_or_None)
        heap_meta = {
            "base": base, "size": size, "page_size": page_size,
            "page_count": page_count,
            "pages": pages,
            "start_offset": cursor,
        }
        heap_index[name] = heap_meta
        committed = 0
        truncated = False
        for i in range(page_count):
            if cursor + 8 > total:
                print(f"[!] truncated reading page header heap={name} page={i} cursor={cursor}")
                truncated = True
                break
            qword = struct.unpack_from("<Q", data, cursor)[0]
            cursor += 8
            # Empirically: gcc/clang lays out PageEntry bitfields with
            # state at bits 60-61 (not 48-49 as a naive declaration-order
            # mapping would predict). Determined by walking the dump and
            # confirming cursor lands exactly at file size.
            state = (qword >> 60) & 0x3
            if state & K_COMMIT:
                pages[i] = (qword, cursor)
                cursor += page_size
                committed += 1
                if cursor > total:
                    print(f"[!] truncated reading page bytes heap={name} page={i}")
                    truncated = True
                    break
            else:
                pages[i] = (qword, None)
        # Fill in the summary keys on every path so truncated and complete
        # heaps expose the same metadata shape.
        heap_meta["end_offset"] = min(cursor, total)
        heap_meta["committed_pages"] = committed
        heap_meta["truncated"] = truncated
        if truncated:
            return heap_index, data
        print(f"[i] committed_pages={committed} cursor_after={cursor:#x}")
    print(f"[i] total parsed bytes: {cursor:#x}; remaining tail: {len(data)-cursor}")
    return heap_index, data
def read_addr(heap_index, data, guest_addr, length):
    """Read length bytes starting at guest_addr from a parsed memory dump.

    The heap is picked by base/size among the four virtual heaps; the
    "physical" heap is never consulted. Reads may span several pages of the
    same heap, but every page touched must be committed and fully present.

    Args:
        heap_index: Mapping of heap name -> metadata dict with at least the
            keys "base", "size", "page_size" and "pages"
            (page_idx -> (qword, file_offset_or_None)).
        data: Raw dump bytes that the page file offsets index into.
        guest_addr: Guest address of the first byte to read.
        length: Number of bytes to read.

    Returns:
        A 5-tuple (bytes_or_None, heap_name, page_idx, qword, status):
          (bytes, name, None, first_page_qword, "committed") on success;
          (None, name, page_idx, qword, "uncommitted"[ + " (mid-read)"])
              when a page has no bytes in the dump;
          (None, name, page_idx, None, "page not in dump"[ + " (mid-read)"])
              when the page was never parsed (read runs past the heap, or
              the dump was truncated before that page);
          (None, name, page_idx, qword, "truncated page data") when the
              page's bytes are cut short in the file;
          (None, None, None, None, "no heap") when no indexed heap contains
              guest_addr.
    """
    for name in ("v00000000", "v40000000", "v80000000", "v90000000"):
        # A truncated dump leaves later heaps out of the index; skip them
        # instead of failing with KeyError.
        meta = heap_index.get(name)
        if meta is None:
            continue
        base = meta["base"]
        if not base <= guest_addr < base + meta["size"]:
            continue
        page_size = meta["page_size"]
        pages = meta["pages"]
        page_idx, page_off_in = divmod(guest_addr - base, page_size)
        first = pages.get(page_idx)
        if first is None:
            return None, name, page_idx, None, "page not in dump"
        qword, page_off_in_file = first
        if page_off_in_file is None:
            return None, name, page_idx, qword, "uncommitted"
        # collect bytes spanning multiple pages if needed
        out = bytearray()
        remaining = length
        while remaining > 0:
            entry = pages.get(page_idx)
            if entry is None:
                return None, name, page_idx, None, "page not in dump (mid-read)"
            qword2, off2 = entry
            if off2 is None:
                return None, name, page_idx, qword2, "uncommitted (mid-read)"
            take = min(remaining, page_size - page_off_in)
            start = off2 + page_off_in
            chunk = data[start:start + take]
            if len(chunk) < take:
                # Slicing past the end of the file silently shortens the
                # result; report it rather than return fewer bytes.
                return None, name, page_idx, qword2, "truncated page data"
            out.extend(chunk)
            remaining -= take
            page_idx += 1
            page_off_in = 0
        return bytes(out), name, None, qword, "committed"
    return None, None, None, None, "no heap"
def main():
    """Command-line entry point: parse a dump and hex-dump guest addresses.

    Usage: parse_dump.py <dump_path> [addr1[,addr2,...]] [length]

    Addresses and length accept any base understood by int(x, 0). Without
    an address argument 0x828F4070 is dumped; the default length is 0x100.
    Exits with status 1 when no dump path is given.
    """
    if len(sys.argv) < 2:
        print("usage: parse_dump.py <dump_path> [addr1[,addr2,...]] [length]")
        sys.exit(1)
    path = sys.argv[1]
    addr_args = sys.argv[2] if len(sys.argv) > 2 else "0x828F4070"
    length = int(sys.argv[3], 0) if len(sys.argv) > 3 else 0x100
    addrs = [int(a, 0) for a in addr_args.split(",")]
    heap_index, data = parse(path)
    print()
    for addr in addrs:
        res, heap_name, page_idx, qword, status = read_addr(heap_index, data, addr, length)
        print(f"=== addr {addr:#010x} (heap={heap_name}) status={status} ===")
        if res is None:
            # read_addr reports qword=None when no page entry applies
            # (e.g. status "no heap"); formatting None with :#x would raise.
            qword_text = "None" if qword is None else f"{qword:#x}"
            print(f" qword={qword_text} page_idx={page_idx}")
            continue
        # Pretty-print: 16 bytes per row, big-endian word interpretation
        for i in range(0, len(res), 16):
            row = res[i:i+16]
            hexpart = " ".join(f"{b:02x}" for b in row)
            # Only complete 4-byte groups are shown as words; a trailing
            # partial group appears in the hex column only.
            words = [
                f"{struct.unpack('>I', row[w:w+4])[0]:08x}"
                for w in range(0, len(row) - 3, 4)
            ]
            print(f" +{i:04x}: {hexpart:<48} | {' '.join(words)}")
        print()


if __name__ == "__main__":
    main()