Source changes (dormant parity infra, retained from iterate 2.AI/2.AO): - xenia-kernel/exports.rs: nt_create_event manual_reset polarity + related event wiring - xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps (.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as regenerable local artifacts — see memory + HANDOFF for the running findings. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
219 lines
9.5 KiB
Python
219 lines
9.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Phase Non-match Investigation: per-tid profile builder.
|
|
Streams a large canary cold jsonl and produces:
|
|
- per-tid event counts
|
|
- thread.create info (entry_pc, parent_tid, ctx_ptr, priority, name)
|
|
- thread.exit info (if any)
|
|
- top kernel.call by name (per tid)
|
|
- NtSetEvent handle distribution (per tid)
|
|
- wait.begin handle distribution (per tid)
|
|
- parent's last few kernel.calls + ExCreateThread LR (per spawn)
|
|
|
|
Usage:
|
|
python3 build_profiles.py <canary.jsonl> <output_dir>
|
|
"""
|
|
|
|
import bisect
import collections
import json
import os
import sys
import time
from pathlib import Path
|
|
|
|
|
|
def _normalize_handle(value):
    """Return a canonical lower-case string for a handle id, or '' if unusable.

    Integers are rendered as ``0x%08x`` so that handles logged as numbers and
    handles logged as hex strings land on the same key in every table.
    """
    if isinstance(value, int):
        return f'0x{value:08x}'
    if isinstance(value, str):
        return value.lower()
    return ''


def _first_handle(payload, keys):
    """Return the normalized handle stored under the first truthy key in *keys*."""
    for key in keys:
        value = payload.get(key)
        if value:
            return _normalize_handle(value)
    return ''


def _child_tid(payload):
    """Return the spawned thread id from a thread.create payload, or None.

    Checks ``child_tid``, then ``new_tid``, then ``thread_id``. The test is
    ``is not None`` rather than truthiness so a tid of 0 is not discarded.
    """
    for key in ('child_tid', 'new_tid', 'thread_id'):
        value = payload.get(key)
        if value is not None:
            return value
    return None


def _is_number(value):
    """Return True for int/float values that can be ordered (bool excluded)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _mixed_sort_key(key):
    """Sort key that orders numbers first, then strings, then None.

    Plain ``sorted()`` (and ``json.dump(sort_keys=True)``) raise TypeError when
    keys mix None/int/str, which happens as soon as one event lacks a tid.
    """
    if key is None:
        return (2, 0, '')
    if _is_number(key):
        return (0, key, '')
    return (1, 0, str(key))


def _sorted_for_json(value):
    """Return *value* with every dict re-ordered by ``_mixed_sort_key``."""
    if isinstance(value, dict):
        return {k: _sorted_for_json(value[k])
                for k in sorted(value, key=_mixed_sort_key)}
    if isinstance(value, list):
        return [_sorted_for_json(item) for item in value]
    return value


def _write_json(path, obj):
    """Dump *obj* to *path* as indented JSON (unserializable values via str)."""
    with open(path, 'w', encoding='utf-8') as fout:
        json.dump(obj, fout, indent=2, default=str)


def _write_handle_report(path, per_tid_handles, label, handle_create):
    """Write the per-tid top-10 handle table for one event family.

    *per_tid_handles* maps tid -> Counter[handle]; tids are listed by
    descending total. Each handle row is annotated from *handle_create*.
    """
    ranked = sorted(per_tid_handles.items(),
                    key=lambda item: -sum(item[1].values()))
    with open(path, 'w', encoding='utf-8') as fout:
        for tid, counter in ranked:
            if not counter:
                continue
            total = sum(counter.values())
            fout.write(f'=== tid={tid} {label} total={total:,} ===\n')
            for raw, n in counter.most_common(10):
                hc = handle_create.get(raw, {})
                fout.write(f' {n:>8,} {raw} obj_type={hc.get("object_type")} sid={hc.get("sid")} first_seen_tid={hc.get("first_seen_tid")}\n')
            fout.write('\n')


def _match_spawn_chain(create_thread_events, excreate_events):
    """Pair each thread.create with the creator's latest ExCreateThread* call.

    For every thread.create, the match is the newest ExCreateThread /
    ExCreateThreadEx call recorded on ``creator_tid`` whose ``host_ns`` is
    <= the create's ``host_ns``. Events without a numeric ``host_ns`` cannot
    be ordered: such calls are left out of the index and such creates get
    ``parent_excreate = None``.
    """
    calls_by_tid = collections.defaultdict(list)
    for call in excreate_events:
        if _is_number(call['host_ns']):
            calls_by_tid[call['tid']].append(call)

    times_by_tid = {}
    for tid, calls in calls_by_tid.items():
        calls.sort(key=lambda call: call['host_ns'])
        times_by_tid[tid] = [call['host_ns'] for call in calls]

    chain = []
    for created in create_thread_events:
        creator = created['creator_tid']
        when = created['host_ns']
        parent = None
        if _is_number(when) and creator in times_by_tid:
            # bisect_right - 1 == last call with host_ns <= when.
            pos = bisect.bisect_right(times_by_tid[creator], when)
            if pos:
                parent = calls_by_tid[creator][pos - 1]
        chain.append({
            'child_tid': created['child_tid'],
            'creator_tid': creator,
            'child_host_ns': when,
            'child_payload': created['payload'],
            'parent_excreate': parent,
        })
    return chain


def main():
    """Stream a JSONL trace and write per-tid profile files.

    Reads ``sys.argv[1]`` (input .jsonl) and ``sys.argv[2]`` (output
    directory, created if missing); exits with status 1 on wrong usage.

    Files written into the output directory:
      tid-event-counts.csv, thread-creates.json, thread-exits.json,
      excreate-events.json, create-thread-events.json, handle-create.json,
      tid-top-calls.txt, tid-ntset-handles.txt, tid-wait-handles.txt,
      spawn-chain.json

    Lines that are not a JSON object are skipped; the number of non-blank
    skipped lines is reported on stderr.
    """
    if len(sys.argv) != 3:
        print("usage: build_profiles.py <canary.jsonl> <output_dir>", file=sys.stderr)
        sys.exit(1)
    src = sys.argv[1]
    out_dir = Path(sys.argv[2])
    out_dir.mkdir(parents=True, exist_ok=True)

    # Per-tid aggregations.
    tid_event_count = collections.Counter()
    tid_thread_create = {}  # child tid -> extracted thread.create info
    tid_thread_exit = {}    # tid -> thread.exit info
    tid_call_names = collections.defaultdict(collections.Counter)     # tid -> Counter[fn_name]
    tid_ntset_handles = collections.defaultdict(collections.Counter)  # tid -> Counter[raw_handle]
    tid_wait_handles = collections.defaultdict(collections.Counter)   # tid -> Counter[raw_handle]

    # Spawn-chain capture: ExCreateThread* calls and thread.create events are
    # collected separately and paired after the stream has been consumed.
    excreate_events = []
    create_thread_events = []

    # raw handle (normalized string) -> info from its first handle.create
    handle_create = {}

    total_lines = 0
    skipped_lines = 0
    progress_every = 500_000
    # Monotonic clock: elapsed time must not jump with wall-clock adjustments.
    t0 = time.monotonic()
    with open(src, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            total_lines += 1
            if total_lines % progress_every == 0:
                elapsed = time.monotonic() - t0
                rate = total_lines / elapsed if elapsed > 0 else 0.0
                print(f" lines={total_lines:>10,} elapsed={elapsed:6.1f}s rate={rate:,.0f}/s", file=sys.stderr)
            try:
                e = json.loads(line)
            except ValueError:
                # Blank lines are harmless; anything else is worth reporting.
                if line.strip():
                    skipped_lines += 1
                continue
            if not isinstance(e, dict):
                # Valid JSON but not an event object (list, number, ...).
                skipped_lines += 1
                continue

            tid = e.get('tid')
            kind = e.get('kind')
            tid_event_count[tid] += 1

            p = e.get('payload')
            if not isinstance(p, dict):
                p = {}

            if kind == 'thread.create':
                child_tid = _child_tid(p)
                tid_thread_create[child_tid] = {
                    'creator_tid': tid,
                    'event_idx': e.get('tid_event_idx'),
                    'host_ns': e.get('host_ns'),
                    'payload': p,
                }
                create_thread_events.append({
                    'creator_tid': tid,
                    'child_tid': child_tid,
                    'host_ns': e.get('host_ns'),
                    'payload': p,
                })
            elif kind == 'thread.exit':
                tid_thread_exit[tid] = {
                    'event_idx': e.get('tid_event_idx'),
                    'host_ns': e.get('host_ns'),
                    'payload': e.get('payload', {}),
                }
            elif kind == 'handle.create':
                raw = _first_handle(p, ('raw_handle_id',))
                if raw:
                    # setdefault: keep the first sighting of each handle.
                    handle_create.setdefault(raw, {
                        'object_type': p.get('object_type'),
                        'sid': p.get('handle_semantic_id'),
                        'object_name': p.get('object_name'),
                        'first_seen_tid': tid,
                        'first_seen_host_ns': e.get('host_ns'),
                    })
            elif kind == 'wait.begin':
                raw = _first_handle(p, ('raw_handle_id', 'handle_id'))
                if raw:
                    tid_wait_handles[tid][raw] += 1
            elif kind in ('import.call', 'kernel.call'):
                name = p.get('name') or p.get('import_name') or p.get('function')
                if not name:
                    continue
                tid_call_names[tid][name] += 1
                if name in ('ExCreateThread', 'ExCreateThreadEx'):
                    excreate_events.append({
                        'tid': tid,
                        'idx': e.get('tid_event_idx'),
                        'host_ns': e.get('host_ns'),
                        'name': name,
                        'payload': p,
                    })
                if name == 'NtSetEvent':
                    raw = _first_handle(p, ('handle', 'handle_id', 'raw_handle_id'))
                    if raw:
                        tid_ntset_handles[tid][raw] += 1

    if skipped_lines:
        print(f" skipped {skipped_lines:,} line(s) that were not JSON objects", file=sys.stderr)

    # Save raw aggregates.
    with open(out_dir / 'tid-event-counts.csv', 'w', encoding='utf-8') as fout:
        fout.write('tid,event_count\n')
        for tid, n in tid_event_count.most_common():
            fout.write(f'{tid},{n}\n')

    # tid-keyed maps are pre-sorted with a type-tolerant key so a missing tid
    # (None) next to integer tids cannot abort the dump.
    _write_json(out_dir / 'thread-creates.json', _sorted_for_json(tid_thread_create))
    _write_json(out_dir / 'thread-exits.json', _sorted_for_json(tid_thread_exit))
    _write_json(out_dir / 'excreate-events.json', excreate_events)
    _write_json(out_dir / 'create-thread-events.json', create_thread_events)
    _write_json(out_dir / 'handle-create.json', handle_create)

    # Per-tid call counts top-20, busiest tid first.
    with open(out_dir / 'tid-top-calls.txt', 'w', encoding='utf-8') as fout:
        for tid, total in tid_event_count.most_common():
            fout.write(f'=== tid={tid} total_events={total:,} ===\n')
            for name, n in tid_call_names[tid].most_common(20):
                fout.write(f' {n:>10,} {name}\n')
            fout.write('\n')

    # Per-tid handle distributions.
    _write_handle_report(out_dir / 'tid-ntset-handles.txt', tid_ntset_handles,
                         'NtSetEvent', handle_create)
    _write_handle_report(out_dir / 'tid-wait-handles.txt', tid_wait_handles,
                         'wait.begin', handle_create)

    # Spawn-chain matching.
    spawn_chain = _match_spawn_chain(create_thread_events, excreate_events)
    _write_json(out_dir / 'spawn-chain.json', spawn_chain)

    print(f"\nDone. lines={total_lines:,} tids={len(tid_event_count)} outputs at {out_dir}")
|
|
|
|
|
|
# Script entry point: build the profiles only when this file is executed
# directly, so importing the module has no side effects.
if __name__ == '__main__':
    main()
|