handoff: VSync/event-wedge fixes + iterate 2.A–2.BC research notes
Source changes (dormant parity infra, retained from iterate 2.AI/2.AO): - xenia-kernel/exports.rs: nt_create_event manual_reset polarity + related event wiring - xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps (.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as regenerable local artifacts — see memory + HANDOFF for the running findings. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
218
audit-runs/phase-nonmatch-investigation/build_profiles.py
Normal file
218
audit-runs/phase-nonmatch-investigation/build_profiles.py
Normal file
@@ -0,0 +1,218 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Phase Non-match Investigation: per-tid profile builder.
|
||||
Streams a large canary cold jsonl and produces:
|
||||
- per-tid event counts
|
||||
- thread.create info (entry_pc, parent_tid, ctx_ptr, priority, name)
|
||||
- thread.exit info (if any)
|
||||
- top kernel.call by name (per tid)
|
||||
- NtSetEvent handle distribution (per tid)
|
||||
- wait.begin handle distribution (per tid)
|
||||
- parent's last few kernel.calls + ExCreateThread LR (per spawn)
|
||||
|
||||
Usage:
|
||||
python3 build_profiles.py <canary.jsonl> <output_dir>
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import collections
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) != 3:
|
||||
print("usage: build_profiles.py <canary.jsonl> <output_dir>", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
src = sys.argv[1]
|
||||
out_dir = Path(sys.argv[2])
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Per-tid aggregations.
|
||||
tid_event_count = collections.Counter()
|
||||
tid_first_event = {} # tid -> first event (likely thread.create)
|
||||
tid_thread_create = {} # tid -> thread.create payload (extracted)
|
||||
tid_thread_exit = {} # tid -> (idx, host_ns)
|
||||
tid_call_names = collections.defaultdict(collections.Counter) # tid -> Counter[fn_name]
|
||||
tid_ntset_handles = collections.defaultdict(collections.Counter) # tid -> Counter[raw_handle]
|
||||
tid_wait_handles = collections.defaultdict(collections.Counter) # tid -> Counter[raw_handle]
|
||||
|
||||
# Spawn-chain capture.
|
||||
# For every kernel.call ExCreateThread / ExCreateThreadEx events: record (tid, idx, return_pc_LR, host_ns)
|
||||
# For every thread.create event: record (tid, parent_tid_in_payload, parent_call_idx)
|
||||
# We then match: for each thread.create, find the most recent ExCreateThread call from parent_tid prior to host_ns.
|
||||
excreate_events = [] # list of {tid, idx, host_ns, name, lr, ctx}
|
||||
create_thread_events = [] # list of full payloads
|
||||
|
||||
# Handle.create map: raw_handle (hex string lower) -> (object_type, sid, first_seen_tid)
|
||||
handle_create = {}
|
||||
|
||||
# rolling per-tid last-call cache so we can capture LR
|
||||
# For now: we extract any "lr" field present in kernel.call.
|
||||
|
||||
total_lines = 0
|
||||
progress_every = 500_000
|
||||
import time
|
||||
t0 = time.time()
|
||||
with open(src, 'r', encoding='utf-8', errors='replace') as f:
|
||||
for line in f:
|
||||
total_lines += 1
|
||||
if total_lines % progress_every == 0:
|
||||
elapsed = time.time() - t0
|
||||
print(f" lines={total_lines:>10,} elapsed={elapsed:6.1f}s rate={total_lines/elapsed:,.0f}/s", file=sys.stderr)
|
||||
try:
|
||||
e = json.loads(line)
|
||||
except Exception:
|
||||
continue
|
||||
tid = e.get('tid')
|
||||
kind = e.get('kind')
|
||||
tid_event_count[tid] += 1
|
||||
if tid not in tid_first_event:
|
||||
tid_first_event[tid] = e
|
||||
if kind == 'thread.create':
|
||||
p = e.get('payload', {}) or {}
|
||||
child_tid = p.get('child_tid')
|
||||
if child_tid is None:
|
||||
# fallback: maybe payload has 'new_tid' or 'tid'
|
||||
child_tid = p.get('new_tid') or p.get('thread_id')
|
||||
tid_thread_create[child_tid] = {
|
||||
'creator_tid': tid,
|
||||
'event_idx': e.get('tid_event_idx'),
|
||||
'host_ns': e.get('host_ns'),
|
||||
'payload': p,
|
||||
}
|
||||
create_thread_events.append({
|
||||
'creator_tid': tid,
|
||||
'child_tid': child_tid,
|
||||
'host_ns': e.get('host_ns'),
|
||||
'payload': p,
|
||||
})
|
||||
elif kind == 'thread.exit':
|
||||
tid_thread_exit[tid] = {
|
||||
'event_idx': e.get('tid_event_idx'),
|
||||
'host_ns': e.get('host_ns'),
|
||||
'payload': e.get('payload', {}),
|
||||
}
|
||||
elif kind == 'handle.create':
|
||||
p = e.get('payload', {}) or {}
|
||||
raw = (p.get('raw_handle_id') or '').lower()
|
||||
if raw:
|
||||
handle_create.setdefault(raw, {
|
||||
'object_type': p.get('object_type'),
|
||||
'sid': p.get('handle_semantic_id'),
|
||||
'object_name': p.get('object_name'),
|
||||
'first_seen_tid': tid,
|
||||
'first_seen_host_ns': e.get('host_ns'),
|
||||
})
|
||||
elif kind == 'wait.begin':
|
||||
p = e.get('payload', {}) or {}
|
||||
raw = (p.get('raw_handle_id') or p.get('handle_id') or '').lower()
|
||||
if raw:
|
||||
tid_wait_handles[tid][raw] += 1
|
||||
elif kind in ('import.call', 'kernel.call'):
|
||||
p = e.get('payload', {}) or {}
|
||||
name = p.get('name') or p.get('import_name') or p.get('function')
|
||||
if name:
|
||||
tid_call_names[tid][name] += 1
|
||||
if name in ('ExCreateThread', 'ExCreateThreadEx'):
|
||||
excreate_events.append({
|
||||
'tid': tid,
|
||||
'idx': e.get('tid_event_idx'),
|
||||
'host_ns': e.get('host_ns'),
|
||||
'name': name,
|
||||
'payload': p,
|
||||
})
|
||||
if name == 'NtSetEvent':
|
||||
raw = (p.get('handle') or p.get('handle_id') or p.get('raw_handle_id') or '')
|
||||
if isinstance(raw, int):
|
||||
raw = f'0x{raw:08x}'
|
||||
if isinstance(raw, str) and raw:
|
||||
tid_ntset_handles[tid][raw.lower()] += 1
|
||||
|
||||
# Save raw aggregates.
|
||||
with open(out_dir / 'tid-event-counts.csv', 'w') as fout:
|
||||
fout.write('tid,event_count\n')
|
||||
for tid, n in sorted(tid_event_count.items(), key=lambda x: -x[1]):
|
||||
fout.write(f'{tid},{n}\n')
|
||||
|
||||
with open(out_dir / 'thread-creates.json', 'w') as fout:
|
||||
json.dump(tid_thread_create, fout, indent=2, sort_keys=True, default=str)
|
||||
with open(out_dir / 'thread-exits.json', 'w') as fout:
|
||||
json.dump(tid_thread_exit, fout, indent=2, sort_keys=True, default=str)
|
||||
with open(out_dir / 'excreate-events.json', 'w') as fout:
|
||||
json.dump(excreate_events, fout, indent=2, default=str)
|
||||
with open(out_dir / 'create-thread-events.json', 'w') as fout:
|
||||
json.dump(create_thread_events, fout, indent=2, default=str)
|
||||
with open(out_dir / 'handle-create.json', 'w') as fout:
|
||||
json.dump(handle_create, fout, indent=2, default=str)
|
||||
|
||||
# Per-tid call counts top-20.
|
||||
with open(out_dir / 'tid-top-calls.txt', 'w') as fout:
|
||||
for tid in sorted(tid_event_count.keys(), key=lambda t: -tid_event_count[t]):
|
||||
fout.write(f'=== tid={tid} total_events={tid_event_count[tid]:,} ===\n')
|
||||
top = tid_call_names[tid].most_common(20)
|
||||
for name, n in top:
|
||||
fout.write(f' {n:>10,} {name}\n')
|
||||
fout.write('\n')
|
||||
|
||||
# Per-tid NtSetEvent handle distribution.
|
||||
with open(out_dir / 'tid-ntset-handles.txt', 'w') as fout:
|
||||
for tid in sorted(tid_ntset_handles.keys(), key=lambda t: -sum(tid_ntset_handles[t].values())):
|
||||
if not tid_ntset_handles[tid]:
|
||||
continue
|
||||
total = sum(tid_ntset_handles[tid].values())
|
||||
fout.write(f'=== tid={tid} NtSetEvent total={total:,} ===\n')
|
||||
for raw, n in tid_ntset_handles[tid].most_common(10):
|
||||
hc = handle_create.get(raw, {})
|
||||
fout.write(f' {n:>8,} {raw} obj_type={hc.get("object_type")} sid={hc.get("sid")} first_seen_tid={hc.get("first_seen_tid")}\n')
|
||||
fout.write('\n')
|
||||
|
||||
# Per-tid wait.begin handle distribution.
|
||||
with open(out_dir / 'tid-wait-handles.txt', 'w') as fout:
|
||||
for tid in sorted(tid_wait_handles.keys(), key=lambda t: -sum(tid_wait_handles[t].values())):
|
||||
if not tid_wait_handles[tid]:
|
||||
continue
|
||||
total = sum(tid_wait_handles[tid].values())
|
||||
fout.write(f'=== tid={tid} wait.begin total={total:,} ===\n')
|
||||
for raw, n in tid_wait_handles[tid].most_common(10):
|
||||
hc = handle_create.get(raw, {})
|
||||
fout.write(f' {n:>8,} {raw} obj_type={hc.get("object_type")} sid={hc.get("sid")} first_seen_tid={hc.get("first_seen_tid")}\n')
|
||||
fout.write('\n')
|
||||
|
||||
# Spawn-chain matching.
|
||||
# For each thread.create, find the immediately-preceding ExCreateThread* call on creator_tid before host_ns.
|
||||
# Build per-tid sorted excreate list once.
|
||||
excreate_by_tid = collections.defaultdict(list)
|
||||
for ev in excreate_events:
|
||||
excreate_by_tid[ev['tid']].append(ev)
|
||||
for tid in excreate_by_tid:
|
||||
excreate_by_tid[tid].sort(key=lambda e: e['host_ns'])
|
||||
|
||||
spawn_chain = []
|
||||
for tc in create_thread_events:
|
||||
ct = tc['creator_tid']
|
||||
hns = tc['host_ns']
|
||||
# Find newest ExCreateThread call on ct with host_ns <= hns
|
||||
cand = excreate_by_tid.get(ct, [])
|
||||
best = None
|
||||
for ev in cand:
|
||||
if ev['host_ns'] <= hns:
|
||||
best = ev
|
||||
else:
|
||||
break
|
||||
spawn_chain.append({
|
||||
'child_tid': tc['child_tid'],
|
||||
'creator_tid': ct,
|
||||
'child_host_ns': hns,
|
||||
'child_payload': tc['payload'],
|
||||
'parent_excreate': best,
|
||||
})
|
||||
with open(out_dir / 'spawn-chain.json', 'w') as fout:
|
||||
json.dump(spawn_chain, fout, indent=2, default=str)
|
||||
|
||||
print(f"\nDone. lines={total_lines:,} tids={len(tid_event_count)} outputs at {out_dir}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user