Files
xenia-rs/audit-runs/phase-c2-MmAllocatePhysicalMemoryEx/fix-full-file.diff
MechaCat02 ef93a4fa14 handoff: VSync/event-wedge fixes + iterate 2.A–2.BC research notes
Source changes (dormant parity infra, retained from iterate 2.AI/2.AO):
- xenia-kernel/exports.rs: nt_create_event manual_reset polarity +
  related event wiring
- xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity

Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the
iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps
(.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as
regenerable local artifacts — see memory + HANDOFF for the running findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-05 07:19:08 +02:00

441 lines
17 KiB
Diff

diff --git a/tools/diff-events/diff_events.py b/tools/diff-events/diff_events.py
new file mode 100644
index 0000000..ecc2c0b
--- /dev/null
+++ b/tools/diff-events/diff_events.py
@@ -0,0 +1,434 @@
+#!/usr/bin/env python3
+"""Phase A event-log diff tool.
+
+Reads two schema-v1 JSONL event logs (one per engine) and reports the
+first behavioral divergence per guest-thread. Aligns streams by
+`tid_event_idx`. Field-comparison rules come straight from
+`audit-runs/phase-a-diff-harness/schema-v1.md` — keep both in sync.
+
+Usage:
+ diff_events.py --canary canary.jsonl --ours ours.jsonl [--out report.md]
+ diff_events.py --canary a.jsonl --ours b.jsonl --validate-identical
+ diff_events.py --canary a.jsonl --ours b.jsonl --tid-map 6=1,7=2
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
# Only logs whose header announces this schema version are accepted.
SCHEMA_VERSION = 1

# Top-level event fields that are engine-local or derived from the host clock.
# NOTE(review): no function in this file reads this set (compare_event only
# looks at kind, tid_event_idx and payload) - confirm it is informational.
SKIP_TOP_FIELDS = {"engine", "host_ns", "guest_cycle", "deterministic"}

# Payload fields ignored for a given event kind (in addition to the global
# set above).
SKIP_PAYLOAD_FIELDS_BY_KIND = {
    # raw_handle_id is engine-local; the diff key is handle_semantic_id.
    **{kind: {"raw_handle_id"} for kind in ("handle.create", "handle.destroy")},
    # wait_duration_cycles is non-deterministic (host scheduling).
    "wait.end": {"wait_duration_cycles"},
}
+
+# Allocator-returning kernel exports whose `kernel.return.payload.return_value`
+# is a host-allocator-dependent guest VA. Canary and ours legitimately route
+# allocations to different heap regions (e.g. canary `MmAllocatePhysicalMemoryEx`
+# returns `0xBC220000` from `vC0000000` while ours returns `0x40105000` from
+# its single user-heap region — see AUDIT-043 "ε host-allocator address-space
+# divergence" and Phase B `report.md` ε-class). Comparing raw VAs would always
+# diverge at the first allocator call.
+#
+# Canonicalization: per `(tid, export_name)` we assign a stable ordinal
+# (0, 1, 2, …) to each successive `kernel.return.return_value`, replacing
+# both sides' value with the sentinel string `<ALLOC_<NAME>_<ORDINAL>>`
+# before payload comparison. As long as both engines call the same
+# allocator the same number of times in the same order on a given thread,
+# the comparison treats them as equivalent.
+#
+# Limitations (documented):
+# * If one engine calls an allocator more times than the other, ordinals
+# drift and subsequent allocator returns appear as divergences. That's
+# the correct outcome — ordinal-count mismatch IS a behavioral
+# divergence.
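+# * `payload.status` is canonicalized together with `return_value`: for
+# allocator entries it is a hex-string copy of the same raw VA, so it is
+# replaced by the same sentinel to keep the two fields in lockstep.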
+# * Other payload fields that happen to embed an allocator VA (e.g. a
+# future `args_resolved.base_address` in a free-call) are NOT
+# canonicalized — out of scope for this divergence. Extend the set
+# below as new divergence classes surface.
# Kernel exports whose `kernel.return` value is an allocator-chosen guest VA
# and is therefore rewritten to an ordinal sentinel before comparison.
ALLOCATOR_RETURN_FNS = frozenset(
    {
        "MmAllocatePhysicalMemoryEx",
        "MmAllocatePhysicalMemory",
        "NtAllocateVirtualMemory",
        "RtlAllocateHeap",
        "MmCreateKernelStack",
    }
)
+
+
def canonicalize_allocator_returns(events_by_tid: dict) -> None:
    """Replace allocator return addresses with per-thread ordinal sentinels.

    For every `kernel.return` event whose `payload.name` is listed in
    ALLOCATOR_RETURN_FNS, `payload.return_value` is overwritten in place with
    `<ALLOC_<NAME>_<ORDINAL>>`. If the payload also has a `status` key it is
    overwritten with the same sentinel. Ordinals start at 0 and count
    separately for each (tid, name) pair, in event order.

    The function is applied to each engine's stream on its own; two streams
    that call the same allocators in the same per-thread order end up with
    identical sentinels.
    """
    for thread_events in events_by_tid.values():
        # Export name -> number of returns already rewritten on this thread.
        seen_per_export: dict[str, int] = {}
        for event in thread_events:
            if event.get("kind") == "kernel.return":
                body = event.get("payload") or {}
                export = body.get("name")
                if export in ALLOCATOR_RETURN_FNS:
                    slot = seen_per_export.get(export, 0)
                    seen_per_export[export] = slot + 1
                    placeholder = f"<ALLOC_{export}_{slot}>"
                    body["return_value"] = placeholder
                    # `status` mirrors `return_value` as a hex string for
                    # allocator entries, so both fields get the same sentinel.
                    if "status" in body:
                        body["status"] = placeholder
+
+
def load_events(path: Path) -> dict:
    """Load a schema-v1 JSONL event log, grouped by guest thread.

    The first line must be a `schema_version` header whose `schema_version`
    equals SCHEMA_VERSION. Every following non-blank line must be a JSON
    object carrying a `tid`. Events are appended to their thread's list in
    file order; the lists are validated, not sorted.

    Args:
        path: Location of the JSONL log.

    Returns:
        `{tid: [event, ...]}`. Within each list the `tid_event_idx` values
        are strictly increasing; gaps are accepted because one engine may
        emit fewer events than the other.

    Raises:
        SystemExit: on an empty file, invalid JSON, a bad or mismatching
            header, a line that is not a JSON object, a missing `tid`, a
            missing or non-integer `tid_event_idx`, or a per-thread index
            that does not strictly increase.
    """
    events_by_tid: dict[int, list[dict]] = {}
    with path.open("r", encoding="utf-8") as f:
        first = f.readline()
        if not first:
            raise SystemExit(f"{path}: empty file")
        try:
            hdr = json.loads(first)
        except json.JSONDecodeError as e:
            raise SystemExit(f"{path}:1: invalid JSON ({e})") from e
        hdr_kind = hdr.get("kind") if isinstance(hdr, dict) else None
        if hdr_kind != "schema_version":
            raise SystemExit(
                f"{path}: first event is not schema_version (got {hdr_kind!r})"
            )
        if hdr.get("schema_version") != SCHEMA_VERSION:
            raise SystemExit(
                f"{path}: schema_version mismatch (expected {SCHEMA_VERSION}, got {hdr.get('schema_version')!r})"
            )
        for lineno, line in enumerate(f, start=2):
            line = line.strip()
            if not line:
                continue
            try:
                ev = json.loads(line)
            except json.JSONDecodeError as e:
                raise SystemExit(f"{path}:{lineno}: invalid JSON ({e})") from e
            if not isinstance(ev, dict):
                raise SystemExit(f"{path}:{lineno}: event is not a JSON object")
            tid = ev.get("tid")
            if tid is None:
                raise SystemExit(f"{path}:{lineno}: missing tid")
            events_by_tid.setdefault(tid, []).append(ev)

    # Check every consecutive pair. Comparing an index only against its list
    # position would let a sequence such as 0, 5, 2 slip through, because the
    # third index happens to equal its position.
    for tid, evs in events_by_tid.items():
        prev_idx = None
        for i, ev in enumerate(evs):
            idx = ev.get("tid_event_idx")
            if isinstance(idx, bool) or not isinstance(idx, int):
                raise SystemExit(
                    f"{path}: tid={tid} event at index {i} has no integer "
                    f"tid_event_idx (got {idx!r})"
                )
            if prev_idx is not None and idx <= prev_idx:
                raise SystemExit(
                    f"{path}: tid={tid} events out of order at index {i}"
                )
            prev_idx = idx
    return events_by_tid
+
+
+def auto_tid_map(canary_evs: dict, ours_evs: dict) -> dict[int, int]:
+ """Naive tid mapping: pair canary tids with ours tids by the first
+ kernel.call name in each stream. Documented limitation in README."""
+ def first_call_name(evs: list[dict]) -> str | None:
+ for ev in evs:
+ if ev.get("kind") == "kernel.call":
+ return ev["payload"].get("name")
+ return None
+
+ canary_by_first = {}
+ for tid, evs in canary_evs.items():
+ name = first_call_name(evs)
+ if name is not None:
+ canary_by_first.setdefault(name, []).append(tid)
+
+ ours_by_first = {}
+ for tid, evs in ours_evs.items():
+ name = first_call_name(evs)
+ if name is not None:
+ ours_by_first.setdefault(name, []).append(tid)
+
+ mapping: dict[int, int] = {}
+ for name, c_tids in canary_by_first.items():
+ o_tids = ours_by_first.get(name, [])
+ for c, o in zip(sorted(c_tids), sorted(o_tids)):
+ mapping[c] = o
+ return mapping
+
+
def parse_tid_map_arg(s: str) -> dict[int, int]:
    """Parse `--tid-map 6=1,7=2` into {6: 1, 7: 2}.

    Tokens are comma-separated `canary=ours` pairs; surrounding whitespace and
    empty tokens are ignored. Each side is parsed with `int(..., 0)`, so
    prefixed literals such as `0x10` are accepted. If a canary tid repeats,
    the last pair wins.

    Raises:
        SystemExit: if a token has no `=` or either side is not an integer
            literal.
    """
    out: dict[int, int] = {}
    for token in s.split(","):
        token = token.strip()
        if not token:
            continue
        if "=" not in token:
            raise SystemExit(f"--tid-map: bad token {token!r} (expected canary=ours)")
        a, b = token.split("=", 1)
        try:
            out[int(a.strip(), 0)] = int(b.strip(), 0)
        except ValueError as e:
            # Report bad numbers as a CLI error like any other malformed
            # token, rather than as an uncaught traceback.
            raise SystemExit(
                f"--tid-map: bad token {token!r} (expected integer canary=ours)"
            ) from e
    return out
+
+
+def compare_payload(kind: str, p_canary: dict, p_ours: dict) -> str | None:
+ """Compare two payloads. Returns None if equivalent, else a short
+ human-readable description of the first differing field."""
+ skip = SKIP_PAYLOAD_FIELDS_BY_KIND.get(kind, set())
+ # Compare the union of keys excluding skipped ones, in canary's key order
+ # first (stable), then any ours-only fields.
+ keys_seen: set[str] = set()
+ for k in p_canary.keys():
+ if k in skip:
+ continue
+ keys_seen.add(k)
+ vc = p_canary.get(k)
+ vo = p_ours.get(k)
+ if vc != vo:
+ return f"payload.{k}: canary={vc!r} ours={vo!r}"
+ for k in p_ours.keys():
+ if k in skip or k in keys_seen:
+ continue
+ if p_ours[k] is not None:
+ return f"payload.{k}: canary=<missing> ours={p_ours[k]!r}"
+ return None
+
+
+def compare_event(ev_canary: dict, ev_ours: dict) -> str | None:
+ """Compare two events. Returns None if equivalent, else a short description."""
+ # Top-level comparison: kind must match.
+ if ev_canary.get("kind") != ev_ours.get("kind"):
+ return f"kind: canary={ev_canary.get('kind')!r} ours={ev_ours.get('kind')!r}"
+ # tid_event_idx must match (it's our diff key).
+ if ev_canary.get("tid_event_idx") != ev_ours.get("tid_event_idx"):
+ return (
+ f"tid_event_idx: canary={ev_canary.get('tid_event_idx')!r} "
+ f"ours={ev_ours.get('tid_event_idx')!r}"
+ )
+ # Payload comparison.
+ pc = ev_canary.get("payload", {})
+ po = ev_ours.get("payload", {})
+ diff = compare_payload(ev_canary["kind"], pc, po)
+ if diff:
+ return diff
+ return None
+
+
def render_event(ev: dict) -> str:
    """Summarise an event on one line for the diff report.

    Call-like kinds show the payload `name` (falling back to `ord`), handle
    kinds show `handle_semantic_id`, and every other kind shows the payload
    itself. Missing `kind` or `tid_event_idx` render as "?".
    """
    kind = ev.get("kind", "?")
    idx = ev.get("tid_event_idx", "?")
    payload = ev.get("payload", {})
    prefix = f"[{idx}] {kind}"
    if kind in ("kernel.call", "kernel.return", "import.call"):
        label = payload.get("name") or payload.get("ord")
        return f"{prefix} {label}"
    if kind in ("handle.create", "handle.destroy"):
        sid = payload.get("handle_semantic_id", "?")
        return f"{prefix} sid={sid}"
    # thread.*, wait.* and any unknown kind all fall back to the raw payload.
    return f"{prefix} {payload}"
+
+
def diff_one_tid(
    canary_evs: list[dict], ours_evs: list[dict], canary_tid: int, ours_tid: int
) -> dict:
    """Walk one mapped thread pair position by position until they diverge.

    Only the overlapping prefix (the shorter stream's length) is compared.

    Returns:
        A result dict with the two tids, the number of matching events before
        the divergence, both stream lengths, the divergence position and
        description (None when the compared prefix matches), up to five
        matching (canary, ours) pairs preceding it, the diverging events, and
        the events immediately after them (None at end of stream).
    """
    diverged_at: int | None = None
    diff_descr: str | None = None
    for pos, (ec, eo) in enumerate(zip(canary_evs, ours_evs)):
        verdict = compare_event(ec, eo)
        if verdict is not None:
            diverged_at, diff_descr = pos, verdict
            break

    # Everything before the divergence matched; with no divergence the whole
    # overlapping prefix matched.
    if diverged_at is None:
        matched = min(len(canary_evs), len(ours_evs))
    else:
        matched = diverged_at
    window_start = max(0, matched - 5)
    pre_context = list(
        zip(canary_evs[window_start:matched], ours_evs[window_start:matched])
    )

    def event_at(evs: list[dict], pos: int | None) -> dict | None:
        return evs[pos] if pos is not None and pos < len(evs) else None

    following = None if diverged_at is None else diverged_at + 1
    return {
        "canary_tid": canary_tid,
        "ours_tid": ours_tid,
        "matched": matched,
        "canary_total": len(canary_evs),
        "ours_total": len(ours_evs),
        "diverged_at": diverged_at,
        "diff_descr": diff_descr,
        "pre_context": pre_context,
        "post_canary": event_at(canary_evs, diverged_at),
        "post_ours": event_at(ours_evs, diverged_at),
        "next_canary": event_at(canary_evs, following),
        "next_ours": event_at(ours_evs, following),
    }
+
+
def render_report(per_tid_results: list[dict]) -> str:
    """Render the per-thread diff results as a markdown report.

    The report has a fixed preamble, a summary table with one row per mapped
    thread pair, and one section per pair. A diverging pair's section shows
    the pre-context, the divergent events, the events after them and the raw
    JSON of the divergent events.

    Args:
        per_tid_results: Result dicts as produced by diff_one_tid.

    Returns:
        The report as one newline-joined string.
    """
    fence = "```"
    doc: list[str] = [
        "# Phase A diff report",
        "",
        "**This report is the output of Phase A's diff harness. Divergences",
        "shown here are INPUT for Phase B (first-divergence localization),",
        "not findings of Phase A.** Phase A's job is to make the harness",
        "itself correct, not to analyze what it surfaces.",
        "",
        "## Summary",
        "",
        "| canary_tid | ours_tid | matched | canary_total | ours_total | first_divergence_at |",
        "|---|---|---|---|---|---|",
    ]
    for res in per_tid_results:
        where = "—" if res["diverged_at"] is None else res["diverged_at"]
        doc.append(
            f"| {res['canary_tid']} | {res['ours_tid']} | {res['matched']} | "
            f"{res['canary_total']} | {res['ours_total']} | {where} |"
        )
    doc.append("")

    for res in per_tid_results:
        doc += [f"## canary_tid={res['canary_tid']} → ours_tid={res['ours_tid']}", ""]
        if res["diverged_at"] is None:
            doc += [
                f"No divergence within the {res['matched']} compared events "
                f"(canary has {res['canary_total']}, ours has {res['ours_total']}).",
                "",
            ]
            continue

        doc += [
            f"First divergence at `tid_event_idx={res['diverged_at']}`: {res['diff_descr']}",
            "",
            "**Pre-context (last 5 matching events):**",
            fence,
        ]
        for ec, eo in res["pre_context"]:
            doc += [f" canary: {render_event(ec)}", f" ours: {render_event(eo)}"]
        doc += [
            fence,
            "",
            "**Divergent event:**",
            fence,
            f" canary: {render_event(res['post_canary'])}",
            f" ours: {render_event(res['post_ours'])}",
            fence,
            "",
            "**Next event after the divergence (if any):**",
            fence,
        ]
        # A falsy "next" entry means that stream ended at the divergence.
        if res["next_canary"]:
            doc.append(f" canary: {render_event(res['next_canary'])}")
        else:
            doc.append(" canary: <end of stream>")
        if res["next_ours"]:
            doc.append(f" ours: {render_event(res['next_ours'])}")
        else:
            doc.append(" ours: <end of stream>")
        doc += [
            fence,
            "",
            "**Raw events (JSON):**",
            "```json",
            json.dumps(res["post_canary"], sort_keys=True),
            json.dumps(res["post_ours"], sort_keys=True),
            fence,
            "",
        ]
    return "\n".join(doc)
+
+
def main() -> int:
    """Command-line entry point: load both logs, diff them, emit the report.

    Returns:
        0 on success, 1 when --validate-identical finds a divergence, and 2
        when no tid mapping could be established.
    """
    parser = argparse.ArgumentParser(description="Phase A event-log diff tool")
    parser.add_argument("--canary", required=True, type=Path)
    parser.add_argument("--ours", required=True, type=Path)
    parser.add_argument(
        "--out", type=Path, help="Write markdown report here (else stdout)"
    )
    parser.add_argument(
        "--tid-map",
        type=str,
        help="Manual tid mapping like '6=1,7=2'. Overrides auto-mapping.",
    )
    parser.add_argument(
        "--validate-identical",
        action="store_true",
        help="Exit non-zero if any mapped tid pair has any divergence. "
        "Used by gate-4 negative-test and by self-diff smoke tests.",
    )
    parser.add_argument(
        "--no-canonicalize-allocators",
        action="store_true",
        help="Disable per-tid ordinal canonicalization of allocator return "
        "values (default: enabled). See ALLOCATOR_RETURN_FNS for the "
        "covered set. Disabling reproduces the raw-VA comparison.",
    )
    opts = parser.parse_args()

    canary_evs = load_events(opts.canary)
    ours_evs = load_events(opts.ours)

    if not opts.no_canonicalize_allocators:
        for stream in (canary_evs, ours_evs):
            canonicalize_allocator_returns(stream)

    # An explicit --tid-map replaces the automatic pairing.
    tid_map = (
        parse_tid_map_arg(opts.tid_map)
        if opts.tid_map
        else auto_tid_map(canary_evs, ours_evs)
    )
    if not tid_map:
        sys.stderr.write(
            "no tid mapping (auto-mapping found no shared first-kernel-call). "
            "Pass --tid-map manually.\n"
        )
        return 2

    results: list[dict] = []
    for c_tid, o_tid in sorted(tid_map.items()):
        if c_tid not in canary_evs:
            sys.stderr.write(f"warn: canary tid {c_tid} not in stream; skipping\n")
        elif o_tid not in ours_evs:
            sys.stderr.write(f"warn: ours tid {o_tid} not in stream; skipping\n")
        else:
            results.append(
                diff_one_tid(canary_evs[c_tid], ours_evs[o_tid], c_tid, o_tid)
            )

    report = render_report(results)
    if args_out := opts.out:
        args_out.write_text(report, encoding="utf-8")
        sys.stderr.write(f"diff report written to {args_out}\n")
    else:
        sys.stdout.write(report)

    if opts.validate_identical:
        # Only the first diverging pair is reported before exiting.
        first_bad = next((r for r in results if r["diverged_at"] is not None), None)
        if first_bad is not None:
            sys.stderr.write(
                f"validate-identical: divergence in canary_tid={first_bad['canary_tid']} "
                f"at tid_event_idx={first_bad['diverged_at']} ({first_bad['diff_descr']})\n"
            )
            return 1
    return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())