Source changes (dormant parity infra, retained from iterate 2.AI/2.AO): - xenia-kernel/exports.rs: nt_create_event manual_reset polarity + related event wiring - xenia-gpu/mmio_region.rs: D1MODE_VBLANK_VLINE_STATUS hardcode parity Also lands the audit-runs/ analysis notes (.md/.txt/.json digests) for the iterate 2.x VSync/0x10e8/0x1004 wedge investigation. Raw trace dumps (.jsonl/.gz/.csv/.stdout) and agent worktrees (.claude/) are gitignored as regenerable local artifacts — see memory + HANDOFF for the running findings. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
441 lines
17 KiB
Diff
441 lines
17 KiB
Diff
diff --git a/tools/diff-events/diff_events.py b/tools/diff-events/diff_events.py
|
|
new file mode 100644
|
|
index 0000000..ecc2c0b
|
|
--- /dev/null
|
|
+++ b/tools/diff-events/diff_events.py
|
|
@@ -0,0 +1,434 @@
|
|
+#!/usr/bin/env python3
|
|
+"""Phase A event-log diff tool.
|
|
+
|
|
+Reads two schema-v1 JSONL event logs (one per engine) and reports the
|
|
+first behavioral divergence per guest-thread. Aligns streams by
|
|
+`tid_event_idx`. Field-comparison rules come straight from
|
|
+`audit-runs/phase-a-diff-harness/schema-v1.md` — keep both in sync.
|
|
+
|
|
+Usage:
|
|
+ diff_events.py --canary canary.jsonl --ours ours.jsonl [--out report.md]
|
|
+ diff_events.py --canary a.jsonl --ours b.jsonl --validate-identical
|
|
+ diff_events.py --canary a.jsonl --ours b.jsonl --tid-map 6=1,7=2
|
|
+"""
|
|
+
|
|
+import argparse
|
|
+import json
|
|
+import sys
|
|
+from pathlib import Path
|
|
+
|
|
# Schema revision this tool understands; `load_events` rejects any log whose
# header line announces a different `schema_version`.
SCHEMA_VERSION = 1

# Top-level event fields that are engine-local or derived from the host clock,
# and therefore never take part in the diff.
SKIP_TOP_FIELDS = {
    "engine",
    "host_ns",
    "guest_cycle",
    "deterministic",
}

# Payload fields ignored for specific event kinds (on top of the top-level
# skip set above):
#   * handle.create / handle.destroy: `raw_handle_id` is engine-local; the
#     diff key is `handle_semantic_id`.
#   * wait.end: `wait_duration_cycles` is non-deterministic (host scheduling).
SKIP_PAYLOAD_FIELDS_BY_KIND = {
    **{kind: {"raw_handle_id"} for kind in ("handle.create", "handle.destroy")},
    "wait.end": {"wait_duration_cycles"},
}
|
|
+
|
|
# Allocator-returning kernel exports whose `kernel.return.payload.return_value`
# is a host-allocator-dependent guest VA. Canary and ours legitimately route
# allocations to different heap regions (e.g. canary `MmAllocatePhysicalMemoryEx`
# returns `0xBC220000` from `vC0000000` while ours returns `0x40105000` from
# its single user-heap region — see AUDIT-043 "ε host-allocator address-space
# divergence" and Phase B `report.md` ε-class). Comparing raw VAs would always
# diverge at the first allocator call.
#
# Canonicalization: per `(tid, export_name)` we assign a stable ordinal
# (0, 1, 2, …) to each successive `kernel.return.return_value`, replacing
# both sides' value with the sentinel string `<ALLOC_<NAME>_<ORDINAL>>`
# before payload comparison. As long as both engines call the same
# allocator the same number of times in the same order on a given thread,
# the comparison treats them as equivalent.
#
# Limitations (documented):
#   * If one engine calls an allocator more times than the other, ordinals
#     drift and subsequent allocator returns appear as divergences. That's
#     the correct outcome — ordinal-count mismatch IS a behavioral
#     divergence.
#   * `payload.status`, when present, is rewritten to the same sentinel as
#     `return_value`: for allocator entries it is a copy of the raw VA in
#     hex-string form, so leaving it untouched would reintroduce the very
#     divergence this pass removes.
#   * Other payload fields that happen to embed an allocator VA (e.g. a
#     future `args_resolved.base_address` in a free-call) are NOT
#     canonicalized — out of scope for this divergence. Extend the set
#     below as new divergence classes surface.
ALLOCATOR_RETURN_FNS = frozenset(
    [
        "MmAllocatePhysicalMemoryEx",
        "MmAllocatePhysicalMemory",
        "NtAllocateVirtualMemory",
        "RtlAllocateHeap",
        "MmCreateKernelStack",
    ]
)


def canonicalize_allocator_returns(events_by_tid: dict) -> None:
    """Replace allocator return addresses with per-thread ordinal sentinels.

    For every `kernel.return` event whose `payload.name` is in
    ALLOCATOR_RETURN_FNS, `payload.return_value` (and `payload.status`, if
    the key exists) is overwritten in place with `<ALLOC_<NAME>_<ORDINAL>>`.
    Ordinals start at 0 and are counted separately per (tid, name) in event
    order, so two streams that make the same allocator calls in the same
    order on a thread end up with identical sentinels.

    Args:
        events_by_tid: {tid: [event, ...]} as produced by `load_events`.
            Mutated in place.

    Returns:
        None.
    """
    for evs in events_by_tid.values():
        # name -> next ordinal to hand out on this tid
        next_ordinal: dict[str, int] = {}
        for ev in evs:
            if ev.get("kind") != "kernel.return":
                continue
            payload = ev.get("payload")
            # A missing, null or non-object payload has nothing to rewrite.
            if not isinstance(payload, dict):
                continue
            name = payload.get("name")
            # The isinstance guard keeps an unhashable `name` (e.g. a JSON
            # array) from raising inside the frozenset membership test.
            if not isinstance(name, str) or name not in ALLOCATOR_RETURN_FNS:
                continue
            ordinal = next_ordinal.get(name, 0)
            next_ordinal[name] = ordinal + 1
            sentinel = f"<ALLOC_{name}_{ordinal}>"
            payload["return_value"] = sentinel
            # `payload.status` mirrors `return_value` as a hex string for
            # allocator entries (xboxkrnl trampoline doesn't distinguish
            # NTSTATUS from pointer-typed returns). Canonicalize together
            # so they stay in lockstep.
            if "status" in payload:
                payload["status"] = sentinel
|
|
+
|
|
+
|
|
def load_events(path: Path) -> dict:
    """Load a JSONL event log and group its events by guest thread.

    The first line must be a `schema_version` header whose version equals
    SCHEMA_VERSION. Every later non-blank line must be a JSON object with a
    `tid` and an integer `tid_event_idx`.

    Args:
        path: Location of the JSONL log to read (UTF-8).

    Returns:
        {tid: [event, ...]}; each list keeps file order, which is verified to
        be strictly increasing in `tid_event_idx`. Gaps are allowed (the
        schema permits an engine to emit fewer events); repeats and
        regressions are not.

    Raises:
        SystemExit: on an empty file, a bad or mismatching header, a line
            that is not a JSON object, a missing `tid`, a missing or
            non-integer `tid_event_idx`, or out-of-order events.
    """
    events_by_tid: dict[int, list[dict]] = {}
    with path.open("r", encoding="utf-8") as f:
        first = f.readline()
        if not first:
            raise SystemExit(f"{path}: empty file")
        # Report a malformed header the same way as a malformed event line
        # instead of letting the decoder traceback escape.
        try:
            hdr = json.loads(first)
        except json.JSONDecodeError as e:
            raise SystemExit(f"{path}:1: invalid JSON ({e})") from e
        if not isinstance(hdr, dict):
            raise SystemExit(f"{path}:1: header is not a JSON object")
        if hdr.get("kind") != "schema_version":
            raise SystemExit(
                f"{path}: first event is not schema_version (got {hdr.get('kind')!r})"
            )
        if hdr.get("schema_version") != SCHEMA_VERSION:
            raise SystemExit(
                f"{path}: schema_version mismatch (expected {SCHEMA_VERSION}, got {hdr.get('schema_version')!r})"
            )
        for lineno, line in enumerate(f, start=2):
            # strip() rather than rstrip("\n") so CRLF or whitespace-only
            # lines count as blank instead of failing JSON decoding.
            if not line.strip():
                continue
            try:
                ev = json.loads(line)
            except json.JSONDecodeError as e:
                raise SystemExit(f"{path}:{lineno}: invalid JSON ({e})") from e
            if not isinstance(ev, dict):
                raise SystemExit(f"{path}:{lineno}: event is not a JSON object")
            tid = ev.get("tid")
            if tid is None:
                raise SystemExit(f"{path}:{lineno}: missing tid")
            events_by_tid.setdefault(tid, []).append(ev)

    # Each per-tid list must already be strictly increasing by tid_event_idx.
    # Every adjacent pair is checked: skipping the check whenever the index
    # happens to equal the list position would let a stream like [5, 1] pass.
    for tid, evs in events_by_tid.items():
        prev_idx = None
        for i, ev in enumerate(evs):
            idx = ev.get("tid_event_idx")
            if not isinstance(idx, int) or isinstance(idx, bool):
                raise SystemExit(
                    f"{path}: tid={tid} event at index {i} has missing or "
                    f"non-integer tid_event_idx ({idx!r})"
                )
            if prev_idx is not None and idx <= prev_idx:
                raise SystemExit(
                    f"{path}: tid={tid} events out of order at index {i}"
                )
            prev_idx = idx
    return events_by_tid
|
|
+
|
|
+
|
|
+def auto_tid_map(canary_evs: dict, ours_evs: dict) -> dict[int, int]:
|
|
+ """Naive tid mapping: pair canary tids with ours tids by the first
|
|
+ kernel.call name in each stream. Documented limitation in README."""
|
|
+ def first_call_name(evs: list[dict]) -> str | None:
|
|
+ for ev in evs:
|
|
+ if ev.get("kind") == "kernel.call":
|
|
+ return ev["payload"].get("name")
|
|
+ return None
|
|
+
|
|
+ canary_by_first = {}
|
|
+ for tid, evs in canary_evs.items():
|
|
+ name = first_call_name(evs)
|
|
+ if name is not None:
|
|
+ canary_by_first.setdefault(name, []).append(tid)
|
|
+
|
|
+ ours_by_first = {}
|
|
+ for tid, evs in ours_evs.items():
|
|
+ name = first_call_name(evs)
|
|
+ if name is not None:
|
|
+ ours_by_first.setdefault(name, []).append(tid)
|
|
+
|
|
+ mapping: dict[int, int] = {}
|
|
+ for name, c_tids in canary_by_first.items():
|
|
+ o_tids = ours_by_first.get(name, [])
|
|
+ for c, o in zip(sorted(c_tids), sorted(o_tids)):
|
|
+ mapping[c] = o
|
|
+ return mapping
|
|
+
|
|
+
|
|
def parse_tid_map_arg(s: str) -> dict[int, int]:
    """Parse a `--tid-map` value such as `6=1,7=2` into {6: 1, 7: 2}.

    Tokens are comma-separated `canary=ours` pairs. Whitespace around tokens
    and around each number is ignored, and empty tokens are skipped. Numbers
    are parsed with base 0, so `0x10` style prefixes are accepted.

    Args:
        s: Raw command-line value.

    Returns:
        {canary_tid: ours_tid}; a later duplicate canary tid overrides an
        earlier one.

    Raises:
        SystemExit: if a token has no `=` or either side is not an integer.
    """
    out: dict[int, int] = {}
    for token in s.split(","):
        token = token.strip()
        if not token:
            continue
        canary_text, sep, ours_text = token.partition("=")
        if not sep:
            raise SystemExit(f"--tid-map: bad token {token!r} (expected canary=ours)")
        try:
            out[int(canary_text.strip(), 0)] = int(ours_text.strip(), 0)
        except ValueError:
            # Same clean exit as a malformed token, not a bare traceback.
            raise SystemExit(
                f"--tid-map: bad token {token!r} (tids must be integers)"
            ) from None
    return out
|
|
+
|
|
+
|
|
def compare_payload(kind: str, p_canary: dict, p_ours: dict) -> str | None:
    """Find the first payload field on which the two engines disagree.

    Fields listed for `kind` in SKIP_PAYLOAD_FIELDS_BY_KIND are ignored.
    Canary's fields are checked first, in canary's key order; a field absent
    from ours is read as None. Fields that exist only in ours are checked
    afterwards and count as a difference only when their value is not None.

    Returns:
        None when the payloads are equivalent, otherwise a short
        human-readable description of the first differing field.
    """
    ignored = SKIP_PAYLOAD_FIELDS_BY_KIND.get(kind, set())

    for field, canary_value in p_canary.items():
        if field in ignored:
            continue
        ours_value = p_ours.get(field)
        if canary_value != ours_value:
            return f"payload.{field}: canary={canary_value!r} ours={ours_value!r}"

    # Anything also present in canary was already handled by the loop above.
    for field, ours_value in p_ours.items():
        if field in ignored or field in p_canary:
            continue
        if ours_value is not None:
            return f"payload.{field}: canary=<missing> ours={ours_value!r}"

    return None
|
|
+
|
|
+
|
|
def compare_event(ev_canary: dict, ev_ours: dict) -> str | None:
    """Compare two events that sit at the same position in a thread's stream.

    Checks, in order: `kind`, then `tid_event_idx` (the diff key), then the
    payloads via `compare_payload`. No other top-level field is inspected. A
    missing or null payload is treated as an empty one.

    Returns:
        None when the events are equivalent, otherwise a short description
        of the first difference found.
    """
    kind_canary = ev_canary.get("kind")
    kind_ours = ev_ours.get("kind")
    if kind_canary != kind_ours:
        return f"kind: canary={kind_canary!r} ours={kind_ours!r}"

    idx_canary = ev_canary.get("tid_event_idx")
    idx_ours = ev_ours.get("tid_event_idx")
    if idx_canary != idx_ours:
        return f"tid_event_idx: canary={idx_canary!r} ours={idx_ours!r}"

    # `or {}` also covers an explicit `"payload": null`. Reusing the value
    # already read with .get() avoids a KeyError when both events lack `kind`.
    return compare_payload(
        kind_canary,
        ev_canary.get("payload") or {},
        ev_ours.get("payload") or {},
    )
|
|
+
|
|
+
|
|
def render_event(ev: dict) -> str:
    """Return a one-line summary of an event for the diff report.

    Call-like kinds show `payload.name` (falling back to `payload.ord`),
    handle kinds show `payload.handle_semantic_id`, and every other kind
    shows the payload as-is. A missing `kind` or `tid_event_idx` is rendered
    as `?`.
    """
    kind = ev.get("kind", "?")
    idx = ev.get("tid_event_idx", "?")
    payload = ev.get("payload", {})
    # Field lookups go through a dict even when the payload is null or not an
    # object; the generic branch below still prints the payload unchanged.
    fields = payload if isinstance(payload, dict) else {}

    if kind in ("kernel.call", "kernel.return", "import.call"):
        name = fields.get("name") or fields.get("ord")
        return f"[{idx}] {kind} {name}"
    if kind in ("handle.create", "handle.destroy"):
        sid = fields.get("handle_semantic_id", "?")
        return f"[{idx}] {kind} sid={sid}"
    # thread.*, wait.* and unrecognised kinds all share the generic form.
    return f"[{idx}] {kind} {payload}"
|
|
+
|
|
+
|
|
def diff_one_tid(
    canary_evs: list[dict], ours_evs: list[dict], canary_tid: int, ours_tid: int
) -> dict:
    """Compare one mapped thread pair position by position.

    Comparison stops at the first pair for which `compare_event` reports a
    difference, or when the shorter stream runs out.

    Returns:
        A result dict with the two tids, the number of matching leading
        events, both stream lengths, the position and description of the
        first divergence (both None if there was none), up to five matching
        pairs immediately before it, the divergent pair, and the event
        following the divergence on each side (None at end of stream).
    """
    diverged_at: int | None = None
    diff_descr: str | None = None
    for position, (ec, eo) in enumerate(zip(canary_evs, ours_evs)):
        mismatch = compare_event(ec, eo)
        if mismatch is not None:
            diverged_at, diff_descr = position, mismatch
            break

    # Everything before the divergence matched; with no divergence the whole
    # overlapping prefix did.
    if diverged_at is None:
        matched = min(len(canary_evs), len(ours_evs))
    else:
        matched = diverged_at

    # Keep only the last five matching pairs as context.
    window_start = max(0, matched - 5)
    pre_context: list[tuple[dict, dict]] = list(
        zip(canary_evs[window_start:matched], ours_evs[window_start:matched])
    )

    def event_at(stream: list[dict], position: int | None) -> dict | None:
        if position is None or position >= len(stream):
            return None
        return stream[position]

    following = None if diverged_at is None else diverged_at + 1
    return {
        "canary_tid": canary_tid,
        "ours_tid": ours_tid,
        "matched": matched,
        "canary_total": len(canary_evs),
        "ours_total": len(ours_evs),
        "diverged_at": diverged_at,
        "diff_descr": diff_descr,
        "pre_context": pre_context,
        "post_canary": event_at(canary_evs, diverged_at),
        "post_ours": event_at(ours_evs, diverged_at),
        "next_canary": event_at(canary_evs, following),
        "next_ours": event_at(ours_evs, following),
    }
|
|
+
|
|
+
|
|
def render_report(per_tid_results: list[dict]) -> str:
    """Render the per-thread diff results as a markdown report.

    The report has a fixed preamble, a summary table with one row per mapped
    thread pair, and then one section per pair. A pair without a divergence
    gets a single sentence; a diverging pair gets the pre-context, the
    divergent events, the following event on each side, and the raw JSON of
    the divergent events.

    Args:
        per_tid_results: Result dicts as returned by `diff_one_tid`.

    Returns:
        The report as one newline-joined string.
    """
    out: list[str] = [
        "# Phase A diff report",
        "",
        "**This report is the output of Phase A's diff harness. Divergences",
        "shown here are INPUT for Phase B (first-divergence localization),",
        "not findings of Phase A.** Phase A's job is to make the harness",
        "itself correct, not to analyze what it surfaces.",
        "",
        "## Summary",
        "",
        "| canary_tid | ours_tid | matched | canary_total | ours_total | first_divergence_at |",
        "|---|---|---|---|---|---|",
    ]
    for r in per_tid_results:
        # Compare against None explicitly: a divergence at position 0 is falsy.
        div = r["diverged_at"] if r["diverged_at"] is not None else "—"
        out.append(
            f"| {r['canary_tid']} | {r['ours_tid']} | {r['matched']} | "
            f"{r['canary_total']} | {r['ours_total']} | {div} |"
        )
    out.append("")

    for r in per_tid_results:
        out.append(f"## canary_tid={r['canary_tid']} → ours_tid={r['ours_tid']}")
        out.append("")
        if r["diverged_at"] is None:
            out.append(
                f"No divergence within the {r['matched']} compared events "
                f"(canary has {r['canary_total']}, ours has {r['ours_total']})."
            )
            out.append("")
            continue

        out.append(f"First divergence at `tid_event_idx={r['diverged_at']}`: {r['diff_descr']}")
        out.append("")

        out.append("**Pre-context (last 5 matching events):**")
        out.append("```")
        for ec, eo in r["pre_context"]:
            out.append(f" canary: {render_event(ec)}")
            out.append(f" ours: {render_event(eo)}")
        out.append("```")
        out.append("")

        out.append("**Divergent event:**")
        out.append("```")
        out.append(f" canary: {render_event(r['post_canary'])}")
        out.append(f" ours: {render_event(r['post_ours'])}")
        out.append("```")
        out.append("")

        out.append("**Next event after the divergence (if any):**")
        out.append("```")
        # None marks "no next event"; testing truthiness would also hide an
        # event that happens to be an empty dict.
        if r["next_canary"] is not None:
            out.append(f" canary: {render_event(r['next_canary'])}")
        else:
            out.append(" canary: <end of stream>")
        if r["next_ours"] is not None:
            out.append(f" ours: {render_event(r['next_ours'])}")
        else:
            out.append(" ours: <end of stream>")
        out.append("```")
        out.append("")

        out.append("**Raw events (JSON):**")
        out.append("```json")
        out.append(json.dumps(r["post_canary"], sort_keys=True))
        out.append(json.dumps(r["post_ours"], sort_keys=True))
        out.append("```")
        out.append("")
    return "\n".join(out)
|
|
+
|
|
+
|
|
def main() -> int:
    """Command-line entry point: diff two event logs and emit the report.

    Loads both logs, canonicalizes allocator return values unless
    `--no-canonicalize-allocators` is given, resolves the tid mapping (from
    `--tid-map`, else by auto-mapping), diffs every mapped pair present in
    both streams, and writes the markdown report to `--out` or stdout.

    Returns:
        0 on success; 1 when `--validate-identical` is set and a mapped pair
        diverged; 2 when there is no tid mapping, or when
        `--validate-identical` is set but no pair could be compared.
    """
    ap = argparse.ArgumentParser(description="Phase A event-log diff tool")
    ap.add_argument("--canary", required=True, type=Path)
    ap.add_argument("--ours", required=True, type=Path)
    ap.add_argument("--out", type=Path, help="Write markdown report here (else stdout)")
    ap.add_argument(
        "--tid-map",
        type=str,
        help="Manual tid mapping like '6=1,7=2'. Overrides auto-mapping.",
    )
    ap.add_argument(
        "--validate-identical",
        action="store_true",
        help="Exit non-zero if any mapped tid pair has any divergence. "
        "Used by gate-4 negative-test and by self-diff smoke tests.",
    )
    ap.add_argument(
        "--no-canonicalize-allocators",
        action="store_true",
        help="Disable per-tid ordinal canonicalization of allocator return "
        "values (default: enabled). See ALLOCATOR_RETURN_FNS for the "
        "covered set. Disabling reproduces the raw-VA comparison.",
    )
    args = ap.parse_args()

    canary_evs = load_events(args.canary)
    ours_evs = load_events(args.ours)

    if not args.no_canonicalize_allocators:
        canonicalize_allocator_returns(canary_evs)
        canonicalize_allocator_returns(ours_evs)

    if args.tid_map:
        tid_map = parse_tid_map_arg(args.tid_map)
    else:
        tid_map = auto_tid_map(canary_evs, ours_evs)

    if not tid_map:
        sys.stderr.write(
            "no tid mapping (auto-mapping found no shared first-kernel-call). "
            "Pass --tid-map manually.\n"
        )
        return 2

    per_tid: list[dict] = []
    for c_tid, o_tid in sorted(tid_map.items()):
        # A mapped tid missing from its stream is skipped with a warning
        # rather than aborting the whole run.
        if c_tid not in canary_evs:
            sys.stderr.write(f"warn: canary tid {c_tid} not in stream; skipping\n")
            continue
        if o_tid not in ours_evs:
            sys.stderr.write(f"warn: ours tid {o_tid} not in stream; skipping\n")
            continue
        per_tid.append(diff_one_tid(canary_evs[c_tid], ours_evs[o_tid], c_tid, o_tid))

    report = render_report(per_tid)
    if args.out:
        args.out.write_text(report, encoding="utf-8")
        sys.stderr.write(f"diff report written to {args.out}\n")
    else:
        sys.stdout.write(report)

    if args.validate_identical:
        # If every mapped pair was skipped, nothing was compared; a gate must
        # not report that as "identical".
        if not per_tid:
            sys.stderr.write(
                "validate-identical: no mapped tid pair was present in both "
                "streams; nothing was compared\n"
            )
            return 2
        for r in per_tid:
            if r["diverged_at"] is not None:
                sys.stderr.write(
                    f"validate-identical: divergence in canary_tid={r['canary_tid']} "
                    f"at tid_event_idx={r['diverged_at']} ({r['diff_descr']})\n"
                )
                return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
|