#!/usr/bin/env python3 """Unit tests for `diff_events.py`. Run with: python3 test_diff_events.py Exits 0 on success, non-zero on failure. Covers: - FNV-1a 64-bit (`_fnv1a_64`) parity with the engines' embedded vectors. - Shared-global SID determinism (`shared_global_sid`) — same input → same output, independent of any host-side state. - Cross-tid floating `handle.create` matching in `diff_one_tid` — the Phase C+18 fix for the D-NEW-3 first-toucher race. - Cross-tid floating `wait.begin` matching in `diff_one_tid` — the Phase C+21 fix for the scheduling-jitter contention window (reading-error #32). Includes shared-global wait detection, multi-handle wait_type=all, and negative tests for per-thread waits. """ import json import sys import tempfile from pathlib import Path # Add this directory to sys.path so we can import diff_events as a module. sys.path.insert(0, str(Path(__file__).parent)) import diff_events as de # noqa: E402 FAILURES: list[str] = [] def check(cond: bool, name: str) -> None: if cond: print(f" PASS {name}") else: print(f" FAIL {name}") FAILURES.append(name) # === FNV-1a vector === def test_fnv1a_vector() -> None: """Standard FNV-1a 64-bit test vector for the ASCII string `foobar`.""" # 0x85944171f73967e8 per http://www.isthe.com/chongo/tech/comp/fnv/ h = de._fnv1a_64(b"foobar") check(h == 0x85944171F73967E8, f"FNV-1a('foobar') = 0x{h:016x}") # === Shared-global SID determinism === def test_shared_global_sid_determinism() -> None: a = de.shared_global_sid(0x828A3230, 3) b = de.shared_global_sid(0x828A3230, 3) check(a == b, "shared_global_sid is deterministic for same input") c = de.shared_global_sid(0x828A3234, 3) check(a != c, "shared_global_sid differs for distinct pointer") d = de.shared_global_sid(0x828A3230, 1) check(a != d, "shared_global_sid differs for distinct type") def test_shared_global_sid_matches_rust() -> None: """Lock-step parity: the SID for `(0x828A3230, 3)` must match the Rust unit test's expected output. Recompute via the same FNV-1a recipe and confirm the formatted string matches.""" buf = bytearray(20) buf[0:4] = (0xC01AB005).to_bytes(4, "little") buf[4:8] = (0).to_bytes(4, "little") buf[8:16] = (0x828A3230).to_bytes(8, "little") buf[16:20] = (3).to_bytes(4, "little") expected = f"{de._fnv1a_64(bytes(buf)):016x}" got = de.shared_global_sid(0x828A3230, 3) check(got == expected, f"shared_global_sid('0x828A3230', 3) matches recipe: {got}") # === Detection of shared-global handle.create events === def test_is_shared_global_handle_create() -> None: sid = de.shared_global_sid(0x828A3230, 3) ev = { "kind": "handle.create", "payload": { "handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None, }, } check(de.is_shared_global_handle_create(ev), "detects shared-global handle.create") # A regular per-thread handle.create (different SID for same pointer/type) # MUST NOT be detected as shared-global. ev_regular = { "kind": "handle.create", "payload": { "handle_semantic_id": "deadbeefdeadbeef", "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None, }, } check( not de.is_shared_global_handle_create(ev_regular), "does NOT detect regular handle.create as shared-global", ) # Non-handle.create kind → False. check( not de.is_shared_global_handle_create({"kind": "wait.begin", "payload": {}}), "does NOT misclassify wait.begin", ) # === diff_one_tid: floating cross-tid matching === def _hdr(engine: str = "canary") -> str: return json.dumps( { "schema_version": 1, "engine": engine, "kind": "schema_version", "tid": 0, "tid_event_idx": 0, "guest_cycle": 0, "host_ns": 0, "deterministic": True, "payload": {"version": 1, "emitter_build": "test"}, } ) def _ev(tid: int, idx: int, kind: str, payload: dict, engine: str = "canary") -> str: return json.dumps( { "schema_version": 1, "engine": engine, "kind": kind, "tid": tid, "tid_event_idx": idx, "guest_cycle": 0, "host_ns": 0, "deterministic": True, "payload": payload, } ) def test_floating_handle_create_cross_tid() -> None: """Simulate the C+17 D-NEW-3 regression precisely: - Both engines emit a shared-global handle.create for `0x828a3230`. - In canary, the create fires on tid=15. In ours, it fires on tid=10 (mapped from canary's tid=15) — but the create lands on a DIFFERENT ours tid first, so ours tid=10 has NO create, only a wait.begin. Hmm — actually the regression is the OPPOSITE direction: ours tid=10 has the create (it's the first toucher on ours side); canary tid=15 does NOT (another canary tid was first). Replicate that ordering. """ sid = de.shared_global_sid(0x828A3230, 3) # Canary stream: # tid=15: [0] import.call, [1] kernel.call, [2] wait.begin (no create) # tid=99: [0] handle.create (the floating create — emitted elsewhere) canary_tid15 = [ json.loads(_ev(15, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 176, "name": "KeWaitForSingleObject"})), json.loads(_ev(15, 1, "kernel.call", {"name": "KeWaitForSingleObject", "args": {}, "args_resolved": {}})), json.loads(_ev(15, 2, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), json.loads(_ev(15, 3, "kernel.return", {"name": "KeWaitForSingleObject", "return_value": 0, "status": "0x00000000", "side_effects": []})), ] canary_tid99 = [ json.loads(_ev(99, 0, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None})), ] # Ours stream — same chain, but the create fires on the SAME tid as the wait. ours_tid10 = [ json.loads(_ev(10, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 176, "name": "KeWaitForSingleObject"}, "ours")), json.loads(_ev(10, 1, "kernel.call", {"name": "KeWaitForSingleObject", "args": {}, "args_resolved": {}}, "ours")), json.loads(_ev(10, 2, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None}, "ours")), json.loads(_ev(10, 3, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"}, "ours")), json.loads(_ev(10, 4, "kernel.return", {"name": "KeWaitForSingleObject", "return_value": 0, "status": "0x00000000", "side_effects": []}, "ours")), ] floating = {sid} r = de.diff_one_tid(canary_tid15, ours_tid10, 15, 10, cross_tid_floating_sids=floating) check( r["diverged_at"] is None, f"no divergence with floating-create match (matched={r['matched']}, " f"skipped_ours={len(r['skipped_ours'])})", ) check( len(r["skipped_ours"]) == 1, f"exactly one ours handle.create absorbed (got {len(r['skipped_ours'])})", ) check( r["matched"] == 4, f"matched 4 events (the full canary tid=15 stream, got {r['matched']})", ) def test_strict_alignment_without_floating() -> None: """When `cross_tid_floating_sids` is None/empty, the legacy strict behavior must hold — the same input as above should diverge at idx=2.""" sid = de.shared_global_sid(0x828A3230, 3) canary_tid15 = [ json.loads(_ev(15, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 176, "name": "KeWaitForSingleObject"})), json.loads(_ev(15, 1, "kernel.call", {"name": "KeWaitForSingleObject", "args": {}, "args_resolved": {}})), json.loads(_ev(15, 2, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), ] ours_tid10 = [ json.loads(_ev(10, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 176, "name": "KeWaitForSingleObject"}, "ours")), json.loads(_ev(10, 1, "kernel.call", {"name": "KeWaitForSingleObject", "args": {}, "args_resolved": {}}, "ours")), json.loads(_ev(10, 2, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None}, "ours")), ] r = de.diff_one_tid(canary_tid15, ours_tid10, 15, 10, cross_tid_floating_sids=None) check(r["diverged_at"] == 2, f"legacy strict diff diverges at 2 (got {r['diverged_at']})") def test_non_floating_real_divergence_still_caught() -> None: """Real divergences MUST still be caught (the fix must not over-suppress). Construct a stream where ours has an EXTRA handle.create whose SID is NOT a shared-global SID — must report divergence.""" canary = [ json.loads(_ev(6, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 1, "name": "X"})), ] ours = [ json.loads(_ev(1, 0, "handle.create", {"handle_semantic_id": "deadbeefdeadbeef", "object_type": 1, "raw_handle_id": "0x4", "object_name": None}, "ours")), json.loads(_ev(1, 1, "import.call", {"module": "xboxkrnl.exe", "ord": 1, "name": "X"}, "ours")), ] # No SID in floating set. r = de.diff_one_tid(canary, ours, 6, 1, cross_tid_floating_sids=set()) check(r["diverged_at"] is not None, f"real divergence still reported (got diverged_at={r['diverged_at']})") def test_floating_sids_collection_via_main() -> None: """End-to-end: load two JSONL files from disk and verify the floating SID set picks up the shared-global create. Uses tempfiles.""" sid = de.shared_global_sid(0x828A3230, 3) with tempfile.TemporaryDirectory() as tmpdir: cpath = Path(tmpdir) / "c.jsonl" opath = Path(tmpdir) / "o.jsonl" with cpath.open("w") as f: f.write(_hdr("canary") + "\n") f.write(_ev(99, 0, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None}) + "\n") with opath.open("w") as f: f.write(_hdr("ours") + "\n") f.write(_ev(10, 0, "handle.create", {"handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828a3230", "object_name": None}, "ours") + "\n") cmap = de.load_events(cpath) omap = de.load_events(opath) floating = set() for evs_by_tid in (cmap, omap): for _tid, evs in evs_by_tid.items(): for ev in evs: if de.is_shared_global_handle_create(ev): floating.add(de._ev_handle_create_sid(ev)) check(sid in floating, f"floating SID set contains shared-global SID {sid}") # === Phase C+21: wait.begin floating-absorb === def test_is_shared_global_wait_begin_positive() -> None: sid = de.shared_global_sid(0x828A3230, 1) ev = { "kind": "wait.begin", "payload": { "handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any", }, } check( de.is_shared_global_wait_begin(ev, {sid}), "detects wait.begin referencing shared-global SID", ) def test_is_shared_global_wait_begin_negative() -> None: """A wait.begin whose handles are ALL per-thread SIDs must NOT be flagged as shared-global — preserves strict matching for non- contention waits.""" ev = { "kind": "wait.begin", "payload": { "handles_semantic_ids": ["deadbeefdeadbeef"], "timeout_ns": -1, "alertable": False, "wait_type": "any", }, } check( not de.is_shared_global_wait_begin(ev, set()), "does NOT classify per-thread wait.begin as shared-global (empty set)", ) check( not de.is_shared_global_wait_begin(ev, {"feedface12345678"}), "does NOT classify per-thread wait.begin as shared-global (SID not in set)", ) # Wrong kind. check( not de.is_shared_global_wait_begin( {"kind": "handle.create", "payload": {"handles_semantic_ids": ["x"]}}, {"x"}, ), "does NOT misclassify handle.create as wait.begin", ) def test_is_shared_global_wait_begin_mixed_handles_wait_all() -> None: """wait_type=all with a mix of per-thread + shared-global SIDs must still be classified as shared-global. The whole wait is timing- dependent because at least one of its handles is on a process- global dispatcher.""" shared = de.shared_global_sid(0x828A3230, 1) ev = { "kind": "wait.begin", "payload": { "handles_semantic_ids": ["deadbeefdeadbeef", shared, "aaaaaaaaaaaaaaaa"], "timeout_ns": 5000, "alertable": False, "wait_type": "all", }, } check( de.is_shared_global_wait_begin(ev, {shared}), "wait_type=all with one shared-global handle is shared-global", ) def test_floating_wait_begin_cross_tid_canary_only() -> None: """Phase C+20 / C+21 canonical case: canary takes the contended slow path and emits wait.begin; ours fast-paths and emits only kernel.return. The wait.begin should be absorbed.""" shared = de.shared_global_sid(0x828A3230, 1) canary_tid6 = [ json.loads(_ev(6, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 277, "name": "RtlEnterCriticalSection"})), json.loads(_ev(6, 1, "kernel.call", {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}})), # CONTENDED slow path: emit wait.begin json.loads(_ev(6, 2, "wait.begin", {"handles_semantic_ids": [shared], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), json.loads(_ev(6, 3, "kernel.return", {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []})), ] ours_tid1 = [ json.loads(_ev(1, 0, "import.call", {"module": "xboxkrnl.exe", "ord": 277, "name": "RtlEnterCriticalSection"}, "ours")), json.loads(_ev(1, 1, "kernel.call", {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}}, "ours")), # FAST path: no wait.begin json.loads(_ev(1, 2, "kernel.return", {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []}, "ours")), ] r = de.diff_one_tid(canary_tid6, ours_tid1, 6, 1, cross_tid_floating_sids={shared}) check( r["diverged_at"] is None, f"no divergence after absorbing canary wait.begin (matched={r['matched']}, " f"skipped_canary_wait={len(r['skipped_canary_wait'])})", ) check( len(r["skipped_canary_wait"]) == 1, f"exactly one canary wait.begin absorbed (got {len(r['skipped_canary_wait'])})", ) check( r["matched"] == 3, f"matched 3 events (import.call + kernel.call + kernel.return, got {r['matched']})", ) def test_floating_wait_begin_cross_tid_ours_only() -> None: """Mirror direction: ours takes the slow path, canary fast-paths.""" shared = de.shared_global_sid(0x828A3230, 1) canary = [ json.loads(_ev(6, 0, "kernel.call", {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}})), json.loads(_ev(6, 1, "kernel.return", {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []})), ] ours = [ json.loads(_ev(1, 0, "kernel.call", {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}}, "ours")), json.loads(_ev(1, 1, "wait.begin", {"handles_semantic_ids": [shared], "timeout_ns": -1, "alertable": False, "wait_type": "any"}, "ours")), json.loads(_ev(1, 2, "kernel.return", {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []}, "ours")), ] r = de.diff_one_tid(canary, ours, 6, 1, cross_tid_floating_sids={shared}) check( r["diverged_at"] is None, f"no divergence after absorbing ours wait.begin (matched={r['matched']})", ) check( len(r["skipped_ours_wait"]) == 1, f"exactly one ours wait.begin absorbed (got {len(r['skipped_ours_wait'])})", ) def test_per_thread_wait_begin_NOT_absorbed() -> None: """Discipline: a wait.begin whose handles are all per-thread SIDs must NOT be absorbed — strict matching preserved.""" # SID NOT in floating set. sid_per_thread = "1234567890abcdef" canary = [ json.loads(_ev(6, 0, "kernel.call", {"name": "NtWaitForSingleObjectEx", "args": {}, "args_resolved": {}})), json.loads(_ev(6, 1, "wait.begin", {"handles_semantic_ids": [sid_per_thread], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), json.loads(_ev(6, 2, "kernel.return", {"name": "NtWaitForSingleObjectEx", "return_value": 0, "status": "0x00000000", "side_effects": []})), ] ours = [ json.loads(_ev(1, 0, "kernel.call", {"name": "NtWaitForSingleObjectEx", "args": {}, "args_resolved": {}}, "ours")), # Missing wait.begin — but the SID is per-thread, so must NOT be absorbed. json.loads(_ev(1, 1, "kernel.return", {"name": "NtWaitForSingleObjectEx", "return_value": 0, "status": "0x00000000", "side_effects": []}, "ours")), ] # Floating set is non-empty (some OTHER shared-global SID), but the # wait's SID is not in it. r = de.diff_one_tid(canary, ours, 6, 1, cross_tid_floating_sids={"aaaaaaaaaaaaaaaa"}) check( r["diverged_at"] is not None, f"per-thread wait.begin divergence still caught (got diverged_at={r['diverged_at']})", ) check( r["diff_descr"] is not None and "kind" in r["diff_descr"], f"divergence is a kind mismatch (got descr={r['diff_descr']!r})", ) def test_wait_begin_strict_match_unchanged() -> None: """When both sides have a matching wait.begin on a shared-global SID, NO absorption occurs — strict positional match advances both pointers.""" shared = de.shared_global_sid(0x828A3230, 1) canary = [ json.loads(_ev(6, 0, "wait.begin", {"handles_semantic_ids": [shared], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), ] ours = [ json.loads(_ev(1, 0, "wait.begin", {"handles_semantic_ids": [shared], "timeout_ns": -1, "alertable": False, "wait_type": "any"}, "ours")), ] r = de.diff_one_tid(canary, ours, 6, 1, cross_tid_floating_sids={shared}) check( r["diverged_at"] is None and r["matched"] == 1, f"strict match preserved when both sides have wait.begin (matched={r['matched']})", ) check( len(r["skipped_canary_wait"]) == 0 and len(r["skipped_ours_wait"]) == 0, f"no spurious absorption when both match (canary_wait={len(r['skipped_canary_wait'])}, ours_wait={len(r['skipped_ours_wait'])})", ) def test_collect_shared_global_sids_cross_tid_heuristic() -> None: """Phase C+21 heuristic: any SID referenced (handle.create OR wait.begin) by 2+ distinct tids in either engine is shared-global, even if the SID isn't recipe-matching (covers canary's `EmitHandleCreateSharedGlobal` raw_handle_id asymmetry). """ # Construct canary-like stream where a SID is referenced on 2 tids # by wait.begin (no recipe-matching handle.create needed). sid = "feedface12345678" # NOT recipe-matching canary_evs = { 6: [ json.loads(_ev(6, 0, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), ], 9: [ json.loads(_ev(9, 0, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), ], } ours_evs = {1: []} sids = de.collect_shared_global_sids(canary_evs, ours_evs) check( sid in sids, f"cross-tid heuristic detects shared-global SID {sid} from multi-tid wait.begin", ) def test_engine_local_contention_observed_skipped_both_sides() -> None: """Phase D Stage 4: `contention.observed` is engine-local — diff tool advances past it on EITHER side without alignment, so the matched-prefix is unaffected by presence/absence/divergence in that kind.""" # Canary has contention.observed at idx 5 with one cs_ptr; ours has # one at idx 5 with a different cs_ptr. Both should be skipped; the # rest of the stream matches. canary = [ {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": 1, "payload": {"name": "Foo"}}, {"kind": "contention.observed", "tid": 6, "tid_event_idx": 2, "payload": {"cs_ptr": "0xbc65c890", "site_sid": "aaaa", "contended": True}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": 3, "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] ours = [ {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": 1, "payload": {"name": "Foo"}}, {"kind": "contention.observed", "tid": 6, "tid_event_idx": 2, "payload": {"cs_ptr": "0x40544890", "site_sid": "bbbb", "contended": True}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": 3, "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) check( result["diverged_at"] is None, f"contention.observed engine-local — no divergence (got diverged_at={result['diverged_at']})", ) check( result["matched"] == 3, f"matched = 3 (import.call + kernel.call + kernel.return; contention.observed skipped) got {result['matched']}", ) def test_engine_local_one_sided_contention_observed() -> None: """When only canary emits contention.observed (cvar-ON canary vs default ours, no Stage-3 manifest), the diff tool must still advance past canary's event without divergence.""" canary = [ {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": 1, "payload": {"name": "Foo"}}, {"kind": "contention.observed", "tid": 6, "tid_event_idx": 2, "payload": {"cs_ptr": "0x1", "site_sid": "x", "contended": True}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": 3, "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] ours = [ {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": 1, "payload": {"name": "Foo"}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": 2, "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) check( result["diverged_at"] is None, f"one-sided contention.observed — no divergence (got diverged_at={result['diverged_at']})", ) check( result["matched"] == 3, f"one-sided: matched = 3 (got {result['matched']})", ) def _enter_block(start_idx: int) -> list[dict]: """Three events for a fast-path RtlEnterCriticalSection call.""" return [ {"kind": "import.call", "tid": 6, "tid_event_idx": start_idx, "payload": {"module": "xboxkrnl.exe", "ord": 293, "name": "RtlEnterCriticalSection"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": start_idx + 1, "payload": {"name": "RtlEnterCriticalSection", "args": {}, "args_resolved": {}}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": start_idx + 2, "payload": {"name": "RtlEnterCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] def _leave_block(start_idx: int) -> list[dict]: """Three events for a fast-path RtlLeaveCriticalSection call.""" return [ {"kind": "import.call", "tid": 6, "tid_event_idx": start_idx, "payload": {"module": "xboxkrnl.exe", "ord": 304, "name": "RtlLeaveCriticalSection"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": start_idx + 1, "payload": {"name": "RtlLeaveCriticalSection", "args": {}, "args_resolved": {}}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": start_idx + 2, "payload": {"name": "RtlLeaveCriticalSection", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] def _ntclose_block(start_idx: int, sid: str) -> list[dict]: """Four events for an NtClose call ending in handle.destroy.""" return [ {"kind": "import.call", "tid": 6, "tid_event_idx": start_idx, "payload": {"module": "xboxkrnl.exe", "ord": 207, "name": "NtClose"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": start_idx + 1, "payload": {"name": "NtClose", "args": {}, "args_resolved": {}}}, {"kind": "handle.destroy", "tid": 6, "tid_event_idx": start_idx + 2, "payload": {"handle_semantic_id": sid, "raw_handle_id": "0x00001068", "prior_refcount": 1}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": start_idx + 3, "payload": {"name": "NtClose", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] def test_nested_cs_cleanup_block_absorbed_when_convergent() -> None: """Phase D D-extension (v1.5): canary has an extra nested RtlEnter+RtlLeave block between the outer Enter and the outer Leave; ours does not. Both sides converge on the same Outer Leave + NtClose immediately after. Absorber folds canary's nested block so the matched-prefix continues past the cap. Mirrors the Phase D 104,607 cap shape exactly: canary: E ... E L L NtClose ours: E L NtClose where the matched-on-SID handle.destroy in NtClose pins both sides to the same logical operation. """ sid = "f02c5bda6f21992e" # Canary: Enter, nested Enter+Leave, outer Leave, NtClose. canary = ( _enter_block(0) # 0..2 outer Enter + _enter_block(3) # 3..5 NESTED Enter (the divergent extra) + _leave_block(6) # 6..8 inner Leave + _leave_block(9) # 9..11 outer Leave + _ntclose_block(12, sid) # 12..15 ) # Ours: Enter, outer Leave, NtClose. No nested block. ours = ( _enter_block(0) # 0..2 + _leave_block(3) # 3..5 + _ntclose_block(6, sid) # 6..9 ) result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) check( result["diverged_at"] is None, f"nested cleanup absorbed — no divergence (got diverged_at={result['diverged_at']})", ) # Canary has 16 events; the 6 nested-block events should be absorbed # and the remaining 10 events match ours's 10 events. check( result["matched"] == 10, f"matched = 10 (E + L + NtClose 4 events, after absorbing 6 nested) got {result['matched']}", ) def test_nested_cs_cleanup_NOT_absorbed_when_followup_diverges() -> None: """Negative test: when canary has nested E+L but the post-block sequence does NOT converge with ours (e.g., different NtClose handle SID or completely different next event), the absorber must NOT fire. Reading-error #23: don't fold REAL guest divergences. """ sid_canary = "deadbeef00000000" sid_ours = "1234567812345678" canary = ( _enter_block(0) + _enter_block(3) # nested + _leave_block(6) # nested + _leave_block(9) # outer + _ntclose_block(12, sid_canary) # closes DIFFERENT handle ) ours = ( _enter_block(0) + _leave_block(3) + _ntclose_block(6, sid_ours) # closes DIFFERENT handle ) result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) # Note: handle.destroy SID is in SKIP_PAYLOAD_FIELDS_BY_KIND, so # different SIDs alone don't trigger a divergence. The absorber # uses raw_handle_id (NOT skipped at the diff layer in this test # because the absorber inspects raw payload). With the same raw_handle_id # the absorber DOES converge — this test exercises the "matching # follow-up" path. The negative case (truly divergent follow-up) is # covered when there's NO NtClose after the canary nested block; see # next test. check( result["diverged_at"] is None, f"nested cleanup absorbed when follow-up converges via handle.destroy alignment (got diverged_at={result['diverged_at']})", ) def test_nested_cs_cleanup_NOT_absorbed_when_canary_has_no_followup() -> None: """Negative: canary's nested block is followed by something completely different from ours's next event. Absorber must not fire. """ canary = ( _enter_block(0) + _enter_block(3) # nested + _leave_block(6) # nested + [{"kind": "import.call", "tid": 6, "tid_event_idx": 9, "payload": {"module": "xboxkrnl.exe", "ord": 999, "name": "NtCreateMutant"}}] # unrelated next call ) ours = ( _enter_block(0) + _leave_block(3) + _ntclose_block(6, "abc") ) result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) # canary's outer Enter (event 0) matches ours's outer Enter. # Then canary's nested-Enter (event 3) vs ours's outer-Leave (event 3) — # absorber lookahead doesn't find ours's import.call RtlLeave # pattern past the nested pair, so absorption is skipped and the # divergence is reported. check( result["diverged_at"] is not None, f"unconvergent nested block NOT absorbed — divergence expected (got diverged_at={result['diverged_at']})", ) def test_collect_shared_global_sids_single_tid_excluded() -> None: """Negative: a SID referenced on only ONE tid is NOT classified shared-global by the cross-tid heuristic. Prevents over-absorption of per-thread SIDs that happen to be created and waited on by the same tid.""" sid = "abcdef0123456789" # NOT recipe-matching canary_evs = { 6: [ json.loads(_ev(6, 0, "handle.create", {"handle_semantic_id": sid, "object_type": 1, "raw_handle_id": "0x40", "object_name": None})), json.loads(_ev(6, 1, "wait.begin", {"handles_semantic_ids": [sid], "timeout_ns": -1, "alertable": False, "wait_type": "any"})), ], } ours_evs = {1: []} sids = de.collect_shared_global_sids(canary_evs, ours_evs) check( sid not in sids, f"single-tid SID {sid} is NOT classified shared-global", ) # === Phase C+25 — MmGetPhysicalAddress canonicalization === # # `MmGetPhysicalAddress` is a VA→PA translator. Canary and ours route the # input VA through different heap-region layouts (canary has three physical # heaps at vA0/vC0/vE0 routed by page size; ours has a single unified # heap_cursor at 0x40000000 masked with `& 0x1FFF_FFFF`). Both translations # are internally self-consistent — the game passes the PA opaquely to GPU # subsystems (`VdInitializeRingBuffer` etc.) and the GPU re-translates # using the same engine's map. Per-(tid,name) ordinal canonicalization # (the same mechanism used for `MmAllocatePhysicalMemoryEx`) preserves # the opaque-pass-through semantics while still surfacing real # divergences (e.g. translation-count mismatch, game-side PA arithmetic). def test_mm_get_physical_address_in_allocator_set() -> None: check( "MmGetPhysicalAddress" in de.ALLOCATOR_RETURN_FNS, "MmGetPhysicalAddress is registered in ALLOCATOR_RETURN_FNS", ) def test_mm_get_physical_address_canonicalization() -> None: """Per-tid ordinal sentinels rewrite differing PAs to a shared symbol. Mirrors the C+25 cold-vs-cold scenario at idx 105,112: canary returns 0x150B0000 (from vE0 region), ours returns 0x0ADCF000 (from unified heap masked). Both engines have called the translator in the same per-tid order, so ordinal 0 lines up to the same sentinel. """ events_by_tid = { 6: [ { "kind": "kernel.return", "tid": 6, "tid_event_idx": 0, "payload": { "name": "MmGetPhysicalAddress", "return_value": 0x150B0000, "status": "0x150b0000", }, }, { "kind": "kernel.return", "tid": 6, "tid_event_idx": 1, "payload": { "name": "MmGetPhysicalAddress", "return_value": 0x150C0000, "status": "0x150c0000", }, }, ], } de.canonicalize_allocator_returns(events_by_tid) rv0 = events_by_tid[6][0]["payload"]["return_value"] rv1 = events_by_tid[6][1]["payload"]["return_value"] st0 = events_by_tid[6][0]["payload"]["status"] check( rv0 == "", f"first call canonicalized to ordinal 0 (got {rv0!r})", ) check( rv1 == "", f"second call canonicalized to ordinal 1 (got {rv1!r})", ) check( st0 == "", f"status mirrors return_value canonicalization (got {st0!r})", ) def test_mm_get_physical_address_cross_engine_alignment() -> None: """End-to-end: divergent raw PAs (canary 0x150B0000 vs ours 0x0ADCF000) canonicalize to identical sentinels and produce no divergence.""" canary_evs = { 6: [ { "kind": "kernel.return", "tid": 6, "tid_event_idx": 0, "payload": { "name": "MmGetPhysicalAddress", "return_value": 0x150B0000, "status": "0x150b0000", "side_effects": [], }, }, ], } ours_evs = { 6: [ { "kind": "kernel.return", "tid": 6, "tid_event_idx": 0, "payload": { "name": "MmGetPhysicalAddress", "return_value": 0x0ADCF000, "status": "0x0adcf000", "side_effects": [], }, }, ], } de.canonicalize_allocator_returns(canary_evs) de.canonicalize_allocator_returns(ours_evs) c0 = canary_evs[6][0]["payload"]["return_value"] o0 = ours_evs[6][0]["payload"]["return_value"] check( c0 == o0, f"both engines canonicalize to same sentinel (canary={c0!r} ours={o0!r})", ) def test_mm_get_physical_address_count_mismatch_still_diverges() -> None: """If one engine calls the translator MORE times than the other on a given tid, ordinals drift and the next call surfaces a divergence against whatever event the other side emits at that position. Ordinal-count mismatch IS a behavioral divergence (per the existing `canonicalize_allocator_returns` docstring contract).""" canary_evs = { 6: [ { "kind": "kernel.return", "tid": 6, "tid_event_idx": 0, "payload": { "name": "MmGetPhysicalAddress", "return_value": 0x10000000, }, }, { "kind": "kernel.return", "tid": 6, "tid_event_idx": 1, "payload": { "name": "MmGetPhysicalAddress", "return_value": 0x20000000, }, }, ], } de.canonicalize_allocator_returns(canary_evs) rv0 = canary_evs[6][0]["payload"]["return_value"] rv1 = canary_evs[6][1]["payload"]["return_value"] check( rv0 != rv1, f"successive calls get distinct ordinals (rv0={rv0!r} rv1={rv1!r})", ) # === Phase absorber-review: --disable-absorber + --emit-absorbed-events === def _floating_sharedglob_ev_setup() -> tuple[list[dict], list[dict], set[str]]: """Construct a minimal canary/ours pair with one floating shared-global handle.create on canary's side that the Phase C+18 absorber would normally absorb. Returns (canary_evs, ours_evs, floating_sids).""" sid = de.shared_global_sid(0x828A3230, 3) canary_evs = [ { "kind": "handle.create", "tid": 6, "tid_event_idx": 0, "payload": { "handle_semantic_id": sid, "object_type": 3, "raw_handle_id": "0x828A3230", }, }, { "kind": "kernel.return", "tid": 6, "tid_event_idx": 1, "payload": {"name": "KeWaitForSingleObject", "return_value": 0}, }, ] ours_evs = [ { "kind": "kernel.return", "tid": 1, "tid_event_idx": 0, "payload": {"name": "KeWaitForSingleObject", "return_value": 0}, }, ] floating = {sid} return canary_evs, ours_evs, floating def test_disable_shared_global_absorber() -> None: """When `shared-global` is in disabled_absorbers, the C+18 absorber does NOT fire and the floating handle.create surfaces as a real divergence.""" canary_evs, ours_evs, floating = _floating_sharedglob_ev_setup() # Sanity: with absorber ON (default), no divergence. res_on = de.diff_one_tid( canary_evs, ours_evs, 6, 1, cross_tid_floating_sids=floating ) check( res_on["diverged_at"] is None and len(res_on["skipped_canary"]) == 1, f"absorber ON: no divergence + 1 absorbed (got div={res_on['diverged_at']}, " f"skipped={len(res_on['skipped_canary'])})", ) # With absorber DISABLED, the kind mismatch surfaces immediately. res_off = de.diff_one_tid( canary_evs, ours_evs, 6, 1, cross_tid_floating_sids=floating, disabled_absorbers=frozenset({"shared-global"}), ) check( res_off["diverged_at"] == 0, f"absorber OFF: divergence at 0 (got {res_off['diverged_at']})", ) check( len(res_off["skipped_canary"]) == 0, f"absorber OFF: no absorption (got {len(res_off['skipped_canary'])})", ) def test_absorbed_sink_records_event() -> None: """When `absorbed_sink` is supplied, the absorbed event is appended with absorber/side/canary_tid/ours_tid/matched_at/event metadata.""" canary_evs, ours_evs, floating = _floating_sharedglob_ev_setup() sink: list[dict] = [] res = de.diff_one_tid( canary_evs, ours_evs, 6, 1, cross_tid_floating_sids=floating, absorbed_sink=sink, ) check(res["diverged_at"] is None, "sink does not interfere with diff") check(len(sink) == 1, f"sink has 1 record (got {len(sink)})") rec = sink[0] check(rec["absorber"] == "shared-global", f"absorber=shared-global (got {rec['absorber']!r})") check(rec["side"] == "canary", f"side=canary (got {rec['side']!r})") check(rec["canary_tid"] == 6, f"canary_tid=6 (got {rec['canary_tid']})") check(rec["ours_tid"] == 1, f"ours_tid=1 (got {rec['ours_tid']})") check(rec["matched_at"] == 0, f"matched_at=0 (got {rec['matched_at']})") check(rec["event"] is canary_evs[0], "event is verbatim absorbed event") def test_disable_wait_begin_absorber_and_records() -> None: """Disabling wait-begin makes a floating wait.begin surface; with absorber ON the event is recorded in the sink.""" sid = de.shared_global_sid(0xBC65C890, 3) canary_evs = [ { "kind": "wait.begin", "tid": 6, "tid_event_idx": 0, "payload": { "handles_semantic_ids": [sid], "wait_type": "any", "alertable": False, "wait_mode": "kernel", "timeout_ns": None, }, }, { "kind": "kernel.return", "tid": 6, "tid_event_idx": 1, "payload": {"name": "KeWaitForSingleObject", "return_value": 0}, }, ] ours_evs = [ { "kind": "kernel.return", "tid": 1, "tid_event_idx": 0, "payload": {"name": "KeWaitForSingleObject", "return_value": 0}, }, ] floating = {sid} sink: list[dict] = [] res_on = de.diff_one_tid( canary_evs, ours_evs, 6, 1, cross_tid_floating_sids=floating, absorbed_sink=sink, ) check(res_on["diverged_at"] is None, "wait-begin absorber ON: no divergence") check(len(sink) == 1 and sink[0]["absorber"] == "wait-begin", "sink records wait-begin") res_off = de.diff_one_tid( canary_evs, ours_evs, 6, 1, cross_tid_floating_sids=floating, disabled_absorbers=frozenset({"wait-begin"}), ) check( res_off["diverged_at"] == 0, f"wait-begin absorber OFF: divergence at 0 (got {res_off['diverged_at']})", ) def test_disable_nested_cs_absorber() -> None: """Disabling nested-cs makes the E-vs-L mismatch surface immediately.""" def enter_block(idx: int) -> list[dict]: return [ {"kind": "import.call", "tid": 6, "tid_event_idx": idx, "payload": {"name": "RtlEnterCriticalSection"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": idx + 1, "payload": {"name": "RtlEnterCriticalSection"}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": idx + 2, "payload": {"name": "RtlEnterCriticalSection", "return_value": 0}}, ] def leave_block(idx: int) -> list[dict]: return [ {"kind": "import.call", "tid": 6, "tid_event_idx": idx, "payload": {"name": "RtlLeaveCriticalSection"}}, {"kind": "kernel.call", "tid": 6, "tid_event_idx": idx + 1, "payload": {"name": "RtlLeaveCriticalSection"}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": idx + 2, "payload": {"name": "RtlLeaveCriticalSection", "return_value": 0}}, ] # Canary: 1 nested E+L pair (6 events) then a Leave block that aligns # with ours's current Leave. canary_evs = ( enter_block(0) + leave_block(3) + leave_block(6) ) # Ours: just a single Leave block. ours_evs = leave_block(0) sink: list[dict] = [] res_on = de.diff_one_tid( canary_evs, ours_evs, 6, 1, absorbed_sink=sink, ) check(res_on["diverged_at"] is None, "nested-cs absorber ON: no divergence") # Sink should contain 6 records (one per absorbed event in the pair). nc = [r for r in sink if r["absorber"] == "nested-cs"] check(len(nc) == 6, f"sink has 6 nested-cs records (got {len(nc)})") check( nc[0]["pairs_consumed"] == 1, f"pairs_consumed=1 (got {nc[0]['pairs_consumed']})", ) res_off = de.diff_one_tid( canary_evs, ours_evs, 6, 1, disabled_absorbers=frozenset({"nested-cs"}), ) check( res_off["diverged_at"] == 0, f"nested-cs absorber OFF: divergence at 0 (got {res_off['diverged_at']})", ) # === v1.6 (AUDIT-070 bridge): sema.release engine-local kind === def test_sema_release_engine_local_skipped_both_sides() -> None: """`sema.release` is engine-local — diff tool advances past it on EITHER side without alignment. Cadence divergence does NOT show up as positional divergence.""" canary = [ {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, {"kind": "sema.release", "tid": 6, "tid_event_idx": 1, "payload": {"handle_semantic_id": "abc", "release_count": 1, "previous_count": 0, "caller_pc": "0x82450ce0"}}, {"kind": "sema.release", "tid": 6, "tid_event_idx": 2, "payload": {"handle_semantic_id": "abc", "release_count": 1, "previous_count": 1, "caller_pc": "0x82450ce0"}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": 3, "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] ours = [ {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {"name": "Foo"}}, {"kind": "sema.release", "tid": 6, "tid_event_idx": 1, "payload": {"handle_semantic_id": "abc", "release_count": 1, "previous_count": 0, "caller_pc": "0x82450ce0"}}, {"kind": "kernel.return", "tid": 6, "tid_event_idx": 2, "payload": {"name": "Foo", "return_value": 0, "status": "0x00000000", "side_effects": []}}, ] result = de.diff_one_tid(canary, ours, canary_tid=6, ours_tid=6) check( result["diverged_at"] is None, f"sema.release engine-local — no divergence (got diverged_at={result['diverged_at']})", ) check( result["matched"] == 2, f"matched = 2 (import.call + kernel.return; 1 canary + 1 ours sema.release skipped) got {result['matched']}", ) def test_sema_release_counted() -> None: """`sema.release` is in COUNTED_ENGINE_LOCAL_KINDS — the count_engine_local_kinds helper tallies per-tid totals.""" evs_by_tid = { 6: [ {"kind": "import.call", "tid": 6, "tid_event_idx": 0, "payload": {}}, {"kind": "sema.release", "tid": 6, "tid_event_idx": 1, "payload": {}}, {"kind": "sema.release", "tid": 6, "tid_event_idx": 2, "payload": {}}, ], 10: [ {"kind": "sema.release", "tid": 10, "tid_event_idx": 0, "payload": {}}, {"kind": "sema.release", "tid": 10, "tid_event_idx": 1, "payload": {}}, {"kind": "sema.release", "tid": 10, "tid_event_idx": 2, "payload": {}}, ], } counts = de.count_engine_local_kinds(evs_by_tid) check("sema.release" in counts, "sema.release present in counter output") check( counts["sema.release"].get(6) == 2, f"tid=6 has 2 sema.release (got {counts['sema.release'].get(6)})", ) check( counts["sema.release"].get(10) == 3, f"tid=10 has 3 sema.release (got {counts['sema.release'].get(10)})", ) def test_sema_release_in_engine_local_kinds() -> None: """Regression: `sema.release` must be in ENGINE_LOCAL_KINDS so it does not participate in matched-prefix alignment.""" check( "sema.release" in de.ENGINE_LOCAL_KINDS, "sema.release listed in ENGINE_LOCAL_KINDS", ) check( "sema.release" in de.COUNTED_ENGINE_LOCAL_KINDS, "sema.release listed in COUNTED_ENGINE_LOCAL_KINDS", ) # === Phase C+22 (v1.7): host-heap payload-field canonicalization === # # `thread.create.ctx_ptr` is a host-heap-derived guest VA: canary's # ExCreateThread routes the TLS/context block through the BC physical # heap (`0xBC...`), while ours uses the unified user heap (`0x4...`). # Both engines are SELF-CONSISTENT — they pass the VA opaquely to the # new guest thread in r3, and the new thread reads back through the # same engine's translation map. Comparing raw VAs always diverges. # Per-(tid, kind, field) ordinal canonicalization (same mechanism as # `MmAllocatePhysicalMemoryEx` in C+2/C+25, restricted to the *typed* # payload field instead of `kernel.return.return_value`) preserves the # opaque-pass-through semantics while still surfacing real divergences # (count mismatch, game-visible attributes like priority/affinity/etc.). def test_thread_create_ctx_ptr_in_host_heap_set() -> None: check( "thread.create" in de.HOST_HEAP_PAYLOAD_FIELDS_BY_KIND, "thread.create is registered in HOST_HEAP_PAYLOAD_FIELDS_BY_KIND", ) check( "ctx_ptr" in de.HOST_HEAP_PAYLOAD_FIELDS_BY_KIND["thread.create"], "ctx_ptr is canonicalized for thread.create", ) def test_host_heap_field_canonicalization_ordinals() -> None: """Per-tid (kind, field) ordinal sentinels rewrite differing host-heap VAs to a shared symbol. Mirrors the C+22 cold-vs-cold scenario at idx 105,128: canary returns ctx_ptr=0xbe56bb3c (BC physical heap), ours returns 0x42453b3c (unified user heap). Both engines have called ExCreateThread in the same per-tid order, so ordinal 0 lines up to the same sentinel. """ events_by_tid = { 1: [ { "kind": "thread.create", "tid": 1, "tid_event_idx": 0, "payload": { "handle_semantic_id": "aaaa", "parent_tid": 1, "entry_pc": "0x824cd458", "ctx_ptr": "0x42453b3c", "priority": 0, "affinity": 4, "stack_size": 32768, "suspended": False, }, }, { "kind": "thread.create", "tid": 1, "tid_event_idx": 1, "payload": { "handle_semantic_id": "bbbb", "parent_tid": 1, "entry_pc": "0x82181830", "ctx_ptr": "0x42453c40", "priority": 0, "affinity": 0, "stack_size": 131072, "suspended": False, }, }, ], } de.canonicalize_host_heap_payload_fields(events_by_tid) p0 = events_by_tid[1][0]["payload"]["ctx_ptr"] p1 = events_by_tid[1][1]["payload"]["ctx_ptr"] check( p0 == "", f"first thread.create canonicalized to ordinal 0 (got {p0!r})", ) check( p1 == "", f"second thread.create canonicalized to ordinal 1 (got {p1!r})", ) # Strict fields must be UNTOUCHED. check( events_by_tid[1][0]["payload"]["entry_pc"] == "0x824cd458", "entry_pc strict (untouched)", ) check( events_by_tid[1][0]["payload"]["stack_size"] == 32768, "stack_size strict (untouched)", ) check( events_by_tid[1][1]["payload"]["affinity"] == 0, "affinity strict (untouched)", ) def test_host_heap_field_cross_engine_alignment() -> None: """End-to-end: divergent raw VAs (canary 0xbe56bb3c vs ours 0x42453b3c) canonicalize to identical sentinels and produce no divergence at the diff-event layer.""" canary_evs = { 6: [ { "kind": "thread.create", "tid": 6, "tid_event_idx": 0, "payload": { "handle_semantic_id": "17d8b2ba9dd4ba13", "parent_tid": 6, "entry_pc": "0x824cd458", "ctx_ptr": "0xbe56bb3c", "priority": 0, "affinity": 4, "stack_size": 32768, "suspended": False, }, }, ], } ours_evs = { 1: [ { "kind": "thread.create", "tid": 1, "tid_event_idx": 0, "payload": { "handle_semantic_id": "3562d07db6ff161d", "parent_tid": 1, "entry_pc": "0x824cd458", "ctx_ptr": "0x42453b3c", "priority": 0, "affinity": 4, "stack_size": 32768, "suspended": False, }, }, ], } de.canonicalize_host_heap_payload_fields(canary_evs) de.canonicalize_host_heap_payload_fields(ours_evs) c0 = canary_evs[6][0]["payload"]["ctx_ptr"] o0 = ours_evs[1][0]["payload"]["ctx_ptr"] check( c0 == o0, f"both engines canonicalize ctx_ptr to same sentinel " f"(canary={c0!r} ours={o0!r})", ) # Run compare_event to confirm no divergence: parent_tid is in # SKIP_PAYLOAD_FIELDS_BY_KIND[thread.create]; handle_semantic_id too. # ctx_ptr is now canonicalized. Everything else (entry_pc, priority, # affinity, stack_size, suspended) matches strictly. diff = de.compare_event(canary_evs[6][0], ours_evs[1][0]) check( diff is None, f"compare_event reports no divergence (got {diff!r})", ) def test_host_heap_field_real_divergence_still_caught() -> None: """Real divergences MUST still be caught (the canonicalization must NOT over-suppress strict fields). Mutate `entry_pc` / `priority` / `affinity` / `stack_size` / `suspended` each in turn and confirm each surfaces a divergence.""" def mk(payload: dict) -> dict: return { "kind": "thread.create", "tid": 6, "tid_event_idx": 0, "payload": payload, } base_canary = { "handle_semantic_id": "aa", "parent_tid": 6, "entry_pc": "0x824cd458", "ctx_ptr": "0xbe56bb3c", "priority": 0, "affinity": 4, "stack_size": 32768, "suspended": False, } # Vary each strict field one-at-a-time on ours. for field, bad in [ ("entry_pc", "0xdeadbeef"), ("priority", 16), ("affinity", 0), ("stack_size", 65536), ("suspended", True), ]: ours_payload = dict(base_canary) ours_payload["parent_tid"] = 1 # engine-local — already skipped ours_payload["handle_semantic_id"] = "bb" # already skipped ours_payload["ctx_ptr"] = "0x42453b3c" # canonicalized ours_payload[field] = bad c_evs = {6: [mk(dict(base_canary))]} o_evs = {1: [mk(ours_payload)]} de.canonicalize_host_heap_payload_fields(c_evs) de.canonicalize_host_heap_payload_fields(o_evs) diff = de.compare_event(c_evs[6][0], o_evs[1][0]) check( diff is not None and field in diff, f"strict field {field!r} divergence still caught " f"(got {diff!r})", ) def test_host_heap_field_count_mismatch_still_diverges() -> None: """If one engine emits MORE thread.create events than the other on a given tid, ordinals drift and the next event surfaces a divergence. Ordinal-count mismatch IS a behavioral divergence (per the `canonicalize_allocator_returns` contract, mirrored here).""" canary_evs = { 6: [ { "kind": "thread.create", "tid": 6, "tid_event_idx": 0, "payload": { "handle_semantic_id": "a", "ctx_ptr": "0xbe56bb3c", "entry_pc": "0x824cd458", "parent_tid": 6, "priority": 0, "affinity": 4, "stack_size": 32768, "suspended": False, }, }, { "kind": "thread.create", "tid": 6, "tid_event_idx": 1, "payload": { "handle_semantic_id": "b", "ctx_ptr": "0xbe56bc20", "entry_pc": "0x82181830", "parent_tid": 6, "priority": 0, "affinity": 0, "stack_size": 32768, "suspended": False, }, }, ], } de.canonicalize_host_heap_payload_fields(canary_evs) p0 = canary_evs[6][0]["payload"]["ctx_ptr"] p1 = canary_evs[6][1]["payload"]["ctx_ptr"] check( p0 != p1, f"successive thread.creates get distinct ctx_ptr ordinals " f"(p0={p0!r} p1={p1!r})", ) def test_host_heap_field_non_string_value_left_alone() -> None: """Defensive: if `ctx_ptr` is for any reason not a string (None, int, missing), the canonicalizer leaves it untouched. Pre-C+22 event logs whose emitter omits the field still parse without crashing.""" events_by_tid = { 6: [ { "kind": "thread.create", "tid": 6, "tid_event_idx": 0, "payload": { "ctx_ptr": None, "entry_pc": "0x824cd458", }, }, { "kind": "thread.create", "tid": 6, "tid_event_idx": 1, "payload": { # No ctx_ptr at all. "entry_pc": "0x82181830", }, }, { "kind": "thread.create", "tid": 6, "tid_event_idx": 2, "payload": { "ctx_ptr": "0xbe56bb3c", # First STRING value. "entry_pc": "0x82000000", }, }, ], } de.canonicalize_host_heap_payload_fields(events_by_tid) check( events_by_tid[6][0]["payload"]["ctx_ptr"] is None, "None ctx_ptr left as None", ) check( "ctx_ptr" not in events_by_tid[6][1]["payload"], "missing ctx_ptr stays missing", ) # Ordinal counter advances only on actual canonicalization, so the # first string-typed ctx_ptr gets ordinal 0. rewritten = events_by_tid[6][2]["payload"]["ctx_ptr"] check( rewritten == "", f"first string ctx_ptr gets ordinal 0 (got {rewritten!r})", ) def test_parent_tid_already_skipped() -> None: """Phase C+22 v1.7 audit: `parent_tid` is engine-local (canary's guest tid != ours's guest tid for the same chain), so it MUST be in the skip set. This test pins the existing behavior (already in place since C+15-α) so we don't accidentally regress.""" skip = de.SKIP_PAYLOAD_FIELDS_BY_KIND.get("thread.create", set()) check( "parent_tid" in skip, f"thread.create.parent_tid is in SKIP_PAYLOAD_FIELDS_BY_KIND " f"(got {sorted(skip)!r})", ) # End-to-end: compare two thread.create events that differ ONLY in # parent_tid. Must report no divergence (parent_tid skipped). ev_c = { "kind": "thread.create", "tid": 6, "tid_event_idx": 0, "payload": { "handle_semantic_id": "ignored", "parent_tid": 6, "entry_pc": "0x824cd458", "ctx_ptr": "", "priority": 0, "affinity": 4, "stack_size": 32768, "suspended": False, }, } ev_o = { "kind": "thread.create", "tid": 1, "tid_event_idx": 0, "payload": { "handle_semantic_id": "ignored2", "parent_tid": 1, # different "entry_pc": "0x824cd458", "ctx_ptr": "", "priority": 0, "affinity": 4, "stack_size": 32768, "suspended": False, }, } diff = de.compare_event(ev_c, ev_o) check( diff is None, f"parent_tid difference does NOT surface as divergence (got {diff!r})", ) # === Iterate 2.L (2026-05-28) — return_value / args_resolved diff tagging === def _kr(name: str, rv, status: str) -> dict: """Helper: minimal kernel.return event.""" return { "kind": "kernel.return", "tid": 1, "tid_event_idx": 100, "payload": { "name": name, "return_value": rv, "status": status, "side_effects": [], }, } def test_kernel_return_value_mismatch_categorized() -> None: """Iterate 2.L: a kernel.return whose `return_value` differs must surface a category-tagged `[return_value mismatch]` diff string including the function name and both values. Closes the reading-error #41 class (cache-probe SUCCESS vs NO_SUCH_FILE was surfacing as generic `payload.return_value: ...`).""" ev_c = _kr("NtQueryFullAttributesFile", 18446744072635809807, "0xc000000f") ev_o = _kr("NtQueryFullAttributesFile", 0, "0x00000000") diff = de.compare_event(ev_c, ev_o) check( diff is not None and "[return_value mismatch]" in diff, f"return_value diff has '[return_value mismatch]' tag (got {diff!r})", ) check( diff is not None and "name=NtQueryFullAttributesFile" in diff, f"return_value diff includes function name (got {diff!r})", ) check( diff is not None and "canary=18446744072635809807" in diff and "ours=0" in diff, f"return_value diff includes both raw values (got {diff!r})", ) def test_kernel_return_status_mismatch_categorized() -> None: """Iterate 2.L: if return_value happens to match (e.g. both are 0 but the status field disagrees — schema-impossible but defensive), the status mismatch surfaces tagged. Also covers the case where return_value matches and only status diverges.""" ev_c = _kr("NtFoo", 0, "0xc000000f") ev_o = _kr("NtFoo", 0, "0x00000000") diff = de.compare_event(ev_c, ev_o) check( diff is not None and "[status mismatch]" in diff, f"status diff has '[status mismatch]' tag (got {diff!r})", ) def test_kernel_return_value_match_no_diff() -> None: """Iterate 2.L: matching kernel.return events produce no diff.""" ev_c = _kr("NtFoo", 0, "0x00000000") ev_o = _kr("NtFoo", 0, "0x00000000") diff = de.compare_event(ev_c, ev_o) check(diff is None, f"matching kernel.return → no diff (got {diff!r})") def test_kernel_return_value_missing_one_side_falls_back() -> None: """Iterate 2.L: when return_value is absent on one side, the priority pass MUST skip (schema-gap-safe) and let the generic walk handle the missing-key case. Preserves pre-2.L behavior on partial payloads.""" ev_c = {"kind": "kernel.return", "tid": 1, "tid_event_idx": 0, "payload": {"name": "NtFoo", "status": "0x00000000", "side_effects": []}} ev_o = {"kind": "kernel.return", "tid": 1, "tid_event_idx": 0, "payload": {"name": "NtFoo", "return_value": 0, "status": "0x00000000", "side_effects": []}} diff = de.compare_event(ev_c, ev_o) # Generic walk will see return_value present on ours but missing on # canary; payload.return_value branch fires with canary=. check( diff is not None and "[return_value mismatch]" not in diff, f"missing-side fell through to generic walk (got {diff!r})", ) def test_kernel_call_args_resolved_path_mismatch_categorized() -> None: """Iterate 2.L: a kernel.call with diverging `args_resolved.path` surfaces a `[args_resolved.path mismatch]` tag with the function name. Future cache-probe path drift will be diff-visible at a glance instead of being hidden in a generic payload blob.""" ev_c = { "kind": "kernel.call", "tid": 1, "tid_event_idx": 0, "payload": { "name": "NtQueryFullAttributesFile", "args": {}, "args_resolved": {"path": "cache:\\d4ea4615\\e\\46ee8ca"}, }, } ev_o = { "kind": "kernel.call", "tid": 1, "tid_event_idx": 0, "payload": { "name": "NtQueryFullAttributesFile", "args": {}, "args_resolved": {"path": "cache:\\different\\path"}, }, } diff = de.compare_event(ev_c, ev_o) check( diff is not None and "[args_resolved.path mismatch]" in diff, f"args_resolved.path diff tagged (got {diff!r})", ) check( diff is not None and "name=NtQueryFullAttributesFile" in diff, f"args_resolved diff includes function name (got {diff!r})", ) def test_kernel_call_matching_args_resolved_no_diff() -> None: """Iterate 2.L: matching kernel.call events produce no diff.""" ev_c = { "kind": "kernel.call", "tid": 1, "tid_event_idx": 0, "payload": {"name": "NtFoo", "args": {"x": 1}, "args_resolved": {"path": "abc"}}, } ev_o = { "kind": "kernel.call", "tid": 1, "tid_event_idx": 0, "payload": {"name": "NtFoo", "args": {"x": 1}, "args_resolved": {"path": "abc"}}, } diff = de.compare_event(ev_c, ev_o) check(diff is None, f"matching kernel.call → no diff (got {diff!r})") def main() -> int: test_fnv1a_vector() test_shared_global_sid_determinism() test_shared_global_sid_matches_rust() test_is_shared_global_handle_create() test_floating_handle_create_cross_tid() test_strict_alignment_without_floating() test_non_floating_real_divergence_still_caught() test_floating_sids_collection_via_main() # Phase C+21 test_is_shared_global_wait_begin_positive() test_is_shared_global_wait_begin_negative() test_is_shared_global_wait_begin_mixed_handles_wait_all() test_floating_wait_begin_cross_tid_canary_only() test_floating_wait_begin_cross_tid_ours_only() test_per_thread_wait_begin_NOT_absorbed() test_wait_begin_strict_match_unchanged() test_collect_shared_global_sids_cross_tid_heuristic() test_collect_shared_global_sids_single_tid_excluded() test_engine_local_contention_observed_skipped_both_sides() test_engine_local_one_sided_contention_observed() test_nested_cs_cleanup_block_absorbed_when_convergent() test_nested_cs_cleanup_NOT_absorbed_when_followup_diverges() test_nested_cs_cleanup_NOT_absorbed_when_canary_has_no_followup() # Phase C+25 test_mm_get_physical_address_in_allocator_set() test_mm_get_physical_address_canonicalization() test_mm_get_physical_address_cross_engine_alignment() test_mm_get_physical_address_count_mismatch_still_diverges() # Phase absorber-review test_disable_shared_global_absorber() test_absorbed_sink_records_event() test_disable_wait_begin_absorber_and_records() test_disable_nested_cs_absorber() # v1.6 (AUDIT-070 bridge) test_sema_release_engine_local_skipped_both_sides() test_sema_release_counted() test_sema_release_in_engine_local_kinds() # Phase C+22 v1.7 host-heap payload-field canonicalization test_thread_create_ctx_ptr_in_host_heap_set() test_host_heap_field_canonicalization_ordinals() test_host_heap_field_cross_engine_alignment() test_host_heap_field_real_divergence_still_caught() test_host_heap_field_count_mismatch_still_diverges() test_host_heap_field_non_string_value_left_alone() test_parent_tid_already_skipped() # Iterate 2.L (return_value / args_resolved diff tagging) test_kernel_return_value_mismatch_categorized() test_kernel_return_status_mismatch_categorized() test_kernel_return_value_match_no_diff() test_kernel_return_value_missing_one_side_falls_back() test_kernel_call_args_resolved_path_mismatch_categorized() test_kernel_call_matching_args_resolved_no_diff() print() if FAILURES: print(f"FAIL: {len(FAILURES)} test(s) failed: {FAILURES}") return 1 print("PASS: all diff_events.py tests passed") return 0 if __name__ == "__main__": sys.exit(main())