The silph title state machine (tid13) blocked on event 0x10a0, never signaled. Root cause: the event's producer chain runs on the silph worker (entry 0x821C4AD0, our tid14), which was starved. tid14 shares a HW slot with a guest spinlock/barrier participant (sub_824D1328, entry 0x824D2940) that busy-spins on the db16cyc hint `or r31,r31,r31` (encoding 0x7FFFFB78) at 0x824D140C. Under our round-robin lockstep the spinner consumed its whole block every round and starved the co-located tid14 (only 9 progress hits over 200M instr) — so the producer never reached the event-create/duplicate/signal dance the canary oracle performs (handle F80000E8 set by the submitter F8000044 via a duplicated handle).

Fix (canary-faithful): recognize the db16cyc spin hint exactly as canary's InstrEmit_orx does (code 0x7FFFFB78 -> DelayExecution) and surface it as a new StepResult::Yield. The scheduler's yield_current() promotes every Ready peer on the slot past STARVE_LIMIT so begin_slot_visit picks one next round, then they reset and the spinner reclaims the slot — fair alternation, no priority inversion, pure function of slot state (deterministic).

Result (lockstep, cache-persist, -n 200M): tid14 progresses past its old stall into a real wait; tid13 advances off 0x10a0 to a new event; hub/submitter re-enter their wait loops. imports 280k->592k, packets 124M->164M, swaps 1->2. draws still 0 (the splash's first draw is a further-upstream gate). Determinism preserved (two cold n50m runs byte-identical). n50m golden re-baselined (imports 90296->339766, swaps 1->2; draws unchanged 0). n2m golden unchanged (db16cyc not reached in first 2M). Tests 670/670.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
176 lines
6.2 KiB
Python
176 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Round 35 lockstep diff: align sub_8280AD40 entry fires between
ours (--audit-pc-probe-hex AUDIT-PC-PROBE / AUDIT-R3-DUMP) and
canary (AUDIT-HLC JitProlog).

Outputs a side-by-side rendering of:
  - per-fire entry register snapshot (r3..r10, lr)
  - 64-byte r3 dump (u32 lanes, big-endian)
Alignment is by tid + invocation order (no input-equivalence required).
"""
|
|
import os
import re
import sys
from collections import Counter
|
|
|
|
# Directory containing this script; our own log is read from alongside it.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
OURS_LOG = os.path.join(THIS_DIR, "ours.log")
# The canary log is read from a sibling directory of THIS_DIR.
CANARY_LOG = os.path.join(
    os.path.dirname(THIS_DIR), "round35-lockstep-inflate-canary", "canary.log"
)

# Probe address: only entry fires recorded at this pc are compared.
PC_TARGET = 0x8280AD40
|
|
|
|
|
|
def parse_ours(path, pc_target=None):
    """Pair AUDIT-PC-PROBE lines with their following AUDIT-R3-DUMP lines.

    Args:
        path: Path of the log file to scan.
        pc_target: Only probes whose ``pc`` field equals this value are
            kept. Defaults to the module-level ``PC_TARGET``.

    Returns:
        A list with one dict per kept probe, in log order. Each dict has
        the keys ``tid``, ``cycle``, ``lr``, ``r3`` and ``dump``; ``dump``
        holds the integer lanes of the AUDIT-R3-DUMP line that follows the
        probe, or is empty when no such line follows before the next probe.
    """
    if pc_target is None:
        pc_target = PC_TARGET

    # Compiled once up front instead of being looked up for every line.
    probe_re = re.compile(
        r"pc=0x([0-9a-f]+) tid=(\d+) hw=\d+ cycle=(\d+) "
        r"lr=0x([0-9a-f]+) r3=0x([0-9a-f]+) r11=0x([0-9a-f]+)"
    )
    lane_re = re.compile(r"\+0x[0-9a-f]+=0x([0-9a-f]+)")

    fires = []
    pending = None  # fire still waiting for its AUDIT-R3-DUMP line
    # errors="replace" keeps a stray undecodable byte from aborting the scan.
    with open(path, encoding="utf-8", errors="replace") as log:
        for raw in log:
            line = raw.strip()
            if line.startswith("AUDIT-PC-PROBE"):
                # Any new probe line ends the previous pairing, even when it
                # is malformed or for another pc; otherwise its dump would be
                # attributed to an earlier fire.
                pending = None
                m = probe_re.search(line)
                if m is None or int(m.group(1), 16) != pc_target:
                    continue
                pending = {
                    "tid": int(m.group(2)),
                    "cycle": int(m.group(3)),
                    "lr": int(m.group(4), 16),
                    "r3": int(m.group(5), 16),
                    "dump": [],
                }
                fires.append(pending)
            elif pending is not None and line.startswith("AUDIT-R3-DUMP"):
                pending["dump"] = [
                    int(lane, 16) for lane in lane_re.findall(line)
                ]
                # Only the first dump line after a probe belongs to it.
                pending = None
    return fires
|
|
|
|
|
|
def parse_canary(path, pc_target=None):
    """Pair AUDIT-HLC JitProlog header lines with following r3+NN dump lines.

    Args:
        path: Path of the log file to scan.
        pc_target: Address whose ``AUDIT-HLC JitProlog pc=...`` lines are
            parsed; it is rendered as 8 upper-case hex digits to match the
            log. Defaults to the module-level ``PC_TARGET``.

    Returns:
        A list with one dict per header line, in log order. Each dict has
        the keys ``tid``, ``r3`` .. ``r10``, ``lr`` (all parsed as hex) and
        ``dump``, a list of words indexed by ``offset // 4``. Slots that no
        dump line covered are left as 0.
    """
    if pc_target is None:
        pc_target = PC_TARGET

    fields = ("tid", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr")
    prefix = f"AUDIT-HLC JitProlog pc={pc_target:08X} "
    hdr_re = re.compile(
        prefix + " ".join(f"{name}=([0-9A-F]+)" for name in fields)
    )
    dump_re = re.compile(
        prefix
        + r"r3\+([0-9A-F]+): ([0-9A-F]+) ([0-9A-F]+) ([0-9A-F]+) ([0-9A-F]+)"
    )

    fires = []
    cur = None  # most recent header; dump rows are attached to it
    # errors="replace" keeps a stray undecodable byte from aborting the scan.
    with open(path, encoding="utf-8", errors="replace") as log:
        for raw in log:
            line = raw.strip()

            m = hdr_re.search(line)
            if m:
                cur = {
                    name: int(text, 16)
                    for name, text in zip(fields, m.groups())
                }
                cur["dump"] = []
                fires.append(cur)
                continue

            m = dump_re.search(line)
            if m is None or cur is None:
                continue
            # Each dump row carries four words starting at the given offset.
            first = int(m.group(1), 16) // 4
            words = [int(text, 16) for text in m.groups()[1:]]
            dump = cur["dump"]
            shortfall = first + len(words) - len(dump)
            if shortfall > 0:
                # Zero-fill so rows may arrive with gaps or out of order.
                dump.extend([0] * shortfall)
            dump[first:first + len(words)] = words
    return fires
|
|
|
|
|
|
def fmt_dump(d, limit=16):
    """Render the first ``limit`` words of ``d`` as space-separated hex.

    Args:
        d: Sequence of integers.
        limit: Maximum number of leading words to render (default 16).

    Returns:
        The words formatted as 8-digit, zero-padded, lower-case hex and
        joined by single spaces; an empty string for an empty sequence.
    """
    return " ".join(f"{word:08x}" for word in d[:limit])
|
|
|
|
|
|
def _lane_key(fire):
    """Return the dump words at +0x04 and +0x10, or None if the dump is too short."""
    dump = fire["dump"]
    # +0x04 is dump[1], +0x10 is dump[4]
    return (dump[1], dump[4]) if len(dump) > 4 else None


def _print_lr_breakdown(label, fires):
    """Print how many fires were recorded for each distinct lr value."""
    counts = Counter(fire["lr"] for fire in fires)
    print(f" {label}:")
    for lr, count in sorted(counts.items()):
        print(f" lr=0x{lr:08x}: {count}")


def _describe_ours(fire):
    """Format the entry-register line for one of our fires."""
    return (
        f" ours: tid={fire['tid']:<3} cycle={fire['cycle']:<10} "
        f"lr=0x{fire['lr']:08x} r3=0x{fire['r3']:08x}"
    )


def _describe_canary(fire):
    """Format the entry-register line for one canary fire."""
    return (
        f" canary: tid={fire['tid']:<3} cycle=<unk> lr=0x{fire['lr']:08x} r3=0x{fire['r3']:08x} "
        f"r4=0x{fire['r4']:08x} r5=0x{fire['r5']:08x} r6=0x{fire['r6']:08x} r7=0x{fire['r7']:08x}"
    )


def main():
    """Parse both logs and print the comparison report to stdout.

    The report has four parts: fire counts, a per-lr tally for each log,
    the first five fires side by side, and a cross-match of the dump words
    at +0x04 / +0x10 between the two logs.

    Exits with an error message if either log file cannot be read.
    """
    try:
        ours = parse_ours(OURS_LOG)
        canary = parse_canary(CANARY_LOG)
    except OSError as err:
        # A missing or unreadable log is an operator error, not a bug:
        # report it in one line instead of a traceback.
        sys.exit(f"error: cannot read log: {err}")

    print("=== Fire counts ===")
    print(f" ours: {len(ours)}")
    print(f" canary: {len(canary)}")
    print()

    print("=== Per-LR breakdown ===")
    for label, fires in (("ours", ours), ("canary", canary)):
        _print_lr_breakdown(label, fires)
    print()

    print("=== Side-by-side first 5 fires (entry registers) ===")
    # Fires are paired purely by their position in each log.
    shown = min(max(len(ours), len(canary)), 5)
    for i in range(shown):
        print(f"\n--- fire #{i} ---")
        if i < len(ours):
            print(_describe_ours(ours[i]))
            print(f" dump: {fmt_dump(ours[i]['dump'])}")
        else:
            print(" ours: <no fire>")
        if i < len(canary):
            print(_describe_canary(canary[i]))
            print(f" dump: {fmt_dump(canary[i]['dump'])}")
        else:
            print(" canary: <no fire>")

    print()
    print("=== Equivalence check: u32 lanes at +0x04 and +0x10 (work-item magic + counter) ===")
    print(" Both fields are stable identifiers across engines (host VAs differ but data should match).")
    print()
    print(" Index of fields:")
    print(" [+0x04] = work-item 'size?' (looks like a length field)")
    print(" [+0x10] = state counter (per round 30, this is [+128/4 ?]) — but in dump it's u32[4]")
    print()
    ours_keys = [_lane_key(fire) for fire in ours]
    canary_keys = [_lane_key(fire) for fire in canary]
    print(f" ours [+04,+10]: {ours_keys}")
    print(f" canary [+04,+10]: {canary_keys}")
    print()

    # Cross-match: every ours key should appear in canary (canary is a superset).
    # A fire without a usable dump (key None) has nothing to compare, so None
    # is kept out of the lookup set and such fires are reported as unmatched.
    canary_known = {key for key in canary_keys if key is not None}
    matched = [key for key in ours_keys if key in canary_known]
    unmatched_ours = [key for key in ours_keys if key not in canary_known]
    print(f" ours fires whose [+04,+10] match a canary fire: {len(matched)}/{len(ours)}")
    if unmatched_ours:
        print(f" ours fires with NO canary match: {unmatched_ours}")
|
|
|
|
|
|
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|