chore: add migration/ bundle for cross-machine setup
Bundles state that lives OUTSIDE the xenia-rs repo so a fresh clone on
another machine can be brought up to identical configuration via
migration/setup.sh:
- claude-memory/ ~/.claude/projects/-home-fabi-RE-Project-Sylpheed/memory/
(103 files, 1.1 MB - MEMORY.md + every
project_xenia_rs_*.md from audits
addis_signext through audit-058)
- project-root/dot-claude/ <project-root>/.claude/settings.json
(Stop hook + permissions)
- project-root/ppc-manual/ <project-root>/ppc-manual/
(PowerPC reference docs, 397 files, 3.7 MB)
- project-root/run-canary.sh <project-root>/run-canary.sh
- README.md Human-readable setup checklist
- setup.sh Idempotent installer (also reclones
xenia-canary at pinned HEAD 6de80dffe)
- MANIFEST.md Per-file mapping + per-file-not-bundled
restoration recipe
Excluded from bundle (not shippable via git):
- Sylpheed ISO (7.8 GB; copyright; manual copy required)
- sylpheed.db (395 MB; regenerable from XEX via analysis tooling)
- target/ build artifacts (rebuild on target)
- audit-runs probe firehoses (.log/.stdout/.stderr ~11 GB; rerun if needed)
- audit-runs memory dumps (.bin ~4.5 GB; rerun audit-026/027/029 if needed)
- xenia-canary checkout (setup.sh reclones from
git.mc02.dev/fabi/Xenia-Canary.git at HEAD 6de80dffe)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
184
migration/project-root/ppc-manual/generator/rust_scraper.py
Normal file
184
migration/project-root/ppc-manual/generator/rust_scraper.py
Normal file
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
Scrapes xenia-rs source files for per-instruction references and
|
||||
snippets of the interpreter semantics.
|
||||
|
||||
Outputs produced for each mnemonic:
|
||||
- opcode_line: line in crates/xenia-cpu/src/opcode.rs where the
|
||||
PpcOpcode variant is declared (1-indexed)
|
||||
- decoder_line: line in crates/xenia-cpu/src/decoder.rs where the
|
||||
variant is produced from raw bits
|
||||
- interp_start: line in crates/xenia-cpu/src/interpreter.rs where
|
||||
the match arm `PpcOpcode::<mnem> =>` begins
|
||||
- interp_end: line where the arm closes (matching brace, naive)
|
||||
- interp_body: raw text of the arm body (for reviewer reference)
|
||||
|
||||
Mnemonics are mapped to a xenia-rs opcode identifier by replacing every
'.' with 'x' (see `_rust_ident`), e.g. 'addic.' is looked up as
`PpcOpcode::addicx`. Mnemonics that already carry the trailing 'x'
(e.g. addx → PpcOpcode::addx) are used unchanged. No other spelling
variants are tried.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
|
||||
@dataclass
class RustRef:
    """Where a single mnemonic shows up in the xenia-rs CPU sources.

    Every location is a 1-indexed line number and stays ``None`` when the
    mnemonic was not found in the corresponding file.
    """

    # XML mnemonic this record describes.
    mnem: str
    # Line in opcode.rs where the PpcOpcode variant is declared.
    opcode_line: int | None = None
    # Line in decoder.rs where the variant is first referenced.
    decoder_line: int | None = None
    # First line of the interpreter match arm.
    interp_start: int | None = None
    # Last line of the interpreter match arm (inclusive).
    interp_end: int | None = None
    # Raw source text of the match arm; empty when no arm was found.
    interp_body: str = ""
|
||||
|
||||
|
||||
# '.' is illegal in Rust identifiers, so `_rust_ident` below rewrites it to
# 'x' (e.g. 'addic.' → 'addicx'). Every other mnemonic is assumed to match
# the PpcOpcode variant name verbatim.
# TODO: confirm against opcode.rs that no variant uses a different spelling
# (for example a trailing '_', as in 'addic_').
|
||||
|
||||
|
||||
def _rust_ident(mnem: str) -> str:
|
||||
"""Convert XML mnemonic to the xenia-rs PpcOpcode variant name."""
|
||||
# Xenia-rs uses the same name as xenia-canary's opcode enum, which
|
||||
# mirrors ppc-instructions.xml directly. '.' is replaced with 'x' in
|
||||
# the opcode enum (e.g. 'addic.' → 'addicx'), but the XML entry is
|
||||
# already 'addic.'. We only need to handle that single case.
|
||||
return mnem.replace(".", "x")
|
||||
|
||||
|
||||
class RustScraper:
    """Index the xenia-rs CPU sources so mnemonics can be mapped to Rust lines.

    On construction the scraper reads ``opcode.rs``, ``decoder.rs`` and
    ``interpreter.rs`` from ``<repo_root>/xenia-rs/crates/xenia-cpu/src`` and
    builds three lookup tables keyed by PpcOpcode variant name. A file that
    does not exist is treated as empty, so its table is simply empty and
    the matching fields of every `RustRef` stay ``None``.
    """

    # Any Rust-style identifier; used to pull variant names out of the enum body.
    _TOKEN_RE = re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*)\b")
    # A qualified variant reference such as `PpcOpcode::addx`.
    _VARIANT_RE = re.compile(r"PpcOpcode::([A-Za-z_][A-Za-z0-9_]*)")
    # A match-arm header occupying a whole line:
    #   `PpcOpcode::a | PpcOpcode::b => {`   or   `PpcOpcode::a =>`
    _ARM_HEADER_RE = re.compile(
        r"^(\s*)"                                             # indent
        r"((?:PpcOpcode::[A-Za-z_][A-Za-z0-9_]*"              # first variant
        r"(?:\s*\|\s*PpcOpcode::[A-Za-z_][A-Za-z0-9_]*)*))"   # more variants
        r"\s*=>\s*\{?\s*$"
    )

    def __init__(self, repo_root: Path):
        """Read the three source files under *repo_root* and build the indexes."""
        self.repo_root = repo_root
        self.cpu_root = repo_root / "xenia-rs" / "crates" / "xenia-cpu" / "src"
        self._opcode_lines = self._read_lines(self.cpu_root / "opcode.rs")
        self._decoder_lines = self._read_lines(self.cpu_root / "decoder.rs")
        self._interp_lines = self._read_lines(self.cpu_root / "interpreter.rs")
        self._opcode_index: dict[str, int] = self._index_opcode_enum()
        self._decoder_index: dict[str, int] = self._index_decoder()
        self._interp_index: dict[str, tuple[int, int]] = self._index_interpreter()

    @staticmethod
    def _read_lines(path: Path) -> list[str]:
        """Return the UTF-8 text of *path* split into lines, or ``[]`` if it is not a file."""
        if not path.is_file():
            # Deliberate best effort: a missing source file yields an empty index.
            return []
        return path.read_text(encoding="utf-8").splitlines()

    @staticmethod
    def _brace_delta(line: str) -> int:
        """Return the net brace depth change contributed by one source line.

        Counts ``{`` as +1 and ``}`` as -1, but ignores braces that appear
        after a ``//`` line comment marker, inside a double-quoted string
        literal, or inside a simple char literal such as ``'{'``. State is
        per line only: block comments and strings spanning several lines are
        not tracked.
        """
        delta = 0
        in_string = False
        pos = 0
        length = len(line)
        while pos < length:
            ch = line[pos]
            if in_string:
                if ch == "\\":
                    # Skip the escaped character so `\"` does not end the string.
                    pos += 2
                    continue
                if ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == "'":
                # Char literals: '\x' (escaped) or 'x'. A lifetime such as 'a
                # has no closing quote at these offsets and falls through.
                if line[pos + 1:pos + 2] == "\\" and line[pos + 3:pos + 4] == "'":
                    pos += 4
                    continue
                if line[pos + 2:pos + 3] == "'":
                    pos += 3
                    continue
            elif ch == "/" and line.startswith("//", pos):
                # Rest of the line is a comment.
                break
            elif ch == "{":
                delta += 1
            elif ch == "}":
                delta -= 1
            pos += 1
        return delta

    def _index_opcode_enum(self) -> dict[str, int]:
        """Map identifier → 1-indexed line inside the ``pub enum PpcOpcode`` body.

        The enum may list several comma-separated variants per line, so every
        identifier token found on a code line is recorded. The first line an
        identifier appears on wins. Scanning stops at the first line that
        starts with ``}`` after the enum header.
        """
        idx: dict[str, int] = {}
        in_enum = False
        for lineno, line in enumerate(self._opcode_lines, start=1):
            if "pub enum PpcOpcode" in line:
                in_enum = True
                continue
            if not in_enum:
                continue
            if line.startswith("}"):
                break
            stripped = line.strip()
            # Skip blank and comment-only lines.
            if not stripped or stripped.startswith("//"):
                continue
            # Drop any trailing line comment before tokenising.
            code = stripped.split("//", 1)[0]
            for m in self._TOKEN_RE.finditer(code):
                idx.setdefault(m.group(1), lineno)
        return idx

    def _index_decoder(self) -> dict[str, int]:
        """Map identifier → 1-indexed line of its first ``PpcOpcode::<name>`` in decoder.rs."""
        idx: dict[str, int] = {}
        for lineno, line in enumerate(self._decoder_lines, start=1):
            for name in self._VARIANT_RE.findall(line):
                # Keep the FIRST occurrence; later references are ignored.
                idx.setdefault(name, lineno)
        return idx

    def _index_interpreter(self) -> dict[str, tuple[int, int]]:
        """Map identifier → (start, end) 1-indexed inclusive lines of its match arm.

        An arm starts on a line consisting solely of one or more
        ``PpcOpcode::<name>`` alternatives followed by ``=>`` and an optional
        ``{``. For a braced arm the end is the line on which the brace depth
        returns to zero (see `_brace_delta` for what is counted). Arms of the
        form ``PpcOpcode::a | PpcOpcode::b => {`` record the same range for
        every named variant. A header without ``{`` is recorded with
        ``start == end``. If the closing brace is never found the arm is also
        recorded with ``start == end`` and scanning stops.
        """
        idx: dict[str, tuple[int, int]] = {}
        lines = self._interp_lines
        total = len(lines)
        i = 0
        while i < total:
            header = self._ARM_HEADER_RE.match(lines[i])
            if header is None:
                i += 1
                continue
            names = self._VARIANT_RE.findall(header.group(2))
            start = i + 1  # 1-indexed
            end = start
            last = i  # 0-based index of the last line consumed by this arm
            if lines[i].rstrip().endswith("{"):
                depth = 1
                for j in range(i + 1, total):
                    depth += self._brace_delta(lines[j])
                    # `<=` so an over-closing line still terminates the arm.
                    if depth <= 0:
                        end = j + 1  # 1-indexed
                        last = j
                        break
                else:
                    # Unterminated arm: nothing after it can be trusted.
                    last = total - 1
            for name in names:
                idx.setdefault(name, (start, end))
            i = last + 1
        return idx

    def lookup(self, mnem: str) -> RustRef:
        """Return a `RustRef` describing where *mnem* appears in the sources.

        The mnemonic is converted with `_rust_ident` and looked up in each
        index; fields for which nothing was found keep their defaults.
        """
        ident = _rust_ident(mnem)
        ref = RustRef(mnem=mnem)
        ref.opcode_line = self._opcode_index.get(ident)
        ref.decoder_line = self._decoder_index.get(ident)
        span = self._interp_index.get(ident)
        if span is not None:
            ref.interp_start, ref.interp_end = span
            ref.interp_body = "\n".join(
                self._interp_lines[ref.interp_start - 1: ref.interp_end]
            )
        return ref
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: report where a few representative mnemonics were found.
    # The repository root is the third ancestor directory of this file.
    project_root = Path(__file__).resolve().parents[2]
    scraper = RustScraper(project_root)
    sample_mnemonics = (
        "addx", "addic.", "lwz", "bclrx", "mfspr", "stvx", "vaddfp",
        "vaddfp128", "faddx", "lvsl",
    )
    for mnemonic in sample_mnemonics:
        ref = scraper.lookup(mnemonic)
        print(f"{mnemonic:12s} opcode@{ref.opcode_line} decoder@{ref.decoder_line} "
              f"interp@{ref.interp_start}-{ref.interp_end}")
|
||||
Reference in New Issue
Block a user