chore: add migration/ bundle for cross-machine setup

Bundles state that lives OUTSIDE the xenia-rs repo so a fresh clone on another machine can be brought up to identical configuration via migration/setup.sh: - claude-memory/ ~/.claude/projects/-home-fabi-RE-Project-Sylpheed/memory/ (103 files, 1.1 MB - MEMORY.md + every project_xenia_rs_*.md from audits addis_signext through audit-058) - project-root/dot-claude/ <project-root>/.claude/settings.json (Stop hook + permissions) - project-root/ppc-manual/ <project-root>/ppc-manual/ (PowerPC reference docs, 397 files, 3.7 MB) - project-root/run-canary.sh <project-root>/run-canary.sh - README.md Human-readable setup checklist - setup.sh Idempotent installer (also reclones xenia-canary at pinned HEAD 6de80dffe) - MANIFEST.md Per-file mapping + per-file-not-bundled restoration recipe Excluded from bundle (not shippable via git): - Sylpheed ISO (7.8 GB; copyright; manual copy required) - sylpheed.db (395 MB; regenerable from XEX via analysis tooling) - target/ build artifacts (rebuild on target) - audit-runs probe firehoses (.log/.stdout/.stderr ~11 GB; rerun if needed) - audit-runs memory dumps (.bin ~4.5 GB; rerun audit-026/027/029 if needed) - xenia-canary checkout (setup.sh reclones from git.mc02.dev/fabi/Xenia-Canary.git at HEAD 6de80dffe) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 21:38:38 +02:00
parent 8e709b0a24
commit e6d43a23ac
505 changed files with 86028 additions and 0 deletions
--- a/migration/project-root/ppc-manual/generator/rust_scraper.py
+++ b/migration/project-root/ppc-manual/generator/rust_scraper.py
@@ -0,0 +1,184 @@
+"""
+Scrapes xenia-rs source files for per-instruction references and
+snippets of the interpreter semantics.
+
+Outputs produced for each mnemonic:
+  - opcode_line:     line in crates/xenia-cpu/src/opcode.rs where the
+                     PpcOpcode variant is declared (1-indexed)
+  - decoder_line:    line in crates/xenia-cpu/src/decoder.rs where the
+                     variant is produced from raw bits
+  - interp_start:    line in crates/xenia-cpu/src/interpreter.rs where
+                     the match arm `PpcOpcode::<mnem> =>` begins
+  - interp_end:      line where the arm closes (matching brace, naive)
+  - interp_body:     raw text of the arm body (for reviewer reference)
+
+The xenia-rs opcode identifier often has a trailing `x` preserved
+(PpcOpcode::addx). This scraper looks each mnemonic up under a single
+identifier: the XML mnemonic with every '.' replaced by 'x' (see
+`_rust_ident`); no alternative spellings, such as the name with its
+trailing 'x' stripped, are tried.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+import re
+
+
+@dataclass
+class RustRef:
+    mnem: str
+    opcode_line: int | None = None
+    decoder_line: int | None = None
+    interp_start: int | None = None
+    interp_end: int | None = None
+    interp_body: str = ""
+
+
+# NOTE: '.' is illegal in Rust identifiers, so XML mnemonics that end in
+# '.' cannot be used verbatim as PpcOpcode variant names. `_rust_ident`
+# below assumes the '.' is spelled as a trailing 'x' (addic. → addicx).
+# Whether any variant uses a different spelling (e.g. addic_) has not been
+# confirmed; check opcode.rs in the codebase.
+
+
+def _rust_ident(mnem: str) -> str:
+    """Convert XML mnemonic to the xenia-rs PpcOpcode variant name."""
+    # Xenia-rs uses the same name as xenia-canary's opcode enum, which
+    # mirrors ppc-instructions.xml directly. '.' is replaced with 'x' in
+    # the opcode enum (e.g. 'addic.' → 'addicx'), but the XML entry is
+    # already 'addic.'. We only need to handle that single case.
+    return mnem.replace(".", "x")
+
+
+class RustScraper:
+    def __init__(self, repo_root: Path):
+        self.repo_root = repo_root
+        self.cpu_root = repo_root / "xenia-rs" / "crates" / "xenia-cpu" / "src"
+        self._opcode_lines = self._read_lines(self.cpu_root / "opcode.rs")
+        self._decoder_lines = self._read_lines(self.cpu_root / "decoder.rs")
+        self._interp_lines = self._read_lines(self.cpu_root / "interpreter.rs")
+        self._opcode_index: dict[str, int] = self._index_opcode_enum()
+        self._decoder_index: dict[str, int] = self._index_decoder()
+        self._interp_index: dict[str, tuple[int, int]] = self._index_interpreter()
+
+    @staticmethod
+    def _read_lines(path: Path) -> list[str]:
+        if not path.is_file():
+            return []
+        return path.read_text(encoding="utf-8").splitlines()
+
+    def _index_opcode_enum(self) -> dict[str, int]:
+        """Map rust-identifier → 1-indexed line in opcode.rs. The enum uses
+        comma-separated identifiers (often many per line) so we extract
+        every identifier match inside the enum body."""
+        idx: dict[str, int] = {}
+        token = re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*)\b")
+        in_enum = False
+        for i, line in enumerate(self._opcode_lines, start=1):
+            if "pub enum PpcOpcode" in line:
+                in_enum = True
+                continue
+            if not in_enum:
+                continue
+            if line.startswith("}"):
+                break
+            stripped = line.strip()
+            # skip blank / comment-only lines
+            if not stripped or stripped.startswith("//"):
+                continue
+            # split off any trailing line comment
+            code = stripped.split("//", 1)[0]
+            for m in token.finditer(code):
+                idx.setdefault(m.group(1), i)
+        return idx
+
+    def _index_decoder(self) -> dict[str, int]:
+        """Map rust-identifier → 1-indexed line of its `PpcOpcode::<name>` producer."""
+        idx: dict[str, int] = {}
+        pat = re.compile(r"PpcOpcode::([A-Za-z_][A-Za-z0-9_]*)")
+        for i, line in enumerate(self._decoder_lines, start=1):
+            for m in pat.finditer(line):
+                name = m.group(1)
+                # keep the FIRST occurrence (the match-arm line where it's
+                # produced, not any later references)
+                idx.setdefault(name, i)
+        return idx
+
+    def _index_interpreter(self) -> dict[str, tuple[int, int]]:
+        """Map rust-identifier → (start, end) lines of the match arm.
+
+        An arm starts at `PpcOpcode::<name>` and ends at the closing `}`
+        at the same indentation level. We accept multi-variant arms of
+        the form `PpcOpcode::a | PpcOpcode::b => {` by recording the same
+        (start, end) for every named variant.
+        """
+        arm_header = re.compile(r"^(\s*)((?:PpcOpcode::[A-Za-z_][A-Za-z0-9_]*\s*\|\s*)*PpcOpcode::[A-Za-z_][A-Za-z0-9_]*)\s*=>\s*\{?\s*$")
+        # Some arms use no leading whitespace quirks — adjusted regex:
+        arm_header = re.compile(
+            r"^(\s*)"                                   # indent
+            r"((?:PpcOpcode::[A-Za-z_][A-Za-z0-9_]*"    # first variant
+            r"(?:\s*\|\s*PpcOpcode::[A-Za-z_][A-Za-z0-9_]*)*))"  # more variants
+            r"\s*=>\s*\{?\s*$"
+        )
+        var_re = re.compile(r"PpcOpcode::([A-Za-z_][A-Za-z0-9_]*)")
+        idx: dict[str, tuple[int, int]] = {}
+        i = 0
+        n = len(self._interp_lines)
+        while i < n:
+            line = self._interp_lines[i]
+            m = arm_header.match(line)
+            if not m:
+                i += 1
+                continue
+            indent = m.group(1)
+            names = var_re.findall(m.group(2))
+            # Find the closing '}' at the same indentation. The arm body
+            # starts on line i (which ends with '{') and ends at a line
+            # whose content (after `indent`) is '}' (with optional trailing
+            # comma).
+            start = i + 1  # 1-indexed
+            end = start
+            j = i + 1
+            depth = 1 if line.rstrip().endswith("{") else 0
+            if depth == 0:
+                # Single-expression arm like `... => foo(),` — treat the line
+                # itself as start=end.
+                end = start
+                j = i + 1
+            else:
+                while j < n:
+                    l = self._interp_lines[j]
+                    # A naive brace counter suffices for this codebase — the
+                    # interpreter arms use balanced braces and no string
+                    # literals containing stray braces.
+                    depth += l.count("{") - l.count("}")
+                    if depth == 0:
+                        end = j + 1  # 1-indexed
+                        break
+                    j += 1
+            for name in names:
+                idx.setdefault(name, (start, end))
+            i = j + 1
+        return idx
+
+    def lookup(self, mnem: str) -> RustRef:
+        ident = _rust_ident(mnem)
+        ref = RustRef(mnem=mnem)
+        ref.opcode_line = self._opcode_index.get(ident)
+        ref.decoder_line = self._decoder_index.get(ident)
+        rng = self._interp_index.get(ident)
+        if rng:
+            ref.interp_start, ref.interp_end = rng
+            body = "\n".join(self._interp_lines[ref.interp_start - 1: ref.interp_end])
+            ref.interp_body = body
+        return ref
+
+
+if __name__ == "__main__":
+    root = Path(__file__).resolve().parent.parent.parent
+    s = RustScraper(root)
+    for m in ("addx", "addic.", "lwz", "bclrx", "mfspr", "stvx", "vaddfp",
+              "vaddfp128", "faddx", "lvsl"):
+        r = s.lookup(m)
+        print(f"{m:12s}  opcode@{r.opcode_line}  decoder@{r.decoder_line}  "
+              f"interp@{r.interp_start}-{r.interp_end}")