Files
xenia-rs/migration/project-root/ppc-manual/generator/xml_model.py
MechaCat02 e6d43a23ac chore: add migration/ bundle for cross-machine setup
Bundles state that lives OUTSIDE the xenia-rs repo so a fresh clone on
another machine can be brought up to identical configuration via
migration/setup.sh:

  - claude-memory/             ~/.claude/projects/-home-fabi-RE-Project-Sylpheed/memory/
                               (103 files, 1.1 MB - MEMORY.md + every
                                project_xenia_rs_*.md from audits
                                addis_signext through audit-058)
  - project-root/dot-claude/   <project-root>/.claude/settings.json
                               (Stop hook + permissions)
  - project-root/ppc-manual/   <project-root>/ppc-manual/
                               (PowerPC reference docs, 397 files, 3.7 MB)
  - project-root/run-canary.sh <project-root>/run-canary.sh
  - README.md                  Human-readable setup checklist
  - setup.sh                   Idempotent installer (also reclones
                               xenia-canary at pinned HEAD 6de80dffe)
  - MANIFEST.md                Per-file mapping + per-file-not-bundled
                               restoration recipe

Excluded from bundle (not shippable via git):
  - Sylpheed ISO (7.8 GB; copyright; manual copy required)
  - sylpheed.db (395 MB; regenerable from XEX via analysis tooling)
  - target/ build artifacts (rebuild on target)
  - audit-runs probe firehoses (.log/.stdout/.stderr ~11 GB; rerun if needed)
  - audit-runs memory dumps (.bin ~4.5 GB; rerun audit-026/027/029 if needed)
  - xenia-canary checkout (setup.sh reclones from
    git.mc02.dev/fabi/Xenia-Canary.git at HEAD 6de80dffe)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 21:38:38 +02:00

232 lines
8.3 KiB
Python

"""
Parses xenia-canary's tools/ppc-instructions.xml into typed records.
The XML is the authoritative catalogue of Xbox 360 PPC instructions
(455 <insn> entries). Each entry carries:
- mnem: mnemonic (e.g. "addx", "lwzu", "vaddfp128")
- opcode: 32-bit hex encoding (primary + extended opcode bits)
- form: instruction format (XO, D, DS, X, XL, XFX, ..., VX, VX128_*)
- group: functional group (i=integer, m=memory, b=branch,
c=control, f=fpu, v=vmx)
- desc: short human-readable description
- <in>/<out> fields with optional conditional="true" flag
- <disasm>: template string used by the canary disassembler
"""
from __future__ import annotations
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from pathlib import Path
# Long display name for each one-letter <insn group="..."> code.
GROUP_NAMES = dict(
    i="integer",
    m="memory",
    b="branch",
    c="control",
    f="fpu",
    v="vmx",
)

# Maps the short group code to the manual's on-disk category directory.
# VMX entries are split by form in generate_manual.py (VX128_* → vmx128/).
GROUP_TO_DIR = dict(
    i="alu",
    m="memory",
    b="branch",
    c="control",
    f="fpu",
    v="vmx",
)
@dataclass
class Field:
    """One operand reference taken from an <in> or <out> child of an <insn>."""

    # Value of the XML "field" attribute (e.g. "RA", "CR", "OE").
    name: str
    # True when the XML element carries conditional="true".
    conditional: bool = False
# Where the extended opcode lives for each instruction form, expressed as
# (right shift, mask) applied to the 32-bit opcode word.  Bit ranges in the
# comments use PowerPC big-endian numbering (bit 0 is the most significant).
# Forms that are absent from this table have no extended opcode reported.
_EXTENDED_OPCODE_LAYOUT = {
    # bits 21-30
    "X": (1, 0x3FF),
    "XL": (1, 0x3FF),
    "XFX": (1, 0x3FF),
    "XFL": (1, 0x3FF),
    "DCBZ": (1, 0x3FF),
    # bits 21-29; bit 30 of an XS-form word is sh[5], not opcode
    "XS": (2, 0x1FF),
    # bits 22-30 (bit 21 = OE)
    "XO": (1, 0x1FF),
    # bits 26-30
    "A": (1, 0x1F),
    # bits 21-31
    "VX": (0, 0x7FF),
    "VX128_2": (0, 0x7FF),
    "VX128_5": (0, 0x7FF),
    # bits 26-31
    "VA": (0, 0x3F),
    # bits 22-31 (bit 21 = Rc)
    "VC": (0, 0x3FF),
    # VX128 family: the real field is split across the word; the low 11 bits
    # are a best-effort value meant for display, not for lookup.
    "VX128": (0, 0x7FF),
    "VX128_R": (0, 0x7FF),
    "VX128_1": (0, 0x7FF),
    "VX128_3": (0, 0x7FF),
    "VX128_4": (0, 0x7FF),
    "VX128_P": (0, 0x7FF),
}


@dataclass
class Instruction:
    """Typed record for one <insn> entry of ppc-instructions.xml."""

    mnem: str
    opcode_hex: str  # lowercase, no "0x" prefix
    form: str
    group: str  # one-letter code
    desc: str
    sync: bool
    reads: list[Field] = field(default_factory=list)
    writes: list[Field] = field(default_factory=list)
    disasm: str = ""

    @property
    def opcode_int(self) -> int:
        """The opcode as an integer.

        Raises:
            ValueError: if ``opcode_hex`` is empty or not valid hexadecimal.
        """
        return int(self.opcode_hex, 16)

    @property
    def primary_opcode(self) -> int:
        """The 6-bit primary opcode."""
        # PPC: bits 0-5 of a big-endian 32-bit word are the top 6 bits.
        return (self.opcode_int >> 26) & 0x3F

    @property
    def extended_opcode(self) -> int | None:
        """Best-effort extended opcode extraction by form.

        Returns None for every form without an entry in
        ``_EXTENDED_OPCODE_LAYOUT`` (e.g. I, B, D, DS, SC, M, MD, MDS) —
        those pages will omit it.
        """
        # Parse the opcode before the form lookup so that a malformed
        # opcode_hex raises regardless of the form.
        code = self.opcode_int
        layout = _EXTENDED_OPCODE_LAYOUT.get(self.form)
        if layout is None:
            return None
        shift, mask = layout
        return (code >> shift) & mask

    @property
    def group_name(self) -> str:
        """Long name of the group code, or "unknown" when it is not mapped."""
        return GROUP_NAMES.get(self.group, "unknown")

    @property
    def has_rc(self) -> bool:
        """Does this instruction have a runtime Rc bit (record form)?"""
        return any(w.name == "CR" and w.conditional for w in self.writes)

    @property
    def has_oe(self) -> bool:
        """Does this instruction have a runtime OE bit (overflow enable)?"""
        return any(w.name == "OE" and w.conditional for w in self.writes)

    @property
    def has_lk(self) -> bool:
        """Does this instruction have a runtime LK bit (branch link)?"""
        return any(r.name == "LK" for r in self.reads)

    @property
    def rc_is_mandatory(self) -> bool:
        """Instructions like `addic.` where CR is written unconditionally."""
        return any(w.name == "CR" and not w.conditional for w in self.writes)
def load_instructions(xml_path: Path | str) -> list[Instruction]:
    """Parse the instruction XML at *xml_path* into Instruction records.

    Every <insn> element found anywhere under the document root yields one
    record, in document order.  Missing string attributes become "", and
    the boolean flags are True only for the literal value "true".
    """

    def operands(insn_node, tag: str) -> list[Field]:
        # Direct <in>/<out> children of the <insn> element only.
        return [
            Field(child.get("field", ""), child.get("conditional") == "true")
            for child in insn_node.findall(tag)
        ]

    def build(insn_node) -> Instruction:
        attr = insn_node.get
        # An absent or empty <disasm> yields "" either way.
        template = (insn_node.findtext("disasm", default="") or "").strip()
        return Instruction(
            mnem=attr("mnem", ""),
            opcode_hex=attr("opcode", "").lower(),
            form=attr("form", ""),
            group=attr("group", ""),
            desc=attr("desc", ""),
            sync=attr("sync") == "true",
            reads=operands(insn_node, "in"),
            writes=operands(insn_node, "out"),
            disasm=template,
        )

    document_root = ET.parse(str(xml_path)).getroot()
    return [build(insn_node) for insn_node in document_root.iter("insn")]
def expand_runtime_variants(insn: Instruction) -> list[dict]:
    """List the concrete assembly mnemonics one XML entry stands for.

    Each result is a dict with keys "mnem" (display mnemonic), "flags"
    (the runtime flag bits set for that spelling) and "is_primary" (True
    for the first entry only).

    Naming rules:
    - A trailing 'x' on the XML mnemonic is dropped for display, except in
      the memory group (group == "m"), where it is part of the assembly
      name (lbzx, lwzx, ...).
    - OE adds an 'o' (placed before a trailing '.', if any).
    - Rc appends '.'.
    - LK appends 'l', and only to spellings without OE or Rc.

    When CR is written unconditionally the XML mnemonic is returned
    unchanged as the only variant.
    """
    xml_mnem = insn.mnem

    # Trailing-x handling: only the memory group keeps it.
    keeps_x = insn.group == "m" if xml_mnem.endswith("x") else True
    display = xml_mnem if keeps_x else xml_mnem[:-1]

    if insn.rc_is_mandatory:
        # e.g. addic. — the record form is its own XML entry, so there is
        # nothing to expand and the raw mnemonic is reported untouched.
        return [{"mnem": xml_mnem, "flags": {}, "is_primary": True}]

    rc_bit = insn.has_rc
    oe_bit = insn.has_oe
    lk_bit = insn.has_lk

    # Start from the flag-free spelling and let every applicable flag
    # double (or, for LK, extend) the list built so far.
    expanded: list[tuple[str, dict]] = [(display, {})]

    if oe_bit:
        with_oe = []
        for name, flags in expanded:
            oe_name = f"{name[:-1]}o." if name.endswith(".") else f"{name}o"
            with_oe.append((oe_name, {**flags, "OE": 1}))
        expanded.extend(with_oe)

    if rc_bit:
        with_rc = [(f"{name}.", {**flags, "Rc": 1}) for name, flags in expanded]
        expanded.extend(with_rc)

    if lk_bit:
        # bl, bcl, bclrl, bcctrl: 'l' goes on the plain spelling only.
        with_lk = [
            (f"{name}l", {**flags, "LK": 1})
            for name, flags in expanded
            if not ("Rc" in flags or "OE" in flags)
        ]
        expanded.extend(with_lk)

    return [
        {"mnem": name, "flags": flags, "is_primary": position == 0}
        for position, (name, flags) in enumerate(expanded)
    ]
def _smoke_test() -> None:
    """Load the canary instruction XML and print a short summary.

    The XML is looked up three directories above this file, under
    xenia-canary/tools/.  Output is the entry count, the total number of
    runtime-expanded mnemonics, and the expansion of a few sample entries.
    """
    repo_root = Path(__file__).resolve().parents[2]
    xml_path = repo_root / "xenia-canary" / "tools" / "ppc-instructions.xml"
    insns = load_instructions(xml_path)
    print(f"Loaded {len(insns)} instructions from {xml_path}")
    total_mnems = sum(len(expand_runtime_variants(insn)) for insn in insns)
    print(f"Total runtime-expanded mnemonics: {total_mnems}")

    # Index by XML mnemonic; setdefault keeps the first entry for a
    # duplicated mnemonic, matching a first-match linear scan.
    by_mnem: dict[str, Instruction] = {}
    for insn in insns:
        by_mnem.setdefault(insn.mnem, insn)

    # Show a handful of representative entries.
    samples = ("addx", "lwz", "bclrx", "mfspr", "stvx", "vaddfp", "vaddfp128", "addic.")
    for mnem in samples:
        insn = by_mnem.get(mnem)
        if insn is None:
            print(f" {mnem:12s} NOT FOUND")
            continue
        shown = [v["mnem"] for v in expand_runtime_variants(insn)]
        print(f" {mnem:12s} form={insn.form:7s} group={insn.group} "
              f"variants={shown}")


if __name__ == "__main__":
    # Smoke test: print summary of what we loaded.
    _smoke_test()