""" Parses xenia-canary's tools/ppc-instructions.xml into typed records. The XML is the authoritative catalogue of Xbox 360 PPC instructions (455 entries). Each entry carries: - mnem: mnemonic (e.g. "addx", "lwzu", "vaddfp128") - opcode: 32-bit hex encoding (primary + extended opcode bits) - form: instruction format (XO, D, DS, X, XL, XFX, ..., VX, VX128_*) - group: functional group (i=integer, m=memory, b=branch, c=control, f=fpu, v=vmx) - desc: short human-readable description - / fields with optional conditional="true" flag - : template string used by the canary disassembler """ from __future__ import annotations import xml.etree.ElementTree as ET from dataclasses import dataclass, field from pathlib import Path GROUP_NAMES = { "i": "integer", "m": "memory", "b": "branch", "c": "control", "f": "fpu", "v": "vmx", } # Maps the short group code to the manual's on-disk category directory. # VMX entries are split by form in generate_manual.py (VX128_* → vmx128/). GROUP_TO_DIR = { "i": "alu", "m": "memory", "b": "branch", "c": "control", "f": "fpu", "v": "vmx", } @dataclass class Field: name: str conditional: bool = False @dataclass class Instruction: mnem: str opcode_hex: str # lowercase, no "0x" prefix form: str group: str # one-letter code desc: str sync: bool reads: list[Field] = field(default_factory=list) writes: list[Field] = field(default_factory=list) disasm: str = "" @property def opcode_int(self) -> int: return int(self.opcode_hex, 16) @property def primary_opcode(self) -> int: # PPC: bits 0-5 of a big-endian 32-bit word are the top 6 bits. return (self.opcode_int >> 26) & 0x3F @property def extended_opcode(self) -> int | None: """Best-effort extended opcode extraction by form. Returns None for forms where "extended opcode" is not meaningful (I, B, D, DS, SC, M, MD, MDS, DCBZ) — those pages will omit it.""" code = self.opcode_int form = self.form if form in ("X", "XL", "XFX", "XFL", "XS", "DCBZ"): return (code >> 1) & 0x3FF # bits 21-30 if form == "XO": return (code >> 1) & 0x1FF # bits 22-30 (bit 21 = OE) if form == "A": return (code >> 1) & 0x1F # bits 26-30 if form in ("VX", "VX128_2", "VX128_5"): return code & 0x7FF # bits 21-31 if form == "VA": return code & 0x3F # bits 26-31 if form == "VC": return code & 0x3FF # bits 22-31 (bit 21 = Rc) if form in ("VX128", "VX128_R"): # complex split; best-effort — not used for lookup, just display return code & 0x7FF if form in ("VX128_1", "VX128_3", "VX128_4", "VX128_P"): return code & 0x7FF return None @property def group_name(self) -> str: return GROUP_NAMES.get(self.group, "unknown") @property def has_rc(self) -> bool: """Does this instruction have a runtime Rc bit (record form)?""" return any(w.name == "CR" and w.conditional for w in self.writes) @property def has_oe(self) -> bool: """Does this instruction have a runtime OE bit (overflow enable)?""" return any(w.name == "OE" and w.conditional for w in self.writes) @property def has_lk(self) -> bool: """Does this instruction have a runtime LK bit (branch link)?""" return any(r.name == "LK" for r in self.reads) @property def rc_is_mandatory(self) -> bool: """Instructions like `addic.` where CR is written unconditionally.""" return any(w.name == "CR" and not w.conditional for w in self.writes) def load_instructions(xml_path: Path | str) -> list[Instruction]: tree = ET.parse(str(xml_path)) root = tree.getroot() insns: list[Instruction] = [] for node in root.iter("insn"): reads = [Field(x.get("field", ""), x.get("conditional") == "true") for x in node.findall("in")] writes = [Field(x.get("field", ""), x.get("conditional") == "true") for x in node.findall("out")] disasm_node = node.find("disasm") disasm = (disasm_node.text or "").strip() if disasm_node is not None else "" insns.append(Instruction( mnem=node.get("mnem", ""), opcode_hex=node.get("opcode", "").lower(), form=node.get("form", ""), group=node.get("group", ""), desc=node.get("desc", ""), sync=node.get("sync") == "true", reads=reads, writes=writes, disasm=disasm, )) return insns def expand_runtime_variants(insn: Instruction) -> list[dict]: """ Return the set of concrete assembly mnemonics this XML entry represents under different runtime flag settings. Flags: Rc (record) → append '.', OE (overflow) → insert 'o' before any '.', LK (link) → append 'l'. The display mnemonic is derived from the XML mnem by stripping a trailing 'x' if present (xenia uses trailing x to mark X/XO form entries; the assembly mnemonic omits it). Mnemonics ending in '.' or digits are kept. """ raw = insn.mnem # Xenia convention: trailing 'x' on XO/X/A/M/MD/MDS/XFL/XS/VX/VA form # marks "extended form" but is dropped in assembly display. # Keep trailing x for: memory indexed forms (lbzx, lwzx, ...), which are # separate XML entries — those should not have their x stripped. # We use the group code to decide: group=i / group=f / group=c / # form family VX*/VA/VC → strip trailing x. group=m / group=b → keep. def strip_x(m: str) -> str: if not m.endswith("x"): return m # Memory mnemonics: 'x' is part of the assembly name (indexed form). if insn.group == "m": return m # Branch: bx/bcx/bcctrx/bclrx — xenia's trailing x, strip. return m[:-1] base = strip_x(raw) variants: list[dict] = [] if insn.rc_is_mandatory: # e.g. addic. — already has the dot baked in variants.append({"mnem": raw, "flags": {}, "is_primary": True}) return variants has_rc = insn.has_rc has_oe = insn.has_oe has_lk = insn.has_lk if not (has_rc or has_oe or has_lk): variants.append({"mnem": base, "flags": {}, "is_primary": True}) return variants # Enumerate all combinations of the runtime flags that apply. def insert_o(name: str) -> str: # 'addo' / 'addo.' — insert 'o' before any trailing '.' if name.endswith("."): return name[:-1] + "o." return name + "o" combos: list[tuple[str, dict]] = [(base, {})] if has_oe: combos += [(insert_o(n), {**f, "OE": 1}) for (n, f) in combos] if has_rc: combos += [(n + ".", {**f, "Rc": 1}) for (n, f) in combos] if has_lk: # Branch link: append 'l' AFTER any trailing dot? PPC convention: # bl, bcl, bclrl, bcctrl — 'l' is appended at the end of the base # mnemonic with no dot (branches don't have Rc). Add the l-variant # only when OE/Rc weren't applied. combos += [(n + "l", {**f, "LK": 1}) for (n, f) in combos if "Rc" not in f and "OE" not in f] for i, (name, flags) in enumerate(combos): variants.append({"mnem": name, "flags": flags, "is_primary": i == 0}) return variants if __name__ == "__main__": # Smoke test: print summary of what we loaded. import sys repo_root = Path(__file__).resolve().parent.parent.parent xml = repo_root / "xenia-canary" / "tools" / "ppc-instructions.xml" insns = load_instructions(xml) print(f"Loaded {len(insns)} instructions from {xml}") total_mnems = sum(len(expand_runtime_variants(i)) for i in insns) print(f"Total runtime-expanded mnemonics: {total_mnems}") # show 5 examples for mnem in ("addx", "lwz", "bclrx", "mfspr", "stvx", "vaddfp", "vaddfp128", "addic."): for i in insns: if i.mnem == mnem: vs = expand_runtime_variants(i) print(f" {mnem:12s} form={i.form:7s} group={i.group} " f"variants={[v['mnem'] for v in vs]}") break else: print(f" {mnem:12s} NOT FOUND")