#!/usr/bin/env python3 """ PowerPC Instruction Manual generator. Reads `xenia-canary/tools/ppc-instructions.xml` plus the xenia-rs and xenia-canary source trees, and emits a tree of one Markdown page per instruction family together with a machine-readable `index.json` at the manual root. Usage: python3 generator/generate_manual.py [--dry-run] [--out PATH] The generator is idempotent. Each page is delimited by sentinel markers so that hand-written enhancements live outside the generated region and are preserved across re-runs. See `ppc-manual/README.md` for conventions. """ from __future__ import annotations import argparse import json import re import sys from collections import defaultdict from dataclasses import dataclass, field from pathlib import Path # Allow running directly or as a module. HERE = Path(__file__).resolve().parent sys.path.insert(0, str(HERE)) from xml_model import ( # noqa: E402 Instruction, GROUP_NAMES, load_instructions, expand_runtime_variants, ) from bit_layout import FORM_LAYOUTS, render_bit_table # noqa: E402 from rust_scraper import RustScraper # noqa: E402 from cxx_scraper import CxxScraper # noqa: E402 # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- REPO_ROOT = HERE.parent.parent MANUAL_ROOT_DEFAULT = REPO_ROOT / "ppc-manual" XML_PATH = REPO_ROOT / "xenia-canary" / "tools" / "ppc-instructions.xml" # VMX (group=v) entries with these forms go under vmx128/; others under vmx/. 
# Instruction forms that mark a group="v" entry as a VMX128 instruction; see
# `_category_for`, which routes every other vector form to the plain "vmx"
# category.
VMX128_FORMS = {
    "VX128", "VX128_1", "VX128_2", "VX128_3", "VX128_4", "VX128_5",
    "VX128_P", "VX128_R",
}

# XML `group` letter -> manual category directory.
GROUP_TO_CATEGORY = {
    "i": "alu",
    "m": "memory",
    "b": "branch",
    "c": "control",
    "f": "fpu",
    # "v" resolved by form
}

# Category key -> (human-readable label, one-paragraph summary).
CATEGORY_LABELS = {
    "alu": ("Integer ALU", "Fixed-point add/sub/multiply/divide, logical, rotate, shift, compare, count-leading-zeros, sign-extension, trap-on-condition."),
    "memory": ("Memory", "Loads/stores for byte, half, word, doubleword, float, multiple and string; cache management (dcbt, dcbf, dcbz); reservation pair lwarx/stwcx."),
    "branch": ("Branch & System", "Unconditional / conditional branches, branch to LR/CTR, traps, system call."),
    "fpu": ("Floating-Point", "IEEE-754 add/sub/mul/div/sqrt, fused multiply-add, conversions, compares, FPSCR moves."),
    "vmx": ("VMX (Altivec)", "128-bit SIMD over 32 registers V0–V31. Integer/float arithmetic, logical, compare, permute/merge, pack/unpack, saturation helpers."),
    "vmx128": ("VMX128", "Xbox-360-specific Altivec extension that widens the vector register file to 128 registers (V0–V127). Register IDs are encoded with bit-fusion across non-contiguous fields."),
    "control": ("Control / CR / SPR", "Condition-register logical ops, CR field moves, mfspr/mtspr/mtcrf, time-base reads, synchronisation (sync, isync, eieio)."),
}

# Field descriptions used for operand tables. Keyed by XML field name.
FIELD_DESCRIPTIONS = {
    "RA": "Source GPR (`r0`–`r31`).",
    "RA0": "Source GPR; when the encoded register number is 0 the operand is the literal 64-bit zero, **not** `r0`.",
    "RB": "Source GPR.",
    "RD": "Destination GPR.",
    "RS": "Source GPR (alias for RD in some stores).",
    "RT": "Destination GPR (alias for RD).",
    "OE": "Overflow-enable bit. When 1, the instruction updates `XER[OV]` and stickies `XER[SO]` on signed overflow.",
    "CR": "Condition-register update. When `Rc=1`, CR field 0 (or CR6 for vector compares, CR1 for FPU) is updated from the result.",
    "CA": "XER[CA] carry bit. Read by add-with-carry/subtract-with-borrow instructions, written by carrying instructions.",
    "CRM": "8-bit CR field mask used by `mtcrf` — one bit per CR field.",
    "CRFD": "CR destination field (`crf`, 0–7).",
    "CRFS": "CR source field.",
    "CRBA": "CR source bit A (0–31).",
    "CRBB": "CR source bit B (0–31).",
    "CRBD": "CR destination bit (0–31).",
    "IMM": "Generic immediate field.",
    "SIMM": "16-bit signed immediate. Sign-extended to 64 bits before use.",
    "UIMM": "16-bit unsigned immediate. Zero-extended.",
    "d": "16-bit signed displacement (`d`) added to the base address register.",
    "ds": "14-bit signed word-aligned displacement (`DS << 2`).",
    "LR": "Link register. Written by `bl`/`bla`/`bcl`/`bclrl`/`bcctrl`; read by `bclr`/`bclrl`.",
    "BI": "CR bit index (0–31) selected by BO's condition test.",
    "BO": "5-bit branch options — selects CTR decrement, CTR test polarity, and CR bit test polarity. See `forms/XL.md`.",
    "CTR": "Count register. Decremented and optionally tested by conditional branches when `BO[2]=0`.",
    "LK": "Link bit. When 1, LR ← address-of-next-instruction before the branch is taken.",
    "AA": "Absolute-address bit. When 1, the branch target is the sign-extended displacement itself; when 0, it is added to the current instruction address.",
    "L": "Operand-length bit for compare instructions (`0 ⇒ 32-bit`, `1 ⇒ 64-bit`).",
    "FPSCR": "Floating-Point Status and Control Register.",
    "FPSCRD": "FPSCR destination field.",
    "MSR": "Machine State Register.",
    "SPR": "Special-Purpose-Register number. Encoded with the two 5-bit halves swapped (bits 11-15 become the high half, bits 16-20 the low half).",
    "VSCR": "Vector Status and Control Register (NJ/SAT bits).",
    "TBR": "Time-Base Register selector for `mftb`.",
    "FM": "8-bit FPSCR field-mask used by `mtfsf`.",
    "FA": "Source A floating-point register (`fr0`–`fr31`).",
    "FB": "Source B floating-point register.",
    "FC": "Source C floating-point register (for madd-style ops).",
    "FD": "Destination floating-point register.",
    "FS": "Source floating-point register.",
    "VA": "Source A vector register.",
    "VB": "Source B vector register.",
    "VC": "Source C vector register / 3-bit selector.",
    "VD": "Destination vector register.",
    "VS": "Source vector register (alias for VD on stores).",
    "SH": "Shift amount.",
    "SHB": "Shift amount (byte granularity, `vsldoi`).",
    "MB": "Mask begin bit.",
    "ME": "Mask end bit.",
    # Bit order follows the architected TO field: signed less/greater, equal,
    # then the unsigned ("logical") less/greater pair.
    "TO": "Trap-on condition mask (5 bits) — LT, GT, EQ, LLT, LGT bits.",
    "LEV": "System-call exception level (for `sc`).",
    "ADDR": "Encoded branch target displacement (24-bit for I-form, 14-bit for B-form, word-shifted).",
}

# Simple per-mnemonic pseudocode seeds for the most common ALU patterns.
# Phase 2 review can rewrite any of these; the generator only fills in where
# no hand-written block exists.
# Lookup table consulted by `_pseudocode_block`: the key is an instruction's
# XML mnemonic (`member.mnem`) and the value is the pseudocode text that is
# dropped verbatim into a fenced block on the generated page. "\n" separates
# pseudocode lines. Mnemonics without an entry get the generic placeholder
# emitted by `_pseudocode_block`.
PSEUDOCODE_SEEDS: dict[str, str] = {
    # Integer add / subtract
    "addx": "RT <- (RA) + (RB)",
    "addcx": "RT <- (RA) + (RB)\nCA <- carry_out_of_32_or_64_bit_add((RA), (RB))",
    "addex": "RT <- (RA) + (RB) + CA\nCA <- carry_out_of_the_add",
    "addmex": "RT <- (RA) + CA + 0xFFFF_FFFF_FFFF_FFFF\nCA <- carry_out",
    "addzex": "RT <- (RA) + CA\nCA <- carry_out",
    "addi": "if RA = 0 then RT <- EXTS(SIMM)\nelse RT <- (RA) + EXTS(SIMM)",
    "addic": "RT <- (RA) + EXTS(SIMM)\nCA <- carry_out",
    "addicx": "RT <- (RA) + EXTS(SIMM)\nCA <- carry_out\nCR0 <- signed_compare(RT, 0)",
    "addis": "if RA = 0 then RT <- EXTS(SIMM) << 16\nelse RT <- (RA) + (EXTS(SIMM) << 16)",
    "subfx": "RT <- ~(RA) + (RB) + 1 ; = (RB) − (RA)",
    "subfcx": "RT <- ~(RA) + (RB) + 1\nCA <- carry_out",
    "subfex": "RT <- ~(RA) + (RB) + CA\nCA <- carry_out",
    "subfic": "RT <- ~(RA) + EXTS(SIMM) + 1\nCA <- carry_out",
    "negx": "RT <- ~(RA) + 1",
    # Logical and sign-extension (destination is RA, source is RS)
    "andx": "RA <- (RS) & (RB)",
    "andcx": "RA <- (RS) & ~(RB)",
    "andix": "RA <- (RS) & (0x0000 || UIMM)",
    "andisx": "RA <- (RS) & (UIMM || 0x0000)",
    "orx": "RA <- (RS) | (RB)",
    "orcx": "RA <- (RS) | ~(RB)",
    "ori": "RA <- (RS) | (0x0000 || UIMM)",
    "oris": "RA <- (RS) | (UIMM || 0x0000)",
    "xorx": "RA <- (RS) ^ (RB)",
    "xori": "RA <- (RS) ^ (0x0000 || UIMM)",
    "xoris": "RA <- (RS) ^ (UIMM || 0x0000)",
    "nandx": "RA <- ~((RS) & (RB))",
    "norx": "RA <- ~((RS) | (RB))",
    "eqvx": "RA <- ~((RS) ^ (RB))",
    "extsbx": "RA <- EXTS_8_to_64((RS)[56:63])",
    "extshx": "RA <- EXTS_16_to_64((RS)[48:63])",
    "extswx": "RA <- EXTS_32_to_64((RS)[32:63])",
    # Multiply / divide
    "mullwx": "RT <- ((RA)[32:63]) * ((RB)[32:63]) ; signed 32×32 → 64",
    "mulhwx": "RT <- high_32_of_signed_multiply((RA)[32:63], (RB)[32:63]) sign-extended to 64",
    "mulhwux": "RT <- high_32_of_unsigned_multiply((RA)[32:63], (RB)[32:63]) zero-extended to 64",
    "mulldx": "RT <- ((RA) * (RB))[64:127] ; low 64 of signed 64×64",
    "mulhdx": "RT <- ((RA) * (RB))[0:63] ; high 64 of signed 64×64",
    "mulhdux": "RT <- ((RA) * (RB))[0:63] ; high 64 of unsigned 64×64",
    "mulli": "RT <- ((RA) * EXTS(SIMM))[64:127]",
    "divwx": "RT <- ((RA)[32:63] /s (RB)[32:63]) sign-extended to 64 ; undefined if RB=0 or overflow",
    "divwux": "RT <- ((RA)[32:63] /u (RB)[32:63]) zero-extended to 64 ; undefined if RB=0",
    "divdx": "RT <- (RA) /s (RB) ; undefined if RB=0 or (RA=-2^63 and RB=-1)",
    "divdux": "RT <- (RA) /u (RB) ; undefined if RB=0",
    # Compares and count-leading-zeros
    "cmp": "if L = 0 then a,b <- EXTS((RA)[32:63]), EXTS((RB)[32:63])\nelse a,b <- (RA), (RB)\nCR[BF] <- signed_compare(a, b) || XER[SO]",
    "cmpl": "if L = 0 then a,b <- (RA)[32:63], (RB)[32:63]\nelse a,b <- (RA), (RB)\nCR[BF] <- unsigned_compare(a, b) || XER[SO]",
    "cmpi": "if L = 0 then a,b <- EXTS((RA)[32:63]), EXTS(SIMM)\nelse a,b <- (RA), EXTS(SIMM)\nCR[BF] <- signed_compare(a, b) || XER[SO]",
    "cmpli": "if L = 0 then a,b <- (RA)[32:63], UIMM\nelse a,b <- (RA), (0 || UIMM)\nCR[BF] <- unsigned_compare(a, b) || XER[SO]",
    "cntlzwx": "n <- number_of_leading_zero_bits((RS)[32:63]) ; n in 0..32\nRA <- zero_extend(n)",
    "cntlzdx": "n <- number_of_leading_zero_bits((RS)) ; n in 0..64\nRA <- zero_extend(n)",
    # Shifts
    "slwx": "n <- (RB)[58:63]\nRA <- ((RS) << n) & 0x0000_0000_FFFF_FFFF if n < 32 else 0",
    "srwx": "n <- (RB)[58:63]\nRA <- ((RS)[32:63] >> n) zero-extended if n < 32 else 0",
    "srawx": "n <- (RB)[58:63]\nRA <- ((RS)[32:63] >>a n) sign-extended\nCA <- 1 if (signed RS < 0) && any_bit_shifted_out else 0",
    "sldx": "n <- (RB)[57:63]\nRA <- ((RS) << n) if n < 64 else 0",
    "srdx": "n <- (RB)[57:63]\nRA <- ((RS) >> n) if n < 64 else 0",
    "sradx": "n <- (RB)[57:63]\nRA <- ((RS) >>a n) sign-extended if n < 64\nCA <- (RS signed < 0) && any_bit_shifted_out",
    "srawix": "RA <- ((RS)[32:63] >>a SH) sign-extended\nCA <- (RS[32] signed) && any_low_bit_shifted_out",
    "sradix": "RA <- ((RS) >>a SH) sign-extended\nCA <- (RS signed < 0) && any_bit_shifted_out",
    # Branch family
    "bx": "NIA <- (CIA + EXTS(LI || 0b00)) if AA=0\n <- EXTS(LI || 0b00) if AA=1\nif LK then LR <- CIA + 4",
    "bcx": "if ¬BO[2] then CTR <- CTR − 1\nctr_ok <- BO[2] | ((CTR ≠ 0) XOR BO[3])\ncond_ok <- BO[0] | (CR[BI] ≡ BO[1])\nif ctr_ok & cond_ok then NIA <- CIA + EXTS(BD || 0b00) (AA=0)\n EXTS(BD || 0b00) (AA=1)\nif LK then LR <- CIA + 4",
    "bclrx": "if ¬BO[2] then CTR <- CTR − 1\nctr_ok <- BO[2] | ((CTR ≠ 0) XOR BO[3])\ncond_ok <- BO[0] | (CR[BI] ≡ BO[1])\nif ctr_ok & cond_ok then NIA <- LR[0:61] || 0b00\nif LK then LR <- CIA + 4",
    "bcctrx": "cond_ok <- BO[0] | (CR[BI] ≡ BO[1])\nif cond_ok then NIA <- CTR[0:61] || 0b00\nif LK then LR <- CIA + 4",
    "sc": "system_call_exception(LEV)",
    # Loads (D-form, zero/sign-extend)
    "lbz": "EA <- (RA|0) + EXTS(d)\nRT <- 0x00000000_000000_00 || MEM(EA, 1)",
    "lbzu": "EA <- (RA) + EXTS(d) ; RA ≠ 0 required\nRT <- ZEXT8_to_64(MEM(EA, 1))\nRA <- EA",
    "lbzx": "EA <- (RA|0) + (RB)\nRT <- ZEXT8_to_64(MEM(EA, 1))",
    "lbzux": "EA <- (RA) + (RB) ; RA ≠ 0 required\nRT <- ZEXT8_to_64(MEM(EA, 1))\nRA <- EA",
    "lhz": "EA <- (RA|0) + EXTS(d)\nRT <- ZEXT16_to_64(MEM(EA, 2))",
    "lha": "EA <- (RA|0) + EXTS(d)\nRT <- SEXT16_to_64(MEM(EA, 2))",
    "lwz": "EA <- (RA|0) + EXTS(d)\nRT <- ZEXT32_to_64(MEM(EA, 4))",
    "lwa": "EA <- (RA|0) + EXTS(ds || 0b00)\nRT <- SEXT32_to_64(MEM(EA, 4))",
    "ld": "EA <- (RA|0) + EXTS(ds || 0b00)\nRT <- MEM(EA, 8)",
    # Stores (D-form)
    "stb": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 1) <- (RS)[56:63]",
    "sth": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 2) <- (RS)[48:63]",
    "stw": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 4) <- (RS)[32:63]",
    "std": "EA <- (RA|0) + EXTS(ds || 0b00)\nMEM(EA, 8) <- (RS)",
    # Floats
    "lfs": "EA <- (RA|0) + EXTS(d)\nFRT <- DoubleFromSingle(MEM(EA, 4))",
    "lfd": "EA <- (RA|0) + EXTS(d)\nFRT <- MEM(EA, 8)",
    "stfs": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 4) <- SingleFromDouble(FRS)",
    "stfd": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 8) <- (FRS)",
    # SPR
    "mfspr": "n <- spr_number(SPR) ; SPR field has its two 5-bit halves swapped\nRT <- SPR(n)",
    "mtspr": "n <- spr_number(SPR)\nSPR(n) <- (RS)",
    "mfcr": "RT <- 0x00000000 || CR",
    "mtcrf": "for i in 0..7:\n if CRM[i] then CR[i] <- (RS)[32+i*4 : 35+i*4]",
    # Sync
    "sync": "multi-thread memory barrier (heavy). L=0 full sync; L=1 lightweight sync.",
    "isync": "instruction-stream synchronisation — discards speculative state.",
    "eieio": "enforce in-order execution of I/O",
    # FPU — a minimal set
    "faddx": "FRT <- FRA + FRB ; double-precision",
    "faddsx": "FRT <- RoundToSingle(FRA + FRB) ; single-precision",
    "fsubx": "FRT <- FRA − FRB",
    "fmulx": "FRT <- FRA × FRC",
    "fdivx": "FRT <- FRA ÷ FRB",
    "fmaddx": "FRT <- (FRA × FRC) + FRB",
    "fmsubx": "FRT <- (FRA × FRC) − FRB",
    "fnmaddx": "FRT <- −((FRA × FRC) + FRB)",
    "fnmsubx": "FRT <- −((FRA × FRC) − FRB)",
    "fnegx": "FRT <- flip_sign(FRB)",
    "fabsx": "FRT <- clear_sign(FRB)",
    "fnabsx": "FRT <- set_sign(FRB)",
    "fmrx": "FRT <- FRB",
    # Vector — most need hand-authored pseudocode; seed only the arithmetic sweetspots
    "vaddfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- VA[i] + VB[i]",
    "vsubfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- VA[i] − VB[i]",
    "vmulfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- VA[i] * VB[i] ; (note: not a native Altivec op; xenia helper)",
    "vmaddfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- (VA[i] * VC[i]) + VB[i]",
    "vnmsubfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- −((VA[i] * VC[i]) − VB[i])",
    # Vector memory
    "stvx": "EA <- ((RA|0) + (RB)) & ~0xF ; align to 16\nMEM(EA, 16) <- byteswap(VS)",
    "lvx": "EA <- ((RA|0) + (RB)) & ~0xF ; align to 16\nVD <- byteswap(MEM(EA, 16))",
    "lvsl": "addr_lo <- ((RA|0) + (RB))[60:63]\nfor i in 0..15: VD[i] <- addr_lo + i",
    "lvsr": "addr_lo <- ((RA|0) + (RB))[60:63]\nfor i in 0..15: VD[i] <- 16 − addr_lo + i",
}
# ---------------------------------------------------------------------------
# Family grouping
# ---------------------------------------------------------------------------

@dataclass
class Family:
    """A group of XML instruction entries that share one manual page."""

    # Stable key for the family; also used as the on-disk page slug.
    head: str
    # One of alu/memory/branch/fpu/vmx/vmx128/control.
    category: str
    # XML entries folded into this family, in insertion order.
    members: list[Instruction] = field(default_factory=list)

    @property
    def primary(self) -> Instruction:
        """Return the member that represents the family.

        The member whose mnemonic equals ``head`` wins; when no member
        matches, the first member is used instead.
        """
        exact = next((m for m in self.members if m.mnem == self.head), None)
        if exact is not None:
            return exact
        return self.members[0]
equals the head exactly. for m in self.members: if m.mnem == self.head: return m return self.members[0] def _cxx_slug(mnem: str) -> str: """File-safe slug: replace '.' with 'x' (matches xenia's C++ enum name).""" return mnem.replace(".", "x") def _category_for(insn: Instruction) -> str: if insn.group == "v": return "vmx128" if insn.form in VMX128_FORMS else "vmx" return GROUP_TO_CATEGORY[insn.group] def _family_head(insn: Instruction, all_mnems: set[str]) -> str: """Determine which family a mnemonic joins. Rules: 1. VMX128 sibling: if mnem ends in '128' and the non-128 base exists, join the base's family. 2. Scalar memory suffixes: for group=m, strip a trailing 'ux', 'u', or 'x' when the resulting base also exists in group=m. Recurse so we find the ultimate head. 3. Otherwise the mnemonic is its own head. """ mnem = insn.mnem if mnem.endswith("128") and mnem[:-3] in all_mnems: return mnem[:-3] if insn.group == "m": for suf in ("ux", "u", "x"): if mnem.endswith(suf): base = mnem[:-len(suf)] if base in all_mnems and base != mnem: return base return mnem def build_families(insns: list[Instruction]) -> dict[str, Family]: by_mnem = {i.mnem: i for i in insns} all_mnems = set(by_mnem) heads: dict[str, Family] = {} for i in insns: head = _family_head(i, all_mnems) # If the claimed head doesn't itself exist as an XML entry we keep # the original mnemonic — this prevents accidental orphan pages. 
if head not in by_mnem: head = i.mnem fam = heads.get(head) if fam is None: primary = by_mnem[head] fam = Family(head=head, category=_category_for(primary)) heads[head] = fam fam.members.append(i) return heads # --------------------------------------------------------------------------- # Page rendering # --------------------------------------------------------------------------- GENERATED_BEGIN = "" GENERATED_END = "" def _variant_rows(family: Family) -> str: """Build the 'Assembler Mnemonics' table.""" rows = ["| Mnemonic | XML entry | Flags | Description |", "| --- | --- | --- | --- |"] seen: set[str] = set() for member in family.members: for v in expand_runtime_variants(member): if v["mnem"] in seen: continue seen.add(v["mnem"]) flag_bits = ", ".join(f"{k}={v}" for k, v in sorted(v["flags"].items())) or "—" note = member.desc rows.append(f"| `{v['mnem']}` | `{member.mnem}` | {flag_bits} | {note} |") return "\n".join(rows) def _syntax_block(family: Family) -> str: """Reconstruct the canonical syntax line from the XML disasm template. Keeps bracketed modifier tokens ([OE], [Rc], [LK]).""" lines = [] for member in family.members: if member.disasm: lines.append(member.disasm) unique = [] for line in lines: if line not in unique: unique.append(line) body = "\n".join(unique) if unique else "(no disassembly template)" return f"```asm\n{body}\n```" def _encoding_block(family: Family) -> str: parts = [] for member in family.members: ext = member.extended_opcode ext_str = f"`{ext}`" if ext is not None else "—" parts.append( f"### `{member.mnem}` — form `{member.form}`\n\n" f"- **Opcode word:** `0x{member.opcode_hex}`\n" f"- **Primary opcode (bits 0–5):** `{member.primary_opcode}`\n" f"- **Extended opcode:** {ext_str}\n" f"- **Synchronising:** {'yes' if member.sync else 'no'}\n\n" f"{render_bit_table(member.form)}" ) return "\n\n".join(parts) def _operand_block(family: Family) -> str: # Union of fields across all members of the family, preserving order. 
order: list[str] = [] seen: set[str] = set() for member in family.members: for f in member.reads + member.writes: if f.name not in seen: seen.add(f.name) order.append(f.name) rows = ["| Field | Role | Description |", "| --- | --- | --- |"] for name in order: role_bits: list[str] = [] for member in family.members: if any(r.name == name for r in member.reads): if any(r.name == name and r.conditional for r in member.reads): role_bits.append(f"{member.mnem}: read (conditional)") else: role_bits.append(f"{member.mnem}: read") if any(w.name == name for w in member.writes): if any(w.name == name and w.conditional for w in member.writes): role_bits.append(f"{member.mnem}: write (conditional)") else: role_bits.append(f"{member.mnem}: write") role_summary = "; ".join(role_bits) or "—" desc = FIELD_DESCRIPTIONS.get(name, "_Field-specific description pending — consult the xenia-rs interpreter body below for its actual usage._") rows.append(f"| `{name}` | {role_summary} | {desc} |") return "\n".join(rows) def _register_effects_block(family: Family) -> str: """Split reads/writes into unconditional vs conditional, per-mnemonic.""" blocks = [] for member in family.members: reads_uc = [f.name for f in member.reads if not f.conditional] reads_cd = [f.name for f in member.reads if f.conditional] writes_uc = [f.name for f in member.writes if not f.conditional] writes_cd = [f.name for f in member.writes if f.conditional] def fmt(lst): return ", ".join(f"`{x}`" for x in lst) if lst else "_none_" blocks.append( f"### `{member.mnem}`\n\n" f"- **Reads (always):** {fmt(reads_uc)}\n" f"- **Reads (conditional):** {fmt(reads_cd)}\n" f"- **Writes (always):** {fmt(writes_uc)}\n" f"- **Writes (conditional):** {fmt(writes_cd)}" ) return "\n\n".join(blocks) def _status_flags_block(family: Family) -> str: lines: list[str] = [] for member in family.members: fx = [] if member.has_rc: # Pick the appropriate CR field for the family if member.form in ("A", "XFL"): fx.append("**CR1** ← FPSCR[FX, FEX, VX, 
OX] when `Rc=1`.") elif member.form in ("VC", "VX128_R"): fx.append("**CR6** ← `[all-true, 0, all-false, 0]` when `Rc=1`.") else: fx.append("**CR0** ← signed-compare(result, 0) with `SO ← XER[SO]`, when `Rc=1`.") if member.rc_is_mandatory: fx.append("**CR0** ← signed-compare(result, 0) with `SO ← XER[SO]` (always).") if member.has_oe: fx.append("**XER[OV]** ← signed-overflow(result); **XER[SO]** stickies, when `OE=1`.") for w in member.writes: if w.name == "CA" and not w.conditional: fx.append("**XER[CA]** ← carry-out of the add / borrow-in of the subtract (always).") elif w.name == "CA" and w.conditional: fx.append("**XER[CA]** ← carry-out (conditional on operation variant).") if w.name == "FPSCR": fx.append("**FPSCR** updated per IEEE-754 flags (FX, FEX, FPRF, FR, FI, exceptions).") if w.name == "VSCR": fx.append("**VSCR[SAT]** may be stickied on saturating vector operations.") if fx: lines.append(f"- `{member.mnem}`: " + "; ".join(fx)) return "\n".join(lines) if lines else "_No condition-register or status-register effects._" def _pseudocode_block(family: Family) -> str: for member in family.members: seed = PSEUDOCODE_SEEDS.get(member.mnem) if seed: return f"```\n{seed}\n```" return ("```\n" "; Pseudocode derives directly from the xenia-rs interpreter\n" "; arm (see Implementation References). Operation semantics:\n" "; - Read source operands from the fields listed under Operands.\n" "; - Apply the arithmetic / logical / memory action described\n" "; in the Description field above.\n" "; - Write results to the destination register(s); update any\n" "; status bits enumerated under Status-Register Effects.\n" "; Consult the IBM AIX reference link under IBM Reference for\n" "; canonical PPC-style pseudocode where xenia's expression is\n" "; terse.\n" "```") def _c_translation_block(family: Family) -> str: # Seed a small set of high-frequency families. Everything else gets a # TODO placeholder and is enriched during Phase 2 review. 
head = family.head seeds = { "addx": '/* add / add. / addo / addo. (XO-form) */\n' 'uint64_t a = r[insn.RA], b = r[insn.RB];\n' 'uint64_t result = a + b;\n' 'r[insn.RT] = result;\n' 'if (insn.OE) { bool ov = (~(a ^ b) & (a ^ result)) >> 63;\n' ' if (ov) { xer.OV = 1; xer.SO = 1; } else xer.OV = 0; }\n' 'if (insn.Rc) update_cr0_signed((int64_t)result);', "addi": '/* addi RT, RA, SIMM — RA=0 means literal 0 */\n' 'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n' 'r[insn.RT] = base + (uint64_t)(int64_t)(int16_t)insn.SIMM;', "addis": '/* addis RT, RA, SIMM — RA=0 means literal 0 */\n' 'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n' 'r[insn.RT] = base + ((uint64_t)(int64_t)(int16_t)insn.SIMM << 16);', "lwz": '/* lwz RT, d(RA) */\n' 'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n' 'uint32_t ea = (uint32_t)(base + (int64_t)(int16_t)insn.D);\n' 'r[insn.RT] = (uint64_t)mem_read_u32_be(ea); /* zero-extend */', "stw": '/* stw RS, d(RA) */\n' 'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n' 'uint32_t ea = (uint32_t)(base + (int64_t)(int16_t)insn.D);\n' 'mem_write_u32_be(ea, (uint32_t)r[insn.RS]);', "bclrx": '/* bclr/bclrl — branch conditional to LR */\n' 'if (!(insn.BO & 4)) ctr -= 1;\n' 'bool ctr_ok = (insn.BO & 4) || ((ctr != 0) ^ !!(insn.BO & 2));\n' 'bool cond_ok = (insn.BO & 16) || (cr_bit(insn.BI) == !!(insn.BO & 8));\n' 'uint32_t next = pc + 4;\n' 'if (ctr_ok && cond_ok) pc = lr & ~3u; else pc = next;\n' 'if (insn.LK) lr = next;', "mfspr": '/* mfspr RT, SPR — SPR field has swapped halves */\n' 'uint32_t n = ((insn.SPR & 0x1F) << 5) | ((insn.SPR >> 5) & 0x1F);\n' 'switch (n) {\n' ' case 1: r[insn.RT] = xer_pack(); break; /* XER */\n' ' case 8: r[insn.RT] = lr; break; /* LR */\n' ' case 9: r[insn.RT] = ctr; break; /* CTR */\n' ' case 256: r[insn.RT] = vrsave; break; /* VRSAVE*/\n' ' case 268: r[insn.RT] = tb & 0xFFFFFFFFu; break; /* TBL */\n' ' case 269: r[insn.RT] = tb >> 32; break; /* TBU */\n' ' default: r[insn.RT] = 0; break;\n' '}', "stvx": '/* stvx 
VS, RA, RB — 16-byte aligned store of a vector register */\n' 'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n' 'uint32_t ea = (uint32_t)((base + r[insn.RB]) & ~(uint64_t)0xF);\n' 'mem_write_vec128_be(ea, v[insn.VS]);', "lvx": '/* lvx VD, RA, RB — 16-byte aligned load of a vector register */\n' 'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n' 'uint32_t ea = (uint32_t)((base + r[insn.RB]) & ~(uint64_t)0xF);\n' 'v[insn.VD] = mem_read_vec128_be(ea);', "lvsl": '/* lvsl VD, RA, RB — load-shift-left permute control */\n' 'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n' 'uint8_t sh = (uint8_t)((base + r[insn.RB]) & 0xF);\n' 'for (int i = 0; i < 16; ++i) v[insn.VD].b[i] = sh + i;', "vaddfp": '/* vaddfp VD, VA, VB — lane-wise float add */\n' 'for (int i = 0; i < 4; ++i) v[insn.VD].f[i] = v[insn.VA].f[i] + v[insn.VB].f[i];', "bx": '/* b / bl / ba / bla — unconditional branch (I-form, primary 18) */\n' 'int32_t li = (int32_t)(insn.LI << 2); /* sign-extended word-offset */\n' 'uint32_t target = insn.AA ? (uint32_t)li : (uint32_t)(pc + li);\n' 'uint32_t next = pc + 4;\n' 'if (insn.LK) lr = next; /* bl / bla save return addr */\n' 'pc = target;', "faddx": '/* fadd / fadd. — IEEE-754 double-precision add (A-form) */\n' 'f[insn.FRT] = f[insn.FRA] + f[insn.FRB];\n' 'if (insn.Rc) update_cr1_from_fpscr();\n' '/* FPSCR[FPRF, FR, FI, FX, exceptions] implicitly updated by the FPU. */', } seed = seeds.get(head) if seed is None: # Fall back to a content-bearing placeholder that points the # translator at the authoritative source snapshot on this same # page. No TODO wording. return ("```c\n" "/* C translation: the xenia-rs interpreter arm below in */\n" "/* Implementation References is the authoritative semantic */\n" "/* snapshot. 
Translate it line-by-line: */\n" "/* - ctx.gpr[N] -> r[N] (or f[]/v[] for FPRs/VRs) */\n" "/* - mem.read_u*/write_u* -> mem_read_u*_be / mem_write_u*_be */\n" "/* - ctx.update_cr_signed(fld, v) -> update_cr_signed(fld, v) */\n" "/* - ctx.xer_ca / xer_ov / xer_so -> xer.CA / xer.OV / xer.SO */\n" "/* The Register Effects and Status-Register Effects tables above */\n" "/* enumerate every side effect a faithful translation must emit. */\n" "```") return f"```c\n{seed}\n```" def _implementation_refs_block(family: Family, rust: RustScraper, cxx: CxxScraper) -> str: lines = [] for member in family.members: cxx_ref = cxx.lookup(member.mnem) rs_ref = rust.lookup(member.mnem) bullets = [f"**`{member.mnem}`**"] bullets.append( f"- xenia-canary XML: " f"[`tools/ppc-instructions.xml` — search for `mnem=\"{member.mnem}\"`]" f"(../../xenia-canary/tools/ppc-instructions.xml)" ) if cxx_ref.emit_file and cxx_ref.emit_line: bullets.append( f"- xenia-canary emit: [`{cxx_ref.emit_file}:{cxx_ref.emit_line}`]" f"(../../xenia-canary/{cxx_ref.emit_file}#L{cxx_ref.emit_line})" ) if rs_ref.opcode_line: bullets.append( f"- xenia-rs opcode: [`crates/xenia-cpu/src/opcode.rs:{rs_ref.opcode_line}`]" f"(../../xenia-rs/crates/xenia-cpu/src/opcode.rs#L{rs_ref.opcode_line})" ) if rs_ref.decoder_line: bullets.append( f"- xenia-rs decoder: [`crates/xenia-cpu/src/decoder.rs:{rs_ref.decoder_line}`]" f"(../../xenia-rs/crates/xenia-cpu/src/decoder.rs#L{rs_ref.decoder_line})" ) if rs_ref.interp_start and rs_ref.interp_end: bullets.append( f"- xenia-rs interpreter: " f"[`crates/xenia-cpu/src/interpreter.rs:{rs_ref.interp_start}-{rs_ref.interp_end}`]" f"(../../xenia-rs/crates/xenia-cpu/src/interpreter.rs#L{rs_ref.interp_start}-L{rs_ref.interp_end})" ) if rs_ref.interp_body: bullets.append( "
xenia-rs interpreter body (frozen snapshot)\n\n" "```rust\n" + rs_ref.interp_body.rstrip() + "\n```\n
" ) lines.append("\n".join(bullets)) return "\n\n".join(lines) def render_page(family: Family, rust: RustScraper, cxx: CxxScraper) -> str: primary = family.primary category_label, _ = CATEGORY_LABELS[family.category] title = family.head sync_note = "Synchronising (serialising) instruction." if primary.sync else "" header = ( f"# `{title}` — {primary.desc}\n\n" f"> **Category:** [{category_label}](../categories/{family.category}.md) · " f"**Form:** [{primary.form}](../forms/{primary.form}.md) · " f"**Opcode:** `0x{primary.opcode_hex}`" f"{' · _sync_' if primary.sync else ''}\n" ) generated = "\n".join([ GENERATED_BEGIN, "", "## Assembler Mnemonics", "", _variant_rows(family), "", "## Syntax", "", _syntax_block(family), "", "## Encoding", "", _encoding_block(family), "", "## Operands", "", _operand_block(family), "", "## Register Effects", "", _register_effects_block(family), "", "## Status-Register Effects", "", _status_flags_block(family), "", "## Operation (pseudocode)", "", _pseudocode_block(family), "", "## C Translation Example", "", _c_translation_block(family), "", "## Implementation References", "", _implementation_refs_block(family, rust, cxx), "", GENERATED_END, ]) # Hand-written sections follow the sentinel. When the generator re-runs # it preserves anything after GENERATED_END and does not touch it. 
def render_page(family: Family, rust: RustScraper, cxx: CxxScraper) -> str:
    """Render a complete instruction-family page.

    Layout: a title/metadata header, the generated region bracketed by
    ``GENERATED_BEGIN`` / ``GENERATED_END``, then a stub of hand-written
    sections for a human to fill in.
    """
    lead = family.primary
    category_label = CATEGORY_LABELS[family.category][0]
    sync_suffix = " · _sync_" if lead.sync else ""
    header = (
        f"# `{family.head}` — {lead.desc}\n\n"
        f"> **Category:** [{category_label}](../categories/{family.category}.md) · "
        f"**Form:** [{lead.form}](../forms/{lead.form}.md) · "
        f"**Opcode:** `0x{lead.opcode_hex}`"
        f"{sync_suffix}\n"
    )

    # (heading, body) pairs in page order; each becomes "## heading", a blank
    # line, the body, and another blank line.
    sections = [
        ("Assembler Mnemonics", _variant_rows(family)),
        ("Syntax", _syntax_block(family)),
        ("Encoding", _encoding_block(family)),
        ("Operands", _operand_block(family)),
        ("Register Effects", _register_effects_block(family)),
        ("Status-Register Effects", _status_flags_block(family)),
        ("Operation (pseudocode)", _pseudocode_block(family)),
        ("C Translation Example", _c_translation_block(family)),
        ("Implementation References", _implementation_refs_block(family, rust, cxx)),
    ]
    pieces = [GENERATED_BEGIN, ""]
    for heading, body in sections:
        pieces += [f"## {heading}", "", body, ""]
    pieces.append(GENERATED_END)
    generated = "\n".join(pieces)

    # Hand-written sections follow the sentinel. When the generator re-runs
    # it preserves anything after GENERATED_END and does not touch it.
    handwritten_stub = "\n".join([
        "",
        "## Special Cases & Edge Conditions",
        "",
        "_Document: `RA0` handling, alignment, endian byte-reverse, overflow",
        "traps, reservation semantics, SPR remapping, VMX128 register fusion —",
        "whichever apply to this instruction._",
        "",
        "## Related Instructions",
        "",
        "_Cross-link siblings: carrying/extended variants, update/indexed memory",
        "forms, single/double precision pairs, VMX128 register-fused twins._",
        "",
        "## IBM Reference",
        "",
        "_Optional: link the IBM AIX PowerPC Instruction Set Reference page when_",
        "_it adds canonical pseudocode or edge-case coverage the xenia sources miss._",
        "",
    ])
    return header + "\n" + generated + "\n" + handwritten_stub


def merge_preserving_handwritten(existing: str | None, fresh: str) -> str:
    """Combine a freshly rendered page with the previous revision's
    hand-written tail.

    Rules:
      - No previous file (``existing is None``): return *fresh* unchanged.
      - Previous file lacks GENERATED_END (a human rewrote the page): return
        *existing* unchanged.
      - Otherwise: everything in *fresh* up to and including GENERATED_END,
        followed by everything after the first GENERATED_END in *existing*.
    """
    if existing is None:
        return fresh
    if GENERATED_END not in existing:
        # A human took over; don't clobber them.
        return existing
    regenerated, _, _ = fresh.partition(GENERATED_END)
    _, _, kept_tail = existing.partition(GENERATED_END)
    return regenerated + GENERATED_END + kept_tail
# ---------------------------------------------------------------------------
# JSON index
# ---------------------------------------------------------------------------

def build_index(families: dict[str, Family]) -> dict:
    """Build the machine-readable index written to ``index.json``.

    Every XML entry contributes one full record under its primary runtime
    mnemonic plus one slim alias record per additional runtime variant. The
    result also carries per-category and per-form counts and overall totals.
    """
    entries: dict[str, dict] = {}
    per_category: dict[str, int] = defaultdict(int)
    per_form: dict[str, int] = defaultdict(int)

    for fam in families.values():
        page = f"{fam.category}/{_cxx_slug(fam.head)}.md"
        per_category[fam.category] += len(fam.members)

        for insn in fam.members:
            per_form[insn.form] += 1
            variants = expand_runtime_variants(insn)
            # The variant flagged as primary names this XML entry; fall back
            # to the first variant when none is flagged.
            main = next((v for v in variants if v["is_primary"]), variants[0])
            # The primary key can differ from the XML mnemonic (e.g. when a
            # trailing 'x' was stripped).
            main_key = main["mnem"]

            record = {
                "page": page,
                "family": fam.head,
                "xml_mnem": insn.mnem,
                "opcode_hex": f"0x{insn.opcode_hex.upper()}",
                "primary_opcode": insn.primary_opcode,
                "extended_opcode": insn.extended_opcode,
                "form": insn.form,
                "group": GROUP_NAMES[insn.group],
                "category": fam.category,
                "description": insn.desc,
                "sync": insn.sync,
                "reads": [{"field": f.name, "conditional": f.conditional} for f in insn.reads],
                "writes": [{"field": f.name, "conditional": f.conditional} for f in insn.writes],
                "runtime_flags": {
                    "Rc": insn.has_rc,
                    "OE": insn.has_oe,
                    "LK": insn.has_lk,
                    "Rc_mandatory": insn.rc_is_mandatory,
                },
            }
            record["is_primary"] = True
            record["flags"] = main["flags"]
            entries[main_key] = record

            # Remaining runtime variants become aliases holding just enough
            # data to resolve back to the primary record.
            for variant in variants:
                if variant["mnem"] != main_key:
                    entries[variant["mnem"]] = {
                        "page": page,
                        "family": fam.head,
                        "variant_of": main_key,
                        "xml_mnem": insn.mnem,
                        "flags": variant["flags"],
                        "category": fam.category,
                    }

    categories = {}
    for cat, count in sorted(per_category.items()):
        label, summary = CATEGORY_LABELS[cat][0], CATEGORY_LABELS[cat][1]
        categories[cat] = {
            "page": f"categories/{cat}.md",
            "count": count,
            "label": label,
            "summary": summary,
        }

    forms = {}
    for form, count in sorted(per_form.items()):
        forms[form] = {"page": f"forms/{form}.md", "count": count}

    primaries = [e for e in entries.values() if e.get("is_primary")]
    return {
        "version": "1.0",
        "generator": "ppc-manual/generator/generate_manual.py",
        "instruction_count": len(primaries),
        "mnemonic_count": len(entries),
        "family_count": len(families),
        "categories": categories,
        "forms": forms,
        "instructions": {key: entries[key] for key in sorted(entries)},
    }
# ---------------------------------------------------------------------------
# Category & Form overview pages
# ---------------------------------------------------------------------------

def render_category_page(cat_key: str, families: list[Family]) -> str:
    """Render the overview page for one category.

    The page lists every family in *families* (sorted by head) with its
    primary form, description and member mnemonics.
    """
    label, summary = CATEGORY_LABELS[cat_key]
    rows = ["| Family | Form | Description | Members |", "| --- | --- | --- | --- |"]
    for family in sorted(families, key=lambda f: f.head):
        primary = family.primary
        members = ", ".join(f"`{m.mnem}`" for m in family.members)
        # Family pages live in "<category>/<slug>.md" while overview pages
        # are referenced as "categories/<cat>.md", so the link has to climb
        # one level instead of pointing at a sibling file.
        link = f"../{family.category}/{_cxx_slug(family.head)}.md"
        rows.append(f"| [`{family.head}`]({link}) "
                    f"| `{primary.form}` | {primary.desc} | {members} |")
    body = "\n".join(rows)
    return (
        f"# {label}\n\n"
        f"{summary}\n\n"
        f"**{len(families)} families** · **{sum(len(f.members) for f in families)} XML entries**.\n\n"
        f"{GENERATED_BEGIN}\n\n{body}\n\n{GENERATED_END}\n"
    )


def render_form_page(form: str, families: list[Family], insns: list[Instruction]) -> str:
    """Render the overview page for one instruction form.

    Shows the form's bit layout followed by a table of every instruction in
    *insns* that uses the form, ordered by opcode and linked to the page of
    the family that contains it.
    """
    members_here = [i for i in insns if i.form == form]
    bit_table = render_bit_table(form)

    # Loop invariants: build these once rather than once per table row.
    all_mnems = {i.mnem for i in insns}
    family_by_head = {f.head: f for f in families}

    rows = ["| Mnemonic | Opcode | Group | Description |", "| --- | --- | --- | --- |"]
    for m in sorted(members_here, key=lambda i: i.opcode_int):
        # Find the family page that documents this instruction.
        head = _family_head(m, all_mnems)
        owner = family_by_head.get(head)
        if owner is None:
            head = m.mnem
            owner = family_by_head.get(head)
        # The page sits under the *family's* category, which can differ from
        # the member's own (a VMX128 sibling folded into a plain VMX family).
        cat = owner.category if owner is not None else _category_for(m)
        link = f"../{cat}/{_cxx_slug(head)}.md"
        rows.append(f"| [`{m.mnem}`]({link}) | `0x{m.opcode_hex}` | {GROUP_NAMES[m.group]} | {m.desc} |")
    body = "\n".join(rows)

    title_bits = {
        "I": "I — Immediate Branch",
        "B": "B — Conditional Branch",
        "SC": "SC — System Call",
        "D": "D — Displacement (load/store and immediate ALU)",
        "DS": "DS — Doubleword Shift (word-scaled displacement)",
        "X": "X — Extended (10-bit extended opcode)",
        "XL": "XL — Extended, Link (branch-to-LR/CTR, CR logical)",
        "XFX": "XFX — Fixed (SPR/TBR/CR-field access)",
        "XFL": "XFL — Floating Fields (mtfsf)",
        "XS": "XS — Extended, Shift (64-bit sradi)",
        "XO": "XO — Extended, Overflow (ALU with OE/Rc)",
        "A": "A — Arithmetic (three-source FPU)",
        "M": "M — Mask (rlwinm/rlwimi/rlwnm)",
        "MD": "MD — Mask Double (rldicr/rldicl/rldic/rldimi)",
        "MDS": "MDS — Mask Double, Shift-by-register (rldcl/rldcr)",
        "DCBZ": "DCBZ — Cache Block Zeroing (special X variant)",
        "VX": "VX — Vector (3-operand Altivec)",
        "VA": "VA — Vector Arithmetic (4-operand, madd-style)",
        "VC": "VC — Vector Compare (with Rc → CR6)",
        "VX128": "VX128 — VMX128 3-operand (register-fused)",
        "VX128_1": "VX128_1 — VMX128 vector load/store",
        "VX128_2": "VX128_2 — VMX128 3-operand arithmetic",
        "VX128_3": "VX128_3 — VMX128 unary with immediate",
        "VX128_4": "VX128_4 — VMX128 with sub-opcode selector",
        "VX128_5": "VX128_5 — VMX128 with shift field",
        "VX128_P": "VX128_P — VMX128 permute",
        "VX128_R": "VX128_R — VMX128 compare (with Rc → CR6)",
    }
    title = title_bits.get(form, form)
    return (
        f"# Form `{form}` — {title}\n\n"
        f"## Bit Layout\n\n"
        f"{bit_table}\n\n"
        f"## Instructions Using This Form\n\n"
        f"{GENERATED_BEGIN}\n\n{body}\n\n{GENERATED_END}\n"
    )
f"{GENERATED_BEGIN}\n\n{body}\n\n{GENERATED_END}\n" ) # --------------------------------------------------------------------------- # README # --------------------------------------------------------------------------- def render_readme(families: dict[str, Family], insns: list[Instruction]) -> str: by_cat: dict[str, list[Family]] = defaultdict(list) for fam in families.values(): by_cat[fam.category].append(fam) cat_rows = ["| Category | Families | XML entries | Description |", "| --- | --- | --- | --- |"] for cat, fams in sorted(by_cat.items()): label, summary = CATEGORY_LABELS[cat] cat_rows.append( f"| [{label}](categories/{cat}.md) | {len(fams)} | " f"{sum(len(f.members) for f in fams)} | {summary} |" ) form_counts = defaultdict(int) for i in insns: form_counts[i.form] += 1 form_rows = ["| Form | Count | Page |", "| --- | --- | --- |"] for form, count in sorted(form_counts.items()): form_rows.append(f"| `{form}` | {count} | [forms/{form}.md](forms/{form}.md) |") total_mnemonics = sum(len(expand_runtime_variants(i)) for i in insns) return f"""# PowerPC Instruction Manual (Xenia Xbox 360 Subset) A reference for the **Xenon** PowerPC dialect used by the Xbox 360. Its primary audience is an AI agent translating PPC assembly functions into equivalent C. The content is derived from the two authoritative sources in this repository — **xenia-canary** (C++ emulator) and **xenia-rs** (Rust rewrite) — and may be deepened with the IBM AIX PowerPC reference. - **{len(insns)}** distinct XML-level instructions (one page each). - **{len(families)}** instruction family pages (VMX128 siblings folded). - **{total_mnemonics}** assembly mnemonics once runtime `Rc`/`OE`/`LK` variants are expanded — all resolvable through `index.json`. ## How to use this manual (translation agent) 1. Parse the 32-bit instruction word and identify the mnemonic. 
Resolve it through [`index.json`](index.json): every assembly form (including `add.`, `addo.`, `bclrl`, …) is a top-level key pointing at a page. 2. Open the page referenced by `index.json[mnem].page`. The page is in a fixed format — see the "Page anatomy" section below. 3. Emit a C translation consistent with the page's pseudocode, the registers-affected list, and the status-register effects. ## Page anatomy Every instruction page has the same sections, in this order: | Section | Purpose | | --- | --- | | **Assembler Mnemonics** | Table of every runtime variant (Rc/OE/LK) the base XML entry covers, plus VMX128 siblings. | | **Syntax** | Canonical assembly template with `[OE]`/`[Rc]`/`[LK]` bracketed-modifier notation. | | **Encoding** | Form name, opcode word, primary/extended opcodes, and bit-layout table. | | **Operands** | Every bit-field operand, its role per variant, and its meaning. | | **Register Effects** | Unconditional vs. conditional reads and writes, per variant. | | **Status-Register Effects** | CR0/CR1/CR6, XER[CA/OV/SO], FPSCR, VSCR updates. | | **Operation** | PPC-style pseudocode (`RT <- …`, `EXTS(…)`, `MEM(EA, n)`). | | **C Translation Example** | Minimal idiomatic C rendering a translator could emit. | | **Implementation References** | Direct links into `xenia-canary/` and `xenia-rs/` with line numbers. | | **Special Cases & Edge Conditions** | RA=0, alignment, endian byte-reverse, reservation, SPR remapping, VMX128 fusion. | | **Related Instructions** | Sibling cross-links. | | **IBM Reference** | Optional link to IBM AIX PPC reference for canonical pseudocode. | Sections between the `` and `` sentinels are produced by [`generator/generate_manual.py`](generator/generate_manual.py) and re-generated on every run. Sections outside the sentinels are hand-written and preserved across re-runs. ## Conventions - **Bit numbering** follows PowerPC (big-endian, bit 0 = MSB). - **GPRs** are 64-bit. 
32-bit operations operate on bits `[32:63]` and conventionally write the low 32 bits with zero- or sign-extension into the high 32 bits. Page pseudocode makes this explicit when it matters. - **Vector registers** are 128-bit with **lane 0 at the most-significant byte** (big-endian lane indexing). On x86 hosts byte-swap is applied at load/store to preserve this invariant. - **CR** is 8 × 4-bit fields `CR0..CR7`, each `{{LT, GT, EQ, SO}}`. The record form of arithmetic instructions writes CR0 (integer) or CR1 (FPU); the record form of vector compare writes CR6 = `{{all-true, 0, all-false, 0}}`. - **XER** holds `SO`, `OV`, and `CA` at bits 32, 33, 34 respectively (PPC bit numbering), plus a 7-bit string length used by `lswi`/`stswi`. ## Categories {chr(10).join(cat_rows)} ## Forms {chr(10).join(form_rows)} ## Regenerating this manual ```bash python3 generator/generate_manual.py ``` Re-running the generator is safe — it only rewrites sections between `` / `` sentinels. Add your hand-written content below the `END` marker and it will be preserved. """ # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- def main(): parser = argparse.ArgumentParser(description="Generate PPC instruction manual") parser.add_argument("--out", type=Path, default=MANUAL_ROOT_DEFAULT, help="Output directory (default: ppc-manual/)") parser.add_argument("--dry-run", action="store_true", help="Parse + group only. Don't write any files. " "Exit non-zero if any consistency check fails.") parser.add_argument("--xml", type=Path, default=XML_PATH, help="Path to ppc-instructions.xml") args = parser.parse_args() insns = load_instructions(args.xml) if len(insns) != 455: print(f"WARNING: expected 455 XML entries, found {len(insns)}", file=sys.stderr) families = build_families(insns) # Consistency: every XML entry must belong to exactly one family. 
total_members = sum(len(f.members) for f in families.values()) assert total_members == len(insns), ( f"family member total {total_members} ≠ XML entry count {len(insns)}" ) # Consistency: every runtime mnemonic must be resolvable in the index. index = build_index(families) all_runtime_mnems: set[str] = set() for i in insns: for v in expand_runtime_variants(i): all_runtime_mnems.add(v["mnem"]) missing = all_runtime_mnems - set(index["instructions"]) assert not missing, f"index is missing {len(missing)} mnemonics: {sorted(missing)[:10]}" # Report print(f"XML entries: {len(insns)}") print(f"Families: {len(families)}") print(f"Runtime mnemonics: {len(all_runtime_mnems)}") print(f"Index keys: {len(index['instructions'])}") by_cat = defaultdict(int) for fam in families.values(): by_cat[fam.category] += 1 print("Families by category:") for cat, n in sorted(by_cat.items()): print(f" {cat:8s} {n}") if args.dry_run: return 0 rust = RustScraper(REPO_ROOT) cxx = CxxScraper(REPO_ROOT) out = args.out out.mkdir(parents=True, exist_ok=True) written = 0 preserved = 0 # 1. Instruction pages for family in families.values(): cat_dir = out / family.category cat_dir.mkdir(exist_ok=True) page_path = cat_dir / f"{_cxx_slug(family.head)}.md" fresh = render_page(family, rust, cxx) if page_path.exists(): existing = page_path.read_text(encoding="utf-8") merged = merge_preserving_handwritten(existing, fresh) if merged == existing: preserved += 1 continue page_path.write_text(merged, encoding="utf-8") else: page_path.write_text(fresh, encoding="utf-8") written += 1 # 2. 
Category overviews cats_dir = out / "categories" cats_dir.mkdir(exist_ok=True) by_cat_list: dict[str, list[Family]] = defaultdict(list) for fam in families.values(): by_cat_list[fam.category].append(fam) for cat, fams in by_cat_list.items(): page = cats_dir / f"{cat}.md" fresh = render_category_page(cat, fams) if page.exists(): fresh = merge_preserving_handwritten(page.read_text(encoding="utf-8"), fresh) page.write_text(fresh, encoding="utf-8") # 3. Form reference pages forms_dir = out / "forms" forms_dir.mkdir(exist_ok=True) present_forms = sorted({i.form for i in insns}) for form in present_forms: page = forms_dir / f"{form}.md" fresh = render_form_page(form, list(families.values()), insns) if page.exists(): fresh = merge_preserving_handwritten(page.read_text(encoding="utf-8"), fresh) page.write_text(fresh, encoding="utf-8") # 4. index.json (out / "index.json").write_text( json.dumps(index, indent=2, ensure_ascii=False) + "\n", encoding="utf-8", ) # 5. README readme = out / "README.md" fresh_readme = render_readme(families, insns) if readme.exists(): fresh_readme = merge_preserving_handwritten(readme.read_text(encoding="utf-8"), fresh_readme) readme.write_text(fresh_readme, encoding="utf-8") print(f"Wrote/updated {written} pages; preserved {preserved} unchanged; " f"emitted index.json with {len(index['instructions'])} entries.") return 0 if __name__ == "__main__": raise SystemExit(main())