Bundles state that lives OUTSIDE the xenia-rs repo so a fresh clone on
another machine can be brought up to identical configuration via
migration/setup.sh:
- claude-memory/ ~/.claude/projects/-home-fabi-RE-Project-Sylpheed/memory/
(103 files, 1.1 MB - MEMORY.md + every
project_xenia_rs_*.md from audits
addis_signext through audit-058)
- project-root/dot-claude/ <project-root>/.claude/settings.json
(Stop hook + permissions)
- project-root/ppc-manual/ <project-root>/ppc-manual/
(PowerPC reference docs, 397 files, 3.7 MB)
- project-root/run-canary.sh <project-root>/run-canary.sh
- README.md Human-readable setup checklist
- setup.sh Idempotent installer (also reclones
xenia-canary at pinned HEAD 6de80dffe)
- MANIFEST.md Per-file mapping, plus a restoration recipe for
each file that is not bundled
Excluded from bundle (not shippable via git):
- Sylpheed ISO (7.8 GB; copyright; manual copy required)
- sylpheed.db (395 MB; regenerable from XEX via analysis tooling)
- target/ build artifacts (rebuild on target)
- audit-runs probe firehoses (.log/.stdout/.stderr ~11 GB; rerun if needed)
- audit-runs memory dumps (.bin ~4.5 GB; rerun audit-026/027/029 if needed)
- xenia-canary checkout (setup.sh reclones from
git.mc02.dev/fabi/Xenia-Canary.git at HEAD 6de80dffe)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1094 lines
52 KiB
Python
1094 lines
52 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
PowerPC Instruction Manual generator.
|
||
|
||
Reads `xenia-canary/tools/ppc-instructions.xml` plus the xenia-rs and
|
||
xenia-canary source trees, and emits a tree of one Markdown page per
|
||
instruction family together with a machine-readable `index.json` at the
|
||
manual root.
|
||
|
||
Usage:
|
||
python3 generator/generate_manual.py [--dry-run] [--out PATH]
|
||
|
||
The generator is idempotent. Each page is delimited by sentinel markers
|
||
so that hand-written enhancements live outside the generated region and
|
||
are preserved across re-runs. See `ppc-manual/README.md` for conventions.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import re
|
||
import sys
|
||
from collections import defaultdict
|
||
from dataclasses import dataclass, field
|
||
from pathlib import Path
|
||
|
||
# Allow running directly or as a module.
|
||
HERE = Path(__file__).resolve().parent
|
||
sys.path.insert(0, str(HERE))
|
||
|
||
from xml_model import ( # noqa: E402
|
||
Instruction,
|
||
GROUP_NAMES,
|
||
load_instructions,
|
||
expand_runtime_variants,
|
||
)
|
||
from bit_layout import FORM_LAYOUTS, render_bit_table # noqa: E402
|
||
from rust_scraper import RustScraper # noqa: E402
|
||
from cxx_scraper import CxxScraper # noqa: E402
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Configuration
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Two directory levels above the directory that contains this script (HERE).
REPO_ROOT = HERE.parent.parent
# Manual output root under the repository root; presumably the default for
# the `--out` option mentioned in the module docstring — confirm at the CLI.
MANUAL_ROOT_DEFAULT = REPO_ROOT / "ppc-manual"
# Path of the xenia-canary instruction-definition XML inside the repo tree.
XML_PATH = REPO_ROOT / "xenia-canary" / "tools" / "ppc-instructions.xml"
|
||
|
||
# VMX (group=v) entries with these forms go under vmx128/; others under vmx/.
# Consulted by `_category_for` via a membership test on `insn.form`.
VMX128_FORMS = {
    "VX128", "VX128_1", "VX128_2", "VX128_3",
    "VX128_4", "VX128_5", "VX128_P", "VX128_R",
}
|
||
|
||
# Maps an instruction's XML `group` code to its manual category slug.
# Group "v" is deliberately absent: `_category_for` decides between "vmx"
# and "vmx128" from the instruction form instead.
GROUP_TO_CATEGORY = {
    "i": "alu",
    "m": "memory",
    "b": "branch",
    "c": "control",
    "f": "fpu",
    # "v" resolved by form
}
|
||
|
||
# Category slug -> (display label, one-paragraph summary).
# `render_page` indexes this by `family.category` and uses the first element
# as the human-readable category name in the page header.
CATEGORY_LABELS = {
    "alu": ("Integer ALU", "Fixed-point add/sub/multiply/divide, logical, rotate, shift, compare, count-leading-zeros, sign-extension, trap-on-condition."),
    "memory": ("Memory", "Loads/stores for byte, half, word, doubleword, float, multiple and string; cache management (dcbt, dcbf, dcbz); reservation pair lwarx/stwcx."),
    "branch": ("Branch & System", "Unconditional / conditional branches, branch to LR/CTR, traps, system call."),
    "fpu": ("Floating-Point", "IEEE-754 add/sub/mul/div/sqrt, fused multiply-add, conversions, compares, FPSCR moves."),
    "vmx": ("VMX (Altivec)", "128-bit SIMD over 32 registers V0–V31. Integer/float arithmetic, logical, compare, permute/merge, pack/unpack, saturation helpers."),
    "vmx128": ("VMX128", "Xbox-360-specific Altivec extension that widens the vector register file to 128 registers (V0–V127). Register IDs are encoded with bit-fusion across non-contiguous fields."),
    "control": ("Control / CR / SPR", "Condition-register logical ops, CR field moves, mfspr/mtspr/mtcrf, time-base reads, synchronisation (sync, isync, eieio)."),
}
|
||
|
||
# Field descriptions used for operand tables. Keyed by XML field name.
|
||
FIELD_DESCRIPTIONS = {
|
||
"RA": "Source GPR (`r0`–`r31`).",
|
||
"RA0": "Source GPR; when the encoded register number is 0 the operand is the literal 64-bit zero, **not** `r0`.",
|
||
"RB": "Source GPR.",
|
||
"RD": "Destination GPR.",
|
||
"RS": "Source GPR (alias for RD in some stores).",
|
||
"RT": "Destination GPR (alias for RD).",
|
||
"OE": "Overflow-enable bit. When 1, the instruction updates `XER[OV]` and stickies `XER[SO]` on signed overflow.",
|
||
"CR": "Condition-register update. When `Rc=1`, CR field 0 (or CR6 for vector compares, CR1 for FPU) is updated from the result.",
|
||
"CA": "XER[CA] carry bit. Read by add-with-carry/subtract-with-borrow instructions, written by carrying instructions.",
|
||
"CRM": "8-bit CR field mask used by `mtcrf` — one bit per CR field.",
|
||
"CRFD": "CR destination field (`crf`, 0–7).",
|
||
"CRFS": "CR source field.",
|
||
"CRBA": "CR source bit A (0–31).",
|
||
"CRBB": "CR source bit B (0–31).",
|
||
"CRBD": "CR destination bit (0–31).",
|
||
"IMM": "Generic immediate field.",
|
||
"SIMM": "16-bit signed immediate. Sign-extended to 64 bits before use.",
|
||
"UIMM": "16-bit unsigned immediate. Zero-extended.",
|
||
"d": "16-bit signed displacement (`d`) added to the base address register.",
|
||
"ds": "14-bit signed word-aligned displacement (`DS << 2`).",
|
||
"LR": "Link register. Written by `bl`/`bla`/`bcl`/`bclrl`/`bcctrl`; read by `bclr`/`bclrl`.",
|
||
"BI": "CR bit index (0–31) selected by BO's condition test.",
|
||
"BO": "5-bit branch options — selects CTR decrement, CTR test polarity, and CR bit test polarity. See `forms/XL.md`.",
|
||
"CTR": "Count register. Decremented and optionally tested by conditional branches when `BO[2]=0`.",
|
||
"LK": "Link bit. When 1, LR ← address-of-next-instruction before the branch is taken.",
|
||
"AA": "Absolute-address bit. When 1, the branch target is the sign-extended displacement itself; when 0, it is added to the current instruction address.",
|
||
"L": "Operand-length bit for compare instructions (`0 ⇒ 32-bit`, `1 ⇒ 64-bit`).",
|
||
"FPSCR": "Floating-Point Status and Control Register.",
|
||
"FPSCRD": "FPSCR destination field.",
|
||
"MSR": "Machine State Register.",
|
||
"SPR": "Special-Purpose-Register number. Encoded with the two 5-bit halves swapped (bits 11-15 become the high half, bits 16-20 the low half).",
|
||
"VSCR": "Vector Status and Control Register (NJ/SAT bits).",
|
||
"TBR": "Time-Base Register selector for `mftb`.",
|
||
"FM": "8-bit FPSCR field-mask used by `mtfsf`.",
|
||
"FA": "Source A floating-point register (`fr0`–`fr31`).",
|
||
"FB": "Source B floating-point register.",
|
||
"FC": "Source C floating-point register (for madd-style ops).",
|
||
"FD": "Destination floating-point register.",
|
||
"FS": "Source floating-point register.",
|
||
"VA": "Source A vector register.",
|
||
"VB": "Source B vector register.",
|
||
"VC": "Source C vector register / 3-bit selector.",
|
||
"VD": "Destination vector register.",
|
||
"VS": "Source vector register (alias for VD on stores).",
|
||
"SH": "Shift amount.",
|
||
"SHB": "Shift amount (byte granularity, `vsldoi`).",
|
||
"MB": "Mask begin bit.",
|
||
"ME": "Mask end bit.",
|
||
"TO": "Trap-on condition mask (5 bits) — LT, GT, EQ, LGT, LLT bits.",
|
||
"LEV": "System-call exception level (for `sc`).",
|
||
"ADDR": "Encoded branch target displacement (24-bit for I-form, 14-bit for B-form, word-shifted).",
|
||
}
|
||
|
||
# Simple per-mnemonic pseudocode seeds for the most common ALU patterns.
# Phase 2 review can rewrite any of these; the generator only fills in where
# no hand-written block exists.
#
# Keys are XML mnemonics; values are the raw text that `_pseudocode_block`
# wraps in a fenced code block.
#
# The `lbz` seed uses the same ZEXT8_to_64(...) notation as lbzu/lbzx/lbzux.
# It previously prepended a full 64-bit zero literal to the loaded byte,
# which describes a 72-bit value.
PSEUDOCODE_SEEDS: dict[str, str] = {
    "addx": "RT <- (RA) + (RB)",
    "addcx": "RT <- (RA) + (RB)\nCA <- carry_out_of_32_or_64_bit_add((RA), (RB))",
    "addex": "RT <- (RA) + (RB) + CA\nCA <- carry_out_of_the_add",
    "addmex": "RT <- (RA) + CA + 0xFFFF_FFFF_FFFF_FFFF\nCA <- carry_out",
    "addzex": "RT <- (RA) + CA\nCA <- carry_out",
    "addi": "if RA = 0 then RT <- EXTS(SIMM)\nelse RT <- (RA) + EXTS(SIMM)",
    "addic": "RT <- (RA) + EXTS(SIMM)\nCA <- carry_out",
    "addicx": "RT <- (RA) + EXTS(SIMM)\nCA <- carry_out\nCR0 <- signed_compare(RT, 0)",
    "addis": "if RA = 0 then RT <- EXTS(SIMM) << 16\nelse RT <- (RA) + (EXTS(SIMM) << 16)",
    "subfx": "RT <- ~(RA) + (RB) + 1 ; = (RB) − (RA)",
    "subfcx": "RT <- ~(RA) + (RB) + 1\nCA <- carry_out",
    "subfex": "RT <- ~(RA) + (RB) + CA\nCA <- carry_out",
    "subfic": "RT <- ~(RA) + EXTS(SIMM) + 1\nCA <- carry_out",
    "negx": "RT <- ~(RA) + 1",
    "andx": "RA <- (RS) & (RB)",
    "andcx": "RA <- (RS) & ~(RB)",
    "andix": "RA <- (RS) & (0x0000 || UIMM)",
    "andisx": "RA <- (RS) & (UIMM || 0x0000)",
    "orx": "RA <- (RS) | (RB)",
    "orcx": "RA <- (RS) | ~(RB)",
    "ori": "RA <- (RS) | (0x0000 || UIMM)",
    "oris": "RA <- (RS) | (UIMM || 0x0000)",
    "xorx": "RA <- (RS) ^ (RB)",
    "xori": "RA <- (RS) ^ (0x0000 || UIMM)",
    "xoris": "RA <- (RS) ^ (UIMM || 0x0000)",
    "nandx": "RA <- ~((RS) & (RB))",
    "norx": "RA <- ~((RS) | (RB))",
    "eqvx": "RA <- ~((RS) ^ (RB))",
    "extsbx": "RA <- EXTS_8_to_64((RS)[56:63])",
    "extshx": "RA <- EXTS_16_to_64((RS)[48:63])",
    "extswx": "RA <- EXTS_32_to_64((RS)[32:63])",
    "mullwx": "RT <- ((RA)[32:63]) * ((RB)[32:63]) ; signed 32×32 → 64",
    "mulhwx": "RT <- high_32_of_signed_multiply((RA)[32:63], (RB)[32:63]) sign-extended to 64",
    "mulhwux": "RT <- high_32_of_unsigned_multiply((RA)[32:63], (RB)[32:63]) zero-extended to 64",
    "mulldx": "RT <- ((RA) * (RB))[64:127] ; low 64 of signed 64×64",
    "mulhdx": "RT <- ((RA) * (RB))[0:63] ; high 64 of signed 64×64",
    "mulhdux": "RT <- ((RA) * (RB))[0:63] ; high 64 of unsigned 64×64",
    "mulli": "RT <- ((RA) * EXTS(SIMM))[64:127]",
    "divwx": "RT <- ((RA)[32:63] /s (RB)[32:63]) sign-extended to 64 ; undefined if RB=0 or overflow",
    "divwux": "RT <- ((RA)[32:63] /u (RB)[32:63]) zero-extended to 64 ; undefined if RB=0",
    "divdx": "RT <- (RA) /s (RB) ; undefined if RB=0 or (RA=-2^63 and RB=-1)",
    "divdux": "RT <- (RA) /u (RB) ; undefined if RB=0",
    "cmp": "if L = 0 then a,b <- EXTS((RA)[32:63]), EXTS((RB)[32:63])\nelse a,b <- (RA), (RB)\nCR[BF] <- signed_compare(a, b) || XER[SO]",
    "cmpl": "if L = 0 then a,b <- (RA)[32:63], (RB)[32:63]\nelse a,b <- (RA), (RB)\nCR[BF] <- unsigned_compare(a, b) || XER[SO]",
    "cmpi": "if L = 0 then a,b <- EXTS((RA)[32:63]), EXTS(SIMM)\nelse a,b <- (RA), EXTS(SIMM)\nCR[BF] <- signed_compare(a, b) || XER[SO]",
    "cmpli": "if L = 0 then a,b <- (RA)[32:63], UIMM\nelse a,b <- (RA), (0 || UIMM)\nCR[BF] <- unsigned_compare(a, b) || XER[SO]",
    "cntlzwx": "n <- number_of_leading_zero_bits((RS)[32:63]) ; n in 0..32\nRA <- zero_extend(n)",
    "cntlzdx": "n <- number_of_leading_zero_bits((RS)) ; n in 0..64\nRA <- zero_extend(n)",
    "slwx": "n <- (RB)[58:63]\nRA <- ((RS) << n) & 0x0000_0000_FFFF_FFFF if n < 32 else 0",
    "srwx": "n <- (RB)[58:63]\nRA <- ((RS)[32:63] >> n) zero-extended if n < 32 else 0",
    "srawx": "n <- (RB)[58:63]\nRA <- ((RS)[32:63] >>a n) sign-extended\nCA <- 1 if (signed RS < 0) && any_bit_shifted_out else 0",
    "sldx": "n <- (RB)[57:63]\nRA <- ((RS) << n) if n < 64 else 0",
    "srdx": "n <- (RB)[57:63]\nRA <- ((RS) >> n) if n < 64 else 0",
    "sradx": "n <- (RB)[57:63]\nRA <- ((RS) >>a n) sign-extended if n < 64\nCA <- (RS signed < 0) && any_bit_shifted_out",
    "srawix": "RA <- ((RS)[32:63] >>a SH) sign-extended\nCA <- (RS[32] signed) && any_low_bit_shifted_out",
    "sradix": "RA <- ((RS) >>a SH) sign-extended\nCA <- (RS signed < 0) && any_bit_shifted_out",
    # Branch family
    "bx": "NIA <- (CIA + EXTS(LI || 0b00)) if AA=0\n <- EXTS(LI || 0b00) if AA=1\nif LK then LR <- CIA + 4",
    "bcx": "if ¬BO[2] then CTR <- CTR − 1\nctr_ok <- BO[2] | ((CTR ≠ 0) XOR BO[3])\ncond_ok <- BO[0] | (CR[BI] ≡ BO[1])\nif ctr_ok & cond_ok then NIA <- CIA + EXTS(BD || 0b00) (AA=0)\n EXTS(BD || 0b00) (AA=1)\nif LK then LR <- CIA + 4",
    "bclrx": "if ¬BO[2] then CTR <- CTR − 1\nctr_ok <- BO[2] | ((CTR ≠ 0) XOR BO[3])\ncond_ok <- BO[0] | (CR[BI] ≡ BO[1])\nif ctr_ok & cond_ok then NIA <- LR[0:61] || 0b00\nif LK then LR <- CIA + 4",
    "bcctrx": "cond_ok <- BO[0] | (CR[BI] ≡ BO[1])\nif cond_ok then NIA <- CTR[0:61] || 0b00\nif LK then LR <- CIA + 4",
    "sc": "system_call_exception(LEV)",
    # Loads (D-form, zero/sign-extend)
    "lbz": "EA <- (RA|0) + EXTS(d)\nRT <- ZEXT8_to_64(MEM(EA, 1))",
    "lbzu": "EA <- (RA) + EXTS(d) ; RA ≠ 0 required\nRT <- ZEXT8_to_64(MEM(EA, 1))\nRA <- EA",
    "lbzx": "EA <- (RA|0) + (RB)\nRT <- ZEXT8_to_64(MEM(EA, 1))",
    "lbzux": "EA <- (RA) + (RB) ; RA ≠ 0 required\nRT <- ZEXT8_to_64(MEM(EA, 1))\nRA <- EA",
    "lhz": "EA <- (RA|0) + EXTS(d)\nRT <- ZEXT16_to_64(MEM(EA, 2))",
    "lha": "EA <- (RA|0) + EXTS(d)\nRT <- SEXT16_to_64(MEM(EA, 2))",
    "lwz": "EA <- (RA|0) + EXTS(d)\nRT <- ZEXT32_to_64(MEM(EA, 4))",
    "lwa": "EA <- (RA|0) + EXTS(ds || 0b00)\nRT <- SEXT32_to_64(MEM(EA, 4))",
    "ld": "EA <- (RA|0) + EXTS(ds || 0b00)\nRT <- MEM(EA, 8)",
    # Stores (D-form)
    "stb": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 1) <- (RS)[56:63]",
    "sth": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 2) <- (RS)[48:63]",
    "stw": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 4) <- (RS)[32:63]",
    "std": "EA <- (RA|0) + EXTS(ds || 0b00)\nMEM(EA, 8) <- (RS)",
    # Floats
    "lfs": "EA <- (RA|0) + EXTS(d)\nFRT <- DoubleFromSingle(MEM(EA, 4))",
    "lfd": "EA <- (RA|0) + EXTS(d)\nFRT <- MEM(EA, 8)",
    "stfs": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 4) <- SingleFromDouble(FRS)",
    "stfd": "EA <- (RA|0) + EXTS(d)\nMEM(EA, 8) <- (FRS)",
    # SPR
    "mfspr": "n <- spr_number(SPR) ; SPR field has its two 5-bit halves swapped\nRT <- SPR(n)",
    "mtspr": "n <- spr_number(SPR)\nSPR(n) <- (RS)",
    "mfcr": "RT <- 0x00000000 || CR",
    "mtcrf": "for i in 0..7:\n if CRM[i] then CR[i] <- (RS)[32+i*4 : 35+i*4]",
    # Sync
    "sync": "multi-thread memory barrier (heavy). L=0 full sync; L=1 lightweight sync.",
    "isync": "instruction-stream synchronisation — discards speculative state.",
    "eieio": "enforce in-order execution of I/O",
    # FPU — a minimal set
    "faddx": "FRT <- FRA + FRB ; double-precision",
    "faddsx": "FRT <- RoundToSingle(FRA + FRB) ; single-precision",
    "fsubx": "FRT <- FRA − FRB",
    "fmulx": "FRT <- FRA × FRC",
    "fdivx": "FRT <- FRA ÷ FRB",
    "fmaddx": "FRT <- (FRA × FRC) + FRB",
    "fmsubx": "FRT <- (FRA × FRC) − FRB",
    "fnmaddx": "FRT <- −((FRA × FRC) + FRB)",
    "fnmsubx": "FRT <- −((FRA × FRC) − FRB)",
    "fnegx": "FRT <- flip_sign(FRB)",
    "fabsx": "FRT <- clear_sign(FRB)",
    "fnabsx": "FRT <- set_sign(FRB)",
    "fmrx": "FRT <- FRB",
    # Vector — most need hand-authored pseudocode; seed only the arithmetic sweetspots
    "vaddfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- VA[i] + VB[i]",
    "vsubfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- VA[i] − VB[i]",
    "vmulfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- VA[i] * VB[i] ; (note: not a native Altivec op; xenia helper)",
    "vmaddfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- (VA[i] * VC[i]) + VB[i]",
    "vnmsubfp": "for each 32-bit float lane i in 0..3:\n VD[i] <- −((VA[i] * VC[i]) − VB[i])",
    # Vector memory
    "stvx": "EA <- ((RA|0) + (RB)) & ~0xF ; align to 16\nMEM(EA, 16) <- byteswap(VS)",
    "lvx": "EA <- ((RA|0) + (RB)) & ~0xF ; align to 16\nVD <- byteswap(MEM(EA, 16))",
    "lvsl": "addr_lo <- ((RA|0) + (RB))[60:63]\nfor i in 0..15: VD[i] <- addr_lo + i",
    "lvsr": "addr_lo <- ((RA|0) + (RB))[60:63]\nfor i in 0..15: VD[i] <- 16 − addr_lo + i",
}
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Family grouping
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@dataclass
class Family:
    """A set of instructions that are documented together on one page.

    Members are kept in the order they were appended.
    """

    head: str  # stable key — also the on-disk slug
    category: str  # alu/memory/branch/fpu/vmx/vmx128/control
    members: list[Instruction] = field(default_factory=list)

    @property
    def primary(self) -> Instruction:
        """Return the member named exactly like ``head``, else the first member.

        Raises ``IndexError`` when the family has no members.
        """
        exact_matches = (m for m in self.members if m.mnem == self.head)
        return next(exact_matches, self.members[0])
|
||
|
||
|
||
def _cxx_slug(mnem: str) -> str:
    """Return *mnem* with every '.' turned into 'x'.

    The result is usable as a file name; per the original note it also
    matches xenia's C++ enum naming.
    """
    pieces = mnem.split(".")
    return "x".join(pieces)
|
||
|
||
|
||
def _category_for(insn: Instruction) -> str:
    """Return the manual category slug for *insn*.

    Non-vector groups are looked up in ``GROUP_TO_CATEGORY`` (an unknown
    group raises ``KeyError``). Group ``"v"`` is split by form: forms listed
    in ``VMX128_FORMS`` map to ``"vmx128"``, everything else to ``"vmx"``.
    """
    if insn.group != "v":
        return GROUP_TO_CATEGORY[insn.group]
    if insn.form in VMX128_FORMS:
        return "vmx128"
    return "vmx"
|
||
|
||
|
||
def _family_head(insn: Instruction, all_mnems: set[str]) -> str:
    """Pick the family head (page key) for *insn*.

    The rules are applied once, in this order:

    1. A mnemonic ending in ``128`` joins the family of the same name
       without that suffix, provided the shorter name is in *all_mnems*.
    2. For ``group == "m"`` only, the suffixes ``ux``, ``u`` and ``x`` are
       tried in that order; the first one whose removal yields a name
       present in *all_mnems* decides the head.
    3. Otherwise the instruction heads its own family.

    The lookup is a single pass: the returned base is not itself reduced
    further, and membership is tested against *all_mnems* as given (the
    base's own group is not inspected).
    """
    name = insn.mnem

    # Rule 1: VMX128 twin of an existing mnemonic.
    without_128 = name[:-3]
    if name.endswith("128") and without_128 in all_mnems:
        return without_128

    # Rule 2 applies to memory instructions only.
    if insn.group != "m":
        return name

    for suffix in ("ux", "u", "x"):
        if not name.endswith(suffix):
            continue
        candidate = name[: -len(suffix)]
        if candidate != name and candidate in all_mnems:
            return candidate

    # Rule 3: nothing matched.
    return name
|
||
|
||
|
||
def build_families(insns: list[Instruction]) -> dict[str, Family]:
    """Group *insns* into families keyed by their head mnemonic.

    Each instruction is appended to the family chosen by ``_family_head``.
    A family is created the first time its head is seen, and its category
    is taken from the instruction whose mnemonic equals the head. The
    returned dict therefore follows first-appearance order of the heads.
    """
    by_mnem = {insn.mnem: insn for insn in insns}
    known_mnems = set(by_mnem)
    families: dict[str, Family] = {}

    for insn in insns:
        key = _family_head(insn, known_mnems)
        # If the claimed head is not itself an XML entry, keep the original
        # mnemonic so no page is created without a backing instruction.
        if key not in by_mnem:
            key = insn.mnem
        if key not in families:
            families[key] = Family(head=key, category=_category_for(by_mnem[key]))
        families[key].members.append(insn)

    return families
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Page rendering
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Sentinel HTML comments that open and close the machine-generated region of
# a page; `render_page` places them as the first and last lines of that region.
GENERATED_BEGIN = "<!-- GENERATED: BEGIN -->"
GENERATED_END = "<!-- GENERATED: END -->"
|
||
|
||
|
||
def _variant_rows(family: Family) -> str:
    """Render the 'Assembler Mnemonics' Markdown table for *family*.

    One row is emitted per distinct runtime-variant mnemonic, in
    first-seen order across the family's members. Flags are listed as
    ``key=value`` pairs sorted by key, or an em dash when there are none.
    """
    table = [
        "| Mnemonic | XML entry | Flags | Description |",
        "| --- | --- | --- | --- |",
    ]
    emitted: set[str] = set()
    for member in family.members:
        for variant in expand_runtime_variants(member):
            variant_mnem = variant["mnem"]
            if variant_mnem in emitted:
                continue
            emitted.add(variant_mnem)
            flag_pairs = sorted(variant["flags"].items())
            flags_cell = ", ".join(f"{key}={value}" for key, value in flag_pairs) or "—"
            table.append(
                f"| `{variant_mnem}` | `{member.mnem}` | {flags_cell} | {member.desc} |"
            )
    return "\n".join(table)
|
||
|
||
|
||
def _syntax_block(family: Family) -> str:
    """Render the family's disassembly templates as a fenced ``asm`` block.

    Each member's non-empty ``disasm`` template is emitted verbatim, one
    per line, with duplicates dropped and first-seen order kept. When no
    member has a template, a placeholder line is emitted instead.
    """
    templates = (member.disasm for member in family.members if member.disasm)
    # dict keys keep insertion order, which gives an order-preserving dedupe.
    distinct = list(dict.fromkeys(templates))
    if distinct:
        body = "\n".join(distinct)
    else:
        body = "(no disassembly template)"
    return f"```asm\n{body}\n```"
|
||
|
||
|
||
def _encoding_block(family: Family) -> str:
    """Render one encoding subsection per family member.

    Each subsection lists the opcode word, primary opcode, extended opcode
    (an em dash when it is ``None``) and the synchronising flag, followed
    by the bit table for the member's form. Subsections are separated by
    a blank line.
    """

    def section(member) -> str:
        extended = member.extended_opcode
        extended_cell = "—" if extended is None else f"`{extended}`"
        synchronising = "yes" if member.sync else "no"
        bullet_lines = [
            f"### `{member.mnem}` — form `{member.form}`",
            "",
            f"- **Opcode word:** `0x{member.opcode_hex}`",
            f"- **Primary opcode (bits 0–5):** `{member.primary_opcode}`",
            f"- **Extended opcode:** {extended_cell}",
            f"- **Synchronising:** {synchronising}",
            "",
        ]
        return "\n".join(bullet_lines) + "\n" + f"{render_bit_table(member.form)}"

    return "\n\n".join(section(member) for member in family.members)
|
||
|
||
|
||
def _operand_block(family: Family) -> str:
    """Render the operand table (field, per-member role, description).

    Rows cover the union of fields read or written by any member, in
    first-seen order. The role column states, per member, whether the
    field is read and/or written and whether that access is conditional.
    Descriptions come from ``FIELD_DESCRIPTIONS``, with a generic fallback
    sentence for unknown fields.
    """

    def roles(member, field_name: str):
        # Reads are reported before writes for the same member.
        for verb, accesses in (("read", member.reads), ("write", member.writes)):
            matching = [a for a in accesses if a.name == field_name]
            if not matching:
                continue
            qualifier = " (conditional)" if any(a.conditional for a in matching) else ""
            yield f"{member.mnem}: {verb}{qualifier}"

    # Order-preserving union of field names across all members.
    field_names = list(
        dict.fromkeys(
            access.name
            for member in family.members
            for access in member.reads + member.writes
        )
    )

    table = ["| Field | Role | Description |", "| --- | --- | --- |"]
    for field_name in field_names:
        role_cell = "; ".join(
            role for member in family.members for role in roles(member, field_name)
        ) or "—"
        description = FIELD_DESCRIPTIONS.get(
            field_name,
            "_Field-specific description pending — consult the xenia-rs interpreter body below for its actual usage._",
        )
        table.append(f"| `{field_name}` | {role_cell} | {description} |")
    return "\n".join(table)
|
||
|
||
|
||
def _register_effects_block(family: Family) -> str:
    """Render, per member, which fields are read/written always vs conditionally.

    Every member gets a heading plus four bullets. A bullet with no fields
    shows ``_none_``. Members are separated by a blank line.
    """

    def names(accesses, *, conditional: bool) -> str:
        picked = [f"`{a.name}`" for a in accesses if bool(a.conditional) is conditional]
        return ", ".join(picked) if picked else "_none_"

    sections = []
    for member in family.members:
        bullets = [
            f"### `{member.mnem}`",
            "",
            f"- **Reads (always):** {names(member.reads, conditional=False)}",
            f"- **Reads (conditional):** {names(member.reads, conditional=True)}",
            f"- **Writes (always):** {names(member.writes, conditional=False)}",
            f"- **Writes (conditional):** {names(member.writes, conditional=True)}",
        ]
        sections.append("\n".join(bullets))
    return "\n\n".join(sections)
|
||
|
||
|
||
def _status_flags_block(family: Family) -> str:
    """Render the status-register effects of every member as a bullet list.

    For each member the following are reported, in this order:

    * the CR field updated when ``Rc=1`` (``has_rc``),
    * the unconditional CR0 update (``rc_is_mandatory``),
    * the ``XER[OV]`` / ``XER[SO]`` update when ``OE=1`` (``has_oe``),
    * one entry per written ``CA``, ``FPSCR`` or ``VSCR`` field.

    Members without any such effect are omitted. If no member has one, a
    single "no effects" sentence is returned instead.
    """
    lines: list[str] = []
    for member in family.members:
        fx: list[str] = []
        if member.has_rc:
            # Pick the CR field from the instruction class. Floating-point
            # record forms copy FPSCR bits into CR1 whatever their encoding
            # form, so group "f" is checked as well as the A/XFL forms;
            # otherwise X-form FPU ops such as `fabs.` or `fmr.` would be
            # documented as a CR0 signed compare.
            if member.group == "f" or member.form in ("A", "XFL"):
                fx.append("**CR1** ← FPSCR[FX, FEX, VX, OX] when `Rc=1`.")
            elif member.form in ("VC", "VX128_R"):
                fx.append("**CR6** ← `[all-true, 0, all-false, 0]` when `Rc=1`.")
            else:
                fx.append("**CR0** ← signed-compare(result, 0) with `SO ← XER[SO]`, when `Rc=1`.")
        if member.rc_is_mandatory:
            fx.append("**CR0** ← signed-compare(result, 0) with `SO ← XER[SO]` (always).")
        if member.has_oe:
            fx.append("**XER[OV]** ← signed-overflow(result); **XER[SO]** stickies, when `OE=1`.")
        for w in member.writes:
            if w.name == "CA":
                if w.conditional:
                    fx.append("**XER[CA]** ← carry-out (conditional on operation variant).")
                else:
                    fx.append("**XER[CA]** ← carry-out of the add / borrow-in of the subtract (always).")
            elif w.name == "FPSCR":
                fx.append("**FPSCR** updated per IEEE-754 flags (FX, FEX, FPRF, FR, FI, exceptions).")
            elif w.name == "VSCR":
                fx.append("**VSCR[SAT]** may be stickied on saturating vector operations.")
        if fx:
            lines.append(f"- `{member.mnem}`: " + "; ".join(fx))
    return "\n".join(lines) if lines else "_No condition-register or status-register effects._"
|
||
|
||
|
||
def _pseudocode_block(family: Family) -> str:
    """Return the fenced pseudocode block for *family*.

    The first member (in family order) with a non-empty entry in
    ``PSEUDOCODE_SEEDS`` supplies the text. When no member has a seed, a
    fixed explanatory placeholder is returned.
    """
    candidates = (PSEUDOCODE_SEEDS.get(member.mnem) for member in family.members)
    seed = next((text for text in candidates if text), None)
    if seed is not None:
        return f"```\n{seed}\n```"

    placeholder = [
        "```",
        "; Pseudocode derives directly from the xenia-rs interpreter",
        "; arm (see Implementation References). Operation semantics:",
        "; - Read source operands from the fields listed under Operands.",
        "; - Apply the arithmetic / logical / memory action described",
        "; in the Description field above.",
        "; - Write results to the destination register(s); update any",
        "; status bits enumerated under Status-Register Effects.",
        "; Consult the IBM AIX reference link under IBM Reference for",
        "; canonical PPC-style pseudocode where xenia's expression is",
        "; terse.",
        "```",
    ]
    return "\n".join(placeholder)
|
||
|
||
|
||
def _c_translation_block(family: Family) -> str:
    """Return the fenced C example for *family*.

    A small set of high-frequency family heads has a hand-seeded C snippet,
    looked up by ``family.head``. Every other family gets a comment-only
    placeholder that tells the reader how to translate the interpreter
    snapshot shown on the same page.

    Every placeholder line is a self-contained ``/* ... */`` comment. The
    memory-accessor line therefore spells ``read_u* / write_u*`` with
    spaces: without them the text contains ``*/``, which ends the C comment
    early and leaves the remainder of the line as stray, invalid C.
    """
    head = family.head
    seeds = {
        "addx": (
            '/* add / add. / addo / addo. (XO-form) */\n'
            'uint64_t a = r[insn.RA], b = r[insn.RB];\n'
            'uint64_t result = a + b;\n'
            'r[insn.RT] = result;\n'
            'if (insn.OE) { bool ov = (~(a ^ b) & (a ^ result)) >> 63;\n'
            ' if (ov) { xer.OV = 1; xer.SO = 1; } else xer.OV = 0; }\n'
            'if (insn.Rc) update_cr0_signed((int64_t)result);'
        ),
        "addi": (
            '/* addi RT, RA, SIMM — RA=0 means literal 0 */\n'
            'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n'
            'r[insn.RT] = base + (uint64_t)(int64_t)(int16_t)insn.SIMM;'
        ),
        "addis": (
            '/* addis RT, RA, SIMM — RA=0 means literal 0 */\n'
            'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n'
            'r[insn.RT] = base + ((uint64_t)(int64_t)(int16_t)insn.SIMM << 16);'
        ),
        "lwz": (
            '/* lwz RT, d(RA) */\n'
            'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n'
            'uint32_t ea = (uint32_t)(base + (int64_t)(int16_t)insn.D);\n'
            'r[insn.RT] = (uint64_t)mem_read_u32_be(ea); /* zero-extend */'
        ),
        "stw": (
            '/* stw RS, d(RA) */\n'
            'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n'
            'uint32_t ea = (uint32_t)(base + (int64_t)(int16_t)insn.D);\n'
            'mem_write_u32_be(ea, (uint32_t)r[insn.RS]);'
        ),
        "bclrx": (
            '/* bclr/bclrl — branch conditional to LR */\n'
            'if (!(insn.BO & 4)) ctr -= 1;\n'
            'bool ctr_ok = (insn.BO & 4) || ((ctr != 0) ^ !!(insn.BO & 2));\n'
            'bool cond_ok = (insn.BO & 16) || (cr_bit(insn.BI) == !!(insn.BO & 8));\n'
            'uint32_t next = pc + 4;\n'
            'if (ctr_ok && cond_ok) pc = lr & ~3u; else pc = next;\n'
            'if (insn.LK) lr = next;'
        ),
        "mfspr": (
            '/* mfspr RT, SPR — SPR field has swapped halves */\n'
            'uint32_t n = ((insn.SPR & 0x1F) << 5) | ((insn.SPR >> 5) & 0x1F);\n'
            'switch (n) {\n'
            ' case 1: r[insn.RT] = xer_pack(); break; /* XER */\n'
            ' case 8: r[insn.RT] = lr; break; /* LR */\n'
            ' case 9: r[insn.RT] = ctr; break; /* CTR */\n'
            ' case 256: r[insn.RT] = vrsave; break; /* VRSAVE*/\n'
            ' case 268: r[insn.RT] = tb & 0xFFFFFFFFu; break; /* TBL */\n'
            ' case 269: r[insn.RT] = tb >> 32; break; /* TBU */\n'
            ' default: r[insn.RT] = 0; break;\n'
            '}'
        ),
        "stvx": (
            '/* stvx VS, RA, RB — 16-byte aligned store of a vector register */\n'
            'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n'
            'uint32_t ea = (uint32_t)((base + r[insn.RB]) & ~(uint64_t)0xF);\n'
            'mem_write_vec128_be(ea, v[insn.VS]);'
        ),
        "lvx": (
            '/* lvx VD, RA, RB — 16-byte aligned load of a vector register */\n'
            'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n'
            'uint32_t ea = (uint32_t)((base + r[insn.RB]) & ~(uint64_t)0xF);\n'
            'v[insn.VD] = mem_read_vec128_be(ea);'
        ),
        "lvsl": (
            '/* lvsl VD, RA, RB — load-shift-left permute control */\n'
            'uint64_t base = (insn.RA == 0) ? 0 : r[insn.RA];\n'
            'uint8_t sh = (uint8_t)((base + r[insn.RB]) & 0xF);\n'
            'for (int i = 0; i < 16; ++i) v[insn.VD].b[i] = sh + i;'
        ),
        "vaddfp": (
            '/* vaddfp VD, VA, VB — lane-wise float add */\n'
            'for (int i = 0; i < 4; ++i) v[insn.VD].f[i] = v[insn.VA].f[i] + v[insn.VB].f[i];'
        ),
        "bx": (
            '/* b / bl / ba / bla — unconditional branch (I-form, primary 18) */\n'
            'int32_t li = (int32_t)(insn.LI << 2); /* sign-extended word-offset */\n'
            'uint32_t target = insn.AA ? (uint32_t)li : (uint32_t)(pc + li);\n'
            'uint32_t next = pc + 4;\n'
            'if (insn.LK) lr = next; /* bl / bla save return addr */\n'
            'pc = target;'
        ),
        "faddx": (
            '/* fadd / fadd. — IEEE-754 double-precision add (A-form) */\n'
            'f[insn.FRT] = f[insn.FRA] + f[insn.FRB];\n'
            'if (insn.Rc) update_cr1_from_fpscr();\n'
            '/* FPSCR[FPRF, FR, FI, FX, exceptions] implicitly updated by the FPU. */'
        ),
    }
    seed = seeds.get(head)
    if seed is not None:
        return f"```c\n{seed}\n```"

    # No seed: emit a content-bearing placeholder that points the translator
    # at the authoritative source snapshot on this same page. Each line must
    # contain exactly one "*/" (its terminator), so keep spaces around any
    # '/' that follows a '*'.
    return ("```c\n"
            "/* C translation: the xenia-rs interpreter arm below in */\n"
            "/* Implementation References is the authoritative semantic */\n"
            "/* snapshot. Translate it line-by-line: */\n"
            "/* - ctx.gpr[N] -> r[N] (or f[]/v[] for FPRs/VRs) */\n"
            "/* - mem.read_u* / write_u* -> mem_read_u*_be / mem_write_u*_be */\n"
            "/* - ctx.update_cr_signed(fld, v) -> update_cr_signed(fld, v) */\n"
            "/* - ctx.xer_ca / xer_ov / xer_so -> xer.CA / xer.OV / xer.SO */\n"
            "/* The Register Effects and Status-Register Effects tables above */\n"
            "/* enumerate every side effect a faithful translation must emit. */\n"
            "```")
|
||
|
||
|
||
def _implementation_refs_block(family: Family, rust: RustScraper, cxx: CxxScraper) -> str:
    """Render per-member links into the xenia-canary and xenia-rs sources.

    Every member gets a bold heading and a link to the instruction XML.
    Links to the C++ emitter, the Rust opcode/decoder/interpreter locations
    and a collapsible copy of the interpreter body are added only when the
    corresponding scraper result is truthy. Members are separated by a
    blank line.
    """
    canary_root = "../../xenia-canary"
    rs_dir = "crates/xenia-cpu/src"

    def rs_link(label: str, file_name: str, shown, anchor: str) -> str:
        # Link text shows the repo-relative path; the target climbs two
        # levels before descending into xenia-rs.
        return (
            f"- xenia-rs {label}: [`{rs_dir}/{file_name}:{shown}`]"
            f"(../../xenia-rs/{rs_dir}/{file_name}#{anchor})"
        )

    sections = []
    for member in family.members:
        emitter = cxx.lookup(member.mnem)
        scraped = rust.lookup(member.mnem)

        entry = [
            f"**`{member.mnem}`**",
            "- xenia-canary XML: "
            f"[`tools/ppc-instructions.xml` — search for `mnem=\"{member.mnem}\"`]"
            f"({canary_root}/tools/ppc-instructions.xml)",
        ]
        if emitter.emit_file and emitter.emit_line:
            entry.append(
                f"- xenia-canary emit: [`{emitter.emit_file}:{emitter.emit_line}`]"
                f"({canary_root}/{emitter.emit_file}#L{emitter.emit_line})"
            )
        if scraped.opcode_line:
            entry.append(
                rs_link("opcode", "opcode.rs", scraped.opcode_line, f"L{scraped.opcode_line}")
            )
        if scraped.decoder_line:
            entry.append(
                rs_link("decoder", "decoder.rs", scraped.decoder_line, f"L{scraped.decoder_line}")
            )
        if scraped.interp_start and scraped.interp_end:
            span = f"{scraped.interp_start}-{scraped.interp_end}"
            anchor = f"L{scraped.interp_start}-L{scraped.interp_end}"
            entry.append(rs_link("interpreter", "interpreter.rs", span, anchor))
        if scraped.interp_body:
            snapshot = scraped.interp_body.rstrip()
            entry.append(
                "<details><summary>xenia-rs interpreter body (frozen snapshot)</summary>\n\n"
                f"```rust\n{snapshot}\n```\n</details>"
            )
        sections.append("\n".join(entry))
    return "\n\n".join(sections)
|
||
|
||
|
||
def render_page(family: Family, rust: RustScraper, cxx: CxxScraper) -> str:
    """Render the complete Markdown page for one instruction family.

    The page has three parts, concatenated in this order:

    1. A title/breadcrumb header built from the family's primary member
       (description, category link, form link, opcode, optional sync tag).
    2. The generated region, delimited by ``GENERATED_BEGIN`` and
       ``GENERATED_END``, holding one ``##`` section per block renderer.
    3. A stub of hand-written sections placed after ``GENERATED_END``.

    Args:
        family: The family to document.
        rust: Scraper used for xenia-rs implementation references.
        cxx: Scraper used for xenia-canary implementation references.

    Returns:
        The page text.
    """
    primary = family.primary
    category_label, _ = CATEGORY_LABELS[family.category]
    title = family.head

    header = (
        f"# `{title}` — {primary.desc}\n\n"
        f"> **Category:** [{category_label}](../categories/{family.category}.md) · "
        f"**Form:** [{primary.form}](../forms/{primary.form}.md) · "
        f"**Opcode:** `0x{primary.opcode_hex}`"
        f"{' · _sync_' if primary.sync else ''}\n"
    )

    # (heading, body) pairs in page order; the bodies are rendered here, top to
    # bottom, in the same order they appear on the page.
    sections = [
        ("Assembler Mnemonics", _variant_rows(family)),
        ("Syntax", _syntax_block(family)),
        ("Encoding", _encoding_block(family)),
        ("Operands", _operand_block(family)),
        ("Register Effects", _register_effects_block(family)),
        ("Status-Register Effects", _status_flags_block(family)),
        ("Operation (pseudocode)", _pseudocode_block(family)),
        ("C Translation Example", _c_translation_block(family)),
        ("Implementation References", _implementation_refs_block(family, rust, cxx)),
    ]
    generated_lines = [GENERATED_BEGIN, ""]
    for heading, body in sections:
        generated_lines += [f"## {heading}", "", body, ""]
    generated_lines.append(GENERATED_END)
    generated = "\n".join(generated_lines)

    # Hand-written sections follow the sentinel. merge_preserving_handwritten()
    # keeps whatever an existing page has after GENERATED_END, so this stub
    # only reaches disk when the page is first created.
    handwritten_stub = "\n".join([
        "",
        "## Special Cases & Edge Conditions",
        "",
        "_Document: `RA0` handling, alignment, endian byte-reverse, overflow",
        "traps, reservation semantics, SPR remapping, VMX128 register fusion —",
        "whichever apply to this instruction._",
        "",
        "## Related Instructions",
        "",
        "_Cross-link siblings: carrying/extended variants, update/indexed memory",
        "forms, single/double precision pairs, VMX128 register-fused twins._",
        "",
        "## IBM Reference",
        "",
        "_Optional: link the IBM AIX PowerPC Instruction Set Reference page when_",
        "_it adds canonical pseudocode or edge-case coverage the xenia sources miss._",
        "",
    ])

    return header + "\n" + generated + "\n" + handwritten_stub
|
||
|
||
|
||
def merge_preserving_handwritten(existing: str | None, fresh: str) -> str:
    """Combine a newly rendered page with the hand-written tail of the old one.

    Outcomes, by state of the previous file:

    - ``existing`` is ``None`` (no previous file): ``fresh`` is returned as-is.
    - ``existing`` has no ``GENERATED_END`` sentinel (treated as a manual
      rewrite): ``existing`` is returned unchanged.
    - Otherwise: the result is ``fresh`` up to and including its first
      ``GENERATED_END``, followed by everything that came after the first
      ``GENERATED_END`` in ``existing``.
    """
    if existing is None:
        return fresh

    _, sentinel, handwritten_tail = existing.partition(GENERATED_END)
    if not sentinel:
        # A human took over the page; leave it exactly as it is.
        return existing

    # partition() yields the whole string as its first element when the
    # sentinel is absent, so the sentinel is always re-appended explicitly.
    generated_head = fresh.partition(GENERATED_END)[0]
    return generated_head + GENERATED_END + handwritten_tail
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# JSON index
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def build_index(families: dict[str, Family]) -> dict:
    """Assemble the data structure that is serialised as ``index.json``.

    Each XML entry produces one full record keyed by its primary runtime
    mnemonic (flagged ``is_primary``) plus one slim alias record per other
    runtime variant, each pointing back through ``variant_of``. The result
    also carries per-category and per-form XML-entry counts and a few
    summary totals. Instruction keys are emitted in sorted order.
    """
    entries: dict[str, dict] = {}
    per_category: dict[str, int] = {}
    per_form: dict[str, int] = {}

    for fam in families.values():
        page = f"{fam.category}/{_cxx_slug(fam.head)}.md"
        per_category[fam.category] = per_category.get(fam.category, 0) + len(fam.members)

        for insn in fam.members:
            per_form[insn.form] = per_form.get(insn.form, 0) + 1

            variants = expand_runtime_variants(insn)
            # The variant flagged primary wins; otherwise fall back to the first.
            fallback = variants[0]
            head_variant = next(filter(lambda v: v["is_primary"], variants), fallback)

            # The primary key can differ from the XML mnem when a trailing 'x'
            # was stripped.
            head_key = head_variant["mnem"]
            entries[head_key] = {
                "page": page,
                "family": fam.head,
                "xml_mnem": insn.mnem,
                "opcode_hex": f"0x{insn.opcode_hex.upper()}",
                "primary_opcode": insn.primary_opcode,
                "extended_opcode": insn.extended_opcode,
                "form": insn.form,
                "group": GROUP_NAMES[insn.group],
                "category": fam.category,
                "description": insn.desc,
                "sync": insn.sync,
                "reads": [{"field": fld.name, "conditional": fld.conditional}
                          for fld in insn.reads],
                "writes": [{"field": fld.name, "conditional": fld.conditional}
                           for fld in insn.writes],
                "runtime_flags": {
                    "Rc": insn.has_rc,
                    "OE": insn.has_oe,
                    "LK": insn.has_lk,
                    "Rc_mandatory": insn.rc_is_mandatory,
                },
                "is_primary": True,
                "flags": head_variant["flags"],
            }

            # Remaining variants become aliases carrying only what is needed
            # to resolve them to the primary record.
            for variant in variants:
                alias = variant["mnem"]
                if alias != head_key:
                    entries[alias] = {
                        "page": page,
                        "family": fam.head,
                        "variant_of": head_key,
                        "xml_mnem": insn.mnem,
                        "flags": variant["flags"],
                        "category": fam.category,
                    }

    primary_total = len([rec for rec in entries.values() if rec.get("is_primary")])
    categories = {}
    for cat, count in sorted(per_category.items()):
        categories[cat] = {
            "page": f"categories/{cat}.md",
            "count": count,
            "label": CATEGORY_LABELS[cat][0],
            "summary": CATEGORY_LABELS[cat][1],
        }
    forms = {}
    for form, count in sorted(per_form.items()):
        forms[form] = {"page": f"forms/{form}.md", "count": count}

    return {
        "version": "1.0",
        "generator": "ppc-manual/generator/generate_manual.py",
        "instruction_count": primary_total,
        "mnemonic_count": len(entries),
        "family_count": len(families),
        "categories": categories,
        "forms": forms,
        "instructions": dict(sorted(entries.items(), key=lambda item: item[0])),
    }
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Category & Form overview pages
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def render_category_page(cat_key: str, families: list[Family]) -> str:
    """Render the overview page for one instruction category.

    The page consists of the category label and summary, a line with family
    and XML-entry totals, and a generated table (between the
    ``GENERATED_BEGIN``/``GENERATED_END`` sentinels) with one row per family,
    sorted by family head.

    Args:
        cat_key: Key into ``CATEGORY_LABELS``.
        families: The families to list on the page.

    Returns:
        The page text.
    """
    label, summary = CATEGORY_LABELS[cat_key]

    rows = ["| Family | Form | Description | Members |",
            "| --- | --- | --- | --- |"]
    for family in sorted(families, key=lambda f: f.head):
        primary = family.primary
        members = ", ".join(f"`{m.mnem}`" for m in family.members)
        # Overview pages are written to categories/<cat>.md while instruction
        # pages live in <category>/<slug>.md, so the link has to climb out of
        # categories/ first; a bare "<slug>.md" would resolve inside it.
        link = f"../{family.category}/{_cxx_slug(family.head)}.md"
        rows.append(f"| [`{family.head}`]({link}) "
                    f"| `{primary.form}` | {primary.desc} | {members} |")
    body = "\n".join(rows)

    entry_total = sum(len(f.members) for f in families)
    return (
        f"# {label}\n\n"
        f"{summary}\n\n"
        f"**{len(families)} families** · **{entry_total} XML entries**.\n\n"
        f"{GENERATED_BEGIN}\n\n{body}\n\n{GENERATED_END}\n"
    )
|
||
|
||
|
||
def render_form_page(form: str, families: list[Family], insns: list[Instruction]) -> str:
    """Render the reference page for one instruction encoding form.

    The page shows the form's bit layout followed by a generated table
    (between the ``GENERATED_BEGIN``/``GENERATED_END`` sentinels) of every
    instruction in *insns* that uses the form, ordered by ``opcode_int``.
    Each row links to ``../<category>/<slug>.md``, where the slug is derived
    from the instruction's family head, or from its own mnemonic when the
    computed head is not one of *families*.

    Args:
        form: Form name, e.g. ``"XO"``.
        families: All known families; only their heads are consulted.
        insns: All instructions; filtered down to those using *form*.

    Returns:
        The page text.
    """
    members_here = [i for i in insns if i.form == form]
    bit_table = render_bit_table(form)

    # Both lookup sets are the same for every row, so build them once instead
    # of once per instruction.
    all_mnems = {i.mnem for i in insns}
    family_heads = {f.head for f in families}

    rows = ["| Mnemonic | Opcode | Group | Description |",
            "| --- | --- | --- | --- |"]
    for m in sorted(members_here, key=lambda i: i.opcode_int):
        cat = _category_for(m)
        # Find the family head for the link; fall back to the instruction's
        # own mnemonic when that head has no family.
        head = _family_head(m, all_mnems)
        if head not in family_heads:
            head = m.mnem
        link = f"../{cat}/{_cxx_slug(head)}.md"
        rows.append(f"| [`{m.mnem}`]({link}) | `0x{m.opcode_hex}` | {GROUP_NAMES[m.group]} | {m.desc} |")
    body = "\n".join(rows)

    # Human-readable titles per form. Each value already starts with the form
    # name, so the heading below shows the name twice ("Form `I` — I — ...").
    title_bits = {
        "I": "I — Immediate Branch",
        "B": "B — Conditional Branch",
        "SC": "SC — System Call",
        "D": "D — Displacement (load/store and immediate ALU)",
        "DS": "DS — Doubleword Shift (word-scaled displacement)",
        "X": "X — Extended (10-bit extended opcode)",
        "XL": "XL — Extended, Link (branch-to-LR/CTR, CR logical)",
        "XFX": "XFX — Fixed (SPR/TBR/CR-field access)",
        "XFL": "XFL — Floating Fields (mtfsf)",
        "XS": "XS — Extended, Shift (64-bit sradi)",
        "XO": "XO — Extended, Overflow (ALU with OE/Rc)",
        "A": "A — Arithmetic (three-source FPU)",
        "M": "M — Mask (rlwinm/rlwimi/rlwnm)",
        "MD": "MD — Mask Double (rldicr/rldicl/rldic/rldimi)",
        "MDS": "MDS — Mask Double, Shift-by-register (rldcl/rldcr)",
        "DCBZ": "DCBZ — Cache Block Zeroing (special X variant)",
        "VX": "VX — Vector (3-operand Altivec)",
        "VA": "VA — Vector Arithmetic (4-operand, madd-style)",
        "VC": "VC — Vector Compare (with Rc → CR6)",
        "VX128": "VX128 — VMX128 3-operand (register-fused)",
        "VX128_1": "VX128_1 — VMX128 vector load/store",
        "VX128_2": "VX128_2 — VMX128 3-operand arithmetic",
        "VX128_3": "VX128_3 — VMX128 unary with immediate",
        "VX128_4": "VX128_4 — VMX128 with sub-opcode selector",
        "VX128_5": "VX128_5 — VMX128 with shift field",
        "VX128_P": "VX128_P — VMX128 permute",
        "VX128_R": "VX128_R — VMX128 compare (with Rc → CR6)",
    }
    # Forms without an entry fall back to their bare name.
    title = title_bits.get(form, form)
    return (
        f"# Form `{form}` — {title}\n\n"
        f"## Bit Layout\n\n"
        f"{bit_table}\n\n"
        f"## Instructions Using This Form\n\n"
        f"{GENERATED_BEGIN}\n\n{body}\n\n{GENERATED_END}\n"
    )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# README
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def render_readme(families: dict[str, Family], insns: list[Instruction]) -> str:
    """Render the manual's top-level ``README.md``.

    Besides fixed explanatory prose, the README contains three computed
    totals (XML entries, families, runtime mnemonics), a per-category table
    and a per-form table, both sorted by key.
    """
    # Group families by category for the category table.
    grouped: dict[str, list[Family]] = {}
    for fam in families.values():
        grouped.setdefault(fam.category, []).append(fam)

    cat_rows = ["| Category | Families | XML entries | Description |",
                "| --- | --- | --- | --- |"]
    for cat in sorted(grouped):
        fams = grouped[cat]
        label, summary = CATEGORY_LABELS[cat]
        entry_total = sum(len(f.members) for f in fams)
        cat_rows.append(
            f"| [{label}](categories/{cat}.md) | {len(fams)} | {entry_total} | {summary} |"
        )

    # Count XML entries per encoding form for the form table.
    per_form: dict[str, int] = {}
    for insn in insns:
        per_form[insn.form] = per_form.get(insn.form, 0) + 1
    form_rows = ["| Form | Count | Page |", "| --- | --- | --- |"]
    form_rows.extend(
        f"| `{form}` | {count} | [forms/{form}.md](forms/{form}.md) |"
        for form, count in sorted(per_form.items())
    )

    total_mnemonics = sum(len(expand_runtime_variants(insn)) for insn in insns)
    insn_total = len(insns)
    family_total = len(families)
    category_table = "\n".join(cat_rows)
    form_table = "\n".join(form_rows)

    return f"""# PowerPC Instruction Manual (Xenia Xbox 360 Subset)

A reference for the **Xenon** PowerPC dialect used by the Xbox 360. Its
primary audience is an AI agent translating PPC assembly functions into
equivalent C. The content is derived from the two authoritative sources in
this repository — **xenia-canary** (C++ emulator) and **xenia-rs** (Rust
rewrite) — and may be deepened with the IBM AIX PowerPC reference.

- **{insn_total}** distinct XML-level instructions (one page each).
- **{family_total}** instruction family pages (VMX128 siblings folded).
- **{total_mnemonics}** assembly mnemonics once runtime `Rc`/`OE`/`LK` variants are expanded — all resolvable through `index.json`.

## How to use this manual (translation agent)

1. Parse the 32-bit instruction word and identify the mnemonic. Resolve it
through [`index.json`](index.json): every assembly form (including
`add.`, `addo.`, `bclrl`, …) is a top-level key pointing at a page.
2. Open the page referenced by `index.json[mnem].page`. The page is in a
fixed format — see the "Page anatomy" section below.
3. Emit a C translation consistent with the page's pseudocode, the
registers-affected list, and the status-register effects.

## Page anatomy

Every instruction page has the same sections, in this order:

| Section | Purpose |
| --- | --- |
| **Assembler Mnemonics** | Table of every runtime variant (Rc/OE/LK) the base XML entry covers, plus VMX128 siblings. |
| **Syntax** | Canonical assembly template with `[OE]`/`[Rc]`/`[LK]` bracketed-modifier notation. |
| **Encoding** | Form name, opcode word, primary/extended opcodes, and bit-layout table. |
| **Operands** | Every bit-field operand, its role per variant, and its meaning. |
| **Register Effects** | Unconditional vs. conditional reads and writes, per variant. |
| **Status-Register Effects** | CR0/CR1/CR6, XER[CA/OV/SO], FPSCR, VSCR updates. |
| **Operation** | PPC-style pseudocode (`RT <- …`, `EXTS(…)`, `MEM(EA, n)`). |
| **C Translation Example** | Minimal idiomatic C rendering a translator could emit. |
| **Implementation References** | Direct links into `xenia-canary/` and `xenia-rs/` with line numbers. |
| **Special Cases & Edge Conditions** | RA=0, alignment, endian byte-reverse, reservation, SPR remapping, VMX128 fusion. |
| **Related Instructions** | Sibling cross-links. |
| **IBM Reference** | Optional link to IBM AIX PPC reference for canonical pseudocode. |

Sections between the `<!-- GENERATED: BEGIN -->` and `<!-- GENERATED: END -->`
sentinels are produced by [`generator/generate_manual.py`](generator/generate_manual.py)
and re-generated on every run. Sections outside the sentinels are
hand-written and preserved across re-runs.

## Conventions

- **Bit numbering** follows PowerPC (big-endian, bit 0 = MSB).
- **GPRs** are 64-bit. 32-bit operations operate on bits `[32:63]` and
conventionally write the low 32 bits with zero- or sign-extension into
the high 32 bits. Page pseudocode makes this explicit when it matters.
- **Vector registers** are 128-bit with **lane 0 at the most-significant
byte** (big-endian lane indexing). On x86 hosts byte-swap is applied at
load/store to preserve this invariant.
- **CR** is 8 × 4-bit fields `CR0..CR7`, each `{{LT, GT, EQ, SO}}`. The record
form of arithmetic instructions writes CR0 (integer) or CR1 (FPU); the
record form of vector compare writes CR6 = `{{all-true, 0, all-false, 0}}`.
- **XER** holds `SO`, `OV`, and `CA` at bits 32, 33, 34 respectively
(PPC bit numbering), plus a 7-bit string length used by `lswi`/`stswi`.

## Categories

{category_table}

## Forms

{form_table}

## Regenerating this manual

```bash
python3 generator/generate_manual.py
```

Re-running the generator is safe — it only rewrites sections between
`<!-- GENERATED: BEGIN -->` / `<!-- GENERATED: END -->` sentinels. Add
your hand-written content below the `END` marker and it will be
preserved.
"""
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Main
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def main() -> int:
    """Command-line entry point: validate the instruction data, then write the manual.

    Steps:

    1. Parse arguments and load the instruction XML.
    2. Group instructions into families and run two consistency checks:
       every XML entry belongs to a family, and every runtime mnemonic is a
       key of the index.
    3. Print a summary. With ``--dry-run`` stop here.
    4. Write instruction pages, category overviews, form pages,
       ``index.json`` and ``README.md`` under the output directory, merging
       each Markdown file with any version already on disk.

    Returns:
        ``0`` on success (including a clean dry run), ``1`` when a
        consistency check fails. The checks are explicit ``if`` tests, not
        ``assert`` statements, so they still run under ``python -O``.
    """
    parser = argparse.ArgumentParser(description="Generate PPC instruction manual")
    parser.add_argument("--out", type=Path, default=MANUAL_ROOT_DEFAULT,
                        help="Output directory (default: ppc-manual/)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Parse + group only. Don't write any files. "
                             "Exit non-zero if any consistency check fails.")
    parser.add_argument("--xml", type=Path, default=XML_PATH,
                        help="Path to ppc-instructions.xml")
    args = parser.parse_args()

    insns = load_instructions(args.xml)
    if len(insns) != 455:
        # Advisory only: a different entry count does not stop generation.
        print(f"WARNING: expected 455 XML entries, found {len(insns)}", file=sys.stderr)

    families = build_families(insns)

    # Consistency: every XML entry must belong to exactly one family.
    total_members = sum(len(f.members) for f in families.values())
    if total_members != len(insns):
        print(f"ERROR: family member total {total_members} ≠ XML entry count {len(insns)}",
              file=sys.stderr)
        return 1

    # Consistency: every runtime mnemonic must be resolvable in the index.
    index = build_index(families)
    all_runtime_mnems: set[str] = {
        v["mnem"] for i in insns for v in expand_runtime_variants(i)
    }
    missing = all_runtime_mnems - set(index["instructions"])
    if missing:
        print(f"ERROR: index is missing {len(missing)} mnemonics: {sorted(missing)[:10]}",
              file=sys.stderr)
        return 1

    # Report
    print(f"XML entries: {len(insns)}")
    print(f"Families: {len(families)}")
    print(f"Runtime mnemonics: {len(all_runtime_mnems)}")
    print(f"Index keys: {len(index['instructions'])}")
    by_cat = defaultdict(int)
    for fam in families.values():
        by_cat[fam.category] += 1
    print("Families by category:")
    for cat, n in sorted(by_cat.items()):
        print(f" {cat:8s} {n}")

    if args.dry_run:
        return 0

    rust = RustScraper(REPO_ROOT)
    cxx = CxxScraper(REPO_ROOT)

    out = args.out
    out.mkdir(parents=True, exist_ok=True)

    def write_merged(path: Path, fresh: str) -> None:
        # Write *fresh* to *path*, first merging it with the file already
        # there (if any) so content after GENERATED_END survives.
        if path.exists():
            fresh = merge_preserving_handwritten(path.read_text(encoding="utf-8"), fresh)
        path.write_text(fresh, encoding="utf-8")

    written = 0
    preserved = 0

    # 1. Instruction pages. Files whose merged text equals what is on disk
    #    are left alone and counted as preserved.
    for family in families.values():
        cat_dir = out / family.category
        cat_dir.mkdir(exist_ok=True)
        page_path = cat_dir / f"{_cxx_slug(family.head)}.md"
        fresh = render_page(family, rust, cxx)
        existing = page_path.read_text(encoding="utf-8") if page_path.exists() else None
        merged = merge_preserving_handwritten(existing, fresh)
        if merged == existing:
            preserved += 1
            continue
        page_path.write_text(merged, encoding="utf-8")
        written += 1

    # 2. Category overviews
    cats_dir = out / "categories"
    cats_dir.mkdir(exist_ok=True)
    by_cat_list: dict[str, list[Family]] = defaultdict(list)
    for fam in families.values():
        by_cat_list[fam.category].append(fam)
    for cat, fams in by_cat_list.items():
        write_merged(cats_dir / f"{cat}.md", render_category_page(cat, fams))

    # 3. Form reference pages
    forms_dir = out / "forms"
    forms_dir.mkdir(exist_ok=True)
    all_families = list(families.values())
    for form in sorted({i.form for i in insns}):
        write_merged(forms_dir / f"{form}.md", render_form_page(form, all_families, insns))

    # 4. index.json
    (out / "index.json").write_text(
        json.dumps(index, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )

    # 5. README
    # NOTE(review): the README prose quotes the sentinel comments verbatim. If
    # that quoted text equals GENERATED_END, the merge splits at the first
    # quoted mention, and everything after it (including the Categories and
    # Forms tables) is carried over from the existing README instead of being
    # refreshed. Confirm against the GENERATED_END definition.
    write_merged(out / "README.md", render_readme(families, insns))

    print(f"Wrote/updated {written} pages; preserved {preserved} unchanged; "
          f"emitted index.json with {len(index['instructions'])} entries.")
    return 0
|
||
|
||
|
||
# Run the generator only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|