M6: addr_mode column on xrefs + extended store/load classes
Adds finer-grained addressing-mode classification to every data xref row plus new dispatch for instruction families not previously emitted:

- New `xrefs.addr_mode VARCHAR NULL` column. NULL for control-flow edges (call / ind_call / j / br); one of d_form / lis_addi / lis_ori / multiword / x_form_indexed / x_form_byterev / atomic / dcbz for data edges. Index idx_xrefs_addr_mode.
- New `xenia_analysis::xref::AddrMode` enum + Xref::addr_mode field.
- Opcode 46/47 (lmw/stmw) expand to one xref per slot — D-form multi-word load/store now resolves all (32-rS) consecutive addresses.
- Opcode 31 X-form dispatch — stwx/stbx/sthx/stwux/stbux/sthux/stdx/stdux, lwzx/lbzx/lhzx/lhax/lwzux/lbzux/lhzux/lhaux/ldx/ldux, stwcx./stdcx. (atomic), stwbrx/sthbrx/lwbrx/lhbrx (byte-reverse), dcbz (cache-line clear).
- X-form rows are emitted ONLY when both rA and rB resolve to known constants (rare but present); the dominant runtime-indexed pattern remains correctly skipped.

Sylpheed yield (regen on master + merge):

- 442 newly-detected x_form_indexed reads (lwzx/lhzx into static tables).
- 40 newly-detected atomic writes (stwcx./stdcx. with resolvable address).
- 28,834 lis_addi refs, 18,485 d_form reads, 3,288 d_form writes — every pre-existing data row now tagged.
- 0 multiword / dcbz / byterev (these instructions exist but aren't on lis+addi-tracked code paths).

Tests 633→636 (+3 xref unit tests covering AddrMode tag uniqueness, data-edge addr_mode round-trip, control-edge None invariant). Schema golden updated (xrefs gains addr_mode column).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -39,10 +39,54 @@ impl XrefKind {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sub-classification of how `source`'s instruction computes its target
/// address. Only meaningful for data xrefs (`read` / `write` / `ref`); call
/// / jump / branch / ind_call rows store `None`.
///
/// Variant order is part of the derived `PartialOrd` / `Ord` and must not be
/// rearranged casually.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum AddrMode {
    /// Standard signed-16 displacement: `lwz rD, simm(rA)`, `stw rS, simm(rA)`,
    /// FP D-forms (`lfs/lfd/stfs/stfd`), update variants. The dominant case.
    DForm,
    /// Address materialised via `lis + addi` register tracking — no
    /// load/store yet at this site.
    LisAddi,
    /// Address materialised via `lis + ori` register tracking.
    LisOri,
    /// Multi-word D-form: `lmw / stmw rS, simm(rA)` — emits one xref per
    /// register slot (32-rS slots starting at the resolved base).
    Multiword,
    /// X-form indexed load or store.
    ///
    /// Stores: `stwx / stbx / sthx / stwux / stbux / sthux / stdx / stdux`.
    /// Loads: `lwzx / lbzx / lhzx / lhax / lwzux / lbzux / lhzux / lhaux /
    /// ldx / ldux`.
    /// Static resolution requires both rA and rB constant.
    XFormIndexed,
    /// X-form byte-reverse: `stwbrx / sthbrx / lwbrx / lhbrx`.
    XFormByteRev,
    /// Reservation/atomic store-conditional: `stwcx. / stdcx.`.
    Atomic,
    /// Cache-line clear: `dcbz rA, rB` — clears 32 bytes at rA+rB.
    DCBZ,
}

impl AddrMode {
    /// Returns the short snake_case tag naming this addressing mode.
    ///
    /// Every variant maps to a distinct string, so the tag can serve as a
    /// stable textual identifier for the mode.
    #[must_use]
    pub const fn tag(self) -> &'static str {
        match self {
            Self::DForm => "d_form",
            Self::LisAddi => "lis_addi",
            Self::LisOri => "lis_ori",
            Self::Multiword => "multiword",
            Self::XFormIndexed => "x_form_indexed",
            Self::XFormByteRev => "x_form_byterev",
            Self::Atomic => "atomic",
            Self::DCBZ => "dcbz",
        }
    }
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Xref {
|
||||
pub source: u32,
|
||||
pub kind: XrefKind,
|
||||
/// `None` for control-flow edges; `Some(...)` for data edges.
|
||||
pub addr_mode: Option<AddrMode>,
|
||||
}
|
||||
|
||||
pub type XrefMap = HashMap<u32, Vec<Xref>>;
|
||||
@@ -160,7 +204,10 @@ pub fn analyze_xrefs(
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef });
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataRef,
|
||||
addr_mode: Some(AddrMode::LisAddi),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
reg_hi[rd] = Some(data_addr); // propagate for chained access
|
||||
@@ -175,7 +222,10 @@ pub fn analyze_xrefs(
|
||||
let data_addr = base | uimm;
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRef));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRef });
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataRef,
|
||||
addr_mode: Some(AddrMode::LisOri),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
reg_hi[ra] = Some(data_addr);
|
||||
@@ -184,31 +234,147 @@ pub fn analyze_xrefs(
|
||||
}
|
||||
}
|
||||
// Load instructions: lwz, lbz, lhz, lha, lfs, lfd, lwzu, etc.
|
||||
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 46 | 48 | 49 | 50 | 51 => {
|
||||
32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 48 | 49 | 50 | 51 => {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataRead));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataRead });
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataRead,
|
||||
addr_mode: Some(AddrMode::DForm),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
// Load into rD may clobber the tracked value
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// lmw rD, simm(rA) — D-form multi-word load. Reads (32-rD)
|
||||
// consecutive 4-byte words starting at base+simm into
|
||||
// rD..r31. Emits one DataRead per slot.
|
||||
46 => {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra]
|
||||
{
|
||||
let mut addr_w = base.wrapping_add(simm as u32);
|
||||
for _slot in (rd as u32)..32 {
|
||||
if is_in_ranges(addr_w, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (addr_w, XrefKind::DataRead));
|
||||
xrefs.entry(addr_w).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataRead,
|
||||
addr_mode: Some(AddrMode::Multiword),
|
||||
});
|
||||
labels.entry(addr_w).or_insert_with(|| format!("dat_{addr_w:08X}"));
|
||||
}
|
||||
addr_w = addr_w.wrapping_add(4);
|
||||
}
|
||||
}
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// Store instructions: stw, stb, sth, stfs, stfd, stwu, etc.
|
||||
36 | 37 | 38 | 39 | 44 | 45 | 47 | 52 | 53 | 54 | 55 => {
|
||||
36 | 37 | 38 | 39 | 44 | 45 | 52 | 53 | 54 | 55 => {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra] {
|
||||
let data_addr = base.wrapping_add(simm as u32);
|
||||
if is_in_ranges(data_addr, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (data_addr, XrefKind::DataWrite));
|
||||
xrefs.entry(data_addr).or_default().push(Xref { source: abs_addr, kind: XrefKind::DataWrite });
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataWrite,
|
||||
addr_mode: Some(AddrMode::DForm),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
// stmw rS, simm(rA) — D-form multi-word store. Writes
|
||||
// (32-rS) consecutive 4-byte words from rS..r31 to
|
||||
// base+simm onward. Emits one DataWrite per slot.
|
||||
47 => {
|
||||
if ra != 0
|
||||
&& let Some(base) = reg_hi[ra]
|
||||
{
|
||||
let mut addr_w = base.wrapping_add(simm as u32);
|
||||
for _slot in (rd as u32)..32 {
|
||||
if is_in_ranges(addr_w, &data_ranges) {
|
||||
data_annotations.insert(abs_addr, (addr_w, XrefKind::DataWrite));
|
||||
xrefs.entry(addr_w).or_default().push(Xref {
|
||||
source: abs_addr, kind: XrefKind::DataWrite,
|
||||
addr_mode: Some(AddrMode::Multiword),
|
||||
});
|
||||
labels.entry(addr_w).or_insert_with(|| format!("dat_{addr_w:08X}"));
|
||||
}
|
||||
addr_w = addr_w.wrapping_add(4);
|
||||
}
|
||||
}
|
||||
}
|
||||
// X-form: opcode 31 — indexed loads/stores, atomic ops, dcbz.
|
||||
// We can't statically resolve `rA + rB` without tracking rB
|
||||
// too; we record an xref ONLY when rB is also a known
|
||||
// constant (rare) OR when rB is r0 (which encodes as zero).
|
||||
// Falls through to the generic-clobber arm afterwards via
|
||||
// the explicit reg_hi update.
|
||||
31 => {
|
||||
let xo = (instr >> 1) & 0x3FF;
|
||||
let rb = ((instr >> 11) & 0x1F) as usize;
|
||||
let resolve_rab = |reg_hi: &[Option<u32>; 32]| -> Option<u32> {
|
||||
let a = if ra == 0 { Some(0u32) } else { reg_hi[ra] };
|
||||
let b = if rb == 0 { Some(0u32) } else { reg_hi[rb] };
|
||||
match (a, b) {
|
||||
(Some(av), Some(bv)) => Some(av.wrapping_add(bv)),
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
let mode_for_xo = |xo: u32| -> Option<(AddrMode, XrefKind)> {
|
||||
match xo {
|
||||
// Atomic store-conditional
|
||||
150 => Some((AddrMode::Atomic, XrefKind::DataWrite)), // stwcx.
|
||||
214 => Some((AddrMode::Atomic, XrefKind::DataWrite)), // stdcx.
|
||||
// Byte-reverse stores
|
||||
662 => Some((AddrMode::XFormByteRev, XrefKind::DataWrite)), // stwbrx
|
||||
918 => Some((AddrMode::XFormByteRev, XrefKind::DataWrite)), // sthbrx
|
||||
// Byte-reverse loads
|
||||
534 => Some((AddrMode::XFormByteRev, XrefKind::DataRead)), // lwbrx
|
||||
790 => Some((AddrMode::XFormByteRev, XrefKind::DataRead)), // lhbrx
|
||||
// dcbz — cache-line zero (32-byte clear). Treat as a write.
|
||||
1014 => Some((AddrMode::DCBZ, XrefKind::DataWrite)),
|
||||
// Plain X-form indexed stores (the common ones)
|
||||
151 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stwx
|
||||
215 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stbx
|
||||
407 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // sthx
|
||||
183 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stwux
|
||||
247 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stbux
|
||||
439 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // sthux
|
||||
149 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stdx
|
||||
181 => Some((AddrMode::XFormIndexed, XrefKind::DataWrite)), // stdux
|
||||
// Plain X-form indexed loads
|
||||
23 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lwzx
|
||||
87 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lbzx
|
||||
279 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhzx
|
||||
343 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhax
|
||||
55 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lwzux
|
||||
119 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lbzux
|
||||
311 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhzux
|
||||
375 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // lhaux
|
||||
21 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // ldx
|
||||
53 => Some((AddrMode::XFormIndexed, XrefKind::DataRead)), // ldux
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
if let Some((addr_mode, kind)) = mode_for_xo(xo)
|
||||
&& let Some(data_addr) = resolve_rab(®_hi)
|
||||
&& is_in_ranges(data_addr, &data_ranges)
|
||||
{
|
||||
data_annotations.insert(abs_addr, (data_addr, kind));
|
||||
xrefs.entry(data_addr).or_default().push(Xref {
|
||||
source: abs_addr, kind,
|
||||
addr_mode: Some(addr_mode),
|
||||
});
|
||||
labels.entry(data_addr).or_insert_with(|| format!("dat_{data_addr:08X}"));
|
||||
}
|
||||
// Fall through: any X-form op may write rD; invalidate.
|
||||
reg_hi[rd] = None;
|
||||
}
|
||||
// Any other instruction writing to rD: invalidate
|
||||
_ => {
|
||||
// Conservatively invalidate for instructions that modify rD
|
||||
@@ -248,7 +414,7 @@ fn collect_branch_target(instr: u32, addr: u32, labels: &mut HashMap<u32, String
|
||||
let target = if aa { li as u32 } else { addr.wrapping_add(li as u32) };
|
||||
labels.entry(target).or_insert_with(|| format!("loc_{target:08X}"));
|
||||
let kind = if lk { XrefKind::Call } else { XrefKind::Jump };
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind });
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind, addr_mode: None });
|
||||
}
|
||||
16 => {
|
||||
// B-form: bc/bcl
|
||||
@@ -256,7 +422,7 @@ fn collect_branch_target(instr: u32, addr: u32, labels: &mut HashMap<u32, String
|
||||
let aa = instr & 2 != 0;
|
||||
let target = if aa { bd as u32 } else { addr.wrapping_add(bd as u32) };
|
||||
labels.entry(target).or_insert_with(|| format!("loc_{target:08X}"));
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind: XrefKind::Branch });
|
||||
xrefs.entry(target).or_default().push(Xref { source: addr, kind: XrefKind::Branch, addr_mode: None });
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -306,3 +472,36 @@ pub fn resolve_source_label(
|
||||
|
||||
format!("0x{addr:08X}")
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `AddrMode` variant must map to its own tag string; a collision
    /// would make two modes indistinguishable once reduced to their tags.
    #[test]
    fn addr_mode_tags_are_distinct() {
        let modes = [
            AddrMode::DForm,
            AddrMode::LisAddi,
            AddrMode::LisOri,
            AddrMode::Multiword,
            AddrMode::XFormIndexed,
            AddrMode::XFormByteRev,
            AddrMode::Atomic,
            AddrMode::DCBZ,
        ];
        let tags: std::collections::HashSet<&str> = modes.iter().map(|m| m.tag()).collect();
        assert_eq!(tags.len(), modes.len(), "every AddrMode variant must have a unique tag");
    }

    /// A data edge keeps the addressing mode it was constructed with.
    #[test]
    fn xref_struct_carries_addr_mode_for_data_edges() {
        let x = Xref { source: 0x1234, kind: XrefKind::DataWrite, addr_mode: Some(AddrMode::DForm) };
        // Compare the whole Option so a missing mode fails the assertion
        // instead of panicking inside `unwrap`.
        assert_eq!(x.addr_mode.map(AddrMode::tag), Some("d_form"));
    }

    /// Control-flow edges carry no addressing mode.
    #[test]
    fn xref_struct_addr_mode_is_none_for_call_edges() {
        let x = Xref { source: 0x1234, kind: XrefKind::Call, addr_mode: None };
        assert!(x.addr_mode.is_none());
    }
}
|
||||
|
||||
Reference in New Issue
Block a user