M5+M7: indirect-dispatch reachability + .rdata string detection
Two MEDIUM milestones bundled (both opportunistic per plan; both small).
## M5 — indirect-dispatch reachability
- `xenia_analysis::indirect`: per-basic-block register tracker over each
detected function. Recognises the canonical static-vtable pattern
`lis+addi → lwz off(rA) → mtctr → bcctrl` where rA holds a known M3
vtable address. Emits one `Xref { kind: IndirectCall }` per resolvable
bcctrl site.
- PowerPC ABI awareness: `bl`-style calls clobber volatile r0..r12 + ctr
but preserve non-volatile r13..r31, so a vtable pointer parked in r30/r31
before a call survives.
- Label-based basic-block boundaries kill register state, which bounds the
false-positive risk from jump-in paths.
- New `XrefKind::IndirectCall` variant (DB tag `'ind_call'`).
- New SQL view `v_indirect_reachability_from_entry` — a superset of
`v_reachability_from_entry` that also follows `ind_call` edges in the BFS.
Sylpheed yield: 0 edges detected. The binary's 1,001 static lis+addi
references into vtables are nearly all constructor-side vptr writes, not
dispatches; real method dispatch goes through `this->vptr` which requires
alias analysis we explicitly don't do. Documented in SCHEMA.md as the
expected limitation. Three unit tests cover the synthetic-correctness path.
## M7 — string / constant-pool detection
- `xenia_analysis::strings`: scans `.rdata` for runs of ≥ 6 printable
ASCII bytes (NUL-terminated) and ≥ 6 UTF-16LE code units (basic-plane
printable ASCII, NUL u16 terminator).
- New `strings(address PK, encoding, length, content)` table + encoding index.
- Implicit cross-ref via existing `xrefs.kind='ref'` rows whose target
matches a strings.address.
Sylpheed yield: 6,311 ASCII strings (including embedded HLSL shader source
and AS_CB_SURFACE_SWIZZLE_* assertion strings). 9,132 lis+addi sites
cross-reference detected strings, so a single query can list the source PCs
alongside each string. Four unit tests cover encoding detection, NUL termination, and
short-run rejection.
Tests 626→633 (+3 indirect, +4 strings).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -306,6 +306,7 @@ impl DbWriter {
|
||||
///
|
||||
/// `vtables` is the M3 result; pass an empty slice when the caller has
|
||||
/// not run the vtable scan (the tables are still created, just empty).
|
||||
/// `strings` is the M7 result; same convention.
|
||||
#[tracing::instrument(skip_all, name = "db.write_analysis_results")]
|
||||
pub fn write_analysis_results(
|
||||
&mut self,
|
||||
@@ -315,6 +316,7 @@ impl DbWriter {
|
||||
labels: &HashMap<u32, String>,
|
||||
xrefs: &XrefMap,
|
||||
vtables: &[crate::vtables::Vtable],
|
||||
strings: &[crate::strings::DetectedString],
|
||||
) -> anyhow::Result<()> {
|
||||
self.conn.execute_batch("
|
||||
CREATE TABLE functions (
|
||||
@@ -368,6 +370,13 @@ impl DbWriter {
|
||||
base_classes_json VARCHAR -- JSON of base class names (NULL when stripped)
|
||||
);
|
||||
|
||||
CREATE TABLE strings (
|
||||
address BIGINT PRIMARY KEY, -- absolute VA of first byte
|
||||
encoding VARCHAR NOT NULL, -- 'ascii' or 'utf16le'
|
||||
length BIGINT NOT NULL, -- length in bytes (excluding NUL terminator)
|
||||
content VARCHAR NOT NULL -- UTF-8 representation of the string
|
||||
);
|
||||
|
||||
CREATE TABLE demangled_names (
|
||||
address BIGINT, -- VA the mangled name is associated with; NULL when from a non-address source (e.g. RTTI-only string)
|
||||
mangled VARCHAR NOT NULL, -- original mangled symbol (e.g. ?Foo@Bar@@QEAAXXZ)
|
||||
@@ -395,6 +404,7 @@ impl DbWriter {
|
||||
insert_demangled_from_labels(&self.conn, labels, info.import_libraries)?;
|
||||
insert_vtables(&self.conn, vtables, pe, info.image_base)?;
|
||||
insert_methods_and_classes(&self.conn, vtables, labels)?;
|
||||
insert_strings(&self.conn, strings)?;
|
||||
insert_xrefs_streaming(&self.conn, xrefs, pe, info.image_base, func_analysis, labels)?;
|
||||
|
||||
let indices = [
|
||||
@@ -407,6 +417,7 @@ impl DbWriter {
|
||||
("idx_demangled_method", "CREATE INDEX idx_demangled_method ON demangled_names(method_name)"),
|
||||
("idx_methods_function", "CREATE INDEX idx_methods_function ON methods(function_address)"),
|
||||
("idx_classes_rtti", "CREATE INDEX idx_classes_rtti ON classes(rtti_present)"),
|
||||
("idx_strings_encoding", "CREATE INDEX idx_strings_encoding ON strings(encoding)"),
|
||||
("idx_xrefs_target", "CREATE INDEX idx_xrefs_target ON xrefs(target)"),
|
||||
("idx_xrefs_source", "CREATE INDEX idx_xrefs_source ON xrefs(source)"),
|
||||
("idx_xrefs_source_func", "CREATE INDEX idx_xrefs_source_func ON xrefs(source_func)"),
|
||||
@@ -423,7 +434,8 @@ impl DbWriter {
|
||||
|
||||
/// Back-compat wrapper for callers that want the full pre-Phase-3
|
||||
/// "everything in one shot" behaviour. Equivalent to
|
||||
/// `ingest_instructions` + `write_analysis_results` with no M3 vtables.
|
||||
/// `ingest_instructions` + `write_analysis_results` with no M3 vtables /
|
||||
/// M7 strings.
|
||||
#[tracing::instrument(skip_all, name = "db.write_disasm")]
|
||||
pub fn write_disasm(
|
||||
&mut self,
|
||||
@@ -434,7 +446,7 @@ impl DbWriter {
|
||||
xrefs: &XrefMap,
|
||||
) -> anyhow::Result<()> {
|
||||
self.ingest_instructions(pe, info, func_analysis, labels)?;
|
||||
self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[])?;
|
||||
self.write_analysis_results(pe, info, func_analysis, labels, xrefs, &[], &[])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -846,6 +858,30 @@ fn insert_methods_and_classes(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_strings(
|
||||
conn: &Connection,
|
||||
strings: &[crate::strings::DetectedString],
|
||||
) -> anyhow::Result<()> {
|
||||
if strings.is_empty() { return Ok(()); }
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO strings (address, encoding, length, content) VALUES (?, ?, ?, ?)
|
||||
ON CONFLICT DO NOTHING"
|
||||
)?;
|
||||
let mut count = 0u64;
|
||||
for s in strings {
|
||||
stmt.execute(params![
|
||||
s.address as i64,
|
||||
s.encoding,
|
||||
s.length as i64,
|
||||
s.content.as_str(),
|
||||
])?;
|
||||
count += 1;
|
||||
}
|
||||
metrics::counter!("db.rows", "table" => "strings").increment(count);
|
||||
tracing::info!(rows = count, table = "strings", "bulk insert complete");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_demangled_from_labels(
|
||||
conn: &Connection,
|
||||
labels: &HashMap<u32, String>,
|
||||
|
||||
Reference in New Issue
Block a user