//! MSVC C++ name demangling for Xbox 360 binaries. //! //! Wraps [`msvc_demangler::demangle`] (a Rust port of LLVM's //! `MicrosoftDemangle.cpp`) and splits the resulting human-readable string //! into structured fields (namespace path, class name, method name, params //! signature) for storage in the `demangled_names` DB table. //! //! The structured split is heuristic — it operates on the formatted output, //! not the parsed AST. This is good enough for typical RTTI strings of the //! form `?AVClassName@Namespace@@` and standard member functions; exotic //! template / lambda forms degrade gracefully (the structured fields end up //! `None` while `raw_demangled` retains the full LLVM-style output). //! //! Reference: (LLVM `MicrosoftDemangle.cpp` port). use msvc_demangler::DemangleFlags; /// Structured view of one demangled MSVC symbol. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Demangled { /// Original mangled string. pub mangled: String, /// Full LLVM-style demangled output (e.g. `xe::apu::AudioSystem::Setup(void)`). pub raw_demangled: String, /// `::`-joined namespace path leading up to the class, e.g. `xe::apu`. None /// when the symbol is at global scope. pub namespace_path: Option, /// Class name for member functions, e.g. `AudioSystem`. None when the /// symbol is a free function. pub class_name: Option, /// Method or free-function name, e.g. `Setup`. None when the heuristic /// could not separate the name from the rest of the demangled string. pub method_name: Option, /// Parameter signature without the surrounding parens, e.g. `void` or /// `int, char *`. None when not a function or no `(...)` was found. pub params_signature: Option, } /// Demangle one mangled MSVC C++ symbol. Returns `None` if the input does not /// start with `?` (early-out for non-mangled names) OR if the underlying /// demangler fails to parse it. Callers that want a "best effort" record /// (NULL fields + raw=mangled) should use [`demangle_or_raw`] instead. 
pub fn demangle(mangled: &str) -> Option { if !mangled.starts_with('?') { return None; } let raw = msvc_demangler::demangle(mangled, DemangleFlags::llvm()).ok()?; Some(split_structured(mangled.to_string(), raw)) } /// Demangle, or fall back to a record that just carries the original mangled /// string in `raw_demangled` and leaves all structured fields `None`. Useful /// for DB insert paths that want one row per mangled input regardless of /// parser success. pub fn demangle_or_raw(mangled: &str) -> Demangled { if let Some(d) = demangle(mangled) { return d; } Demangled { mangled: mangled.to_string(), raw_demangled: mangled.to_string(), namespace_path: None, class_name: None, method_name: None, params_signature: None, } } /// Split a fully-formatted demangled string into structured fields. /// /// Strategy: /// 1. Find the first un-nested `(` — everything before it is the qualified /// name; everything inside the matching parens is `params_signature`. /// 2. Strip leading return-type tokens before the qualified name (everything /// up to the LAST whitespace not inside `<...>` or `(...)` brackets). /// 3. Split the qualified name on `::` (top-level only) — last segment is /// `method_name`, second-to-last is `class_name`, the rest joined back /// with `::` is `namespace_path`. fn split_structured(mangled: String, raw: String) -> Demangled { let raw_view = raw.as_str(); let (qualified_name, params) = match find_paren_split(raw_view) { Some((before, inside)) => (before.trim_end().to_string(), Some(inside.to_string())), None => (raw_view.to_string(), None), }; // Drop any return-type prefix: keep everything after the last top-level // whitespace boundary (where "top-level" means depth-0 in <...>/(...)). 
let qname_clean = strip_return_type_prefix(&qualified_name); let (namespace_path, class_name, method_name) = split_qname(&qname_clean); Demangled { mangled, raw_demangled: raw, namespace_path, class_name, method_name, params_signature: params, } } /// Returns `(text_before_paren, text_inside_outer_parens)` for the first /// top-level `(` in `s`. Returns `None` when no top-level paren is present. fn find_paren_split(s: &str) -> Option<(&str, &str)> { let bytes = s.as_bytes(); let mut depth_angle: i32 = 0; for (i, &b) in bytes.iter().enumerate() { match b { b'<' => depth_angle += 1, b'>' if depth_angle > 0 => depth_angle -= 1, b'(' if depth_angle == 0 => { // Find matching close at depth 0 on parens. let mut depth_paren = 1i32; let mut depth_angle2 = 0i32; for (j, &b2) in bytes.iter().enumerate().skip(i + 1) { match b2 { b'<' => depth_angle2 += 1, b'>' if depth_angle2 > 0 => depth_angle2 -= 1, b'(' => depth_paren += 1, b')' => { depth_paren -= 1; if depth_paren == 0 { return Some((&s[..i], &s[i + 1..j])); } } _ => {} } } return None; } _ => {} } } None } /// Strip a leading return-type token (everything up to and including the /// last top-level whitespace). E.g. `void __cdecl Foo::Bar` → `Foo::Bar`. fn strip_return_type_prefix(s: &str) -> String { let bytes = s.as_bytes(); let mut depth_angle: i32 = 0; let mut depth_paren: i32 = 0; let mut last_ws_at: Option = None; for (i, &b) in bytes.iter().enumerate() { match b { b'<' => depth_angle += 1, b'>' if depth_angle > 0 => depth_angle -= 1, b'(' => depth_paren += 1, b')' if depth_paren > 0 => depth_paren -= 1, b' ' if depth_angle == 0 && depth_paren == 0 => last_ws_at = Some(i), _ => {} } } match last_ws_at { Some(i) => s[i + 1..].to_string(), None => s.to_string(), } } /// Split a fully-qualified name on top-level `::` and tag the parts. 
fn split_qname(qname: &str) -> (Option<String>, Option<String>, Option<String>) {
    // Returned tuple is `(namespace_path, class_name, method_name)`.
    // Segments are peeled off from the right: the last one is the method
    // name, the one before it the class, and whatever remains is the
    // namespace path. An empty `qname` yields no segments, so every field
    // comes back `None`.
    let mut parts = top_level_split_colon_colon(qname);
    let method = parts.pop();
    let class = parts.pop();
    let namespace = if parts.is_empty() {
        None
    } else {
        Some(parts.join("::"))
    };
    (namespace, class, method)
}

/// Split on top-level `::` — `::` inside `<...>` or `(...)` is preserved.
///
/// Empty segments (e.g. from a leading `::` or from an empty input) are
/// dropped, so the result may be an empty vector.
fn top_level_split_colon_colon(s: &str) -> Vec<String> {
    let bytes = s.as_bytes();
    let mut depth_angle: i32 = 0;
    let mut depth_paren: i32 = 0;
    let mut out: Vec<String> = Vec::new();
    let mut start = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        match bytes[i] {
            b'<' => depth_angle += 1,
            b'>' if depth_angle > 0 => depth_angle -= 1,
            b'(' => depth_paren += 1,
            b')' if depth_paren > 0 => depth_paren -= 1,
            b':' if depth_angle == 0
                && depth_paren == 0
                && i + 1 < bytes.len()
                && bytes[i + 1] == b':' =>
            {
                out.push(s[start..i].to_string());
                // Step over both colons so the second one is not re-examined.
                start = i + 2;
                i += 2;
                continue;
            }
            _ => {}
        }
        i += 1;
    }

    out.push(s[start..].to_string());
    out.retain(|part| !part.is_empty());
    out
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn early_out_on_non_mangled() {
        assert!(demangle("plain_c_name").is_none());
        assert!(demangle("Foo::Bar").is_none());
    }

    #[test]
    fn demangle_or_raw_records_failures() {
        let d = demangle_or_raw("not_mangled");
        assert_eq!(d.mangled, "not_mangled");
        assert_eq!(d.raw_demangled, "not_mangled");
        assert!(d.method_name.is_none());
    }

    #[test]
    fn simple_member_function() {
        // ?Setup@AudioSystem@apu@xe@@QEAAXXZ → public: __cdecl xe::apu::AudioSystem::Setup(void)
        let d = demangle("?Setup@AudioSystem@apu@xe@@QEAAXXZ").expect("should parse");
        assert_eq!(d.method_name.as_deref(), Some("Setup"));
        assert_eq!(d.class_name.as_deref(), Some("AudioSystem"));
        assert_eq!(d.namespace_path.as_deref(), Some("xe::apu"));
        assert_eq!(d.params_signature.as_deref(), Some("void"));
    }

    #[test]
    fn rtti_type_descriptor_string() {
        // RTTI TypeDescriptor mangled name format: ".?AVClassName@@" → "class ClassName".
        // We strip the leading "." and call demangle on the "?AV…" part below in M3.
        // For now confirm the demangler handles the minimal class form.
        let d = demangle("?AVAudioSystem@apu@xe@@").expect("should parse");
        assert!(
            d.raw_demangled.contains("AudioSystem"),
            "raw='{}'",
            d.raw_demangled
        );
    }

    #[test]
    fn split_qname_handles_namespace_chain() {
        let (ns, cls, m) = split_qname("a::b::c::Klass::method");
        assert_eq!(ns.as_deref(), Some("a::b::c"));
        assert_eq!(cls.as_deref(), Some("Klass"));
        assert_eq!(m.as_deref(), Some("method"));
    }

    #[test]
    fn split_qname_handles_short_and_empty_names() {
        assert_eq!(split_qname(""), (None, None, None));
        assert_eq!(split_qname("free_fn"), (None, None, Some("free_fn".to_string())));
        assert_eq!(
            split_qname("Klass::method"),
            (None, Some("Klass".to_string()), Some("method".to_string()))
        );
    }

    #[test]
    fn paren_split_handles_template_in_args() {
        // Templates inside the param list must not confuse paren matching.
        let s = "void __cdecl Foo::Bar(std::vector<int>, std::map<int, std::string>)";
        let (before, inside) = find_paren_split(s).expect("paren found");
        assert_eq!(before, "void __cdecl Foo::Bar");
        assert_eq!(inside, "std::vector<int>, std::map<int, std::string>");
    }

    #[test]
    fn double_colon_inside_template_not_split() {
        let parts = top_level_split_colon_colon("a<x::y>::b<z>::e");
        assert_eq!(parts, vec!["a<x::y>", "b<z>", "e"]);
    }
}