From 296b7fd036351fad542689de1f80eafa4f09e48d Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Fri, 17 Apr 2026 08:47:09 -0400 Subject: [PATCH 1/4] Fix a ton of MSVC demangler bugs --- demangler/msvc/demangle_msvc.cpp | 1143 ++++++++++++++++++++---------- demangler/msvc/demangle_msvc.h | 155 +++- 2 files changed, 913 insertions(+), 385 deletions(-) diff --git a/demangler/msvc/demangle_msvc.cpp b/demangler/msvc/demangle_msvc.cpp index 0489c341a..2eef96099 100644 --- a/demangler/msvc/demangle_msvc.cpp +++ b/demangler/msvc/demangle_msvc.cpp @@ -28,82 +28,32 @@ using namespace std; #endif -#define MAX_DEMANGLE_LENGTH 4096 - -Demangle::Reader::Reader(string data) -{ - m_data = data; - //Check for non-ascii characters - for (auto a : m_data) - { - if (a < 0x20 || a > 0x7e) - throw DemangleException(); - } -} - - -string Demangle::Reader::PeekString(size_t count) -{ - if (count > Length()) - throw DemangleException(); - return m_data.substr(0, count); -} - - -char Demangle::Reader::Peek() -{ - if (1 > Length()) - throw DemangleException(); - return (char)m_data[0]; -} - - -const char* Demangle::Reader::GetRaw() -{ - return m_data.c_str(); -} - - -char Demangle::Reader::Read() -{ - if (1 > Length()) - throw DemangleException(); - char out = m_data[0]; - m_data = m_data.substr(1); - return out; -} +#define MAX_DEMANGLE_LENGTH 32768 +// Define MSVC_DEMANGLE_DEBUG to enable trace logging +#ifdef MSVC_DEMANGLE_DEBUG +#define MSVC_TRACE(...) LogTrace(__VA_ARGS__) +#else +#define MSVC_TRACE(...) 
do {} while(0) +#endif string Demangle::Reader::ReadString(size_t count) { - if (count > Length()) + if (m_ptr + count >= m_end) throw DemangleException(); - string out = m_data.substr(0, count); - m_data = m_data.substr(count + 1); + string out(m_ptr, count); + m_ptr += count + 1; // skip count chars + sentinel return out; } string Demangle::Reader::ReadUntil(char sentinal) { - size_t pos = m_data.find_first_of(sentinal); - if (pos == string::npos) - throw DemangleException(); - return ReadString(pos); -} - - -void Demangle::Reader::Consume(size_t count) -{ - if (count > Length()) + const char* found = (const char*)memchr(m_ptr, sentinal, m_end - m_ptr); + if (!found) throw DemangleException(); - m_data = m_data.substr(count); -} - - -size_t Demangle::Reader::Length() -{ - return m_data.length(); + size_t count = found - m_ptr; + return ReadString(count); } @@ -111,17 +61,15 @@ const TypeBuilder& Demangle::BackrefList::GetTypeBackref(size_t reference) { if (reference < typeList.size()) return typeList[reference]; - // LogDebug("type: %llx - : %d/%d\n", this, typeList.size(), reference); throw DemangleException(string("Backref too large " + std::to_string(reference))); } -string Demangle::BackrefList::GetStringBackref(size_t reference) +const string& Demangle::BackrefList::GetStringBackref(size_t reference) { - // LogDebug("type: %llx - ref: %d\n", this, reference); if (reference < nameList.size()) return nameList[reference]; - LogDebug("type: %p - Backref too large: %zu/%zu\n", this, nameList.size(), reference); + MSVC_TRACE("type: %p - Backref too large: %zu/%zu\n", this, nameList.size(), reference); throw DemangleException(string("Backref too large " + std::to_string(reference))); } @@ -134,72 +82,106 @@ void Demangle::BackrefList::PushTypeBackref(TypeBuilder t) } -void Demangle::BackrefList::PushStringBackref(string& s) +void Demangle::BackrefList::PushStringBackref(const string& s) { if (s.size() > MAX_DEMANGLE_LENGTH) throw DemangleException(); - 
LogDebug("this: %p - Backref: %zu - %s\n", this, nameList.size(), s.c_str()); + MSVC_TRACE("this: %p - Backref: %zu - %s\n", this, nameList.size(), s.c_str()); for (const auto& name : nameList) if (name == s) return; - nameList.push_back(s); + if (nameList.size() <= 9) + nameList.push_back(s); } -void Demangle::BackrefList::PushFrontStringBackref(string& s) +void Demangle::BackrefList::PushTemplateSpecialization(const string& s) { if (s.size() > MAX_DEMANGLE_LENGTH) throw DemangleException(); - // LogDebug("this: %llx - F-Backref: %lld - %s\n", this, nameList.size(), s.c_str()); - nameList.insert(nameList.begin(), s); + templateList.push_back(s); } -Demangle::Demangle(Architecture* arch, string mangledName) : - reader(mangledName), + +Demangle::Demangle(Architecture* arch, const string& mangledName) : + m_mangledName(mangledName), + reader(m_mangledName), m_arch(arch), m_platform(nullptr), m_view(nullptr) { - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -Demangle::Demangle(Ref platform, string mangledName) : - reader(mangledName), +Demangle::Demangle(Ref platform, const string& mangledName) : + m_mangledName(mangledName), + reader(m_mangledName), m_arch(platform->GetArchitecture()), m_platform(platform), m_view(nullptr) { - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -Demangle::Demangle(Ref view, string mangledName) : - reader(mangledName), +Demangle::Demangle(Ref view, const string& mangledName) : + m_mangledName(mangledName), + reader(m_mangledName), m_view(view) { m_platform = view->GetDefaultPlatform(); if (!m_platform) throw DemangleException(); m_arch = m_platform->GetArchitecture(); - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, QualifiedName& name) +void Demangle::Reset(Architecture* arch, const string& mangledName) +{ + m_mangledName = mangledName; + 
reader.Reset(m_mangledName); + m_backrefList.Clear(); + m_arch = arch; + m_platform = nullptr; + m_view = nullptr; + m_varName.clear(); + m_templateParamDepth = 0; +} + + +void Demangle::RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList) const +{ + if (typeName.empty()) + return; + + string& baseName = typeName.back(); + if (baseName.find('<') != string::npos) + return; + + for (auto it = nameBackrefList.templateList.rbegin(); it != nameBackrefList.templateList.rend(); ++it) + { + const string& candidate = *it; + size_t templateStart = candidate.find('<'); + if ((templateStart == string::npos) || (templateStart != baseName.size())) + continue; + if (candidate.compare(0, templateStart, baseName) != 0) + continue; + baseName = candidate; + return; + } +} + + +TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameList& name) { - m_logger->LogDebug("%s: '%s' - %lu\n", __FUNCTION__, reader.GetRaw(), varList.nameList.size()); + MSVC_TRACE("%s: '%s' - %lu\n", __FUNCTION__, reader.GetRaw(), varList.nameList.size()); TypeBuilder newType; - bool _const = false, _volatile = false, isMember = false; //TODO: use this info, _signed = false; + bool _const = false, _volatile = false, isMember = false; BNReferenceType refType; BNTypeClass typeClass = IntegerTypeClass; BNStructureVariant structType; - QualifiedName varName; - QualifiedName typeName; + NameList varName; + NameList typeName; BNNameType classFunctionType; size_t width; @@ -218,7 +200,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _const = false; _volatile = true; break; - case 'C': return TypeBuilder::IntegerType(1, true); + case 'C': return TypeBuilder::IntegerType(1, true, "signed char"); case 'D': return TypeBuilder::IntegerType(1, true); case 'E': return TypeBuilder::IntegerType(1, false); case 'F': return TypeBuilder::IntegerType(2, true); @@ -261,21 +243,69 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, 
bool isReturn, Quali typeClass = EnumerationTypeClass; switch (reader.Read()) { - case '0': width = 1; /* TODO: use these _signed = true; */ break; - case '1': width = 1; /* TODO: use these _signed = false; */ break; - case '2': width = 2; /* TODO: use these _signed = true; */ break; - case '3': width = 2; /* TODO: use these _signed = false; */ break; - case '4': width = 4; /* TODO: use these _signed = true; */ break; - case '5': width = 4; /* TODO: use these _signed = false; */ break; - case '6': width = 4; /* TODO: use these _signed = true; */ break; - case '7': width = 4; /* TODO: use these _signed = false; */ break; + case '0': width = 1; break; + case '1': width = 1; break; + case '2': width = 2; break; + case '3': width = 2; break; + case '4': width = 4; break; + case '5': width = 4; break; + case '6': width = 4; break; + case '7': width = 4; break; default: throw DemangleException(); } break; case 'X': return TypeBuilder::VoidType(); break; case 'Y': - throw DemangleException(); //TODO: handle cointerfaces - case 'Z': return TypeBuilder::VarArgsType(); break; + { + // Multi-dimensional array type: Y...@ + int64_t nDimensions; + DemangleNumber(nDimensions); + _STD_VECTOR elementList; + while (nDimensions--) + { + int64_t element = 0; + DemangleNumber(element); + elementList.push_back(element); + } + NameList arrayName; + newType = DemangleVarType(varList, false, arrayName); + for (auto i = elementList.rbegin(); i != elementList.rend(); i++) + { + newType = TypeBuilder::ArrayType(newType.Finalize(), *i); + } + if (!isReturn) + varList.PushTypeBackref(newType); + return newType; + } + case 'Z': return TypeBuilder::VarArgsType(); + case '?': + { + if (reader.Peek() >= '0' && reader.Peek() <= '9') + { + size_t reference = reader.Read() - '0'; + try + { + return varList.GetTypeBackref(reference); + } + catch (const DemangleException&) + { + if (reference == 2) + return MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); + throw; + } + } + if (reader.Peek() != 
'<') + throw DemangleException(); + + string placeholder = reader.ReadUntil('@'); + if (reader.Peek() == '@') + reader.Consume(); + if (placeholder == "") + return MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); + if (placeholder == "") + return MakeNamedType(UnknownNamedTypeClass, NameList{"decltype(auto)"}); + return MakeNamedType(UnknownNamedTypeClass, NameList{placeholder}); + } case '_': switch (reader.Read()) { @@ -292,16 +322,17 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case 'N': newType = TypeBuilder::BoolType(); break; case 'O': { - QualifiedName name; - //m_logger->Indent(); + NameList name; auto childType = DemangleVarType(varList, false, name); - //m_logger->Dedent(); newType = TypeBuilder::ArrayType(childType.Finalize(), 0); break; } case 'S': newType = TypeBuilder::IntegerType(2, true, "char16_t"); break; case 'U': newType = TypeBuilder::IntegerType(4, true, "char32_t"); break; case 'W': newType = TypeBuilder::IntegerType(2, false, "wchar_t"); break; + case 'P': newType = MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); break; + case 'Q': newType = MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); break; // decltype(auto) displayed as auto + case 'T': newType = MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); break; // decltype(auto) alternate case 'X': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Coclass case 'Y': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Cointerface default: @@ -309,7 +340,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali } break; case '$': - if (reader.PeekString(2) == "$Q") // && + if (reader.PeekMatch("$Q", 2)) // && { reader.Consume(2); typeClass = PointerTypeClass; @@ -317,7 +348,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _const = false; _volatile = false; } - else if (reader.PeekString(2) == "$R") // && volatile + else 
if (reader.PeekMatch("$R", 2)) // && volatile { reader.Consume(2); typeClass = PointerTypeClass; @@ -325,32 +356,44 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _const = false; _volatile = true; } - else if (reader.PeekString(2) == "$A") + else if (reader.PeekMatch("$A", 2)) { reader.Consume(2); char num = reader.Read(); - if (num == 8) - return DemangleFunction(NoNameType, true, varList); - if (num == '6' || num == '7') - return DemangleFunction(NoNameType, false, varList); + if (num >= '6' && num <= '9') + { + // For member function types (8/9), skip the class scope marker @@ + if ((num == '8' || num == '9') && reader.Length() >= 2 + && reader.Peek() == '@' && reader.PeekAt(1) == '@') + reader.Consume(2); + return DemangleFunction(NoNameType, num >= '7', varList); + } throw DemangleException(); } - else if (reader.PeekString(2) == "$C") + else if (reader.PeekMatch("$C", 2)) { reader.Consume(2); DemangleModifiers(_const, _volatile, isMember); - QualifiedName name; - //m_logger->Indent(); + NameList name; newType = DemangleVarType(varList, false, name); - //m_logger->Dedent(); newType.SetConst(_const); newType.SetVolatile(_volatile); return newType; } - else if (reader.PeekString(2) == "$T") + else if (reader.PeekMatch("$T", 2)) + { + reader.Consume(2); + auto t = MakeNamedType(UnknownNamedTypeClass, NameList{"std::nullptr"}); + if (!isReturn) + varList.PushTypeBackref(t); + return t; + } + else if (reader.PeekMatch("$B", 2)) { + // $$B is a type modifier (managed/const) - strip and parse underlying type reader.Consume(2); - return TypeBuilder::ValueType("std::nullptr"); + NameList name; + return DemangleVarType(varList, isReturn, name); } else if (reader.Peek() == '0') { @@ -359,11 +402,34 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali DemangleNumber(value); return TypeBuilder::ValueType(to_string(value)); } + else if (reader.Peek() == 'D') + { + // $D - template type alias / anonymous 
type parameter + reader.Consume(); + NameList name; + return DemangleVarType(varList, isReturn, name); + } + else if (reader.Peek() == 'H' || reader.Peek() == 'I' || reader.Peek() == 'J') + { + // $H/$I/$J - member function pointer as non-type template parameter + // Format: $H@ + // $I has two adjustment numbers, $J has three + char kind = reader.Read(); + auto context = DemangleSymbol(); + // Read adjustment number(s) — NOT $-prefixed, just raw numbers + int adjustments = (kind == 'H') ? 1 : (kind == 'I') ? 2 : 3; + for (int i = 0; i < adjustments && reader.Length() > 0 && reader.Peek() != '@'; i++) + { + int64_t adj; + DemangleNumber(adj); + } + return TypeBuilder::PointerType(m_arch, context.type.Finalize(), false, false, PointerReferenceType); + } else if (reader.Peek() == '1') { reader.Consume(); auto context = DemangleSymbol(); - return TypeBuilder::PointerType(m_arch, context.type.Finalize()); + return TypeBuilder::PointerType(m_arch, context.type.Finalize(), false, false, PointerReferenceType); } else throw DemangleException(); @@ -379,7 +445,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case '8': case '9': //Make a copy of the item in the backref list. Exit early since we don't want this added to the backref list. 
- m_logger->LogDebug("Backref %u %lu", elm - '0', varList.typeList.size()); + MSVC_TRACE("Backref %u %lu", elm - '0', varList.typeList.size()); return varList.GetTypeBackref(elm - '0'); default: throw DemangleException(); @@ -400,10 +466,6 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali throw DemangleException(); case '6': { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } reader.Consume(); auto childType = DemangleFunction(NoNameType, false, varList); newType = TypeBuilder::PointerType(m_arch, @@ -416,10 +478,6 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case '7': //Function pointer case '9': //Class Function pointer { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } reader.Consume(); auto childType = DemangleFunction(NoNameType, true, varList); newType = TypeBuilder::PointerType(m_arch, @@ -431,12 +489,8 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali } case '8': //Named class function pointer { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } reader.Consume(); - DemangleName(name, classFunctionType, varList); + DemangleName(name, classFunctionType, varList, true); name.push_back(""); auto childType = DemangleFunction(NoNameType, true, varList); newType = TypeBuilder::PointerType(m_arch, @@ -448,14 +502,15 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali } default: // Non-numeric { - m_logger->LogDebug("Demangle pointer subtype: '%s'\n", reader.GetRaw()); + MSVC_TRACE("Demangle pointer subtype: '%s'\n", reader.GetRaw()); TypeBuilder child; bool _const2 = false, _volatile2 = false, isMember = false; auto suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const2, _volatile2, isMember); if (reader.Peek() == 'Y') 
//Multi-dimentional array { - m_logger->LogDebug("Demangle multi-dimentional array"); + MSVC_TRACE("Demangle multi-dimentional array"); int64_t nDimentions; reader.Consume(); DemangleNumber(nDimentions); @@ -466,10 +521,8 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali DemangleNumber(element); elementList.push_back(element); } - QualifiedName name; - //m_logger->Indent(); + NameList name; child = DemangleVarType(varList, false, name); - //m_logger->Dedent(); for (auto i = elementList.rbegin(); i != elementList.rend(); i++) { @@ -478,10 +531,8 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali } else { - QualifiedName name; - //m_logger->Indent(); + NameList name; child = DemangleVarType(varList, true, name); - //m_logger->Dedent(); } child.SetConst(_const2); @@ -492,43 +543,35 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _volatile, refType); - newType.SetPointerSuffix(suffix); - m_logger->LogDebug("Name: %s\n", newType.GetString().c_str()); + newType.SetPointerSuffix(PointerSuffixSet(suffix)); + MSVC_TRACE("Name: %s\n", newType.GetString().c_str()); break; } } break; } case EnumerationTypeClass: - m_logger->LogDebug("Demangle enumeration\n"); - //m_logger->Indent(); - DemangleName(typeName, classFunctionType, varList); - //m_logger->Dedent(); - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(EnumNamedTypeClass, typeName), - width, width); + MSVC_TRACE("Demangle enumeration\n"); + DemangleName(typeName, classFunctionType, varList, true); + newType = MakeNamedType(EnumNamedTypeClass, typeName, width, width); break; case StructureTypeClass: - m_logger->LogDebug("Demangle structure\n"); - //m_logger->Indent(); - DemangleName(typeName, classFunctionType, varList); - //m_logger->Dedent(); + MSVC_TRACE("Demangle structure\n"); + DemangleName(typeName, classFunctionType, varList, true); + RewriteTemplateBackrefName(typeName, 
varList); switch (structType) { case ClassStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - ClassNamedTypeClass, typeName)); + newType = MakeNamedType(ClassNamedTypeClass, typeName); break; case StructStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + newType = MakeNamedType(StructNamedTypeClass, typeName); break; case UnionStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnionNamedTypeClass, typeName)); + newType = MakeNamedType(UnionNamedTypeClass, typeName); break; default: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnknownNamedTypeClass, typeName)); + newType = MakeNamedType(UnknownNamedTypeClass, typeName); break; } break; @@ -545,7 +588,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali void Demangle::DemangleNumber(int64_t& num) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); num = 0; int mult = 1; if (reader.Peek() == '?') @@ -580,7 +623,7 @@ void Demangle::DemangleNumber(int64_t& num) void Demangle::DemangleChar(char& ch) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); // Basic char is just the char if (reader.Peek() != '?') { @@ -593,7 +636,7 @@ void Demangle::DemangleChar(char& ch) // Hex char is ?$XX for 2 hex digits XX if (reader.Peek() == '$') { - m_logger->LogDebug("%s: Hex digit '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Hex digit '%s'\n", __FUNCTION__, reader.GetRaw()); reader.Consume(); char c1 = reader.Peek(); @@ -613,7 +656,7 @@ void Demangle::DemangleChar(char& ch) return; } - m_logger->LogDebug("%s: Table lookup '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Table 
lookup '%s'\n", __FUNCTION__, reader.GetRaw()); // Otherwise it's a lookup based on some big table // Thanks, LLVM! @@ -699,47 +742,71 @@ void Demangle::DemangleWideChar(uint16_t& wch) void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - set suffix; - for (size_t i = 0; reader.Peek() != 'Z'; i++) + uint8_t suffix = 0; + for (;;) { bool hasModifiers = false; + if (reader.Peek() == 'Z') + { + if (reader.Length() >= 2 && reader.PeekAt(1) == 'Z') + { + paramList.push_back(FunctionParameter("", Type::VarArgsType(), true, {})); + reader.Consume(); + continue; + } + break; + } if (reader.Peek() == '@') { reader.Consume(); break; } + else if (reader.Length() >= 3 && (reader.PeekMatch("$$V", 3) || reader.PeekMatch("$$Z", 3) || reader.PeekMatch("$$S", 3))) + { + // $$V = empty parameter pack, $$Z = end of parameter pack, $$S = empty non-type param + reader.Consume(3); + continue; + } + else if (reader.Length() >= 2 && reader.PeekMatch("$S", 2)) + { + // $S = empty non-type template parameter + reader.Consume(2); + continue; + } else if (reader.Peek() == '?') { reader.Consume(); suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const, _volatile, isMember); hasModifiers = true; } - FunctionParameter vt; - QualifiedName name; - m_logger->LogDebug("Argument %d: %s", i, reader.GetRaw()); - //m_logger->Indent(); + NameList name; + MSVC_TRACE("Argument %zu: %s", paramList.size(), reader.GetRaw()); TypeBuilder type = DemangleVarType(varList, false, name); - //m_logger->Dedent(); if (hasModifiers) { type.SetConst(_const); type.SetVolatile(_volatile); - type.SetPointerSuffix(suffix); + type.SetPointerSuffix(PointerSuffixSet(suffix)); } - vt.name = name.GetString(); + + FunctionParameter vt; + if (name.size() == 1) + vt.name = 
std::move(name[0]); + else if (name.size() > 1) + vt.name = JoinNameList(name); vt.type = type.Finalize(); vt.defaultLocation = true; - - paramList.push_back(vt); - m_logger->LogDebug("Argument %zu: '%s' - '%s'\n", i, vt.type->GetString().c_str(), reader.GetRaw()); + paramList.push_back(std::move(vt)); + MSVC_TRACE("Argument %zu: '%s' - '%s'\n", paramList.size() - 1, paramList.back().type->GetString().c_str(), reader.GetRaw()); } if (reader.Peek() == 'Z') reader.Consume(); - m_logger->LogDebug("%s: done '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: done '%s'\n", __FUNCTION__, reader.GetRaw()); } @@ -751,6 +818,12 @@ Demangle::NameType Demangle::GetNameType() if (reader.Peek()== '?') { reader.Consume(); + // Check for ??@ (MD5 hashed name) after consuming both ?s + if (reader.Peek() == '@') + { + reader.Consume(); // consume '@' + return NameString; // ReadUntil('@') will get the hash + } return GetNameType(); } else if (reader.Peek() == '$') @@ -773,10 +846,17 @@ Demangle::NameType Demangle::GetNameType() reader.Consume(); return NameReturn; } - else if (reader.PeekString(2) == "_R") + else if (reader.PeekMatch("_R", 2)) { reader.Consume(2); return NameRtti; + } + else if (reader.Peek() >= 'a' && reader.Peek() <= 'z') + { + // Lowercase after ? 
indicates a non-standard extension name + // (e.g., ??null$initializer$ for thread-safe static init guards) + // All standard MSVC operator codes use uppercase/digits/_ + return NameString; } // else if (reader.PeekString(3) == "__E") // { @@ -811,22 +891,22 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, { case '0': { - if (reader.Peek() != '?') - throw DemangleException(); - reader.Consume(); - bool _const = false, _volatile = false, isMember = false; - auto suffix = DemanglePointerSuffix(); - DemangleModifiers(_const, _volatile, isMember); + uint8_t suffix = 0; + if (reader.Peek() == '?') + { + reader.Consume(); + suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); + DemangleModifiers(_const, _volatile, isMember); + } - QualifiedName name; - //m_logger->Indent(); + NameList name; rtti = DemangleVarType(nameBackrefList, false, name); - //m_logger->Dedent(); rtti.SetConst(_const); rtti.SetVolatile(_volatile); - rtti.SetPointerSuffix(suffix); - out = rtti.GetString() + " `RTTI Type Descriptor' "; + rtti.SetPointerSuffix(PointerSuffixSet(suffix)); + out = rtti.GetString() + " `RTTI Type Descriptor'"; classFunctionType = RttiTypeDescriptor; break; } @@ -838,7 +918,7 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, DemangleNumber(num); if (i > 0) { - out += ","; + out += ", "; } out += to_string(num); } @@ -864,10 +944,13 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (reader.Read()) { case '?': functionType = NoNameType; break; + case '0': functionType = ConstructorNameType; break; + case '1': functionType = ConstructorNameType; out = "~"; break; // destructor + case 'B': functionType = OperatorReturnTypeNameType; out = "operator"; break; // conversion operator case '2': 
functionType = OperatorNewNameType; break; case '3': functionType = OperatorDeleteNameType; break; case '4': functionType = OperatorAssignNameType; break; @@ -903,7 +986,7 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'Z': functionType = OperatorMinusEqualNameType; break; case '_': { - m_logger->LogDebug(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (reader.Read()) { case '0': functionType = OperatorDivideEqualNameType; break; @@ -942,20 +1025,22 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'W': // Fallthrough case 'Z': functionType = NoNameType; break; case '_': - m_logger->LogDebug(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (reader.Read()) { case 'A': functionType = ManagedVectorConstructorIteratorNameType; break; case 'B': functionType = ManagedVectorDestructorIteratorNameType; break; case 'C': functionType = EHVectorCopyConstructorIteratorNameType; break; case 'D': functionType = EHVectorVBaseConstructorIteratorNameType; break; - case 'E': functionType = DynamicInitializerNameType; break; - case 'F': functionType = DynamicAtExitDestructorNameType; break; + case 'E': functionType = DynamicInitializerNameType; out = "`dynamic initializer'"; break; + case 'F': functionType = DynamicAtExitDestructorNameType; out = "`dynamic atexit destructor'"; break; case 'G': functionType = VectorCopyConstructorIteratorNameType; break; case 'H': functionType = VectorVBaseCopyConstructorIteratorNameType; break; case 'I': functionType = ManagedVectorCopyConstructorIteratorNameType; break; case 'J': functionType = LocalStaticGuardNameType; break; case 'K': functionType = UserDefinedLiteralOperatorNameType; break; + case 'L': functionType = NoNameType; out = "operator co_await"; break; + case 'M': functionType = NoNameType; out = "operator<=>"; break; // spaceship 
operator default: throw DemangleException("Demangle Lookup Failed"); // fall through } break; @@ -966,16 +1051,16 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) } default: throw DemangleException("Demangle Lookup Failed"); } - out = Type::GetNameTypeString(functionType); + if (out.empty()) + out = Type::GetNameTypeString(functionType); } string Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) { string out; - BackrefList templateBackref; + MSVC_TRACE("DemangleTemplateInstantiationName: '%s'\n", reader.GetRaw()); reader.Consume(2); - m_logger->LogDebug("DemangleTemplateInstantiationName: '%s'\n", reader.GetRaw()); if (reader.Peek() >= '0' && reader.Peek() <= '9') { out = nameBackrefList.GetStringBackref(reader.Read() - '0'); @@ -989,28 +1074,39 @@ string Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) } -string Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) +string Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList) { - //m_logger->Indent(); - DemangleVariableList(params, nameBackrefList); - //m_logger->Dedent(); - m_logger->LogDebug("VariableList done\n"); - out += "<"; - for (size_t i = 0; i < params.size(); i++) + string out; + vector params; + BNNameType dummyFunctionType = NoNameType; + NameList dummyNameList; + BackrefList outerBackrefs; + MSVC_TRACE("DemangleTemplateInstantiationNameInLocalContext: '%s'\n", reader.GetRaw()); + + std::swap(outerBackrefs.typeList, nameBackrefList.typeList); + std::swap(outerBackrefs.nameList, nameBackrefList.nameList); + std::swap(outerBackrefs.templateList, nameBackrefList.templateList); + + try { - if (i == 0) - { - out += params[i].type->GetString(); - } - else - { - out += "," + params[i].type->GetString(); - } + reader.Consume(2); + out = DemangleUnqualifiedSymbolName(dummyNameList, nameBackrefList, dummyFunctionType); + nameBackrefList.PushStringBackref(out); + 
DemangleTemplateParams(params, nameBackrefList, out); + } + catch (...) + { + std::swap(outerBackrefs.typeList, nameBackrefList.typeList); + std::swap(outerBackrefs.nameList, nameBackrefList.nameList); + std::swap(outerBackrefs.templateList, nameBackrefList.templateList); + throw; } - if (out[out.size()-1] == '>') - out += " "; //Be c++03 compliant where we can - out += ">"; + std::swap(outerBackrefs.typeList, nameBackrefList.typeList); + std::swap(outerBackrefs.nameList, nameBackrefList.nameList); + std::swap(outerBackrefs.templateList, nameBackrefList.templateList); + + nameBackrefList.PushTemplateSpecialization(out); nameBackrefList.PushStringBackref(out); return out; } @@ -1027,10 +1123,62 @@ string Demangle::DemangleTemplateParams(vector& params, Backr // } -string Demangle::DemangleUnqualifiedSymbolName(QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) +void Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) +{ + const bool nestedTemplateContext = (m_templateParamDepth > 0); + struct NameBackrefScopeGuard + { + BackrefList& backrefs; + size_t typeCount; + size_t nameCount; + ~NameBackrefScopeGuard() + { + backrefs.typeList.resize(typeCount); + backrefs.nameList.resize(nameCount); + } + }; + struct TemplateDepthGuard + { + size_t& depth; + TemplateDepthGuard(size_t& depth): depth(depth) { depth++; } + ~TemplateDepthGuard() { depth--; } + }; + + { + TemplateDepthGuard depthGuard(m_templateParamDepth); + NameBackrefScopeGuard scopeGuard { + nameBackrefList, + nameBackrefList.typeList.size(), + nameBackrefList.nameList.size() + }; + + DemangleVariableList(params, nameBackrefList); + } + + // Reserve space to reduce reallocation during template string building + out.reserve(out.size() + params.size() * 16 + 2); + out += '<'; + for (size_t i = 0; i < params.size(); i++) + { + if (i > 0) + out += ','; + if (!params[i].type) + continue; + out += params[i].type->GetString(); + } + if 
(out.back() == '>') + out += ' '; // C++03 compat: >> → > > + out += '>'; + nameBackrefList.PushTemplateSpecialization(out); + if (nestedTemplateContext) + nameBackrefList.PushStringBackref(out); +} + + +string Demangle::DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) { string out; - if (reader.PeekString(2) == "?$") + if (reader.PeekMatch("?$", 2)) { reader.Consume(2); out = DemangleTemplateInstantiationName(nameBackrefList); @@ -1055,7 +1203,7 @@ string Demangle::DemangleUnqualifiedSymbolName(QualifiedName& nameList, BackrefL TypeBuilder Demangle::DemangleString() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); // ??_C@_@ if (reader.Peek() != '_') { @@ -1068,6 +1216,8 @@ TypeBuilder Demangle::DemangleString() switch (reader.Peek()) { case '1': + case '2': // UTF-16/UTF-32 encoding variants + case '3': isWideChar = true; break; case '0': @@ -1087,7 +1237,7 @@ TypeBuilder Demangle::DemangleString() } uint64_t length = (uint64_t)lengthRaw; - m_logger->LogDebug("%s: Before CRC32 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Before CRC32 '%s'\n", __FUNCTION__, reader.GetRaw()); // CRC32 (ignored) while (reader.Peek() != '@') @@ -1100,13 +1250,15 @@ TypeBuilder Demangle::DemangleString() bool truncated = false; string name = ""; + string literalPrefix; TypeBuilder type; // String bytes if (isWideChar) { - m_logger->LogDebug("%s: Wide string '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Wide string '%s'\n", __FUNCTION__, reader.GetRaw()); string utf8name; + literalPrefix = "L"; truncated = (length > 64); while (reader.Peek() != '@') { @@ -1127,7 +1279,7 @@ TypeBuilder Demangle::DemangleString() } else { - m_logger->LogDebug("%s: Non-wide string '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Non-wide string '%s'\n", __FUNCTION__, reader.GetRaw()); uint64_t numNulls = 0; size_t endNulls = 0; 
vector chars; @@ -1156,7 +1308,7 @@ TypeBuilder Demangle::DemangleString() // Now time to guess encoding if (chars.size() % 1 != 0) { - m_logger->LogDebug("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); type = Type::ArrayType(Type::IntegerType(1, true), length); } @@ -1164,40 +1316,39 @@ TypeBuilder Demangle::DemangleString() { if (chars.size() % 4 == 0 && numNulls > length * 2 / 3) { - m_logger->LogDebug("%s: Looks like UTF32 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Looks like UTF32 '%s'\n", __FUNCTION__, reader.GetRaw()); string utf8name; for (size_t i = 0; i < chars.size() - endNulls; i += 4) { utf8name += Unicode::UTF32ToUTF8(chars.data() + i); } name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); + literalPrefix = "U"; type = Type::ArrayType(Type::WideCharType(4), length / 4); } else if (numNulls > length / 3) { - m_logger->LogDebug("%s: Looks like UTF16 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Looks like UTF16 '%s'\n", __FUNCTION__, reader.GetRaw()); string utf8name; for (size_t i = 0; i < chars.size() - endNulls; i += 2) { utf8name += Unicode::UTF16ToUTF8(chars.data() + i, 2); } name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); + literalPrefix = "L"; type = Type::ArrayType(Type::WideCharType(2), length / 2); } else { - m_logger->LogDebug("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); type = Type::ArrayType(Type::IntegerType(1, true), length); } } } - if (truncated) - { - name += "..."; - } - m_varName.push_back(name); + 
m_varName.clear(); + m_varName.push_back(fmt::bnformat("{}\"{}\"{}", literalPrefix, name, truncated ? "..." : "")); return type; } @@ -1211,9 +1362,9 @@ TypeBuilder Demangle::DemangleTypeInfoName() bool isMember = false; DemangleModifiers(_const, _volatile, isMember); - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - QualifiedName name; + NameList name; TypeBuilder type = DemangleVarType(m_backrefList, false, name); type.SetConst(_const); type.SetVolatile(_volatile); @@ -1221,79 +1372,299 @@ TypeBuilder Demangle::DemangleTypeInfoName() switch (type.GetClass()) { case NamedTypeReferenceClass: - m_varName = type.GetNamedTypeReference()->GetName(); + { + m_varName = type.GetNamedTypeReference()->GetName().GetContents().GetStrings(); return type; + } default: throw DemangleException("Unexpected type of RTTI Type Name"); } } -void Demangle::DemangleName(QualifiedName& nameList, +void Demangle::DemangleName(NameList& nameList, BNNameType& classFunctionType, - BackrefList& nameBackrefList) + BackrefList& nameBackrefList, + bool typeNameContext) { + size_t nameListSizeAtEntry = nameList.size(); + auto tryDemangleEscapedLookupScopeName = [&]() -> bool + { + if (nameList.size() <= nameListSizeAtEntry) + return false; + + const char* start = reader.GetRaw(); + if (reader.Length() < 4) + return false; + + char prefix = start[0]; + if (!((prefix >= 'A' && prefix <= 'Z') || (prefix == '_'))) + return false; + if (start[1] == '@' || start[1] == '?') + return false; + + const char* limit = start + reader.Length(); + const char* end = nullptr; + for (const char* cur = start + 1; (cur + 1) < limit; cur++) + { + if ((cur[0] == '@') && (cur[1] == '@')) + { + end = cur; + break; + } + } + if (!end) + return false; + + vector escapedNames; + const char* componentStart = start; + while (componentStart < end) + { + const char* componentEnd = componentStart; + while ((componentEnd < end) && (*componentEnd != 
'@')) + { + char ch = *componentEnd; + if (ch == '?') + return false; + if (!((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') || (ch == '_') || (ch == '$'))) + { + return false; + } + componentEnd++; + } + if (componentEnd == componentStart) + return false; + + escapedNames.emplace_back(componentStart, componentEnd - componentStart); + componentStart = componentEnd + 1; + } + + for (const auto& escapedName: escapedNames) + { + nameList.insert(nameList.begin(), escapedName); + nameBackrefList.PushStringBackref(escapedName); + } + reader.SetRaw(end + 2); + return true; + }; + string out; BNNameType functionType; BNNameType dummyFunctionType; vector params; while(1) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (GetNameType()) { case NameString: - m_logger->LogDebug("Demangle String\n"); + MSVC_TRACE("Demangle String\n"); DemangleNameTypeString(out); nameList.insert(nameList.begin(), out); - m_logger->LogDebug("Pushing backref NameString %s", out.c_str()); + MSVC_TRACE("Pushing backref NameString %s", out.c_str()); nameBackrefList.PushStringBackref(out); - m_logger->LogDebug("nameList.front(): %s\n", nameList.front().c_str()); - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("nameList.front(): %s\n", nameList.front().c_str()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); break; case NameLookup: - m_logger->LogDebug("Demangle Lookup\n"); + { + MSVC_TRACE("Demangle Lookup\n"); + if (tryDemangleEscapedLookupScopeName()) + break; DemangleTypeNameLookup(out, functionType); classFunctionType = functionType; nameList.insert(nameList.begin(), out); + // Check if this is a scope specifier. Scope specifiers are ? + // followed by either @?? or directly ?? (for digit scopes like ?3??func@...) 
+ // When nameList has prior components, the operator name is actually a scope index + // Also handle dynamic init/dtor wrapping ??@ (MD5 hash) + if (reader.Length() >= 4 && reader.PeekMatch("??@", 3)) + { + reader.Consume(3); // consume ??@ + _STD_STRING hash = reader.ReadUntil('@'); + nameList.insert(nameList.begin(), "??@" + hash + "@"); + // Consume the trailing @ (name terminator) — the ??@hash@ pattern + // is followed by @@ (end of scoped name) before the function type + if (reader.Length() > 0 && reader.Peek() == '@') + reader.Consume(); + break; + } + bool isScope = false; + if (nameList.size() > nameListSizeAtEntry + 1) + { + if (reader.Length() >= 3 && reader.Peek() == '@' + && reader.PeekAt(1) == '?' && reader.PeekAt(2) == '?') + { + reader.Consume(); // consume '@' separator + isScope = true; + } + else if (reader.Length() >= 2 && reader.Peek() == '?' + && reader.PeekAt(1) == '?') + { + // No '@' separator — digit scope like ?3??func + isScope = true; + } + else if (reader.Length() >= 4 && reader.Peek() >= 'A' && reader.Peek() <= 'Z' + && reader.PeekAt(1) == '@' && reader.PeekAt(2) == '?' + && reader.PeekAt(3) == '?') + { + // Two-char letter scope like ?EL@??func — consume second char + '@' + reader.Consume(2); // consume and '@' + isScope = true; + } + } + if (isScope) + { + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); + if (reader.Length() > 0 && reader.Peek() != '@') + { + const char* saved = reader.GetRaw(); + try + { + char ft = reader.Read(); + if (ft >= 'A' && ft <= 'Z') + DemangleFunction(NoNameType, + ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' && + ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z', + m_backrefList); + } + catch (...) 
+ { + reader.SetRaw(saved); + while (reader.Length() > 0) + { + char c = reader.Read(); + if (c == 'Z' && reader.Length() > 0 && reader.Peek() == '@') + break; + } + } + } + } break; + } case NameBackref: - m_logger->LogDebug("Demangle Backref"); + MSVC_TRACE("Demangle Backref"); out = nameBackrefList.GetStringBackref(reader.Read() - '0'); - m_logger->LogDebug("Demangle Backref: %s", out.c_str()); + MSVC_TRACE("Demangle Backref: %s", out.c_str()); nameList.insert(nameList.begin(), out); break; case NameTemplate: { - m_logger->LogDebug("Demangle Template: '%s'\n", reader.GetRaw()); - BackrefList templateBackref; - out = DemangleUnqualifiedSymbolName(nameList, templateBackref, functionType); - m_logger->LogDebug("Pushing backref NameTemplate %s", out.c_str()); - templateBackref.PushStringBackref(out); - m_logger->LogDebug("Demangling Template variables %s\n", reader.GetRaw()); - DemangleTemplateParams(params, templateBackref, out); + MSVC_TRACE("Demangle Template: '%s'\n", reader.GetRaw()); + if (m_templateParamDepth > 0) + { + const char* saved = reader.GetRaw(); + reader.SetRaw(saved - 2); + out = DemangleTemplateInstantiationNameInLocalContext(nameBackrefList); + } + else + { + out = DemangleUnqualifiedSymbolName(nameList, nameBackrefList, functionType); + if (functionType == NoNameType) + { + MSVC_TRACE("Pushing backref NameTemplate %s", out.c_str()); + nameBackrefList.PushStringBackref(out); + } + MSVC_TRACE("Demangling Template variables %s\n", reader.GetRaw()); + if (typeNameContext) + { + BackrefList localTemplateBackrefs; + localTemplateBackrefs.typeList = nameBackrefList.typeList; + localTemplateBackrefs.templateList = nameBackrefList.templateList; + localTemplateBackrefs.PushStringBackref(out); + DemangleTemplateParams(params, localTemplateBackrefs, out); + for (const auto& specialization: localTemplateBackrefs.templateList) + nameBackrefList.PushTemplateSpecialization(specialization); + } + else + { + DemangleTemplateParams(params, nameBackrefList, out); 
+ } + } nameList.insert(nameList.begin(), out); - nameBackrefList.PushStringBackref(out); break; } case NameConstructor: - m_logger->LogDebug("NameConstructor\n"); - classFunctionType = ConstructorNameType; - DemangleName(nameList, dummyFunctionType, nameBackrefList); + { + MSVC_TRACE("NameConstructor\n"); + bool isScope = (nameList.size() > nameListSizeAtEntry); + if (!isScope) + classFunctionType = ConstructorNameType; + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); + if (isScope) + { + if (reader.Length() > 0 && reader.Peek() != '@') + { + const char* saved = reader.GetRaw(); + try + { + char ft = reader.Read(); + if (ft >= 'A' && ft <= 'Z') + DemangleFunction(NoNameType, + ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' && + ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z', + m_backrefList); + } + catch (...) + { + reader.SetRaw(saved); + while (reader.Length() > 0) + { + char c = reader.Read(); + if (c == 'Z' && reader.Length() > 0 && reader.Peek() == '@') + break; + } + } + } + break; + } if (nameList.size() == 0) throw DemangleException(); nameList.push_back(nameList[nameList.size()-1]); return; + } case NameDestructor: - classFunctionType = ConstructorNameType; - m_logger->LogDebug("NameDestructor\n"); - DemangleName(nameList, dummyFunctionType, nameBackrefList); + { + MSVC_TRACE("NameDestructor\n"); + bool isScope = (nameList.size() > nameListSizeAtEntry); + if (!isScope) + classFunctionType = ConstructorNameType; + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); + if (isScope) + { + if (reader.Length() > 0 && reader.Peek() != '@') + { + const char* saved = reader.GetRaw(); + try + { + char ft = reader.Read(); + if (ft >= 'A' && ft <= 'Z') + DemangleFunction(NoNameType, + ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' && + ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z', + m_backrefList); + } + catch (...) 
+ { + reader.SetRaw(saved); + while (reader.Length() > 0) + { + char c = reader.Read(); + if (c == 'Z' && reader.Length() > 0 && reader.Peek() == '@') + break; + } + } + } + break; + } if (nameList.size() == 0) throw DemangleException(); nameList.push_back("~" + nameList[nameList.size()-1]); return; + } case NameRtti: - m_logger->LogDebug("NameRtti\n"); + MSVC_TRACE("NameRtti\n"); DemangleNameTypeRtti(classFunctionType, nameBackrefList, out); nameList.insert(nameList.begin(), out); break; @@ -1306,11 +1677,57 @@ void Demangle::DemangleName(QualifiedName& nameList, // DemangleInitFiniStub(false); // break; case NameReturn: - m_logger->LogDebug("NameReturn\n"); + { + MSVC_TRACE("NameReturn\n"); + // Check if this is actually a multi-char scope specifier (?B@??) + // rather than a conversion operator. In scope context, ?B followed by + // a name component and then @?? indicates a scope index, not operator B. + if (nameList.size() > nameListSizeAtEntry && reader.Length() >= 1) + { + // Read the "conversion type" which is really a scope suffix + _STD_STRING scopeName; + DemangleNameTypeString(scopeName); + nameBackrefList.PushStringBackref(scopeName); + nameList.insert(nameList.begin(), scopeName); + // Check if this is a scope followed by ?? + if (reader.Length() >= 2 && reader.Peek() == '?' && reader.PeekAt(1) == '?') + { + // Scope specifier — parse the scoped function + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); + if (reader.Length() > 0 && reader.Peek() != '@') + { + const char* saved = reader.GetRaw(); + try + { + char ft = reader.Read(); + if (ft >= 'A' && ft <= 'Z') + DemangleFunction(NoNameType, + ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' && + ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z', + m_backrefList); + } + catch (...) 
+ { + reader.SetRaw(saved); + while (reader.Length() > 0) + { + char c = reader.Read(); + if (c == 'Z' && reader.Length() > 0 && reader.Peek() == '@') + break; + } + } + } + break; + } + break; + } classFunctionType = OperatorReturnTypeNameType; - if (reader.PeekString(2) == "?$") + if (reader.PeekMatch("?$", 2)) { - out = DemangleTemplateInstantiationName(nameBackrefList); + if (m_templateParamDepth > 0) + out = DemangleTemplateInstantiationNameInLocalContext(nameBackrefList); + else + out = DemangleTemplateInstantiationName(nameBackrefList); DemangleTemplateParams(params, nameBackrefList, out); } else @@ -1320,10 +1737,11 @@ void Demangle::DemangleName(QualifiedName& nameList, } nameList.insert(nameList.begin(), out); break; + } default: throw DemangleException(); } - if (nameList.StringSize() > MAX_DEMANGLE_LENGTH) + if (NameListStringSize(nameList) > MAX_DEMANGLE_LENGTH) throw DemangleException(); if (reader.Peek() == '@') { @@ -1333,45 +1751,11 @@ void Demangle::DemangleName(QualifiedName& nameList, } } -Ref Demangle::GetCallingConventionForType(BNCallingConventionName ccName) -{ - string name; - switch (ccName) - { - case NoCallingConvention: name = ""; break; - case CdeclCallingConvention: name = "cdecl"; break; - case PascalCallingConvention: name = "pascal"; break; - case ThisCallCallingConvention: name = "thiscall"; break; - case STDCallCallingConvention: name = "stdcall"; break; - case FastcallCallingConvention: name = "fastcall"; break; - case CLRCallCallingConvention: name = "clrcall"; break; - case EabiCallCallingConvention: name = "eabi"; break; - case VectorCallCallingConvention: name = "vectorcall"; break; - case SwiftCallingConvention: name = "swiftcall"; break; - case SwiftAsyncCallingConvention: name = "swiftasync"; break; - default: break; - } - - if (m_platform) - { - for (const auto& cc : m_platform->GetCallingConventions()) - { - if (cc->GetName() == name) - return cc; - } - } - for (const auto& cc : m_arch->GetCallingConventions()) - { - if 
(cc->GetName() == name) - return cc; - } - return nullptr; -} BNCallingConventionName Demangle::DemangleCallingConvention() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (reader.Read()) { case 'A': //Exported function @@ -1397,10 +1781,18 @@ BNCallingConventionName Demangle::DemangleCallingConvention() } } -set Demangle::DemanglePointerSuffix() + +void Demangle::ConsumeExtendedModifierPrefix() +{ + while (reader.PeekMatch("$A", 2)) + reader.Consume(2); +} + + +uint8_t Demangle::DemanglePointerSuffix() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - set suffix; + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + uint8_t suffix = 0; if (reader.Peek() == '@') return suffix; @@ -1408,15 +1800,15 @@ set Demangle::DemanglePointerSuffix() for (int i = 0; i < 5; i++, elm = reader.Peek()) { if (elm == 'E') - suffix.insert(suffix.end(), Ptr64Suffix); + suffix |= (1u << Ptr64Suffix); else if (elm == 'F') - suffix.insert(suffix.end(), UnalignedSuffix); + suffix |= (1u << UnalignedSuffix); else if (elm == 'G') - suffix.insert(suffix.end(), ReferenceSuffix); + suffix |= (1u << ReferenceSuffix); else if (elm == 'H') - suffix.insert(suffix.end(), LvalueSuffix); + suffix |= (1u << LvalueSuffix); else if (elm == 'I') - suffix.insert(suffix.end(), RestrictSuffix); + suffix |= (1u << RestrictSuffix); else break; reader.Consume(1); @@ -1426,7 +1818,7 @@ set Demangle::DemanglePointerSuffix() void Demangle::DemangleModifiers(bool& _const, bool& _volatile, bool &isMember) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); if (reader.Peek() == '@') return; @@ -1494,9 +1886,9 @@ void Demangle::DemangleModifiers(bool& _const, bool& _volatile, bool &isMember) TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, int funcClass) { - 
m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - set suffix; + uint8_t suffix = 0; TypeBuilder returnType; BNCallingConventionName cc; @@ -1534,6 +1926,7 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe if (pointerSuffix) { suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const, _volatile, isMember); } if (reader.Peek() == '?') @@ -1545,13 +1938,13 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe //No return type shouldHaveReturnType = false; reader.Consume(); - m_logger->LogDebug("Function has no return type %s", reader.GetRaw()); + MSVC_TRACE("Function has no return type %s", reader.GetRaw()); } else { //Demangle function return type bool return_const = false, return_volatile = false, isMember = false; - set return_suffix; + uint8_t return_suffix = 0; bool hasModifiers = false; //Check for modifiers before return type if (reader.Peek() == '?') @@ -1562,23 +1955,21 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe hasModifiers = true; } - QualifiedName name; - m_logger->LogDebug("Demangle function return type %s", reader.GetRaw()); - //m_logger->Indent(); + NameList name; + MSVC_TRACE("Demangle function return type %s", reader.GetRaw()); returnType = DemangleVarType(nameBackrefList, true, name); - m_logger->LogDebug("Return type: %s", returnType.GetString().c_str()); - //m_logger->Dedent(); + MSVC_TRACE("Return type: %s", returnType.GetString().c_str()); if (hasModifiers) { returnType.SetConst(return_const); returnType.SetVolatile(return_volatile); - returnType.SetPointerSuffix(return_suffix); + returnType.SetPointerSuffix(PointerSuffixSet(return_suffix)); } } if (reader.Peek() == '@') reader.Consume(); - m_logger->LogDebug("\tDemangle Function Parameters %s", reader.GetRaw()); + 
MSVC_TRACE("\tDemangle Function Parameters %s", reader.GetRaw()); vector params; bool needsThisPtr = false; if (cc == ThisCallCallingConvention) @@ -1603,19 +1994,17 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe if (needsThisPtr) { // Insert implicit "this" parameter for thiscall - // TODO: Replace this with calling convention / platform callbacks to insert thisptr (ask rss) - QualifiedName thisName = m_varName; - if (thisName.size() > 0) - thisName.erase(thisName.end() - 1); - params.push_back(FunctionParameter("this", Type::PointerType(m_arch, Type::NamedType(thisName, Type::VoidType())), true, {})); + NameList thisName = m_varName; + if (classFunctionType != OperatorReturnTypeNameType && !thisName.empty()) + thisName.pop_back(); + params.push_back(FunctionParameter("this", Type::PointerType(m_arch, Type::NamedType(ToQualifiedName(thisName), Type::VoidType())), true, {})); } DemangleVariableList(params, m_backrefList); - if (params.size() >= 1 && params.back().type->GetClass() == VoidTypeClass) + if (params.size() >= 1 && params.back().type.GetValue() && params.back().type->GetClass() == VoidTypeClass) params.pop_back(); - // TODO: fix calling convention Ref returnTypeObj; if (shouldHaveReturnType) returnTypeObj = returnType.Finalize(); @@ -1624,71 +2013,66 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe TypeBuilder newType = TypeBuilder::FunctionType(returnTypeObj, nullptr, params); newType.SetConst(_const); newType.SetVolatile(_volatile); - newType.SetPointerSuffix(suffix); + newType.SetPointerSuffix(PointerSuffixSet(suffix)); newType.SetNameType(classFunctionType); newType.SetCallingConventionName(cc); - auto convention = GetCallingConventionForType(cc); - if (convention) - newType.SetCallingConvention(convention); - m_logger->LogDebug("Successfully Created Function Type!\n"); + MSVC_TRACE("Successfully Created Function Type!\n"); return newType; } TypeBuilder Demangle::DemangleData() { 
- m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - QualifiedName name; - //m_logger->Indent(); + NameList name; TypeBuilder newType = DemangleVarType(m_backrefList, false, name); - //m_logger->Dedent(); auto suffix = DemanglePointerSuffix(); DemangleModifiers(_const, _volatile, isMember); - newType.SetConst(_const); - newType.SetVolatile(_volatile); - newType.SetPointerSuffix(suffix); + if (newType.GetClass() != PointerTypeClass) + { + newType.SetConst(_const); + newType.SetVolatile(_volatile); + newType.SetPointerSuffix(PointerSuffixSet(suffix)); + } return newType; } TypeBuilder Demangle::DemanagleRTTI(BNNameType nameType) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; if (reader.Length() > 0) DemangleModifiers(_const, _volatile, isMember); - QualifiedName typeName = m_varName; - m_logger->LogDebug("new struct type\n"); - TypeBuilder newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + NameList typeName = m_varName; + MSVC_TRACE("new struct type\n"); + TypeBuilder newType = MakeNamedType(StructNamedTypeClass, typeName); newType.SetNameType(nameType); newType.SetConst(_const); newType.SetVolatile(_volatile); - m_logger->LogDebug("log: %s\n", newType.GetString().c_str()); + MSVC_TRACE("log: %s\n", newType.GetString().c_str()); return newType; } TypeBuilder Demangle::DemangleVTable() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; DemangleModifiers(_const, _volatile, isMember); - TypeBuilder newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - 
StructNamedTypeClass, m_varName)); + TypeBuilder newType = MakeNamedType(StructNamedTypeClass, m_varName); if (reader.Peek() != '@') { - QualifiedName typeName; + NameList typeName; BNNameType classFunctionType = NoNameType; - DemangleName(typeName, classFunctionType, m_backrefList); + DemangleName(typeName, classFunctionType, m_backrefList, true); string suffix = m_varName.back(); - m_varName.back() += "{for `" + typeName.GetString() + "'}"; + m_varName.back() += "{for `" + JoinNameList(typeName) + "'}"; typeName.push_back(suffix); - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + newType = MakeNamedType(StructNamedTypeClass, typeName); } newType.SetConst(_const); newType.SetVolatile(_volatile); @@ -1700,10 +2084,9 @@ TypeBuilder Demangle::DemangleVTable() Demangle::DemangleContext Demangle::DemangleSymbol() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - //m_logger->Indent(); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); BNNameType classFunctionType = NoNameType; - QualifiedName varName; + NameList varName; if (reader.Peek() == '.') { @@ -1717,10 +2100,37 @@ Demangle::DemangleContext Demangle::DemangleSymbol() throw DemangleException(); } + // MD5-hashed names: ??@<32hex>@ + if (reader.Length() >= 2 && reader.PeekMatch("?@", 2)) + { + reader.Consume(2); // consume ?@ + _STD_STRING hash = reader.ReadUntil('@'); + m_varName.push_back("??@" + hash + "@"); + return { TypeBuilder::VoidType(), NoAccess, NoScope }; + } + DemangleName(varName, classFunctionType, m_backrefList); - m_logger->LogDebug("Done demangling Name: '%s' - '%s'", varName.GetString().c_str(), reader.GetRaw()); + MSVC_TRACE("Done demangling Name: '%s' - '%s'", JoinNameList(varName).c_str(), reader.GetRaw()); m_varName = varName; + if ((classFunctionType == DynamicInitializerNameType || classFunctionType == DynamicAtExitDestructorNameType) + && m_varName.size() >= 2) + { + string target; + 
for (size_t i = 0; i + 1 < m_varName.size(); i++) + { + if (!target.empty()) + target += "::"; + target += m_varName[i]; + } + + string displayName = m_varName.back(); + if (!displayName.empty() && displayName.back() == '\'') + displayName.pop_back(); + displayName = fmt::bnformat("{} for '{}''", displayName, target); + m_varName = {displayName}; + } + DemangleContext context; if (classFunctionType == StringNameType) @@ -1770,6 +2180,21 @@ Demangle::DemangleContext Demangle::DemangleSymbol() case 'Z': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; case '$': { + if (reader.Peek() == 'B') + { + // Vcall thunk: $B + reader.Consume(); + int64_t offset; + DemangleNumber(offset); + m_varName.back() = "`vcall'{" + to_string(offset) + ", {flat}}'"; + // Consume calling convention char + this-type flag char + if (reader.Length() >= 1) + reader.Consume(); // calling convention (A=cdecl, etc.) + if (reader.Length() >= 1 && reader.Peek() != '@') + reader.Consume(); // this-type flag + context = {TypeBuilder::VoidType(), NoAccess, NoScope}; + break; + } int funcClass = VirtualThunkFunctionClass; if (reader.Peek() == 'R') { @@ -1820,7 +2245,8 @@ bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, return false; try { + // Can't use thread_local here — BinaryView overload needs platform/view state Demangle demangle(view, mangledName); // For now we're throwing away MemberScope and MemberAccess outType = demangle.DemangleSymbol().type.Finalize(); diff --git a/demangler/msvc/demangle_msvc.h b/demangler/msvc/demangle_msvc.h index c2eeb79f6..c5bced449 100644 --- a/demangler/msvc/demangle_msvc.h +++ b/demangler/msvc/demangle_msvc.h @@ -84,17 +84,62 @@ class Demangle class Reader { public: - Reader(_STD_STRING data); - _STD_STRING PeekString(size_t count=1); - char Peek(); - const char* GetRaw(); - char Read(); - _STD_STRING ReadString(size_t count=1); + Reader(const _STD_STRING& 
data) : m_ptr(data.c_str()), m_end(data.c_str() + data.size()) + { + for (const char* p = m_ptr; p < m_end; p++) + if (*p < 0x20 || *p > 0x7e) + throw DemangleException(); + } + void Reset(const _STD_STRING& data) + { + m_ptr = data.c_str(); + m_end = data.c_str() + data.size(); + for (const char* p = m_ptr; p < m_end; p++) + if (*p < 0x20 || *p > 0x7e) + throw DemangleException(); + } + bool PeekMatch(const char* str, size_t len) const + { + if (len > Length()) + throw DemangleException(); + if (len == 2) + return m_ptr[0] == str[0] && m_ptr[1] == str[1]; + if (len == 3) + return m_ptr[0] == str[0] && m_ptr[1] == str[1] && m_ptr[2] == str[2]; + return memcmp(m_ptr, str, len) == 0; + } + char PeekAt(size_t offset) const + { + if (m_ptr + offset >= m_end) + throw DemangleException(); + return m_ptr[offset]; + } + char Peek() const + { + if (m_ptr >= m_end) + throw DemangleException(); + return *m_ptr; + } + const char* GetRaw() const { return m_ptr; } + void SetRaw(const char* p) { m_ptr = p; } + char Read() + { + if (m_ptr >= m_end) + throw DemangleException(); + return *m_ptr++; + } + void Consume(size_t count = 1) + { + if (m_ptr + count > m_end) + throw DemangleException(); + m_ptr += count; + } + size_t Length() const { return (size_t)(m_end - m_ptr); } + _STD_STRING ReadString(size_t count); _STD_STRING ReadUntil(char sentinal); - void Consume(size_t count=1); - size_t Length(); private: - _STD_STRING m_data; + const char* m_ptr; + const char* m_end; }; class BackrefList @@ -102,28 +147,83 @@ class Demangle public: _STD_VECTOR typeList; _STD_VECTOR<_STD_STRING> nameList; + _STD_VECTOR<_STD_STRING> templateList; + void Clear() { typeList.clear(); nameList.clear(); templateList.clear(); } const BN::TypeBuilder& GetTypeBackref(size_t reference); - _STD_STRING GetStringBackref(size_t reference); + const _STD_STRING& GetStringBackref(size_t reference); void PushTypeBackref(BN::TypeBuilder t); - void PushStringBackref(_STD_STRING& s); - void 
PushFrontStringBackref(_STD_STRING& s); + void PushStringBackref(const _STD_STRING& s); + void PushTemplateSpecialization(const _STD_STRING& s); }; + // Internal name list type - avoids QualifiedName overhead during parsing + typedef _STD_VECTOR<_STD_STRING> NameList; + + static size_t NameListStringSize(const NameList& nl) + { + size_t total = 0; + for (const auto& s : nl) + total += s.size(); + if (nl.size() > 1) + total += (nl.size() - 1) * 2; // "::" separators + return total; + } + + static _STD_STRING JoinNameList(const NameList& nl) + { + if (nl.empty()) return {}; + if (nl.size() == 1) return nl[0]; + _STD_STRING out; + out.reserve(NameListStringSize(nl)); + out = nl[0]; + for (size_t i = 1; i < nl.size(); i++) + { + out += ':'; + out += ':'; + out += nl[i]; + } + return out; + } + + static BN::QualifiedName ToQualifiedName(const NameList& nl) + { + return BN::QualifiedName(nl); + } + + static _STD_SET PointerSuffixSet(uint8_t suffixBits) + { + _STD_SET suffix; + for (int i = 0; i < 8; i++) + { + if (suffixBits & (1u << i)) + suffix.insert((BNPointerSuffix)i); + } + return suffix; + } + + static BN::TypeBuilder MakeNamedType(BNNamedTypeReferenceClass cls, const NameList& nl, size_t width = 0, size_t align = 0) + { + return BN::TypeBuilder::NamedType( + BN::NamedTypeReference::GenerateAutoDemangledTypeReference(cls, ToQualifiedName(nl)), width, align); + } + + _STD_STRING m_mangledName; // Owns the string; Reader points into it Reader reader; BackrefList m_backrefList; BN::Architecture* m_arch; BN::Ref m_platform; BN::Ref m_view; - BN::QualifiedName m_varName; - BN::Ref m_logger; + NameList m_varName; + size_t m_templateParamDepth = 0; NameType GetNameType(); - BN::TypeBuilder DemangleVarType(BackrefList& varList, bool isReturn, BN::QualifiedName& name); + void RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList) const; + BN::TypeBuilder DemangleVarType(BackrefList& varList, bool isReturn, NameList& name); void 
DemangleNumber(int64_t& num); void DemangleChar(char& ch); void DemangleWideChar(uint16_t& wch); void DemangleModifiers(bool& _const, bool& _volatile, bool& isMember); - _STD_SET DemanglePointerSuffix(); + uint8_t DemanglePointerSuffix(); void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, @@ -131,12 +231,12 @@ class Demangle _STD_STRING& rttiTypeName); void DemangleTypeNameLookup(_STD_STRING& out, BNNameType& functionType); void DemangleNameTypeString(_STD_STRING& out); - void DemangleNameTypeBackref(_STD_STRING& out, const _STD_VECTOR<_STD_STRING>& backrefList); - void DemangleName(BN::QualifiedName& nameList, + void DemangleName(NameList& nameList, BNNameType& classFunctionType, - BackrefList& nameBackrefList); - BN::Ref GetCallingConventionForType(BNCallingConventionName ccName); + BackrefList& nameBackrefList, + bool typeNameContext = false); BNCallingConventionName DemangleCallingConvention(); + void ConsumeExtendedModifierPrefix(); BN::TypeBuilder DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, int funcClass = NoneFunctionClass); BN::TypeBuilder DemangleData(); void DemangleNameTypeRtti(BNNameType& classFunctionType, @@ -144,9 +244,10 @@ class Demangle _STD_STRING& out); BN::TypeBuilder DemangleVTable(); BN::TypeBuilder DemanagleRTTI(BNNameType classFunctionType); + _STD_STRING DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList); _STD_STRING DemangleTemplateInstantiationName(BackrefList& nameBackrefList); - _STD_STRING DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); - _STD_STRING DemangleUnqualifiedSymbolName(BN::QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType); + void DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); + _STD_STRING 
DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType); BN::TypeBuilder DemangleString(); BN::TypeBuilder DemangleTypeInfoName(); @@ -157,11 +258,12 @@ class Demangle BNMemberAccess access; BNMemberScope scope; }; - Demangle(BN::Architecture* arch, _STD_STRING mangledName); - Demangle(BN::Ref view, _STD_STRING mangledName); - Demangle(BN::Ref platform, _STD_STRING mangledName); + Demangle(BN::Architecture* arch, const _STD_STRING& mangledName); + Demangle(BN::Ref view, const _STD_STRING& mangledName); + Demangle(BN::Ref platform, const _STD_STRING& mangledName); + void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); DemangleContext DemangleSymbol(); - BN::QualifiedName GetVarName() const { return m_varName; } + BN::QualifiedName GetVarName() const { return BN::QualifiedName(m_varName); } // Be careful not to accidentally implicitly cast a BinaryView* to a bool static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, @@ -176,4 +278,3 @@ class Demangle static bool DemangleMS(const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); }; - From 0a5390f4878b3664deb0f11d3c514ebf52827579 Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Fri, 17 Apr 2026 08:48:40 -0400 Subject: [PATCH 2/4] Optimize MSVC demangler and DemangledTypeNode performance --- demangler/gnu3/demangle_gnu3.cpp | 10 +- demangler/gnu3/demangled_type_node.cpp | 282 ++++++++++++++++++------- demangler/gnu3/demangled_type_node.h | 75 ++++--- demangler/msvc/CMakeLists.txt | 4 +- demangler/msvc/demangle_msvc.cpp | 268 +++++++++++------------ demangler/msvc/demangle_msvc.h | 54 ++--- 6 files changed, 392 insertions(+), 301 deletions(-) diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index f2130a4e1..a1cab5e88 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -602,7 +602,7 @@ 
DemangledTypeNode DemangleGNU3::DemangleType() if (vltl) type.SetVolatile(true); if (rstrct) - type.SetPointerSuffix({RestrictSuffix}); + type.SetPointerSuffixBits(1u << RestrictSuffix); PushType(type); return type; } @@ -2210,7 +2210,7 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() type.SetTypeName(std::move(newName)); type.SetConst(tmpType.IsConst()); type.SetVolatile(tmpType.IsVolatile()); - type.SetPointerSuffix(tmpType.GetPointerSuffix()); + type.SetPointerSuffixBits(tmpType.GetPointerSuffixBits()); } else { @@ -2735,7 +2735,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) varName = type.GetTypeName(); cnst = type.IsConst(); vltl = type.IsVolatile(); - auto suffix = type.GetPointerSuffix(); + auto suffix = type.GetPointerSuffixBits(); if (m_reader.Peek() == 'J') { m_reader.Consume(); @@ -2828,11 +2828,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) if (isReturnTypeUnknown) type.SetReturnTypeConfidence(BN_MINIMUM_CONFIDENCE); - type.SetPointerSuffix(suffix); + type.SetPointerSuffixBits(suffix); type.SetConst(cnst); type.SetVolatile(vltl); if (rstrct) - type.SetPointerSuffix({RestrictSuffix}); + type.SetPointerSuffixBits(1u << RestrictSuffix); // PrintTables(); MyLogDebug("Done: %s%s%s\n", type.GetStringBeforeName().c_str(), varName.GetString().c_str(), diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/gnu3/demangled_type_node.cpp index 89bfb2fad..f3f744783 100644 --- a/demangler/gnu3/demangled_type_node.cpp +++ b/demangler/gnu3/demangled_type_node.cpp @@ -23,14 +23,30 @@ using namespace BinaryNinja; using namespace std; #endif +static const char* CallingConventionString[] = +{ + "", + "__cdecl", + "__pascal", + "__thiscall", + "__stdcall", + "__fastcall", + "__clrcall", + "__eabi", + "__vectorcall", + "__swiftcall", + "__swiftasync" +}; + DemangledTypeNode::DemangledTypeNode() - : m_typeClass(VoidTypeClass), m_width(0), m_alignment(0), - m_const(false), m_volatile(false), 
m_signed(false), - m_hasTemplateArgs(false), m_nameType(NoNameType), - m_ntrClass(UnknownNamedTypeClass), - m_pointerReference(PointerReferenceType), m_elements(0), - m_returnTypeConfidence(BN_DEFAULT_CONFIDENCE) + : m_typeClass(VoidTypeClass), m_ntrClass(UnknownNamedTypeClass), + m_pointerReference(PointerReferenceType), m_nameType(NoNameType), + m_callingConventionName(NoCallingConvention), m_pointerSuffixBits(0), + m_returnTypeConfidence(BN_DEFAULT_CONFIDENCE), + m_const(false), m_volatile(false), m_signed(false), m_hasVariableArgs(false), + m_hasTemplateArgs(false), m_width(0), m_alignment(0), + m_elements(0) { } @@ -73,6 +89,25 @@ DemangledTypeNode DemangledTypeNode::FloatType(size_t width, const string& altNa } +DemangledTypeNode DemangledTypeNode::WideCharType(size_t width, const string& altName) +{ + DemangledTypeNode n; + n.m_typeClass = WideCharTypeClass; + n.m_width = width; + n.m_altName = altName; + return n; +} + + +DemangledTypeNode DemangledTypeNode::ValueType(const string& value) +{ + DemangledTypeNode n; + n.m_typeClass = ValueTypeClass; + n.m_altName = value; + return n; +} + + DemangledTypeNode DemangledTypeNode::VarArgsType() { DemangledTypeNode n; @@ -122,7 +157,7 @@ DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, DemangledTypeNode n; n.m_typeClass = NamedTypeReferenceClass; n.m_ntrClass = cls; - n.m_nameSegments = std::make_shared>(std::move(nameSegments)); + n.m_nameSegments = std::move(nameSegments); n.m_width = width; n.m_alignment = align; return n; @@ -139,7 +174,7 @@ DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, vector nameSegments) { m_ntrClass = cls; - m_nameSegments = std::make_shared>(std::move(nameSegments)); + m_nameSegments = std::move(nameSegments); } @@ -151,9 +186,7 @@ void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, const QualifiedNam string DemangledTypeNode::GetTypeNameString() const { - 
if (!m_nameSegments) - return {}; - const auto& segs = *m_nameSegments; + const auto& segs = m_nameSegments; size_t n = segs.size(); if (n == 0) return {}; @@ -179,10 +212,8 @@ string DemangledTypeNode::GetTypeNameString() const size_t DemangledTypeNode::NameStringSize() const { - if (!m_nameSegments) - return 0; size_t total = 0; - for (const auto& s : *m_nameSegments) + for (const auto& s : m_nameSegments) total += s.size(); return total; } @@ -200,7 +231,27 @@ string DemangledTypeNode::GetModifierString() const } +// Append " const", " volatile", or " const volatile" to out. No-op if neither. +void DemangledTypeNode::AppendModifiers(string& out) const +{ + if (m_const && m_volatile) + out += " const volatile"; + else if (m_const) + out += " const"; + else if (m_volatile) + out += " volatile"; +} + + string DemangledTypeNode::GetPointerSuffixString() const +{ + string out; + AppendPointerSuffix(out); + return out; +} + + +void DemangledTypeNode::AppendPointerSuffix(string& out) const { static const char* suffixStrings[] = { "__ptr64", @@ -210,14 +261,30 @@ string DemangledTypeNode::GetPointerSuffixString() const "&&" }; - string out; - for (auto& s : m_pointerSuffix) + for (uint8_t bits = m_pointerSuffixBits, i = 0; bits && i < sizeof(suffixStrings) / sizeof(suffixStrings[0]); bits >>= 1, i++) // stay inside the table even if stray high bits are set { - if (!out.empty() && out.back() != ' ') - out += ' '; - out += suffixStrings[s]; + if (bits & 1) + { + if (!out.empty() && out.back() != ' ') + out += ' '; + out += suffixStrings[i]; + } + } +} + + +void DemangledTypeNode::AppendTypeName(string& out) const +{ + const auto& segs = m_nameSegments; + size_t n = segs.size(); + if (n == 0) + return; + out += segs[0]; + for (size_t i = 1; i < n; i++) + { + out += "::"; + out += segs[i]; } - return out; } @@ -239,9 +306,6 @@ string DemangledTypeNode::GetStringAfterName() const void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* parentType) const { - string modifiers = GetModifierString(); - string ptrSuffix = GetPointerSuffixString(); - switch 
(m_typeClass) { case FunctionTypeClass: @@ -249,15 +313,25 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p if (m_childType) { if (!out.empty() && out.back() != ' ') - out += " "; + out += ' '; m_childType->AppendBeforeName(out, this); } // If parent is a pointer, add "(" for function pointer syntax if (parentType && parentType->m_typeClass == PointerTypeClass) { if (!out.empty() && out.back() != ' ') - out += " "; - out += "("; + out += ' '; + out += '('; + } + if (m_callingConventionName < (sizeof(CallingConventionString) / sizeof(CallingConventionString[0]))) + { + const char* callingConvention = CallingConventionString[m_callingConventionName]; + if (callingConvention[0] != 0) + { + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + out += callingConvention; + } } break; @@ -267,11 +341,18 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p else if (m_signed && m_width == 1) out += "char"; else if (m_signed) - out += "int" + to_string(m_width * 8) + "_t"; + { + out += "int"; + out += to_string(m_width * 8); + out += "_t"; + } else - out += "uint" + to_string(m_width * 8) + "_t"; - if (!modifiers.empty()) - out += " " + modifiers; + { + out += "uint"; + out += to_string(m_width * 8); + out += "_t"; + } + AppendModifiers(out); break; case FloatTypeClass: @@ -283,22 +364,22 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p case 4: out += "float"; break; case 8: out += "double"; break; case 10: out += "long double"; break; - default: out += "float" + to_string(m_width * 8); break; + default: + out += "float"; + out += to_string(m_width * 8); + break; } - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case BoolTypeClass: out += "bool"; - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case VoidTypeClass: out += "void"; - if (!modifiers.empty()) - out += " " + modifiers; + 
AppendModifiers(out); break; case VarArgsTypeClass: @@ -310,24 +391,24 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p m_childType->AppendBeforeName(out, this); switch (m_pointerReference) { - case ReferenceReferenceType: out += "&"; break; - case PointerReferenceType: out += "*"; break; + case ReferenceReferenceType: out += '&'; break; + case PointerReferenceType: out += '*'; break; case RValueReferenceType: out += "&&"; break; default: break; } - if (!ptrSuffix.empty()) - out += " " + ptrSuffix; - if (!modifiers.empty()) - out += " " + modifiers; + if (m_pointerSuffixBits != 0) + { + out += ' '; + AppendPointerSuffix(out); + } + AppendModifiers(out); break; case ArrayTypeClass: if (m_childType) m_childType->AppendBeforeName(out, this); if (parentType && parentType->m_typeClass == PointerTypeClass) - { out += " ("; - } break; case NamedTypeReferenceClass: @@ -339,9 +420,20 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p case EnumNamedTypeClass: out += "enum "; break; default: break; } - out += GetTypeNameString(); - if (!modifiers.empty()) - out += " " + modifiers; + AppendTypeName(out); + AppendModifiers(out); + break; + + case WideCharTypeClass: + if (!m_altName.empty()) + out += m_altName; + else + out += "wchar_t"; + AppendModifiers(out); + break; + + case ValueTypeClass: + out += m_altName; break; default: @@ -358,30 +450,32 @@ static string FormatArrayCount(uint64_t elements) void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* parentType) const { - string modifiers = GetModifierString(); - string ptrSuffix = GetPointerSuffixString(); - switch (m_typeClass) { case FunctionTypeClass: { // Close the "(" from before-name if parent is pointer if (parentType && parentType->m_typeClass == PointerTypeClass) - out += ")"; + out += ')'; - out += "("; + out += '('; for (size_t i = 0; i < m_params.size(); i++) { if (i != 0) out += ", "; if (m_params[i].type) - out += 
m_params[i].type->GetString(); + m_params[i].type->AppendString(out); } - out += ")"; - if (!modifiers.empty()) - out += " " + modifiers; - if (!ptrSuffix.empty()) - out += ptrSuffix; + if (m_hasVariableArgs) + { + if (!m_params.empty()) + out += ", "; + out += "..."; + } + out += ')'; + AppendModifiers(out); + if (m_pointerSuffixBits != 0) + AppendPointerSuffix(out); // Return type's after-name tokens if (m_childType) m_childType->AppendAfterName(out, this); @@ -404,17 +498,34 @@ void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* pa } -string DemangledTypeNode::GetString() const +void DemangledTypeNode::AppendString(string& out) const { - const string before = GetStringBeforeName(); - const string after = GetStringAfterName(); - if (!before.empty() && !after.empty() && before.back() != ' ' && before.back() != '*' - && before.back() != '&' && after.front() != ' ' && after.front() != '[' - && m_childType && m_childType->m_typeClass != FunctionTypeClass) + string before; // fresh buffer: spacing checks in AppendBeforeName must not see the caller's text + AppendBeforeName(before); + out += before; + + string after; + AppendAfterName(after); + + if (!after.empty() && !before.empty()) { - return before + " " + after; + char lastBefore = before.back(); + if (lastBefore != ' ' && lastBefore != '*' && lastBefore != '&' + && after.front() != ' ' && after.front() != '[' + && m_childType && m_childType->m_typeClass != FunctionTypeClass) + { + out += ' '; + } } - return before + after; + out += after; +} + + +string DemangledTypeNode::GetString() const +{ + string out; + AppendString(out); + return out; } @@ -477,10 +588,23 @@ Ref DemangledTypeNode::Finalize() const case VarArgsTypeClass: return TypeBuilder::VarArgsType().Finalize(); + case WideCharTypeClass: + { + if (!m_const && !m_volatile) + return Type::WideCharType(m_width, m_altName); + TypeBuilder tb = TypeBuilder::WideCharType(m_width, m_altName); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + return 
tb.Finalize(); + } + + case ValueTypeClass: + return Type::ValueType(m_altName); + case PointerTypeClass: { Ref child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); - return TypeBuilder::PointerType(m_width, child, m_const, m_volatile, m_pointerReference).Finalize(); + return TypeBuilder::PointerType(m_width, child, m_const, m_volatile, static_cast(m_pointerReference)).Finalize(); } case ArrayTypeClass: @@ -504,12 +628,17 @@ Ref DemangledTypeNode::Finalize() const Ref pType = p.type ? p.type->Finalize() : Ref(Type::VoidType()); finalParams.push_back({p.name, pType, true, Variable()}); } - TypeBuilder tb = TypeBuilder::FunctionType(retType->WithConfidence(m_returnTypeConfidence), nullptr, finalParams); + TypeBuilder tb = TypeBuilder::FunctionType( + retType->WithConfidence(m_returnTypeConfidence), nullptr, finalParams, + Confidence(m_hasVariableArgs, m_hasVariableArgs ? BN_DEFAULT_CONFIDENCE : 0)); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (auto ps : m_pointerSuffix) - tb.AddPointerSuffix(ps); - tb.SetNameType(m_nameType); + for (uint8_t bits = m_pointerSuffixBits, i = 0; bits; bits >>= 1, i++) + if (bits & 1) + tb.AddPointerSuffix(static_cast(i)); + tb.SetNameType(static_cast(m_nameType)); + if (m_callingConventionName != NoCallingConvention) + tb.SetCallingConventionName(static_cast(m_callingConventionName)); return tb.Finalize(); } @@ -517,13 +646,14 @@ Ref DemangledTypeNode::Finalize() const { TypeBuilder tb = TypeBuilder::NamedType( NamedTypeReference::GenerateAutoDemangledTypeReference( - m_ntrClass, QualifiedName(m_nameSegments ? *m_nameSegments : vector{})), + static_cast(m_ntrClass), QualifiedName(m_nameSegments)), m_width, m_alignment > 0 ? 
m_alignment : 1); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (auto ps : m_pointerSuffix) - tb.AddPointerSuffix(ps); - tb.SetNameType(m_nameType); + for (uint8_t bits = m_pointerSuffixBits, i = 0; bits; bits >>= 1, i++) + if (bits & 1) + tb.AddPointerSuffix(static_cast(i)); + tb.SetNameType(static_cast(m_nameType)); tb.SetHasTemplateArguments(m_hasTemplateArgs); return tb.Finalize(); } diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/gnu3/demangled_type_node.h index 62ad9004a..b41ae7c6f 100644 --- a/demangler/gnu3/demangled_type_node.h +++ b/demangler/gnu3/demangled_type_node.h @@ -49,7 +49,7 @@ #define _STD_SET std::set #endif -// Lightweight type representation for the GNU3 demangler. +// Lightweight type representation for demanglers (GNU3 and MSVC). // This object serves as an abstraction layer between C++'s type system and our own. // It also removes a source of a lot of reallocation of NamedTypeReference BinaryNinja::Type objects // and only creates real Type objects when Finalize() is called. 
@@ -73,6 +73,8 @@ class DemangledTypeNode static DemangledTypeNode BoolType(); static DemangledTypeNode IntegerType(size_t width, bool isSigned, const _STD_STRING& altName = ""); static DemangledTypeNode FloatType(size_t width, const _STD_STRING& altName = ""); + static DemangledTypeNode WideCharType(size_t width, const _STD_STRING& altName = ""); + static DemangledTypeNode ValueType(const _STD_STRING& value); static DemangledTypeNode VarArgsType(); static DemangledTypeNode PointerType(BN::Architecture* arch, DemangledTypeNode child, bool cnst, bool vltl, BNReferenceType refType); @@ -85,51 +87,47 @@ class DemangledTypeNode const BN::QualifiedName& name, size_t width = 0, size_t align = 0); // Getters - BNTypeClass GetClass() const { return m_typeClass; } + BNTypeClass GetClass() const { return static_cast(m_typeClass); } #ifdef BINARYNINJACORE_LIBRARY - BNTypeClass GetTypeClass() const { return m_typeClass; } + BNTypeClass GetTypeClass() const { return static_cast(m_typeClass); } #endif - const _STD_VECTOR<_STD_STRING>& GetTypeName() const - { - if (!m_nameSegments) - { - static const _STD_VECTOR<_STD_STRING> empty; - return empty; - } - return *m_nameSegments; - } - _STD_VECTOR<_STD_STRING>& GetMutableTypeName() - { - if (!m_nameSegments) - m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(); - else if (m_nameSegments.use_count() > 1) - m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(*m_nameSegments); - return *m_nameSegments; - } + const _STD_VECTOR<_STD_STRING>& GetTypeName() const { return m_nameSegments; } + _STD_VECTOR<_STD_STRING>& GetMutableTypeName() { return m_nameSegments; } _STD_STRING GetTypeNameString() const; size_t NameStringSize() const; bool IsConst() const { return m_const; } bool IsVolatile() const { return m_volatile; } - BNNameType GetNameType() const { return m_nameType; } + bool HasVariableArguments() const { return m_hasVariableArgs; } + BNNameType GetNameType() const { return static_cast(m_nameType); } bool 
HasTemplateArguments() const { return m_hasTemplateArgs; } - const _STD_SET& GetPointerSuffix() const { return m_pointerSuffix; } - BNNamedTypeReferenceClass GetNTRClass() const { return m_ntrClass; } + uint8_t GetPointerSuffixBits() const { return m_pointerSuffixBits; } + bool HasPointerSuffix(BNPointerSuffix ps) const { return (m_pointerSuffixBits & (1u << ps)) != 0; } + BNNamedTypeReferenceClass GetNTRClass() const { return static_cast(m_ntrClass); } // Setters - void SetTypeName(_STD_VECTOR<_STD_STRING> name) { m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(std::move(name)); } + void SetTypeName(_STD_VECTOR<_STD_STRING> name) { m_nameSegments = std::move(name); } void SetConst(bool c) { m_const = c; } void SetVolatile(bool v) { m_volatile = v; } + void SetHasVariableArguments(bool v) { m_hasVariableArgs = v; } void SetNameType(BNNameType nt) { m_nameType = nt; } void SetHasTemplateArguments(bool t) { m_hasTemplateArgs = t; } - void SetPointerSuffix(const _STD_SET& s) { m_pointerSuffix = s; } - void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffix.insert(ps); } + void SetPointerSuffix(const _STD_SET& s) + { + m_pointerSuffixBits = 0; + for (auto ps : s) + m_pointerSuffixBits |= (1u << ps); + } + void SetPointerSuffixBits(uint8_t bits) { m_pointerSuffixBits = bits; } + void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffixBits |= (1u << ps); } void SetReturnTypeConfidence(uint8_t c) { m_returnTypeConfidence = c; } + void SetCallingConventionName(BNCallingConventionName cc) { m_callingConventionName = cc; } // Named type reference operations void SetNTR(BNNamedTypeReferenceClass cls, _STD_VECTOR<_STD_STRING> nameSegments); void SetNTR(BNNamedTypeReferenceClass cls, const BN::QualifiedName& name); // String formatting + void AppendString(_STD_STRING& out) const; _STD_STRING GetString() const; _STD_STRING GetStringBeforeName() const; _STD_STRING GetStringAfterName() const; @@ -139,33 +137,40 @@ class DemangledTypeNode BN::Ref Finalize() 
const; private: - BNTypeClass m_typeClass; - size_t m_width; - size_t m_alignment; + // Packed flags and small enums — all fit in a few bytes + uint8_t m_typeClass; // BNTypeClass (13 values, 4 bits) + uint8_t m_ntrClass; // BNNamedTypeReferenceClass (6 values, 3 bits) + uint8_t m_pointerReference; // BNReferenceType (4 values, 2 bits) + uint8_t m_nameType; // BNNameType (90 values, 7 bits) + uint8_t m_callingConventionName; // BNCallingConventionName (11 values, 4 bits) + uint8_t m_pointerSuffixBits; // Bitmask of BNPointerSuffix (5 flags) + uint8_t m_returnTypeConfidence; bool m_const; bool m_volatile; bool m_signed; + bool m_hasVariableArgs; bool m_hasTemplateArgs; - BNNameType m_nameType; - _STD_SET m_pointerSuffix; + + size_t m_width; + size_t m_alignment; _STD_STRING m_altName; // Named type ref data - BNNamedTypeReferenceClass m_ntrClass; - std::shared_ptr<_STD_VECTOR<_STD_STRING>> m_nameSegments; + _STD_VECTOR<_STD_STRING> m_nameSegments; // Child type (for pointer/array/function return) std::shared_ptr m_childType; - BNReferenceType m_pointerReference; uint64_t m_elements; // Function params _STD_VECTOR m_params; - uint8_t m_returnTypeConfidence; // Helpers for string formatting _STD_STRING GetModifierString() const; _STD_STRING GetPointerSuffixString() const; + void AppendModifiers(_STD_STRING& out) const; + void AppendPointerSuffix(_STD_STRING& out) const; + void AppendTypeName(_STD_STRING& out) const; void AppendBeforeName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; void AppendAfterName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; }; diff --git a/demangler/msvc/CMakeLists.txt b/demangler/msvc/CMakeLists.txt index b12559916..3536c899a 100644 --- a/demangler/msvc/CMakeLists.txt +++ b/demangler/msvc/CMakeLists.txt @@ -5,7 +5,9 @@ project(demangle_msvc) file(GLOB SOURCES CONFIGURE_DEPENDS *.cpp *.c - *.h) + *.h + ../gnu3/demangled_type_node.cpp + ../gnu3/demangled_type_node.h) if(DEMO) 
add_library(${PROJECT_NAME} STATIC ${SOURCES}) diff --git a/demangler/msvc/demangle_msvc.cpp b/demangler/msvc/demangle_msvc.cpp index 2eef96099..b9c8eec4f 100644 --- a/demangler/msvc/demangle_msvc.cpp +++ b/demangler/msvc/demangle_msvc.cpp @@ -57,7 +57,7 @@ string Demangle::Reader::ReadUntil(char sentinal) } -const TypeBuilder& Demangle::BackrefList::GetTypeBackref(size_t reference) +const DemangledTypeNode& Demangle::BackrefList::GetTypeBackref(size_t reference) { if (reference < typeList.size()) return typeList[reference]; @@ -74,11 +74,10 @@ const string& Demangle::BackrefList::GetStringBackref(size_t reference) } -void Demangle::BackrefList::PushTypeBackref(TypeBuilder t) +void Demangle::BackrefList::PushTypeBackref(DemangledTypeNode t) { - // LogDebug("this: %llx - TypeBackref: %lld %s\n", this, nameList.size(), t.GetString().c_str()); if (typeList.size() <= 9) - typeList.push_back(t); + typeList.push_back(std::move(t)); } @@ -172,10 +171,10 @@ void Demangle::RewriteTemplateBackrefName(NameList& typeName, const BackrefList& } -TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameList& name) +DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameList& name) { MSVC_TRACE("%s: '%s' - %lu\n", __FUNCTION__, reader.GetRaw(), varList.nameList.size()); - TypeBuilder newType; + DemangledTypeNode newType; bool _const = false, _volatile = false, isMember = false; BNReferenceType refType; BNTypeClass typeClass = IntegerTypeClass; @@ -200,18 +199,18 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL _const = false; _volatile = true; break; - case 'C': return TypeBuilder::IntegerType(1, true, "signed char"); - case 'D': return TypeBuilder::IntegerType(1, true); - case 'E': return TypeBuilder::IntegerType(1, false); - case 'F': return TypeBuilder::IntegerType(2, true); - case 'G': return TypeBuilder::IntegerType(2, false); - case 'H': return TypeBuilder::IntegerType(4, true); - case 
'I': return TypeBuilder::IntegerType(4, false); - case 'J': return TypeBuilder::IntegerType(4, true, "long"); - case 'K': return TypeBuilder::IntegerType(4, false, "unsigned long"); - case 'M': return TypeBuilder::FloatType(4); - case 'N': return TypeBuilder::FloatType(8); - case 'O': return TypeBuilder::FloatType(10, "long double"); + case 'C': return DemangledTypeNode::IntegerType(1, true, "signed char"); + case 'D': return DemangledTypeNode::IntegerType(1, true); + case 'E': return DemangledTypeNode::IntegerType(1, false); + case 'F': return DemangledTypeNode::IntegerType(2, true); + case 'G': return DemangledTypeNode::IntegerType(2, false); + case 'H': return DemangledTypeNode::IntegerType(4, true); + case 'I': return DemangledTypeNode::IntegerType(4, false); + case 'J': return DemangledTypeNode::IntegerType(4, true, "long"); + case 'K': return DemangledTypeNode::IntegerType(4, false, "unsigned long"); + case 'M': return DemangledTypeNode::FloatType(4); + case 'N': return DemangledTypeNode::FloatType(8); + case 'O': return DemangledTypeNode::FloatType(10, "long double"); case 'P': // * typeClass = PointerTypeClass; refType = PointerReferenceType; @@ -254,7 +253,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL default: throw DemangleException(); } break; - case 'X': return TypeBuilder::VoidType(); break; + case 'X': return DemangledTypeNode::VoidType(); break; case 'Y': { // Multi-dimensional array type: Y...@ @@ -271,13 +270,13 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL newType = DemangleVarType(varList, false, arrayName); for (auto i = elementList.rbegin(); i != elementList.rend(); i++) { - newType = TypeBuilder::ArrayType(newType.Finalize(), *i); + newType = DemangledTypeNode::ArrayType(std::move(newType), *i); } if (!isReturn) varList.PushTypeBackref(newType); return newType; } - case 'Z': return TypeBuilder::VarArgsType(); + case 'Z': return DemangledTypeNode::VarArgsType(); case 
'?': { if (reader.Peek() >= '0' && reader.Peek() <= '9') @@ -290,7 +289,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL catch (const DemangleException&) { if (reference == 2) - return MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); throw; } } @@ -301,38 +300,38 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL if (reader.Peek() == '@') reader.Consume(); if (placeholder == "") - return MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); if (placeholder == "") - return MakeNamedType(UnknownNamedTypeClass, NameList{"decltype(auto)"}); - return MakeNamedType(UnknownNamedTypeClass, NameList{placeholder}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"decltype(auto)"}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{placeholder}); } case '_': switch (reader.Read()) { - case 'D': newType = TypeBuilder::IntegerType(1, true); break; - case 'E': newType = TypeBuilder::IntegerType(1, false); break; - case 'F': newType = TypeBuilder::IntegerType(2, true); break; - case 'G': newType = TypeBuilder::IntegerType(2, false); break; - case 'H': newType = TypeBuilder::IntegerType(4, true); break; - case 'I': newType = TypeBuilder::IntegerType(4, false); break; - case 'J': newType = TypeBuilder::IntegerType(8, true); break; - case 'K': newType = TypeBuilder::IntegerType(8, false); break; - case 'L': newType = TypeBuilder::IntegerType(16, true); break; - case 'M': newType = TypeBuilder::IntegerType(16, false); break; - case 'N': newType = TypeBuilder::BoolType(); break; + case 'D': newType = DemangledTypeNode::IntegerType(1, true); break; + case 'E': newType = DemangledTypeNode::IntegerType(1, false); break; + case 'F': newType = DemangledTypeNode::IntegerType(2, true); break; + case 'G': newType = 
DemangledTypeNode::IntegerType(2, false); break; + case 'H': newType = DemangledTypeNode::IntegerType(4, true); break; + case 'I': newType = DemangledTypeNode::IntegerType(4, false); break; + case 'J': newType = DemangledTypeNode::IntegerType(8, true); break; + case 'K': newType = DemangledTypeNode::IntegerType(8, false); break; + case 'L': newType = DemangledTypeNode::IntegerType(16, true); break; + case 'M': newType = DemangledTypeNode::IntegerType(16, false); break; + case 'N': newType = DemangledTypeNode::BoolType(); break; case 'O': { NameList name; auto childType = DemangleVarType(varList, false, name); - newType = TypeBuilder::ArrayType(childType.Finalize(), 0); + newType = DemangledTypeNode::ArrayType(std::move(childType), 0); break; } - case 'S': newType = TypeBuilder::IntegerType(2, true, "char16_t"); break; - case 'U': newType = TypeBuilder::IntegerType(4, true, "char32_t"); break; - case 'W': newType = TypeBuilder::IntegerType(2, false, "wchar_t"); break; - case 'P': newType = MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); break; - case 'Q': newType = MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); break; // decltype(auto) displayed as auto - case 'T': newType = MakeNamedType(UnknownNamedTypeClass, NameList{"auto"}); break; // decltype(auto) alternate + case 'S': newType = DemangledTypeNode::IntegerType(2, true, "char16_t"); break; + case 'U': newType = DemangledTypeNode::IntegerType(4, true, "char32_t"); break; + case 'W': newType = DemangledTypeNode::IntegerType(2, false, "wchar_t"); break; + case 'P': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); break; + case 'Q': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); break; // decltype(auto) displayed as auto + case 'T': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); break; // decltype(auto) alternate case 'X': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Coclass 
case 'Y': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Cointerface default: @@ -383,7 +382,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL else if (reader.PeekMatch("$T", 2)) { reader.Consume(2); - auto t = MakeNamedType(UnknownNamedTypeClass, NameList{"std::nullptr"}); + auto t = DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"std::nullptr"}); if (!isReturn) varList.PushTypeBackref(t); return t; @@ -400,7 +399,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL reader.Consume(); int64_t value; DemangleNumber(value); - return TypeBuilder::ValueType(to_string(value)); + return DemangledTypeNode::ValueType(to_string(value)); } else if (reader.Peek() == 'D') { @@ -423,13 +422,13 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL int64_t adj; DemangleNumber(adj); } - return TypeBuilder::PointerType(m_arch, context.type.Finalize(), false, false, PointerReferenceType); + return DemangledTypeNode::PointerType(m_arch, std::move(context.type), false, false, PointerReferenceType); } else if (reader.Peek() == '1') { reader.Consume(); auto context = DemangleSymbol(); - return TypeBuilder::PointerType(m_arch, context.type.Finalize(), false, false, PointerReferenceType); + return DemangledTypeNode::PointerType(m_arch, std::move(context.type), false, false, PointerReferenceType); } else throw DemangleException(); @@ -468,11 +467,11 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL { reader.Consume(); auto childType = DemangleFunction(NoNameType, false, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); + newType = DemangledTypeNode::PointerType(m_arch, + std::move(childType), + _const, + _volatile, + refType); break; } case '7': //Function pointer @@ -480,11 +479,11 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool 
isReturn, NameL { reader.Consume(); auto childType = DemangleFunction(NoNameType, true, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); + newType = DemangledTypeNode::PointerType(m_arch, + std::move(childType), + _const, + _volatile, + refType); break; } case '8': //Named class function pointer @@ -493,17 +492,17 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL DemangleName(name, classFunctionType, varList, true); name.push_back(""); auto childType = DemangleFunction(NoNameType, true, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); + newType = DemangledTypeNode::PointerType(m_arch, + std::move(childType), + _const, + _volatile, + refType); break; } default: // Non-numeric { MSVC_TRACE("Demangle pointer subtype: '%s'\n", reader.GetRaw()); - TypeBuilder child; + DemangledTypeNode child; bool _const2 = false, _volatile2 = false, isMember = false; auto suffix = DemanglePointerSuffix(); ConsumeExtendedModifierPrefix(); @@ -526,7 +525,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL for (auto i = elementList.rbegin(); i != elementList.rend(); i++) { - child = TypeBuilder::ArrayType(child.Finalize(), *i); + child = DemangledTypeNode::ArrayType(std::move(child), *i); } } else @@ -537,13 +536,13 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL child.SetConst(_const2); child.SetVolatile(_volatile2); - newType = TypeBuilder::PointerType(m_arch, - child.Finalize(), - _const, - _volatile, - refType); + newType = DemangledTypeNode::PointerType(m_arch, + std::move(child), + _const, + _volatile, + refType); - newType.SetPointerSuffix(PointerSuffixSet(suffix)); + newType.SetPointerSuffixBits(suffix); MSVC_TRACE("Name: %s\n", newType.GetString().c_str()); break; } @@ -553,7 +552,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool 
isReturn, NameL case EnumerationTypeClass: MSVC_TRACE("Demangle enumeration\n"); DemangleName(typeName, classFunctionType, varList, true); - newType = MakeNamedType(EnumNamedTypeClass, typeName, width, width); + newType = DemangledTypeNode::NamedType(EnumNamedTypeClass, typeName, width, width); break; case StructureTypeClass: MSVC_TRACE("Demangle structure\n"); @@ -562,16 +561,16 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameL switch (structType) { case ClassStructureType: - newType = MakeNamedType(ClassNamedTypeClass, typeName); + newType = DemangledTypeNode::NamedType(ClassNamedTypeClass, typeName); break; case StructStructureType: - newType = MakeNamedType(StructNamedTypeClass, typeName); + newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); break; case UnionStructureType: - newType = MakeNamedType(UnionNamedTypeClass, typeName); + newType = DemangledTypeNode::NamedType(UnionNamedTypeClass, typeName); break; default: - newType = MakeNamedType(UnknownNamedTypeClass, typeName); + newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, typeName); break; } break; @@ -740,7 +739,7 @@ void Demangle::DemangleWideChar(uint16_t& wch) } -void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList) +void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; @@ -752,7 +751,7 @@ void Demangle::DemangleVariableList(vector& paramList, Backre { if (reader.Length() >= 2 && reader.PeekAt(1) == 'Z') { - paramList.push_back(FunctionParameter("", Type::VarArgsType(), true, {})); + paramList.push_back({"", std::make_shared(DemangledTypeNode::VarArgsType())}); reader.Consume(); continue; } @@ -786,21 +785,20 @@ void Demangle::DemangleVariableList(vector& paramList, Backre NameList name; MSVC_TRACE("Argument %zu: %s", paramList.size(), reader.GetRaw()); - TypeBuilder type = 
DemangleVarType(varList, false, name); + DemangledTypeNode type = DemangleVarType(varList, false, name); if (hasModifiers) { type.SetConst(_const); type.SetVolatile(_volatile); - type.SetPointerSuffix(PointerSuffixSet(suffix)); + type.SetPointerSuffixBits(suffix); } - FunctionParameter vt; + DemangledTypeNode::Param vt; if (name.size() == 1) vt.name = std::move(name[0]); else if (name.size() > 1) vt.name = JoinNameList(name); - vt.type = type.Finalize(); - vt.defaultLocation = true; + vt.type = std::make_shared(std::move(type)); paramList.push_back(std::move(vt)); MSVC_TRACE("Argument %zu: '%s' - '%s'\n", paramList.size() - 1, paramList.back().type->GetString().c_str(), reader.GetRaw()); } @@ -858,11 +856,6 @@ Demangle::NameType Demangle::GetNameType() // All standard MSVC operator codes use uppercase/digits/_ return NameString; } - // else if (reader.PeekString(3) == "__E") - // { - // reader.Consume(2); - // return NameDynamicInitializer; - // } else { return NameLookup; @@ -886,7 +879,7 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, string& out) { - TypeBuilder rtti; + DemangledTypeNode rtti; switch (reader.Read()) { case '0': @@ -905,7 +898,7 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, rtti = DemangleVarType(nameBackrefList, false, name); rtti.SetConst(_const); rtti.SetVolatile(_volatile); - rtti.SetPointerSuffix(PointerSuffixSet(suffix)); + rtti.SetPointerSuffixBits(suffix); out = rtti.GetString() + " `RTTI Type Descriptor'"; classFunctionType = RttiTypeDescriptor; break; @@ -1077,7 +1070,7 @@ string Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) string Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList) { string out; - vector params; + vector params; BNNameType dummyFunctionType = NoNameType; NameList dummyNameList; BackrefList outerBackrefs; @@ -1111,19 +1104,8 @@ string 
Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& na return out; } -// void Demangle::DemangleInitFiniStub(bool destructor, QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) -// { -// bool isStatic = false; -// if (reader.Peek() == '?') -// { -// reader.Consume(); -// isStatic = true; -// } -// string out = DemangleUnqualifiedSymbolName(nameList, nameBackrefList, classFunctionType); -// } - -void Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) +void Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) { const bool nestedTemplateContext = (m_templateParamDepth > 0); struct NameBackrefScopeGuard @@ -1162,9 +1144,8 @@ void Demangle::DemangleTemplateParams(vector& params, Backref { if (i > 0) out += ','; - if (!params[i].type) - continue; - out += params[i].type->GetString(); + if (params[i].type) + params[i].type->AppendString(out); } if (out.back() == '>') out += ' '; // C++03 compat: >> → > > @@ -1201,7 +1182,7 @@ string Demangle::DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& } -TypeBuilder Demangle::DemangleString() +DemangledTypeNode Demangle::DemangleString() { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); // ??_C@_@ @@ -1251,7 +1232,7 @@ TypeBuilder Demangle::DemangleString() bool truncated = false; string name = ""; string literalPrefix; - TypeBuilder type; + DemangledTypeNode type; // String bytes if (isWideChar) @@ -1275,7 +1256,7 @@ TypeBuilder Demangle::DemangleString() reader.Consume(); name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(2), length / 2); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); } else { @@ -1310,7 +1291,7 @@ TypeBuilder Demangle::DemangleString() { MSVC_TRACE("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); name = 
Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); - type = Type::ArrayType(Type::IntegerType(1, true), length); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::IntegerType(1, true), length); } else { @@ -1324,7 +1305,7 @@ TypeBuilder Demangle::DemangleString() } name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); literalPrefix = "U"; - type = Type::ArrayType(Type::WideCharType(4), length / 4); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(4), length / 4); } else if (numNulls > length / 3) { @@ -1336,14 +1317,14 @@ TypeBuilder Demangle::DemangleString() } name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); literalPrefix = "L"; - type = Type::ArrayType(Type::WideCharType(2), length / 2); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); } else { MSVC_TRACE("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); - type = Type::ArrayType(Type::IntegerType(1, true), length); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::IntegerType(1, true), length); } } } @@ -1353,7 +1334,7 @@ TypeBuilder Demangle::DemangleString() } -TypeBuilder Demangle::DemangleTypeInfoName() +DemangledTypeNode Demangle::DemangleTypeInfoName() { if (reader.Read() != '?') throw DemangleException("Unknown raw name type"); @@ -1365,17 +1346,15 @@ TypeBuilder Demangle::DemangleTypeInfoName() MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); NameList name; - TypeBuilder type = DemangleVarType(m_backrefList, false, name); + DemangledTypeNode type = DemangleVarType(m_backrefList, false, name); type.SetConst(_const); type.SetVolatile(_volatile); switch (type.GetClass()) { case NamedTypeReferenceClass: - { - m_varName = 
type.GetNamedTypeReference()->GetName().GetContents().GetStrings(); + m_varName = NameList(type.GetTypeName().begin(), type.GetTypeName().end()); return type; - } default: throw DemangleException("Unexpected type of RTTI Type Name"); } @@ -1452,7 +1431,7 @@ void Demangle::DemangleName(NameList& nameList, string out; BNNameType functionType; BNNameType dummyFunctionType; - vector params; + vector params; while(1) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); @@ -1668,14 +1647,6 @@ void Demangle::DemangleName(NameList& nameList, DemangleNameTypeRtti(classFunctionType, nameBackrefList, out); nameList.insert(nameList.begin(), out); break; - // case NameDynamicInitializer: - // m_logger->LogDebug("NameDynamicInitializer\n"); - // DemangleInitFiniStub(false); - // break; - // case NameDynamicAtExitDestructor: - // m_logger->LogDebug("NameDynamicAtExitDestructor\n"); - // DemangleInitFiniStub(false); - // break; case NameReturn: { MSVC_TRACE("NameReturn\n"); @@ -1884,12 +1855,12 @@ void Demangle::DemangleModifiers(bool& _const, bool& _volatile, bool &isMember) } -TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, int funcClass) +DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, int funcClass) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; uint8_t suffix = 0; - TypeBuilder returnType; + DemangledTypeNode returnType; BNCallingConventionName cc; //Demangle adjustor which we don't do anything with for now @@ -1963,14 +1934,14 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe { returnType.SetConst(return_const); returnType.SetVolatile(return_volatile); - returnType.SetPointerSuffix(PointerSuffixSet(return_suffix)); + returnType.SetPointerSuffixBits(return_suffix); } } if (reader.Peek() == '@') reader.Consume(); 
MSVC_TRACE("\tDemangle Function Parameters %s", reader.GetRaw()); - vector params; + vector params; bool needsThisPtr = false; if (cc == ThisCallCallingConvention) { @@ -1997,23 +1968,22 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe NameList thisName = m_varName; if (classFunctionType != OperatorReturnTypeNameType && !thisName.empty()) thisName.pop_back(); - params.push_back(FunctionParameter("this", Type::PointerType(m_arch, Type::NamedType(ToQualifiedName(thisName), Type::VoidType())), true, {})); + auto thisNamedType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, thisName); + auto thisPtrType = DemangledTypeNode::PointerType(m_arch, std::move(thisNamedType), false, false, PointerReferenceType); + params.push_back(DemangledTypeNode::Param{"this", std::make_shared(std::move(thisPtrType))}); } DemangleVariableList(params, m_backrefList); - if (params.size() >= 1 && params.back().type.GetValue() && params.back().type->GetClass() == VoidTypeClass) + if (params.size() >= 1 && params.back().type && params.back().type->GetClass() == VoidTypeClass) params.pop_back(); - Ref returnTypeObj; - if (shouldHaveReturnType) - returnTypeObj = returnType.Finalize(); - else - returnTypeObj = Type::VoidType(); - TypeBuilder newType = TypeBuilder::FunctionType(returnTypeObj, nullptr, params); + if (!shouldHaveReturnType) + returnType = DemangledTypeNode::VoidType(); + DemangledTypeNode newType = DemangledTypeNode::FunctionType(std::move(returnType), nullptr, std::move(params)); newType.SetConst(_const); newType.SetVolatile(_volatile); - newType.SetPointerSuffix(PointerSuffixSet(suffix)); + newType.SetPointerSuffixBits(suffix); newType.SetNameType(classFunctionType); newType.SetCallingConventionName(cc); @@ -2022,25 +1992,25 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe } -TypeBuilder Demangle::DemangleData() +DemangledTypeNode Demangle::DemangleData() { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, 
reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; NameList name; - TypeBuilder newType = DemangleVarType(m_backrefList, false, name); + DemangledTypeNode newType = DemangleVarType(m_backrefList, false, name); auto suffix = DemanglePointerSuffix(); DemangleModifiers(_const, _volatile, isMember); if (newType.GetClass() != PointerTypeClass) { newType.SetConst(_const); newType.SetVolatile(_volatile); - newType.SetPointerSuffix(PointerSuffixSet(suffix)); + newType.SetPointerSuffixBits(suffix); } return newType; } -TypeBuilder Demangle::DemanagleRTTI(BNNameType nameType) +DemangledTypeNode Demangle::DemanagleRTTI(BNNameType nameType) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; @@ -2048,7 +2018,7 @@ TypeBuilder Demangle::DemanagleRTTI(BNNameType nameType) DemangleModifiers(_const, _volatile, isMember); NameList typeName = m_varName; MSVC_TRACE("new struct type\n"); - TypeBuilder newType = MakeNamedType(StructNamedTypeClass, typeName); + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); newType.SetNameType(nameType); newType.SetConst(_const); newType.SetVolatile(_volatile); @@ -2057,12 +2027,12 @@ TypeBuilder Demangle::DemanagleRTTI(BNNameType nameType) } -TypeBuilder Demangle::DemangleVTable() +DemangledTypeNode Demangle::DemangleVTable() { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; DemangleModifiers(_const, _volatile, isMember); - TypeBuilder newType = MakeNamedType(StructNamedTypeClass, m_varName); + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, m_varName); if (reader.Peek() != '@') { NameList typeName; @@ -2072,7 +2042,7 @@ TypeBuilder Demangle::DemangleVTable() m_varName.back() += "{for `" + JoinNameList(typeName) + "'}"; typeName.push_back(suffix); - newType = MakeNamedType(StructNamedTypeClass, typeName); + newType = 
DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); } newType.SetConst(_const); newType.SetVolatile(_volatile); @@ -2106,7 +2076,7 @@ Demangle::DemangleContext Demangle::DemangleSymbol() reader.Consume(2); // consume ?@ _STD_STRING hash = reader.ReadUntil('@'); m_varName.push_back("??@" + hash + "@"); - return { TypeBuilder::VoidType(), NoAccess, NoScope }; + return { DemangledTypeNode::VoidType(), NoAccess, NoScope }; } DemangleName(varName, classFunctionType, m_backrefList); @@ -2192,7 +2162,7 @@ Demangle::DemangleContext Demangle::DemangleSymbol() reader.Consume(); // calling convention (A=cdecl, etc.) if (reader.Length() >= 1 && reader.Peek() != '@') reader.Consume(); // this-type flag - context = {TypeBuilder::VoidType(), NoAccess, NoScope}; + context = {DemangledTypeNode::VoidType(), NoAccess, NoScope}; break; } int funcClass = VirtualThunkFunctionClass; diff --git a/demangler/msvc/demangle_msvc.h b/demangler/msvc/demangle_msvc.h index c5bced449..a23dfc192 100644 --- a/demangler/msvc/demangle_msvc.h +++ b/demangler/msvc/demangle_msvc.h @@ -38,6 +38,12 @@ #define _STD_SET std::set #endif +#ifdef BINARYNINJACORE_LIBRARY +#include "demangler/gnu3/demangled_type_node.h" +#else +#include "../gnu3/demangled_type_node.h" +#endif + class DemangleException: public std::exception { _STD_STRING m_message; @@ -145,13 +151,13 @@ class Demangle class BackrefList { public: - _STD_VECTOR typeList; + _STD_VECTOR typeList; _STD_VECTOR<_STD_STRING> nameList; _STD_VECTOR<_STD_STRING> templateList; void Clear() { typeList.clear(); nameList.clear(); templateList.clear(); } - const BN::TypeBuilder& GetTypeBackref(size_t reference); + const DemangledTypeNode& GetTypeBackref(size_t reference); const _STD_STRING& GetStringBackref(size_t reference); - void PushTypeBackref(BN::TypeBuilder t); + void PushTypeBackref(DemangledTypeNode t); void PushStringBackref(const _STD_STRING& s); void PushTemplateSpecialization(const _STD_STRING& s); }; @@ -185,28 +191,6 @@ class Demangle 
return out; } - static BN::QualifiedName ToQualifiedName(const NameList& nl) - { - return BN::QualifiedName(nl); - } - - static _STD_SET PointerSuffixSet(uint8_t suffixBits) - { - _STD_SET suffix; - for (int i = 0; i < 8; i++) - { - if (suffixBits & (1u << i)) - suffix.insert((BNPointerSuffix)i); - } - return suffix; - } - - static BN::TypeBuilder MakeNamedType(BNNamedTypeReferenceClass cls, const NameList& nl, size_t width = 0, size_t align = 0) - { - return BN::TypeBuilder::NamedType( - BN::NamedTypeReference::GenerateAutoDemangledTypeReference(cls, ToQualifiedName(nl)), width, align); - } - _STD_STRING m_mangledName; // Owns the string; Reader points into it Reader reader; BackrefList m_backrefList; @@ -218,13 +202,13 @@ class Demangle NameType GetNameType(); void RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList) const; - BN::TypeBuilder DemangleVarType(BackrefList& varList, bool isReturn, NameList& name); + DemangledTypeNode DemangleVarType(BackrefList& varList, bool isReturn, NameList& name); void DemangleNumber(int64_t& num); void DemangleChar(char& ch); void DemangleWideChar(uint16_t& wch); void DemangleModifiers(bool& _const, bool& _volatile, bool& isMember); uint8_t DemanglePointerSuffix(); - void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList); + void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, _STD_STRING& out, @@ -237,24 +221,24 @@ class Demangle bool typeNameContext = false); BNCallingConventionName DemangleCallingConvention(); void ConsumeExtendedModifierPrefix(); - BN::TypeBuilder DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, int funcClass = NoneFunctionClass); - BN::TypeBuilder DemangleData(); + DemangledTypeNode DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, int funcClass = NoneFunctionClass); + 
DemangledTypeNode DemangleData(); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, _STD_STRING& out); - BN::TypeBuilder DemangleVTable(); - BN::TypeBuilder DemanagleRTTI(BNNameType classFunctionType); + DemangledTypeNode DemangleVTable(); + DemangledTypeNode DemanagleRTTI(BNNameType classFunctionType); _STD_STRING DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList); _STD_STRING DemangleTemplateInstantiationName(BackrefList& nameBackrefList); - void DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); + void DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); _STD_STRING DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType); - BN::TypeBuilder DemangleString(); - BN::TypeBuilder DemangleTypeInfoName(); + DemangledTypeNode DemangleString(); + DemangledTypeNode DemangleTypeInfoName(); public: struct DemangleContext { - BN::TypeBuilder type; + DemangledTypeNode type; BNMemberAccess access; BNMemberScope scope; }; From a58259dd9fb76fb54ba4cfe75fdf08d692f939aa Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Thu, 30 Apr 2026 09:41:31 -0400 Subject: [PATCH 3/4] Additional MSVC Demangling bug fixes Fix demangler backrefs and type-node formatting Fix MSVC demangler parsing for scoped backrefs, template and local-name contexts, member pointers, enum signedness, varargs, RTTI names, thunks, and pointer/function modifier edge cases. Update shared demangled type-node construction and GNU3 pointer suffix formatting so demangled types produce stable type/name strings without encoding invalid intermediate types. Fix MSVC member function pointer demangling Parse P8 member function pointer types as member pointers instead of plain function pointers, preserving the owning class in the demangled type node. 
Keep finalization returning a real Type so callers retain callable type information, even though Binary Ninja cannot represent exact C++ member function pointer declarator placement. Move implicit this handling into DemangledTypeNode finalization so top-level function Types receive a synthetic this parameter, while stringified nested/template symbols omit it. Also tighten nested function pointer parsing so implicit this is not added to function pointer parameter and return types. --- demangler/gnu3/demangle_gnu3.cpp | 309 +++++---- demangler/gnu3/demangle_gnu3.h | 12 +- demangler/gnu3/demangled_type_node.cpp | 233 ++++--- demangler/gnu3/demangled_type_node.h | 79 +-- demangler/msvc/demangle_msvc.cpp | 923 +++++++++++++++++-------- demangler/msvc/demangle_msvc.h | 46 +- type.cpp | 1 - 7 files changed, 1011 insertions(+), 592 deletions(-) diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index a1cab5e88..f3c1a8e60 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -58,7 +58,7 @@ static inline void rtrim(string &s) } -static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) +static size_t TotalStringSize(const StringList& v) { size_t n = 0; for (const auto& s : v) @@ -67,6 +67,25 @@ static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) } +static string JoinNameSegments(const StringList& name) +{ + if (name.empty()) + return {}; + if (name.size() == 1) + return name[0]; + + string out; + out.reserve(TotalStringSize(name) + (name.size() - 1) * 2); + out += name[0]; + for (size_t i = 1; i < name.size(); i++) + { + out += "::"; + out += name[i]; + } + return out; +} + + static string GetTemplateString(const vector& args) { // Pre-calculate total length to avoid reallocations @@ -333,7 +352,6 @@ void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) { m_reader.Reset(mangledName); m_arch = arch; - m_varName.clear(); m_substitute.clear(); m_templateSubstitute.clear(); 
m_functionSubstitute.clear(); @@ -351,7 +369,7 @@ void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) } -DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) +DemangledTypeNode DemangleGNU3::CreateUnknownType(const StringList& s) { return DemangledTypeNode::NamedType(UnknownNamedTypeClass, s); } @@ -359,7 +377,7 @@ DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) DemangledTypeNode DemangleGNU3::CreateUnknownType(const string& s) { - return DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{s}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{s}); } @@ -590,7 +608,6 @@ DemangledTypeNode DemangleGNU3::DemangleType() DemangledTypeNode type; bool cnst = false, vltl = false, rstrct = false; bool substitute = false; - QualifiedName name; DemangleCVQualifiers(cnst, vltl, rstrct); @@ -663,20 +680,20 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (m_reader.Peek() == 's') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(StructNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); + type = DemangledTypeNode::NamedType(StructNamedTypeClass, StringList{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'u') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(UnionNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); + type = DemangledTypeNode::NamedType(UnionNamedTypeClass, StringList{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'e') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(EnumNamedTypeClass, QualifiedName({DemangleSourceName()}), - m_arch->GetDefaultIntegerSize(), m_arch->GetDefaultIntegerSize()); + type = DemangledTypeNode::NamedType(EnumNamedTypeClass, StringList{DemangleSourceName()}, + m_arch->GetDefaultIntegerSize()); break; } @@ -786,7 +803,6 @@ DemangledTypeNode DemangleGNU3::DemangleType() string fullName = member.GetStringBeforeName() + "(" + 
memberName.GetString() + "::*)" + member.GetStringAfterName(); //member.SetScope(NonStaticScope); //DemangledTypeNode ptr = DemangledTypeNode::PointerType(m_arch, member, cnst, vltl); - //QualifiedName qn({memberName.GetString(), "*"}); type = CreateUnknownType(fullName); substitute = true; break; @@ -805,7 +821,7 @@ DemangledTypeNode DemangleGNU3::DemangleType() case 'c': type = CreateUnknownType("decltype(auto)"); break; //decltype(auto) case 'n': { - static const QualifiedName stdNullptrTName(vector{"std", "nullptr_t"}); + static const StringList stdNullptrTName(vector{"std", "nullptr_t"}); type = CreateUnknownType(stdNullptrTName); break; } @@ -905,19 +921,19 @@ DemangledTypeNode DemangleGNU3::DemangleType() DemangledTypeNode DemangleGNU3::DemangleSubstitution() { - static const QualifiedName stdAllocatorName(vector{"std", "allocator"}); - static const QualifiedName stdBasicStringName(vector{"std", "basic_string"}); - static const QualifiedName stdIostreamName(vector{"std", "iostream"}); - static const QualifiedName stdIstreamName(vector{"std", "istream"}); - static const QualifiedName stdOstreamName(vector{"std", "ostream"}); - static const QualifiedName stdStringName(vector{"std", "string"}); - static const QualifiedName stdName(vector{"std"}); + static const StringList stdAllocatorName(vector{"std", "allocator"}); + static const StringList stdBasicStringName(vector{"std", "basic_string"}); + static const StringList stdIostreamName(vector{"std", "iostream"}); + static const StringList stdIstreamName(vector{"std", "istream"}); + static const StringList stdOstreamName(vector{"std", "ostream"}); + static const StringList stdStringName(vector{"std", "string"}); + static const StringList stdName(vector{"std"}); indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); char elm; elm = m_reader.Read(); - QualifiedName name; + StringList name; size_t number = 0; switch (elm) { @@ -1023,7 +1039,7 @@ string DemangleGNU3::DemanglePrimaryExpression() 
MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); char elm1 = '\0'; string out; - QualifiedName tmpList; + StringList tmpList; bool oldTopLevel; //expr-primary if (m_reader.PeekString(2) == "_Z") @@ -1397,7 +1413,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() } -QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() +StringList DemangleGNU3::DemangleBaseUnresolvedName() { // ::= # unresolved name // ::= on # unresolved operator-function-id @@ -1407,7 +1423,7 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - QualifiedName out; + StringList out; if (m_reader.Length() > 1) { const string str = m_reader.PeekString(2); @@ -1819,7 +1835,7 @@ string DemangleGNU3::DemangleExpression() { out += DemangleSourceName(); // Push bare name (before template args) to substitution table. - PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{out})); if (m_reader.Peek() == 'I') { vector args; @@ -1828,13 +1844,13 @@ string DemangleGNU3::DemangleExpression() DemangleTemplateArgs(args); out += GetTemplateString(args); // Also push the template instantiation (name+args). - PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{out})); } out += "::"; }while (m_reader.Peek() != 'E'); m_reader.Consume(); - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); return out; } if (isdigit(m_reader.Peek())) @@ -1874,7 +1890,7 @@ string DemangleGNU3::DemangleExpression() // so check rather than unconditionally consuming. 
if (m_reader.Peek() == 'E') m_reader.Consume(); - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); return out; } else @@ -1916,7 +1932,7 @@ string DemangleGNU3::DemangleExpression() } } // peek is not a digit: fall back for operator-names ("on") / destructor-names ("dn"). - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); } return out; default: @@ -2027,7 +2043,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) indent(); MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - DemangledTypeNode type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, QualifiedName()); + DemangledTypeNode type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{}); bool cnst = false, vltl = false, rstrct = false; bool ref = false; bool rvalueRef = false; @@ -2099,15 +2115,16 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) if (!isTemplate) { type.SetNameType(newType.GetNameType()); - auto aNames = type.GetTypeName(); - auto bNames = newType.GetTypeName(); - _STD_VECTOR<_STD_STRING> newName; + const auto& aNames = type.GetTypeName(); + const auto& bNames = newType.GetTypeName(); + StringList newName; newName.reserve(aNames.size() + bNames.size()); newName.insert(newName.end(), aNames.begin(), aNames.end()); newName.insert(newName.end(), bNames.begin(), bNames.end()); if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) throw DemangleException("Detected adversarial mangled string"); - type.SetNTR(type.GetNTRClass(), newName); + type.SetTypeName(std::move(newName)); + type.SetNTRType(newType.GetNTRClass()); type.SetHasTemplateArguments(false); } // Consume any ABI tags (B ) following this name component. 
@@ -2163,7 +2180,7 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() indent(); MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode type; - QualifiedName varName; + StringList varName; // The local function has its own template scope. Save the outer template // substitution table and set m_topLevel = true so that when the local // function's template args are parsed (e.g. handleMessageDelayed), @@ -2201,13 +2218,14 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); auto aNames = type.GetTypeName(); auto bNames = tmpType.GetTypeName(); - _STD_VECTOR<_STD_STRING> newName; + StringList newName; newName.reserve(aNames.size() + bNames.size()); newName.insert(newName.end(), aNames.begin(), aNames.end()); newName.insert(newName.end(), bNames.begin(), bNames.end()); if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) throw DemangleException("Detected adversarial mangled string"); type.SetTypeName(std::move(newName)); + type.SetNTRType(tmpType.GetNTRClass()); type.SetConst(tmpType.IsConst()); type.SetVolatile(tmpType.IsVolatile()); type.SetPointerSuffixBits(tmpType.GetPointerSuffixBits()); @@ -2331,7 +2349,7 @@ DemangledTypeNode DemangleGNU3::DemangleName() } -DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) +DemangledTypeNode DemangleGNU3::DemangleSymbol(StringList& varName) { indent(); MyLogDebug("%s: %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); @@ -2341,7 +2359,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) ParamList params; bool cnst = false, vltl = false, rstrct = false; bool oldTopLevel; - QualifiedName name; + StringList name; /* ::= @@ -2387,7 +2405,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{name.GetString() + " 
[transaction clone]" + t.GetStringAfterName()}); + StringList{JoinNameSegments(name) + " [transaction clone]" + t.GetStringAfterName()}); } case 'V': { @@ -2576,7 +2594,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) annotation += ']'; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{routineName + annotation}); + StringList{routineName + annotation}); } default: throw DemangleException(); @@ -2628,7 +2646,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"covariant_return_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"covariant_return_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName()}); } case 'C': { @@ -2638,7 +2656,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) throw DemangleException(); return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); + StringList{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); } case 'D': LogWarn("Unsupported: 'typeinfo common proxy'\n"); @@ -2656,7 +2674,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"non-virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"non-virtual_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName()}); } case 'H': // TLS init function { @@ -2665,11 +2683,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - 
_STD_VECTOR<_STD_STRING>{"tls_init_function_for_" + t.GetTypeAndName(name)}); + StringList{"tls_init_function_for_" + t.GetTypeAndName(name)}); } case 'I': return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"typeinfo_for_" + DemangleTypeString()}); + StringList{"typeinfo_for_" + DemangleTypeString()}); case 'J': LogWarn("Unsupported: 'java class'\n"); throw DemangleException(); @@ -2684,7 +2702,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) { DemangledTypeNode t = DemangleType(); return DemangledTypeNode::NamedType(StructNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"VTT_for_" + t.GetString()}); + StringList{"VTT_for_" + t.GetString()}); } case 'v': // virtual thunk { @@ -2699,11 +2717,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"virtual_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName()}); } case 'V': //Vtable return DemangledTypeNode::NamedType(StructNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"vtable_for_" + DemangleTypeString()}); + StringList{"vtable_for_" + DemangleTypeString()}); case 'W': // TLS wrapper function { oldTopLevel = m_topLevel; @@ -2711,7 +2729,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"tls_wrapper_function_for_" + t.GetTypeAndName(name)}); + StringList{"tls_wrapper_function_for_" + t.GetTypeAndName(name)}); } default: throw DemangleException(); @@ -2835,7 +2853,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) type.SetPointerSuffixBits(1u << RestrictSuffix); // PrintTables(); - MyLogDebug("Done: 
%s%s%s\n", type.GetStringBeforeName().c_str(), varName.GetString().c_str(), + MyLogDebug("Done: %s%s%s\n", type.GetStringBeforeName().c_str(), JoinNameSegments(varName).c_str(), type.GetStringAfterName().c_str()); dedent(); @@ -2892,117 +2910,130 @@ bool DemangleGNU3Static::DemangleGlobalHeader(string& name, string& header) } -bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) +namespace { - // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). - // E.g. ____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 - // -> "invocation_function_for_block_in_dyld::_main(...)" - static const string blockInvokeSuffix = "_block_invoke"; - size_t blockPos = name.rfind(blockInvokeSuffix); - if (blockPos != string::npos) - { - // Verify the suffix is _block_invoke optionally followed by [._] only - string tail = name.substr(blockPos + blockInvokeSuffix.size()); - bool validSuffix = tail.empty(); - if (!validSuffix && (tail[0] == '.' || tail[0] == '_')) - { - size_t i = 1; - while (i < tail.size() && isdigit((unsigned char)tail[i])) - i++; - validSuffix = (i == tail.size() && i > 1); - } - if (validSuffix) - { - // Extract the base symbol: everything before _block_invoke - string base = name.substr(0, blockPos); - // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it - size_t zPos = base.find_first_not_of('_'); - if (zPos != string::npos && base[zPos] == 'Z') + static bool DemangleStringGNU3Segments( + Architecture* arch, const string& name, Ref& outType, StringList& outVarName) + { + // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). + // E.g. 
____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 + // -> "invocation_function_for_block_in_dyld::_main(...)" + static const string blockInvokeSuffix = "_block_invoke"; + size_t blockPos = name.rfind(blockInvokeSuffix); + if (blockPos != string::npos) + { + // Verify the suffix is _block_invoke optionally followed by [._] only + string tail = name.substr(blockPos + blockInvokeSuffix.size()); + bool validSuffix = tail.empty(); + if (!validSuffix && (tail[0] == '.' || tail[0] == '_')) { - string normalized = "_" + base.substr(zPos); - Ref baseType; - QualifiedName baseName; - if (DemangleStringGNU3(arch, normalized, baseType, baseName)) + size_t i = 1; + while (i < tail.size() && isdigit((unsigned char)tail[i])) + i++; + validSuffix = (i == tail.size() && i > 1); + } + if (validSuffix) + { + // Extract the base symbol: everything before _block_invoke + string base = name.substr(0, blockPos); + // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it + size_t zPos = base.find_first_not_of('_'); + if (zPos != string::npos && base[zPos] == 'Z') { - outVarName.clear(); - outVarName.push_back("invocation_function_for_block_in_" + baseName.GetString()); - outType = baseType; - return true; + string normalized = "_" + base.substr(zPos); + Ref baseType; + StringList baseName; + if (DemangleStringGNU3Segments(arch, normalized, baseType, baseName)) + { + outVarName.clear(); + outVarName.push_back("invocation_function_for_block_in_" + JoinNameSegments(baseName)); + outType = baseType; + return true; + } } } } - } - // Handle macOS thread-local variable initializer suffix: $tlv$init - // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. 
- static const string tlvInitSuffix = "$tlv$init"; - if (name.size() > tlvInitSuffix.size() && - name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) - { - string base = name.substr(0, name.size() - tlvInitSuffix.size()); - Ref baseType; - QualifiedName baseName; - if (DemangleStringGNU3(arch, base, baseType, baseName)) + // Handle macOS thread-local variable initializer suffix: $tlv$init + // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. + static const string tlvInitSuffix = "$tlv$init"; + if (name.size() > tlvInitSuffix.size() && + name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) { - outVarName = baseName; - if (outVarName.size() > 0) - outVarName[outVarName.size() - 1] += "$tlv$init"; - else - outVarName.push_back("$tlv$init"); - outType = baseType; - return true; + string base = name.substr(0, name.size() - tlvInitSuffix.size()); + Ref baseType; + StringList baseName; + if (DemangleStringGNU3Segments(arch, base, baseType, baseName)) + { + outVarName = std::move(baseName); + if (outVarName.size() > 0) + outVarName[outVarName.size() - 1] += "$tlv$init"; + else + outVarName.push_back("$tlv$init"); + outType = baseType; + return true; + } } - } - string encoding = name; - string header; - bool foundHeader = DemangleGlobalHeader(encoding, header); - - if (!encoding.compare(0, 2, "_Z")) - encoding = encoding.substr(2); - else if (!encoding.compare(0, 3, "__Z")) - encoding = encoding.substr(3); - else if (foundHeader && !header.empty()) - { - outVarName.clear(); - outVarName.push_back(header); - outVarName.push_back(encoding); - outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(); - return true; - } - else - return false; + string encoding = name; + string header; + bool foundHeader = DemangleGNU3Static::DemangleGlobalHeader(encoding, header); - thread_local DemangleGNU3 demangle(arch, encoding); - demangle.Reset(arch, 
encoding); - try - { - outType = demangle.DemangleSymbol(outVarName).Finalize(); + if (!encoding.compare(0, 2, "_Z")) + encoding = encoding.substr(2); + else if (!encoding.compare(0, 3, "__Z")) + encoding = encoding.substr(3); + else if (foundHeader && !header.empty()) + { + outVarName.clear(); + outVarName.push_back(header); + outVarName.push_back(encoding); + outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(); + return true; + } + else + return false; - if (outVarName.size() == 0) + thread_local DemangleGNU3 demangle(arch, encoding); + demangle.Reset(arch, encoding); + try { - if (outType->GetClass() == NamedTypeReferenceClass && outType->GetNamedTypeReference()->GetTypeReferenceClass() == UnknownNamedTypeClass) - { - outVarName = outType->GetTypeName(); - outType = nullptr; - } - else if (outType->GetClass() == NamedTypeReferenceClass) + outType = demangle.DemangleSymbol(outVarName).Finalize(); + + if (outVarName.size() == 0) { - auto typeName = outType->GetTypeName(); - if (typeName.size() > 0) - outVarName = "_" + typeName[typeName.size() - 1]; + if (outType->GetClass() == NamedTypeReferenceClass && outType->GetNamedTypeReference()->GetTypeReferenceClass() == UnknownNamedTypeClass) + { + const auto typeName = outType->GetTypeName(); + outVarName = StringList(typeName.begin(), typeName.end()); + outType = nullptr; + } + else if (outType->GetClass() == NamedTypeReferenceClass) + { + auto typeName = outType->GetTypeName(); + if (typeName.size() > 0) + outVarName = StringList{"_" + typeName[typeName.size() - 1]}; + } } - } - if (foundHeader && !header.empty()) + if (foundHeader && !header.empty()) + outVarName.insert(outVarName.begin(), header); + } + catch (std::exception&) { - outVarName.insert(outVarName.begin(), header); + return false; } + return true; } - catch (std::exception&) - { +} + + +bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) +{ + 
StringList outVarNameSegments; + if (!DemangleStringGNU3Segments(arch, name, outType, outVarNameSegments)) return false; - } + outVarName = QualifiedName(outVarNameSegments); return true; } diff --git a/demangler/gnu3/demangle_gnu3.h b/demangler/gnu3/demangle_gnu3.h index 8ee0fca62..261987c49 100644 --- a/demangler/gnu3/demangle_gnu3.h +++ b/demangler/gnu3/demangle_gnu3.h @@ -95,7 +95,6 @@ class DemangleGNU3 { using ParamList = _STD_VECTOR; - BN::QualifiedName m_varName; DemangleGNU3Reader m_reader; BN::Architecture* m_arch; _STD_VECTOR m_substitute; @@ -118,10 +117,10 @@ class DemangleGNU3 bool m_inLocalName; struct ForwardRef { size_t index; }; _STD_VECTOR m_pendingForwardRefs; - void ResolveForwardTemplateRefs(DemangledTypeNode& type, const _STD_VECTOR<_STD_STRING>& args); + void ResolveForwardTemplateRefs(DemangledTypeNode& type, const StringList& args); static _STD_STRING ForwardRefPlaceholder(size_t index); enum SymbolType { Function, FunctionWithReturn, Data, VTable, Rtti, Name}; - BN::QualifiedName DemangleBaseUnresolvedName(); + StringList DemangleBaseUnresolvedName(); DemangledTypeNode DemangleUnresolvedType(); _STD_STRING DemangleUnarySuffixExpression(const _STD_STRING& op); _STD_STRING DemangleUnaryPrefixExpression(const _STD_STRING& op); @@ -140,7 +139,7 @@ class DemangleGNU3 void DemangleCVQualifiers(bool& cnst, bool& vltl, bool& rstrct); DemangledTypeNode DemangleSubstitution(); DemangledTypeNode DemangleTemplateSubstitution(); - void DemangleTemplateArgs(_STD_VECTOR<_STD_STRING>& args, bool* hadNonTypeArg = nullptr); + void DemangleTemplateArgs(StringList& args, bool* hadNonTypeArg = nullptr); DemangledTypeNode DemangleFunction(bool cnst, bool vltl); DemangledTypeNode DemangleType(); int64_t DemangleNumber(); @@ -149,7 +148,7 @@ class DemangleGNU3 void PushType(const DemangledTypeNode& type); const DemangledTypeNode& GetType(size_t ref); - DemangledTypeNode CreateUnknownType(const BN::QualifiedName& s); + DemangledTypeNode CreateUnknownType(const 
StringList& s); DemangledTypeNode CreateUnknownType(const _STD_STRING& s); static void ExtendTypeName(DemangledTypeNode& type, const _STD_STRING& extend); @@ -161,8 +160,7 @@ class DemangleGNU3 public: DemangleGNU3(BN::Architecture* arch, const _STD_STRING& mangledName); void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); - DemangledTypeNode DemangleSymbol(BN::QualifiedName& varName); - BN::QualifiedName GetVarName() const { return m_varName; } + DemangledTypeNode DemangleSymbol(StringList& varName); }; diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/gnu3/demangled_type_node.cpp index f3f744783..c15e64218 100644 --- a/demangler/gnu3/demangled_type_node.cpp +++ b/demangler/gnu3/demangled_type_node.cpp @@ -23,6 +23,47 @@ using namespace BinaryNinja; using namespace std; #endif +namespace +{ + static constexpr uint8_t DemangledPtr64Bit = 1u << 0; + static constexpr uint8_t DemangledUnalignedBit = 1u << 1; + static constexpr uint8_t DemangledRestrictBit = 1u << 2; + static constexpr uint8_t DemangledReferenceBit = 1u << 3; + static constexpr uint8_t DemangledLvalueBit = 1u << 4; + + static void AppendPointerSuffixToken(string& out, const char* token) + { + if (!out.empty() && out.back() != ' ') + out += ' '; + out += token; + } + + static string JoinNameList(const StringList& name) + { + if (name.empty()) + return {}; + if (name.size() == 1) + return name[0]; + + size_t total = (name.size() - 1) * 2; + for (const auto& segment : name) + total += segment.size(); + + string out; + out.reserve(total); + out += name[0]; + for (size_t i = 1; i < name.size(); i++) + { + out += "::"; + out += name[i]; + } + return out; + } + +} + +#define HAS_POINTER_SUFFIX(bit) ((m_pointerSuffixBits & (bit)) != 0) + static const char* CallingConventionString[] = { "", @@ -45,7 +86,8 @@ DemangledTypeNode::DemangledTypeNode() m_callingConventionName(NoCallingConvention), m_pointerSuffixBits(0), m_returnTypeConfidence(BN_DEFAULT_CONFIDENCE), m_const(false), 
m_volatile(false), m_signed(false), m_hasVariableArgs(false), - m_hasTemplateArgs(false), m_width(0), m_alignment(0), + m_hasTemplateArgs(false), m_width(0), + m_isMemberPointer(false), m_elements(0) { } @@ -130,6 +172,22 @@ DemangledTypeNode DemangledTypeNode::PointerType(Architecture* arch, DemangledTy } +DemangledTypeNode DemangledTypeNode::MemberPointerType(Architecture* arch, DemangledTypeNode child, + StringList ownerName, bool cnst, bool vltl) +{ + DemangledTypeNode n; + n.m_typeClass = PointerTypeClass; + n.m_width = arch->GetAddressSize(); + n.m_childType = std::make_shared(std::move(child)); + n.m_const = cnst; + n.m_volatile = vltl; + n.m_pointerReference = PointerReferenceType; + n.m_memberPointerOwnerName = std::move(ownerName); + n.m_isMemberPointer = true; + return n; +} + + DemangledTypeNode DemangledTypeNode::ArrayType(DemangledTypeNode child, uint64_t count) { DemangledTypeNode n; @@ -151,36 +209,56 @@ DemangledTypeNode DemangledTypeNode::FunctionType(DemangledTypeNode retType, } +void DemangledTypeNode::SetImplicitThisParameter(DemangledTypeNode type) +{ + m_implicitThisParameterType = std::make_shared(std::move(type)); +} + + DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, - vector nameSegments, size_t width, size_t align) + StringList nameSegments, size_t width, bool isSigned) { DemangledTypeNode n; n.m_typeClass = NamedTypeReferenceClass; n.m_ntrClass = cls; n.m_nameSegments = std::move(nameSegments); n.m_width = width; - n.m_alignment = align; + n.m_signed = isSigned; return n; } - -DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, - const QualifiedName& name, size_t width, size_t align) -{ - return NamedType(cls, vector(name.begin(), name.end()), width, align); -} - - -void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, vector nameSegments) +uint8_t DemangledTypeNode::PointerSuffixBit(BNPointerSuffix ps) { - m_ntrClass = cls; - m_nameSegments = std::move(nameSegments); + 
switch (ps) + { + case Ptr64Suffix: + return DemangledPtr64Bit; + case UnalignedSuffix: + return DemangledUnalignedBit; + case RestrictSuffix: + return DemangledRestrictBit; + case ReferenceSuffix: + return DemangledReferenceBit; + case LvalueSuffix: + return DemangledLvalueBit; + default: + return 0; + } } -void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, const QualifiedName& name) +void DemangledTypeNode::AddPointerSuffixes(TypeBuilder& tb, bool omitPtr64) const { - SetNTR(cls, vector(name.begin(), name.end())); + if (HAS_POINTER_SUFFIX(DemangledPtr64Bit) && !omitPtr64) + tb.AddPointerSuffix(Ptr64Suffix); + if (HAS_POINTER_SUFFIX(DemangledUnalignedBit)) + tb.AddPointerSuffix(UnalignedSuffix); + if (HAS_POINTER_SUFFIX(DemangledRestrictBit)) + tb.AddPointerSuffix(RestrictSuffix); + if (HAS_POINTER_SUFFIX(DemangledReferenceBit)) + tb.AddPointerSuffix(ReferenceSuffix); + if (HAS_POINTER_SUFFIX(DemangledLvalueBit)) + tb.AddPointerSuffix(LvalueSuffix); } @@ -219,19 +297,6 @@ size_t DemangledTypeNode::NameStringSize() const } -string DemangledTypeNode::GetModifierString() const -{ - if (m_const && m_volatile) - return "const volatile"; - if (m_const) - return "const"; - if (m_volatile) - return "volatile"; - return ""; -} - - -// Append " const", " volatile", or " const volatile" to out. No-op if neither. 
void DemangledTypeNode::AppendModifiers(string& out) const { if (m_const && m_volatile) @@ -243,33 +308,16 @@ void DemangledTypeNode::AppendModifiers(string& out) const } -string DemangledTypeNode::GetPointerSuffixString() const -{ - string out; - AppendPointerSuffix(out); - return out; -} - - void DemangledTypeNode::AppendPointerSuffix(string& out) const { - static const char* suffixStrings[] = { - "__ptr64", - "__unaligned", - "__restrict", - "&", - "&&" - }; - - for (uint8_t bits = m_pointerSuffixBits, i = 0; bits; bits >>= 1, i++) - { - if (bits & 1) - { - if (!out.empty() && out.back() != ' ') - out += ' '; - out += suffixStrings[i]; - } - } + if (HAS_POINTER_SUFFIX(DemangledUnalignedBit)) + AppendPointerSuffixToken(out, "__unaligned"); + if (HAS_POINTER_SUFFIX(DemangledRestrictBit)) + AppendPointerSuffixToken(out, "__restrict"); + if (HAS_POINTER_SUFFIX(DemangledReferenceBit)) + AppendPointerSuffixToken(out, "&"); + if (HAS_POINTER_SUFFIX(DemangledLvalueBit)) + AppendPointerSuffixToken(out, "&&"); } @@ -309,10 +357,11 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p switch (m_typeClass) { case FunctionTypeClass: + { // Return type before name if (m_childType) { - if (!out.empty() && out.back() != ' ') + if (!out.empty() && out.back() != ' ' && out.back() != '(') out += ' '; m_childType->AppendBeforeName(out, this); } @@ -323,9 +372,9 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p out += ' '; out += '('; } - if (m_callingConventionName < (sizeof(CallingConventionString) / sizeof(CallingConventionString[0]))) + if (static_cast(m_callingConventionName) < (sizeof(CallingConventionString) / sizeof(CallingConventionString[0]))) { - const char* callingConvention = CallingConventionString[m_callingConventionName]; + const char* callingConvention = CallingConventionString[static_cast(m_callingConventionName)]; if (callingConvention[0] != 0) { if (!out.empty() && out.back() != ' ' && out.back() != 
'(') @@ -334,6 +383,7 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p } } break; + } case IntegerTypeClass: if (!m_altName.empty()) @@ -389,14 +439,25 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p case PointerTypeClass: if (m_childType) m_childType->AppendBeforeName(out, this); - switch (m_pointerReference) + if (m_isMemberPointer) { - case ReferenceReferenceType: out += '&'; break; - case PointerReferenceType: out += '*'; break; - case RValueReferenceType: out += "&&"; break; - default: break; + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + out += JoinNameList(m_memberPointerOwnerName); + out += "::*"; + } + else + { + switch (m_pointerReference) + { + case ReferenceReferenceType: out += '&'; break; + case PointerReferenceType: out += '*'; break; + case RValueReferenceType: out += "&&"; break; + default: break; + } } - if (m_pointerSuffixBits != 0) + if ((m_pointerSuffixBits & (DemangledUnalignedBit | DemangledRestrictBit | + DemangledReferenceBit | DemangledLvalueBit)) != 0) { out += ' '; AppendPointerSuffix(out); @@ -474,7 +535,8 @@ void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* pa } out += ')'; AppendModifiers(out); - if (m_pointerSuffixBits != 0) + if ((m_pointerSuffixBits & (DemangledUnalignedBit | DemangledRestrictBit | + DemangledReferenceBit | DemangledLvalueBit)) != 0) AppendPointerSuffix(out); // Return type's after-name tokens if (m_childType) @@ -530,9 +592,15 @@ string DemangledTypeNode::GetString() const string DemangledTypeNode::GetTypeAndName(const QualifiedName& name) const +{ + return GetTypeAndName(vector(name.begin(), name.end())); +} + + +string DemangledTypeNode::GetTypeAndName(const StringList& name) const { const string before = GetStringBeforeName(); - const string qName = name.GetString(); + const string qName = JoinNameList(name); const string after = GetStringAfterName(); if ((!before.empty() && 
!qName.empty() && before.back() != ' ' && qName.front() != ' ') || (!before.empty() && !after.empty() && before.back() != ' ' && after.front() != ' ')) @@ -604,7 +672,12 @@ Ref DemangledTypeNode::Finalize() const case PointerTypeClass: { Ref child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); - return TypeBuilder::PointerType(m_width, child, m_const, m_volatile, static_cast(m_pointerReference)).Finalize(); + TypeBuilder tb = TypeBuilder::PointerType(m_width, child, m_const, m_volatile, m_pointerReference); + AddPointerSuffixes(tb, true); + Ref normalized = tb.Finalize(); + if (m_isMemberPointer) + return Type::NamedType(QualifiedName({GetString()}), normalized.GetPtr()); + return normalized; } case ArrayTypeClass: @@ -622,38 +695,35 @@ Ref DemangledTypeNode::Finalize() const { Ref retType = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); vector finalParams; - finalParams.reserve(m_params.size()); + finalParams.reserve(m_params.size() + (m_implicitThisParameterType ? 1 : 0)); + if (m_implicitThisParameterType) + finalParams.push_back({"this", m_implicitThisParameterType->Finalize(), true, Variable()}); for (auto& p : m_params) { Ref pType = p.type ? p.type->Finalize() : Ref(Type::VoidType()); finalParams.push_back({p.name, pType, true, Variable()}); } TypeBuilder tb = TypeBuilder::FunctionType( - retType->WithConfidence(m_returnTypeConfidence), nullptr, finalParams, + retType->WithConfidence(static_cast(m_returnTypeConfidence)), nullptr, finalParams, Confidence(m_hasVariableArgs, m_hasVariableArgs ? 
BN_DEFAULT_CONFIDENCE : 0)); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (uint8_t bits = m_pointerSuffixBits, i = 0; bits; bits >>= 1, i++) - if (bits & 1) - tb.AddPointerSuffix(static_cast(i)); - tb.SetNameType(static_cast(m_nameType)); + AddPointerSuffixes(tb); + tb.SetNameType(m_nameType); if (m_callingConventionName != NoCallingConvention) - tb.SetCallingConventionName(static_cast(m_callingConventionName)); + tb.SetCallingConventionName(m_callingConventionName); return tb.Finalize(); } case NamedTypeReferenceClass: { + QualifiedName name(m_nameSegments); TypeBuilder tb = TypeBuilder::NamedType( - NamedTypeReference::GenerateAutoDemangledTypeReference( - static_cast(m_ntrClass), QualifiedName(m_nameSegments)), - m_width, m_alignment > 0 ? m_alignment : 1); + NamedTypeReference::GenerateAutoDemangledTypeReference(m_ntrClass, name), m_width, 1); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (uint8_t bits = m_pointerSuffixBits, i = 0; bits; bits >>= 1, i++) - if (bits & 1) - tb.AddPointerSuffix(static_cast(i)); - tb.SetNameType(static_cast(m_nameType)); + AddPointerSuffixes(tb); + tb.SetNameType(m_nameType); tb.SetHasTemplateArguments(m_hasTemplateArgs); return tb.Finalize(); } @@ -662,3 +732,6 @@ Ref DemangledTypeNode::Finalize() const return Type::VoidType(); } } + +#undef HAS_POINTER_SUFFIX +#undef GetClass diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/gnu3/demangled_type_node.h index b41ae7c6f..54bab1c87 100644 --- a/demangler/gnu3/demangled_type_node.h +++ b/demangler/gnu3/demangled_type_node.h @@ -40,14 +40,10 @@ #endif #endif +#include #include -#ifdef BINARYNINJACORE_LIBRARY -#include "binaryninjacore_global.h" -#define _STD_SET BinaryNinjaCore::set -#else -#include -#define _STD_SET std::set -#endif + +typedef _STD_VECTOR<_STD_STRING> StringList; // Lightweight type representation for demanglers (GNU3 and MSVC). // This object serves as an abstraction layer between C++'s type system and our own. 
@@ -78,85 +74,76 @@ class DemangledTypeNode static DemangledTypeNode VarArgsType(); static DemangledTypeNode PointerType(BN::Architecture* arch, DemangledTypeNode child, bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode MemberPointerType(BN::Architecture* arch, DemangledTypeNode child, + StringList ownerName, bool cnst, bool vltl); static DemangledTypeNode ArrayType(DemangledTypeNode child, uint64_t count); static DemangledTypeNode FunctionType(DemangledTypeNode retType, std::nullptr_t, _STD_VECTOR params); static DemangledTypeNode NamedType(BNNamedTypeReferenceClass cls, - _STD_VECTOR<_STD_STRING> nameSegments, size_t width = 0, size_t align = 0); - static DemangledTypeNode NamedType(BNNamedTypeReferenceClass cls, - const BN::QualifiedName& name, size_t width = 0, size_t align = 0); + StringList nameSegments, size_t width = 0, bool isSigned = false); - // Getters - BNTypeClass GetClass() const { return static_cast(m_typeClass); } + BNTypeClass GetClass() const { return m_typeClass; } #ifdef BINARYNINJACORE_LIBRARY - BNTypeClass GetTypeClass() const { return static_cast(m_typeClass); } + BNTypeClass GetTypeClass() const { return m_typeClass; } #endif - const _STD_VECTOR<_STD_STRING>& GetTypeName() const { return m_nameSegments; } - _STD_VECTOR<_STD_STRING>& GetMutableTypeName() { return m_nameSegments; } + const StringList& GetTypeName() const { return m_nameSegments; } + StringList& GetMutableTypeName() { return m_nameSegments; } _STD_STRING GetTypeNameString() const; size_t NameStringSize() const; bool IsConst() const { return m_const; } bool IsVolatile() const { return m_volatile; } bool HasVariableArguments() const { return m_hasVariableArgs; } - BNNameType GetNameType() const { return static_cast(m_nameType); } + BNNameType GetNameType() const { return m_nameType; } bool HasTemplateArguments() const { return m_hasTemplateArgs; } uint8_t GetPointerSuffixBits() const { return m_pointerSuffixBits; } - bool HasPointerSuffix(BNPointerSuffix 
ps) const { return (m_pointerSuffixBits & (1u << ps)) != 0; } - BNNamedTypeReferenceClass GetNTRClass() const { return static_cast(m_ntrClass); } + bool HasPointerSuffix(BNPointerSuffix ps) const { return (m_pointerSuffixBits & PointerSuffixBit(ps)) != 0; } + BNNamedTypeReferenceClass GetNTRClass() const { return m_ntrClass; } + bool IsMemberPointer() const { return m_isMemberPointer; } + const StringList& GetMemberPointerOwnerName() const { return m_memberPointerOwnerName; } - // Setters - void SetTypeName(_STD_VECTOR<_STD_STRING> name) { m_nameSegments = std::move(name); } + void SetTypeName(StringList name) { m_nameSegments = std::move(name); } void SetConst(bool c) { m_const = c; } void SetVolatile(bool v) { m_volatile = v; } void SetHasVariableArguments(bool v) { m_hasVariableArgs = v; } void SetNameType(BNNameType nt) { m_nameType = nt; } void SetHasTemplateArguments(bool t) { m_hasTemplateArgs = t; } - void SetPointerSuffix(const _STD_SET& s) - { - m_pointerSuffixBits = 0; - for (auto ps : s) - m_pointerSuffixBits |= (1u << ps); - } void SetPointerSuffixBits(uint8_t bits) { m_pointerSuffixBits = bits; } - void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffixBits |= (1u << ps); } - void SetReturnTypeConfidence(uint8_t c) { m_returnTypeConfidence = c; } + void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffixBits |= PointerSuffixBit(ps); } + void SetReturnTypeConfidence(int8_t c) { m_returnTypeConfidence = c; } void SetCallingConventionName(BNCallingConventionName cc) { m_callingConventionName = cc; } + void SetNTRType(BNNamedTypeReferenceClass cls) { m_ntrClass = cls; } + void SetImplicitThisParameter(DemangledTypeNode type); - // Named type reference operations - void SetNTR(BNNamedTypeReferenceClass cls, _STD_VECTOR<_STD_STRING> nameSegments); - void SetNTR(BNNamedTypeReferenceClass cls, const BN::QualifiedName& name); - - // String formatting void AppendString(_STD_STRING& out) const; _STD_STRING GetString() const; _STD_STRING 
GetStringBeforeName() const; _STD_STRING GetStringAfterName() const; + _STD_STRING GetTypeAndName(const StringList& name) const; _STD_STRING GetTypeAndName(const BN::QualifiedName& name) const; - // Conversion to real Type BN::Ref Finalize() const; private: - // Packed flags and small enums — all fit in a few bytes - uint8_t m_typeClass; // BNTypeClass (13 values, 4 bits) - uint8_t m_ntrClass; // BNNamedTypeReferenceClass (6 values, 3 bits) - uint8_t m_pointerReference; // BNReferenceType (4 values, 2 bits) - uint8_t m_nameType; // BNNameType (90 values, 7 bits) - uint8_t m_callingConventionName; // BNCallingConventionName (11 values, 4 bits) - uint8_t m_pointerSuffixBits; // Bitmask of BNPointerSuffix (5 flags) + BNTypeClass m_typeClass; + BNNamedTypeReferenceClass m_ntrClass; + BNReferenceType m_pointerReference; + BNNameType m_nameType; + BNCallingConventionName m_callingConventionName; + uint8_t m_pointerSuffixBits; uint8_t m_returnTypeConfidence; bool m_const; bool m_volatile; bool m_signed; bool m_hasVariableArgs; bool m_hasTemplateArgs; + std::shared_ptr m_implicitThisParameterType; size_t m_width; - size_t m_alignment; _STD_STRING m_altName; - // Named type ref data - _STD_VECTOR<_STD_STRING> m_nameSegments; + StringList m_nameSegments; + StringList m_memberPointerOwnerName; + bool m_isMemberPointer; // Child type (for pointer/array/function return) std::shared_ptr m_childType; @@ -166,8 +153,8 @@ class DemangledTypeNode _STD_VECTOR m_params; // Helpers for string formatting - _STD_STRING GetModifierString() const; - _STD_STRING GetPointerSuffixString() const; + static uint8_t PointerSuffixBit(BNPointerSuffix ps); + void AddPointerSuffixes(BN::TypeBuilder& tb, bool omitPtr64 = true) const; void AppendModifiers(_STD_STRING& out) const; void AppendPointerSuffix(_STD_STRING& out) const; void AppendTypeName(_STD_STRING& out) const; diff --git a/demangler/msvc/demangle_msvc.cpp b/demangler/msvc/demangle_msvc.cpp index b9c8eec4f..a12ba970d 100644 --- 
a/demangler/msvc/demangle_msvc.cpp +++ b/demangler/msvc/demangle_msvc.cpp @@ -30,6 +30,14 @@ using namespace std; #define MAX_DEMANGLE_LENGTH 32768 +static int64_t SignExtendInt32(int64_t value) +{ + uint64_t lowBits = static_cast(value) & 0xffffffffULL; + if ((lowBits & 0x80000000ULL) != 0) + return static_cast(lowBits) - 0x100000000LL; + return static_cast(lowBits); +} + // Define MSVC_DEMANGLE_DEBUG to enable trace logging #ifdef MSVC_DEMANGLE_DEBUG #define MSVC_TRACE(...) LogTrace(__VA_ARGS__) @@ -170,8 +178,50 @@ void Demangle::RewriteTemplateBackrefName(NameList& typeName, const BackrefList& } } +DemangledTypeNode Demangle::DemangleReferencedSymbolValue(BackrefList& varList) +{ + // Template argument backrefs are scoped. A referenced symbol nested inside + // a non-type template argument can see backrefs already introduced by the + // surrounding template argument list, but any backrefs created while parsing + // that nested symbol must not leak back into the outer template list. + BackrefList symbolBackrefs = varList; + NameList savedVarName = m_varName; + + try + { + auto context = DemangleSymbol(symbolBackrefs); + string value = "&" + context.type.GetTypeAndName(m_varName); + m_varName = std::move(savedVarName); + + return DemangledTypeNode::ValueType(value); + } + catch (...) 
+ { + m_varName = std::move(savedVarName); + throw; + } +} + -DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameList& name) +DemangledTypeNode Demangle::DemangleAutoNonTypeTemplateParam(BackrefList& varList) +{ + if (reader.Peek() == '0') + { + reader.Consume(); + int64_t value; + DemangleNumber(value); + return DemangledTypeNode::ValueType(to_string(value)); + } + if (reader.Peek() == '1') + { + reader.Consume(); + return DemangleReferencedSymbolValue(varList); + } + throw DemangleException(); +} + + +DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameList& name, bool includeImplicitThis) { MSVC_TRACE("%s: '%s' - %lu\n", __FUNCTION__, reader.GetRaw(), varList.nameList.size()); DemangledTypeNode newType; @@ -182,8 +232,8 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameList varName; NameList typeName; BNNameType classFunctionType; - - size_t width; + size_t width = 0; + bool _enumSigned = false; char elm = reader.Read(); switch (elm) { @@ -242,14 +292,14 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, typeClass = EnumerationTypeClass; switch (reader.Read()) { - case '0': width = 1; break; - case '1': width = 1; break; - case '2': width = 2; break; - case '3': width = 2; break; - case '4': width = 4; break; - case '5': width = 4; break; - case '6': width = 4; break; - case '7': width = 4; break; + case '0': width = 1; _enumSigned = true; break; + case '1': width = 1; _enumSigned = false; break; + case '2': width = 2; _enumSigned = true; break; + case '3': width = 2; _enumSigned = false; break; + case '4': width = 4; _enumSigned = true; break; + case '5': width = 4; _enumSigned = false; break; + case '6': width = 4; _enumSigned = true; break; + case '7': width = 4; _enumSigned = false; break; default: throw DemangleException(); } break; @@ -329,11 +379,30 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, 
bool isReturn, case 'S': newType = DemangledTypeNode::IntegerType(2, true, "char16_t"); break; case 'U': newType = DemangledTypeNode::IntegerType(4, true, "char32_t"); break; case 'W': newType = DemangledTypeNode::IntegerType(2, false, "wchar_t"); break; - case 'P': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); break; - case 'Q': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); break; // decltype(auto) displayed as auto - case 'T': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); break; // decltype(auto) alternate - case 'X': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Coclass - case 'Y': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Cointerface + // `_P` (auto) and `_T` (decltype(auto)) are placeholder return-type + // encodings. For normal source code they are deduced at the function + // definition and mangled as the deduced type — you will not see `_P` + // or `_T` from something like `auto foo() { return 0; }` (that becomes + // `?foo@@YAHXZ`). They do appear in compiler-emitted symbols for + // function templates whose declared return type is literally `auto` + // or `decltype(auto)` and which are mangled before/without deduction + // settling on a concrete type — e.g. `??$seq@HX@llvm@@YA?A_PH@Z` + // (llvm::seq) or `??$_Get_unwrapped@...@std@@YA?A_T...@Z`. Handle + // them as named-type placeholders so downstream type consumers get + // something sensible (rather than a `` demangle) even though + // the underlying type is not expressible as a Binary Ninja Type. 
+ case 'P': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"auto"}); break; + case 'Q': newType = DemangledTypeNode::IntegerType(1, true, "char8_t"); break; // C++20 char8_t + case 'T': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"decltype(auto)"}); break; + // NOTE: `_X` and `_Y` were previously mapped to coclass/cointerface + // here, but those encodings are not emitted by any real toolchain. + // Neither LLVM's MicrosoftDemangle / MicrosoftMangle nor Wine's + // undname reimplementation recognizes `_X` or `_Y` as type + // codes. Real cointerface is plain `Y@@` (no underscore) at + // the top-level type switch, grouped with T/U/V; coclass has no + // dedicated mangling and is emitted as `V@@` (class). Let + // `_X` / `_Y` fall through to the `default: throw` so malformed + // input is rejected instead of producing a bogus class type. default: throw DemangleException(); } @@ -365,7 +434,7 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, if ((num == '8' || num == '9') && reader.Length() >= 2 && reader.Peek() == '@' && reader.PeekAt(1) == '@') reader.Consume(2); - return DemangleFunction(NoNameType, num >= '7', varList); + return DemangleFunction(NoNameType, num >= '7', varList, NoneFunctionClass, false); } throw DemangleException(); } @@ -408,27 +477,41 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameList name; return DemangleVarType(varList, isReturn, name); } + else if (reader.Peek() == 'M') + { + // $M - C++17 `auto` non-type template parameter. + // The encoded type is the deduced type for the following bare + // non-type payload and is not itself printed as a template arg. 
+ reader.Consume(); + NameList autoTypeName; + DemangleVarType(varList, false, autoTypeName); + return DemangleAutoNonTypeTemplateParam(varList); + } else if (reader.Peek() == 'H' || reader.Peek() == 'I' || reader.Peek() == 'J') { - // $H/$I/$J - member function pointer as non-type template parameter - // Format: $H@ - // $I has two adjustment numbers, $J has three + // $H/$I/$J - member function pointer value as a non-type template + // parameter. Format: $H@; + // $I has two adjustment numbers, $J has three. char kind = reader.Read(); - auto context = DemangleSymbol(); - // Read adjustment number(s) — NOT $-prefixed, just raw numbers + BackrefList symbolBackrefs = varList; + auto context = DemangleSymbol(symbolBackrefs); + _STD_STRING value = "{" + context.type.GetTypeAndName(m_varName); + + // Read adjustment number(s) — NOT $-prefixed, just raw numbers. int adjustments = (kind == 'H') ? 1 : (kind == 'I') ? 2 : 3; for (int i = 0; i < adjustments && reader.Length() > 0 && reader.Peek() != '@'; i++) { int64_t adj; DemangleNumber(adj); + value += "," + to_string(adj); } - return DemangledTypeNode::PointerType(m_arch, std::move(context.type), false, false, PointerReferenceType); + value += "}"; + return DemangledTypeNode::ValueType(value); } else if (reader.Peek() == '1') { reader.Consume(); - auto context = DemangleSymbol(); - return DemangledTypeNode::PointerType(m_arch, std::move(context.type), false, false, PointerReferenceType); + return DemangleReferencedSymbolValue(varList); } else throw DemangleException(); @@ -462,23 +545,13 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, case '3': case '4': case '5': + case '7': + case '9': throw DemangleException(); case '6': { reader.Consume(); - auto childType = DemangleFunction(NoNameType, false, varList); - newType = DemangledTypeNode::PointerType(m_arch, - std::move(childType), - _const, - _volatile, - refType); - break; - } - case '7': //Function pointer - case '9': //Class 
Function pointer - { - reader.Consume(); - auto childType = DemangleFunction(NoNameType, true, varList); + auto childType = DemangleFunction(NoNameType, false, varList, NoneFunctionClass, false); newType = DemangledTypeNode::PointerType(m_arch, std::move(childType), _const, @@ -489,14 +562,15 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, case '8': //Named class function pointer { reader.Consume(); - DemangleName(name, classFunctionType, varList, true); - name.push_back(""); - auto childType = DemangleFunction(NoNameType, true, varList); - newType = DemangledTypeNode::PointerType(m_arch, + NameList ownerName; + DemangleName(ownerName, classFunctionType, varList, true); + RewriteTemplateBackrefName(ownerName, varList); + auto childType = DemangleFunction(NoNameType, true, varList, NoneFunctionClass, false); + newType = DemangledTypeNode::MemberPointerType(m_arch, std::move(childType), + std::move(ownerName), _const, - _volatile, - refType); + _volatile); break; } default: // Non-numeric @@ -504,9 +578,15 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, MSVC_TRACE("Demangle pointer subtype: '%s'\n", reader.GetRaw()); DemangledTypeNode child; bool _const2 = false, _volatile2 = false, isMember = false; + NameList ownerName; auto suffix = DemanglePointerSuffix(); ConsumeExtendedModifierPrefix(); DemangleModifiers(_const2, _volatile2, isMember); + if (isMember) + { + DemangleName(ownerName, classFunctionType, varList, true); + RewriteTemplateBackrefName(ownerName, varList); + } if (reader.Peek() == 'Y') //Multi-dimentional array { MSVC_TRACE("Demangle multi-dimentional array"); @@ -531,16 +611,24 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, else { NameList name; - child = DemangleVarType(varList, true, name); + child = DemangleVarType(varList, true, name, includeImplicitThis && !isMember); } child.SetConst(_const2); child.SetVolatile(_volatile2); - newType = 
DemangledTypeNode::PointerType(m_arch, - std::move(child), - _const, - _volatile, - refType); + if (isMember) + { + newType = DemangledTypeNode::MemberPointerType( + m_arch, std::move(child), std::move(ownerName), _const, _volatile); + } + else + { + newType = DemangledTypeNode::PointerType(m_arch, + std::move(child), + _const, + _volatile, + refType); + } newType.SetPointerSuffixBits(suffix); MSVC_TRACE("Name: %s\n", newType.GetString().c_str()); @@ -552,7 +640,7 @@ DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, case EnumerationTypeClass: MSVC_TRACE("Demangle enumeration\n"); DemangleName(typeName, classFunctionType, varList, true); - newType = DemangledTypeNode::NamedType(EnumNamedTypeClass, typeName, width, width); + newType = DemangledTypeNode::NamedType(EnumNamedTypeClass, typeName, width, _enumSigned); break; case StructureTypeClass: MSVC_TRACE("Demangle structure\n"); @@ -739,7 +827,7 @@ void Demangle::DemangleWideChar(uint16_t& wch) } -void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList) +void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList, bool typeBackrefs) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; @@ -762,15 +850,29 @@ void Demangle::DemangleVariableList(vector& paramList, reader.Consume(); break; } - else if (reader.Length() >= 3 && (reader.PeekMatch("$$V", 3) || reader.PeekMatch("$$Z", 3) || reader.PeekMatch("$$S", 3))) + else if (reader.Length() >= 4 && reader.PeekMatch("$$$V", 4)) { - // $$V = empty parameter pack, $$Z = end of parameter pack, $$S = empty non-type param + // $$$V = empty expanded type / template-template pack (post-MSVC2015 mangling). + // See clang/lib/AST/MicrosoftMangle.cpp: for MSVC2015-compat this emits $$V, + // otherwise $$$V. 
+ reader.Consume(4); + continue; + } + else if (reader.Length() >= 3 && (reader.PeekMatch("$$V", 3) || reader.PeekMatch("$$Z", 3))) + { + // $$V = empty expanded type / template-template pack (MSVC2015-compat mangling). + // $$Z = separator between two consecutive packs (emitted between non-empty packs, + // not as a lone template argument). LLVM's demangler leniently skips it in + // any position; we follow suit. + // NB: $$S is NOT emitted by any known toolchain - only $S (single $) is a real + // token, handled below. reader.Consume(3); continue; } else if (reader.Length() >= 2 && reader.PeekMatch("$S", 2)) { - // $S = empty non-type template parameter + // $S = empty expanded non-type template pack + // (e.g. `template` or `template` instantiated with zero args). reader.Consume(2); continue; } @@ -785,7 +887,16 @@ void Demangle::DemangleVariableList(vector& paramList, NameList name; MSVC_TRACE("Argument %zu: %s", paramList.size(), reader.GetRaw()); + size_t typeListSizeAtEntry = varList.typeList.size(); DemangledTypeNode type = DemangleVarType(varList, false, name); + // Template argument lists may use temporary backrefs created while + // parsing an argument, and later arguments may refer to them. However, + // the completed top-level argument itself is not added as a later type + // backref in the same template argument list. DemangleVarType appends + // that completed type last, so preserve any intermediate entries and + // drop only the final top-level type. 
+ if (!typeBackrefs && varList.typeList.size() > typeListSizeAtEntry) + varList.typeList.pop_back(); if (hasModifiers) { type.SetConst(_const); @@ -844,6 +955,11 @@ Demangle::NameType Demangle::GetNameType() reader.Consume(); return NameReturn; } + else if (reader.Length() >= 3 && reader.Peek() == 'A' && reader.PeekAt(1) == '0' && reader.PeekAt(2) == 'x') + { + reader.Consume(); + return NameAnonymousNamespace; + } else if (reader.PeekMatch("_R", 2)) { reader.Consume(2); @@ -1024,14 +1140,33 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'A': functionType = ManagedVectorConstructorIteratorNameType; break; case 'B': functionType = ManagedVectorDestructorIteratorNameType; break; case 'C': functionType = EHVectorCopyConstructorIteratorNameType; break; - case 'D': functionType = EHVectorVBaseConstructorIteratorNameType; break; - case 'E': functionType = DynamicInitializerNameType; out = "`dynamic initializer'"; break; - case 'F': functionType = DynamicAtExitDestructorNameType; out = "`dynamic atexit destructor'"; break; + // ??__D is the *copy* variant per LLVM (MicrosoftDemangle.cpp:701). + // Previously routed to EHVectorVBaseConstructorIteratorNameType + // (the non-copy enum used by ??_O), which dropped the "copy" word. + case 'D': functionType = EHVectorVBaseCopyConstructorIteratorNameType; break; + // ??__E and ??__F are not reached here — they're handled at the + // top level in DemangleSymbol, matching LLVM's special-intrinsic + // dispatch. See DemangleDynamicInitFini. + case 'E': // fall through — unreachable in practice + case 'F': functionType = (reader.PeekAt(-1) == 'E') ? 
DynamicInitializerNameType : DynamicAtExitDestructorNameType; break; case 'G': functionType = VectorCopyConstructorIteratorNameType; break; case 'H': functionType = VectorVBaseCopyConstructorIteratorNameType; break; case 'I': functionType = ManagedVectorCopyConstructorIteratorNameType; break; - case 'J': functionType = LocalStaticGuardNameType; break; - case 'K': functionType = UserDefinedLiteralOperatorNameType; break; + case 'J': functionType = LocalStaticThreadGuardNameType; break; + case 'K': + { + // User-defined literal operator: ??__K@ + // LLVM's demangleLiteralOperatorIdentifier consumes a simple + // string terminated by '@' as the literal suffix and renders it + // as `operator ""`. The outer DemangleName loop then + // picks up any enclosing scope chain as a normal prefix. + functionType = UserDefinedLiteralOperatorNameType; + _STD_STRING suffix = reader.ReadUntil('@'); + if (suffix.empty()) + throw DemangleException("??__K requires a non-empty literal suffix"); + out = "operator \"\"" + suffix; + break; + } case 'L': functionType = NoNameType; out = "operator co_await"; break; case 'M': functionType = NoNameType; out = "operator<=>"; break; // spaceship operator default: throw DemangleException("Demangle Lookup Failed"); // fall through @@ -1074,6 +1209,7 @@ string Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& na BNNameType dummyFunctionType = NoNameType; NameList dummyNameList; BackrefList outerBackrefs; + bool backrefEligible = true; MSVC_TRACE("DemangleTemplateInstantiationNameInLocalContext: '%s'\n", reader.GetRaw()); std::swap(outerBackrefs.typeList, nameBackrefList.typeList); @@ -1083,8 +1219,9 @@ string Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& na try { reader.Consume(2); - out = DemangleUnqualifiedSymbolName(dummyNameList, nameBackrefList, dummyFunctionType); - nameBackrefList.PushStringBackref(out); + out = DemangleUnqualifiedSymbolName(dummyNameList, nameBackrefList, dummyFunctionType, 
backrefEligible); + if (backrefEligible && dummyFunctionType == NoNameType) + nameBackrefList.PushStringBackref(out); DemangleTemplateParams(params, nameBackrefList, out); } catch (...) @@ -1107,6 +1244,7 @@ string Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& na void Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) { + params.clear(); const bool nestedTemplateContext = (m_templateParamDepth > 0); struct NameBackrefScopeGuard { @@ -1134,7 +1272,7 @@ void Demangle::DemangleTemplateParams(vector& params, nameBackrefList.nameList.size() }; - DemangleVariableList(params, nameBackrefList); + DemangleVariableList(params, nameBackrefList, false); } // Reserve space to reduce reallocation during template string building @@ -1156,8 +1294,10 @@ void Demangle::DemangleTemplateParams(vector& params, } -string Demangle::DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) +string Demangle::DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, + BNNameType& classFunctionType, bool& backrefEligible) { + backrefEligible = true; string out; if (reader.PeekMatch("?$", 2)) { @@ -1169,6 +1309,9 @@ string Demangle::DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& { reader.Consume(); DemangleTypeNameLookup(out, classFunctionType); + // Lookup-based operator names are not normal identifier components and + // should not satisfy later scope backrefs such as strong_ordering@0@. + backrefEligible = false; } else if (reader.Peek() >= '0' && reader.Peek() <= '9') { @@ -1240,11 +1383,15 @@ DemangledTypeNode Demangle::DemangleString() MSVC_TRACE("%s: Wide string '%s'\n", __FUNCTION__, reader.GetRaw()); string utf8name; literalPrefix = "L"; - truncated = (length > 64); + // Track the last wide char so we can detect missing null terminator. 
+ uint16_t lastWch = 1; + size_t wcharCount = 0; while (reader.Peek() != '@') { uint16_t wch; DemangleWideChar(wch); + lastWch = wch; + wcharCount++; uint8_t chs[2]; chs[0] = wch & 0xFF; @@ -1255,6 +1402,12 @@ DemangledTypeNode Demangle::DemangleString() } reader.Consume(); + // MSVC string literals always mangle their trailing null. A payload + // that doesn't end in a wide null means the original was too long to + // fit in the mangling and was truncated. Matches LLVM's demangler. + if (wcharCount == 0 || lastWch != 0) + truncated = true; + name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); } @@ -1285,6 +1438,11 @@ DemangledTypeNode Demangle::DemangleString() { truncated = true; } + // MSVC includes the trailing '\0' in the mangled payload. If the last + // byte isn't a null, the original string was truncated to fit the + // encoding's size limit — LLVM signals this with a `...` suffix. + if (!chars.empty() && chars.back() != 0) + truncated = true; // Now time to guess encoding if (chars.size() % 1 != 0) @@ -1353,8 +1511,21 @@ DemangledTypeNode Demangle::DemangleTypeInfoName() switch (type.GetClass()) { case NamedTypeReferenceClass: - m_varName = NameList(type.GetTypeName().begin(), type.GetTypeName().end()); - return type; + { + // Match LLVM's demangler: a raw type-info name (.?A...) renders as + // ` `RTTI Type Descriptor Name''`. Bake the type + // keyword + name into the symbol's qualified name via m_varName, + // then return a fresh NamedType marked RttiTypeDescriptor so BN's + // core type formatter skips its own class/struct prefix — this + // mirrors the treatment of ??_R0 in DemangleNameTypeRtti case '0'. 
+ string rendered = type.GetString() + " `RTTI Type Descriptor Name'"; + m_varName = { rendered }; + DemangledTypeNode newType = DemangledTypeNode::NamedType( + StructNamedTypeClass, + NameList(type.GetTypeName().begin(), type.GetTypeName().end())); + newType.SetNameType(RttiTypeDescriptor); + return newType; + } default: throw DemangleException("Unexpected type of RTTI Type Name"); } @@ -1367,6 +1538,27 @@ void Demangle::DemangleName(NameList& nameList, bool typeNameContext) { size_t nameListSizeAtEntry = nameList.size(); + bool pendingConstructorTemplateName = false; + + auto finalizeConstructorTemplateName = [&]() { + if (!pendingConstructorTemplateName) + return; + + if (nameList.size() <= nameListSizeAtEntry + 1) + throw DemangleException("Constructor template missing class scope"); + + string& constructorTemplateName = nameList.back(); + if (constructorTemplateName.empty() || constructorTemplateName[0] != '<') + throw DemangleException("Invalid constructor template name"); + + // `??$?0...@Class@@` is a templated constructor. LLVM models `?0` as a + // structor identifier and attaches the parsed enclosing class to it after + // the qualified name is complete; Wine's undname does the same as a string + // post-process. We store names as strings, so apply that post-process here: + // `?0` becomes `Class`. 
+ constructorTemplateName = nameList[nameList.size() - 2] + constructorTemplateName; + }; + auto tryDemangleEscapedLookupScopeName = [&]() -> bool { if (nameList.size() <= nameListSizeAtEntry) @@ -1428,6 +1620,75 @@ void Demangle::DemangleName(NameList& nameList, return true; }; + auto decodeEncodedNumber = [&](const string& encoded) -> int64_t + { + if (encoded.empty()) + throw DemangleException("Empty encoded number"); + + size_t offset = 0; + int mult = 1; + if (encoded[offset] == '?') + { + mult = -1; + offset++; + } + if (offset >= encoded.size()) + throw DemangleException("Truncated encoded number"); + + if (encoded[offset] >= '0' && encoded[offset] <= '9') + { + if (offset + 1 != encoded.size()) + throw DemangleException("Decimal encoded number has trailing characters"); + return mult * (encoded[offset] + 1 - '0'); + } + + int64_t num = 0; + for (; offset < encoded.size(); offset++) + { + char a = encoded[offset]; + num *= 16; + if (a >= 'A' && a <= 'P') + num += a - 'A'; + else + throw DemangleException("Invalid encoded hex digit"); + } + return num * mult; + }; + + auto functionTypeHasPointerSuffix = [&](char ft) -> bool + { + return ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' + && ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z'; + }; + + auto formatFunctionScopeSignature = [&](const DemangledTypeNode& type, const NameList& scopeName) -> string + { + string out = type.GetTypeAndName(scopeName); + while (!out.empty() && out.back() == ' ') + out.pop_back(); + return out; + }; + + auto appendLocalScope = [&](int64_t scopeOrdinal) -> void + { + NameList scopeName; + BNNameType scopeFunctionType = NoNameType; + DemangleName(scopeName, scopeFunctionType, nameBackrefList, typeNameContext); + + if (reader.Length() == 0) + throw DemangleException("Missing local scope function encoding"); + + char ft = reader.Read(); + if (ft < 'A' || ft > 'Z') + throw DemangleException("Invalid local scope function encoding"); + + DemangledTypeNode scopeType = 
DemangleFunction( + scopeFunctionType, functionTypeHasPointerSuffix(ft), nameBackrefList); + + nameList.insert(nameList.begin(), "`" + to_string(scopeOrdinal) + "'"); + nameList.insert(nameList.begin(), "`" + formatFunctionScopeSignature(scopeType, scopeName) + "'"); + }; + string out; BNNameType functionType; BNNameType dummyFunctionType; @@ -1449,6 +1710,24 @@ void Demangle::DemangleName(NameList& nameList, case NameLookup: { MSVC_TRACE("Demangle Lookup\n"); + if (nameList.size() > nameListSizeAtEntry) + { + const char* saved = reader.GetRaw(); + try + { + int64_t scopeOrdinal; + DemangleNumber(scopeOrdinal); + if (reader.Length() >= 2 && reader.Peek() == '?' && reader.PeekAt(1) == '?') + { + appendLocalScope(scopeOrdinal); + break; + } + } + catch (...) + { + } + reader.SetRaw(saved); + } if (tryDemangleEscapedLookupScopeName()) break; DemangleTypeNameLookup(out, functionType); @@ -1469,57 +1748,12 @@ void Demangle::DemangleName(NameList& nameList, reader.Consume(); break; } - bool isScope = false; - if (nameList.size() > nameListSizeAtEntry + 1) - { - if (reader.Length() >= 3 && reader.Peek() == '@' - && reader.PeekAt(1) == '?' && reader.PeekAt(2) == '?') - { - reader.Consume(); // consume '@' separator - isScope = true; - } - else if (reader.Length() >= 2 && reader.Peek() == '?' - && reader.PeekAt(1) == '?') - { - // No '@' separator — digit scope like ?3??func - isScope = true; - } - else if (reader.Length() >= 4 && reader.Peek() >= 'A' && reader.Peek() <= 'Z' - && reader.PeekAt(1) == '@' && reader.PeekAt(2) == '?' 
- && reader.PeekAt(3) == '?') - { - // Two-char letter scope like ?EL@??func — consume second char + '@' - reader.Consume(2); // consume and '@' - isScope = true; - } - } - if (isScope) - { - DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); - if (reader.Length() > 0 && reader.Peek() != '@') - { - const char* saved = reader.GetRaw(); - try - { - char ft = reader.Read(); - if (ft >= 'A' && ft <= 'Z') - DemangleFunction(NoNameType, - ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' && - ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z', - m_backrefList); - } - catch (...) - { - reader.SetRaw(saved); - while (reader.Length() > 0) - { - char c = reader.Read(); - if (c == 'Z' && reader.Length() > 0 && reader.Peek() == '@') - break; - } - } - } - } + break; + } + case NameAnonymousNamespace: + { + DemangleNameTypeString(out); // discard compiler-generated hash + nameList.insert(nameList.begin(), "`anonymous namespace'"); break; } case NameBackref: @@ -1531,7 +1765,9 @@ void Demangle::DemangleName(NameList& nameList, case NameTemplate: { MSVC_TRACE("Demangle Template: '%s'\n", reader.GetRaw()); - if (m_templateParamDepth > 0) + BNNameType functionType = NoNameType; + bool backrefEligible = true; + if (typeNameContext || (m_templateParamDepth > 0) || (nameList.size() > nameListSizeAtEntry)) { const char* saved = reader.GetRaw(); reader.SetRaw(saved - 2); @@ -1539,26 +1775,18 @@ void Demangle::DemangleName(NameList& nameList, } else { - out = DemangleUnqualifiedSymbolName(nameList, nameBackrefList, functionType); - if (functionType == NoNameType) + out = DemangleUnqualifiedSymbolName(nameList, nameBackrefList, functionType, backrefEligible); + if (backrefEligible && functionType == NoNameType) { MSVC_TRACE("Pushing backref NameTemplate %s", out.c_str()); nameBackrefList.PushStringBackref(out); } MSVC_TRACE("Demangling Template variables %s\n", reader.GetRaw()); - if (typeNameContext) - { - BackrefList localTemplateBackrefs; - 
localTemplateBackrefs.typeList = nameBackrefList.typeList; - localTemplateBackrefs.templateList = nameBackrefList.templateList; - localTemplateBackrefs.PushStringBackref(out); - DemangleTemplateParams(params, localTemplateBackrefs, out); - for (const auto& specialization: localTemplateBackrefs.templateList) - nameBackrefList.PushTemplateSpecialization(specialization); - } - else + DemangleTemplateParams(params, nameBackrefList, out); + if (functionType == ConstructorNameType) { - DemangleTemplateParams(params, nameBackrefList, out); + classFunctionType = ConstructorNameType; + pendingConstructorTemplateName = true; } } nameList.insert(nameList.begin(), out); @@ -1570,34 +1798,12 @@ void Demangle::DemangleName(NameList& nameList, bool isScope = (nameList.size() > nameListSizeAtEntry); if (!isScope) classFunctionType = ConstructorNameType; - DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); if (isScope) { - if (reader.Length() > 0 && reader.Peek() != '@') - { - const char* saved = reader.GetRaw(); - try - { - char ft = reader.Read(); - if (ft >= 'A' && ft <= 'Z') - DemangleFunction(NoNameType, - ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' && - ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z', - m_backrefList); - } - catch (...) 
- { - reader.SetRaw(saved); - while (reader.Length() > 0) - { - char c = reader.Read(); - if (c == 'Z' && reader.Length() > 0 && reader.Peek() == '@') - break; - } - } - } + appendLocalScope(1); break; } + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); if (nameList.size() == 0) throw DemangleException(); nameList.push_back(nameList[nameList.size()-1]); @@ -1609,34 +1815,12 @@ void Demangle::DemangleName(NameList& nameList, bool isScope = (nameList.size() > nameListSizeAtEntry); if (!isScope) classFunctionType = ConstructorNameType; - DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); if (isScope) { - if (reader.Length() > 0 && reader.Peek() != '@') - { - const char* saved = reader.GetRaw(); - try - { - char ft = reader.Read(); - if (ft >= 'A' && ft <= 'Z') - DemangleFunction(NoNameType, - ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' && - ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z', - m_backrefList); - } - catch (...) - { - reader.SetRaw(saved); - while (reader.Length() > 0) - { - char c = reader.Read(); - if (c == 'Z' && reader.Length() > 0 && reader.Peek() == '@') - break; - } - } - } + appendLocalScope(2); break; } + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); if (nameList.size() == 0) throw DemangleException(); nameList.push_back("~" + nameList[nameList.size()-1]); @@ -1650,47 +1834,23 @@ void Demangle::DemangleName(NameList& nameList, case NameReturn: { MSVC_TRACE("NameReturn\n"); - // Check if this is actually a multi-char scope specifier (?B@??) - // rather than a conversion operator. In scope context, ?B followed by - // a name component and then @?? indicates a scope index, not operator B. 
if (nameList.size() > nameListSizeAtEntry && reader.Length() >= 1) { - // Read the "conversion type" which is really a scope suffix - _STD_STRING scopeName; - DemangleNameTypeString(scopeName); - nameBackrefList.PushStringBackref(scopeName); - nameList.insert(nameList.begin(), scopeName); - // Check if this is a scope followed by ?? - if (reader.Length() >= 2 && reader.Peek() == '?' && reader.PeekAt(1) == '?') + const char* saved = reader.GetRaw(); + try { - // Scope specifier — parse the scoped function - DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); - if (reader.Length() > 0 && reader.Peek() != '@') + _STD_STRING scopeSuffix; + DemangleNameTypeString(scopeSuffix); + if (reader.Length() >= 2 && reader.Peek() == '?' && reader.PeekAt(1) == '?') { - const char* saved = reader.GetRaw(); - try - { - char ft = reader.Read(); - if (ft >= 'A' && ft <= 'Z') - DemangleFunction(NoNameType, - ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' && - ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z', - m_backrefList); - } - catch (...) - { - reader.SetRaw(saved); - while (reader.Length() > 0) - { - char c = reader.Read(); - if (c == 'Z' && reader.Length() > 0 && reader.Peek() == '@') - break; - } - } + appendLocalScope(decodeEncodedNumber("B" + scopeSuffix)); + break; } - break; } - break; + catch (...) 
+ { + } + reader.SetRaw(saved); } classFunctionType = OperatorReturnTypeNameType; if (reader.PeekMatch("?$", 2)) @@ -1717,6 +1877,7 @@ void Demangle::DemangleName(NameList& nameList, if (reader.Peek() == '@') { reader.Consume(); + finalizeConstructorTemplateName(); return; } } @@ -1855,7 +2016,8 @@ void Demangle::DemangleModifiers(bool& _const, bool& _volatile, bool &isMember) } -DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, int funcClass) +DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, + int funcClass, bool includeImplicitThis) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; @@ -1882,6 +2044,9 @@ DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool DemangleNumber(vbOffsetOffset); DemangleNumber(vtorDispOffset); DemangleNumber(staticOffset); + vbptrOffset = SignExtendInt32(vbptrOffset); + vbOffsetOffset = SignExtendInt32(vbOffsetOffset); + vtorDispOffset = SignExtendInt32(vtorDispOffset); m_varName.back() += "`vtordispex{" + to_string(vbptrOffset) + ", " + to_string(vbOffsetOffset) + ", " + to_string(vtorDispOffset) + ", " + to_string(staticOffset) + "}'"; } else @@ -1890,6 +2055,7 @@ DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool int64_t staticOffset; DemangleNumber(vtorDispOffset); DemangleNumber(staticOffset); + vtorDispOffset = SignExtendInt32(vtorDispOffset); m_varName.back() += "`vtordisp{" + to_string(vtorDispOffset) + ", " + to_string(staticOffset) + "}'"; } } @@ -1930,6 +2096,10 @@ DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool MSVC_TRACE("Demangle function return type %s", reader.GetRaw()); returnType = DemangleVarType(nameBackrefList, true, name); MSVC_TRACE("Return type: %s", returnType.GetString().c_str()); + // '...' 
(varargs) is only legal as the trailing parameter marker, + // never as a return type. Reject so we don't build a bogus type. + if (returnType.GetClass() == VarArgsTypeClass) + throw DemangleException("Varargs ('Z') is not a valid function return type"); if (hasModifiers) { returnType.SetConst(return_const); @@ -1943,11 +2113,11 @@ DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool MSVC_TRACE("\tDemangle Function Parameters %s", reader.GetRaw()); vector params; bool needsThisPtr = false; - if (cc == ThisCallCallingConvention) + if (includeImplicitThis && cc == ThisCallCallingConvention) { needsThisPtr = true; } - if (funcClass != NoneFunctionClass) + if (includeImplicitThis && funcClass != NoneFunctionClass) { if ((funcClass & VirtualFunctionClass) == VirtualFunctionClass || (funcClass & StaticThunkFunctionClass) == StaticThunkFunctionClass @@ -1962,18 +2132,7 @@ DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool } } - if (needsThisPtr) - { - // Insert implicit "this" parameter for thiscall - NameList thisName = m_varName; - if (classFunctionType != OperatorReturnTypeNameType && !thisName.empty()) - thisName.pop_back(); - auto thisNamedType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, thisName); - auto thisPtrType = DemangledTypeNode::PointerType(m_arch, std::move(thisNamedType), false, false, PointerReferenceType); - params.push_back(DemangledTypeNode::Param{"this", std::make_shared(std::move(thisPtrType))}); - } - - DemangleVariableList(params, m_backrefList); + DemangleVariableList(params, nameBackrefList); if (params.size() >= 1 && params.back().type && params.back().type->GetClass() == VoidTypeClass) params.pop_back(); @@ -1986,18 +2145,27 @@ DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool newType.SetPointerSuffixBits(suffix); newType.SetNameType(classFunctionType); newType.SetCallingConventionName(cc); + if (needsThisPtr) + { + NameList thisName = m_varName; + 
if (classFunctionType != OperatorReturnTypeNameType && !thisName.empty()) + thisName.pop_back(); + auto thisNamedType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, std::move(thisName)); + newType.SetImplicitThisParameter(DemangledTypeNode::PointerType( + m_arch, std::move(thisNamedType), false, false, PointerReferenceType)); + } MSVC_TRACE("Successfully Created Function Type!\n"); return newType; } -DemangledTypeNode Demangle::DemangleData() +DemangledTypeNode Demangle::DemangleData(BackrefList& varList) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; NameList name; - DemangledTypeNode newType = DemangleVarType(m_backrefList, false, name); + DemangledTypeNode newType = DemangleVarType(varList, false, name); auto suffix = DemanglePointerSuffix(); DemangleModifiers(_const, _volatile, isMember); if (newType.GetClass() != PointerTypeClass) @@ -2027,7 +2195,7 @@ DemangledTypeNode Demangle::DemanagleRTTI(BNNameType nameType) } -DemangledTypeNode Demangle::DemangleVTable() +DemangledTypeNode Demangle::DemangleVTable(BackrefList& nameBackrefList) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; @@ -2037,7 +2205,7 @@ DemangledTypeNode Demangle::DemangleVTable() { NameList typeName; BNNameType classFunctionType = NoNameType; - DemangleName(typeName, classFunctionType, m_backrefList, true); + DemangleName(typeName, classFunctionType, nameBackrefList, true); string suffix = m_varName.back(); m_varName.back() += "{for `" + JoinNameList(typeName) + "'}"; @@ -2051,8 +2219,147 @@ DemangledTypeNode Demangle::DemangleVTable() } +// ??__E (dynamic initializer) / ??__F (dynamic atexit destructor). +// +// LLVM dispatches these at the top level via demangleSpecialIntrinsic --> +// demangleInitFiniStub. 
The mangling wraps another symbol (either a variable +// or a function) and emits a new function stub that initializes/destroys it: +// +// ??__E function form, e.g. ??__Efoo@@YAXXZ +// ??__E?@@ variable form, e.g. ??__E?foo@@3HA@@YAXXZ +// +// LLVM's output places the descriptor (`dynamic initializer for ''`) +// at file scope — not as a member of the target's enclosing class — and +// interpolates the target name inside backticks/quotes. For the variable +// form, it additionally renders the variable's type inside the inner +// backtick pair: `dynamic initializer for `int foo''. +Demangle::DemangleContext Demangle::DemangleDynamicInitFini(bool isDtor, BackrefList& backrefList) +{ + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + + // /d2FH4 may replace a long wrapped target with an MD5 name (??@@). + // Parse it before the optional '?' marker below; otherwise the first '?' + // of the hash spelling is mistaken for IsKnownStaticDataMember. + NameList innerNameList; + BNNameType innerClassFunctionType = NoNameType; + bool isMD5Name = false; + if (reader.Length() >= 3 && reader.PeekMatch("??@", 3)) + { + reader.Consume(3); + _STD_STRING hash = reader.ReadUntil('@'); + innerNameList.push_back("??@" + hash + "@"); + isMD5Name = true; + } + + // Optional leading '?' flags the "known static data member" form. LLVM + // calls this IsKnownStaticDataMember — when present, the mangling is + // required to carry two trailing '@' before the outer function encoding + // rather than one. + bool isKnownStaticDataMember = false; + if (!isMD5Name && reader.Length() > 0 && reader.Peek() == '?') + { + reader.Consume(); + isKnownStaticDataMember = true; + } + + // Parse the inner symbol's qualified name exactly as any other symbol + // would. DemangleName handles locally-scoped pieces, anonymous namespaces, + // templates, etc. so a target like + // instance@?1??Get@Globals@@SAAEAU1@XZ@ + // resolves correctly. 
+ if (!isMD5Name) + DemangleName(innerNameList, innerClassFunctionType, backrefList); + + const char* prefix = isDtor + ? "`dynamic atexit destructor for " + : "`dynamic initializer for "; + BNNameType classFunctionType = isDtor + ? DynamicAtExitDestructorNameType + : DynamicInitializerNameType; + + _STD_STRING descriptor; + + if (reader.Length() == 0) + throw DemangleException("Truncated ??__E/??__F"); + + char next = reader.Peek(); + if (next >= '0' && next <= '4') + { + // Variable form: <@-terminators> + // . We don't attach the storage class to + // anything — it exists only to disambiguate variable-vs-function + // inside the wrapper and to match the mangling grammar. + reader.Consume(); // storage class + DemangledTypeNode varType = DemangleData(backrefList); + _STD_STRING varTypeStr = varType.GetString(); + _STD_STRING innerJoined = JoinNameList(innerNameList); + descriptor = _STD_STRING(prefix) + "`" + varTypeStr + " " + innerJoined + "''"; + + // Consume the @-terminators between the inner variable encoding and + // the outer function encoding. LLVM requires two when the optional + // leading '?' was present, one otherwise. + int atCount = isKnownStaticDataMember ? 2 : 1; + for (int i = 0; i < atCount; i++) + { + if (reader.Length() == 0 || reader.Read() != '@') + throw DemangleException("Expected '@' terminator in ??__E/??__F variable form"); + } + } + else + { + // Function form: the inner symbol's function encoding follows + // directly. The outer stub reuses that encoding (there's no separate + // outer signature). + if (isKnownStaticDataMember) + throw DemangleException("??__E/??__F with leading '?' 
but no variable form"); + if (isMD5Name) + { + while (reader.Length() > 0 && reader.Peek() == '@') + reader.Consume(); + } + _STD_STRING innerJoined = JoinNameList(innerNameList); + descriptor = _STD_STRING(prefix) + "'" + innerJoined + "''"; + } + + // Replace the symbol's qualified name with just the descriptor — this is + // what puts the output at file scope with no enclosing class prefix. + m_varName = { descriptor }; + + // Parse the outer function encoding. MSVC emits a global cdecl stub + // ('Y'/'Z') in practice but we dispatch through the full table for + // robustness (private/public/static/etc.). + if (reader.Length() == 0) + throw DemangleException("Truncated ??__E/??__F outer function encoding"); + char funcType = reader.Read(); + switch (funcType) + { + case 'A': return { DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; + case 'B': return { DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; + case 'C': return { DemangleFunction(classFunctionType, false, backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; + case 'D': return { DemangleFunction(classFunctionType, false, backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; + case 'I': return { DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; + case 'J': return { DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; + case 'K': return { DemangleFunction(classFunctionType, false, backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; + case 'L': return { DemangleFunction(classFunctionType, false, backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; + case 'Q': return { DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass), 
PublicAccess, NoScope }; + case 'R': return { DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass), PublicAccess, NoScope }; + case 'S': return { DemangleFunction(classFunctionType, false, backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; + case 'T': return { DemangleFunction(classFunctionType, false, backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; + case 'Y': return { DemangleFunction(classFunctionType, false, backrefList, GlobalFunctionClass), NoAccess, NoScope }; + case 'Z': return { DemangleFunction(classFunctionType, false, backrefList, GlobalFunctionClass), NoAccess, NoScope }; + default: + throw DemangleException(_STD_STRING("Unexpected outer function type '") + funcType + "' in ??__E/??__F"); + } +} + Demangle::DemangleContext Demangle::DemangleSymbol() +{ + return DemangleSymbol(m_backrefList); +} + + +Demangle::DemangleContext Demangle::DemangleSymbol(BackrefList& backrefList) { MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); BNNameType classFunctionType = NoNameType; @@ -2079,28 +2386,21 @@ Demangle::DemangleContext Demangle::DemangleSymbol() return { DemangledTypeNode::VoidType(), NoAccess, NoScope }; } - DemangleName(varName, classFunctionType, m_backrefList); - MSVC_TRACE("Done demangling Name: '%s' - '%s'", JoinNameList(varName).c_str(), reader.GetRaw()); - m_varName = varName; - - if ((classFunctionType == DynamicInitializerNameType || classFunctionType == DynamicAtExitDestructorNameType) - && m_varName.size() >= 2) + // Special intrinsics dispatched at the top level (matches LLVM's + // demangleSpecialIntrinsic). ??__E/??__F have a non-uniform grammar + // that the normal DemangleName scope-chain loop can't express — the + // bytes after the code are a wrapped inner symbol, not scope prefixes. 
+ if (reader.Length() >= 4 && (reader.PeekMatch("?__E", 4) || reader.PeekMatch("?__F", 4))) { - string target; - for (size_t i = 0; i + 1 < m_varName.size(); i++) - { - if (!target.empty()) - target += "::"; - target += m_varName[i]; - } - - string displayName = m_varName.back(); - if (!displayName.empty() && displayName.back() == '\'') - displayName.pop_back(); - displayName = fmt::bnformat("{} for '{}''", displayName, target); - m_varName = {displayName}; + bool isDtor = reader.PeekAt(3) == 'F'; + reader.Consume(4); // consume ?__E or ?__F + return DemangleDynamicInitFini(isDtor, backrefList); } + DemangleName(varName, classFunctionType, backrefList); + MSVC_TRACE("Done demangling Name: '%s' - '%s'", JoinNameList(varName).c_str(), reader.GetRaw()); + m_varName = varName; + DemangleContext context; if (classFunctionType == StringNameType) @@ -2109,45 +2409,60 @@ Demangle::DemangleContext Demangle::DemangleSymbol() return context; } + // ??__J (local static thread guard) is a *variable*, not a function, per + // LLVM's demangleLocalStaticGuard. The only valid storage-class suffixes + // are '4' (IsVisible=false, followed by type encoding like 'IA' for int) + // and '5' (IsVisible=true). Any other byte here indicates a malformed + // symbol — e.g. the function-form ??__JFoo@@YAXXZ that earlier permissive + // code would accept and misrender. 
+ if (classFunctionType == LocalStaticThreadGuardNameType) + { + if (reader.Length() == 0) + throw DemangleException("Truncated ??__J"); + char next = reader.Peek(); + if (next != '4' && next != '5') + throw DemangleException("??__J requires variable storage class ('4' or '5'), got '" + _STD_STRING(1, next) + "'"); + } + char funcType = reader.Read(); switch(funcType) { - case '0': context = {DemangleData(), PrivateAccess, StaticScope }; break; - case '1': context = {DemangleData(), ProtectedAccess, StaticScope }; break; - case '2': context = {DemangleData(), PublicAccess, StaticScope }; break; - case '3': context = {DemangleData(), NoAccess, NoScope }; break; - case '4': context = {DemangleData(), NoAccess, NoScope }; break; - case '5': context = {DemangleVTable(), NoAccess, NoScope }; break; - case '6': context = {DemangleVTable(), NoAccess, NoScope }; break; - case '7': context = {DemangleVTable(), NoAccess, NoScope }; break; + case '0': context = {DemangleData(backrefList), PrivateAccess, StaticScope }; break; + case '1': context = {DemangleData(backrefList), ProtectedAccess, StaticScope }; break; + case '2': context = {DemangleData(backrefList), PublicAccess, StaticScope }; break; + case '3': context = {DemangleData(backrefList), NoAccess, NoScope }; break; + case '4': context = {DemangleData(backrefList), NoAccess, NoScope }; break; + case '5': context = {DemangleVTable(backrefList), NoAccess, NoScope }; break; + case '6': context = {DemangleVTable(backrefList), NoAccess, NoScope }; break; + case '7': context = {DemangleVTable(backrefList), NoAccess, NoScope }; break; case '8': context = {DemanagleRTTI(classFunctionType), NoAccess, NoScope }; break; case '9': context = {DemanagleRTTI(classFunctionType), NoAccess, NoScope }; break; - case 'A': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; - case 'B': context = {DemangleFunction(classFunctionType, true, m_backrefList, 
PrivateFunctionClass), PrivateAccess, NoScope }; break; - case 'C': context = {DemangleFunction(classFunctionType, false, m_backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; - case 'D': context = {DemangleFunction(classFunctionType, false, m_backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; - case 'E': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; - case 'F': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; - case 'G': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; - case 'H': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; - case 'I': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; - case 'J': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; - case 'K': context = {DemangleFunction(classFunctionType, false, m_backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; - case 'L': context = {DemangleFunction(classFunctionType, false, m_backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; - case 'M': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; - case 'N': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; - case 'O': 
context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; - case 'P': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; - case 'Q': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; - case 'R': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; - case 'S': context = {DemangleFunction(classFunctionType, false, m_backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; - case 'T': context = {DemangleFunction(classFunctionType, false, m_backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; - case 'U': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; - case 'V': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; - case 'W': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; - case 'X': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; - case 'Y': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; - case 'Z': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; + case 'A': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; + case 'B': context = 
{DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; + case 'C': context = {DemangleFunction(classFunctionType, false, backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; + case 'D': context = {DemangleFunction(classFunctionType, false, backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; + case 'E': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; + case 'F': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; + case 'G': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; + case 'H': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; + case 'I': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; + case 'J': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; + case 'K': context = {DemangleFunction(classFunctionType, false, backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; + case 'L': context = {DemangleFunction(classFunctionType, false, backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; + case 'M': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; + case 'N': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, 
VirtualScope}; break; + case 'O': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; + case 'P': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; + case 'Q': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; + case 'R': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; + case 'S': context = {DemangleFunction(classFunctionType, false, backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; + case 'T': context = {DemangleFunction(classFunctionType, false, backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; + case 'U': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; + case 'V': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; + case 'W': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; + case 'X': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; + case 'Y': context = {DemangleFunction(classFunctionType, false, backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; + case 'Z': context = {DemangleFunction(classFunctionType, false, backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; case '$': { if (reader.Peek() == 'B') @@ -2174,12 +2489,12 @@ Demangle::DemangleContext Demangle::DemangleSymbol() char thunkType = reader.Read(); switch 
(thunkType) { - case '0': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; - case '1': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; - case '2': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; - case '3': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; - case '4': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; - case '5': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; + case '0': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; + case '1': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; + case '2': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; + case '3': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; + case '4': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; + case '5': context = {DemangleFunction(classFunctionType, true, backrefList, 
funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; default: throw DemangleException("Unknown virtual thunk type " + string(1, thunkType)); } break; @@ -2189,6 +2504,12 @@ Demangle::DemangleContext Demangle::DemangleSymbol() return context; } +std::pair, QualifiedName> Demangle::Finalize() +{ + DemangleContext context = DemangleSymbol(); + return {context.type.Finalize(), QualifiedName(m_varName)}; +} + bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, QualifiedName& outVarName, const Ref& view) { @@ -2217,10 +2538,9 @@ bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, { // Can't use thread_local here — BinaryView overload needs platform/view state Demangle demangle(view, mangledName); - // For now we're throwing away MemberScope and MemberAccess - outType = demangle.DemangleSymbol().type.Finalize(); - outVarName = demangle.GetVarName(); - + auto result = demangle.Finalize(); + outType = std::move(result.first); + outVarName = std::move(result.second); } catch (DemangleException &e) { diff --git a/demangler/msvc/demangle_msvc.h b/demangler/msvc/demangle_msvc.h index a23dfc192..9e33af190 100644 --- a/demangler/msvc/demangle_msvc.h +++ b/demangler/msvc/demangle_msvc.h @@ -15,6 +15,7 @@ #pragma once #include #include +#include // XXX: Compiled directly into the core for performance reasons // Will still work fine compiled independently, just at about a @@ -69,7 +70,8 @@ class Demangle NameDynamicInitializer, NameDynamicAtExitDestructor, NameLocalStaticThreadGuard, - NameLocalVftable + NameLocalVftable, + NameAnonymousNamespace }; enum FunctionClass @@ -87,6 +89,15 @@ class Demangle VirtualThunkExFunctionClass = 1 << 9, }; +public: + struct DemangleContext + { + DemangledTypeNode type; + BNMemberAccess access; + BNMemberScope scope; + }; + +private: class Reader { public: @@ -152,8 +163,8 @@ class Demangle { public: _STD_VECTOR typeList; - 
_STD_VECTOR<_STD_STRING> nameList; - _STD_VECTOR<_STD_STRING> templateList; + StringList nameList; + StringList templateList; void Clear() { typeList.clear(); nameList.clear(); templateList.clear(); } const DemangledTypeNode& GetTypeBackref(size_t reference); const _STD_STRING& GetStringBackref(size_t reference); @@ -163,7 +174,7 @@ class Demangle }; // Internal name list type - avoids QualifiedName overhead during parsing - typedef _STD_VECTOR<_STD_STRING> NameList; + typedef StringList NameList; static size_t NameListStringSize(const NameList& nl) { @@ -202,13 +213,16 @@ class Demangle NameType GetNameType(); void RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList) const; - DemangledTypeNode DemangleVarType(BackrefList& varList, bool isReturn, NameList& name); + DemangledTypeNode DemangleReferencedSymbolValue(BackrefList& varList); + DemangledTypeNode DemangleAutoNonTypeTemplateParam(BackrefList& varList); + DemangledTypeNode DemangleVarType(BackrefList& varList, bool isReturn, NameList& name, + bool includeImplicitThis = true); void DemangleNumber(int64_t& num); void DemangleChar(char& ch); void DemangleWideChar(uint16_t& wch); void DemangleModifiers(bool& _const, bool& _volatile, bool& isMember); uint8_t DemanglePointerSuffix(); - void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList); + void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList, bool typeBackrefs = true); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, _STD_STRING& out, @@ -221,33 +235,31 @@ class Demangle bool typeNameContext = false); BNCallingConventionName DemangleCallingConvention(); void ConsumeExtendedModifierPrefix(); - DemangledTypeNode DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, int funcClass = NoneFunctionClass); - DemangledTypeNode DemangleData(); + DemangledTypeNode DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, 
BackrefList& varList, + int funcClass = NoneFunctionClass, bool includeImplicitThis = true); + DemangledTypeNode DemangleData(BackrefList& varList); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, _STD_STRING& out); - DemangledTypeNode DemangleVTable(); + DemangledTypeNode DemangleVTable(BackrefList& nameBackrefList); DemangledTypeNode DemanagleRTTI(BNNameType classFunctionType); _STD_STRING DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList); _STD_STRING DemangleTemplateInstantiationName(BackrefList& nameBackrefList); void DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); - _STD_STRING DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType); + _STD_STRING DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, + BNNameType& classFunctionType, bool& backrefEligible); DemangledTypeNode DemangleString(); DemangledTypeNode DemangleTypeInfoName(); + DemangleContext DemangleDynamicInitFini(bool isDtor, BackrefList& backrefList); + DemangleContext DemangleSymbol(BackrefList& backrefList); public: - struct DemangleContext - { - DemangledTypeNode type; - BNMemberAccess access; - BNMemberScope scope; - }; Demangle(BN::Architecture* arch, const _STD_STRING& mangledName); Demangle(BN::Ref view, const _STD_STRING& mangledName); Demangle(BN::Ref platform, const _STD_STRING& mangledName); void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); DemangleContext DemangleSymbol(); - BN::QualifiedName GetVarName() const { return BN::QualifiedName(m_varName); } + std::pair, BN::QualifiedName> Finalize(); // Be careful not to accidentally implicitly cast a BinaryView* to a bool static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, diff --git a/type.cpp b/type.cpp index 5aa679a16..6410b9f11 100644 --- a/type.cpp +++ b/type.cpp @@ -3442,4 +3442,3 @@ 
fmt::format_context::iterator fmt::formatter::format( return fmt::format_to(ctx.out(), "{}{}", obj.GetStringBeforeName(), obj.GetStringAfterName()); } } - From 4b906ea256c914afba5b6f300aae504c89823985 Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Fri, 1 May 2026 13:19:42 -0400 Subject: [PATCH 4/4] Ensure demangling recovers calling convention correctly --- demangle.cpp | 15 ++++- demangler/gnu3/demangled_type_node.cpp | 7 +- demangler/gnu3/demangled_type_node.h | 2 + demangler/msvc/demangle_msvc.cpp | 93 ++++++++++++++++++++------ demangler/msvc/demangle_msvc.h | 3 + plugins/pdb-ng/src/symbol_parser.rs | 4 +- plugins/rtti/rtti.cpp | 27 +++++--- rust/src/demangle.rs | 40 +++++++++++ view/pe/coffview.cpp | 2 +- view/pe/peview.cpp | 2 +- 10 files changed, 157 insertions(+), 38 deletions(-) diff --git a/demangle.cpp b/demangle.cpp index 8f13c2132..bb7eb1326 100644 --- a/demangle.cpp +++ b/demangle.cpp @@ -47,8 +47,19 @@ namespace BinaryNinja { bool DemangleMS(Architecture* arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, BinaryView* view) { - const bool simplify = Settings::Instance()->Get("analysis.types.templateSimplifier", view); - return DemangleMS(arch, mangledName, outType, outVarName, simplify); + BNType* localType = nullptr; + char** localVarName = nullptr; + size_t localSize = 0; + if (!BNDemangleMSWithOptions(arch->GetObject(), mangledName.c_str(), &localType, &localVarName, &localSize, + view ? view->GetObject() : nullptr)) + return false; + outType = localType ? 
new Type(localType) : nullptr; + for (size_t i = 0; i < localSize; i++) + { + outVarName.push_back(localVarName[i]); + } + BNFreeDemangledName(&localVarName, localSize); + return true; } bool DemangleMS(Architecture* arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/gnu3/demangled_type_node.cpp index c15e64218..ca69a838d 100644 --- a/demangler/gnu3/demangled_type_node.cpp +++ b/demangler/gnu3/demangled_type_node.cpp @@ -84,7 +84,7 @@ DemangledTypeNode::DemangledTypeNode() : m_typeClass(VoidTypeClass), m_ntrClass(UnknownNamedTypeClass), m_pointerReference(PointerReferenceType), m_nameType(NoNameType), m_callingConventionName(NoCallingConvention), m_pointerSuffixBits(0), - m_returnTypeConfidence(BN_DEFAULT_CONFIDENCE), + m_returnTypeConfidence(BN_FULL_CONFIDENCE), m_const(false), m_volatile(false), m_signed(false), m_hasVariableArgs(false), m_hasTemplateArgs(false), m_width(0), m_isMemberPointer(false), @@ -703,8 +703,11 @@ Ref DemangledTypeNode::Finalize() const Ref pType = p.type ? p.type->Finalize() : Ref(Type::VoidType()); finalParams.push_back({p.name, pType, true, Variable()}); } + Confidence> callingConvention; + if (m_callingConvention) + callingConvention = Confidence>(m_callingConvention, BN_FULL_CONFIDENCE); TypeBuilder tb = TypeBuilder::FunctionType( - retType->WithConfidence(static_cast(m_returnTypeConfidence)), nullptr, finalParams, + retType->WithConfidence(static_cast(m_returnTypeConfidence)), callingConvention, finalParams, Confidence(m_hasVariableArgs, m_hasVariableArgs ? 
BN_DEFAULT_CONFIDENCE : 0)); tb.SetConst(m_const); tb.SetVolatile(m_volatile); diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/gnu3/demangled_type_node.h index 54bab1c87..c3ee08f8b 100644 --- a/demangler/gnu3/demangled_type_node.h +++ b/demangler/gnu3/demangled_type_node.h @@ -111,6 +111,7 @@ class DemangledTypeNode void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffixBits |= PointerSuffixBit(ps); } void SetReturnTypeConfidence(int8_t c) { m_returnTypeConfidence = c; } void SetCallingConventionName(BNCallingConventionName cc) { m_callingConventionName = cc; } + void SetCallingConvention(BN::Ref cc) { m_callingConvention = std::move(cc); } void SetNTRType(BNNamedTypeReferenceClass cls) { m_ntrClass = cls; } void SetImplicitThisParameter(DemangledTypeNode type); @@ -129,6 +130,7 @@ class DemangledTypeNode BNReferenceType m_pointerReference; BNNameType m_nameType; BNCallingConventionName m_callingConventionName; + BN::Ref m_callingConvention; uint8_t m_pointerSuffixBits; uint8_t m_returnTypeConfidence; bool m_const; diff --git a/demangler/msvc/demangle_msvc.cpp b/demangler/msvc/demangle_msvc.cpp index a12ba970d..86fb8e136 100644 --- a/demangler/msvc/demangle_msvc.cpp +++ b/demangler/msvc/demangle_msvc.cpp @@ -1914,6 +1914,38 @@ BNCallingConventionName Demangle::DemangleCallingConvention() } +Ref Demangle::ResolveCallingConvention(BNCallingConventionName cc) const +{ + switch (cc) + { + case CdeclCallingConvention: + if (m_platform && m_platform->GetCdeclCallingConvention()) + return m_platform->GetCdeclCallingConvention(); + if (m_arch && m_arch->GetCdeclCallingConvention()) + return m_arch->GetCdeclCallingConvention(); + return m_arch ? 
m_arch->GetCallingConventionByName("cdecl") : nullptr; + case STDCallCallingConvention: + if (m_platform && m_platform->GetStdcallCallingConvention()) + return m_platform->GetStdcallCallingConvention(); + if (m_arch && m_arch->GetStdcallCallingConvention()) + return m_arch->GetStdcallCallingConvention(); + return m_arch ? m_arch->GetCallingConventionByName("stdcall") : nullptr; + case FastcallCallingConvention: + if (m_platform && m_platform->GetFastcallCallingConvention()) + return m_platform->GetFastcallCallingConvention(); + if (m_arch && m_arch->GetFastcallCallingConvention()) + return m_arch->GetFastcallCallingConvention(); + return m_arch ? m_arch->GetCallingConventionByName("fastcall") : nullptr; + case ThisCallCallingConvention: + if (m_arch) + return m_arch->GetCallingConventionByName("thiscall"); + return nullptr; + default: + return nullptr; + } +} + + void Demangle::ConsumeExtendedModifierPrefix() { while (reader.PeekMatch("$A", 2)) @@ -2112,25 +2144,10 @@ DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool MSVC_TRACE("\tDemangle Function Parameters %s", reader.GetRaw()); vector params; - bool needsThisPtr = false; - if (includeImplicitThis && cc == ThisCallCallingConvention) - { - needsThisPtr = true; - } - if (includeImplicitThis && funcClass != NoneFunctionClass) - { - if ((funcClass & VirtualFunctionClass) == VirtualFunctionClass - || (funcClass & StaticThunkFunctionClass) == StaticThunkFunctionClass - || (funcClass & VirtualThunkFunctionClass) == VirtualThunkFunctionClass) - { - needsThisPtr = true; - } - else if ((funcClass & StaticFunctionClass) != StaticFunctionClass - && (funcClass & GlobalFunctionClass) != GlobalFunctionClass) - { - needsThisPtr = true; - } - } + bool needsThisPtr = includeImplicitThis + && funcClass != NoneFunctionClass + && (funcClass & StaticFunctionClass) != StaticFunctionClass + && (funcClass & GlobalFunctionClass) != GlobalFunctionClass; DemangleVariableList(params, nameBackrefList); @@ 
-2145,12 +2162,14 @@ DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool newType.SetPointerSuffixBits(suffix); newType.SetNameType(classFunctionType); newType.SetCallingConventionName(cc); + if (auto callingConvention = ResolveCallingConvention(cc)) + newType.SetCallingConvention(callingConvention); if (needsThisPtr) { NameList thisName = m_varName; if (classFunctionType != OperatorReturnTypeNameType && !thisName.empty()) thisName.pop_back(); - auto thisNamedType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, std::move(thisName)); + auto thisNamedType = DemangledTypeNode::NamedType(TypedefNamedTypeClass, std::move(thisName)); newType.SetImplicitThisParameter(DemangledTypeNode::PointerType( m_arch, std::move(thisNamedType), false, false, PointerReferenceType)); } @@ -2516,6 +2535,8 @@ bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref(view)); return DemangleMS(arch, mangledName, outType, outVarName); } +bool Demangle::DemangleMS(Platform* platform, const string& mangledName, Ref& outType, + QualifiedName& outVarName) +{ + outType = nullptr; + if (!platform || mangledName.empty() || (mangledName[0] != '?' 
&& mangledName[0] != '.')) + return false; + try + { + Demangle demangle(Ref(platform), mangledName); + auto result = demangle.Finalize(); + outType = std::move(result.first); + outVarName = std::move(result.second); + } + catch (DemangleException &e) + { + LogDebugForException(e, "Demangling Failed '%s' '%s;", mangledName.c_str(), e.what()); + return false; + } + return true; +} + bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, QualifiedName& outVarName) { @@ -2573,6 +2617,15 @@ bool Demangle::DemangleMS(const string& mangledName, Ref& outType, return true; } +bool Demangle::DemangleMS(const string& mangledName, Ref& outType, + QualifiedName& outVarName, BinaryView* view) +{ + outType = nullptr; + if (!view) + return false; + return DemangleMS(mangledName, outType, outVarName, Ref(view)); +} + class MSDemangler: public Demangler { diff --git a/demangler/msvc/demangle_msvc.h b/demangler/msvc/demangle_msvc.h index 9e33af190..31256450b 100644 --- a/demangler/msvc/demangle_msvc.h +++ b/demangler/msvc/demangle_msvc.h @@ -234,6 +234,7 @@ class Demangle BackrefList& nameBackrefList, bool typeNameContext = false); BNCallingConventionName DemangleCallingConvention(); + BN::Ref ResolveCallingConvention(BNCallingConventionName cc) const; void ConsumeExtendedModifierPrefix(); DemangledTypeNode DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, int funcClass = NoneFunctionClass, bool includeImplicitThis = true); @@ -266,6 +267,8 @@ class Demangle BN::QualifiedName& outVarName, const BN::Ref& view); static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); + static bool DemangleMS(BN::Platform* platform, const _STD_STRING& mangledName, BN::Ref& outType, + BN::QualifiedName& outVarName); static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& 
outVarName); diff --git a/plugins/pdb-ng/src/symbol_parser.rs b/plugins/pdb-ng/src/symbol_parser.rs index a4ee4a6c6..b7f53b596 100644 --- a/plugins/pdb-ng/src/symbol_parser.rs +++ b/plugins/pdb-ng/src/symbol_parser.rs @@ -39,7 +39,7 @@ use crate::PDBParserInstance; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterId}; use binaryninja::binary_view::BinaryViewBase; use binaryninja::confidence::{Conf, MAX_CONFIDENCE, MIN_CONFIDENCE}; -use binaryninja::demangle::demangle_ms; +use binaryninja::demangle::demangle_ms_with_view; use binaryninja::rc::Ref; use binaryninja::types::{FunctionParameter, QualifiedName, StructureBuilder, Type, TypeClass}; use binaryninja::variable::{Variable, VariableSourceType}; @@ -1820,7 +1820,7 @@ impl<'a, S: Source<'a> + 'a> PDBParserInstance<'a, S> { raw_name: &String, rva: Rva, ) -> Result<(Option>>, Option)> { - let (mut t, mut name) = match demangle_ms(&self.arch, raw_name, true) { + let (mut t, mut name) = match demangle_ms_with_view(&self.arch, raw_name, Some(self.bv)) { Some((name, Some(t))) => (Some(Conf::new(t, DEMANGLE_CONFIDENCE)), name), Some((name, _)) => (None, name), _ => (None, QualifiedName::new(vec![raw_name.clone()])), diff --git a/plugins/rtti/rtti.cpp b/plugins/rtti/rtti.cpp index fde5a6ab4..f713aef77 100644 --- a/plugins/rtti/rtti.cpp +++ b/plugins/rtti/rtti.cpp @@ -3,6 +3,20 @@ using namespace BinaryNinja; using namespace BinaryNinja::RTTI; +namespace +{ + std::string NormalizeRTTIClassName(std::string name) + { + size_t beginFind = name.find_first_of(' '); + if (beginFind != std::string::npos) + name.erase(0, beginFind + 1); + size_t endFind = name.find(" `RTTI Type Descriptor Name'"); + if (endFind != std::string::npos) + name.erase(endFind, name.length()); + return name; + } +} + Ref RTTI::GetRealSymbol(BinaryView *view, uint64_t relocAddr, uint64_t symAddr) { @@ -24,9 +38,9 @@ std::optional RTTI::DemangleNameMS(BinaryView* view, bool allowMang { QualifiedName demangledName = {}; Ref 
outType = {}; - if (!DemangleMS(view->GetDefaultArchitecture(), mangledName, outType, demangledName, true)) + if (!DemangleMS(view->GetDefaultArchitecture(), mangledName, outType, demangledName, view)) return DemangleNameLLVM(allowMangled, mangledName); - return demangledName.GetString(); + return NormalizeRTTIClassName(demangledName.GetString()); } @@ -90,14 +104,7 @@ std::optional RTTI::DemangleNameLLVM(bool allowMangled, const std:: Ref outType = {}; if (!DemangleLLVM(mangledName, demangledName, true)) return allowMangled ? std::optional(mangledName) : std::nullopt; - auto demangledNameStr = demangledName.GetString(); - size_t beginFind = demangledNameStr.find_first_of(' '); - if (beginFind != std::string::npos) - demangledNameStr.erase(0, beginFind + 1); - size_t endFind = demangledNameStr.find(" `RTTI Type Descriptor Name'"); - if (endFind != std::string::npos) - demangledNameStr.erase(endFind, demangledNameStr.length()); - return demangledNameStr; + return NormalizeRTTIClassName(demangledName.GetString()); } diff --git a/rust/src/demangle.rs b/rust/src/demangle.rs index 1f9f8941c..d6aad25ce 100644 --- a/rust/src/demangle.rs +++ b/rust/src/demangle.rs @@ -165,6 +165,46 @@ pub fn demangle_ms( } } +pub fn demangle_ms_with_view( + arch: &CoreArchitecture, + mangled_name: &str, + view: Option<&BinaryView>, +) -> Option<(QualifiedName, Option>)> { + let mangled_name = mangled_name.to_cstr(); + let mut out_type: *mut BNType = std::ptr::null_mut(); + let mut out_name: *mut *mut std::os::raw::c_char = std::ptr::null_mut(); + let mut out_size: usize = 0; + let res = unsafe { + BNDemangleMSWithOptions( + arch.handle, + mangled_name.as_ptr(), + &mut out_type, + &mut out_name, + &mut out_size, + view.map(|v| v.handle).unwrap_or(std::ptr::null_mut()), + ) + }; + + match res { + true => { + assert!(!out_name.is_null()); + let names: Vec<_> = unsafe { ArrayGuard::::new(out_name, out_size, ()) } + .iter() + .map(str::to_string) + .collect(); + unsafe { 
BNFreeDemangledName(&mut out_name, out_size) }; + + let out_type = match out_type.is_null() { + true => None, + false => Some(unsafe { Type::ref_from_raw(out_type) }), + }; + + Some((names.into(), out_type)) + } + false => None, + } +} + #[derive(PartialEq, Eq, Hash)] pub struct Demangler { pub(crate) handle: *mut BNDemangler, diff --git a/view/pe/coffview.cpp b/view/pe/coffview.cpp index e009b91db..a36aff64a 100644 --- a/view/pe/coffview.cpp +++ b/view/pe/coffview.cpp @@ -1531,7 +1531,7 @@ void COFFView::AddCOFFSymbol(BNSymbolType type, const string& dll, const string& { QualifiedName demangledName; Ref demangledType; - if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, nullptr, m_simplifyTemplates)) + if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, this, m_simplifyTemplates)) { shortName = demangledName.GetString(); fullName = shortName; diff --git a/view/pe/peview.cpp b/view/pe/peview.cpp index 718822ba9..7fb74b57b 100644 --- a/view/pe/peview.cpp +++ b/view/pe/peview.cpp @@ -3562,7 +3562,7 @@ void PEView::AddPESymbol(BNSymbolType type, const string& dll, const string& nam { QualifiedName demangledName; Ref demangledType; - if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, nullptr, m_simplifyTemplates)) + if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, this, m_simplifyTemplates)) { shortName = demangledName.GetString(); fullName = shortName;