From 4ea37e18bfed8de162dfb816971417a0679b602f Mon Sep 17 00:00:00 2001
From: Mark Rowe
Date: Wed, 7 May 2025 18:22:50 -0700
Subject: [PATCH 1/3] Implement a Pseudo Objective-C language representation

This is implemented in the Pseudo C plug-in as it shares 99% of the logic.
The Objective-C support is implemented in a subclass of `PseudoCFunction`.
The handling of instruction types that the Objective-C representation
needs to customize is extracted into virtual functions that the
Objective-C subclass overrides.

This currently supports:
* Rewriting `objc_msgSend` / `objc_msgSendSuper2` with constant selectors
  to `[receiver message]` notation.
* Rewriting calls to `objc_alloc` / `objc_alloc_init` / `objc_opt_new` to
  the equivalent message send notation.
* Rewriting `objc_retain` / `objc_release` and friends to the equivalent
  message send notation.
* Displaying Objective-C class references as their class names rather
  than `_OBJC_CLASS_$_` symbol names.
* Displaying Objective-C string literals as `@"..."`.

This works best when used in conjunction with
https://github.com/bdash/bn-objc-extras as the reference counting runtime
calls add so much clutter.
--- lang/c/plugin.cpp | 30 +++ lang/c/pseudoc.cpp | 192 +++++++++---------- lang/c/pseudoc.h | 13 ++ lang/c/pseudoobjc.cpp | 421 ++++++++++++++++++++++++++++++++++++++++++ lang/c/pseudoobjc.h | 44 +++++ 5 files changed, 596 insertions(+), 104 deletions(-) create mode 100644 lang/c/plugin.cpp create mode 100644 lang/c/pseudoobjc.cpp create mode 100644 lang/c/pseudoobjc.h diff --git a/lang/c/plugin.cpp b/lang/c/plugin.cpp new file mode 100644 index 0000000000..721cfc1987 --- /dev/null +++ b/lang/c/plugin.cpp @@ -0,0 +1,30 @@ +#include "binaryninjaapi.h" +#include "pseudoc.h" +#include "pseudoobjc.h" + +using namespace BinaryNinja; + +extern "C" +{ + BN_DECLARE_CORE_ABI_VERSION + +#ifndef DEMO_EDITION + BINARYNINJAPLUGIN void CorePluginDependencies() + { + } +#endif + +#ifdef DEMO_EDITION + bool PseudoCPluginInit() +#else + BINARYNINJAPLUGIN bool CorePluginInit() +#endif + { + LanguageRepresentationFunctionType* type = new PseudoCFunctionType(); + LanguageRepresentationFunctionType::Register(type); + + type = new PseudoObjCFunctionType(); + LanguageRepresentationFunctionType::Register(type); + return true; + } +} diff --git a/lang/c/pseudoc.cpp b/lang/c/pseudoc.cpp index 7046c6d04e..fdc044fc20 100644 --- a/lang/c/pseudoc.cpp +++ b/lang/c/pseudoc.cpp @@ -976,73 +976,11 @@ void PseudoCFunction::GetExprTextInternal(const HighLevelILInstruction& instr, H break; case HLIL_CALL: - [&]() { - const auto destExpr = instr.GetDestExpr(); - const auto parameterExprs = instr.GetParameterExprs(); - - GetExprTextInternal(destExpr, tokens, settings, MemberAndFunctionOperatorPrecedence); - tokens.AppendOpenParen(); - - vector namedParams; - Ref functionType = instr.GetDestExpr().GetType(); - if (functionType && (functionType->GetClass() == PointerTypeClass) - && (functionType->GetChildType()->GetClass() == FunctionTypeClass)) - namedParams = functionType->GetChildType()->GetParameters(); - - for (size_t index{}; index < parameterExprs.size(); index++) - { - const auto& parameterExpr = 
parameterExprs[index]; - if (index != 0) tokens.Append(TextToken, ", "); - - // If the type of the parameter is known to be a pointer to a string, then we directly render it as a - // string, regardless of its length - bool renderedAsString = false; - if (index < namedParams.size() && parameterExprs[index].operation == HLIL_CONST_PTR) - { - auto exprType = namedParams[index].type; - if (exprType && (exprType->GetClass() == PointerTypeClass)) - { - if (auto child = exprType->GetChildType(); child) - { - if ((child->IsInteger() && child->IsSigned() && child->GetWidth() == 1) - || child->IsWideChar()) - { - tokens.AppendPointerTextToken(parameterExprs[index], - parameterExprs[index].GetConstant(), settings, AddressOfDataSymbols, - precedence, true); - renderedAsString = true; - } - } - } - } - - if (!renderedAsString) - GetExprText(parameterExpr, tokens, settings); - } - tokens.AppendCloseParen(); - if (statement) - tokens.AppendSemicolon(); - }(); + GetExpr_CALL_OR_TAILCALL(instr, tokens, settings, precedence, statement); break; case HLIL_IMPORT: - [&]() { - const auto constant = instr.GetConstant(); - auto symbol = GetHighLevelILFunction()->GetFunction()->GetView()->GetSymbolByAddress(constant); - const auto symbolType = symbol->GetType(); - - if (symbol && (symbolType == ImportedDataSymbol || symbolType == ImportAddressSymbol)) - { - symbol = Symbol::ImportedFunctionFromImportAddressSymbol(symbol, constant); - const auto symbolShortName = symbol->GetShortName(); - tokens.Append(IndirectImportToken, NoTokenContext, symbolShortName, instr.address, constant, instr.size, instr.sourceOperand); - return; - } - - tokens.AppendPointerTextToken(instr, constant, settings, DereferenceNonDataSymbols, precedence); - if (statement) - tokens.AppendSemicolon(); - }(); + GetExpr_IMPORT(instr, tokens, settings, precedence, statement); break; case HLIL_ARRAY_INDEX: @@ -1288,12 +1226,7 @@ void PseudoCFunction::GetExprTextInternal(const HighLevelILInstruction& instr, H break; case 
HLIL_CONST_PTR: - [&]() { - tokens.AppendPointerTextToken( - instr, instr.GetConstant(), settings, AddressOfDataSymbols, precedence); - if (statement) - tokens.AppendSemicolon(); - }(); + GetExpr_CONST_PTR(instr, tokens, settings, precedence, statement); break; case HLIL_VAR: @@ -1766,17 +1699,8 @@ void PseudoCFunction::GetExprTextInternal(const HighLevelILInstruction& instr, H tokens.Append(AnnotationToken, "/* tailcall */"); tokens.NewLine(); tokens.Append(KeywordToken, "return "); - GetExprTextInternal(destExpr, tokens, settings, MemberAndFunctionOperatorPrecedence); - tokens.AppendOpenParen(); - for (size_t index{}; index < parameterExprs.size(); index++) - { - const auto& parameterExpr = parameterExprs[index]; - if (index != 0) tokens.Append(TextToken, ", "); - GetExprTextInternal(parameterExpr, tokens, settings); - } - tokens.AppendCloseParen(); - if (statement) - tokens.AppendSemicolon(); + + GetExpr_CALL_OR_TAILCALL(instr, tokens, settings, precedence, statement); }(); break; @@ -2829,6 +2753,88 @@ void PseudoCFunction::GetExprTextInternal(const HighLevelILInstruction& instr, H } } +void PseudoCFunction::GetExpr_CALL_OR_TAILCALL(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) +{ + const auto destExpr = instr.GetDestExpr(); + const auto parameterExprs = instr.GetParameterExprs(); + + vector namedParams; + Ref functionType = destExpr.GetType(); + if (functionType && (functionType->GetClass() == PointerTypeClass) + && (functionType->GetChildType()->GetClass() == FunctionTypeClass)) + namedParams = functionType->GetChildType()->GetParameters(); + + GetExprTextInternal(destExpr, tokens, settings, MemberAndFunctionOperatorPrecedence); + tokens.AppendOpenParen(); + + for (size_t index {}; index < parameterExprs.size(); index++) + { + const auto& parameterExpr = parameterExprs[index]; + if (index != 0) + 
tokens.Append(TextToken, ", "); + + // If the type of the parameter is known to be a pointer to a string, then we directly render it as a + // string, regardless of its length + bool renderedAsString = false; + if (index < namedParams.size() && parameterExprs[index].operation == HLIL_CONST_PTR) + { + auto exprType = namedParams[index].type; + if (exprType && (exprType->GetClass() == PointerTypeClass)) + { + if (auto child = exprType->GetChildType(); child) + { + if ((child->IsInteger() && child->IsSigned() && child->GetWidth() == 1) || child->IsWideChar()) + { + tokens.AppendPointerTextToken(parameterExprs[index], + parameterExprs[index].GetConstant(), settings, AddressOfDataSymbols, + precedence, true); + renderedAsString = true; + } + } + } + } + + if (!renderedAsString) + GetExprText(parameterExpr, tokens, settings); + } + tokens.AppendCloseParen(); + if (statement) + tokens.AppendSemicolon(); +} + +void PseudoCFunction::GetExpr_CONST_PTR(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) +{ + auto constant = instr.GetConstant(); + tokens.AppendPointerTextToken( + instr, instr.GetConstant(), settings, AddressOfDataSymbols, precedence); + if (statement) + tokens.AppendSemicolon(); +} + +void PseudoCFunction::GetExpr_IMPORT(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) +{ + const auto constant = instr.GetConstant(); + auto symbol = GetHighLevelILFunction()->GetFunction()->GetView()->GetSymbolByAddress(constant); + const auto symbolType = symbol->GetType(); + + if (symbol && (symbolType == ImportedDataSymbol || symbolType == ImportAddressSymbol)) + { + symbol = Symbol::ImportedFunctionFromImportAddressSymbol(symbol, constant); + const auto symbolShortName = symbol->GetShortName(); 
+ tokens.Append(IndirectImportToken, NoTokenContext, symbolShortName, instr.address, constant, instr.size, instr.sourceOperand); + return; + } + + tokens.AppendPointerTextToken(instr, constant, settings, DereferenceNonDataSymbols, precedence); + if (statement) + tokens.AppendSemicolon(); +} string PseudoCFunction::GetAnnotationStartString() const { @@ -2848,32 +2854,10 @@ PseudoCFunctionType::PseudoCFunctionType(): LanguageRepresentationFunctionType(" { } +PseudoCFunctionType::PseudoCFunctionType(const string& name) : LanguageRepresentationFunctionType(name) {} Ref PseudoCFunctionType::Create(Architecture* arch, Function* owner, HighLevelILFunction* highLevelILFunction) { return new PseudoCFunction(this, arch, owner, highLevelILFunction); } - - -extern "C" -{ - BN_DECLARE_CORE_ABI_VERSION - -#ifndef DEMO_EDITION - BINARYNINJAPLUGIN void CorePluginDependencies() - { - } -#endif - -#ifdef DEMO_EDITION - bool PseudoCPluginInit() -#else - BINARYNINJAPLUGIN bool CorePluginInit() -#endif - { - LanguageRepresentationFunctionType* type = new PseudoCFunctionType(); - LanguageRepresentationFunctionType::Register(type); - return true; - } -} diff --git a/lang/c/pseudoc.h b/lang/c/pseudoc.h index a3b03a88f3..661784320e 100644 --- a/lang/c/pseudoc.h +++ b/lang/c/pseudoc.h @@ -52,6 +52,16 @@ class PseudoCFunction: public BinaryNinja::LanguageRepresentationFunction void EndLines( const BinaryNinja::HighLevelILInstruction& instr, BinaryNinja::HighLevelILTokenEmitter& tokens) override; + virtual void GetExpr_CALL_OR_TAILCALL(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement); + virtual void GetExpr_CONST_PTR(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement); + virtual void GetExpr_IMPORT(const 
BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement); + public: PseudoCFunction(BinaryNinja::LanguageRepresentationFunctionType* type, BinaryNinja::Architecture* arch, BinaryNinja::Function* owner, BinaryNinja::HighLevelILFunction* highLevelILFunction); @@ -66,4 +76,7 @@ class PseudoCFunctionType: public BinaryNinja::LanguageRepresentationFunctionTyp PseudoCFunctionType(); BinaryNinja::Ref Create(BinaryNinja::Architecture* arch, BinaryNinja::Function* owner, BinaryNinja::HighLevelILFunction* highLevelILFunction) override; + +protected: + PseudoCFunctionType(const std::string& name); }; diff --git a/lang/c/pseudoobjc.cpp b/lang/c/pseudoobjc.cpp new file mode 100644 index 0000000000..eccf10f1e2 --- /dev/null +++ b/lang/c/pseudoobjc.cpp @@ -0,0 +1,421 @@ +#include "pseudoobjc.h" + +#include "binaryninjaapi.h" +#include "highlevelilinstruction.h" +#include +#include +#include + +using namespace BinaryNinja; + +namespace { + +bool ParameterIsString(const HighLevelILInstruction& expr) +{ + if (expr.operation != HLIL_CONST_PTR) + return false; + + auto exprType = expr.GetType(); + if (!exprType || exprType->GetClass() != PointerTypeClass) + return false; + + if (auto child = exprType->GetChildType(); child) + { + child = child->IsArray() ? 
child->GetChildType() : child; + return child->IsInteger() && child->IsSigned() && child->GetWidth() == 1; + } + return false; +} + +struct SelectorReference +{ + std::string name; + uint64_t address; +}; + +std::optional GetSelectorFromParameter( + const HighLevelILInstruction& expr, const Function& function) +{ + if (expr.operation != HLIL_CONST_PTR) + return std::nullopt; + + if (!ParameterIsString(expr)) + return std::nullopt; + + uint64_t constant = expr.GetConstant(); + std::string string; + auto stringType = function.GetView()->CheckForStringAnnotationType(constant, string, true, true, 1); + + if (!stringType || (stringType != AsciiString && stringType != Utf8String)) + return std::nullopt; + + return SelectorReference {string, constant}; +} + +void SplitSelector(const std::string& selector, std::vector& tokens) +{ + std::stringstream ss(selector); + std::string token; + while (std::getline(ss, token, ':')) + tokens.push_back(token); +} + +struct RuntimeCall +{ + enum Type + { + MessageSend, + MessageSendSuper, + Alloc, + AllocInit, + New, + Retain, + Release, + Autorelease, + RetainAutorelease, + Class, + }; + + Type type; + uint64_t address; +}; + +constexpr std::array RUNTIME_CALLS = { + std::make_pair("_objc_alloc_init", RuntimeCall::AllocInit), + std::make_pair("_objc_alloc", RuntimeCall::Alloc), + std::make_pair("_objc_autorelease", RuntimeCall::Autorelease), + std::make_pair("_objc_autoreleaseReturnValue", RuntimeCall::Autorelease), + std::make_pair("_objc_msgSend", RuntimeCall::MessageSend), + std::make_pair("_objc_msgSendSuper", RuntimeCall::MessageSendSuper), + std::make_pair("_objc_msgSendSuper2", RuntimeCall::MessageSendSuper), + std::make_pair("_objc_opt_class", RuntimeCall::Class), + std::make_pair("_objc_opt_new", RuntimeCall::New), + std::make_pair("_objc_release", RuntimeCall::Release), + std::make_pair("_objc_retain", RuntimeCall::Retain), + std::make_pair("_objc_retainAutoreleasedReturnValue", RuntimeCall::Retain), + 
std::make_pair("_objc_retainAutoreleaseReturnValue", RuntimeCall::RetainAutorelease), + std::make_pair("_objc_retainBlock", RuntimeCall::Retain), + std::make_pair("j__objc_alloc_init", RuntimeCall::AllocInit), + std::make_pair("j__objc_alloc", RuntimeCall::Alloc), + std::make_pair("j__objc_autorelease", RuntimeCall::Autorelease), + std::make_pair("j__objc_autoreleaseReturnValue", RuntimeCall::Autorelease), + std::make_pair("j__objc_msgSend", RuntimeCall::MessageSend), + std::make_pair("j__objc_msgSendSuper", RuntimeCall::MessageSendSuper), + std::make_pair("j__objc_msgSendSuper2", RuntimeCall::MessageSendSuper), + std::make_pair("j__objc_opt_class", RuntimeCall::Class), + std::make_pair("j__objc_opt_new", RuntimeCall::New), + std::make_pair("j__objc_release", RuntimeCall::Release), + std::make_pair("j__objc_retain", RuntimeCall::Retain), + std::make_pair("j__objc_retainAutoreleasedReturnValue", RuntimeCall::Retain), + std::make_pair("j__objc_retainAutoreleaseReturnValue", RuntimeCall::RetainAutorelease), + std::make_pair("j__objc_retainBlock", RuntimeCall::Retain), +}; + +std::optional DetectObjCRuntimeCall(const HighLevelILInstruction& callTarget, + const std::vector& parameterExprs, const Function& function) +{ + uint64_t constant = 0; + Ref symbol; + + switch (callTarget.operation) + { + case HLIL_CONST_PTR: + { + constant = callTarget.GetConstant(); + symbol = function.GetView()->GetSymbolByAddress(constant); + break; + } + case HLIL_IMPORT: + { + constant = callTarget.GetConstant(); + auto importAddressSymbol = function.GetView()->GetSymbolByAddress(constant); + if (!importAddressSymbol) + return std::nullopt; + + const auto symbolType = importAddressSymbol->GetType(); + if (symbolType != ImportedDataSymbol && symbolType != ImportAddressSymbol) + return std::nullopt; + + symbol = Symbol::ImportedFunctionFromImportAddressSymbol(importAddressSymbol, constant); + } + default: + break; + } + + if (!symbol) + return std::nullopt; + + const auto symbolShortName = 
symbol->GetShortName(); + auto it = std::find_if(RUNTIME_CALLS.begin(), RUNTIME_CALLS.end(), [&](const auto& pair) { + return pair.first == symbolShortName; + }); + if (it == RUNTIME_CALLS.end()) + return std::nullopt; + + return RuntimeCall {it->second, constant}; +} + +} // unnamed namespace + +PseudoObjCFunction::PseudoObjCFunction(LanguageRepresentationFunctionType* type, Architecture* arch, Function* owner, + HighLevelILFunction* highLevelILFunction) : PseudoCFunction(type, arch, owner, highLevelILFunction) +{} + +void PseudoObjCFunction::GetExpr_CALL_OR_TAILCALL(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) +{ + const auto destExpr = instr.GetDestExpr(); + const auto parameterExprs = instr.GetParameterExprs(); + + auto objCRuntimeCall = DetectObjCRuntimeCall(destExpr, parameterExprs, *GetFunction()); + if (!objCRuntimeCall) + return PseudoCFunction::GetExpr_CALL_OR_TAILCALL(instr, tokens, settings, precedence, statement); + + std::vector runtimeCallTokens; + switch (objCRuntimeCall->type) + { + case RuntimeCall::MessageSend: + case RuntimeCall::MessageSendSuper: + if (GetExpr_ObjCMsgSend(destExpr, tokens, settings, parameterExprs)) + { + if (statement) + tokens.AppendSemicolon(); + return; + } + break; + case RuntimeCall::Alloc: + runtimeCallTokens = {"alloc"}; + break; + case RuntimeCall::AllocInit: + runtimeCallTokens = {"alloc", "init"}; + break; + case RuntimeCall::New: + runtimeCallTokens = {"new"}; + break; + case RuntimeCall::Retain: + runtimeCallTokens = {"retain"}; + break; + case RuntimeCall::Release: + runtimeCallTokens = {"release"}; + break; + case RuntimeCall::Autorelease: + runtimeCallTokens = {"autorelease"}; + break; + case RuntimeCall::RetainAutorelease: + runtimeCallTokens = {"retain", "autorelease"}; + break; + case RuntimeCall::Class: + runtimeCallTokens = {"class"}; + break; + default: + 
break; + } + + if (runtimeCallTokens.size() + && GetExpr_GenericObjCRuntimeCall( + objCRuntimeCall->address, instr, tokens, settings, parameterExprs, runtimeCallTokens)) + { + if (statement) + tokens.AppendSemicolon(); + return; + } + + return PseudoCFunction::GetExpr_CALL_OR_TAILCALL(instr, tokens, settings, precedence, statement); +} + +bool PseudoObjCFunction::GetExpr_ObjCMsgSend(const HighLevelILInstruction& instr, HighLevelILTokenEmitter& tokens, + DisassemblySettings* settings, const std::vector& parameterExprs) +{ + if (parameterExprs.size() < 2) + return false; + + auto maybeSelector = GetSelectorFromParameter(parameterExprs[1], *GetFunction()); + if (!maybeSelector) + return false; + + auto [selector, selectorAddress] = maybeSelector.value(); + std::vector selectorTokens {2}; + SplitSelector(selector, selectorTokens); + + tokens.AppendOpenBracket(); + + GetExprText(parameterExprs[0], tokens, settings); + + for (size_t index = 2; index < parameterExprs.size(); index++) + { + const auto& parameterExpr = parameterExprs[index]; + tokens.Append(TextToken, " "); + if (index < selectorTokens.size()) + { + tokens.Append( + DataSymbolToken, StringReferenceTokenContext, selectorTokens[index], instr.address, selectorAddress); + tokens.Append(TextToken, ":"); + } + else + { + tokens.Append(TextToken, ", "); + } + GetExprText(parameterExpr, tokens, settings); + } + if (selectorTokens.size() > parameterExprs.size()) + { + tokens.Append(TextToken, " "); + for (size_t index = parameterExprs.size(); index < selectorTokens.size(); index++) + { + tokens.Append( + DataSymbolToken, StringReferenceTokenContext, selectorTokens[index], instr.address, selectorAddress); + if (index != selectorTokens.size() - 1 || selector.back() == ':') + tokens.Append(TextToken, ":"); + } + } + tokens.AppendCloseBracket(); + return true; +} + +bool PseudoObjCFunction::GetExpr_GenericObjCRuntimeCall(uint64_t address, const HighLevelILInstruction& instr, + HighLevelILTokenEmitter& tokens, 
DisassemblySettings* settings, + const std::vector& parameterExprs, const std::vector& selectorTokens) +{ + if (parameterExprs.size() < 1) + return false; + + for (auto _ : selectorTokens) + tokens.AppendOpenBracket(); + + GetExprText(parameterExprs[0], tokens, settings); + for (auto& token : selectorTokens) + { + tokens.Append(TextToken, " "); + tokens.Append(CodeSymbolToken, StringReferenceTokenContext, std::string(token), instr.address, address); + tokens.AppendCloseBracket(); + } + return true; +} + +void PseudoObjCFunction::GetExpr_CONST_PTR(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) +{ + uint64_t constant = instr.GetConstant(); + auto symbol = GetFunction()->GetView()->GetSymbolByAddress(constant); + if (!symbol) + return PseudoCFunction::GetExpr_CONST_PTR(instr, tokens, settings, precedence, statement); + + auto shortName = symbol->GetShortName(); + + // Match class references based only on the symbol name as the class metadata may be imported + // from a different image. + if (shortName.rfind("_OBJC_CLASS_$_", 0) == 0 || shortName.rfind("cls_", 0) == 0) + { + if (GetExpr_OBJC_CLASS(*symbol, constant, instr, tokens, settings, precedence, statement)) + return; + } + + DataVariable variable {}; + auto hasVariable = GetFunction()->GetView()->GetDataVariableAtAddress(constant, variable); + if (!hasVariable) + return PseudoCFunction::GetExpr_CONST_PTR(instr, tokens, settings, precedence, statement); + + auto type = variable.type->IsNamedTypeRefer() ? 
+ GetFunction()->GetView()->GetTypeByRef(variable.type->GetNamedTypeReference()) : + variable.type.GetValue(); + + if (type->GetClass() != StructureTypeClass) + return PseudoCFunction::GetExpr_CONST_PTR(instr, tokens, settings, precedence, statement); + + auto structureName = type->GetStructureName().GetString(); + if (structureName == "__NSConstantString") + { + if (GetExpr_NSConstantString(type, constant, instr, tokens, settings, precedence, statement)) + return; + } + + PseudoCFunction::GetExpr_CONST_PTR(instr, tokens, settings, precedence, statement); +} + +bool PseudoObjCFunction::GetExpr_OBJC_CLASS(const Symbol& symbol, uint64_t constant, + const BinaryNinja::HighLevelILInstruction& instr, BinaryNinja::HighLevelILTokenEmitter& tokens, + BinaryNinja::DisassemblySettings* settings, BNOperatorPrecedence precedence, bool statement) +{ + auto shortName = symbol.GetShortName(); + std::string className; + if (shortName.rfind("_OBJC_CLASS_$_", 0) == 0) + className = shortName.substr(14); + else if (shortName.rfind("cls_", 0) == 0) + className = shortName.substr(4); + + if (className.empty()) + return false; + + tokens.Append(DataSymbolToken, ConstDataTokenContext, className, instr.address, constant); + if (statement) + tokens.AppendSemicolon(); + + return true; +} + +bool PseudoObjCFunction::GetExpr_NSConstantString(Ref type, uint64_t constant, + const BinaryNinja::HighLevelILInstruction& instr, BinaryNinja::HighLevelILTokenEmitter& tokens, + BinaryNinja::DisassemblySettings* settings, BNOperatorPrecedence precedence, bool statement) +{ + StructureMember dataMember; + bool hasDataField = type->GetStructure()->GetMemberByName("data", dataMember); + if (!hasDataField) + return false; + + uint64_t dataPointer = 0; + if (!GetFunction()->GetView()->Read( + &dataPointer, constant + dataMember.offset, GetFunction()->GetView()->GetAddressSize())) + return false; + + std::string stringValue; + if (!GetFunction()->GetView()->CheckForStringAnnotationType(dataPointer, 
stringValue, true, true, 1)) + return false; + + // TODO: Ideally this'd be part of the same token as the quotes for the string literal. + // Sometimes the view ends up wrapping between the @ and the quote. + tokens.Append(TextToken, "@"); + tokens.AppendConstantTextToken( + instr, dataPointer, GetFunction()->GetView()->GetAddressSize(), settings, MemberAndFunctionOperatorPrecedence); + if (statement) + tokens.AppendSemicolon(); + + return true; +} + +void PseudoObjCFunction::GetExpr_IMPORT(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) +{ + const auto constant = instr.GetConstant(); + auto symbol = GetHighLevelILFunction()->GetFunction()->GetView()->GetSymbolByAddress(constant); + const auto symbolType = symbol->GetType(); + + if (symbol && (symbolType == ImportedDataSymbol || symbolType == ImportAddressSymbol)) + { + symbol = Symbol::ImportedFunctionFromImportAddressSymbol(symbol, constant); + const auto symbolShortName = symbol->GetShortName(); + if (symbolShortName.rfind("_OBJC_CLASS_$_", 0) == 0) + { + tokens.Append(IndirectImportToken, ConstDataTokenContext, symbolShortName.substr(14), instr.address, + constant); + if (statement) + tokens.AppendSemicolon(); + return; + } + tokens.Append(IndirectImportToken, NoTokenContext, symbolShortName, instr.address, constant, instr.size, instr.sourceOperand); + return; + } + + PseudoCFunction::GetExpr_IMPORT(instr, tokens, settings, precedence, statement); +} + + +PseudoObjCFunctionType::PseudoObjCFunctionType() : PseudoCFunctionType("Pseudo Objective-C") {} + +Ref PseudoObjCFunctionType::Create( + Architecture* arch, Function* owner, HighLevelILFunction* highLevelILFunction) +{ + return new PseudoObjCFunction(this, arch, owner, highLevelILFunction); +} diff --git a/lang/c/pseudoobjc.h b/lang/c/pseudoobjc.h new file mode 100644 index 0000000000..4b87c7d775 --- /dev/null +++ 
b/lang/c/pseudoobjc.h @@ -0,0 +1,44 @@ +#pragma once + +#include "pseudoc.h" + +#include "binaryninjaapi.h" + +class PseudoObjCFunction : public PseudoCFunction +{ +public: + PseudoObjCFunction(BinaryNinja::LanguageRepresentationFunctionType* type, BinaryNinja::Architecture* arch, + BinaryNinja::Function* owner, BinaryNinja::HighLevelILFunction* highLevelILFunction); + +protected: + void GetExpr_CALL_OR_TAILCALL(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) override; + void GetExpr_CONST_PTR(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) override; + void GetExpr_IMPORT(const BinaryNinja::HighLevelILInstruction& instr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + BNOperatorPrecedence precedence, bool statement) override; + +private: + bool GetExpr_ObjCMsgSend(const BinaryNinja::HighLevelILInstruction& expr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + const std::vector& parameterExprs); + bool GetExpr_GenericObjCRuntimeCall(uint64_t address, const BinaryNinja::HighLevelILInstruction& expr, + BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, + const std::vector& parameterExprs, const std::vector& selectorTokens); + bool GetExpr_OBJC_CLASS(const BinaryNinja::Symbol& symbol, uint64_t constant, + const BinaryNinja::HighLevelILInstruction& expr, BinaryNinja::HighLevelILTokenEmitter& tokens, + BinaryNinja::DisassemblySettings* settings, BNOperatorPrecedence precedence, bool statement); + bool GetExpr_NSConstantString(BinaryNinja::Ref type, uint64_t constant, + const BinaryNinja::HighLevelILInstruction& expr, BinaryNinja::HighLevelILTokenEmitter& 
tokens, + BinaryNinja::DisassemblySettings* settings, BNOperatorPrecedence precedence, bool statement); +}; + +class PseudoObjCFunctionType : public PseudoCFunctionType { +public: + PseudoObjCFunctionType(); + BinaryNinja::Ref Create(BinaryNinja::Architecture* arch, + BinaryNinja::Function* owner, BinaryNinja::HighLevelILFunction* highLevelILFunction) override; +}; From 2524e2ce6be351d79477442a073e743946d1334a Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Thu, 8 May 2025 11:31:10 -0700 Subject: [PATCH 2/3] Detect calls that the Objective-C workflow has rewritten directly to an implementation These are detected by their function names having the characteristic `-[ClassName methodName:]` format. Calls to functions with this naming pattern that accept a selector as a second argument are assumed to be `objc_msgSend` calls that the Objective-C workflow rewrote. These calls are formatted identically to other `objc_msgSend` calls with the exception that the selector tokens reference the address of the call target rather than the selector string, so double-clicking on them takes you to the fixed destination of the rewritten call. 
--- lang/c/pseudoobjc.cpp | 106 ++++++++++++++++++++++++++++-------------- lang/c/pseudoobjc.h | 2 +- 2 files changed, 73 insertions(+), 35 deletions(-) diff --git a/lang/c/pseudoobjc.cpp b/lang/c/pseudoobjc.cpp index eccf10f1e2..4cb360a771 100644 --- a/lang/c/pseudoobjc.cpp +++ b/lang/c/pseudoobjc.cpp @@ -60,6 +60,43 @@ void SplitSelector(const std::string& selector, std::vector& tokens tokens.push_back(token); } +std::optional>> GetCallTargetInfo(const HighLevelILInstruction& callTarget, + const std::vector& parameterExprs, const Function& function) +{ + uint64_t constant = 0; + Ref symbol; + + switch (callTarget.operation) + { + case HLIL_CONST_PTR: + { + constant = callTarget.GetConstant(); + symbol = function.GetView()->GetSymbolByAddress(constant); + break; + } + case HLIL_IMPORT: + { + constant = callTarget.GetConstant(); + auto importAddressSymbol = function.GetView()->GetSymbolByAddress(constant); + if (!importAddressSymbol) + return std::nullopt; + + const auto symbolType = importAddressSymbol->GetType(); + if (symbolType != ImportedDataSymbol && symbolType != ImportAddressSymbol) + return std::nullopt; + + symbol = Symbol::ImportedFunctionFromImportAddressSymbol(importAddressSymbol, constant); + } + default: + break; + } + + if (!symbol) + return std::nullopt; + + return std::make_pair(constant, symbol); +} + struct RuntimeCall { enum Type @@ -78,6 +115,7 @@ struct RuntimeCall Type type; uint64_t address; + bool isRewritten = false; }; constexpr std::array RUNTIME_CALLS = { @@ -114,36 +152,10 @@ constexpr std::array RUNTIME_CALLS = { std::optional DetectObjCRuntimeCall(const HighLevelILInstruction& callTarget, const std::vector& parameterExprs, const Function& function) { - uint64_t constant = 0; - Ref symbol; - - switch (callTarget.operation) - { - case HLIL_CONST_PTR: - { - constant = callTarget.GetConstant(); - symbol = function.GetView()->GetSymbolByAddress(constant); - break; - } - case HLIL_IMPORT: - { - constant = callTarget.GetConstant(); - auto 
importAddressSymbol = function.GetView()->GetSymbolByAddress(constant); - if (!importAddressSymbol) - return std::nullopt; - - const auto symbolType = importAddressSymbol->GetType(); - if (symbolType != ImportedDataSymbol && symbolType != ImportAddressSymbol) - return std::nullopt; - - symbol = Symbol::ImportedFunctionFromImportAddressSymbol(importAddressSymbol, constant); - } - default: - break; - } - - if (!symbol) + auto callTargetInfo = GetCallTargetInfo(callTarget, parameterExprs, function); + if (!callTargetInfo) return std::nullopt; + auto [constant, symbol] = callTargetInfo.value(); const auto symbolShortName = symbol->GetShortName(); auto it = std::find_if(RUNTIME_CALLS.begin(), RUNTIME_CALLS.end(), [&](const auto& pair) { @@ -155,6 +167,26 @@ std::optional DetectObjCRuntimeCall(const HighLevelILInstruction& c return RuntimeCall {it->second, constant}; } +std::optional DetectRewrittenDirectObjCMethodCall(const HighLevelILInstruction& callTarget, + const std::vector& parameterExprs, const Function& function) +{ + auto callTargetInfo = GetCallTargetInfo(callTarget, parameterExprs, function); + if (!callTargetInfo) + return std::nullopt; + auto [constant, symbol] = callTargetInfo.value(); + + const auto symbolShortName = symbol->GetShortName(); + if (symbolShortName.length() < 6) + return std::nullopt; + + // Look for the pattern -[ClassName methodName:] or +[ClassName methodName:] + if ((symbolShortName[0] != '-' && symbolShortName[0] != '+') || symbolShortName[1] != '[' + || symbolShortName.back() != ']' || symbolShortName.find(' ') == std::string::npos) + return std::nullopt; + + return RuntimeCall {RuntimeCall::MessageSend, constant, true}; +} + } // unnamed namespace PseudoObjCFunction::PseudoObjCFunction(LanguageRepresentationFunctionType* type, Architecture* arch, Function* owner, @@ -169,6 +201,9 @@ void PseudoObjCFunction::GetExpr_CALL_OR_TAILCALL(const BinaryNinja::HighLevelIL const auto parameterExprs = instr.GetParameterExprs(); auto 
objCRuntimeCall = DetectObjCRuntimeCall(destExpr, parameterExprs, *GetFunction()); + if (!objCRuntimeCall) + objCRuntimeCall = DetectRewrittenDirectObjCMethodCall(destExpr, parameterExprs, *GetFunction()); + if (!objCRuntimeCall) return PseudoCFunction::GetExpr_CALL_OR_TAILCALL(instr, tokens, settings, precedence, statement); @@ -177,7 +212,7 @@ void PseudoObjCFunction::GetExpr_CALL_OR_TAILCALL(const BinaryNinja::HighLevelIL { case RuntimeCall::MessageSend: case RuntimeCall::MessageSendSuper: - if (GetExpr_ObjCMsgSend(destExpr, tokens, settings, parameterExprs)) + if (GetExpr_ObjCMsgSend(objCRuntimeCall->address, objCRuntimeCall->isRewritten, destExpr, tokens, settings, parameterExprs)) { if (statement) tokens.AppendSemicolon(); @@ -224,8 +259,9 @@ void PseudoObjCFunction::GetExpr_CALL_OR_TAILCALL(const BinaryNinja::HighLevelIL return PseudoCFunction::GetExpr_CALL_OR_TAILCALL(instr, tokens, settings, precedence, statement); } -bool PseudoObjCFunction::GetExpr_ObjCMsgSend(const HighLevelILInstruction& instr, HighLevelILTokenEmitter& tokens, - DisassemblySettings* settings, const std::vector& parameterExprs) +bool PseudoObjCFunction::GetExpr_ObjCMsgSend(uint64_t msgSendAddress, bool isRewritten, + const HighLevelILInstruction& instr, HighLevelILTokenEmitter& tokens, DisassemblySettings* settings, + const std::vector& parameterExprs) { if (parameterExprs.size() < 2) return false; @@ -238,6 +274,8 @@ bool PseudoObjCFunction::GetExpr_ObjCMsgSend(const HighLevelILInstruction& instr std::vector selectorTokens {2}; SplitSelector(selector, selectorTokens); + uint64_t referencedAddress = isRewritten ? 
msgSendAddress : selectorAddress; + tokens.AppendOpenBracket(); GetExprText(parameterExprs[0], tokens, settings); @@ -249,7 +287,7 @@ bool PseudoObjCFunction::GetExpr_ObjCMsgSend(const HighLevelILInstruction& instr if (index < selectorTokens.size()) { tokens.Append( - DataSymbolToken, StringReferenceTokenContext, selectorTokens[index], instr.address, selectorAddress); + DataSymbolToken, StringReferenceTokenContext, selectorTokens[index], instr.address, referencedAddress); tokens.Append(TextToken, ":"); } else @@ -264,7 +302,7 @@ bool PseudoObjCFunction::GetExpr_ObjCMsgSend(const HighLevelILInstruction& instr for (size_t index = parameterExprs.size(); index < selectorTokens.size(); index++) { tokens.Append( - DataSymbolToken, StringReferenceTokenContext, selectorTokens[index], instr.address, selectorAddress); + DataSymbolToken, StringReferenceTokenContext, selectorTokens[index], instr.address, referencedAddress); if (index != selectorTokens.size() - 1 || selector.back() == ':') tokens.Append(TextToken, ":"); } diff --git a/lang/c/pseudoobjc.h b/lang/c/pseudoobjc.h index 4b87c7d775..9910c4b83c 100644 --- a/lang/c/pseudoobjc.h +++ b/lang/c/pseudoobjc.h @@ -22,7 +22,7 @@ class PseudoObjCFunction : public PseudoCFunction BNOperatorPrecedence precedence, bool statement) override; private: - bool GetExpr_ObjCMsgSend(const BinaryNinja::HighLevelILInstruction& expr, + bool GetExpr_ObjCMsgSend(uint64_t msgSendAddress, bool isRewritten, const BinaryNinja::HighLevelILInstruction& expr, BinaryNinja::HighLevelILTokenEmitter& tokens, BinaryNinja::DisassemblySettings* settings, const std::vector& parameterExprs); bool GetExpr_GenericObjCRuntimeCall(uint64_t address, const BinaryNinja::HighLevelILInstruction& expr, From 40a395ba603907d79c197435243ef4570f96f6f0 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Thu, 22 May 2025 16:47:01 -0700 Subject: [PATCH 3/3] [PseudoObjC] Handle named type references to types that don't exist --- lang/c/pseudoobjc.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/lang/c/pseudoobjc.cpp b/lang/c/pseudoobjc.cpp index 4cb360a771..6ffc115857 100644 --- a/lang/c/pseudoobjc.cpp +++ b/lang/c/pseudoobjc.cpp @@ -359,7 +359,7 @@ void PseudoObjCFunction::GetExpr_CONST_PTR(const BinaryNinja::HighLevelILInstruc GetFunction()->GetView()->GetTypeByRef(variable.type->GetNamedTypeReference()) : variable.type.GetValue(); - if (type->GetClass() != StructureTypeClass) + if (!type || type->GetClass() != StructureTypeClass) return PseudoCFunction::GetExpr_CONST_PTR(instr, tokens, settings, precedence, statement); auto structureName = type->GetStructureName().GetString();