From 3429718517806507641861347342952251225833 Mon Sep 17 00:00:00 2001 From: Glenn Smith Date: Fri, 20 Dec 2024 13:25:16 -0500 Subject: [PATCH 01/35] Linear View: Persist highlight during renames --- ui/linearview.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ui/linearview.h b/ui/linearview.h index 3e242ff4e7..8543d80a35 100644 --- a/ui/linearview.h +++ b/ui/linearview.h @@ -249,6 +249,7 @@ class BINARYNINJAUIAPI LinearView : public QAbstractScrollArea, public View, pub bool cacheNextLines(); void updateCache(); void updateBounds(); + void updateHighlight(); void refreshAtCurrentLocation(bool cursorFixup = false); bool navigateToAddress(uint64_t addr, bool center, bool updateHighlight, bool navByRef = false); bool navigateToLine( From fb31e0abb07eb40179310a0cb817b18831321900 Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Thu, 21 Nov 2024 17:36:01 +0000 Subject: [PATCH 02/35] [SharedCache] Serialize `SharedCache::m_symbolInfos` `SharedCache::m_symbolInfos` isn't being serialized but there is an attempt to deserialize it. This commit adds in the code to serialize it. I slightly modified the format because I didn't really understand how it was expected to be serialized based on the deserialization code. The deserialization code looked wrong to me but it's likely a misunderstanding on my part. I kept it similar to how `m_exportInfos` is serialized. 
--- view/sharedcache/core/SharedCache.cpp | 32 ++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 81b5053a0c..e4d4ef11f3 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -3386,6 +3386,27 @@ void SharedCache::Store(SerializationContext& context) const } context.writer.EndArray(); + Serialize(context, "symbolInfos"); + context.writer.StartArray(); + for (const auto& pair1 : State().symbolInfos) + { + context.writer.StartObject(); + Serialize(context, "key", pair1.first); + Serialize(context, "value"); + context.writer.StartArray(); + for (const auto& pair2 : pair1.second) + { + context.writer.StartObject(); + Serialize(context, "key", pair2.first); + Serialize(context, "val1", pair2.second.first); + Serialize(context, "val2", pair2.second.second); + context.writer.EndObject(); + } + context.writer.EndArray(); + context.writer.EndObject(); + } + context.writer.EndArray(); + Serialize(context, "backingCaches", State().backingCaches); Serialize(context, "stubIslands", State().stubIslandRegions); Serialize(context, "images", State().images); @@ -3441,13 +3462,14 @@ void SharedCache::Load(DeserializationContext& context) for (auto& symbolInfo : context.doc["symbolInfos"].GetArray()) { - std::vector>> symbolInfoVec; - for (auto& symbolInfoPair : symbolInfo.GetArray()) + std::vector>> + symbolInfos; + for (auto& si : symbolInfo["value"].GetArray()) { - symbolInfoVec.push_back({symbolInfoPair[0].GetUint64(), - {(BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString()}}); + symbolInfos.push_back({si["key"].GetUint64(), + {static_cast(si["val1"].GetUint64()), si["val2"].GetString()}}); } - MutableState().symbolInfos[symbolInfo[0].GetUint64()] = std::move(symbolInfoVec); + MutableState().symbolInfos[symbolInfo["key"].GetUint64()] = std::move(symbolInfos); } for (auto& bcV : 
context.doc["backingCaches"].GetArray()) From d67f08d74988d5114685a82d1308625f80db2fd4 Mon Sep 17 00:00:00 2001 From: kat Date: Mon, 23 Dec 2024 11:35:40 -0500 Subject: [PATCH 03/35] Update metadata version to prevent v2 loader incorrectly trying to load v3 symbol list We should think through a better way of handling upgrades, as v3 can load v2 dbs just fine, but there is no clean way to upgrade the info currently. We need SharedCache ser/des functions in DSCView.cpp --- view/sharedcache/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/view/sharedcache/CMakeLists.txt b/view/sharedcache/CMakeLists.txt index 63ba602bf4..0f6abea22c 100644 --- a/view/sharedcache/CMakeLists.txt +++ b/view/sharedcache/CMakeLists.txt @@ -30,7 +30,7 @@ endif() set(HARD_FAIL_MODE OFF CACHE BOOL "Enable hard fail mode") set(SLIDEINFO_DEBUG_TAGS OFF CACHE BOOL "Enable debug tags in slideinfo") set(VIEW_NAME "DSCView" CACHE STRING "Name of the view") -set(METADATA_VERSION 2 CACHE STRING "Version of the metadata") +set(METADATA_VERSION 3 CACHE STRING "Version of the metadata") add_subdirectory(core) add_subdirectory(api) From ce76c08060a8f4ac87334e5543ef7359b8e8760b Mon Sep 17 00:00:00 2001 From: Visual Ehrmanntraut <30368284+VisualEhrmanntraut@users.noreply.github.com> Date: Tue, 24 Dec 2024 15:50:11 +0200 Subject: [PATCH 04/35] Fix Shared Cache Plugin out-of-tree build --- view/sharedcache/CMakeLists.txt | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/view/sharedcache/CMakeLists.txt b/view/sharedcache/CMakeLists.txt index 0f6abea22c..407fb0d565 100644 --- a/view/sharedcache/CMakeLists.txt +++ b/view/sharedcache/CMakeLists.txt @@ -2,11 +2,14 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sharedcache) -if((NOT BN_API_PATH) AND (NOT BN_INTERNAL_BUILD)) - set(BN_API_PATH $ENV{BN_API_PATH} CACHE STRING "Path to Binary Ninja API source") - if(NOT BN_API_PATH) - message(FATAL_ERROR "Provide path to Binary Ninja API source 
in BN_API_PATH") - endif() +if(NOT BN_INTERNAL_BUILD) + find_path( + BN_API_PATH + NAMES binaryninjaapi.h + HINTS ../.. binaryninjaapi $ENV{BN_API_PATH} + REQUIRED + ) + add_subdirectory(${BN_API_PATH} binaryninjaapi) endif() if (NOT BN_INTERNAL_BUILD) @@ -105,4 +108,4 @@ message(" ▒▒▓ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ Crash on Failure: ${HARD_FAIL_MODE} ░ ▒ ▒ ░ ░▒ ░ ░ ░ ▒ Slideinfo Debug Tags: ${SLIDEINFO_DEBUG_TAGS} ░ ░ ░ ░ ░ ░ ░ REFCOUNT_DEBUG: ${BN_REF_COUNT_DEBUG} -") \ No newline at end of file +") From 08d91f14061c54134038fadc2d333a3aa3da0fe1 Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Mon, 25 Nov 2024 16:04:33 +0000 Subject: [PATCH 05/35] [SharedCache] Remove unnecessary lock in `SharedCache` destructor From what I can tell the lock being taken in `SharedCache::~SharedCache` was purely for the decrement of `sharedCacheReferences`, however its an atomic so a lock isn't necessary. The lock being taken is extremely contentious and therefore often slow to be acquired. This resulted in a surprising amount of execution time spent in the `SharedCache` destructor. Nothing hugely significant but a quick and easy win to remove this single line of code. 
--- view/sharedcache/core/SharedCache.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index e4d4ef11f3..7bbacf6383 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -1424,7 +1424,6 @@ SharedCache::SharedCache(BinaryNinja::Ref dscView) : m_ } SharedCache::~SharedCache() { - std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); sharedCacheReferences--; } From 934104743a0e3071606277692822be4b853288ac Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Thu, 21 Nov 2024 19:01:44 +0000 Subject: [PATCH 06/35] [SharedCache] Fix post-processing of Objective-C sections Prior to this commit the function `DSCObjCProcessor::PostProcessObjCSections` never does anything because it doesn't use the correct names to get the Objective-C sections of the recently loaded library. In fact it never does anything because the DSC never has sections with the names it is searching for. This commit passes the `baseName` (the name of the library that was loaded), which is what other Objective-C section processing code does. Combining the base name with the section names it will now find them and process them as intended. This was resulting in a lot of Objective-C related stuff being missed. There is however still an issue of the fact that the way this DSC plugin works means it only analyzes Objective-C sections once. This catches a lot of things but there are a number of cases where other libraries need to be loaded first due to information being referenced in another library. For instance errors like `Failed to determine base classname for category` can be caused by the class reference in the category being to a class outside of the loaded library. Once the library containing the class has been loaded, the section containing the category should be re-processed. 
--- view/sharedcache/core/ObjC.cpp | 16 ++++++++-------- view/sharedcache/core/ObjC.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/view/sharedcache/core/ObjC.cpp b/view/sharedcache/core/ObjC.cpp index 271e5c941d..95fbb2a2d5 100644 --- a/view/sharedcache/core/ObjC.cpp +++ b/view/sharedcache/core/ObjC.cpp @@ -1087,16 +1087,16 @@ void DSCObjCProcessor::ApplyMethodTypes(Class& cls) } } -void DSCObjCProcessor::PostProcessObjCSections(VMReader* reader) +void DSCObjCProcessor::PostProcessObjCSections(VMReader* reader, std::string baseName) { auto ptrSize = m_data->GetAddressSize(); - if (auto imageInfo = m_data->GetSectionByName("__objc_imageinfo")) + if (auto imageInfo = m_data->GetSectionByName(baseName + "::__objc_imageinfo")) { auto start = imageInfo->GetStart(); auto type = Type::NamedType(m_data, m_typeNames.imageInfo); m_data->DefineDataVariable(start, type); } - if (auto selrefs = m_data->GetSectionByName("__objc_selrefs")) + if (auto selrefs = m_data->GetSectionByName(baseName + "::__objc_selrefs")) { auto start = selrefs->GetStart(); auto end = selrefs->GetEnd(); @@ -1119,7 +1119,7 @@ void DSCObjCProcessor::PostProcessObjCSections(VMReader* reader) DefineObjCSymbol(DataSymbol, type, "selRef_" + sel, i, true); } } - if (auto superRefs = m_data->GetSectionByName("__objc_classrefs")) + if (auto superRefs = m_data->GetSectionByName(baseName + "::__objc_classrefs")) { auto start = superRefs->GetStart(); auto end = superRefs->GetEnd(); @@ -1137,7 +1137,7 @@ void DSCObjCProcessor::PostProcessObjCSections(VMReader* reader) } } } - if (auto superRefs = m_data->GetSectionByName("__objc_superrefs")) + if (auto superRefs = m_data->GetSectionByName(baseName + "::__objc_superrefs")) { auto start = superRefs->GetStart(); auto end = superRefs->GetEnd(); @@ -1155,7 +1155,7 @@ void DSCObjCProcessor::PostProcessObjCSections(VMReader* reader) } } } - if (auto protoRefs = m_data->GetSectionByName("__objc_protorefs")) + if (auto protoRefs = 
m_data->GetSectionByName(baseName + "::__objc_protorefs")) { auto start = protoRefs->GetStart(); auto end = protoRefs->GetEnd(); @@ -1173,7 +1173,7 @@ void DSCObjCProcessor::PostProcessObjCSections(VMReader* reader) } } } - if (auto ivars = m_data->GetSectionByName("__objc_ivar")) + if (auto ivars = m_data->GetSectionByName(baseName + "::__objc_ivar")) { auto start = ivars->GetStart(); auto end = ivars->GetEnd(); @@ -1416,7 +1416,7 @@ void DSCObjCProcessor::ProcessObjCData(std::shared_ptr vm, std::string baseN if (auto protoList = m_data->GetSectionByName(baseName + "::__objc_protolist")) LoadProtocols(&reader, protoList); - PostProcessObjCSections(&reader); + PostProcessObjCSections(&reader, baseName); auto id = m_data->BeginUndoActions(); m_symbolQueue->Process(); diff --git a/view/sharedcache/core/ObjC.h b/view/sharedcache/core/ObjC.h index 40f2441bb9..016ff02247 100644 --- a/view/sharedcache/core/ObjC.h +++ b/view/sharedcache/core/ObjC.h @@ -222,7 +222,7 @@ namespace DSCObjC { void GenerateClassTypes(); bool ApplyMethodType(Class& cls, Method& method, bool isInstanceMethod); void ApplyMethodTypes(Class& cls); - void PostProcessObjCSections(VMReader* reader); + void PostProcessObjCSections(VMReader* reader, std::string baseName); public: DSCObjCProcessor(BinaryView* data, SharedCacheCore::SharedCache* cache, bool isBackedByDatabase); void ProcessObjCData(std::shared_ptr vm, std::string baseName); From 2cce2ab6c79879a4d64df1fc7375e2c871565f8f Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Mon, 25 Nov 2024 19:54:44 +0000 Subject: [PATCH 07/35] [SharedCache] Add the ability to skip Objective-C processing when loading a library This is useful when batch loading libraries to avoid extra processing (once the next commit has landed). 
--- .../api/python/_sharedcachecore.py | 12 ++-- view/sharedcache/api/python/sharedcache.py | 8 +-- view/sharedcache/api/sharedcache.cpp | 8 +-- view/sharedcache/api/sharedcacheapi.h | 4 +- view/sharedcache/api/sharedcachecore.h | 4 +- view/sharedcache/core/SharedCache.cpp | 59 ++++++++++--------- view/sharedcache/core/SharedCache.h | 4 +- 7 files changed, 53 insertions(+), 46 deletions(-) diff --git a/view/sharedcache/api/python/_sharedcachecore.py b/view/sharedcache/api/python/_sharedcachecore.py index d208048bc3..bd9e0764cb 100644 --- a/view/sharedcache/api/python/_sharedcachecore.py +++ b/view/sharedcache/api/python/_sharedcachecore.py @@ -528,15 +528,17 @@ def BNDSCViewLoadAllSymbolsAndWait( _BNDSCViewLoadImageContainingAddress.argtypes = [ ctypes.POINTER(BNSharedCache), ctypes.c_ulonglong, + ctypes.c_bool, ] # noinspection PyPep8Naming def BNDSCViewLoadImageContainingAddress( cache: ctypes.POINTER(BNSharedCache), - address: int + address: int, + skipObjC: bool ) -> bool: - return _BNDSCViewLoadImageContainingAddress(cache, address) + return _BNDSCViewLoadImageContainingAddress(cache, address, skipObjC) # ------------------------------------------------------- @@ -547,15 +549,17 @@ def BNDSCViewLoadImageContainingAddress( _BNDSCViewLoadImageWithInstallName.argtypes = [ ctypes.POINTER(BNSharedCache), ctypes.c_char_p, + ctypes.c_bool, ] # noinspection PyPep8Naming def BNDSCViewLoadImageWithInstallName( cache: ctypes.POINTER(BNSharedCache), - name: Optional[str] + name: Optional[str], + skipObjC: bool ) -> bool: - return _BNDSCViewLoadImageWithInstallName(cache, cstr(name)) + return _BNDSCViewLoadImageWithInstallName(cache, cstr(name), skipObjC) # ------------------------------------------------------- diff --git a/view/sharedcache/api/python/sharedcache.py b/view/sharedcache/api/python/sharedcache.py index b660d9d97f..7902c76ae0 100644 --- a/view/sharedcache/api/python/sharedcache.py +++ b/view/sharedcache/api/python/sharedcache.py @@ -108,14 +108,14 @@ class 
SharedCache: def __init__(self, view): self.handle = sccore.BNGetSharedCache(view.handle) - def load_image_with_install_name(self, installName): - return sccore.BNDSCViewLoadImageWithInstallName(self.handle, installName) + def load_image_with_install_name(self, installName, skipObjC = False): + return sccore.BNDSCViewLoadImageWithInstallName(self.handle, installName, skipObjC) def load_section_at_address(self, addr): return sccore.BNDSCViewLoadSectionAtAddress(self.handle, addr) - def load_image_containing_address(self, addr): - return sccore.BNDSCViewLoadImageContainingAddress(self.handle, addr) + def load_image_containing_address(self, addr, skipObjC = False): + return sccore.BNDSCViewLoadImageContainingAddress(self.handle, addr, skipObjC) @property def caches(self): diff --git a/view/sharedcache/api/sharedcache.cpp b/view/sharedcache/api/sharedcache.cpp index 71d190da21..6498de3289 100644 --- a/view/sharedcache/api/sharedcache.cpp +++ b/view/sharedcache/api/sharedcache.cpp @@ -20,10 +20,10 @@ namespace SharedCacheAPI { return BNDSCViewFastGetBackingCacheCount(view->GetObject()); } - bool SharedCache::LoadImageWithInstallName(std::string installName) + bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObjC) { char* str = BNAllocString(installName.c_str()); - return BNDSCViewLoadImageWithInstallName(m_object, str); + return BNDSCViewLoadImageWithInstallName(m_object, str, skipObjC); } bool SharedCache::LoadSectionAtAddress(uint64_t addr) @@ -31,9 +31,9 @@ namespace SharedCacheAPI { return BNDSCViewLoadSectionAtAddress(m_object, addr); } - bool SharedCache::LoadImageContainingAddress(uint64_t addr) + bool SharedCache::LoadImageContainingAddress(uint64_t addr, bool skipObjC) { - return BNDSCViewLoadImageContainingAddress(m_object, addr); + return BNDSCViewLoadImageContainingAddress(m_object, addr, skipObjC); } std::vector SharedCache::GetAvailableImages() diff --git a/view/sharedcache/api/sharedcacheapi.h 
b/view/sharedcache/api/sharedcacheapi.h index 7b049bc423..f556b1a86e 100644 --- a/view/sharedcache/api/sharedcacheapi.h +++ b/view/sharedcache/api/sharedcacheapi.h @@ -257,9 +257,9 @@ namespace SharedCacheAPI { static BNDSCViewLoadProgress GetLoadProgress(Ref view); static uint64_t FastGetBackingCacheCount(Ref view); - bool LoadImageWithInstallName(std::string installName); + bool LoadImageWithInstallName(std::string installName, bool skipObjC = false); bool LoadSectionAtAddress(uint64_t addr); - bool LoadImageContainingAddress(uint64_t addr); + bool LoadImageContainingAddress(uint64_t addr, bool skipObjC = false); std::vector GetAvailableImages(); std::vector LoadAllSymbolsAndWait(); diff --git a/view/sharedcache/api/sharedcachecore.h b/view/sharedcache/api/sharedcachecore.h index 9fc3327561..967a7d0be6 100644 --- a/view/sharedcache/api/sharedcachecore.h +++ b/view/sharedcache/api/sharedcachecore.h @@ -120,9 +120,9 @@ extern "C" SHAREDCACHE_FFI_API char** BNDSCViewGetInstallNames(BNSharedCache* cache, size_t* count); - SHAREDCACHE_FFI_API bool BNDSCViewLoadImageWithInstallName(BNSharedCache* cache, char* name); + SHAREDCACHE_FFI_API bool BNDSCViewLoadImageWithInstallName(BNSharedCache* cache, char* name, bool skipObjC); SHAREDCACHE_FFI_API bool BNDSCViewLoadSectionAtAddress(BNSharedCache* cache, uint64_t name); - SHAREDCACHE_FFI_API bool BNDSCViewLoadImageContainingAddress(BNSharedCache* cache, uint64_t address); + SHAREDCACHE_FFI_API bool BNDSCViewLoadImageContainingAddress(BNSharedCache* cache, uint64_t address, bool skipObjC); SHAREDCACHE_FFI_API char* BNDSCViewGetNameForAddress(BNSharedCache* cache, uint64_t address); SHAREDCACHE_FFI_API char* BNDSCViewGetImageNameForAddress(BNSharedCache* cache, uint64_t address); diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 7bbacf6383..d4ce0921ef 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -1406,7 +1406,7 @@ 
SharedCache::SharedCache(BinaryNinja::Ref dscView) : m_ { lock.unlock(); m_logger->LogInfo("Loading core libsystem_c.dylib library"); - LoadImageWithInstallName(header.installName); + LoadImageWithInstallName(header.installName, false); lock.lock(); break; } @@ -1518,7 +1518,7 @@ std::string SharedCache::ImageNameForAddress(uint64_t address) return ""; } -bool SharedCache::LoadImageContainingAddress(uint64_t address) +bool SharedCache::LoadImageContainingAddress(uint64_t address, bool skipObjC) { for (const auto& [start, header] : State().headers) { @@ -1526,7 +1526,7 @@ bool SharedCache::LoadImageContainingAddress(uint64_t address) { if (segment.vmaddr <= address && segment.vmaddr + segment.vmsize > address) { - return LoadImageWithInstallName(header.installName); + return LoadImageWithInstallName(header.installName, skipObjC); } } } @@ -1727,7 +1727,7 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) return true; } -bool SharedCache::LoadImageWithInstallName(std::string installName) +bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObjC) { auto settings = m_dscView->GetLoadSettings(VIEW_NAME); @@ -1837,28 +1837,31 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) SharedCache::InitializeHeader(m_dscView, vm.get(), *h, regions); - try + if (!skipObjC) { - auto objc = std::make_unique(m_dscView, this, false); + try + { + auto objc = std::make_unique(m_dscView, this, false); - bool processCFStrings = true; - bool processObjCMetadata = true; - if (settings && settings->Contains("loader.dsc.processCFStrings")) - processCFStrings = settings->Get("loader.dsc.processCFStrings", m_dscView); - if (settings && settings->Contains("loader.dsc.processObjC")) - processObjCMetadata = settings->Get("loader.dsc.processObjC", m_dscView); - if (processObjCMetadata) - objc->ProcessObjCData(vm, h->identifierPrefix); - if (processCFStrings) - objc->ProcessCFStrings(vm, h->identifierPrefix); - } - catch (const std::exception& ex) 
- { - m_logger->LogWarn("Error processing ObjC data: %s", ex.what()); - } - catch (...) - { - m_logger->LogWarn("Error processing ObjC data"); + bool processCFStrings = true; + bool processObjCMetadata = true; + if (settings && settings->Contains("loader.dsc.processCFStrings")) + processCFStrings = settings->Get("loader.dsc.processCFStrings", m_dscView); + if (settings && settings->Contains("loader.dsc.processObjC")) + processObjCMetadata = settings->Get("loader.dsc.processObjC", m_dscView); + if (processObjCMetadata) + objc->ProcessObjCData(vm, h->identifierPrefix); + if (processCFStrings) + objc->ProcessCFStrings(vm, h->identifierPrefix); + } + catch (const std::exception& ex) + { + m_logger->LogWarn("Error processing ObjC data: %s", ex.what()); + } + catch (...) + { + m_logger->LogWarn("Error processing ObjC data"); + } } m_dscView->AddAnalysisOption("linearsweep"); @@ -3033,13 +3036,13 @@ extern "C" cache->object->ReleaseAPIRef(); } - bool BNDSCViewLoadImageWithInstallName(BNSharedCache* cache, char* name) + bool BNDSCViewLoadImageWithInstallName(BNSharedCache* cache, char* name, bool skipObjC) { std::string imageName = std::string(name); // FIXME !!!!!!!! 
BNFreeString(name); if (cache->object) - return cache->object->LoadImageWithInstallName(imageName); + return cache->object->LoadImageWithInstallName(imageName, skipObjC); return false; } @@ -3054,11 +3057,11 @@ extern "C" return false; } - bool BNDSCViewLoadImageContainingAddress(BNSharedCache* cache, uint64_t address) + bool BNDSCViewLoadImageContainingAddress(BNSharedCache* cache, uint64_t address, bool skipObjC) { if (cache->object) { - return cache->object->LoadImageContainingAddress(address); + return cache->object->LoadImageContainingAddress(address, skipObjC); } return false; diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 924d81f21f..c82dea5cf5 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -571,9 +571,9 @@ namespace SharedCacheCore { void ParseAndApplySlideInfoForFile(std::shared_ptr file); std::optional GetImageStart(std::string installName); std::optional HeaderForAddress(uint64_t); - bool LoadImageWithInstallName(std::string installName); + bool LoadImageWithInstallName(std::string installName, bool skipObjC); bool LoadSectionAtAddress(uint64_t address); - bool LoadImageContainingAddress(uint64_t address); + bool LoadImageContainingAddress(uint64_t address, bool skipObjC); std::string NameForAddress(uint64_t address); std::string ImageNameForAddress(uint64_t address); std::vector GetAvailableImages(); From a9245348accd81263e106896f7b98ed5bc4afe5c Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Tue, 26 Nov 2024 00:09:03 +0000 Subject: [PATCH 08/35] [SharedCache] Add the ability to manually trigger Objective-C processing A problem with processing Objective-C sections at the time a library is loaded is that some of the references within those sections may refer to unload sections. This results in things like selectors not being correctly typed and named. 
This commit provides a way for users to manually trigger Objective-C parsing against sections for a specific library or all libraries, via the API. Combined with the previous commit a user can use the API to batch load a number of libraries and skip Objective-C processing for each one and then run it across the entire of the DSC once they are all loaded. Further improvement would be to provide a way to trigger Objective-C processing through the UI. --- .../api/python/_sharedcachecore.py | 38 ++++++ view/sharedcache/api/python/sharedcache.py | 6 + view/sharedcache/api/sharedcache.cpp | 11 ++ view/sharedcache/api/sharedcacheapi.h | 3 + view/sharedcache/api/sharedcachecore.h | 3 + view/sharedcache/core/SharedCache.cpp | 117 ++++++++++++++---- view/sharedcache/core/SharedCache.h | 2 + 7 files changed, 158 insertions(+), 22 deletions(-) diff --git a/view/sharedcache/api/python/_sharedcachecore.py b/view/sharedcache/api/python/_sharedcachecore.py index bd9e0764cb..5f31697ed4 100644 --- a/view/sharedcache/api/python/_sharedcachecore.py +++ b/view/sharedcache/api/python/_sharedcachecore.py @@ -581,6 +581,44 @@ def BNDSCViewLoadSectionAtAddress( return _BNDSCViewLoadSectionAtAddress(cache, name) +# ------------------------------------------------------- +# _BNDSCViewProcessAllObjCSections + +_BNDSCViewProcessAllObjCSections = core.BNDSCViewProcessAllObjCSections +_BNDSCViewProcessAllObjCSections.restype = None +_BNDSCViewProcessAllObjCSections.argtypes = [ + ctypes.POINTER(BNSharedCache), + ] + + +# noinspection PyPep8Naming +def BNDSCViewProcessAllObjCSections( + cache: ctypes.POINTER(BNSharedCache) + ) -> None: + return _BNDSCViewProcessAllObjCSections(cache) + + +# ------------------------------------------------------- +# _BNDSCViewProcessObjCSectionsForImageWithInstallName + +_BNDSCViewProcessObjCSectionsForImageWithInstallName = core.BNDSCViewProcessObjCSectionsForImageWithInstallName +_BNDSCViewProcessObjCSectionsForImageWithInstallName.restype = None 
+_BNDSCViewProcessObjCSectionsForImageWithInstallName.argtypes = [ + ctypes.POINTER(BNSharedCache), + ctypes.c_char_p, + ctypes.c_bool, + ] + + +# noinspection PyPep8Naming +def BNDSCViewProcessObjCSectionsForImageWithInstallName( + cache: ctypes.POINTER(BNSharedCache), + name: Optional[str], + deallocName: bool + ) -> None: + return _BNDSCViewProcessObjCSectionsForImageWithInstallName(cache, cstr(name), deallocName) + + # ------------------------------------------------------- # _BNFreeSharedCacheReference diff --git a/view/sharedcache/api/python/sharedcache.py b/view/sharedcache/api/python/sharedcache.py index 7902c76ae0..e0e18bde0c 100644 --- a/view/sharedcache/api/python/sharedcache.py +++ b/view/sharedcache/api/python/sharedcache.py @@ -117,6 +117,12 @@ def load_section_at_address(self, addr): def load_image_containing_address(self, addr, skipObjC = False): return sccore.BNDSCViewLoadImageContainingAddress(self.handle, addr, skipObjC) + def process_objc_sections_for_image_with_install_name(self, installName): + return sccore.BNDSCViewProcessObjCSectionsForImageWithInstallName(self.handle, installName, False) + + def process_all_objc_sections(self): + return sccore.BNDSCViewProcessAllObjCSections(self.handle) + @property def caches(self): count = ctypes.c_ulonglong() diff --git a/view/sharedcache/api/sharedcache.cpp b/view/sharedcache/api/sharedcache.cpp index 6498de3289..e764531cd2 100644 --- a/view/sharedcache/api/sharedcache.cpp +++ b/view/sharedcache/api/sharedcache.cpp @@ -55,6 +55,17 @@ namespace SharedCacheAPI { return result; } + void SharedCache::ProcessObjCSectionsForImageWithInstallName(std::string installName) + { + char* str = BNAllocString(installName.c_str()); + BNDSCViewProcessObjCSectionsForImageWithInstallName(m_object, str, true); + } + + void SharedCache::ProcessAllObjCSections() + { + BNDSCViewProcessAllObjCSections(m_object); + } + std::vector SharedCache::GetLoadedMemoryRegions() { size_t count; diff --git 
a/view/sharedcache/api/sharedcacheapi.h b/view/sharedcache/api/sharedcacheapi.h index f556b1a86e..cfe7835127 100644 --- a/view/sharedcache/api/sharedcacheapi.h +++ b/view/sharedcache/api/sharedcacheapi.h @@ -261,6 +261,9 @@ namespace SharedCacheAPI { bool LoadSectionAtAddress(uint64_t addr); bool LoadImageContainingAddress(uint64_t addr, bool skipObjC = false); std::vector GetAvailableImages(); + + void ProcessObjCSectionsForImageWithInstallName(std::string installName); + void ProcessAllObjCSections(); std::vector LoadAllSymbolsAndWait(); diff --git a/view/sharedcache/api/sharedcachecore.h b/view/sharedcache/api/sharedcachecore.h index 967a7d0be6..155f39de3a 100644 --- a/view/sharedcache/api/sharedcachecore.h +++ b/view/sharedcache/api/sharedcachecore.h @@ -123,6 +123,9 @@ extern "C" SHAREDCACHE_FFI_API bool BNDSCViewLoadImageWithInstallName(BNSharedCache* cache, char* name, bool skipObjC); SHAREDCACHE_FFI_API bool BNDSCViewLoadSectionAtAddress(BNSharedCache* cache, uint64_t name); SHAREDCACHE_FFI_API bool BNDSCViewLoadImageContainingAddress(BNSharedCache* cache, uint64_t address, bool skipObjC); + + SHAREDCACHE_FFI_API void BNDSCViewProcessObjCSectionsForImageWithInstallName(BNSharedCache* cache, char* name, bool deallocName); + SHAREDCACHE_FFI_API void BNDSCViewProcessAllObjCSections(BNSharedCache* cache); SHAREDCACHE_FFI_API char* BNDSCViewGetNameForAddress(BNSharedCache* cache, uint64_t address); SHAREDCACHE_FFI_API char* BNDSCViewGetImageNameForAddress(BNSharedCache* cache, uint64_t address); diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index d4ce0921ef..51e95bc6a7 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -1727,6 +1727,81 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) return true; } +static void GetObjCSettings(Ref view, bool* processObjCMetadata, bool* processCFStrings) +{ + auto settings = view->GetLoadSettings(VIEW_NAME); + *processCFStrings 
= true; + *processObjCMetadata = true; + if (settings && settings->Contains("loader.dsc.processCFStrings")) + *processCFStrings = settings->Get("loader.dsc.processCFStrings", view); + if (settings && settings->Contains("loader.dsc.processObjC")) + *processObjCMetadata = settings->Get("loader.dsc.processObjC", view); +} + +static void ProcessObjCSectionsForImageWithName(std::string baseName, std::shared_ptr vm, std::shared_ptr objc, bool processCFStrings, bool processObjCMetadata, Ref logger) +{ + try + { + if (processObjCMetadata) + objc->ProcessObjCData(vm, baseName); + if (processCFStrings) + objc->ProcessCFStrings(vm, baseName); + } + catch (const std::exception& ex) + { + logger->LogWarn("Error processing ObjC data for image %s: %s", baseName.c_str(), ex.what()); + } + catch (...) + { + logger->LogWarn("Error processing ObjC data for image %s", baseName.c_str()); + } +} + +void SharedCache::ProcessObjCSectionsForImageWithInstallName(std::string installName) +{ + bool processCFStrings; + bool processObjCMetadata; + GetObjCSettings(m_dscView, &processCFStrings, &processObjCMetadata); + + if (!processObjCMetadata && !processCFStrings) + return; + + auto objc = std::make_shared(m_dscView, this, false); + auto vm = GetVMMap(); + + ProcessObjCSectionsForImageWithName(base_name(installName), vm, objc, processCFStrings, processObjCMetadata, m_logger); +} + +void SharedCache::ProcessAllObjCSections() +{ + bool processCFStrings; + bool processObjCMetadata; + GetObjCSettings(m_dscView, &processCFStrings, &processObjCMetadata); + + if (!processObjCMetadata && !processCFStrings) + return; + + auto objc = std::make_shared(m_dscView, this, false); + auto vm = GetVMMap(); + + std::set processedImageHeaders; + for (auto region : GetMappedRegions()) + { + if (!region.loaded) + continue; + + // Don't repeat the same images multiple times + auto header = HeaderForAddress(region.start); + if (!header) + continue; + if (processedImageHeaders.find(header->textBase) != 
processedImageHeaders.end()) + continue; + processedImageHeaders.insert(header->textBase); + + ProcessObjCSectionsForImageWithName(header->identifierPrefix, vm, objc, processCFStrings, processObjCMetadata, m_logger); + } +} + bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObjC) { auto settings = m_dscView->GetLoadSettings(VIEW_NAME); @@ -1839,29 +1914,11 @@ bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObj if (!skipObjC) { - try - { - auto objc = std::make_unique(m_dscView, this, false); + bool processCFStrings; + bool processObjCMetadata; + GetObjCSettings(m_dscView, &processCFStrings, &processObjCMetadata); - bool processCFStrings = true; - bool processObjCMetadata = true; - if (settings && settings->Contains("loader.dsc.processCFStrings")) - processCFStrings = settings->Get("loader.dsc.processCFStrings", m_dscView); - if (settings && settings->Contains("loader.dsc.processObjC")) - processObjCMetadata = settings->Get("loader.dsc.processObjC", m_dscView); - if (processObjCMetadata) - objc->ProcessObjCData(vm, h->identifierPrefix); - if (processCFStrings) - objc->ProcessCFStrings(vm, h->identifierPrefix); - } - catch (const std::exception& ex) - { - m_logger->LogWarn("Error processing ObjC data: %s", ex.what()); - } - catch (...) 
- { - m_logger->LogWarn("Error processing ObjC data"); - } + ProcessObjCSectionsForImageWithName(h->identifierPrefix, vm, std::make_shared(m_dscView, this, false), processCFStrings, processObjCMetadata, m_logger); } m_dscView->AddAnalysisOption("linearsweep"); @@ -3067,6 +3124,22 @@ extern "C" return false; } + void BNDSCViewProcessObjCSectionsForImageWithInstallName(BNSharedCache* cache, char* name, bool deallocName) + { + std::string imageName = std::string(name); + if (deallocName) + BNFreeString(name); + + if (cache->object) + cache->object->ProcessObjCSectionsForImageWithInstallName(imageName); + } + + void BNDSCViewProcessAllObjCSections(BNSharedCache* cache) + { + if (cache->object) + cache->object->ProcessAllObjCSections(); + } + char** BNDSCViewGetInstallNames(BNSharedCache* cache, size_t* count) { if (cache->object) diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index c82dea5cf5..8fa98c05c5 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -574,6 +574,8 @@ namespace SharedCacheCore { bool LoadImageWithInstallName(std::string installName, bool skipObjC); bool LoadSectionAtAddress(uint64_t address); bool LoadImageContainingAddress(uint64_t address, bool skipObjC); + void ProcessObjCSectionsForImageWithInstallName(std::string installName); + void ProcessAllObjCSections(); std::string NameForAddress(uint64_t address); std::string ImageNameForAddress(uint64_t address); std::vector GetAvailableImages(); From 6735403947635158586629755761325149687393 Mon Sep 17 00:00:00 2001 From: Glenn Smith Date: Fri, 27 Dec 2024 12:45:20 -0500 Subject: [PATCH 09/35] Clean up langreps' opcode print switch --- lang/c/pseudoc.cpp | 126 +-------------------------------------- lang/rust/pseudorust.cpp | 126 +-------------------------------------- 2 files changed, 2 insertions(+), 250 deletions(-) diff --git a/lang/c/pseudoc.cpp b/lang/c/pseudoc.cpp index 47d566d104..d89b39119c 100644 --- 
a/lang/c/pseudoc.cpp +++ b/lang/c/pseudoc.cpp @@ -548,131 +548,7 @@ void PseudoCFunction::GetExprTextInternal(const HighLevelILInstruction& instr, H if (settings && settings->IsOptionSet(ShowILOpcodes)) { tokens.Append(OperationToken, "/*"); - switch (instr.operation) - { - case HLIL_NOP: tokens.Append(OperationToken, "HLIL_NOP"); break; - case HLIL_BLOCK: tokens.Append(OperationToken, "HLIL_BLOCK"); break; - case HLIL_IF: tokens.Append(OperationToken, "HLIL_IF"); break; - case HLIL_WHILE: tokens.Append(OperationToken, "HLIL_WHILE"); break; - case HLIL_DO_WHILE: tokens.Append(OperationToken, "HLIL_DO_WHILE"); break; - case HLIL_FOR: tokens.Append(OperationToken, "HLIL_FOR"); break; - case HLIL_SWITCH: tokens.Append(OperationToken, "HLIL_SWITCH"); break; - case HLIL_CASE: tokens.Append(OperationToken, "HLIL_CASE"); break; - case HLIL_BREAK: tokens.Append(OperationToken, "HLIL_BREAK"); break; - case HLIL_CONTINUE: tokens.Append(OperationToken, "HLIL_CONTINUE"); break; - case HLIL_JUMP: tokens.Append(OperationToken, "HLIL_JUMP"); break; - case HLIL_RET: tokens.Append(OperationToken, "HLIL_RET"); break; - case HLIL_NORET: tokens.Append(OperationToken, "HLIL_NORET"); break; - case HLIL_GOTO: tokens.Append(OperationToken, "HLIL_GOTO"); break; - case HLIL_LABEL: tokens.Append(OperationToken, "HLIL_LABEL"); break; - case HLIL_VAR_DECLARE: tokens.Append(OperationToken, "HLIL_VAR_DECLARE"); break; - case HLIL_VAR_INIT: tokens.Append(OperationToken, "HLIL_VAR_INIT"); break; - case HLIL_ASSIGN: tokens.Append(OperationToken, "HLIL_ASSIGN"); break; - case HLIL_ASSIGN_UNPACK: tokens.Append(OperationToken, "HLIL_ASSIGN_UNPACK"); break; - case HLIL_VAR: tokens.Append(OperationToken, "HLIL_VAR"); break; - case HLIL_STRUCT_FIELD: tokens.Append(OperationToken, "HLIL_STRUCT_FIELD"); break; - case HLIL_ARRAY_INDEX: tokens.Append(OperationToken, "HLIL_ARRAY_INDEX"); break; - case HLIL_SPLIT: tokens.Append(OperationToken, "HLIL_SPLIT"); break; - case HLIL_DEREF: 
tokens.Append(OperationToken, "HLIL_DEREF"); break; - case HLIL_DEREF_FIELD: tokens.Append(OperationToken, "HLIL_DEREF_FIELD"); break; - case HLIL_ADDRESS_OF: tokens.Append(OperationToken, "HLIL_ADDRESS_OF"); break; - case HLIL_CONST: tokens.Append(OperationToken, "HLIL_CONST"); break; - case HLIL_CONST_DATA: tokens.Append(OperationToken, "HLIL_CONST_DATA"); break; - case HLIL_CONST_PTR: tokens.Append(OperationToken, "HLIL_CONST_PTR"); break; - case HLIL_EXTERN_PTR: tokens.Append(OperationToken, "HLIL_EXTERN_PTR"); break; - case HLIL_FLOAT_CONST: tokens.Append(OperationToken, "HLIL_FLOAT_CONST"); break; - case HLIL_IMPORT: tokens.Append(OperationToken, "HLIL_IMPORT"); break; - case HLIL_ADD: tokens.Append(OperationToken, "HLIL_ADD"); break; - case HLIL_ADC: tokens.Append(OperationToken, "HLIL_ADC"); break; - case HLIL_SUB: tokens.Append(OperationToken, "HLIL_SUB"); break; - case HLIL_SBB: tokens.Append(OperationToken, "HLIL_SBB"); break; - case HLIL_AND: tokens.Append(OperationToken, "HLIL_AND"); break; - case HLIL_OR: tokens.Append(OperationToken, "HLIL_OR"); break; - case HLIL_XOR: tokens.Append(OperationToken, "HLIL_XOR"); break; - case HLIL_LSL: tokens.Append(OperationToken, "HLIL_LSL"); break; - case HLIL_LSR: tokens.Append(OperationToken, "HLIL_LSR"); break; - case HLIL_ASR: tokens.Append(OperationToken, "HLIL_ASR"); break; - case HLIL_ROL: tokens.Append(OperationToken, "HLIL_ROL"); break; - case HLIL_RLC: tokens.Append(OperationToken, "HLIL_RLC"); break; - case HLIL_ROR: tokens.Append(OperationToken, "HLIL_ROR"); break; - case HLIL_RRC: tokens.Append(OperationToken, "HLIL_RRC"); break; - case HLIL_MUL: tokens.Append(OperationToken, "HLIL_MUL"); break; - case HLIL_MULU_DP: tokens.Append(OperationToken, "HLIL_MULU_DP"); break; - case HLIL_MULS_DP: tokens.Append(OperationToken, "HLIL_MULS_DP"); break; - case HLIL_DIVU: tokens.Append(OperationToken, "HLIL_DIVU"); break; - case HLIL_DIVU_DP: tokens.Append(OperationToken, "HLIL_DIVU_DP"); break; - case HLIL_DIVS: 
tokens.Append(OperationToken, "HLIL_DIVS"); break; - case HLIL_DIVS_DP: tokens.Append(OperationToken, "HLIL_DIVS_DP"); break; - case HLIL_MODU: tokens.Append(OperationToken, "HLIL_MODU"); break; - case HLIL_MODU_DP: tokens.Append(OperationToken, "HLIL_MODU_DP"); break; - case HLIL_MODS: tokens.Append(OperationToken, "HLIL_MODS"); break; - case HLIL_MODS_DP: tokens.Append(OperationToken, "HLIL_MODS_DP"); break; - case HLIL_NEG: tokens.Append(OperationToken, "HLIL_NEG"); break; - case HLIL_NOT: tokens.Append(OperationToken, "HLIL_NOT"); break; - case HLIL_SX: tokens.Append(OperationToken, "HLIL_SX"); break; - case HLIL_ZX: tokens.Append(OperationToken, "HLIL_ZX"); break; - case HLIL_LOW_PART: tokens.Append(OperationToken, "HLIL_LOW_PART"); break; - case HLIL_CALL: tokens.Append(OperationToken, "HLIL_CALL"); break; - case HLIL_CMP_E: tokens.Append(OperationToken, "HLIL_CMP_E"); break; - case HLIL_CMP_NE: tokens.Append(OperationToken, "HLIL_CMP_NE"); break; - case HLIL_CMP_SLT: tokens.Append(OperationToken, "HLIL_CMP_SLT"); break; - case HLIL_CMP_ULT: tokens.Append(OperationToken, "HLIL_CMP_ULT"); break; - case HLIL_CMP_SLE: tokens.Append(OperationToken, "HLIL_CMP_SLE"); break; - case HLIL_CMP_ULE: tokens.Append(OperationToken, "HLIL_CMP_ULE"); break; - case HLIL_CMP_SGE: tokens.Append(OperationToken, "HLIL_CMP_SGE"); break; - case HLIL_CMP_UGE: tokens.Append(OperationToken, "HLIL_CMP_UGE"); break; - case HLIL_CMP_SGT: tokens.Append(OperationToken, "HLIL_CMP_SGT"); break; - case HLIL_CMP_UGT: tokens.Append(OperationToken, "HLIL_CMP_UGT"); break; - case HLIL_TEST_BIT: tokens.Append(OperationToken, "HLIL_TEST_BIT"); break; - case HLIL_BOOL_TO_INT: tokens.Append(OperationToken, "HLIL_BOOL_TO_INT"); break; - case HLIL_ADD_OVERFLOW: tokens.Append(OperationToken, "HLIL_ADD_OVERFLOW"); break; - case HLIL_SYSCALL: tokens.Append(OperationToken, "HLIL_SYSCALL"); break; - case HLIL_TAILCALL: tokens.Append(OperationToken, "HLIL_TAILCALL"); break; - case HLIL_INTRINSIC: 
tokens.Append(OperationToken, "HLIL_INTRINSIC"); break; - case HLIL_BP: tokens.Append(OperationToken, "HLIL_BP"); break; - case HLIL_TRAP: tokens.Append(OperationToken, "HLIL_TRAP"); break; - case HLIL_UNDEF: tokens.Append(OperationToken, "HLIL_UNDEF"); break; - case HLIL_UNIMPL: tokens.Append(OperationToken, "HLIL_UNIMPL"); break; - case HLIL_UNIMPL_MEM: tokens.Append(OperationToken, "HLIL_UNIMPL_MEM"); break; - case HLIL_FADD: tokens.Append(OperationToken, "HLIL_FADD"); break; - case HLIL_FSUB: tokens.Append(OperationToken, "HLIL_FSUB"); break; - case HLIL_FMUL: tokens.Append(OperationToken, "HLIL_FMUL"); break; - case HLIL_FDIV: tokens.Append(OperationToken, "HLIL_FDIV"); break; - case HLIL_FSQRT: tokens.Append(OperationToken, "HLIL_FSQRT"); break; - case HLIL_FNEG: tokens.Append(OperationToken, "HLIL_FNEG"); break; - case HLIL_FABS: tokens.Append(OperationToken, "HLIL_FABS"); break; - case HLIL_FLOAT_TO_INT: tokens.Append(OperationToken, "HLIL_FLOAT_TO_INT"); break; - case HLIL_INT_TO_FLOAT: tokens.Append(OperationToken, "HLIL_INT_TO_FLOAT"); break; - case HLIL_FLOAT_CONV: tokens.Append(OperationToken, "HLIL_FLOAT_CONV"); break; - case HLIL_ROUND_TO_INT: tokens.Append(OperationToken, "HLIL_ROUND_TO_INT"); break; - case HLIL_FLOOR: tokens.Append(OperationToken, "HLIL_FLOOR"); break; - case HLIL_CEIL: tokens.Append(OperationToken, "HLIL_CEIL"); break; - case HLIL_FTRUNC: tokens.Append(OperationToken, "HLIL_FTRUNC"); break; - case HLIL_FCMP_E: tokens.Append(OperationToken, "HLIL_FCMP_E"); break; - case HLIL_FCMP_NE: tokens.Append(OperationToken, "HLIL_FCMP_NE"); break; - case HLIL_FCMP_LT: tokens.Append(OperationToken, "HLIL_FCMP_LT"); break; - case HLIL_FCMP_LE: tokens.Append(OperationToken, "HLIL_FCMP_LE"); break; - case HLIL_FCMP_GE: tokens.Append(OperationToken, "HLIL_FCMP_GE"); break; - case HLIL_FCMP_GT: tokens.Append(OperationToken, "HLIL_FCMP_GT"); break; - case HLIL_FCMP_O: tokens.Append(OperationToken, "HLIL_FCMP_O"); break; - case HLIL_FCMP_UO: 
tokens.Append(OperationToken, "HLIL_FCMP_UO"); break; - case HLIL_UNREACHABLE: tokens.Append(OperationToken, "HLIL_UNREACHABLE"); break; - case HLIL_WHILE_SSA: tokens.Append(OperationToken, "HLIL_WHILE_SSA"); break; - case HLIL_DO_WHILE_SSA: tokens.Append(OperationToken, "HLIL_DO_WHILE_SSA"); break; - case HLIL_FOR_SSA: tokens.Append(OperationToken, "HLIL_FOR_SSA"); break; - case HLIL_VAR_INIT_SSA: tokens.Append(OperationToken, "HLIL_VAR_INIT_SSA"); break; - case HLIL_ASSIGN_MEM_SSA: tokens.Append(OperationToken, "HLIL_ASSIGN_MEM_SSA"); break; - case HLIL_ASSIGN_UNPACK_MEM_SSA: tokens.Append(OperationToken, "HLIL_ASSIGN_UNPACK_MEM_SSA"); break; - case HLIL_VAR_SSA: tokens.Append(OperationToken, "HLIL_VAR_SSA"); break; - case HLIL_ARRAY_INDEX_SSA: tokens.Append(OperationToken, "HLIL_ARRAY_INDEX_SSA"); break; - case HLIL_DEREF_SSA: tokens.Append(OperationToken, "HLIL_DEREF_SSA"); break; - case HLIL_DEREF_FIELD_SSA: tokens.Append(OperationToken, "HLIL_DEREF_FIELD_SSA"); break; - case HLIL_CALL_SSA: tokens.Append(OperationToken, "HLIL_CALL_SSA"); break; - case HLIL_SYSCALL_SSA: tokens.Append(OperationToken, "HLIL_SYSCALL_SSA"); break; - case HLIL_INTRINSIC_SSA: tokens.Append(OperationToken, "HLIL_INTRINSIC_SSA"); break; - case HLIL_VAR_PHI: tokens.Append(OperationToken, "HLIL_VAR_PHI"); break; - case HLIL_MEM_PHI: tokens.Append(OperationToken, "HLIL_MEM_PHI"); break; - } + tokens.Append(OperationToken, fmt::format("{}", instr.operation)); tokens.Append(OperationToken, "*/"); tokens.Append(TextToken, " "); } diff --git a/lang/rust/pseudorust.cpp b/lang/rust/pseudorust.cpp index 265d3688f2..703f1efdf4 100644 --- a/lang/rust/pseudorust.cpp +++ b/lang/rust/pseudorust.cpp @@ -586,131 +586,7 @@ void PseudoRustFunction::GetExprText(const HighLevelILInstruction& instr, HighLe if (settings && settings->IsOptionSet(ShowILOpcodes)) { tokens.Append(OperationToken, "/*"); - switch (instr.operation) - { - case HLIL_NOP: tokens.Append(OperationToken, "HLIL_NOP"); break; - case 
HLIL_BLOCK: tokens.Append(OperationToken, "HLIL_BLOCK"); break; - case HLIL_IF: tokens.Append(OperationToken, "HLIL_IF"); break; - case HLIL_WHILE: tokens.Append(OperationToken, "HLIL_WHILE"); break; - case HLIL_DO_WHILE: tokens.Append(OperationToken, "HLIL_DO_WHILE"); break; - case HLIL_FOR: tokens.Append(OperationToken, "HLIL_FOR"); break; - case HLIL_SWITCH: tokens.Append(OperationToken, "HLIL_SWITCH"); break; - case HLIL_CASE: tokens.Append(OperationToken, "HLIL_CASE"); break; - case HLIL_BREAK: tokens.Append(OperationToken, "HLIL_BREAK"); break; - case HLIL_CONTINUE: tokens.Append(OperationToken, "HLIL_CONTINUE"); break; - case HLIL_JUMP: tokens.Append(OperationToken, "HLIL_JUMP"); break; - case HLIL_RET: tokens.Append(OperationToken, "HLIL_RET"); break; - case HLIL_NORET: tokens.Append(OperationToken, "HLIL_NORET"); break; - case HLIL_GOTO: tokens.Append(OperationToken, "HLIL_GOTO"); break; - case HLIL_LABEL: tokens.Append(OperationToken, "HLIL_LABEL"); break; - case HLIL_VAR_DECLARE: tokens.Append(OperationToken, "HLIL_VAR_DECLARE"); break; - case HLIL_VAR_INIT: tokens.Append(OperationToken, "HLIL_VAR_INIT"); break; - case HLIL_ASSIGN: tokens.Append(OperationToken, "HLIL_ASSIGN"); break; - case HLIL_ASSIGN_UNPACK: tokens.Append(OperationToken, "HLIL_ASSIGN_UNPACK"); break; - case HLIL_VAR: tokens.Append(OperationToken, "HLIL_VAR"); break; - case HLIL_STRUCT_FIELD: tokens.Append(OperationToken, "HLIL_STRUCT_FIELD"); break; - case HLIL_ARRAY_INDEX: tokens.Append(OperationToken, "HLIL_ARRAY_INDEX"); break; - case HLIL_SPLIT: tokens.Append(OperationToken, "HLIL_SPLIT"); break; - case HLIL_DEREF: tokens.Append(OperationToken, "HLIL_DEREF"); break; - case HLIL_DEREF_FIELD: tokens.Append(OperationToken, "HLIL_DEREF_FIELD"); break; - case HLIL_ADDRESS_OF: tokens.Append(OperationToken, "HLIL_ADDRESS_OF"); break; - case HLIL_CONST: tokens.Append(OperationToken, "HLIL_CONST"); break; - case HLIL_CONST_DATA: tokens.Append(OperationToken, "HLIL_CONST_DATA"); break; - 
case HLIL_CONST_PTR: tokens.Append(OperationToken, "HLIL_CONST_PTR"); break; - case HLIL_EXTERN_PTR: tokens.Append(OperationToken, "HLIL_EXTERN_PTR"); break; - case HLIL_FLOAT_CONST: tokens.Append(OperationToken, "HLIL_FLOAT_CONST"); break; - case HLIL_IMPORT: tokens.Append(OperationToken, "HLIL_IMPORT"); break; - case HLIL_ADD: tokens.Append(OperationToken, "HLIL_ADD"); break; - case HLIL_ADC: tokens.Append(OperationToken, "HLIL_ADC"); break; - case HLIL_SUB: tokens.Append(OperationToken, "HLIL_SUB"); break; - case HLIL_SBB: tokens.Append(OperationToken, "HLIL_SBB"); break; - case HLIL_AND: tokens.Append(OperationToken, "HLIL_AND"); break; - case HLIL_OR: tokens.Append(OperationToken, "HLIL_OR"); break; - case HLIL_XOR: tokens.Append(OperationToken, "HLIL_XOR"); break; - case HLIL_LSL: tokens.Append(OperationToken, "HLIL_LSL"); break; - case HLIL_LSR: tokens.Append(OperationToken, "HLIL_LSR"); break; - case HLIL_ASR: tokens.Append(OperationToken, "HLIL_ASR"); break; - case HLIL_ROL: tokens.Append(OperationToken, "HLIL_ROL"); break; - case HLIL_RLC: tokens.Append(OperationToken, "HLIL_RLC"); break; - case HLIL_ROR: tokens.Append(OperationToken, "HLIL_ROR"); break; - case HLIL_RRC: tokens.Append(OperationToken, "HLIL_RRC"); break; - case HLIL_MUL: tokens.Append(OperationToken, "HLIL_MUL"); break; - case HLIL_MULU_DP: tokens.Append(OperationToken, "HLIL_MULU_DP"); break; - case HLIL_MULS_DP: tokens.Append(OperationToken, "HLIL_MULS_DP"); break; - case HLIL_DIVU: tokens.Append(OperationToken, "HLIL_DIVU"); break; - case HLIL_DIVU_DP: tokens.Append(OperationToken, "HLIL_DIVU_DP"); break; - case HLIL_DIVS: tokens.Append(OperationToken, "HLIL_DIVS"); break; - case HLIL_DIVS_DP: tokens.Append(OperationToken, "HLIL_DIVS_DP"); break; - case HLIL_MODU: tokens.Append(OperationToken, "HLIL_MODU"); break; - case HLIL_MODU_DP: tokens.Append(OperationToken, "HLIL_MODU_DP"); break; - case HLIL_MODS: tokens.Append(OperationToken, "HLIL_MODS"); break; - case HLIL_MODS_DP: 
tokens.Append(OperationToken, "HLIL_MODS_DP"); break; - case HLIL_NEG: tokens.Append(OperationToken, "HLIL_NEG"); break; - case HLIL_NOT: tokens.Append(OperationToken, "HLIL_NOT"); break; - case HLIL_SX: tokens.Append(OperationToken, "HLIL_SX"); break; - case HLIL_ZX: tokens.Append(OperationToken, "HLIL_ZX"); break; - case HLIL_LOW_PART: tokens.Append(OperationToken, "HLIL_LOW_PART"); break; - case HLIL_CALL: tokens.Append(OperationToken, "HLIL_CALL"); break; - case HLIL_CMP_E: tokens.Append(OperationToken, "HLIL_CMP_E"); break; - case HLIL_CMP_NE: tokens.Append(OperationToken, "HLIL_CMP_NE"); break; - case HLIL_CMP_SLT: tokens.Append(OperationToken, "HLIL_CMP_SLT"); break; - case HLIL_CMP_ULT: tokens.Append(OperationToken, "HLIL_CMP_ULT"); break; - case HLIL_CMP_SLE: tokens.Append(OperationToken, "HLIL_CMP_SLE"); break; - case HLIL_CMP_ULE: tokens.Append(OperationToken, "HLIL_CMP_ULE"); break; - case HLIL_CMP_SGE: tokens.Append(OperationToken, "HLIL_CMP_SGE"); break; - case HLIL_CMP_UGE: tokens.Append(OperationToken, "HLIL_CMP_UGE"); break; - case HLIL_CMP_SGT: tokens.Append(OperationToken, "HLIL_CMP_SGT"); break; - case HLIL_CMP_UGT: tokens.Append(OperationToken, "HLIL_CMP_UGT"); break; - case HLIL_TEST_BIT: tokens.Append(OperationToken, "HLIL_TEST_BIT"); break; - case HLIL_BOOL_TO_INT: tokens.Append(OperationToken, "HLIL_BOOL_TO_INT"); break; - case HLIL_ADD_OVERFLOW: tokens.Append(OperationToken, "HLIL_ADD_OVERFLOW"); break; - case HLIL_SYSCALL: tokens.Append(OperationToken, "HLIL_SYSCALL"); break; - case HLIL_TAILCALL: tokens.Append(OperationToken, "HLIL_TAILCALL"); break; - case HLIL_INTRINSIC: tokens.Append(OperationToken, "HLIL_INTRINSIC"); break; - case HLIL_BP: tokens.Append(OperationToken, "HLIL_BP"); break; - case HLIL_TRAP: tokens.Append(OperationToken, "HLIL_TRAP"); break; - case HLIL_UNDEF: tokens.Append(OperationToken, "HLIL_UNDEF"); break; - case HLIL_UNIMPL: tokens.Append(OperationToken, "HLIL_UNIMPL"); break; - case HLIL_UNIMPL_MEM: 
tokens.Append(OperationToken, "HLIL_UNIMPL_MEM"); break; - case HLIL_FADD: tokens.Append(OperationToken, "HLIL_FADD"); break; - case HLIL_FSUB: tokens.Append(OperationToken, "HLIL_FSUB"); break; - case HLIL_FMUL: tokens.Append(OperationToken, "HLIL_FMUL"); break; - case HLIL_FDIV: tokens.Append(OperationToken, "HLIL_FDIV"); break; - case HLIL_FSQRT: tokens.Append(OperationToken, "HLIL_FSQRT"); break; - case HLIL_FNEG: tokens.Append(OperationToken, "HLIL_FNEG"); break; - case HLIL_FABS: tokens.Append(OperationToken, "HLIL_FABS"); break; - case HLIL_FLOAT_TO_INT: tokens.Append(OperationToken, "HLIL_FLOAT_TO_INT"); break; - case HLIL_INT_TO_FLOAT: tokens.Append(OperationToken, "HLIL_INT_TO_FLOAT"); break; - case HLIL_FLOAT_CONV: tokens.Append(OperationToken, "HLIL_FLOAT_CONV"); break; - case HLIL_ROUND_TO_INT: tokens.Append(OperationToken, "HLIL_ROUND_TO_INT"); break; - case HLIL_FLOOR: tokens.Append(OperationToken, "HLIL_FLOOR"); break; - case HLIL_CEIL: tokens.Append(OperationToken, "HLIL_CEIL"); break; - case HLIL_FTRUNC: tokens.Append(OperationToken, "HLIL_FTRUNC"); break; - case HLIL_FCMP_E: tokens.Append(OperationToken, "HLIL_FCMP_E"); break; - case HLIL_FCMP_NE: tokens.Append(OperationToken, "HLIL_FCMP_NE"); break; - case HLIL_FCMP_LT: tokens.Append(OperationToken, "HLIL_FCMP_LT"); break; - case HLIL_FCMP_LE: tokens.Append(OperationToken, "HLIL_FCMP_LE"); break; - case HLIL_FCMP_GE: tokens.Append(OperationToken, "HLIL_FCMP_GE"); break; - case HLIL_FCMP_GT: tokens.Append(OperationToken, "HLIL_FCMP_GT"); break; - case HLIL_FCMP_O: tokens.Append(OperationToken, "HLIL_FCMP_O"); break; - case HLIL_FCMP_UO: tokens.Append(OperationToken, "HLIL_FCMP_UO"); break; - case HLIL_UNREACHABLE: tokens.Append(OperationToken, "HLIL_UNREACHABLE"); break; - case HLIL_WHILE_SSA: tokens.Append(OperationToken, "HLIL_WHILE_SSA"); break; - case HLIL_DO_WHILE_SSA: tokens.Append(OperationToken, "HLIL_DO_WHILE_SSA"); break; - case HLIL_FOR_SSA: tokens.Append(OperationToken, 
"HLIL_FOR_SSA"); break; - case HLIL_VAR_INIT_SSA: tokens.Append(OperationToken, "HLIL_VAR_INIT_SSA"); break; - case HLIL_ASSIGN_MEM_SSA: tokens.Append(OperationToken, "HLIL_ASSIGN_MEM_SSA"); break; - case HLIL_ASSIGN_UNPACK_MEM_SSA: tokens.Append(OperationToken, "HLIL_ASSIGN_UNPACK_MEM_SSA"); break; - case HLIL_VAR_SSA: tokens.Append(OperationToken, "HLIL_VAR_SSA"); break; - case HLIL_ARRAY_INDEX_SSA: tokens.Append(OperationToken, "HLIL_ARRAY_INDEX_SSA"); break; - case HLIL_DEREF_SSA: tokens.Append(OperationToken, "HLIL_DEREF_SSA"); break; - case HLIL_DEREF_FIELD_SSA: tokens.Append(OperationToken, "HLIL_DEREF_FIELD_SSA"); break; - case HLIL_CALL_SSA: tokens.Append(OperationToken, "HLIL_CALL_SSA"); break; - case HLIL_SYSCALL_SSA: tokens.Append(OperationToken, "HLIL_SYSCALL_SSA"); break; - case HLIL_INTRINSIC_SSA: tokens.Append(OperationToken, "HLIL_INTRINSIC_SSA"); break; - case HLIL_VAR_PHI: tokens.Append(OperationToken, "HLIL_VAR_PHI"); break; - case HLIL_MEM_PHI: tokens.Append(OperationToken, "HLIL_MEM_PHI"); break; - } + tokens.Append(OperationToken, fmt::format("{}", instr.operation)); tokens.Append(OperationToken, "*/"); tokens.Append(TextToken, " "); } From 5716430455143230163265790501642777327084 Mon Sep 17 00:00:00 2001 From: Glenn Smith Date: Fri, 27 Dec 2024 12:45:36 -0500 Subject: [PATCH 10/35] Make pseudo-c use the default type printer --- lang/c/pseudoc.cpp | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/lang/c/pseudoc.cpp b/lang/c/pseudoc.cpp index d89b39119c..dff591ffed 100644 --- a/lang/c/pseudoc.cpp +++ b/lang/c/pseudoc.cpp @@ -177,28 +177,11 @@ BNSymbolDisplayResult PseudoCFunction::AppendPointerTextToken(const HighLevelILI string PseudoCFunction::GetSizeToken(size_t size, bool isSigned) { - char sizeStr[32]; - - switch (size) - { - case 0: - return {}; - case 1: - return (isSigned ? "int8_t" : "uint8_t"); - case 2: - return (isSigned ? 
"int16_t" : "uint16_t"); - case 4: - return (isSigned ? "int32_t" : "uint32_t"); - case 8: - return (isSigned ? "int64_t" : "uint64_t"); - case 10: - return (isSigned ? "int80_t" : "uint80_t"); - case 16: - return (isSigned ? "int128_t" : "uint128_t"); - } - - snprintf(sizeStr, sizeof(sizeStr), "%sint%" PRIuPTR "_t", isSigned ? "" : "u", size); - return {sizeStr}; + return TypePrinter::GetDefault()->GetTypeString( + Type::IntegerType(size, isSigned), + nullptr, + QualifiedName() + ); } @@ -538,7 +521,12 @@ void PseudoCFunction::GetExprTextInternal(const HighLevelILInstruction& instr, H { tokens.AppendOpenParen(); tokens.AppendOpenParen(); - for (auto& token: instr.GetType()->GetTokens(GetArchitecture()->GetStandalonePlatform())) + auto typeTokens = TypePrinter::GetDefault()->GetTypeTokens( + instr.GetType(), + GetArchitecture()->GetStandalonePlatform(), + QualifiedName() + ); + for (auto& token: typeTokens) { tokens.Append(token); } @@ -1055,11 +1043,11 @@ void PseudoCFunction::GetExprTextInternal(const HighLevelILInstruction& instr, H const auto platform = GetHighLevelILFunction()->GetFunction()->GetPlatform(); const auto prevTypeTokens = variableType ? - variableType->GetTokensBeforeName(platform, variableType.GetConfidence()) : + TypePrinter::GetDefault()->GetTypeTokensBeforeName(variableType, platform, variableType.GetConfidence()) : vector{}; const auto postTypeTokens = variableType ? - variableType->GetTokensAfterName(platform, variableType.GetConfidence()) : + TypePrinter::GetDefault()->GetTypeTokensAfterName(variableType, platform, variableType.GetConfidence()) : vector{}; // Check to see if the variable appears live @@ -1121,11 +1109,11 @@ void PseudoCFunction::GetExprTextInternal(const HighLevelILInstruction& instr, H const auto platform = GetHighLevelILFunction()->GetFunction()->GetPlatform(); const auto prevTypeTokens = variableType ? 
- variableType->GetTokensBeforeName(platform, variableType.GetConfidence()) : + TypePrinter::GetDefault()->GetTypeTokensBeforeName(variableType, platform, variableType.GetConfidence()) : vector{}; const auto postTypeTokens = variableType ? - variableType->GetTokensAfterName(platform, variableType.GetConfidence()) : + TypePrinter::GetDefault()->GetTypeTokensAfterName(variableType, platform, variableType.GetConfidence()) : vector{}; if (variableType) From 2356259101133816400254823086942099919442 Mon Sep 17 00:00:00 2001 From: Glenn Smith Date: Fri, 27 Dec 2024 13:41:54 -0500 Subject: [PATCH 11/35] One case of needing the type printer in pseudo rust --- lang/rust/pseudorust.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lang/rust/pseudorust.cpp b/lang/rust/pseudorust.cpp index 703f1efdf4..837a7932ed 100644 --- a/lang/rust/pseudorust.cpp +++ b/lang/rust/pseudorust.cpp @@ -576,7 +576,13 @@ void PseudoRustFunction::GetExprText(const HighLevelILInstruction& instr, HighLe { tokens.AppendOpenParen(); tokens.AppendOpenParen(); - for (auto& token: instr.GetType()->GetTokens(GetArchitecture()->GetStandalonePlatform())) + RustTypePrinter printer; + auto typeTokens = printer.GetTypeTokens( + instr.GetType(), + GetArchitecture()->GetStandalonePlatform(), + QualifiedName() + ); + for (auto& token: typeTokens) { tokens.Append(token); } From 5922737739514174576270927152852743525306 Mon Sep 17 00:00:00 2001 From: Glenn Smith Date: Fri, 27 Dec 2024 14:41:21 -0500 Subject: [PATCH 12/35] Add default implementations to Python TypePrinter to dual C++ --- python/typeprinter.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/python/typeprinter.py b/python/typeprinter.py index e9e085c6e3..ce6a6d688b 100644 --- a/python/typeprinter.py +++ b/python/typeprinter.py @@ -17,7 +17,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
OTHER DEALINGS # IN THE SOFTWARE. - +import abc import ctypes import dataclasses from json import dumps @@ -313,8 +313,15 @@ def get_type_tokens(self, type: types.Type, platform: Optional[_platform.Platfor :param escaping: Style of escaping literals which may not be parsable :return: List of text tokens representing the type """ - raise NotImplementedError() + before = self.get_type_tokens_before_name(type, platform, base_confidence, None, escaping) + after = self.get_type_tokens_after_name(type, platform, base_confidence, None, escaping) + if len(before) > 0 and before[-1].text[-1] != ' ' and before[-1].text[-1] != '*' and before[-1].text[-1] != '&' and len(after) > 0 and after[0].text[0] != ' ': + if type.type_class != types.TypeClass.FunctionTypeClass: + before.append(_function.InstructionTextToken(_function.InstructionTextTokenType.TextToken, " ")) + return before + after + + @abc.abstractmethod def get_type_tokens_before_name(self, type: types.Type, platform: Optional[_platform.Platform] = None, base_confidence: int = core.max_confidence, parent_type: Optional[types.Type] = None, escaping: TokenEscapingType = TokenEscapingType.BackticksTokenEscapingType) -> List[_function.InstructionTextToken]: """ In a single-line text representation of a type, generate the tokens that should @@ -329,6 +336,7 @@ def get_type_tokens_before_name(self, type: types.Type, platform: Optional[_plat """ raise NotImplementedError() + @abc.abstractmethod def get_type_tokens_after_name(self, type: types.Type, platform: Optional[_platform.Platform] = None, base_confidence: int = core.max_confidence, parent_type: Optional[types.Type] = None, escaping: TokenEscapingType = TokenEscapingType.BackticksTokenEscapingType) -> List[_function.InstructionTextToken]: """ In a single-line text representation of a type, generate the tokens that should @@ -353,7 +361,13 @@ def get_type_string(self, type: types.Type, platform: Optional[_platform.Platfor :param escaping: Style of escaping literals which 
may not be parsable :return: String representing the type """ - raise NotImplementedError() + before = self.get_type_string_before_name(type, platform, escaping) + q_name = types.QualifiedName.escape(name, escaping) + after = self.get_type_string_after_name(type, platform, escaping) + if (len(before) > 0 and len(q_name) > 0 and before[-1] != ' ' and q_name[0] != ' ') \ + or (len(before) > 0 and len(after) > 0 and before[-1] != ' ' and after[0] != ' '): + return before + " " + q_name + after + return before + q_name + after def get_type_string_before_name(self, type: types.Type, platform: Optional[_platform.Platform] = None, escaping: TokenEscapingType = TokenEscapingType.BackticksTokenEscapingType) -> str: """ @@ -365,7 +379,8 @@ def get_type_string_before_name(self, type: types.Type, platform: Optional[_plat :param escaping: Style of escaping literals which may not be parsable :return: String representing the type """ - raise NotImplementedError() + tokens = self.get_type_tokens_before_name(type, platform, core.max_confidence, None, escaping) + return ''.join(token.text for token in tokens) def get_type_string_after_name(self, type: types.Type, platform: Optional[_platform.Platform] = None, escaping: TokenEscapingType = TokenEscapingType.BackticksTokenEscapingType) -> str: """ @@ -377,8 +392,10 @@ def get_type_string_after_name(self, type: types.Type, platform: Optional[_platf :param escaping: Style of escaping literals which may not be parsable :return: String representing the type """ - raise NotImplementedError() + tokens = self.get_type_tokens_after_name(type, platform, core.max_confidence, None, escaping) + return ''.join(token.text for token in tokens) + @abc.abstractmethod def get_type_lines(self, type: types.Type, container: 'typecontainer.TypeContainer', name: types.QualifiedNameType, padding_cols = 64, collapsed = False, escaping: TokenEscapingType = TokenEscapingType.BackticksTokenEscapingType) -> List[types.TypeDefinitionLine]: """ Generate a 
multi-line representation of a type From 7323628022239278956504476997716316149930 Mon Sep 17 00:00:00 2001 From: Glenn Smith Date: Fri, 27 Dec 2024 17:25:42 -0500 Subject: [PATCH 13/35] Fix trying to construct a FunctionViewType from a FunctionViewType --- python/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/function.py b/python/function.py index 6e7eef7317..e962d91fbd 100644 --- a/python/function.py +++ b/python/function.py @@ -193,7 +193,7 @@ def __init__(self, view_type: FunctionViewTypeOrName): if isinstance(view_type, FunctionViewType): self.view_type = view_type.view_type self.name = view_type.name - if isinstance(view_type, FunctionGraphType): + elif isinstance(view_type, FunctionGraphType): self.view_type = view_type self.name = None else: From 80fee0a3a32568d042de3ec8ce7eb38c5bbc7140 Mon Sep 17 00:00:00 2001 From: Jordan Wiens Date: Mon, 30 Dec 2024 10:09:33 -0500 Subject: [PATCH 14/35] add note to UI plugin bullet about the qt-build repo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ea35bbe10f..e7b6ecd201 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ cmake --build build -j8 In addition to the default build setup, you may want to: - **Build examples.** To build the [API examples](#examples), pass `-DBN_API_BUILD_EXAMPLES=ON` to CMake when configuring the build. After the build succeeds, you can install the built plugins by running the `install` target. When using the "Unix Makefiles" build generator, this looks like: `make install`. -- **Build UI plugins.** You will need Qt 6.7.2 (as of writing) installed to build UI plugins. +- **Build UI plugins.** You will need Qt 6.7.2 installed to build UI plugins. We use a slightly modified [build configuration](https://github.com/Vector35/qt-build) internally that has some ABI-compatible fixes and changes to defaults, but a stock build can also work. 
Note that it is not recommended to use pre-built configurations from Homebrew. Either using the official installer or building from our repo is recommended. - **Build headlessly.** If you are using a headless Binary Ninja distribution or you do not wish to build UI plugins, pass `-DHEADLESS=ON` to CMake when configuring the build. ### Troubleshooting From 958c87b41aa12d15d85e9649cf6542efd97cf9e7 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Wed, 11 Dec 2024 14:17:49 -0500 Subject: [PATCH 15/35] Move Changelog filtering to UI --- ui/updateinfo.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ui/updateinfo.h b/ui/updateinfo.h index 55102870d5..a3bf0d893d 100644 --- a/ui/updateinfo.h +++ b/ui/updateinfo.h @@ -25,8 +25,9 @@ class BINARYNINJAUIAPI UpdateInfoFetcher : public QObject QString author; QString commit; QString body; - ChangelogEntryItem(const QString& author = "", const QString& commit = "", const QString& body = "") - : author(author), commit(commit), body(body) {}; + bool isHidden = false; + ChangelogEntryItem(const QString& author = "", const QString& commit = "", const QString& body = "", const bool isHidden = false) + : author(author), commit(commit), body(body), isHidden(isHidden) {}; /// In-struct cache for wrapped text mutable QString bodyWrapCache; }; @@ -65,6 +66,7 @@ class BINARYNINJAUIAPI UpdateInfoFetcher : public QObject void startFetch(); const std::vector& getChannels(); const Channel* getActiveChannel(); + std::vector getFilteredChangelog(); signals: void fetchCompleted(const FetchError& error); }; \ No newline at end of file From 3c05103cfaca688f3961b78043e01b1353a238c7 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Mon, 25 Nov 2024 18:40:54 -0500 Subject: [PATCH 16/35] Delete ViewNavigationMode Does not seem to do anything other than cause a crash! 
--- ui/viewframe.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/ui/viewframe.h b/ui/viewframe.h index 520e3f77a4..2cbaeacf83 100644 --- a/ui/viewframe.h +++ b/ui/viewframe.h @@ -122,15 +122,6 @@ class BINARYNINJAUIAPI View bool writeDataToClipboard(const BinaryNinja::DataBuffer& data, bool binary, TransformRef xform); BinaryNinja::DataBuffer readDataFromClipboard(TransformRef xform); - // FIXME: Support for typeview, where the default navigation mode is not compatible with the navigation interface - // The view concept and navigation interface needs to be revisited at some point - // New interface/design should be pushed to NavigationHandler and through API - // The empty string is global navigation (inside view) by default, allows offset to be interpreted by mode - friend class ViewNavigationMode; - virtual std::string getNavigationMode() { return ""; } - virtual void setNavigationMode(std::string mode) { (void)mode; } - virtual std::vector getNavigationModes() { return {}; } - public: View(); virtual ~View() {} @@ -259,25 +250,6 @@ class BINARYNINJAUIAPI View void notifyContextMenuCreated(); }; -/*! - - \ingroup viewframe -*/ -class BINARYNINJAUIAPI ViewNavigationMode -{ - View* m_view; - std::string m_mode; - - ViewNavigationMode(); - - public: - ViewNavigationMode(View* view, std::string mode) : m_view(view) - { - m_mode = m_view->getNavigationMode(); - m_view->setNavigationMode(mode); - } - ~ViewNavigationMode() { m_view->setNavigationMode(m_mode); } -}; /*! From 8b65cc20da3ef624479812c8dff69df2649f405f Mon Sep 17 00:00:00 2001 From: Alexander Taylor Date: Tue, 7 Jan 2025 12:47:16 -0500 Subject: [PATCH 17/35] Bump UI ABI version due to previous commit. 
--- ui/uitypes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/uitypes.h b/ui/uitypes.h index 3c8c2ed361..410dde7002 100644 --- a/ui/uitypes.h +++ b/ui/uitypes.h @@ -6,14 +6,14 @@ // there are changes to the API that affect linking, including new functions, // new types, modifications to existing functions or types, or new versions // of the Qt libraries. -#define BN_CURRENT_UI_ABI_VERSION 8 +#define BN_CURRENT_UI_ABI_VERSION 9 // Minimum ABI version that is supported for loading of plugins. Plugins that // are linked to an ABI version less than this will not be able to load and // will require rebuilding. The minimum version is increased when there are // incompatible changes that break binary compatibility, such as changes to // existing types or functions, or a new version of Qt. -#define BN_MINIMUM_UI_ABI_VERSION 8 +#define BN_MINIMUM_UI_ABI_VERSION 9 #ifdef __GNUC__ #ifdef BINARYNINJAUI_LIBRARY From 7a6b85bbff43d1770bd7cce6fbfaf2475e98c7d4 Mon Sep 17 00:00:00 2001 From: "fosdick.io" <67963637+fosdickio@users.noreply.github.com> Date: Tue, 7 Jan 2025 13:49:31 -0500 Subject: [PATCH 18/35] Add information on the Portal to the Getting Started guide. --- docs/getting-started.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/getting-started.md b/docs/getting-started.md index a747729d54..6b5b30bafe 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -10,7 +10,7 @@ The download links you receive after purchasing expire after 72 hours but as lon ### Linux -Because Linux install locations can vary widely, we do not assume that Binary Ninja has been installed in any particular folder on Linux. Instead, first unzip the installation zip wherever you wish to install Binary Ninja. Next, for paid versions, run `./binaryninja/scripts/linux-setup.sh`. This sets up file associations, icons, and adds Binary Ninja's Python library to your Python path. 
Adding the library to your path is most helpful for headless functionality in the Commercial and Ultimate editions, but even on the Non-Commercial edition it can help your IDE find the api sources to make plugin development easier. Run the script with `-h` to see customization options. +Because Linux install locations can vary widely, we do not assume that Binary Ninja has been installed in any particular folder on Linux. Instead, first unzip the installation zip wherever you wish to install Binary Ninja. Next, for paid versions, run `./binaryninja/scripts/linux-setup.sh`. This sets up file associations, icons, and adds Binary Ninja's Python library to your Python path. Adding the library to your path is most helpful for headless functionality in the commercial editions, but even on the Non-Commercial edition it can help your IDE find the api sources to make plugin development easier. Run the script with `-h` to see customization options. ### macOS @@ -22,7 +22,7 @@ To install on Windows, use the installer linked from the email you received afte ## License -When you first run Binary Ninja, it will prompt you for your license key. You should have received your license key via the same email that included your download links. If not, please contact [support](https://binary.ninja/support). +When you first run Binary Ninja, it will prompt you for your license key. You should have received your license key via the same email that included your download links. Additionally, you can manage your licenses in the [Binary Ninja Portal](https://portal.binary.ninja) or by contacting [support](https://binary.ninja/support). ## Opening Files From 4e321006598404093544cae8e58b452b21323e09 Mon Sep 17 00:00:00 2001 From: "fosdick.io" <67963637+fosdickio@users.noreply.github.com> Date: Tue, 7 Jan 2025 14:38:51 -0500 Subject: [PATCH 19/35] Verbiage change in Getting Started doc. 
--- docs/getting-started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting-started.md b/docs/getting-started.md index 6b5b30bafe..6e0c8601b2 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -10,7 +10,7 @@ The download links you receive after purchasing expire after 72 hours but as lon ### Linux -Because Linux install locations can vary widely, we do not assume that Binary Ninja has been installed in any particular folder on Linux. Instead, first unzip the installation zip wherever you wish to install Binary Ninja. Next, for paid versions, run `./binaryninja/scripts/linux-setup.sh`. This sets up file associations, icons, and adds Binary Ninja's Python library to your Python path. Adding the library to your path is most helpful for headless functionality in the commercial editions, but even on the Non-Commercial edition it can help your IDE find the api sources to make plugin development easier. Run the script with `-h` to see customization options. +Because Linux install locations can vary widely, we do not assume that Binary Ninja has been installed in any particular folder on Linux. Instead, first unzip the installation zip wherever you wish to install Binary Ninja. Next, for paid versions, run `./binaryninja/scripts/linux-setup.sh`. This sets up file associations, icons, and adds Binary Ninja's Python library to your Python path. Adding the library to your path is most helpful for headless functionality in the Commercial and Ultimate editions, but even on the Non-Commercial edition it can help your IDE find the api sources to make plugin development easier. Run the script with `-h` to see customization options. 
### macOS From cc763dc7a962e13442c1bcfa4d36ba59c94c4f88 Mon Sep 17 00:00:00 2001 From: Josh Ferrell Date: Tue, 7 Jan 2025 16:32:09 -0500 Subject: [PATCH 20/35] Add setting to limit maximum recursion depth for computing an expression value in an mlil dataflow query --- docs/guide/settings.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/guide/settings.md b/docs/guide/settings.md index f7afbdcb6e..9464dc3045 100644 --- a/docs/guide/settings.md +++ b/docs/guide/settings.md @@ -71,6 +71,7 @@ All settings are uniquely identified with an identifier string. Identifiers are |analysis|Initial Analysis Hold|When enabled, this setting pauses analysis upon opening the file, preventing all further analysis updates until the hold is manually cleared. It applies once per file load and must be set again for each new session, if needed.|`boolean`|`False`|[`SettingsProjectScope`, `SettingsResourceScope`, `SettingsUserScope`]|analysis.initialAnalysisHold| |analysis|Keep Dead Code Branches|Keep unreachable code branches and associated basic blocks in HLIL.|`boolean`|`False`|[`SettingsProjectScope`, `SettingsResourceScope`, `SettingsUserScope`]|analysis.keepDeadCodeBranches| |analysis|Advanced Analysis Cache Size|Controls the number of functions for which the most recent generated advanced analysis is cached. Large values may result in very high memory utilization.|`number`|`64`|[`SettingsProjectScope`, `SettingsResourceScope`, `SettingsUserScope`]|analysis.limits.cacheSize| +|analysis|Maximum Recursion Depth for Expression Value Computation|Maximum depth to recurse when computing the value for an expression via mlil ssa.|`number`|`512`|[`SettingsProjectScope`, `SettingsResourceScope`, `SettingsUserScope`]|analysis.limits.expressionValueComputeMaxDepth| |analysis|Max Function Analysis Time|Any functions that exceed this analysis time are deferred. A value of 0 disables this feature. The default value is 20 seconds. 
Time is specified in milliseconds.|`number`|`20000`|[`SettingsProjectScope`, `SettingsResourceScope`, `SettingsUserScope`]|analysis.limits.maxFunctionAnalysisTime| |analysis|Max Function Size|Any functions over this size will not be automatically analyzed. A value of 0 disables this feature. Size is specified in bytes.|`number`|`65536`|[`SettingsProjectScope`, `SettingsResourceScope`, `SettingsUserScope`]|analysis.limits.maxFunctionSize| |analysis|Max Function Update Count|Any functions that exceed this incremental update count are deferred. A value of 0 disables this feature.|`number`|`100`|[`SettingsProjectScope`, `SettingsResourceScope`, `SettingsUserScope`]|analysis.limits.maxFunctionUpdateCount| @@ -502,4 +503,4 @@ All settings are uniquely identified with an identifier string. Identifiers are |updates|Show All Versions|Show all versions that are available for the current update channel in the UI.|`boolean`|`False`|[`SettingsUserScope`]|updates.showAllVersions| |updates|Use Enterprise Server For Updates|Use the enterprise server to check for updates. 
(Ultimate Only)|`boolean`|`False`|[`SettingsUserScope`]|updates.useEnterpriseServer| |user|Email|The email that will be shown when collaborating with other users.|`string`| |[`SettingsUserScope`]|user.email| -|user|Name|The name that will be shown when collaborating with other users.|`string`| |[`SettingsUserScope`]|user.name| \ No newline at end of file +|user|Name|The name that will be shown when collaborating with other users.|`string`| |[`SettingsUserScope`]|user.name| From b34393c33a82050dcb5e65d5fc7d36960c18e167 Mon Sep 17 00:00:00 2001 From: Josh Ferrell Date: Wed, 8 Jan 2025 14:52:33 -0500 Subject: [PATCH 21/35] Prevent crash when Mach-O view doesn't exist --- platform/mac/platform_mac.cpp | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/platform/mac/platform_mac.cpp b/platform/mac/platform_mac.cpp index 6c2adf7aa3..2eec7f530b 100644 --- a/platform/mac/platform_mac.cpp +++ b/platform/mac/platform_mac.cpp @@ -283,13 +283,14 @@ extern "C" BINARYNINJAPLUGIN bool CorePluginInit() #endif { - auto viewType = BinaryViewType::GetByName("Mach-O"); + Ref viewType = BinaryViewType::GetByName("Mach-O"); Ref x86 = Architecture::GetByName("x86"); if (x86) { g_macX86 = new MacX86Platform(x86); Platform::Register("mac", g_macX86); - viewType->RegisterPlatformRecognizer(7, LittleEndian, MacX86Platform::Recognize); + if (viewType) + viewType->RegisterPlatformRecognizer(7, LittleEndian, MacX86Platform::Recognize); } Ref x64 = Architecture::GetByName("x86_64"); @@ -297,7 +298,8 @@ extern "C" { g_macX64 = new MacX64Platform(x64); Platform::Register("mac", g_macX64); - viewType->RegisterPlatformRecognizer(0x01000007, LittleEndian, MacX64Platform::Recognize); + if (viewType) + viewType->RegisterPlatformRecognizer(0x01000007, LittleEndian, MacX64Platform::Recognize); } Ref armv7 = Architecture::GetByName("armv7"); @@ -316,8 +318,11 @@ extern "C" Platform::Register("ios", g_iosArmv7); Platform::Register("mac", g_macThumb2); 
Platform::Register("ios", g_iosThumb2); - viewType->RegisterPlatformRecognizer(0xc, LittleEndian, MacArmv7Platform::Recognize); - viewType->RegisterPlatformRecognizer(0xc, LittleEndian, IOSArmv7Platform::Recognize); + if (viewType) + { + viewType->RegisterPlatformRecognizer(0xc, LittleEndian, MacArmv7Platform::Recognize); + viewType->RegisterPlatformRecognizer(0xc, LittleEndian, IOSArmv7Platform::Recognize); + } } Ref arm64 = Architecture::GetByName("aarch64"); @@ -327,11 +332,14 @@ extern "C" g_iosArm64 = new IOSArm64Platform(arm64); Platform::Register("mac", g_macArm64); Platform::Register("ios", g_iosArm64); - viewType->RegisterPlatformRecognizer(0, LittleEndian, MacArm64Platform::Recognize); - viewType->RegisterPlatformRecognizer(0x0100000c, LittleEndian, MacArm64Platform::Recognize); - viewType->RegisterPlatformRecognizer(0x0200000c, LittleEndian, MacArm64Platform::Recognize); - viewType->RegisterPlatformRecognizer(0, LittleEndian, IOSArm64Platform::Recognize); - viewType->RegisterPlatformRecognizer(0x0100000c, LittleEndian, IOSArm64Platform::Recognize); + if (viewType) + { + viewType->RegisterPlatformRecognizer(0, LittleEndian, MacArm64Platform::Recognize); + viewType->RegisterPlatformRecognizer(0x0100000c, LittleEndian, MacArm64Platform::Recognize); + viewType->RegisterPlatformRecognizer(0x0200000c, LittleEndian, MacArm64Platform::Recognize); + viewType->RegisterPlatformRecognizer(0, LittleEndian, IOSArm64Platform::Recognize); + viewType->RegisterPlatformRecognizer(0x0100000c, LittleEndian, IOSArm64Platform::Recognize); + } } return true; From faac469f6fb026ebd91e6b93169046bdd2bae320 Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Sun, 29 Dec 2024 13:45:27 +0000 Subject: [PATCH 22/35] [SharedCache] Fix uninitialized `loaded` field for mappings returned by `BNDSCViewGetAllImages` --- view/sharedcache/core/SharedCache.cpp | 11 +++++++++-- view/sharedcache/core/SharedCache.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git 
a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 51e95bc6a7..d45785e877 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -3059,6 +3059,11 @@ std::vector SharedCache::GetMappedRegions() const return State().regionsMappedIntoMemory; } +bool SharedCache::IsMemoryMapped(uint64_t address) +{ + return m_dscView->IsValidOffset(address); +} + extern "C" { BNSharedCache* BNGetSharedCache(BNBinaryView* data) @@ -3322,11 +3327,13 @@ extern "C" images[i].mappings = (BNDSCImageMemoryMapping*)malloc(sizeof(BNDSCImageMemoryMapping) * header.sections.size()); for (size_t j = 0; j < header.sections.size(); j++) { + const auto sectionStart = header.sections[j].addr; images[i].mappings[j].rawViewOffset = header.sections[j].offset; - images[i].mappings[j].vmAddress = header.sections[j].addr; + images[i].mappings[j].vmAddress = sectionStart; images[i].mappings[j].size = header.sections[j].size; images[i].mappings[j].name = BNAllocString(header.sectionNames[j].c_str()); - images[i].mappings[j].filePath = BNAllocString(vm->MappingAtAddress(header.sections[j].addr).first.filePath.c_str()); + images[i].mappings[j].filePath = BNAllocString(vm->MappingAtAddress(sectionStart).first.filePath.c_str()); + images[i].mappings[j].loaded = cache->object->IsMemoryMapped(sectionStart); } i++; } diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 8fa98c05c5..3a521894e1 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -581,6 +581,7 @@ namespace SharedCacheCore { std::vector GetAvailableImages(); std::vector GetMappedRegions() const; + bool IsMemoryMapped(uint64_t address); std::vector>> LoadAllSymbolsAndWait(); From f4cebd8e48385868161f32c7de7fd25c55ba89b6 Mon Sep 17 00:00:00 2001 From: Brian Potchik Date: Thu, 9 Jan 2025 12:25:29 -0500 Subject: [PATCH 23/35] Deprecate `Segment.serialize` and `Section.serialize` in favor of 
descriptor-based classes. --- python/binaryview.py | 93 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 6 deletions(-) diff --git a/python/binaryview.py b/python/binaryview.py index 06783723eb..73ddfd3929 100644 --- a/python/binaryview.py +++ b/python/binaryview.py @@ -1413,6 +1413,7 @@ def __repr__(self): return f"" @classmethod + @deprecation.deprecated(deprecated_in="4.3.6653", details="Use `SegmentDescriptorList` instead.") def serialize(cls, image_base: int, start: int, length: int, data_offset: int=0, data_length: int=0, flags: 'SegmentFlag'=SegmentFlag.SegmentReadable, auto_defined=True, segments: str="[]"): """ Serialize segment parameters into a JSON string. This is useful for generating a properly formatted segment description as options when using `load`. @@ -1431,9 +1432,10 @@ def serialize(cls, image_base: int, start: int, length: int, data_offset: int=0, :Example:: >>> base = 0x400000 >>> rom_base = 0xffff0000 - >>> segments = Segment.serialize(image_base=base, start=base, length=0x1000, data_offset=0, data_length=0x1000, flags=SegmentFlag.SegmentReadable|SegmentFlag.SegmentExecutable) - >>> segments = Segment.serialize(image_base=base, start=rom_base, length=0x1000, flags=SegmentFlag.SegmentReadable, segments=segments) - >>> view = load(bytes.fromhex('5054ebfe'), options={'loader.imageBase': base, 'loader.platform': 'x86', 'loader.segments': segments}) + >>> segments = SegmentDescriptorList(base) + >>> segments.append(start=base, length=0x1000, data_offset=0, data_length=0x1000, flags=SegmentFlag.SegmentReadable|SegmentFlag.SegmentExecutable) + >>> segments.append(start=rom_base, length=0x1000, flags=SegmentFlag.SegmentReadable) + >>> view = load(bytes.fromhex('5054ebfe'), options={'loader.imageBase': base, 'loader.platform': 'x86', 'loader.segments': json.dumps(segments)}) """ segments_list = json.loads(segments) segment_info = { @@ -1507,6 +1509,39 @@ def auto_defined(self) -> bool: return 
core.BNSegmentIsAutoDefined(self.handle) +class SegmentDescriptorList(list): + def __init__(self, image_base: int): + """ + Initialize the SegmentDescriptorList with a base image address. + + :param int image_base: The base address of the image. + """ + super().__init__() + self.image_base = image_base + + def append(self, start: int, length: int, data_offset: int = 0, data_length: int = 0, + flags: 'SegmentFlag' = SegmentFlag.SegmentReadable, auto_defined: bool = True): + """ + Append a segment descriptor to the list. + + :param int start: The start address of the segment. + :param int length: The length of the segment. + :param int data_offset: The offset of the data within the segment. + :param int data_length: The length of the data within the segment. + :param SegmentFlag flags: The flags of the segment. + :param bool auto_defined: Whether the segment is auto-defined. + """ + segment_info = { + "start": start - self.image_base, + "length": length, + "data_offset": data_offset, + "data_length": data_length, + "flags": flags, + "auto_defined": auto_defined + } + super().append(segment_info) + + class Section: """ The ``Section`` object is returned during BinaryView creation and should not be directly instantiated. @@ -1541,6 +1576,7 @@ def __contains__(self, i: int): return i >= self.start and i < self.end @classmethod + @deprecation.deprecated(deprecated_in="4.3.6653", details="Use `SectionDescriptorList` instead.") def serialize(cls, image_base: int, name: str, start: int, length: int, semantics: SectionSemantics=SectionSemantics.DefaultSectionSemantics, type: str="", align: int=1, entry_size: int=0, link: str="", info_section: str="", info_data: int=0, auto_defined: bool=True, sections: str="[]"): """ Serialize section parameters into a JSON string. This is useful for generating a properly formatted section description as options when using `load`. 
@@ -1627,6 +1663,50 @@ def end(self) -> int: return self.start + self.length +class SectionDescriptorList(list): + def __init__(self, image_base: int): + """ + Initialize the SectionDescriptorList with a base image address. + + :param int image_base: The base address of the image. + """ + super().__init__() + self.image_base = image_base + + def append(self, name: str, start: int, length: int, semantics: 'SectionSemantics' = SectionSemantics.DefaultSectionSemantics, + type: str = "", align: int = 1, entry_size: int = 0, link: str = "", info_section: str = "", + info_data: int = 0, auto_defined: bool = True): + """ + Append a section descriptor to the list. + + :param str name: The name of the section. + :param int start: The start address of the section. + :param int length: The length of the section. + :param SectionSemantics semantics: The semantics of the section. + :param str type: The type of the section. + :param int align: The alignment of the section. + :param int entry_size: The size of each entry in the section. + :param str link: An optional link field. + :param str info_section: An optional info_section field. + :param int info_data: An optional info_data field. + :param bool auto_defined: Whether the section is auto-defined. + """ + section_info = { + "name": name, + "start": start - self.image_base, + "length": length, + "semantics": semantics, + "type": type, + "align": align, + "entry_size": entry_size, + "link": link, + "info_section": info_section, + "info_data": info_data, + "auto_defined": auto_defined + } + super().append(section_info) + + class TagType: """ The ``TagType`` object is created by the create_tag_type API and should not be directly instantiated. 
@@ -2166,9 +2246,10 @@ class MemoryMap: >>> base = 0x10000 >>> rom_base = 0xc0000000 - >>> segments = Segment.serialize(image_base=base, start=base, length=0x1000, data_offset=0, data_length=0x1000, flags=SegmentFlag.SegmentReadable|SegmentFlag.SegmentExecutable) - >>> segments = Segment.serialize(image_base=base, start=rom_base, length=0x1000, flags=SegmentFlag.SegmentReadable, segments=segments) - >>> view = load(bytes.fromhex('5054ebfe'), options={'loader.imageBase': base, 'loader.platform': 'x86', 'loader.segments': segments}) + >>> segments = SegmentDescriptorList(base) + >>> segments.append(start=base, length=0x1000, data_offset=0, data_length=0x1000, flags=SegmentFlag.SegmentReadable|SegmentFlag.SegmentExecutable) + >>> segments.append(start=rom_base, length=0x1000, flags=SegmentFlag.SegmentReadable) + >>> view = load(bytes.fromhex('5054ebfe'), options={'loader.imageBase': base, 'loader.platform': 'x86', 'loader.segments': json.dumps(segments)}) >>> view.memory_map size: 0x4 From 7630dcd1686e56a51d71545a5ac4880410b429d5 Mon Sep 17 00:00:00 2001 From: Brian Potchik Date: Thu, 9 Jan 2025 17:27:11 -0500 Subject: [PATCH 24/35] Add BNAllocStringWithLength API. --- binaryninjacore.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/binaryninjacore.h b/binaryninjacore.h index 5e090d95f3..2fe0861400 100644 --- a/binaryninjacore.h +++ b/binaryninjacore.h @@ -37,7 +37,7 @@ // Current ABI version for linking to the core. This is incremented any time // there are changes to the API that affect linking, including new functions, // new types, or modifications to existing functions or types. -#define BN_CURRENT_CORE_ABI_VERSION 86 +#define BN_CURRENT_CORE_ABI_VERSION 87 // Minimum ABI version that is supported for loading of plugins. 
Plugins that // are linked to an ABI version less than this will not be able to load and @@ -3539,6 +3539,7 @@ extern "C" BINARYNINJACOREAPI char* BNAllocString(const char* contents); + BINARYNINJACOREAPI char* BNAllocStringWithLength(const char* contents, size_t len); BINARYNINJACOREAPI void BNFreeString(char* str); BINARYNINJACOREAPI char** BNAllocStringList(const char** contents, size_t size); BINARYNINJACOREAPI void BNFreeStringList(char** strs, size_t count); From 1dc9179158d8af2555e9182c6e0425b86d378909 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Sun, 24 Nov 2024 16:54:16 -0800 Subject: [PATCH 25/35] [SharedCache] Split view-specific state into a separate struct The existing view-specific state was stored in several global unordered maps. Many of these were accessed without locking, including `viewSpecificMutexes`, which is racy in the face of multiple threads. View-specific state is stored in a new heap-allocated `ViewSpecificState` struct that is reference counted via `std::shared_ptr`. A static map holds a `std::weak_ptr` to each view-specific state, keyed by session id. `SharedCache` retrieves its view-specific state during its constructor. Since `ViewSpecificState` is reference counted it will naturally be deallocated when the last `SharedCache` instance that references it goes away. Its corresponding entry will remain in the static map, though since it only holds a `std::weak_ptr` rather than any state it will not use much memory. The next time view-specific state is retrieved any expired entries will be removed from the map. 
--- view/sharedcache/core/SharedCache.cpp | 164 +++++++++++++++----------- view/sharedcache/core/SharedCache.h | 3 + 2 files changed, 96 insertions(+), 71 deletions(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index d45785e877..cafa6bd826 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -27,6 +27,8 @@ #include "SharedCache.h" #include "ObjC.h" #include +#include +#include #include #include #include @@ -78,19 +80,51 @@ struct SharedCache::State DSCViewState viewState = DSCViewStateUnloaded; }; -static std::recursive_mutex viewStateMutex; -static std::unordered_map> viewStateCache; +struct SharedCache::ViewSpecificState { + std::mutex typeLibraryMutex; + std::mutex viewOperationsThatInfluenceMetadataMutex; -std::mutex progressMutex; -std::unordered_map progressMap; + std::atomic progress; -struct ViewSpecificMutexes { - std::mutex viewOperationsThatInfluenceMetadataMutex; - std::mutex typeLibraryLookupAndApplicationMutex; + std::mutex stateMutex; + std::shared_ptr cachedState; }; -static std::unordered_map viewSpecificMutexes; +std::shared_ptr ViewSpecificStateForId(uint64_t viewIdentifier, bool insertIfNeeded = true) { + static std::mutex viewSpecificStateMutex; + static std::unordered_map> viewSpecificState; + + std::lock_guard lock(viewSpecificStateMutex); + + if (auto it = viewSpecificState.find(viewIdentifier); it != viewSpecificState.end()) { + if (auto statePtr = it->second.lock()) { + return statePtr; + } + } + + if (!insertIfNeeded) { + return nullptr; + } + + auto statePtr = std::make_shared(); + viewSpecificState[viewIdentifier] = statePtr; + + // Prune entries for any views that are no longer in use. 
+ for (auto it = viewSpecificState.begin(); it != viewSpecificState.end(); ) { + if (it->second.expired()) { + it = viewSpecificState.erase(it); + } else { + ++it; + } + } + + return statePtr; +} + +std::shared_ptr ViewSpecificStateForView(Ref view) { + return ViewSpecificStateForId(view->GetFile()->GetSessionId()); +} std::string base_name(std::string const& path) { @@ -220,9 +254,7 @@ void SharedCache::PerformInitialLoad() auto path = m_dscView->GetFile()->GetOriginalFilename(); auto baseFile = MMappedFileAccessor::Open(m_dscView, m_dscView->GetFile()->GetSessionId(), path)->lock(); - progressMutex.lock(); - progressMap[m_dscView->GetFile()->GetSessionId()] = LoadProgressLoadingCaches; - progressMutex.unlock(); + m_viewSpecificState->progress = LoadProgressLoadingCaches; WillMutateState(); @@ -737,9 +769,8 @@ void SharedCache::PerformInitialLoad() } } baseFile.reset(); - progressMutex.lock(); - progressMap[m_dscView->GetFile()->GetSessionId()] = LoadProgressLoadingImages; - progressMutex.unlock(); + + m_viewSpecificState->progress = LoadProgressLoadingImages; // We have set up enough metadata to map VM now. 
@@ -952,9 +983,7 @@ void SharedCache::PerformInitialLoad() m_logger->LogDebug("Finished initial load of Shared Cache"); - progressMutex.lock(); - progressMap[m_dscView->GetFile()->GetSessionId()] = LoadProgressFinished; - progressMutex.unlock(); + m_viewSpecificState->progress = LoadProgressFinished; } std::shared_ptr SharedCache::GetVMMap(bool mapPages) @@ -983,10 +1012,10 @@ void SharedCache::DeserializeFromRawView() { if (m_dscView->QueryMetadata(SharedCacheMetadataTag)) { - std::unique_lock viewStateCacheLock(viewStateMutex); - if (auto it = viewStateCache.find(m_dscView->GetFile()->GetSessionId()); it != viewStateCache.end()) + std::lock_guard lock(m_viewSpecificState->stateMutex); + if (m_viewSpecificState->cachedState) { - m_state = it->second; + m_state = m_viewSpecificState->cachedState; m_stateIsShared = true; m_metadataValid = true; } @@ -1367,7 +1396,7 @@ void SharedCache::ParseAndApplySlideInfoForFile(std::shared_ptr dscView) : m_dscView(dscView) +SharedCache::SharedCache(BinaryNinja::Ref dscView) : m_dscView(dscView), m_viewSpecificState(ViewSpecificStateForView(dscView)) { if (dscView->GetTypeName() != VIEW_NAME) { @@ -1381,46 +1410,43 @@ SharedCache::SharedCache(BinaryNinja::Ref dscView) : m_ DeserializeFromRawView(); if (!m_metadataValid) return; - if (State().viewState == DSCViewStateUnloaded) + + if (State().viewState != DSCViewStateUnloaded) { + m_viewSpecificState->progress = LoadProgressFinished; + return; + } + + std::unique_lock lock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); + try { + PerformInitialLoad(); + } + catch (...) { - std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); - try { - PerformInitialLoad(); - } - catch (...) 
- { - m_logger->LogError("Failed to perform initial load of Shared Cache"); - } + m_logger->LogError("Failed to perform initial load of Shared Cache"); + } - auto settings = m_dscView->GetLoadSettings(VIEW_NAME); - bool autoLoadLibsystem = true; - if (settings && settings->Contains("loader.dsc.autoLoadLibSystem")) - { - autoLoadLibsystem = settings->Get("loader.dsc.autoLoadLibSystem", m_dscView); - } - if (autoLoadLibsystem) + auto settings = m_dscView->GetLoadSettings(VIEW_NAME); + bool autoLoadLibsystem = true; + if (settings && settings->Contains("loader.dsc.autoLoadLibSystem")) + { + autoLoadLibsystem = settings->Get("loader.dsc.autoLoadLibSystem", m_dscView); + } + if (autoLoadLibsystem) + { + for (const auto& [_, header] : State().headers) { - for (const auto& [_, header] : State().headers) + if (header.installName.find("libsystem_c.dylib") != std::string::npos) { - if (header.installName.find("libsystem_c.dylib") != std::string::npos) - { - lock.unlock(); - m_logger->LogInfo("Loading core libsystem_c.dylib library"); - LoadImageWithInstallName(header.installName, false); - lock.lock(); - break; - } + lock.unlock(); + m_logger->LogInfo("Loading core libsystem_c.dylib library"); + LoadImageWithInstallName(header.installName, false); + break; } } - MutableState().viewState = DSCViewStateLoaded; - SaveToDSCView(); - } - else - { - progressMutex.lock(); - progressMap[m_dscView->GetFile()->GetSessionId()] = LoadProgressFinished; - progressMutex.unlock(); } + + MutableState().viewState = DSCViewStateLoaded; + SaveToDSCView(); } SharedCache::~SharedCache() { @@ -1536,7 +1562,7 @@ bool SharedCache::LoadImageContainingAddress(uint64_t address, bool skipObjC) bool SharedCache::LoadSectionAtAddress(uint64_t address) { - std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); + std::unique_lock lock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); DeserializeFromRawView(); 
WillMutateState(); @@ -1806,7 +1832,7 @@ bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObj { auto settings = m_dscView->GetLoadSettings(VIEW_NAME); - std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); + std::unique_lock lock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); DeserializeFromRawView(); WillMutateState(); @@ -1880,7 +1906,7 @@ bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObj return false; } - std::unique_lock typelibLock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].typeLibraryLookupAndApplicationMutex); + std::unique_lock typelibLock(m_viewSpecificState->typeLibraryMutex); auto typeLib = m_dscView->GetTypeLibrary(header.installName); if (!typeLib) @@ -2875,7 +2901,7 @@ std::vector>> SharedCache::LoadAllSymbolsAndW { WillMutateState(); - std::unique_lock initialLoadBlock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); + std::lock_guard initialLoadBlock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); std::vector>> symbols; for (const auto& img : State().images) @@ -2973,7 +2999,7 @@ void SharedCache::FindSymbolAtAddrAndApplyToAddr( } auto exportList = SharedCache::ParseExportTrie(mapping, *header); std::vector>> exportMapping; - std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].typeLibraryLookupAndApplicationMutex); + std::unique_lock lock(m_viewSpecificState->typeLibraryMutex); auto typeLib = m_dscView->GetTypeLibrary(header->installName); if (!typeLib) { @@ -3020,7 +3046,7 @@ void SharedCache::FindSymbolAtAddrAndApplyToAddr( } } { - std::unique_lock _lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); + std::lock_guard lock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); MutableState().exportInfos[header->textBase] = 
std::move(exportMapping); } m_dscView->EndBulkModifySymbols(); @@ -3044,8 +3070,8 @@ bool SharedCache::SaveToDSCView() m_state = cachedState; m_stateIsShared = true; - std::unique_lock viewStateCacheLock(viewStateMutex); - viewStateCache[m_dscView->GetFile()->GetSessionId()] = std::move(cachedState); + std::lock_guard lock(m_viewSpecificState->stateMutex); + m_viewSpecificState->cachedState = std::move(cachedState); m_metadataValid = true; @@ -3055,7 +3081,7 @@ bool SharedCache::SaveToDSCView() } std::vector SharedCache::GetMappedRegions() const { - std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); + std::lock_guard lock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); return State().regionsMappedIntoMemory; } @@ -3392,15 +3418,11 @@ extern "C" BNDSCViewLoadProgress BNDSCViewGetLoadProgress(uint64_t sessionID) { - progressMutex.lock(); - if (progressMap.find(sessionID) == progressMap.end()) - { - progressMutex.unlock(); - return LoadProgressNotStarted; + if (auto viewSpecificState = ViewSpecificStateForId(sessionID, false)) { + return viewSpecificState->progress; } - auto progress = progressMap[sessionID]; - progressMutex.unlock(); - return progress; + + return LoadProgressNotStarted; } uint64_t BNDSCViewFastGetBackingCacheCount(BNBinaryView* data) diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 3a521894e1..7b19830551 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -538,6 +538,8 @@ namespace SharedCacheCore { struct State; + struct ViewSpecificState; + private: Ref m_logger; /* VIEW STATE BEGIN -- SERIALIZE ALL OF THIS AND STORE IT IN RAW VIEW */ @@ -552,6 +554,7 @@ namespace SharedCacheCore { bool m_metadataValid = false; /* VIEWSTATE END -- NOTHING PAST THIS IS SERIALIZED */ + std::shared_ptr m_viewSpecificState; /* API VIEW START */ BinaryNinja::Ref m_dscView; From 
1f0be2385be2c796152e12ffebc229f10e3ffc28 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Sun, 24 Nov 2024 16:55:39 -0800 Subject: [PATCH 26/35] [SharedCache] Cache type libraries in the view-specific state They're surprisingly expensive to look up. --- view/sharedcache/core/SharedCache.cpp | 49 ++++++++++++--------------- view/sharedcache/core/SharedCache.h | 3 ++ 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index cafa6bd826..b734a6b192 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -82,6 +82,8 @@ struct SharedCache::State struct SharedCache::ViewSpecificState { std::mutex typeLibraryMutex; + std::unordered_map> typeLibraries; + std::mutex viewOperationsThatInfluenceMetadataMutex; std::atomic progress; @@ -1906,21 +1908,7 @@ bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObj return false; } - std::unique_lock typelibLock(m_viewSpecificState->typeLibraryMutex); - auto typeLib = m_dscView->GetTypeLibrary(header.installName); - - if (!typeLib) - { - auto typeLibs = m_dscView->GetDefaultPlatform()->GetTypeLibrariesByName(header.installName); - if (!typeLibs.empty()) - { - typeLib = typeLibs[0]; - m_dscView->AddTypeLibrary(typeLib); - m_logger->LogInfo("shared-cache: adding type library for '%s': %s (%s)", - targetImage->installName.c_str(), typeLib->GetName().c_str(), typeLib->GetGuid().c_str()); - } - } - typelibLock.unlock(); + auto typeLib = TypeLibraryForImage(header.installName); SaveToDSCView(); @@ -2955,6 +2943,24 @@ std::string SharedCache::SerializedImageHeaderForName(std::string name) return ""; } +Ref SharedCache::TypeLibraryForImage(const std::string& installName) { + std::lock_guard lock(m_viewSpecificState->typeLibraryMutex); + if (auto it = m_viewSpecificState->typeLibraries.find(installName); it != m_viewSpecificState->typeLibraries.end()) { + return it->second; + } + + auto 
typeLib = m_dscView->GetTypeLibrary(installName); + if (!typeLib) { + auto typeLibs = m_dscView->GetDefaultPlatform()->GetTypeLibrariesByName(installName); + if (!typeLibs.empty()) { + typeLib = typeLibs[0]; + m_dscView->AddTypeLibrary(typeLib); + } + } + + m_viewSpecificState->typeLibraries[installName] = typeLib; + return typeLib; +} void SharedCache::FindSymbolAtAddrAndApplyToAddr( uint64_t symbolLocation, uint64_t targetLocation, bool triggerReanalysis) @@ -2999,18 +3005,7 @@ void SharedCache::FindSymbolAtAddrAndApplyToAddr( } auto exportList = SharedCache::ParseExportTrie(mapping, *header); std::vector>> exportMapping; - std::unique_lock lock(m_viewSpecificState->typeLibraryMutex); - auto typeLib = m_dscView->GetTypeLibrary(header->installName); - if (!typeLib) - { - auto typeLibs = m_dscView->GetDefaultPlatform()->GetTypeLibrariesByName(header->installName); - if (!typeLibs.empty()) - { - typeLib = typeLibs[0]; - m_dscView->AddTypeLibrary(typeLib); - } - } - lock.unlock(); + auto typeLib = TypeLibraryForImage(header->installName); id = m_dscView->BeginUndoActions(); m_dscView->BeginBulkModifySymbols(); for (const auto& sym : exportList) diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 7b19830551..1dcc3033e5 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -603,6 +603,7 @@ namespace SharedCacheCore { explicit SharedCache(BinaryNinja::Ref rawView); virtual ~SharedCache(); +private: std::optional LoadHeaderForAddress( std::shared_ptr vm, uint64_t address, std::string installName); void InitializeHeader( @@ -612,6 +613,8 @@ namespace SharedCacheCore { std::vector> ParseExportTrie( std::shared_ptr linkeditFile, SharedCacheMachOHeader header); + Ref TypeLibraryForImage(const std::string& installName); + const State& State() const { return *m_state; } struct State& MutableState() { AssertMutable(); return *m_state; } From a44b78355031ed429ac07f20a6439f07f9f47d9d Mon Sep 17 
00:00:00 2001 From: Mark Rowe Date: Sun, 24 Nov 2024 22:34:40 -0800 Subject: [PATCH 27/35] [SharedCache] Track whether non-image regions are data vs code `BackingCache` now tracks the `dyld_cache_mapping_info` for its mappings so it has access to the memory protections for the region. This means it can avoid marking some regions as containing code when they don't, reducing the amount of analysis work that has to be done. Using `dyld_cache_mapping_info` also makes references to mappings easier to understand due to its named fields vs the nested `std::pair`s that were previously in use. --- view/sharedcache/core/SharedCache.cpp | 152 ++++++++++++++------------ view/sharedcache/core/SharedCache.h | 31 ++---- 2 files changed, 90 insertions(+), 93 deletions(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index b734a6b192..0f61da6384 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -133,6 +133,24 @@ std::string base_name(std::string const& path) return path.substr(path.find_last_of("/\\") + 1); } +BNSegmentFlag SegmentFlagsFromMachOProtections(int initProt, int maxProt) { + + uint32_t flags = 0; + if (initProt & MACHO_VM_PROT_READ) + flags |= SegmentReadable; + if (initProt & MACHO_VM_PROT_WRITE) + flags |= SegmentWritable; + if (initProt & MACHO_VM_PROT_EXECUTE) + flags |= SegmentExecutable; + if (((initProt & MACHO_VM_PROT_WRITE) == 0) && + ((maxProt & MACHO_VM_PROT_WRITE) == 0)) + flags |= SegmentDenyWrite; + if (((initProt & MACHO_VM_PROT_EXECUTE) == 0) && + ((maxProt & MACHO_VM_PROT_EXECUTE) == 0)) + flags |= SegmentDenyExecute; + return (BNSegmentFlag)flags; +} + #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-function" @@ -305,11 +323,7 @@ void SharedCache::PerformInitialLoad() for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); - 
std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = mapping.fileOffset; - mapRawToAddrAndSize.second.first = mapping.address; - mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(mapRawToAddrAndSize); + cache.mappings.push_back(mapping); } MutableState().backingCaches.push_back(std::move(cache)); @@ -373,11 +387,7 @@ void SharedCache::PerformInitialLoad() for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); - std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = mapping.fileOffset; - mapRawToAddrAndSize.second.first = mapping.address; - mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(std::move(mapRawToAddrAndSize)); + cache.mappings.push_back(mapping); } MutableState().backingCaches.push_back(std::move(cache)); @@ -449,11 +459,7 @@ void SharedCache::PerformInitialLoad() { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = subCacheMapping.fileOffset; - mapRawToAddrAndSize.second.first = subCacheMapping.address; - mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + subCache.mappings.push_back(subCacheMapping); } if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 @@ -485,11 +491,7 @@ void SharedCache::PerformInitialLoad() for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); - std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = mapping.fileOffset; - mapRawToAddrAndSize.second.first = mapping.address; - mapRawToAddrAndSize.second.second = mapping.size; - 
cache.mappings.push_back(std::move(mapRawToAddrAndSize)); + cache.mappings.push_back(mapping); } MutableState().backingCaches.push_back(std::move(cache)); @@ -545,11 +547,7 @@ void SharedCache::PerformInitialLoad() { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = subCacheMapping.fileOffset; - mapRawToAddrAndSize.second.first = subCacheMapping.address; - mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + subCache.mappings.push_back(subCacheMapping); } MutableState().backingCaches.push_back(std::move(subCache)); @@ -591,11 +589,7 @@ void SharedCache::PerformInitialLoad() { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = subCacheMapping.fileOffset; - mapRawToAddrAndSize.second.first = subCacheMapping.address; - mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + subCache.mappings.push_back(subCacheMapping); } MutableState().backingCaches.push_back(std::move(subCache)); @@ -612,11 +606,7 @@ void SharedCache::PerformInitialLoad() for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); - std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = mapping.fileOffset; - mapRawToAddrAndSize.second.first = mapping.address; - mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(std::move(mapRawToAddrAndSize)); + cache.mappings.push_back(mapping); } MutableState().backingCaches.push_back(std::move(cache)); @@ -694,12 +684,7 @@ void SharedCache::PerformInitialLoad() { subCacheFile->Read(&subCacheMapping, 
subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - - std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = subCacheMapping.fileOffset; - mapRawToAddrAndSize.second.first = subCacheMapping.address; - mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + subCache.mappings.push_back(subCacheMapping); if (subCachePath.find(".dylddata") != std::string::npos) { @@ -756,11 +741,7 @@ void SharedCache::PerformInitialLoad() { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - std::pair> mapRawToAddrAndSize; - mapRawToAddrAndSize.first = subCacheMapping.fileOffset; - mapRawToAddrAndSize.second.first = subCacheMapping.address; - mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + subCache.mappings.push_back(subCacheMapping); } MutableState().backingCaches.push_back(std::move(subCache)); @@ -806,19 +787,7 @@ void SharedCache::PerformInitialLoad() sectionRegion.prettyName = imageHeader.value().identifierPrefix + "::" + std::string(segName); sectionRegion.start = segment.vmaddr; sectionRegion.size = segment.vmsize; - uint32_t flags = 0; - if (segment.initprot & MACHO_VM_PROT_READ) - flags |= SegmentReadable; - if (segment.initprot & MACHO_VM_PROT_WRITE) - flags |= SegmentWritable; - if (segment.initprot & MACHO_VM_PROT_EXECUTE) - flags |= SegmentExecutable; - if (((segment.initprot & MACHO_VM_PROT_WRITE) == 0) && - ((segment.maxprot & MACHO_VM_PROT_WRITE) == 0)) - flags |= SegmentDenyWrite; - if (((segment.initprot & MACHO_VM_PROT_EXECUTE) == 0) && - ((segment.maxprot & MACHO_VM_PROT_EXECUTE) == 0)) - flags |= SegmentDenyExecute; + uint32_t flags = SegmentFlagsFromMachOProtections(segment.initprot, segment.maxprot); // if we're positive we have an entry point for some reason, force the segment // executable. 
this helps with kernel images. @@ -850,11 +819,10 @@ void SharedCache::PerformInitialLoad() for (const auto& mapping : cache.mappings) { MemoryRegion region; - region.start = mapping.second.first; - region.size = mapping.second.second; + region.start = mapping.address; + region.size = mapping.size; region.prettyName = base_name(cache.path) + "::" + std::to_string(i); - // FIXME flags!!! BackingCache.mapping needs refactored to store this information! - region.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); + region.flags = SegmentFlagsFromMachOProtections(mapping.initProt, mapping.maxProt); MutableState().nonImageRegions.push_back(std::move(region)); i++; } @@ -998,7 +966,7 @@ std::shared_ptr SharedCache::GetVMMap(bool mapPages) { for (const auto& mapping : cache.mappings) { - vm->MapPages(m_dscView, m_dscView->GetFile()->GetSessionId(), mapping.second.first, mapping.first, mapping.second.second, cache.path, + vm->MapPages(m_dscView, m_dscView->GetFile()->GetSessionId(), mapping.address, mapping.fileOffset, mapping.size, cache.path, [this, vm=vm](std::shared_ptr mmap){ ParseAndApplySlideInfoForFile(mmap); }); @@ -1065,9 +1033,9 @@ void SharedCache::ParseAndApplySlideInfoForFile(std::shared_ptrGetParentView()->WriteBuffer(rawViewEnd, buff); m_dscView->GetParentView()->AddAutoSegment(rawViewEnd, region.size, rawViewEnd, region.size, region.flags); m_dscView->AddUserSegment(region.start, region.size, rawViewEnd, region.size, region.flags); - m_dscView->AddUserSection(name, region.start, region.size, ReadOnlyCodeSectionSemantics); + m_dscView->AddUserSection(name, region.start, region.size, region.flags & SegmentDenyExecute ? 
ReadOnlyDataSectionSemantics : ReadOnlyCodeSectionSemantics); m_dscView->WriteBuffer(region.start, buff); region.loaded = true; @@ -3298,11 +3266,11 @@ extern "C" mappings = (BNDSCBackingCacheMapping*)malloc(sizeof(BNDSCBackingCacheMapping) * viewCaches[i].mappings.size()); size_t j = 0; - for (const auto& [fileOffset, mapping] : viewCaches[i].mappings) + for (const auto& mapping : viewCaches[i].mappings) { - mappings[j].vmAddress = mapping.first; - mappings[j].size = mapping.second; - mappings[j].fileOffset = fileOffset; + mappings[j].vmAddress = mapping.address; + mappings[j].size = mapping.size; + mappings[j].fileOffset = mapping.fileOffset; j++; } caches[i].mappings = mappings; @@ -3445,6 +3413,34 @@ void InitDSCViewType() namespace SharedCacheCore { +void Serialize(SerializationContext& context, const dyld_cache_mapping_info& value) +{ + context.writer.StartArray(); + Serialize(context, value.address); + Serialize(context, value.size); + Serialize(context, value.fileOffset); + Serialize(context, value.maxProt); + Serialize(context, value.initProt); + context.writer.EndArray(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, std::vector& b) +{ + + auto bArr = context.doc[name.data()].GetArray(); + for (auto& s : bArr) + { + dyld_cache_mapping_info mapping; + auto s2 = s.GetArray(); + mapping.address = s2[0].GetUint64(); + mapping.size = s2[1].GetUint64(); + mapping.fileOffset = s2[2].GetUint64(); + mapping.maxProt = s2[3].GetUint(); + mapping.initProt = s2[4].GetUint(); + b.push_back(mapping); + } +} + void SharedCache::Store(SerializationContext& context) const { Serialize(context, "metadataVersion", METADATA_VERSION); @@ -3616,6 +3612,19 @@ void SharedCache::Load(DeserializationContext& context) m_metadataValid = true; } +void BackingCache::Store(SerializationContext& context) const +{ + MSS(path); + MSS(isPrimary); + MSS(mappings); +} +void BackingCache::Load(DeserializationContext& context) +{ + MSL(path); + MSL(isPrimary); + 
MSL(mappings); +} + #if defined(__GNUC__) || defined(__clang__) __attribute__((always_inline)) void SharedCache::AssertMutable() const #elif defined(_MSC_VER) @@ -3664,5 +3673,4 @@ const std::unordered_map& SharedCache::AllImag { return State().headers; } - } // namespace SharedCacheCore diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 1dcc3033e5..a0d4e59661 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -23,7 +23,6 @@ namespace SharedCacheCore { DSCViewStateLoadedWithImages, }; - const std::string SharedCacheMetadataTag = "SHAREDCACHE-SharedCacheData"; struct MemoryRegion : public MetadataSerializable @@ -91,26 +90,6 @@ namespace SharedCacheCore { } }; - struct BackingCache : public MetadataSerializable - { - std::string path; - bool isPrimary = false; - std::vector>> mappings; - - void Store(SerializationContext& context) const - { - MSS(path); - MSS(isPrimary); - MSS(mappings); - } - void Load(DeserializationContext& context) - { - MSL(path); - MSL(isPrimary); - MSL(mappings); - } - }; - #if defined(__GNUC__) || defined(__clang__) #define PACKED_STRUCT __attribute__((packed)) #else @@ -132,6 +111,16 @@ namespace SharedCacheCore { uint32_t initProt; }; + struct BackingCache : public MetadataSerializable + { + std::string path; + bool isPrimary = false; + std::vector mappings; + + void Store(SerializationContext& context) const; + void Load(DeserializationContext& context); + }; + struct LoadedMapping { std::shared_ptr backingFile; From aa408cfdeaa3cd0c4319598223a4548b18731290 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Sun, 24 Nov 2024 22:28:04 -0800 Subject: [PATCH 28/35] [SharedCache] Fix handling of relative selectors in macOS shared caches Find the relative selector base address in the Objective-C optimization data pointed to by the shared cache header, rather than via `__objc_scoffs`. 
This is only present on iOS, and not for every iOS version that encodes selectors via direct offsets. This also includes some related improvements: 1. Direct selectors get their own pointer type so they're rendered correctly in the view. 2. Method lists encoded as lists of lists are now handled. 3. The `dyld_cache_header` type added to the view is truncated to the length in the loaded cache. This ensures it is applied to the view. 4. A couple of methods that process method IMPs and selectors are updated to check whether the address is valid before attempting to process them. They would otherwise fail by throwing an exception if they proceed, but checking for validity is quicker and makes exception breakpoints usable. --- view/sharedcache/core/DSCView.cpp | 18 +- view/sharedcache/core/ObjC.cpp | 116 +++++++------ view/sharedcache/core/ObjC.h | 9 +- view/sharedcache/core/SharedCache.cpp | 44 +++++ view/sharedcache/core/SharedCache.h | 163 ++++++++++-------- .../workflow/SharedCacheWorkflow.cpp | 2 +- 6 files changed, 227 insertions(+), 125 deletions(-) diff --git a/view/sharedcache/core/DSCView.cpp b/view/sharedcache/core/DSCView.cpp index 31af3bd73f..1ef6198b37 100644 --- a/view/sharedcache/core/DSCView.cpp +++ b/view/sharedcache/core/DSCView.cpp @@ -200,6 +200,15 @@ bool DSCView::Init() "\t\tuint64_t rosettaReadWriteSize;\t// maximum size of the Rosetta read-write region\n" "\t\tuint32_t imagesOffset;\t\t\t// file offset to first dyld_cache_image_info\n" "\t\tuint32_t imagesCount;\t\t\t// number of dyld_cache_image_info entries\n" + "\t\tuint32_t cacheSubType; // 0 for development, 1 for production, when cacheType is multi-cache(2)\n" + "\t\tuint64_t objcOptsOffset; // VM offset from cache_header* to ObjC optimizations header\n" + "\t\tuint64_t objcOptsSize; // size of ObjC optimizations header\n" + "\t\tuint64_t cacheAtlasOffset; // VM offset from cache_header* to embedded cache atlas for process introspection\n" + "\t\tuint64_t cacheAtlasSize; // size of embedded 
cache atlas\n" + "\t\tuint64_t dynamicDataOffset; // VM offset from cache_header* to the location of dyld_cache_dynamic_data_header\n" + "\t\tuint64_t dynamicDataMaxSize; // maximum size of space reserved from dynamic data\n" + "\t\tuint32_t tproMappingsOffset; // file offset to first dyld_cache_tpro_mapping_info\n" + "\t\tuint32_t tproMappingsCount; // number of dyld_cache_tpro_mapping_info entries\n" "\t};", headerType, err); Ref settings = GetLoadSettings(GetTypeName()); @@ -732,8 +741,13 @@ bool DSCView::Init() return false; } - AddAutoSegment(primaryBase, 0x200, 0, 0x200, SegmentReadable); - AddAutoSection("__dsc_header", primaryBase, 0x200, ReadOnlyCodeSectionSemantics); + uint64_t headerSize = std::min(basePointer, headerType.type->GetWidth()); + // Truncate the `dyld_cache_header` structure to the structure present in the cache file. + auto newStructure = StructureBuilder(headerType.type->GetStructure()).SetWidth(headerSize).Finalize(); + headerType.type = TypeBuilder::StructureType(newStructure).Finalize(); + + AddAutoSegment(primaryBase, headerSize, 0, headerSize, SegmentReadable); + AddAutoSection("__dsc_header", primaryBase, headerSize, ReadOnlyDataSectionSemantics); DefineType("dyld_cache_header", headerType.name, headerType.type); DefineAutoSymbolAndVariableOrFunction(GetDefaultPlatform(), new Symbol(DataSymbol, "primary_cache_header", primaryBase), headerType.type); diff --git a/view/sharedcache/core/ObjC.cpp b/view/sharedcache/core/ObjC.cpp index 95fbb2a2d5..6e998e2a8c 100644 --- a/view/sharedcache/core/ObjC.cpp +++ b/view/sharedcache/core/ObjC.cpp @@ -775,22 +775,57 @@ void DSCObjCProcessor::LoadProtocols(VMReader* reader, Ref
listSection) } } -void DSCObjCProcessor::ReadMethodList(VMReader* reader, ClassBase& cls, std::string name, view_ptr_t start) +void DSCObjCProcessor::ReadListOfMethodLists(VMReader* reader, ClassBase& cls, std::string_view name, view_ptr_t start) { reader->Seek(start); method_list_t head; head.entsizeAndFlags = reader->Read32(); head.count = reader->Read32(); + if (head.count > 0x1000) + { + m_logger->LogError("List of method lists at 0x%llx has an invalid count of 0x%x", start, head.count); + return; + } + + for (size_t i = 0; i < head.count; ++i) { + relative_list_list_entry_t list_entry; + reader->Read(&list_entry, sizeof(list_entry)); + + ReadMethodList(reader, cls, name, reader->GetOffset() - sizeof(list_entry) + list_entry.listOffset); + // Reset the cursor to immediately past the list entry. + reader->Seek(start + sizeof(method_list_t) + ((i + 1) * sizeof(relative_list_list_entry_t))); + } +} + +void DSCObjCProcessor::ReadMethodList(VMReader* reader, ClassBase& cls, std::string_view name, view_ptr_t start) +{ + // Lower two bits indicate the type of method list. + switch (start & 0b11) { + case 0: + break; + case 1: + return ReadListOfMethodLists(reader, cls, name, start - 1); + default: + m_logger->LogDebug("ReadMethodList: Unknown method list type at 0x%llx: %d", start, start & 0x3); + return; + } + + reader->Seek(start); + method_list_t head; + head.entsizeAndFlags = reader->Read32(); + head.count = reader->Read32(); + if (head.count > 0x1000) { m_logger->LogError("Method list at 0x%llx has an invalid count of 0x%x", start, head.count); return; } + uint64_t pointerSize = m_data->GetAddressSize(); bool relativeOffsets = (head.entsizeAndFlags & 0xFFFF0000) & 0x80000000; bool directSelectors = (head.entsizeAndFlags & 0xFFFF0000) & 0x40000000; auto methodSize = relativeOffsets ? 
12 : pointerSize * 3; - DefineObjCSymbol(DataSymbol, m_typeNames.methodList, "method_list_" + name, start, true); + DefineObjCSymbol(DataSymbol, m_typeNames.methodList, "method_list_" + std::string(name), start, true); for (unsigned i = 0; i < head.count; i++) { @@ -806,18 +841,14 @@ void DSCObjCProcessor::ReadMethodList(VMReader* reader, ClassBase& cls, std::str // -- if (relativeOffsets) { - if (m_customRelativeMethodSelectorBase.has_value()) - { - meth.name = m_customRelativeMethodSelectorBase.value() + reader->ReadS32(); - meth.types = reader->GetOffset() + reader->ReadS32(); - meth.imp = reader->GetOffset() + reader->ReadS32(); - } - else - { - meth.name = reader->GetOffset() + reader->ReadS32(); - meth.types = reader->GetOffset() + reader->ReadS32(); - meth.imp = reader->GetOffset() + reader->ReadS32(); + auto selectorBaseOffset = reader->GetOffset(); + if (directSelectors && m_customRelativeMethodSelectorBase.has_value()) { + selectorBaseOffset = m_customRelativeMethodSelectorBase.value(); } + + meth.name = selectorBaseOffset + reader->Read32(); + meth.types = reader->GetOffset() + reader->ReadS32(); + meth.imp = reader->GetOffset() + reader->ReadS32(); } else { @@ -881,14 +912,14 @@ void DSCObjCProcessor::ReadMethodList(VMReader* reader, ClassBase& cls, std::str } } -void DSCObjCProcessor::ReadIvarList(VMReader* reader, ClassBase& cls, std::string name, view_ptr_t start) +void DSCObjCProcessor::ReadIvarList(VMReader* reader, ClassBase& cls, std::string_view name, view_ptr_t start) { reader->Seek(start); ivar_list_t head; head.entsizeAndFlags = reader->Read32(); head.count = reader->Read32(); auto addressSize = m_data->GetAddressSize(); - DefineObjCSymbol(DataSymbol, m_typeNames.ivarList, "ivar_list_" + name, start, true); + DefineObjCSymbol(DataSymbol, m_typeNames.ivarList, "ivar_list_" + std::string(name), start, true); if (head.count > 0x1000) { m_logger->LogError("Ivar list at 0x%llx has an invalid count of 0x%llx", start, head.count); @@ -1010,6 
+1041,10 @@ void DSCObjCProcessor::GenerateClassTypes() bool DSCObjCProcessor::ApplyMethodType(Class& cls, Method& method, bool isInstanceMethod) { + if (!method.imp || !m_data->IsValidOffset(method.imp)) { + return false; + } + std::stringstream r(method.name); std::string token; @@ -1221,6 +1256,19 @@ void DSCObjCProcessor::ProcessObjCData(std::shared_ptr vm, std::string baseN m_typeNames.nsuInteger = defineTypedef(m_data, {"NSUInteger"}, Type::IntegerType(addrSize, false)); m_typeNames.cgFloat = defineTypedef(m_data, {"CGFloat"}, Type::FloatType(addrSize)); + Ref relativeSelectorPtr; + auto reader = VMReader(vm); + if (auto objCRelativeMethodsBaseAddr = m_cache->GetObjCRelativeMethodBaseAddress(reader)) { + m_logger->LogDebug("RelativeMethodSelector Base: 0x%llx", objCRelativeMethodsBaseAddr); + m_customRelativeMethodSelectorBase = objCRelativeMethodsBaseAddr; + + auto type = TypeBuilder::PointerType(4, Type::PointerType(addrSize, Type::IntegerType(1, false))) + .SetPointerBase(RelativeToConstantPointerBaseType, objCRelativeMethodsBaseAddr) + .Finalize(); + auto relativeSelectorPtrName = defineTypedef(m_data, {"relative_SEL"}, type); + relativeSelectorPtr = Type::NamedType(m_data, relativeSelectorPtrName); + } + // https://github.com/apple-oss-distributions/objc4/blob/196363c165b175ed925ef6b9b99f558717923c47/runtime/objc-abi.h EnumerationBuilder imageInfoFlagBuilder; imageInfoFlagBuilder.AddMemberWithValue("IsReplacement", 1 << 0); @@ -1256,7 +1304,7 @@ void DSCObjCProcessor::ProcessObjCData(std::shared_ptr vm, std::string baseN m_typeNames.imageInfo = imageInfoType.first; StructureBuilder methodEntry; - methodEntry.AddMember(rptr_t, "name"); + methodEntry.AddMember(relativeSelectorPtr ? 
relativeSelectorPtr : rptr_t, "name"); methodEntry.AddMember(rptr_t, "types"); methodEntry.AddMember(rptr_t, "imp"); auto type = finalizeStructureBuilder(m_data, methodEntry, "objc_method_entry_t"); @@ -1360,42 +1408,6 @@ void DSCObjCProcessor::ProcessObjCData(std::shared_ptr vm, std::string baseN protocolBuilder.AddMember(Type::IntegerType(4, false), "flags"); m_typeNames.protocol = finalizeStructureBuilder(m_data, protocolBuilder, "objc_protocol_t").first; - auto reader = VMReader(vm); - - if (auto addr = m_cache->GetImageStart("/usr/lib/libobjc.A.dylib")) - { - auto header = m_cache->HeaderForAddress(addr.value()); - uint64_t scoffs_addr = 0; - size_t scoffs_size = 0; - - for (const auto& section : header->sections) - { - char name[17]; - memcpy(name, section.sectname, 16); - name[16] = 0; - if (std::string(name) == "__objc_scoffs") - { - scoffs_addr = section.addr; - scoffs_size = section.size; - break; - } - } - - if (scoffs_size && scoffs_addr) - { - if (scoffs_size == 0x20) - { - m_customRelativeMethodSelectorBase = reader.ReadULong(scoffs_addr); - } - else - { - m_customRelativeMethodSelectorBase = reader.ReadULong(scoffs_addr + 8); - } - m_logger->LogDebug("RelativeMethodSelector Base: 0x%llx", m_customRelativeMethodSelectorBase.value()); - } - } - - m_data->BeginBulkModifySymbols(); if (auto classList = m_data->GetSectionByName(baseName + "::__objc_classlist")) LoadClasses(&reader, classList); diff --git a/view/sharedcache/core/ObjC.h b/view/sharedcache/core/ObjC.h index 016ff02247..0f4526c046 100644 --- a/view/sharedcache/core/ObjC.h +++ b/view/sharedcache/core/ObjC.h @@ -58,6 +58,10 @@ namespace DSCObjC { typedef struct { uint64_t count; } protocol_list_t; + struct relative_list_list_entry_t { + uint64_t imageIndex: 16; + int64_t listOffset: 48; + }; typedef struct { view_ptr_t isa; view_ptr_t mangledName; @@ -214,8 +218,9 @@ namespace DSCObjC { std::vector ParseEncodedType(const std::string& type); void DefineObjCSymbol(BNSymbolType symbolType, 
QualifiedName typeName, const std::string& name, uint64_t addr, bool deferred); void DefineObjCSymbol(BNSymbolType symbolType, Ref type, const std::string& name, uint64_t addr, bool deferred); - void ReadIvarList(VMReader* reader, ClassBase& cls, std::string name, view_ptr_t start); - void ReadMethodList(VMReader* reader, ClassBase& cls, std::string name, view_ptr_t start); + void ReadIvarList(VMReader* reader, ClassBase& cls, std::string_view name, view_ptr_t start); + void ReadMethodList(VMReader* reader, ClassBase& cls, std::string_view name, view_ptr_t start); + void ReadListOfMethodLists(VMReader* reader, ClassBase& cls, std::string_view name, view_ptr_t start); void LoadClasses(VMReader* reader, Ref
listSection); void LoadCategories(VMReader* reader, Ref
listSection); void LoadProtocols(VMReader* reader, Ref
listSection); diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 0f61da6384..b9150c31c8 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -75,6 +75,8 @@ struct SharedCache::State std::vector dyldDataRegions; std::vector nonImageRegions; + std::optional> objcOptimizationDataRange; + std::string baseFilePath; SharedCacheFormat cacheFormat; DSCViewState viewState = DSCViewStateUnloaded; @@ -311,6 +313,10 @@ void SharedCache::PerformInitialLoad() MutableState().cacheFormat = iOS16CacheFormat; } + if (primaryCacheHeader.objcOptsOffset && primaryCacheHeader.objcOptsSize) { + MutableState().objcOptimizationDataRange = {primaryCacheHeader.objcOptsOffset, primaryCacheHeader.objcOptsSize}; + } + switch (State().cacheFormat) { case RegularCacheFormat: @@ -3673,4 +3679,42 @@ const std::unordered_map& SharedCache::AllImag { return State().headers; } +size_t SharedCache::GetBaseAddress() const { + if (State().backingCaches.empty()) { + return 0; + } + + const BackingCache& primaryCache = State().backingCaches[0]; + if (!primaryCache.isPrimary) { + abort(); + return 0; + } + + if (primaryCache.mappings.empty()) { + return 0; + } + + return primaryCache.mappings[0].address; +} + +// Intentionally takes a copy to avoid modifying the cursor position in the original reader. +std::optional SharedCache::GetObjCOptimizationHeader(VMReader reader) const { + if (!State().objcOptimizationDataRange) { + return {}; + } + + ObjCOptimizationHeader header{}; + // Ignoring `objcOptsSize` in favor of `sizeof(ObjCOptimizationHeader)` matches dyld's behavior. 
+ reader.Read(&header, GetBaseAddress() + State().objcOptimizationDataRange->first, sizeof(ObjCOptimizationHeader)); + + return header; +} + +size_t SharedCache::GetObjCRelativeMethodBaseAddress(const VMReader& reader) const { + if (auto header = GetObjCOptimizationHeader(reader); header.has_value()) { + return GetBaseAddress() + header->relativeMethodSelectorBaseAddressOffset; + } + return 0; +} + } // namespace SharedCacheCore diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index a0d4e59661..b5a0491fe2 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -262,74 +262,84 @@ namespace SharedCacheCore { struct PACKED_STRUCT dyld_cache_header { - char magic[16]; // e.g. "dyld_v0 i386" - uint32_t mappingOffset; // file offset to first dyld_cache_mapping_info - uint32_t mappingCount; // number of dyld_cache_mapping_info entries - uint32_t imagesOffsetOld; // UNUSED: moved to imagesOffset to prevent older dsc_extarctors from crashing - uint32_t imagesCountOld; // UNUSED: moved to imagesCount to prevent older dsc_extarctors from crashing - uint64_t dyldBaseAddress; // base address of dyld when cache was built - uint64_t codeSignatureOffset; // file offset of code signature blob - uint64_t codeSignatureSize; // size of code signature blob (zero means to end of file) - uint64_t slideInfoOffsetUnused; // unused. Used to be file offset of kernel slid info - uint64_t slideInfoSizeUnused; // unused. Used to be size of kernel slid info - uint64_t localSymbolsOffset; // file offset of where local symbols are stored - uint64_t localSymbolsSize; // size of local symbols information - uint8_t uuid[16]; // unique value for each shared cache file - uint64_t cacheType; // 0 for development, 1 for production // Kat: , 2 for iOS 16? 
- uint32_t branchPoolsOffset; // file offset to table of uint64_t pool addresses - uint32_t branchPoolsCount; // number of uint64_t entries - uint64_t accelerateInfoAddr; // (unslid) address of optimization info - uint64_t accelerateInfoSize; // size of optimization info - uint64_t imagesTextOffset; // file offset to first dyld_cache_image_text_info - uint64_t imagesTextCount; // number of dyld_cache_image_text_info entries - uint64_t patchInfoAddr; // (unslid) address of dyld_cache_patch_info - uint64_t patchInfoSize; // Size of all of the patch information pointed to via the dyld_cache_patch_info - uint64_t otherImageGroupAddrUnused; // unused - uint64_t otherImageGroupSizeUnused; // unused - uint64_t progClosuresAddr; // (unslid) address of list of program launch closures - uint64_t progClosuresSize; // size of list of program launch closures - uint64_t progClosuresTrieAddr; // (unslid) address of trie of indexes into program launch closures - uint64_t progClosuresTrieSize; // size of trie of indexes into program launch closures - uint32_t platform; // platform number (macOS=1, etc) - uint32_t formatVersion : 8, // dyld3::closure::kFormatVersion - dylibsExpectedOnDisk : 1, // dyld should expect the dylib exists on disk and to compare inode/mtime to see if cache is valid - simulator : 1, // for simulator of specified platform - locallyBuiltCache : 1, // 0 for B&I built cache, 1 for locally built cache - builtFromChainedFixups : 1, // some dylib in cache was built using chained fixups, so patch tables must be used for overrides - padding : 20; // TBD - uint64_t sharedRegionStart; // base load address of cache if not slid - uint64_t sharedRegionSize; // overall size required to map the cache and all subCaches, if any - uint64_t maxSlide; // runtime slide of cache can be between zero and this value - uint64_t dylibsImageArrayAddr; // (unslid) address of ImageArray for dylibs in this cache - uint64_t dylibsImageArraySize; // size of ImageArray for dylibs in this 
cache - uint64_t dylibsTrieAddr; // (unslid) address of trie of indexes of all cached dylibs - uint64_t dylibsTrieSize; // size of trie of cached dylib paths - uint64_t otherImageArrayAddr; // (unslid) address of ImageArray for dylibs and bundles with dlopen closures - uint64_t otherImageArraySize; // size of ImageArray for dylibs and bundles with dlopen closures - uint64_t otherTrieAddr; // (unslid) address of trie of indexes of all dylibs and bundles with dlopen closures - uint64_t otherTrieSize; // size of trie of dylibs and bundles with dlopen closures - uint32_t mappingWithSlideOffset; // file offset to first dyld_cache_mapping_and_slide_info - uint32_t mappingWithSlideCount; // number of dyld_cache_mapping_and_slide_info entries - uint64_t dylibsPBLStateArrayAddrUnused; // unused - uint64_t dylibsPBLSetAddr; // (unslid) address of PrebuiltLoaderSet of all cached dylibs - uint64_t programsPBLSetPoolAddr; // (unslid) address of pool of PrebuiltLoaderSet for each program - uint64_t programsPBLSetPoolSize; // size of pool of PrebuiltLoaderSet for each program - uint64_t programTrieAddr; // (unslid) address of trie mapping program path to PrebuiltLoaderSet - uint32_t programTrieSize; - uint32_t osVersion; // OS Version of dylibs in this cache for the main platform - uint32_t altPlatform; // e.g. iOSMac on macOS - uint32_t altOsVersion; // e.g. 
14.0 for iOSMac - uint64_t swiftOptsOffset; // file offset to Swift optimizations header - uint64_t swiftOptsSize; // size of Swift optimizations header - uint32_t subCacheArrayOffset; // file offset to first dyld_subcache_entry - uint32_t subCacheArrayCount; // number of subCache entries - uint8_t symbolFileUUID[16]; // unique value for the shared cache file containing unmapped local symbols - uint64_t rosettaReadOnlyAddr; // (unslid) address of the start of where Rosetta can add read-only/executable data - uint64_t rosettaReadOnlySize; // maximum size of the Rosetta read-only/executable region - uint64_t rosettaReadWriteAddr; // (unslid) address of the start of where Rosetta can add read-write data - uint64_t rosettaReadWriteSize; // maximum size of the Rosetta read-write region - uint32_t imagesOffset; // file offset to first dyld_cache_image_info - uint32_t imagesCount; // number of dyld_cache_image_info entries + char magic[16]; // e.g. "dyld_v0 i386" + uint32_t mappingOffset; // file offset to first dyld_cache_mapping_info + uint32_t mappingCount; // number of dyld_cache_mapping_info entries + uint32_t imagesOffsetOld; // UNUSED: moved to imagesOffset to prevent older dsc_extarctors from crashing + uint32_t imagesCountOld; // UNUSED: moved to imagesCount to prevent older dsc_extarctors from crashing + uint64_t dyldBaseAddress; // base address of dyld when cache was built + uint64_t codeSignatureOffset; // file offset of code signature blob + uint64_t codeSignatureSize; // size of code signature blob (zero means to end of file) + uint64_t slideInfoOffsetUnused; // unused. Used to be file offset of kernel slid info + uint64_t slideInfoSizeUnused; // unused. 
Used to be size of kernel slid info + uint64_t localSymbolsOffset; // file offset of where local symbols are stored + uint64_t localSymbolsSize; // size of local symbols information + uint8_t uuid[16]; // unique value for each shared cache file + uint64_t cacheType; // 0 for development, 1 for production, 2 for multi-cache + uint32_t branchPoolsOffset; // file offset to table of uint64_t pool addresses + uint32_t branchPoolsCount; // number of uint64_t entries + uint64_t dyldInCacheMH; // (unslid) address of mach_header of dyld in cache + uint64_t dyldInCacheEntry; // (unslid) address of entry point (_dyld_start) of dyld in cache + uint64_t imagesTextOffset; // file offset to first dyld_cache_image_text_info + uint64_t imagesTextCount; // number of dyld_cache_image_text_info entries + uint64_t patchInfoAddr; // (unslid) address of dyld_cache_patch_info + uint64_t patchInfoSize; // Size of all of the patch information pointed to via the dyld_cache_patch_info + uint64_t otherImageGroupAddrUnused; // unused + uint64_t otherImageGroupSizeUnused; // unused + uint64_t progClosuresAddr; // (unslid) address of list of program launch closures + uint64_t progClosuresSize; // size of list of program launch closures + uint64_t progClosuresTrieAddr; // (unslid) address of trie of indexes into program launch closures + uint64_t progClosuresTrieSize; // size of trie of indexes into program launch closures + uint32_t platform; // platform number (macOS=1, etc) + uint32_t formatVersion : 8, // dyld3::closure::kFormatVersion + dylibsExpectedOnDisk : 1, // dyld should expect the dylib exists on disk and to compare inode/mtime to see if cache is valid + simulator : 1, // for simulator of specified platform + locallyBuiltCache : 1, // 0 for B&I built cache, 1 for locally built cache + builtFromChainedFixups : 1, // some dylib in cache was built using chained fixups, so patch tables must be used for overrides + padding : 20; // TBD + uint64_t sharedRegionStart; // base load address of 
cache if not slid + uint64_t sharedRegionSize; // overall size required to map the cache and all subCaches, if any + uint64_t maxSlide; // runtime slide of cache can be between zero and this value + uint64_t dylibsImageArrayAddr; // (unslid) address of ImageArray for dylibs in this cache + uint64_t dylibsImageArraySize; // size of ImageArray for dylibs in this cache + uint64_t dylibsTrieAddr; // (unslid) address of trie of indexes of all cached dylibs + uint64_t dylibsTrieSize; // size of trie of cached dylib paths + uint64_t otherImageArrayAddr; // (unslid) address of ImageArray for dylibs and bundles with dlopen closures + uint64_t otherImageArraySize; // size of ImageArray for dylibs and bundles with dlopen closures + uint64_t otherTrieAddr; // (unslid) address of trie of indexes of all dylibs and bundles with dlopen closures + uint64_t otherTrieSize; // size of trie of dylibs and bundles with dlopen closures + uint32_t mappingWithSlideOffset; // file offset to first dyld_cache_mapping_and_slide_info + uint32_t mappingWithSlideCount; // number of dyld_cache_mapping_and_slide_info entries + uint64_t dylibsPBLStateArrayAddrUnused; // unused + uint64_t dylibsPBLSetAddr; // (unslid) address of PrebuiltLoaderSet of all cached dylibs + uint64_t programsPBLSetPoolAddr; // (unslid) address of pool of PrebuiltLoaderSet for each program + uint64_t programsPBLSetPoolSize; // size of pool of PrebuiltLoaderSet for each program + uint64_t programTrieAddr; // (unslid) address of trie mapping program path to PrebuiltLoaderSet + uint32_t programTrieSize; + uint32_t osVersion; // OS Version of dylibs in this cache for the main platform + uint32_t altPlatform; // e.g. iOSMac on macOS + uint32_t altOsVersion; // e.g. 
14.0 for iOSMac + uint64_t swiftOptsOffset; // VM offset from cache_header* to Swift optimizations header + uint64_t swiftOptsSize; // size of Swift optimizations header + uint32_t subCacheArrayOffset; // file offset to first dyld_subcache_entry + uint32_t subCacheArrayCount; // number of subCache entries + uint8_t symbolFileUUID[16]; // unique value for the shared cache file containing unmapped local symbols + uint64_t rosettaReadOnlyAddr; // (unslid) address of the start of where Rosetta can add read-only/executable data + uint64_t rosettaReadOnlySize; // maximum size of the Rosetta read-only/executable region + uint64_t rosettaReadWriteAddr; // (unslid) address of the start of where Rosetta can add read-write data + uint64_t rosettaReadWriteSize; // maximum size of the Rosetta read-write region + uint32_t imagesOffset; // file offset to first dyld_cache_image_info + uint32_t imagesCount; // number of dyld_cache_image_info entries + uint32_t cacheSubType; // 0 for development, 1 for production, when cacheType is multi-cache(2) + uint32_t padding2; + uint64_t objcOptsOffset; // VM offset from cache_header* to ObjC optimizations header + uint64_t objcOptsSize; // size of ObjC optimizations header + uint64_t cacheAtlasOffset; // VM offset from cache_header* to embedded cache atlas for process introspection + uint64_t cacheAtlasSize; // size of embedded cache atlas + uint64_t dynamicDataOffset; // VM offset from cache_header* to the location of dyld_cache_dynamic_data_header + uint64_t dynamicDataMaxSize; // maximum size of space reserved from dynamic data + uint32_t tproMappingsOffset; // file offset to first dyld_cache_tpro_mapping_info + uint32_t tproMappingsCount; // number of dyld_cache_tpro_mapping_info entries }; struct PACKED_STRUCT dyld_subcache_entry @@ -345,6 +355,18 @@ namespace SharedCacheCore { char fileExtension[32]; }; + struct ObjCOptimizationHeader + { + uint32_t version; + uint32_t flags; + uint64_t headerInfoROCacheOffset; + uint64_t 
headerInfoRWCacheOffset; + uint64_t selectorHashTableCacheOffset; + uint64_t classHashTableCacheOffset; + uint64_t protocolHashTableCacheOffset; + uint64_t relativeMethodSelectorBaseAddressOffset; + }; + #if defined(_MSC_VER) #pragma pack(pop) #else @@ -592,6 +614,8 @@ namespace SharedCacheCore { explicit SharedCache(BinaryNinja::Ref rawView); virtual ~SharedCache(); + size_t GetObjCRelativeMethodBaseAddress(const VMReader& reader) const; + private: std::optional LoadHeaderForAddress( std::shared_ptr vm, uint64_t address, std::string installName); @@ -604,6 +628,9 @@ namespace SharedCacheCore { Ref TypeLibraryForImage(const std::string& installName); + size_t GetBaseAddress() const; + std::optional GetObjCOptimizationHeader(VMReader reader) const; + const State& State() const { return *m_state; } struct State& MutableState() { AssertMutable(); return *m_state; } diff --git a/view/sharedcache/workflow/SharedCacheWorkflow.cpp b/view/sharedcache/workflow/SharedCacheWorkflow.cpp index 61832e931a..fa34e2f51d 100644 --- a/view/sharedcache/workflow/SharedCacheWorkflow.cpp +++ b/view/sharedcache/workflow/SharedCacheWorkflow.cpp @@ -468,7 +468,7 @@ void fixObjCCallTypes(Ref ctx) const auto selectorRegister = params[0].GetParameterExprs()[1].GetSourceSSARegister(); rawSelector = ssa->GetSSARegisterValue(selectorRegister).value; } - if (rawSelector == 0) + if (!rawSelector || !bv->IsValidOffset(rawSelector)) return; // -- Do callsite override From 49bafa9cd9c0301235e806e0f868cf16cdaac405 Mon Sep 17 00:00:00 2001 From: kat Date: Wed, 11 Dec 2024 14:44:50 -0500 Subject: [PATCH 29/35] [SharedCache] Avoid crashing the product whenever bugs occur in ser/deser, fix compilation issue on linux --- view/sharedcache/core/SharedCache.cpp | 54 ++++++++++++++++----------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index b9150c31c8..33a4654da9 100644 --- a/view/sharedcache/core/SharedCache.cpp 
+++ b/view/sharedcache/core/SharedCache.cpp @@ -314,7 +314,9 @@ void SharedCache::PerformInitialLoad() } if (primaryCacheHeader.objcOptsOffset && primaryCacheHeader.objcOptsSize) { - MutableState().objcOptimizationDataRange = {primaryCacheHeader.objcOptsOffset, primaryCacheHeader.objcOptsSize}; + uint64_t objcOptsOffset = primaryCacheHeader.objcOptsOffset; + uint64_t objcOptsSize = primaryCacheHeader.objcOptsSize; + MutableState().objcOptimizationDataRange = {objcOptsOffset, objcOptsSize}; } switch (State().cacheFormat) @@ -3309,30 +3311,38 @@ extern "C" { if (cache->object) { - auto vm = cache->object->GetVMMap(true); - auto viewImageHeaders = cache->object->AllImageHeaders(); - *count = viewImageHeaders.size(); - BNDSCImage* images = (BNDSCImage*)malloc(sizeof(BNDSCImage) * viewImageHeaders.size()); - size_t i = 0; - for (const auto& [baseAddress, header] : viewImageHeaders) - { - images[i].name = BNAllocString(header.installName.c_str()); - images[i].headerAddress = baseAddress; - images[i].mappingCount = header.sections.size(); - images[i].mappings = (BNDSCImageMemoryMapping*)malloc(sizeof(BNDSCImageMemoryMapping) * header.sections.size()); - for (size_t j = 0; j < header.sections.size(); j++) + try { + auto vm = cache->object->GetVMMap(true); + auto viewImageHeaders = cache->object->AllImageHeaders(); + *count = viewImageHeaders.size(); + BNDSCImage* images = (BNDSCImage*)malloc(sizeof(BNDSCImage) * viewImageHeaders.size()); + size_t i = 0; + for (const auto& [baseAddress, header] : viewImageHeaders) { - const auto sectionStart = header.sections[j].addr; - images[i].mappings[j].rawViewOffset = header.sections[j].offset; - images[i].mappings[j].vmAddress = sectionStart; - images[i].mappings[j].size = header.sections[j].size; - images[i].mappings[j].name = BNAllocString(header.sectionNames[j].c_str()); - images[i].mappings[j].filePath = BNAllocString(vm->MappingAtAddress(sectionStart).first.filePath.c_str()); - images[i].mappings[j].loaded = 
cache->object->IsMemoryMapped(sectionStart); + images[i].name = BNAllocString(header.installName.c_str()); + images[i].headerAddress = baseAddress; + images[i].mappingCount = header.sections.size(); + images[i].mappings = (BNDSCImageMemoryMapping*)malloc(sizeof(BNDSCImageMemoryMapping) * header.sections.size()); + for (size_t j = 0; j < header.sections.size(); j++) + { + const auto sectionStart = header.sections[j].addr; + images[i].mappings[j].rawViewOffset = header.sections[j].offset; + images[i].mappings[j].vmAddress = sectionStart; + images[i].mappings[j].size = header.sections[j].size; + images[i].mappings[j].name = BNAllocString(header.sectionNames[j].c_str()); + images[i].mappings[j].filePath = BNAllocString(vm->MappingAtAddress(sectionStart).first.filePath.c_str()); + images[i].mappings[j].loaded = cache->object->IsMemoryMapped(sectionStart); + } + i++; } - i++; + return images; + } + catch (...) + { + LogError("SharedCache: Failed to load image listing. Likely caused by a ser/deserialization error or load failure"); + *count = 0; + return nullptr; } - return images; } *count = 0; return nullptr; From e95dc2c36a40814703109eafbf7b0112da084504 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Sat, 11 Jan 2025 18:36:48 -0500 Subject: [PATCH 30/35] MSVC RTTI: Fix crash due to function creation on invalid virtual function entry Also fix type clobbering on anonymous class names --- plugins/msvc_rtti/rtti.cpp | 55 ++++++++++++++++++++++++++------------ plugins/msvc_rtti/rtti.h | 3 ++- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/plugins/msvc_rtti/rtti.cpp b/plugins/msvc_rtti/rtti.cpp index c2832c1ef4..7dfa6130aa 100644 --- a/plugins/msvc_rtti/rtti.cpp +++ b/plugins/msvc_rtti/rtti.cpp @@ -441,6 +441,17 @@ std::optional MicrosoftRTTIProcessor::ProcessRTTI(uint64_t coLocatorA if (!className.has_value()) return std::nullopt; + // If the className is empty we will change it to the address, this is to fix type clobbering. 
+ if (className->empty()) + { + if (!allowAnonymousClassNames) + { + m_logger->LogDebug("Skipping CompleteObjectorLocator with anonymous name %llx", coLocatorAddr); + return std::nullopt; + } + className = fmt::format("ANONYMOUS_{:#x}", coLocatorAddr); + } + auto classInfo = ClassInfo{className.value()}; if (coLocator->offset > 0) classInfo.classOffset = coLocator->offset; @@ -509,7 +520,8 @@ std::optional MicrosoftRTTIProcessor::ProcessVFT(uint6 // Gather all virtual functions BinaryReader reader = BinaryReader(m_view); reader.Seek(vftAddr); - std::vector > virtualFunctions = {}; + // Virtual functions and the analysis object of it, if it exists. + std::vector>>> virtualFunctions = {}; while (true) { uint64_t vFuncAddr = reader.ReadPointer(); @@ -525,10 +537,13 @@ std::optional MicrosoftRTTIProcessor::ProcessVFT(uint6 // TODO: Is likely a function check here? m_logger->LogDebug("Discovered function from virtual function table... %llx", vFuncAddr); auto vFunc = m_view->AddFunctionForAnalysis(m_view->GetDefaultPlatform(), vFuncAddr, true); - funcs.emplace_back(vFunc); + virtualFunctions.emplace_back(vFuncAddr, vFunc ? std::optional(vFunc) : std::nullopt); + } + else + { + // Only ever add one function. + virtualFunctions.emplace_back(vFuncAddr, funcs.front()); } - // Only ever add one function. 
- virtualFunctions.emplace_back(funcs.front()); } if (virtualFunctions.empty()) @@ -537,8 +552,8 @@ std::optional MicrosoftRTTIProcessor::ProcessVFT(uint6 return std::nullopt; } - for (auto &func: virtualFunctions) - vftInfo.virtualFunctions.emplace_back(VirtualFunctionInfo{func->GetStart()}); + for (auto &[vFuncAddr, _]: virtualFunctions) + vftInfo.virtualFunctions.emplace_back(VirtualFunctionInfo{vFuncAddr}); // Create virtual function table type auto vftTypeName = fmt::format("{}::VTable", classInfo.className); @@ -585,22 +600,27 @@ std::optional MicrosoftRTTIProcessor::ProcessVFT(uint6 } } - for (auto &&vFunc: virtualFunctions) + for (auto &&[_, vFunc]: virtualFunctions) { auto vFuncName = fmt::format("vFunc_{}", vFuncIdx); - // If we have a better name, use it. - auto vFuncSymName = vFunc->GetSymbol()->GetShortName(); - if (vFuncSymName.compare(0, 4, "sub_") != 0) - vFuncName = vFunc->GetSymbol()->GetShortName(); - // MyClass::func -> func - std::size_t pos = vFuncName.rfind("::"); - if (pos != std::string::npos) - vFuncName = vFuncName.substr(pos + 2); + if (vFunc.has_value()) + { + // If we have a better name, use it. + auto vFuncObj = vFunc.value(); + auto vFuncSymName = vFuncObj->GetSymbol()->GetShortName(); + if (vFuncSymName.compare(0, 4, "sub_") != 0) + vFuncName = vFuncObj->GetSymbol()->GetShortName(); + // MyClass::func -> func + std::size_t pos = vFuncName.rfind("::"); + if (pos != std::string::npos) + vFuncName = vFuncName.substr(pos + 2); + } // NOTE: The analyzed function type might not be available here. auto vFuncOffset = vFuncIdx * addrSize; + // We have access to a backing function type, use it, otherwise void! vftBuilder.AddMemberAtOffset( - Type::PointerType(addrSize, vFunc->GetType(), true), vFuncName, vFuncOffset); + Type::PointerType(addrSize, vFunc.has_value() ? 
vFunc.value()->GetType() : Type::VoidType(), true), vFuncName, vFuncOffset); vFuncIdx++; } m_view->DefineType(typeId, vftTypeName, @@ -616,10 +636,11 @@ std::optional MicrosoftRTTIProcessor::ProcessVFT(uint6 } -MicrosoftRTTIProcessor::MicrosoftRTTIProcessor(const Ref &view, bool useMangled, bool checkRData, bool vftSweep) : m_view(view) +MicrosoftRTTIProcessor::MicrosoftRTTIProcessor(const Ref &view, bool useMangled, bool checkRData, bool vftSweep, bool allowAnonymous) : m_view(view) { m_logger = new Logger("Microsoft RTTI"); allowMangledClassNames = useMangled; + allowAnonymousClassNames = allowAnonymous; checkWritableRData = checkRData; m_classInfo = {}; virtualFunctionTableSweep = vftSweep; diff --git a/plugins/msvc_rtti/rtti.h b/plugins/msvc_rtti/rtti.h index bcfb0e28ec..062c7b78fc 100644 --- a/plugins/msvc_rtti/rtti.h +++ b/plugins/msvc_rtti/rtti.h @@ -95,6 +95,7 @@ namespace BinaryNinja { Ref m_view; Ref m_logger; bool allowMangledClassNames; + bool allowAnonymousClassNames; bool checkWritableRData; bool virtualFunctionTableSweep; @@ -109,7 +110,7 @@ namespace BinaryNinja { std::optional ProcessVFT(uint64_t vftAddr, const ClassInfo &classInfo); public: - MicrosoftRTTIProcessor(const Ref &view, bool useMangled = true, bool checkRData = true, bool vftSweep = true); + MicrosoftRTTIProcessor(const Ref &view, bool useMangled = true, bool checkRData = true, bool vftSweep = true, bool allowAnonymous = true); Ref SerializedMetadata(); From 768bf0dd364239786b9594f60ced85b331ebda5f Mon Sep 17 00:00:00 2001 From: Brian Potchik Date: Mon, 13 Jan 2025 11:26:18 -0500 Subject: [PATCH 31/35] Move all FeatureMap processing to the background. 
--- ui/featuremap.h | 13 ++----------- ui/notificationsdispatcher.h | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/ui/featuremap.h b/ui/featuremap.h index 17407d1ea7..d21760ecc4 100644 --- a/ui/featuremap.h +++ b/ui/featuremap.h @@ -33,7 +33,7 @@ class SplitPaneWidget; \ingroup featuremap */ -class BINARYNINJAUIAPI FeatureMap : public QWidget, public BinaryNinja::BinaryDataNotification +class BINARYNINJAUIAPI FeatureMap : public QWidget { Q_OBJECT @@ -42,7 +42,6 @@ class BINARYNINJAUIAPI FeatureMap : public QWidget, public BinaryNinja::BinaryDa std::unique_ptr m_staticImage = nullptr; std::vector m_ranges; - SplitPaneWidget* m_owner = nullptr; BinaryViewRef m_data; std::unique_ptr m_dispatcher = nullptr; @@ -81,15 +80,7 @@ class BINARYNINJAUIAPI FeatureMap : public QWidget, public BinaryNinja::BinaryDa void renderDataVariable(const BinaryNinja::DataVariable& var, bool ignoreString = false); - virtual void OnAnalysisFunctionAdded(BinaryNinja::BinaryView* data, BinaryNinja::Function* func) override; - virtual void OnAnalysisFunctionRemoved(BinaryNinja::BinaryView* data, BinaryNinja::Function* func) override; - virtual void OnAnalysisFunctionUpdated(BinaryNinja::BinaryView* data, BinaryNinja::Function* func) override; - virtual void OnDataVariableAdded(BinaryNinja::BinaryView* data, const BinaryNinja::DataVariable& var) override; - virtual void OnDataVariableRemoved(BinaryNinja::BinaryView* data, const BinaryNinja::DataVariable& var) override; - virtual void OnDataVariableUpdated(BinaryNinja::BinaryView* data, const BinaryNinja::DataVariable& var) override; - virtual void OnStringFound(BinaryNinja::BinaryView* data, BNStringType type, uint64_t offset, size_t len) override; - virtual void OnStringRemoved(BinaryNinja::BinaryView* data, BNStringType type, uint64_t offset, size_t len) override; - + uint8_t getSymbolColor(const SymbolRef& symbol); void drawImageRect(uint64_t addr, size_t len, uint8_t color); virtual QSize sizeHint() const override; 
diff --git a/ui/notificationsdispatcher.h b/ui/notificationsdispatcher.h index f3b3a7bd62..0507352505 100644 --- a/ui/notificationsdispatcher.h +++ b/ui/notificationsdispatcher.h @@ -165,8 +165,8 @@ class NotificationEvent void addSource(NotificationType source) { m_source = static_cast(static_cast(m_source) | static_cast(source)); } NotificationType getSource() const { return m_source; } NotificationTypes getSources() const { return static_cast(m_source); } - bool isObjectRemoval() const { return (m_source & (NotificationType::DataVariableRemoved | NotificationType::FunctionRemoved)); } - bool isRemoval() const { return (m_source & (NotificationType::DataVariableRemoved | NotificationType::FunctionRemoved | NotificationType::SymbolRemoved)); } + bool isObjectRemoval() const { return (m_source & (NotificationType::DataVariableRemoved | NotificationType::FunctionRemoved | NotificationType::StringRemoved)); } + bool isRemoval() const { return (m_source & (NotificationType::DataVariableRemoved | NotificationType::FunctionRemoved | NotificationType::SymbolRemoved | NotificationType::StringRemoved)); } }; From 17158e9942a73327f176ef642a1cafcaf8cb7b03 Mon Sep 17 00:00:00 2001 From: Brian Potchik Date: Mon, 13 Jan 2025 17:22:37 -0500 Subject: [PATCH 32/35] Add 'ui.log.wordWrap' setting with a default of disabled. 
--- ui/logview.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ui/logview.h b/ui/logview.h index bdf596ad39..8cbda7ece9 100644 --- a/ui/logview.h +++ b/ui/logview.h @@ -254,9 +254,12 @@ class BINARYNINJAUIAPI LogView : public SidebarWidget, public FilterTarget static void setLogLevel(BNLogLevel level); static void setLogSize(size_t maxSize); + static void setWordWrap(bool wrap); static bool IsHexString(const QString& str, std::pair offsetLen); static bool StartsWith0x(const QString& str, std::pair offsetLen); + void notifyWordWrapChanged(); + void notifyFontChanged() override; void notifyThemeChanged() override; void notifyViewChanged(ViewFrame* frame) override; From 73a4c0e64801c44c67a01cf6a6d173d4e7af5c70 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Thu, 14 Nov 2024 16:04:22 -0800 Subject: [PATCH 33/35] [SharedCache] A collection of small optimizations 1. Use moves where possible to avoid unnecessary copies. 2. Remove redundant work within SymbolTableModel::updateSymbols. It calls setFilter which immediately clears then repopulates m_symbols. 3. Use unordered_map rather than map in `VM`. It is faster and the order isn't significant. 4. Avoid multiple accesses to the map with `VM` in the common cases. 5. Optimize the common case within SharedCache::HeaderForAddress. 6. Change return type of SharedCache::HeaderForAddress to avoid copying SharedCacheMachOHeaders. It is a large type that is expensive to copy. 
--- view/sharedcache/api/sharedcache.cpp | 20 ++++-- view/sharedcache/core/SharedCache.cpp | 26 +++++--- view/sharedcache/core/SharedCache.h | 8 +-- view/sharedcache/core/VM.cpp | 92 ++++++++++++++------------- view/sharedcache/core/VM.h | 1 + view/sharedcache/ui/dsctriage.cpp | 1 - 6 files changed, 82 insertions(+), 66 deletions(-) diff --git a/view/sharedcache/api/sharedcache.cpp b/view/sharedcache/api/sharedcache.cpp index e764531cd2..1c51a37753 100644 --- a/view/sharedcache/api/sharedcache.cpp +++ b/view/sharedcache/api/sharedcache.cpp @@ -3,6 +3,7 @@ // #include "sharedcacheapi.h" +#include namespace SharedCacheAPI { @@ -46,6 +47,7 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { result.push_back(value[i]); @@ -76,13 +78,14 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { DSCMemoryRegion region; region.vmAddress = value[i].vmAddress; region.size = value[i].size; region.prettyName = value[i].name; - result.push_back(region); + result.push_back(std::move(region)); } BNDSCViewFreeLoadedRegions(value, count); @@ -98,20 +101,22 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { BackingCache cache; cache.path = value[i].path; cache.isPrimary = value[i].isPrimary; + cache.mappings.reserve(value[i].mappingCount); for (size_t j = 0; j < value[i].mappingCount; j++) { BackingCacheMapping mapping; mapping.vmAddress = value[i].mappings[j].vmAddress; mapping.size = value[i].mappings[j].size; mapping.fileOffset = value[i].mappings[j].fileOffset; - cache.mappings.push_back(mapping); + cache.mappings.push_back(std::move(mapping)); } - result.push_back(cache); + result.push_back(std::move(cache)); } BNDSCViewFreeBackingCaches(value, count); @@ -128,11 +133,13 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { DSCImage img; 
img.name = value[i].name; img.headerAddress = value[i].headerAddress; + img.mappings.reserve(value[i].mappingCount); for (size_t j = 0; j < value[i].mappingCount; j++) { DSCImageMemoryMapping mapping; @@ -142,9 +149,9 @@ namespace SharedCacheAPI { mapping.rawViewOffset = value[i].mappings[j].rawViewOffset; mapping.size = value[i].mappings[j].size; mapping.loaded = value[i].mappings[j].loaded; - img.mappings.push_back(mapping); + img.mappings.push_back(std::move(mapping)); } - result.push_back(img); + result.push_back(std::move(img)); } BNDSCViewFreeAllImages(value, count); @@ -161,13 +168,14 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { DSCSymbol sym; sym.address = value[i].address; sym.name = value[i].name; sym.image = value[i].image; - result.push_back(sym); + result.push_back(std::move(sym)); } BNDSCViewFreeSymbols(value, count); diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 33a4654da9..4ebdef444f 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -130,9 +130,9 @@ std::shared_ptr ViewSpecificStateForView(RefGetFile()->GetSessionId()); } -std::string base_name(std::string const& path) +std::string base_name(std::string_view const& path) { - return path.substr(path.find_last_of("/\\") + 1); + return std::string(path.substr(path.find_last_of("/\\") + 1)); } BNSegmentFlag SegmentFlagsFromMachOProtections(int initProt, int maxProt) { @@ -1456,8 +1456,13 @@ std::optional SharedCache::GetImageStart(std::string installName) return {}; } -std::optional SharedCache::HeaderForAddress(uint64_t address) +const SharedCacheMachOHeader* SharedCache::HeaderForAddress(uint64_t address) { + // It is very common for `HeaderForAddress` to be called with an address corresponding to a header. 
+ if (auto it = State().headers.find(address); it != State().headers.end()) { + return &it->second; + } + // We _could_ mark each page with the image start? :grimacing emoji: // But that'd require mapping pages :grimacing emoji: :grimacing emoji: // There's not really any other hacks that could make this faster, that I can think of... @@ -1467,11 +1472,12 @@ std::optional SharedCache::HeaderForAddress(uint64_t add { if (segment.vmaddr <= address && segment.vmaddr + segment.vmsize > address) { - return header; + return &header; } } } - return {}; + + return nullptr; } std::string SharedCache::NameForAddress(uint64_t address) @@ -1761,7 +1767,7 @@ static void ProcessObjCSectionsForImageWithName(std::string baseName, std::share } } -void SharedCache::ProcessObjCSectionsForImageWithInstallName(std::string installName) +void SharedCache::ProcessObjCSectionsForImageWithInstallName(std::string_view installName) { bool processCFStrings; bool processObjCMetadata; @@ -1806,7 +1812,7 @@ void SharedCache::ProcessAllObjCSections() } } -bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObjC) +bool SharedCache::LoadImageWithInstallName(std::string_view installName, bool skipObjC) { auto settings = m_dscView->GetLoadSettings(VIEW_NAME); @@ -1815,7 +1821,7 @@ bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObj DeserializeFromRawView(); WillMutateState(); - m_logger->LogInfo("Loading image %s", installName.c_str()); + m_logger->LogInfo("Loading image %.*s", installName.size(), installName.data()); auto vm = GetVMMap(); CacheImage* targetImage = nullptr; @@ -1880,7 +1886,7 @@ bool SharedCache::LoadImageWithInstallName(std::string installName, bool skipObj if (regionsToLoad.empty()) { - m_logger->LogWarn("No regions to load for image %s", installName.c_str()); + m_logger->LogWarn("No regions to load for image %.*s", installName.size(), installName.data()); return false; } @@ -1919,7 +1925,7 @@ bool 
SharedCache::LoadImageWithInstallName(std::string installName, bool skipObj return true; } -std::optional SharedCache::LoadHeaderForAddress(std::shared_ptr vm, uint64_t address, std::string installName) +std::optional SharedCache::LoadHeaderForAddress(std::shared_ptr vm, uint64_t address, std::string_view installName) { SharedCacheMachOHeader header; diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index b5a0491fe2..053473bf7a 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -584,11 +584,11 @@ namespace SharedCacheCore { void ParseAndApplySlideInfoForFile(std::shared_ptr file); std::optional GetImageStart(std::string installName); - std::optional HeaderForAddress(uint64_t); - bool LoadImageWithInstallName(std::string installName, bool skipObjC); + const SharedCacheMachOHeader* HeaderForAddress(uint64_t); + bool LoadImageWithInstallName(std::string_view installName, bool skipObjC); bool LoadSectionAtAddress(uint64_t address); bool LoadImageContainingAddress(uint64_t address, bool skipObjC); - void ProcessObjCSectionsForImageWithInstallName(std::string installName); + void ProcessObjCSectionsForImageWithInstallName(std::string_view installName); void ProcessAllObjCSections(); std::string NameForAddress(uint64_t address); std::string ImageNameForAddress(uint64_t address); @@ -618,7 +618,7 @@ namespace SharedCacheCore { private: std::optional LoadHeaderForAddress( - std::shared_ptr vm, uint64_t address, std::string installName); + std::shared_ptr vm, uint64_t address, std::string_view installName); void InitializeHeader( Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad); void ReadExportNode(std::vector>& symbolList, SharedCacheMachOHeader& header, DataBuffer& buffer, diff --git a/view/sharedcache/core/VM.cpp b/view/sharedcache/core/VM.cpp index 5aca51d908..97ed940c7b 100644 --- a/view/sharedcache/core/VM.cpp +++ b/view/sharedcache/core/VM.cpp @@ -206,52 +206,54 
@@ void MMAP::Unmap() std::shared_ptr> MMappedFileAccessor::Open(BinaryNinja::Ref dscView, const uint64_t sessionID, const std::string &path, std::function)> postAllocationRoutine) { std::scoped_lock lock(fileAccessorsMutex); - if (fileAccessors.count(path) == 0) - { - auto fileAcccessor = std::shared_ptr>(new SelfAllocatingWeakPtr( - // Allocator logic for the SelfAllocatingWeakPtr - [path=path, sessionID=sessionID, dscView](){ - std::unique_lock _lock(fileAccessorDequeMutex); - - // Iterate through held references and start removing them until we can get a file pointer - // FIXME: This could clear all currently used file pointers and still not get one. FIX! - // We should probably use a condition variable here to wait for a file pointer to be released!!! - for (auto& [_, fileAccessorDeque] : fileAccessorReferenceHolder) - { - if (fileAccessorSemaphore.try_acquire()) - break; - fileAccessorDeque.pop_front(); - } - - mmapCount++; - _lock.unlock(); - auto accessor = std::shared_ptr(new MMappedFileAccessor(ResolveFilePath(dscView, path)), [](MMappedFileAccessor* accessor){ - // worker thread or we can deadlock on exit here. - BinaryNinja::WorkerEnqueue([accessor](){ - fileAccessorSemaphore.release(); - mmapCount--; - if (fileAccessors.count(accessor->m_path)) - { - std::scoped_lock lock(fileAccessorsMutex); - fileAccessors.erase(accessor->m_path); - } - delete accessor; - }, "MMappedFileAccessor Destructor"); - }); - _lock.lock(); - // If some background thread has managed to try and open a file when the BV was already closed, - // we can still give them the file they want so they dont crash, but as soon as they let go it's gone. 
- if (!blockedSessionIDs.count(sessionID)) - fileAccessorReferenceHolder[sessionID].push_back(accessor); - return accessor; - }, - [postAllocationRoutine=postAllocationRoutine](std::shared_ptr accessor){ - if (postAllocationRoutine) - postAllocationRoutine(accessor); - })); - fileAccessors.insert_or_assign(path, fileAcccessor); + if (auto it = fileAccessors.find(path); it != fileAccessors.end()) { + return it->second; } - return fileAccessors.at(path); + + auto fileAcccessor = std::shared_ptr>(new SelfAllocatingWeakPtr( + // Allocator logic for the SelfAllocatingWeakPtr + [path=path, sessionID=sessionID, dscView](){ + std::unique_lock _lock(fileAccessorDequeMutex); + + // Iterate through held references and start removing them until we can get a file pointer + // FIXME: This could clear all currently used file pointers and still not get one. FIX! + // We should probably use a condition variable here to wait for a file pointer to be released!!! + for (auto& [_, fileAccessorDeque] : fileAccessorReferenceHolder) + { + if (fileAccessorSemaphore.try_acquire()) + break; + fileAccessorDeque.pop_front(); + } + + mmapCount++; + _lock.unlock(); + auto accessor = std::shared_ptr(new MMappedFileAccessor(ResolveFilePath(dscView, path)), [](MMappedFileAccessor* accessor){ + // worker thread or we can deadlock on exit here. + BinaryNinja::WorkerEnqueue([accessor](){ + fileAccessorSemaphore.release(); + mmapCount--; + if (fileAccessors.count(accessor->m_path)) + { + std::scoped_lock lock(fileAccessorsMutex); + fileAccessors.erase(accessor->m_path); + } + delete accessor; + }, "MMappedFileAccessor Destructor"); + }); + _lock.lock(); + // If some background thread has managed to try and open a file when the BV was already closed, + // we can still give them the file they want so they dont crash, but as soon as they let go it's gone. 
+ if (!blockedSessionIDs.count(sessionID)) + fileAccessorReferenceHolder[sessionID].push_back(accessor); + return accessor; + }, + [postAllocationRoutine=postAllocationRoutine](std::shared_ptr accessor){ + if (postAllocationRoutine) + postAllocationRoutine(accessor); + })); + + fileAccessors.insert_or_assign(path, fileAcccessor); + return fileAcccessor; } diff --git a/view/sharedcache/core/VM.h b/view/sharedcache/core/VM.h index e47cf15ef4..272d8f6ffa 100644 --- a/view/sharedcache/core/VM.h +++ b/view/sharedcache/core/VM.h @@ -6,6 +6,7 @@ #define SHAREDCACHE_VM_H #include #include +#include void VMShutdown(); diff --git a/view/sharedcache/ui/dsctriage.cpp b/view/sharedcache/ui/dsctriage.cpp index 008a4dd69e..9f456d7234 100644 --- a/view/sharedcache/ui/dsctriage.cpp +++ b/view/sharedcache/ui/dsctriage.cpp @@ -448,7 +448,6 @@ QVariant SymbolTableModel::headerData(int section, Qt::Orientation orientation, } void SymbolTableModel::updateSymbols() { - m_symbols = m_parent->m_symbols; setFilter(m_filter); } From c3f5d0a2a470b2395629e7af6df6eb9454a3b593 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Thu, 14 Nov 2024 11:29:54 -0800 Subject: [PATCH 34/35] Use persistent data structures courtesy of immer [immer](https://github.com/arximboldi/immer) provides persistent, immutable data structures such as vectors and maps. These data structures support passing by value without copying any data and structural sharing to copy only a subset of data when a data structure is mutated. immer is published under the Boost Software License which should be compatible with its use in this context. Using these data structures eliminates a lot of the unnecessary copying of the shared cache's state when retrieving it from the view cache and beginning to mutate it. Instead of all of the vectors and maps contained within the state being copied, only the portions of the vectors or maps that are mutated end up being copied. 
The downside is that the APIs used when mutating are less ergonomic than using the native C++ types. The upside is that this cuts the time taken for the initial load and analysis of a macOS shared cache to around 45 seconds (from 70 seconds with the basic CoW implementation in #6129) and cuts the time taken to load and analyze AppKit from 14 minutes to around 8.5 minutes. --- .gitmodules | 3 + vendor/immer | 1 + view/sharedcache/CMakeLists.txt | 2 +- view/sharedcache/api/CMakeLists.txt | 2 +- view/sharedcache/core/CMakeLists.txt | 2 +- .../sharedcache/core/MetadataSerializable.cpp | 180 +++++++ .../sharedcache/core/MetadataSerializable.hpp | 40 ++ view/sharedcache/core/SharedCache.cpp | 482 +++++++++++++----- view/sharedcache/core/SharedCache.h | 61 ++- view/sharedcache/workflow/CMakeLists.txt | 2 +- 10 files changed, 603 insertions(+), 172 deletions(-) create mode 160000 vendor/immer diff --git a/.gitmodules b/.gitmodules index 13b8e29cc0..76abdfd276 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,3 +19,6 @@ [submodule "rust/examples/pdb-ng/pdb-0.8.0-patched"] path = rust/examples/pdb-ng/pdb-0.8.0-patched url = https://github.com/Vector35/pdb-rs.git +[submodule "vendor/immer"] + path = vendor/immer + url = https://github.com/arximboldi/immer.git diff --git a/vendor/immer b/vendor/immer new file mode 160000 index 0000000000..df6ef46d97 --- /dev/null +++ b/vendor/immer @@ -0,0 +1 @@ +Subproject commit df6ef46d97e1fe81f397015b9aeb32505cef653b diff --git a/view/sharedcache/CMakeLists.txt b/view/sharedcache/CMakeLists.txt index 407fb0d565..b0734725a0 100644 --- a/view/sharedcache/CMakeLists.txt +++ b/view/sharedcache/CMakeLists.txt @@ -62,7 +62,7 @@ set_target_properties(sharedcache PROPERTIES POSITION_INDEPENDENT_CODE ON ) -target_include_directories(sharedcache PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/core ${CMAKE_CURRENT_SOURCE_DIR}/api ${CMAKE_CURRENT_SOURCE_DIR}/workflow) +target_include_directories(sharedcache PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/core ${CMAKE_CURRENT_SOURCE_DIR}/api ${CMAKE_CURRENT_SOURCE_DIR}/workflow ${BN_API_PATH}/vendor/immer) target_link_libraries(sharedcache PUBLIC sharedcacheapi binaryninjaapi sharedcachecore sharedcacheworkflow) diff --git a/view/sharedcache/api/CMakeLists.txt b/view/sharedcache/api/CMakeLists.txt index c14674a77e..eb6526aeb4 100644 --- a/view/sharedcache/api/CMakeLists.txt +++ b/view/sharedcache/api/CMakeLists.txt @@ -64,7 +64,7 @@ endfunction() get_recursive_include_dirs(binaryninjaapi INCLUDES) target_include_directories(sharedcacheapi - PUBLIC ${PROJECT_SOURCE_DIR} ${INCLUDES}) + PUBLIC ${PROJECT_SOURCE_DIR} ${INCLUDES} ${BN_API_PATH}/vendor/immer) set_target_properties(sharedcacheapi PROPERTIES CXX_STANDARD 17 diff --git a/view/sharedcache/core/CMakeLists.txt b/view/sharedcache/core/CMakeLists.txt index 22c7ea2846..db4577c7df 100644 --- a/view/sharedcache/core/CMakeLists.txt +++ b/view/sharedcache/core/CMakeLists.txt @@ -77,7 +77,7 @@ target_compile_definitions(sharedcachecore PRIVATE ${COMPILE_DEFS}) target_compile_definitions(sharedcachecore PRIVATE SHAREDCACHE_LIBRARY ${COMPILE_DEFS}) -target_include_directories(sharedcachecore PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDES}) +target_include_directories(sharedcachecore PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDES} ${BN_API_PATH}/vendor/immer) set_target_properties(sharedcachecore PROPERTIES CXX_STANDARD 17 diff --git a/view/sharedcache/core/MetadataSerializable.cpp b/view/sharedcache/core/MetadataSerializable.cpp index ad8959f262..f8771fa98b 100644 --- a/view/sharedcache/core/MetadataSerializable.cpp +++ b/view/sharedcache/core/MetadataSerializable.cpp @@ -140,6 +140,46 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve b.emplace_back(i.GetString()); } +void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b) +{ + auto transient = b.transient(); + for (auto& i : 
context.doc[name.data()].GetArray()) + transient.set(i.GetArray()[0].GetUint64(), i.GetArray()[1].GetString()); + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + transient.set(i.GetArray()[0].GetUint64(), i.GetArray()[1].GetUint64()); + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::map>& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + std::string key = i.GetArray()[0].GetString(); + immer::map_transient memArray; + for (auto& member : i.GetArray()[1].GetArray()) + { + memArray.set(member.GetArray()[0].GetUint64(), member.GetArray()[1].GetUint64()); + } + transient.set(key, std::move(memArray).persistent()); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + transient.set(i.GetArray()[0].GetString(), i.GetArray()[1].GetString()); + b = std::move(transient).persistent(); +} + // Note: This flattens the pair into [first, second.first, second.second] with no nested arrays. 
void Serialize(SerializationContext& context, const std::pair>& value) { @@ -203,6 +243,77 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve } } +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + transient.push_back(i.GetString()); + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>>& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + std::pair> j; + j.first = i.GetArray()[0].GetUint64(); + j.second.first = i.GetArray()[1].GetUint64(); + j.second.second = i.GetArray()[2].GetUint64(); + transient.push_back(j); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + std::pair j; + j.first = i.GetArray()[0].GetUint64(); + j.second = i.GetArray()[1].GetBool(); + transient.push_back(j); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + transient.push_back(i.GetUint64()); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + transient.set(i.GetArray()[0].GetString(), i.GetArray()[1].GetUint64()); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>>>& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { 
+ std::pair>> j; + j.first = i.GetArray()[0].GetUint64(); + for (auto& k : i.GetArray()[1].GetArray()) + { + j.second.push_back({k.GetArray()[0].GetUint64(), k.GetArray()[1].GetString()}); + } + transient.push_back({j.first, std::move(j.second).persistent()}); + } + b = std::move(transient).persistent(); +} + void Serialize(SerializationContext& context, const mach_header_64& value) { context.writer.StartArray(); Serialize(context, value.magic); @@ -428,6 +539,35 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve } } +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto bArr = context.doc[name.data()].GetArray(); + auto transient = b.transient(); + for (auto& s : bArr) + { + section_64 sec; + auto s2 = s.GetArray(); + std::string sectNameStr = s2[0].GetString(); + memset(sec.sectname, 0, 16); + memcpy(sec.sectname, sectNameStr.c_str(), sectNameStr.size()); + std::string segNameStr = s2[1].GetString(); + memset(sec.segname, 0, 16); + memcpy(sec.segname, segNameStr.c_str(), segNameStr.size()); + sec.addr = s2[2].GetUint64(); + sec.size = s2[3].GetUint64(); + sec.offset = s2[4].GetUint(); + sec.align = s2[5].GetUint(); + sec.reloff = s2[6].GetUint(); + sec.nreloc = s2[7].GetUint(); + sec.flags = s2[8].GetUint(); + sec.reserved1 = s2[9].GetUint(); + sec.reserved2 = s2[10].GetUint(); + sec.reserved3 = s2[11].GetUint(); + transient.push_back(std::move(sec)); + } + b = std::move(transient).persistent(); +} + void Serialize(SerializationContext& context, const linkedit_data_command& value) { context.writer.StartArray(); @@ -501,6 +641,31 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve } } +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto bArr = context.doc[name.data()].GetArray(); + auto transient = b.transient(); + for (auto& s : bArr) + { + segment_command_64 sec; + auto s2 = s.GetArray(); + std::string 
segNameStr = s2[0].GetString(); + memset(sec.segname, 0, 16); + memcpy(sec.segname, segNameStr.c_str(), segNameStr.size()); + sec.vmaddr = s2[1].GetUint64(); + sec.vmsize = s2[2].GetUint64(); + sec.fileoff = s2[3].GetUint64(); + sec.filesize = s2[4].GetUint64(); + sec.maxprot = s2[5].GetUint(); + sec.initprot = s2[6].GetUint(); + sec.nsects = s2[7].GetUint(); + sec.flags = s2[8].GetUint(); + transient.push_back(std::move(sec)); + } + b = std::move(transient).persistent(); +} + + void Serialize(SerializationContext& context, const build_version_command& value) { context.writer.StartArray(); @@ -545,4 +710,19 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve } } +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto bArr = context.doc[name.data()].GetArray(); + auto transient = b.transient(); + for (auto& s : bArr) + { + build_tool_version sec; + auto s2 = s.GetArray(); + sec.tool = s2[0].GetUint(); + sec.version = s2[1].GetUint(); + transient.push_back(sec); + } + b = std::move(transient).persistent(); +} + } // namespace SharedCacheCore diff --git a/view/sharedcache/core/MetadataSerializable.hpp b/view/sharedcache/core/MetadataSerializable.hpp index 7b44ccbdbf..deb87b07f8 100644 --- a/view/sharedcache/core/MetadataSerializable.hpp +++ b/view/sharedcache/core/MetadataSerializable.hpp @@ -40,6 +40,10 @@ #include "rapidjson/prettywriter.h" #include "../api/sharedcachecore.h" #include "view/macho/machoview.h" +#include "immer/map.hpp" +#include "immer/vector.hpp" +#include "immer/vector_transient.hpp" +#include "immer/map_transient.hpp" #ifndef SHAREDCACHE_CORE_METADATASERIALIZABLE_HPP #define SHAREDCACHE_CORE_METADATASERIALIZABLE_HPP @@ -191,6 +195,29 @@ void Serialize(SerializationContext& context, const std::vector& values) context.writer.EndArray(); } +template +void Serialize(SerializationContext& context, const immer::map& value) +{ + context.writer.StartArray(); + for (auto& pair : 
value) + { + Serialize(context, pair); + } + context.writer.EndArray(); +} + +template +void Serialize(SerializationContext& context, const immer::vector& values) +{ + context.writer.StartArray(); + for (const auto& value : values) + { + Serialize(context, value); + } + context.writer.EndArray(); +} + + SHAREDCACHE_FFI_API void Serialize(SerializationContext& context, const char*); SHAREDCACHE_FFI_API void Serialize(SerializationContext& context, bool b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, bool& b); @@ -224,6 +251,16 @@ SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::strin SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, std::vector& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, std::unordered_map& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, std::vector>>>& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::map>& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>>& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, 
immer::map& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>>>& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const mach_header_64& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, mach_header_64& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const symtab_command& b); @@ -238,15 +275,18 @@ SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const function_starts_ SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, function_starts_command& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const section_64& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, std::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, immer::vector& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const linkedit_data_command& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, linkedit_data_command& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const segment_command_64& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, segment_command_64& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, std::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, immer::vector& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const build_version_command& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, build_version_command& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const build_tool_version& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, std::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, 
immer::vector& b); } // namespace SharedCacheCore diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 4ebdef444f..137e82c330 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -35,6 +35,8 @@ #include #include +#include "immer/flex_vector.hpp" +#include "immer/vector_transient.hpp" using namespace BinaryNinja; using namespace SharedCacheCore; @@ -57,23 +59,23 @@ int count_trailing_zeros(uint64_t value) { struct SharedCache::State { - std::unordered_map>>> + immer::map>>> exportInfos; - std::unordered_map>>> + immer::map>>> symbolInfos; - std::unordered_map imageStarts; - std::unordered_map headers; + immer::map imageStarts; + immer::map headers; - std::vector images; + immer::vector images; - std::vector regionsMappedIntoMemory; + immer::vector regionsMappedIntoMemory; - std::vector backingCaches; + immer::vector backingCaches; - std::vector stubIslandRegions; // TODO honestly both of these should be refactored into nonImageRegions. :p - std::vector dyldDataRegions; - std::vector nonImageRegions; + immer::vector stubIslandRegions; // TODO honestly both of these should be refactored into nonImageRegions. 
:p + immer::vector dyldDataRegions; + immer::vector nonImageRegions; std::optional> objcOptimizationDataRange; @@ -328,21 +330,24 @@ void SharedCache::PerformInitialLoad() cache.isPrimary = true; cache.path = path; + auto mappings = cache.mappings.transient(); for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); - cache.mappings.push_back(mapping); + mappings.push_back(mapping); } - MutableState().backingCaches.push_back(std::move(cache)); + cache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; - + auto imageStarts = State().imageStarts.transient(); for (size_t i = 0; i < primaryCacheHeader.imagesCountOld; i++) { baseFile->Read(&img, primaryCacheHeader.imagesOffsetOld + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - MutableState().imageStarts[iname] = img.address; + imageStarts.set(iname, img.address); } + MutableState().imageStarts = std::move(imageStarts).persistent(); m_logger->LogInfo("Found %d images in the shared cache", primaryCacheHeader.imagesCountOld); @@ -355,6 +360,7 @@ void SharedCache::PerformInitialLoad() } baseFile.reset(); // No longer needed, we're about to remap this file into VM space so we can load these. 
uint64_t i = 0; + auto stubIslandRegions = State().stubIslandRegions.transient(); for (auto address : addresses) { i++; @@ -373,10 +379,11 @@ void SharedCache::PerformInitialLoad() std::string segNameStr = std::string(segName); stubIslandRegion.prettyName = "dyld_shared_cache_branch_islands_" + std::to_string(i) + "::" + segNameStr; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); + stubIslandRegions.push_back(std::move(stubIslandRegion)); } } } + MutableState().stubIslandRegions = std::move(stubIslandRegions).persistent(); } m_logger->LogInfo("Found %d branch pools in the shared cache", primaryCacheHeader.branchPoolsCount); @@ -391,21 +398,23 @@ void SharedCache::PerformInitialLoad() BackingCache cache; cache.isPrimary = true; cache.path = path; - + auto mappings = cache.mappings.transient(); for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); - cache.mappings.push_back(mapping); + mappings.push_back(mapping); } - MutableState().backingCaches.push_back(std::move(cache)); + cache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; + auto imageStarts = State().imageStarts.transient(); for (size_t i = 0; i < primaryCacheHeader.imagesCount; i++) { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - MutableState().imageStarts[iname] = img.address; + imageStarts.set(iname, img.address); } if (primaryCacheHeader.branchPoolsCount) @@ -413,10 +422,12 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - 
MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = - baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.set("dyld_shared_cache_branch_islands_" + std::to_string(i), + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))); } } + MutableState().imageStarts = std::move(imageStarts).persistent(); + std::string mainFileName = base_name(path); if (auto projectFile = m_dscView->GetFile()->GetProjectFile()) mainFileName = projectFile->GetName(); @@ -463,12 +474,14 @@ void SharedCache::PerformInitialLoad() subCache.isPrimary = false; subCache.path = subCachePath; + auto mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - subCache.mappings.push_back(subCacheMapping); + mappings.push_back(subCacheMapping); } + subCache.mappings = std::move(mappings).persistent(); if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 && subCacheHeader.imagesTextOffset == 0) @@ -481,10 +494,10 @@ void SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); + MutableState().stubIslandRegions = State().stubIslandRegions.push_back(std::move(stubIslandRegion)); } - MutableState().backingCaches.push_back(std::move(subCache)); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); } break; } @@ -496,20 +509,23 @@ void SharedCache::PerformInitialLoad() cache.isPrimary = true; cache.path = path; + auto mappings = cache.mappings.transient(); for (size_t i = 0; i < 
primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); - cache.mappings.push_back(mapping); + mappings.push_back(mapping); } - MutableState().backingCaches.push_back(std::move(cache)); + cache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; + auto imageStarts = State().imageStarts.transient(); for (size_t i = 0; i < primaryCacheHeader.imagesCount; i++) { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - MutableState().imageStarts[iname] = img.address; + imageStarts.set(iname, img.address); } if (primaryCacheHeader.branchPoolsCount) @@ -517,10 +533,11 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = - baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.set("dyld_shared_cache_branch_islands_" + std::to_string(i), + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))); } } + MutableState().imageStarts = std::move(imageStarts).persistent(); std::string mainFileName = base_name(path); if (auto projectFile = m_dscView->GetFile()->GetProjectFile()) @@ -551,14 +568,15 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info subCacheMapping {}; + auto mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - subCache.mappings.push_back(subCacheMapping); + mappings.push_back(subCacheMapping); } - - 
MutableState().backingCaches.push_back(std::move(subCache)); + subCache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 && subCacheHeader.imagesTextOffset == 0) @@ -571,7 +589,7 @@ void SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); + MutableState().stubIslandRegions = State().stubIslandRegions.push_back(std::move(stubIslandRegion)); } } @@ -592,15 +610,17 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info subCacheMapping {}; BackingCache subCache; + mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - subCache.mappings.push_back(subCacheMapping); + mappings.push_back(subCacheMapping); } + subCache.mappings = std::move(mappings).persistent(); - MutableState().backingCaches.push_back(std::move(subCache)); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); break; } case iOS16CacheFormat: @@ -611,21 +631,23 @@ void SharedCache::PerformInitialLoad() cache.isPrimary = true; cache.path = path; + auto mappings = cache.mappings.transient(); for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); - cache.mappings.push_back(mapping); + mappings.push_back(mapping); } - - MutableState().backingCaches.push_back(std::move(cache)); + cache.mappings = std::move(mappings).persistent(); + 
MutableState().backingCaches = State().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; + auto imageStarts = State().imageStarts.transient(); for (size_t i = 0; i < primaryCacheHeader.imagesCount; i++) { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - MutableState().imageStarts[iname] = img.address; + imageStarts.set(iname, img.address); } if (primaryCacheHeader.branchPoolsCount) @@ -633,10 +655,11 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = - baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.set("dyld_shared_cache_branch_islands_" + std::to_string(i), + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))); } } + MutableState().imageStarts = std::move(imageStarts).persistent(); std::string mainFileName = base_name(path); if (auto projectFile = m_dscView->GetFile()->GetProjectFile()) @@ -687,12 +710,13 @@ void SharedCache::PerformInitialLoad() BackingCache subCache; subCache.isPrimary = false; subCache.path = subCachePath; + auto mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - subCache.mappings.push_back(subCacheMapping); + mappings.push_back(subCacheMapping); if (subCachePath.find(".dylddata") != std::string::npos) { @@ -704,11 +728,12 @@ void SharedCache::PerformInitialLoad() dyldDataRegion.size = size; dyldDataRegion.prettyName = subCacheFilename + "::_data" + std::to_string(j); dyldDataRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable); - 
MutableState().dyldDataRegions.push_back(std::move(dyldDataRegion)); + MutableState().dyldDataRegions = State().dyldDataRegions.push_back(std::move(dyldDataRegion)); } } + subCache.mappings = std::move(mappings).persistent(); - MutableState().backingCaches.push_back(std::move(subCache)); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 && subCacheHeader.imagesTextOffset == 0) @@ -721,7 +746,7 @@ void SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); + MutableState().stubIslandRegions = State().stubIslandRegions.push_back(std::move(stubIslandRegion)); } } @@ -744,15 +769,15 @@ void SharedCache::PerformInitialLoad() subCache.path = subCachePath; dyld_cache_mapping_info subCacheMapping {}; - + auto mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), sizeof(subCacheMapping)); - subCache.mappings.push_back(subCacheMapping); + mappings.push_back(subCacheMapping); } - - MutableState().backingCaches.push_back(std::move(subCache)); + subCache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); } catch (...) 
{} @@ -771,6 +796,9 @@ void SharedCache::PerformInitialLoad() m_logger->LogError("Failed to map VM pages for Shared Cache on initial load, this is fatal."); return; } + + auto headers = State().headers.transient(); + auto images = State().images.transient(); for (const auto& start : State().imageStarts) { try { @@ -782,10 +810,11 @@ void SharedCache::PerformInitialLoad() auto mapping = vm->MappingAtAddress(imageHeader->linkeditSegment.vmaddr); imageHeader->exportTriePath = mapping.first.filePath; } - MutableState().headers[start.second] = imageHeader.value(); + headers.set(start.second, imageHeader.value()); CacheImage image; image.installName = start.first; image.headerLocation = start.second; + auto regions = image.regions.transient(); for (const auto& segment : imageHeader->segments) { char segName[17]; @@ -804,9 +833,10 @@ void SharedCache::PerformInitialLoad() flags |= SegmentExecutable; sectionRegion.flags = (BNSegmentFlag)flags; - image.regions.push_back(sectionRegion); + regions.push_back(sectionRegion); } - MutableState().images.push_back(image); + image.regions = std::move(regions).persistent(); + images.push_back(std::move(image)); } else { @@ -818,9 +848,12 @@ void SharedCache::PerformInitialLoad() m_logger->LogError("Failed to load Mach-O header for %s: %s", start.first.c_str(), ex.what()); } } + MutableState().headers = std::move(headers).persistent(); + MutableState().images = std::move(images).persistent(); m_logger->LogInfo("Loaded %d Mach-O headers", State().headers.size()); + auto nonImageRegions = State().nonImageRegions.transient(); for (const auto& cache : State().backingCaches) { size_t i = 0; @@ -831,14 +864,17 @@ void SharedCache::PerformInitialLoad() region.size = mapping.size; region.prettyName = base_name(cache.path) + "::" + std::to_string(i); region.flags = SegmentFlagsFromMachOProtections(mapping.initProt, mapping.maxProt); - MutableState().nonImageRegions.push_back(std::move(region)); + nonImageRegions.push_back(std::move(region)); 
i++; } } + MutableState().nonImageRegions = std::move(nonImageRegions).persistent(); // Iterate through each Mach-O header if (!State().dyldDataRegions.empty()) { + // Removal / insertion is not ergonomic with `immer::vector` so use std::vector for this instead. + std::vector dyldDataRegions(State().dyldDataRegions.begin(), State().dyldDataRegions.end()); for (const auto& [headerKey, header] : State().headers) { // Iterate through each segment of the header @@ -848,7 +884,7 @@ void SharedCache::PerformInitialLoad() uint64_t segmentEnd = segmentStart + segment.vmsize; // Iterate through each region in m_dyldDataRegions - for (auto it = State().dyldDataRegions.begin(); it != State().dyldDataRegions.end();) + for (auto it = dyldDataRegions.begin(); it != dyldDataRegions.end();) { uint64_t regionStart = it->start; uint64_t regionSize = it->size; @@ -879,12 +915,12 @@ void SharedCache::PerformInitialLoad() } // Erase the original region - it = MutableState().dyldDataRegions.erase(it); + it = dyldDataRegions.erase(it); // Insert the new regions (if any) for (const auto& newRegion : newRegions) { - it = MutableState().dyldDataRegions.insert(it, newRegion); + it = dyldDataRegions.insert(it, newRegion); ++it; // Move iterator to the next position } } @@ -895,11 +931,15 @@ void SharedCache::PerformInitialLoad() } } } + // TODO(bdash): Ideally this would move out of dyldDataRegions. + MutableState().dyldDataRegions = immer::vector(dyldDataRegions.begin(), dyldDataRegions.end()); } // Iterate through each Mach-O header if (!State().nonImageRegions.empty()) { + // Removal / insertion is not ergonomic with `immer::vector` so use std::vector for this instead. 
+ std::vector nonImageRegions(State().nonImageRegions.begin(), State().nonImageRegions.end()); for (const auto& [headerKey, header] : State().headers) { // Iterate through each segment of the header @@ -909,7 +949,7 @@ void SharedCache::PerformInitialLoad() uint64_t segmentEnd = segmentStart + segment.vmsize; // Iterate through each region in m_dyldDataRegions - for (auto it = State().nonImageRegions.begin(); it != State().nonImageRegions.end();) + for (auto it = nonImageRegions.begin(); it != nonImageRegions.end();) { uint64_t regionStart = it->start; uint64_t regionSize = it->size; @@ -940,12 +980,12 @@ void SharedCache::PerformInitialLoad() } // Erase the original region - it = MutableState().nonImageRegions.erase(it); + it = nonImageRegions.erase(it); // Insert the new regions (if any) for (const auto& newRegion : newRegions) { - it = MutableState().nonImageRegions.insert(it, newRegion); + it = nonImageRegions.insert(it, newRegion); ++it; // Move iterator to the next position } } @@ -956,6 +996,8 @@ void SharedCache::PerformInitialLoad() } } } + // TODO(bdash): Ideally this would move out of nonImageRegions. + MutableState().nonImageRegions = immer::vector(nonImageRegions.begin(), nonImageRegions.end()); } SaveToDSCView(); @@ -1013,7 +1055,7 @@ void SharedCache::DeserializeFromRawView() m_metadataValid = true; WillMutateState(); MutableState().viewState = DSCViewStateUnloaded; - MutableState().images.clear(); // fixme ?? + MutableState().images = immer::vector(); } } @@ -1459,8 +1501,8 @@ std::optional SharedCache::GetImageStart(std::string installName) const SharedCacheMachOHeader* SharedCache::HeaderForAddress(uint64_t address) { // It is very common for `HeaderForAddress` to be called with an address corresponding to a header. - if (auto it = State().headers.find(address); it != State().headers.end()) { - return &it->second; + if (auto it = State().headers.find(address)) { + return it; } // We _could_ mark each page with the image start? 
:grimacing emoji: @@ -1558,18 +1600,24 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } SharedCacheMachOHeader targetHeader; - CacheImage* targetImage = nullptr; - MemoryRegion* targetSegment = nullptr; + const CacheImage* targetImage = nullptr; + decltype(State().images.begin()) targetImageIt; + const MemoryRegion* targetSegment = nullptr; + decltype(CacheImage().regions.begin()) targetSegmentIt; - for (auto& image : MutableState().images) + for (auto imageIt = State().images.begin(); imageIt != State().images.end(); ++imageIt) { - for (auto& region : image.regions) + auto& image = *imageIt; + for (auto regionIt = image.regions.begin(); regionIt != image.regions.end(); ++regionIt) { + auto& region = *regionIt; if (region.start <= address && region.start + region.size > address) { targetHeader = MutableState().headers[image.headerLocation]; targetImage = ℑ + targetImageIt = imageIt; targetSegment = ®ion; + targetSegmentIt = regionIt; break; } } @@ -1578,8 +1626,9 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } if (!targetSegment) { - for (auto& stubIsland : MutableState().stubIslandRegions) + for (auto it = State().stubIslandRegions.begin(); it != State().stubIslandRegions.end(); ++it) { + auto& stubIsland = *it; if (stubIsland.start <= address && stubIsland.start + stubIsland.size > address) { if (stubIsland.loaded) @@ -1603,11 +1652,11 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) m_dscView->AddUserSection(name, stubIsland.start, stubIsland.size, ReadOnlyCodeSectionSemantics); m_dscView->WriteBuffer(stubIsland.start, buff); - stubIsland.loaded = true; - - stubIsland.rawViewOffsetIfLoaded = rawViewEnd; - - MutableState().regionsMappedIntoMemory.push_back(stubIsland); + MemoryRegion newStubIsland(stubIsland); + newStubIsland.loaded = true; + newStubIsland.rawViewOffsetIfLoaded = rawViewEnd; + MutableState().regionsMappedIntoMemory = State().regionsMappedIntoMemory.push_back(newStubIsland); + MutableState().stubIslandRegions 
= State().stubIslandRegions.set(it.index(), std::move(newStubIsland)); SaveToDSCView(); @@ -1618,8 +1667,9 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } } - for (auto& dyldData : MutableState().dyldDataRegions) + for (auto it = State().dyldDataRegions.begin(); it != State().dyldDataRegions.end(); ++it) { + auto& dyldData = *it; if (dyldData.start <= address && dyldData.start + dyldData.size > address) { if (dyldData.loaded) @@ -1643,10 +1693,11 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) m_dscView->AddUserSection(name, dyldData.start, dyldData.size, ReadOnlyDataSectionSemantics); m_dscView->WriteBuffer(dyldData.start, buff); - dyldData.loaded = true; - dyldData.rawViewOffsetIfLoaded = rawViewEnd; - - MutableState().regionsMappedIntoMemory.push_back(dyldData); + MemoryRegion newDyldData(dyldData); + newDyldData.loaded = true; + newDyldData.rawViewOffsetIfLoaded = rawViewEnd; + MutableState().regionsMappedIntoMemory = State().regionsMappedIntoMemory.push_back(newDyldData); + MutableState().dyldDataRegions = State().dyldDataRegions.set(it.index(), std::move(newDyldData)); SaveToDSCView(); @@ -1657,8 +1708,9 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } } - for (auto& region : MutableState().nonImageRegions) + for (auto it = State().nonImageRegions.begin(); it != State().nonImageRegions.end(); ++it) { + auto& region = *it; if (region.start <= address && region.start + region.size > address) { if (region.loaded) @@ -1681,10 +1733,11 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) m_dscView->AddUserSection(name, region.start, region.size, region.flags & SegmentDenyExecute ? 
ReadOnlyDataSectionSemantics : ReadOnlyCodeSectionSemantics); m_dscView->WriteBuffer(region.start, buff); - region.loaded = true; - region.rawViewOffsetIfLoaded = rawViewEnd; - - MutableState().regionsMappedIntoMemory.push_back(region); + MemoryRegion newRegion(region); + newRegion.loaded = true; + newRegion.rawViewOffsetIfLoaded = rawViewEnd; + MutableState().regionsMappedIntoMemory = State().regionsMappedIntoMemory.push_back(newRegion); + MutableState().nonImageRegions = State().nonImageRegions.set(it.index(), std::move(newRegion)); SaveToDSCView(); @@ -1717,16 +1770,31 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) targetSegment->start, targetSegment->size, rawViewEnd, targetSegment->size, targetSegment->flags); m_dscView->WriteBuffer(targetSegment->start, buff); - targetSegment->loaded = true; - targetSegment->rawViewOffsetIfLoaded = rawViewEnd; + MemoryRegion newTargetSegment(*targetSegment); + newTargetSegment.loaded = true; + newTargetSegment.rawViewOffsetIfLoaded = rawViewEnd; + MutableState().regionsMappedIntoMemory = State().regionsMappedIntoMemory.push_back(newTargetSegment); - MutableState().regionsMappedIntoMemory.push_back(*targetSegment); + auto images = State().images; + auto regions = images[targetImageIt.index()].regions; + CacheImage newTargetImage(*targetImage); + newTargetImage.regions = regions.set(targetSegmentIt.index(), std::move(newTargetSegment)); + MutableState().images = images.set(targetImageIt.index(), std::move(newTargetImage)); SaveToDSCView(); if (!targetSegment->headerInitialized) { + targetSegment = &State().images[targetImageIt.index()].regions[targetSegmentIt.index()]; SharedCache::InitializeHeader(m_dscView, vm.get(), targetHeader, {targetSegment}); + + MemoryRegion newTargetSegment(*targetSegment); + newTargetSegment.headerInitialized = true; + auto images = State().images; + auto regions = images[targetImageIt.index()].regions; + CacheImage newTargetImage(*targetImage); + newTargetImage.regions = 
regions.set(targetSegmentIt.index(), std::move(newTargetSegment)); + MutableState().images = images.set(targetImageIt.index(), std::move(newTargetImage)); } m_dscView->AddAnalysisOption("linearsweep"); @@ -1824,22 +1892,25 @@ bool SharedCache::LoadImageWithInstallName(std::string_view installName, bool sk m_logger->LogInfo("Loading image %.*s", installName.size(), installName.data()); auto vm = GetVMMap(); - CacheImage* targetImage = nullptr; + const CacheImage* targetImage = nullptr; + decltype(State().images.begin()) targetImageIt; - for (auto& cacheImage : MutableState().images) + for (auto it = State().images.begin(); it != State().images.end(); ++it) { - if (cacheImage.installName == installName) + if (it->installName == installName) { - targetImage = &cacheImage; + targetImage = &*it; + targetImageIt = it; break; } } + auto it = State().headers.find(targetImage->headerLocation); - if (it == State().headers.end()) + if (!it) { return false; } - const auto& header = it->second; + const auto& header = *it; auto id = m_dscView->BeginUndoActions(); MutableState().viewState = DSCViewStateLoadedWithImages; @@ -1847,10 +1918,13 @@ bool SharedCache::LoadImageWithInstallName(std::string_view installName, bool sk auto reader = VMReader(vm); reader.Seek(targetImage->headerLocation); - std::vector regionsToLoad; + std::vector regionsToLoad; - for (auto& region : targetImage->regions) + auto newTargetImageRegions = targetImage->regions.transient(); + auto newRegionsMappedIntoMemory = State().regionsMappedIntoMemory.transient(); + for (auto it = targetImage->regions.begin(); it != targetImage->regions.end(); ++it) { + auto& region = *it; bool allowLoadingLinkedit = false; if (settings && settings->Contains("loader.dsc.allowLoadingLinkeditSegments")) allowLoadingLinkedit = settings->Get("loader.dsc.allowLoadingLinkeditSegments", m_dscView); @@ -1872,16 +1946,16 @@ bool SharedCache::LoadImageWithInstallName(std::string_view installName, bool sk 
m_dscView->GetParentView()->GetParentView()->WriteBuffer(rawViewEnd, buff); m_dscView->GetParentView()->WriteBuffer(rawViewEnd, buff); - region.loaded = true; - region.rawViewOffsetIfLoaded = rawViewEnd; - - MutableState().regionsMappedIntoMemory.push_back(region); + MemoryRegion newRegion(region); + newRegion.loaded = true; + newRegion.rawViewOffsetIfLoaded = rawViewEnd; + newRegionsMappedIntoMemory.push_back(newRegion); + newTargetImageRegions.set(it.index(), std::move(newRegion)); + regionsToLoad.push_back(it.index()); m_dscView->GetParentView()->AddAutoSegment(rawViewEnd, region.size, rawViewEnd, region.size, region.flags); m_dscView->AddUserSegment(region.start, region.size, rawViewEnd, region.size, region.flags); m_dscView->WriteBuffer(region.start, buff); - - regionsToLoad.push_back(®ion); } if (regionsToLoad.empty()) @@ -1890,6 +1964,13 @@ bool SharedCache::LoadImageWithInstallName(std::string_view installName, bool sk return false; } + MutableState().regionsMappedIntoMemory = std::move(newRegionsMappedIntoMemory).persistent(); + auto images = State().images; + CacheImage newTargetImage(*targetImage); + // newTargetImageRegions is intentionally not moved here as it is used again below. 
+ newTargetImage.regions = newTargetImageRegions.persistent(); + MutableState().images = images.set(targetImageIt.index(), std::move(newTargetImage)); + auto typeLib = TypeLibraryForImage(header.installName); SaveToDSCView(); @@ -1900,14 +1981,25 @@ bool SharedCache::LoadImageWithInstallName(std::string_view installName, bool sk return false; } - std::vector regions; - for (auto& region : regionsToLoad) - { - regions.push_back(region); + std::vector regions; + for (size_t idx : regionsToLoad) { + regions.push_back(&newTargetImageRegions[idx]); } SharedCache::InitializeHeader(m_dscView, vm.get(), *h, regions); + { + for (size_t idx : regionsToLoad) { + MemoryRegion newTargetSegment(newTargetImageRegions[idx]); + newTargetSegment.headerInitialized = true; + newTargetImageRegions.set(idx, std::move(newTargetSegment)); + } + auto images = State().images; + CacheImage newTargetImage(*targetImage); + newTargetImage.regions = std::move(newTargetImageRegions).persistent(); + MutableState().images = images.set(targetImageIt.index(), std::move(newTargetImage)); + } + if (!skipObjC) { bool processCFStrings; @@ -1925,9 +2017,96 @@ bool SharedCache::LoadImageWithInstallName(std::string_view installName, bool sk return true; } +struct TransientSharedCacheMachOHeader +{ + uint64_t textBase = 0; + uint64_t loadCommandOffset = 0; + mach_header_64 ident {}; + std::string identifierPrefix; + std::string installName; + + immer::vector_transient> entryPoints; + immer::vector_transient m_entryPoints; // list of entrypoints + + symtab_command symtab {}; + dysymtab_command dysymtab {}; + dyld_info_command dyldInfo {}; + routines_command_64 routines64 {}; + function_starts_command functionStarts {}; + immer::vector_transient moduleInitSections; + linkedit_data_command exportTrie {}; + linkedit_data_command chainedFixups {}; + + uint64_t relocationBase = 0; + // Section and program headers, internally use 64-bit form as it is a superset of 32-bit + immer::vector_transient segments; // only 
three types of sections __TEXT, __DATA, __IMPORT + segment_command_64 linkeditSegment = {}; + immer::vector_transient sections; + immer::vector_transient sectionNames; + + immer::vector_transient symbolStubSections; + immer::vector_transient symbolPointerSections; + + immer::vector_transient dylibs; + + build_version_command buildVersion = {}; + immer::vector_transient buildToolVersions; + + std::string exportTriePath; + + bool linkeditPresent = false; + bool dysymPresent = false; + bool dyldInfoPresent = false; + bool exportTriePresent = false; + bool chainedFixupsPresent = false; + bool routinesPresent = false; + bool functionStartsPresent = false; + bool relocatable = false; + + SharedCacheMachOHeader persistent() && { + return SharedCacheMachOHeader { + .textBase = textBase, + .loadCommandOffset = loadCommandOffset, + .ident = ident, + .identifierPrefix = std::move(identifierPrefix), + .installName = std::move(installName), + .entryPoints = std::move(entryPoints).persistent(), + .m_entryPoints = std::move(m_entryPoints).persistent(), + .symtab = std::move(symtab), + .dysymtab = std::move(dysymtab), + .dyldInfo = std::move(dyldInfo), + .routines64 = std::move(routines64), + .functionStarts = std::move(functionStarts), + .moduleInitSections = std::move(moduleInitSections).persistent(), + .exportTrie = std::move(exportTrie), + .chainedFixups = std::move(chainedFixups), + .relocationBase = relocationBase, + .segments = std::move(segments).persistent(), + .linkeditSegment = std::move(linkeditSegment), + .sections = std::move(sections).persistent(), + .sectionNames = std::move(sectionNames).persistent(), + .symbolStubSections = std::move(symbolStubSections).persistent(), + .symbolPointerSections = std::move(symbolPointerSections).persistent(), + .dylibs = std::move(dylibs).persistent(), + .buildVersion = std::move(buildVersion), + .buildToolVersions = std::move(buildToolVersions).persistent(), + .exportTriePath = std::move(exportTriePath), + .linkeditPresent = 
linkeditPresent, + .dysymPresent = dysymPresent, + .dyldInfoPresent = dyldInfoPresent, + .exportTriePresent = exportTriePresent, + .chainedFixupsPresent = chainedFixupsPresent, + .routinesPresent = routinesPresent, + .functionStartsPresent = functionStartsPresent, + .relocatable = relocatable, + }; + } +}; + + std::optional SharedCache::LoadHeaderForAddress(std::shared_ptr vm, uint64_t address, std::string_view installName) { - SharedCacheMachOHeader header; + TransientSharedCacheMachOHeader header; header.textBase = address; header.installName = installName; @@ -2330,11 +2509,11 @@ std::optional SharedCache::LoadHeaderForAddress(std::sha return {}; } - return header; + return std::move(header).persistent(); } void SharedCache::InitializeHeader( - Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad) + Ref view, VM* vm, SharedCacheMachOHeader header, const std::vector regionsToLoad) { WillMutateState(); @@ -2639,7 +2818,7 @@ void SharedCache::InitializeHeader( nlist_64 sym; memset(&sym, 0, sizeof(sym)); auto N_TYPE = 0xE; // idk - std::vector>> symbolInfos; + immer::vector_transient>> symbolInfos; for (size_t i = 0; i < header.symtab.nsyms; i++) { reader->Read(&sym, header.symtab.symoff + i * sizeof(nlist_64), sizeof(nlist_64)); @@ -2706,13 +2885,13 @@ void SharedCache::InitializeHeader( view->DefineAutoSymbol(symbolObj); symbolInfos.push_back({sym.n_value, {type, symbol}}); } - MutableState().symbolInfos[header.textBase] = symbolInfos; + MutableState().symbolInfos = State().symbolInfos.set(header.textBase, std::move(symbolInfos).persistent()); } if (header.exportTriePresent && header.linkeditPresent && vm->AddressIsMapped(header.linkeditSegment.vmaddr)) { auto symbols = SharedCache::ParseExportTrie(vm->MappingAtAddress(header.linkeditSegment.vmaddr).first.fileAccessor->lock(), header); - std::vector>> exportMapping; + immer::vector_transient>> exportMapping; for (const auto& symbol : symbols) { exportMapping.push_back({symbol->GetAddress(), 
{symbol->GetType(), symbol->GetRawName()}}); @@ -2752,14 +2931,15 @@ void SharedCache::InitializeHeader( else view->DefineAutoSymbol(symbol); } - MutableState().exportInfos[header.textBase] = std::move(exportMapping); + MutableState().exportInfos = State().exportInfos.set(header.textBase, std::move(exportMapping).persistent()); } view->EndBulkModifySymbols(); - for (auto region : regionsToLoad) - { - region->headerInitialized = true; - } + // TODO: The caller is responsible for this for now. + // for (auto region : regionsToLoad) + // { + // region->headerInitialized = true; + // } } struct ExportNode @@ -2773,6 +2953,7 @@ struct ExportNode void SharedCache::ReadExportNode(std::vector>& symbolList, SharedCacheMachOHeader& header, DataBuffer& buffer, uint64_t textBase, const std::string& currentText, size_t cursor, uint32_t endGuard) { + WillMutateState(); if (cursor > endGuard) throw ReadException(); @@ -2874,6 +3055,7 @@ std::vector>> SharedCache::LoadAllSymbolsAndW std::lock_guard initialLoadBlock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); std::vector>> symbols; + auto newExportInfos = State().exportInfos.transient(); for (const auto& img : State().images) { auto header = HeaderForAddress(img.headerLocation); @@ -2887,14 +3069,15 @@ std::vector>> SharedCache::LoadAllSymbolsAndW continue; } auto exportList = SharedCache::ParseExportTrie(mapping, *header); - std::vector>> exportMapping; + immer::vector_transient>> exportMapping; for (const auto& sym : exportList) { exportMapping.push_back({sym->GetAddress(), {sym->GetType(), sym->GetRawName()}}); symbols.push_back({img.installName, sym}); } - MutableState().exportInfos[header->textBase] = std::move(exportMapping); + newExportInfos.set(header->textBase, std::move(exportMapping).persistent()); } + MutableState().exportInfos = std::move(newExportInfos).persistent(); SaveToDSCView(); @@ -2915,9 +3098,9 @@ std::string SharedCache::SerializedImageHeaderForAddress(uint64_t address) std::string 
SharedCache::SerializedImageHeaderForName(std::string name) { - if (auto it = State().imageStarts.find(name); it != State().imageStarts.end()) + if (auto it = State().imageStarts.find(name)) { - if (auto header = HeaderForAddress(it->second)) + if (auto header = HeaderForAddress(*it)) { return header->AsString(); } @@ -2986,7 +3169,7 @@ void SharedCache::FindSymbolAtAddrAndApplyToAddr( return; } auto exportList = SharedCache::ParseExportTrie(mapping, *header); - std::vector>> exportMapping; + immer::vector_transient>> exportMapping; auto typeLib = TypeLibraryForImage(header->installName); id = m_dscView->BeginUndoActions(); m_dscView->BeginBulkModifySymbols(); @@ -3024,7 +3207,7 @@ void SharedCache::FindSymbolAtAddrAndApplyToAddr( } { std::lock_guard lock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); - MutableState().exportInfos[header->textBase] = std::move(exportMapping); + MutableState().exportInfos = State().exportInfos.set(header->textBase, std::move(exportMapping).persistent()); } m_dscView->EndBulkModifySymbols(); m_dscView->ForgetUndoActions(id); @@ -3056,7 +3239,8 @@ bool SharedCache::SaveToDSCView() } return false; } -std::vector SharedCache::GetMappedRegions() const + +immer::vector SharedCache::GetMappedRegions() const { std::lock_guard lock(m_viewSpecificState->viewOperationsThatInfluenceMetadataMutex); return State().regionsMappedIntoMemory; @@ -3446,10 +3630,11 @@ void Serialize(SerializationContext& context, const dyld_cache_mapping_info& val context.writer.EndArray(); } -void Deserialize(DeserializationContext& context, std::string_view name, std::vector& b) +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) { auto bArr = context.doc[name.data()].GetArray(); + auto transient = b.transient(); for (auto& s : bArr) { dyld_cache_mapping_info mapping; @@ -3459,8 +3644,9 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve mapping.fileOffset = s2[2].GetUint64(); 
mapping.maxProt = s2[3].GetUint(); mapping.initProt = s2[4].GetUint(); - b.push_back(mapping); + transient.push_back(mapping); } + b = std::move(transient).persistent(); } void SharedCache::Store(SerializationContext& context) const @@ -3554,82 +3740,98 @@ void SharedCache::Load(DeserializationContext& context) MutableState().viewState = static_cast(context.load("m_viewState")); MutableState().cacheFormat = static_cast(context.load("m_cacheFormat")); + auto headers = State().headers.transient(); for (auto& startAndHeader : context.doc["headers"].GetArray()) { SharedCacheMachOHeader header; header.LoadFromValue(startAndHeader); - MutableState().headers[header.textBase] = std::move(header); + headers.set(header.textBase, std::move(header)); } + MutableState().headers = std::move(headers).persistent(); Deserialize(context, "m_imageStarts", MutableState().imageStarts); Deserialize(context, "m_baseFilePath", MutableState().baseFilePath); + auto exportInfos = State().exportInfos.transient(); for (const auto& obj1 : context.doc["exportInfos"].GetArray()) { - std::vector>> innerVec; + immer::vector_transient>> innerVec; for (const auto& obj2 : obj1["value"].GetArray()) { std::pair innerPair = { (BNSymbolType)obj2["val1"].GetUint64(), obj2["val2"].GetString()}; innerVec.push_back({obj2["key"].GetUint64(), innerPair}); } - - MutableState().exportInfos[obj1["key"].GetUint64()] = std::move(innerVec); + exportInfos.set(obj1["key"].GetUint64(), std::move(innerVec).persistent()); } + MutableState().exportInfos = std::move(exportInfos).persistent(); + auto symbolInfos = State().symbolInfos.transient(); for (auto& symbolInfo : context.doc["symbolInfos"].GetArray()) { - std::vector>> - symbolInfos; + immer::vector_transient>> symbolInfoVec; for (auto& si : symbolInfo["value"].GetArray()) { - symbolInfos.push_back({si["key"].GetUint64(), + symbolInfoVec.push_back({si["key"].GetUint64(), {static_cast(si["val1"].GetUint64()), si["val2"].GetString()}}); } - 
MutableState().symbolInfos[symbolInfo["key"].GetUint64()] = std::move(symbolInfos); + symbolInfos.set(symbolInfo["key"].GetUint64(), std::move(symbolInfoVec).persistent()); } + MutableState().symbolInfos = std::move(symbolInfos).persistent(); + auto backingCaches = State().backingCaches.transient(); for (auto& bcV : context.doc["backingCaches"].GetArray()) { BackingCache bc; bc.LoadFromValue(bcV); - MutableState().backingCaches.push_back(std::move(bc)); + backingCaches.push_back(std::move(bc)); } + MutableState().backingCaches = std::move(backingCaches).persistent(); + auto images = State().images.transient(); for (auto& imgV : context.doc["images"].GetArray()) { CacheImage img; img.LoadFromValue(imgV); - MutableState().images.push_back(std::move(img)); + images.push_back(std::move(img)); } + MutableState().images = std::move(images).persistent(); + auto regionsMappedIntoMemory = State().regionsMappedIntoMemory.transient(); for (auto& rV : context.doc["regionsMappedIntoMemory"].GetArray()) { MemoryRegion r; r.LoadFromValue(rV); - MutableState().regionsMappedIntoMemory.push_back(std::move(r)); + regionsMappedIntoMemory.push_back(std::move(r)); } + MutableState().regionsMappedIntoMemory = std::move(regionsMappedIntoMemory).persistent(); + auto stubIslandRegions = State().stubIslandRegions.transient(); for (auto& siV : context.doc["stubIslands"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - MutableState().stubIslandRegions.push_back(std::move(si)); + stubIslandRegions.push_back(std::move(si)); } + MutableState().stubIslandRegions = std::move(stubIslandRegions).persistent(); + auto dyldDataRegions = State().dyldDataRegions.transient(); for (auto& siV : context.doc["dyldDataSections"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - MutableState().dyldDataRegions.push_back(std::move(si)); + dyldDataRegions.push_back(std::move(si)); } + MutableState().dyldDataRegions = std::move(dyldDataRegions).persistent(); + auto nonImageRegions = 
State().nonImageRegions.transient(); for (auto& siV : context.doc["nonImageRegions"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - MutableState().nonImageRegions.push_back(std::move(si)); + nonImageRegions.push_back(std::move(si)); } + MutableState().nonImageRegions = std::move(nonImageRegions).persistent(); m_metadataValid = true; } @@ -3676,7 +3878,7 @@ void SharedCache::WillMutateState() } -const std::vector& SharedCache::BackingCaches() const +const immer::vector& SharedCache::BackingCaches() const { return State().backingCaches; } @@ -3686,12 +3888,12 @@ DSCViewState SharedCache::ViewState() const return State().viewState; } -const std::unordered_map& SharedCache::AllImageStarts() const +const immer::map& SharedCache::AllImageStarts() const { return State().imageStarts; } -const std::unordered_map& SharedCache::AllImageHeaders() const +const immer::map& SharedCache::AllImageHeaders() const { return State().headers; } diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 053473bf7a..8fb70ad1b9 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -8,6 +8,10 @@ #include "view/macho/machoview.h" #include "MetadataSerializable.hpp" #include "../api/sharedcachecore.h" +#include "immer/map.hpp" +#include "immer/vector.hpp" +#include "immer/vector_transient.hpp" +#include "immer/map_transient.hpp" #ifndef SHAREDCACHE_SHAREDCACHE_H #define SHAREDCACHE_SHAREDCACHE_H @@ -60,7 +64,7 @@ namespace SharedCacheCore { { std::string installName; uint64_t headerLocation; - std::vector regions; + immer::vector regions; void Store(SerializationContext& context) const { @@ -80,13 +84,14 @@ namespace SharedCacheCore { MSL(installName); MSL(headerLocation); auto bArr = context.doc["regions"].GetArray(); - regions.clear(); + auto local_regions = immer::vector_transient(); for (auto& region : bArr) { MemoryRegion r; r.LoadFromString(region.GetString()); - regions.push_back(r); + 
local_regions.push_back(r); } + regions = local_regions.persistent(); } }; @@ -115,7 +120,7 @@ namespace SharedCacheCore { { std::string path; bool isPrimary = false; - std::vector mappings; + immer::vector mappings; void Store(SerializationContext& context) const; void Load(DeserializationContext& context); @@ -378,36 +383,36 @@ namespace SharedCacheCore { { uint64_t textBase = 0; uint64_t loadCommandOffset = 0; - mach_header_64 ident; + mach_header_64 ident {}; std::string identifierPrefix; std::string installName; - std::vector> entryPoints; - std::vector m_entryPoints; // list of entrypoints + immer::vector> entryPoints; + immer::vector m_entryPoints; // list of entrypoints - symtab_command symtab; - dysymtab_command dysymtab; - dyld_info_command dyldInfo; - routines_command_64 routines64; - function_starts_command functionStarts; - std::vector moduleInitSections; - linkedit_data_command exportTrie; + symtab_command symtab {}; + dysymtab_command dysymtab {}; + dyld_info_command dyldInfo {}; + routines_command_64 routines64 {}; + function_starts_command functionStarts {}; + immer::vector moduleInitSections; + linkedit_data_command exportTrie {}; linkedit_data_command chainedFixups {}; uint64_t relocationBase; // Section and program headers, internally use 64-bit form as it is a superset of 32-bit - std::vector segments; // only three types of sections __TEXT, __DATA, __IMPORT - segment_command_64 linkeditSegment; - std::vector sections; - std::vector sectionNames; + immer::vector segments; // only three types of sections __TEXT, __DATA, __IMPORT + segment_command_64 linkeditSegment {}; + immer::vector sections; + immer::vector sectionNames; - std::vector symbolStubSections; - std::vector symbolPointerSections; + immer::vector symbolStubSections; + immer::vector symbolPointerSections; - std::vector dylibs; + immer::vector dylibs; - build_version_command buildVersion; - std::vector buildToolVersions; + build_version_command buildVersion {}; + immer::vector 
buildToolVersions; std::string exportTriePath; @@ -594,20 +599,20 @@ namespace SharedCacheCore { std::string ImageNameForAddress(uint64_t address); std::vector GetAvailableImages(); - std::vector GetMappedRegions() const; + immer::vector GetMappedRegions() const; bool IsMemoryMapped(uint64_t address); std::vector>> LoadAllSymbolsAndWait(); - const std::unordered_map& AllImageStarts() const; - const std::unordered_map& AllImageHeaders() const; + const immer::map& AllImageStarts() const; + const immer::map& AllImageHeaders() const; std::string SerializedImageHeaderForAddress(uint64_t address); std::string SerializedImageHeaderForName(std::string name); void FindSymbolAtAddrAndApplyToAddr(uint64_t symbolLocation, uint64_t targetLocation, bool triggerReanalysis); - const std::vector& BackingCaches() const; + const immer::vector& BackingCaches() const; DSCViewState ViewState() const; @@ -620,7 +625,7 @@ namespace SharedCacheCore { std::optional LoadHeaderForAddress( std::shared_ptr vm, uint64_t address, std::string_view installName); void InitializeHeader( - Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad); + Ref view, VM* vm, SharedCacheMachOHeader header, const std::vector regionsToLoad); void ReadExportNode(std::vector>& symbolList, SharedCacheMachOHeader& header, DataBuffer& buffer, uint64_t textBase, const std::string& currentText, size_t cursor, uint32_t endGuard); std::vector> ParseExportTrie( diff --git a/view/sharedcache/workflow/CMakeLists.txt b/view/sharedcache/workflow/CMakeLists.txt index 7a9c157aa4..2310abe633 100644 --- a/view/sharedcache/workflow/CMakeLists.txt +++ b/view/sharedcache/workflow/CMakeLists.txt @@ -65,7 +65,7 @@ message(STATUS "RCD: ${BN_REF_COUNT_DEBUG}") get_recursive_include_dirs(binaryninjaapi INCLUDES) target_include_directories(sharedcacheworkflow - PUBLIC ${PROJECT_SOURCE_DIR} ${INCLUDES}) + PUBLIC ${PROJECT_SOURCE_DIR} ${INCLUDES} ${BN_API_PATH}/vendor/immer) set_target_properties(sharedcacheworkflow 
PROPERTIES CXX_STANDARD 17 From 8c82c59200f7fadd51cd0ce38aa6bf9fd9792c35 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Fri, 22 Nov 2024 19:04:37 -0800 Subject: [PATCH 35/35] [SharedCache] Simplify MMappedFileAccessor::Read* methods This change is mostly motivated by simplifying the code, but it also brings minor correctness and performance benefits. 1. The pointer returned by mmap is stored as a uint8_t* rather than void* as that is how it is used. This reduces how often it needs to be cast to a different type before it is used. 2. Read methods for primitives delegate to a new Read template function that in turn delegates to the general-purpose `Read(void* dest, size_t address, size_t length)`. This improves the consistency of bounds checking and simplifies the code. The compiler is more than willing to inline this so we get less repetition with no overhead. 3. ReadNullTermString now uses std::find to find the nul byte and directly constructs the string from that range of bytes. This removes an unnecessary allocation that was previously being forced by the use of reserve followed by shrink_to_fit. It also avoids repeated reallocation for longer strings as they grew past the reserved size as they were being built up a character at a time. 
--- view/sharedcache/core/VM.cpp | 83 ++++++++++++++---------------------- view/sharedcache/core/VM.h | 6 ++- 2 files changed, 35 insertions(+), 54 deletions(-) diff --git a/view/sharedcache/core/VM.cpp b/view/sharedcache/core/VM.cpp index 5aca51d908..7ba94b24bd 100644 --- a/view/sharedcache/core/VM.cpp +++ b/view/sharedcache/core/VM.cpp @@ -149,12 +149,12 @@ void MMAP::Map() return; } - _mmap = MapViewOfFile( + _mmap = static_cast(MapViewOfFile( hMapping, // handle to the file mapping object FILE_MAP_COPY, // desired access 0, // file offset (high-order DWORD) 0, // file offset (low-order DWORD) - 0); // number of bytes to map (0 = entire file) + 0)); // number of bytes to map (0 = entire file) if (_mmap == nullptr) { @@ -174,13 +174,14 @@ void MMAP::Map() len = ftell(fd); fseek(fd, 0L, SEEK_SET); - _mmap = mmap(nullptr, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileno(fd), 0u); - if (_mmap == MAP_FAILED) + void *result = mmap(nullptr, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileno(fd), 0u); + if (result == MAP_FAILED) { // Handle error return; } + _mmap = static_cast(result); mapped = true; #endif } @@ -374,102 +375,80 @@ MMappedFileAccessor::~MMappedFileAccessor() void MMappedFileAccessor::WritePointer(size_t address, size_t pointer) { - ((size_t*)(&((uint8_t*)m_mmap._mmap)[address]))[0] = pointer; + *(size_t*)&m_mmap._mmap[address] = pointer; +} + +template +T MMappedFileAccessor::Read(size_t address) { + T result; + Read(&result, address, sizeof(T)); + return result; } std::string MMappedFileAccessor::ReadNullTermString(size_t address) { if (address > m_mmap.len) return ""; - size_t max = m_mmap.len; - size_t i = address; - std::string str; - str.reserve(140); - while (i < max) - { - char c = ((char*)(&((uint8_t*)m_mmap._mmap)[i]))[0]; - if (c == 0) - break; - str += c; - i++; - } - str.shrink_to_fit(); - return str; + auto start = &m_mmap._mmap[address]; + auto end = &m_mmap._mmap[m_mmap.len]; + auto nul = std::find(start, end, 0); + return 
std::string(start, nul); } uint8_t MMappedFileAccessor::ReadUChar(size_t address) { - if (address > m_mmap.len) - throw MappingReadException(); - return ((uint8_t*)(&(((uint8_t*)m_mmap._mmap)[address])))[0]; + return Read(address); } int8_t MMappedFileAccessor::ReadChar(size_t address) { - if (address > m_mmap.len) - throw MappingReadException(); - return ((int8_t*)(&(((uint8_t*)m_mmap._mmap)[address])))[0]; + return Read(address); } uint16_t MMappedFileAccessor::ReadUShort(size_t address) { - if (address > m_mmap.len) - throw MappingReadException(); - return ((uint16_t*)(&(((uint8_t*)m_mmap._mmap)[address])))[0]; + return Read(address); } int16_t MMappedFileAccessor::ReadShort(size_t address) { - if (address > m_mmap.len) - throw MappingReadException(); - return ((int16_t*)(&(((uint8_t*)m_mmap._mmap)[address])))[0]; + return Read(address); } uint32_t MMappedFileAccessor::ReadUInt32(size_t address) { - if (address > m_mmap.len) - throw MappingReadException(); - return ((uint32_t*)(&(((uint8_t*)m_mmap._mmap)[address])))[0]; + return Read(address); } int32_t MMappedFileAccessor::ReadInt32(size_t address) { - if (address > m_mmap.len) - throw MappingReadException(); - return ((int32_t*)(&(((uint8_t*)m_mmap._mmap)[address])))[0]; + return Read(address); } uint64_t MMappedFileAccessor::ReadULong(size_t address) { - if (address > m_mmap.len) - throw MappingReadException(); - return ((uint64_t*)(&(((uint8_t*)m_mmap._mmap)[address])))[0]; + return Read(address); } int64_t MMappedFileAccessor::ReadLong(size_t address) { - if (address > m_mmap.len) - throw MappingReadException(); - return ((int64_t*)(&(((uint8_t*)m_mmap._mmap)[address])))[0]; + return Read(address); } BinaryNinja::DataBuffer MMappedFileAccessor::ReadBuffer(size_t address, size_t length) { - if (address > m_mmap.len) + if (m_mmap.len <= length || address > m_mmap.len - length) throw MappingReadException(); - if (address + length > m_mmap.len) - throw MappingReadException(); - void* data = 
(void*)(&(((uint8_t*)m_mmap._mmap)[address])); - return BinaryNinja::DataBuffer(data, length); + + return BinaryNinja::DataBuffer(&m_mmap._mmap[address], length); } void MMappedFileAccessor::Read(void* dest, size_t address, size_t length) { - if (address > m_mmap.len) - throw MappingReadException(); - if (address + length > m_mmap.len) + if (m_mmap.len <= length || address > m_mmap.len - length) throw MappingReadException(); - memcpy(dest, (void*)&(((uint8_t*)m_mmap._mmap)[address]), length); + + memcpy(dest, &m_mmap._mmap[address], length); } diff --git a/view/sharedcache/core/VM.h b/view/sharedcache/core/VM.h index e47cf15ef4..45f1530deb 100644 --- a/view/sharedcache/core/VM.h +++ b/view/sharedcache/core/VM.h @@ -89,7 +89,7 @@ class MMappedFileAccessor; class MMAP { friend MMappedFileAccessor; - void *_mmap; + uint8_t *_mmap; FILE *fd; size_t len; @@ -175,8 +175,10 @@ class MMappedFileAccessor { BinaryNinja::DataBuffer ReadBuffer(size_t addr, size_t length); void Read(void *dest, size_t addr, size_t length); -}; + template + T Read(size_t address); +}; struct PageMapping { std::string filePath;