From 678d570ae7e851492133cd048dc60d972348aa69 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Wed, 13 Nov 2024 18:40:37 -0800 Subject: [PATCH 1/7] [SharedCache] Use basic copy-on-write for viewStateCache Copying the state from the cache into a new `SharedCache` object is done with a global lock held and is so expensive that it results in much of the shared cache analysis running on a single thread, with others blocked waiting to acquire the lock. The cache now holds a `std::shared_ptr` to the state. New `SharedCache` objects take a reference to the cached state and only create their own copy of it the first time they perform an operation that would mutate it. The cached copy is never mutated, only replaced, so there is no danger of modifying the state out from under a `SharedCache` object. Since the copy happens at first mutation, it is performed without any global locks held. This avoids blocking other threads. This cuts the initial load time of a macOS shared cache from 3 minutes to 70 seconds, and cuts the time taken to load and analyze AppKit from multiple hours to around 14 minutes. 
--- view/sharedcache/core/SharedCache.cpp | 450 ++++++++++++++------------ view/sharedcache/core/SharedCache.h | 54 ++-- 2 files changed, 274 insertions(+), 230 deletions(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 652365985a..5f0a304c9d 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -53,30 +53,33 @@ int count_trailing_zeros(uint64_t value) { } #endif -struct ViewStateCacheStore { - SharedCache::SharedCacheFormat m_cacheFormat; +struct SharedCache::State +{ + std::unordered_map>>> + exportInfos; + std::unordered_map>>> + symbolInfos; - DSCViewState m_viewState; + std::unordered_map imageStarts; + std::unordered_map headers; - std::unordered_map m_imageStarts; - std::unordered_map m_headers; + std::vector images; - std::vector m_images; - std::vector m_regionsMappedIntoMemory; + std::vector regionsMappedIntoMemory; - std::vector m_backingCaches; - std::vector m_stubIslandRegions; // TODO honestly both of these should be refactored into nonImageRegions. :p - std::vector m_dyldDataRegions; - std::vector m_nonImageRegions; + std::vector backingCaches; - std::string m_baseFilePath; + std::vector stubIslandRegions; // TODO honestly both of these should be refactored into nonImageRegions. 
:p + std::vector dyldDataRegions; + std::vector nonImageRegions; - std::unordered_map>>> m_exportInfos; - std::unordered_map>>> m_symbolInfos; + std::string baseFilePath; + SharedCacheFormat cacheFormat; + DSCViewState viewState = DSCViewStateUnloaded; }; static std::recursive_mutex viewStateMutex; -static std::unordered_map viewStateCache; +static std::unordered_map> viewStateCache; std::mutex progressMutex; std::unordered_map progressMap; @@ -221,7 +224,9 @@ void SharedCache::PerformInitialLoad() progressMap[m_dscView->GetFile()->GetSessionId()] = LoadProgressLoadingCaches; progressMutex.unlock(); - m_baseFilePath = path; + WillMutateState(); + + MutableState().baseFilePath = path; DataBuffer sig = baseFile->ReadBuffer(0, 4); if (sig.GetLength() != 4) @@ -230,14 +235,14 @@ void SharedCache::PerformInitialLoad() if (strncmp(magic, "dyld", 4) != 0) abort(); - m_cacheFormat = RegularCacheFormat; + MutableState().cacheFormat = RegularCacheFormat; dyld_cache_header primaryCacheHeader {}; size_t header_size = baseFile->ReadUInt32(16); baseFile->Read(&primaryCacheHeader, 0, std::min(header_size, sizeof(dyld_cache_header))); if (primaryCacheHeader.imagesCountOld != 0) - m_cacheFormat = RegularCacheFormat; + MutableState().cacheFormat = RegularCacheFormat; size_t subCacheOff = offsetof(struct dyld_cache_header, subCacheArrayOffset); size_t headerEnd = primaryCacheHeader.mappingOffset; @@ -246,15 +251,15 @@ void SharedCache::PerformInitialLoad() if (primaryCacheHeader.cacheType != 2) { if (std::filesystem::exists(ResolveFilePath(m_dscView, baseFile->Path() + ".01"))) - m_cacheFormat = LargeCacheFormat; + MutableState().cacheFormat = LargeCacheFormat; else - m_cacheFormat = SplitCacheFormat; + MutableState().cacheFormat = SplitCacheFormat; } else - m_cacheFormat = iOS16CacheFormat; + MutableState().cacheFormat = iOS16CacheFormat; } - switch (m_cacheFormat) + switch (State().cacheFormat) { case RegularCacheFormat: { @@ -272,7 +277,7 @@ void SharedCache::PerformInitialLoad() 
mapRawToAddrAndSize.second.second = mapping.size; cache.mappings.push_back(mapRawToAddrAndSize); } - m_backingCaches.push_back(cache); + MutableState().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; @@ -280,7 +285,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffsetOld + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + MutableState().imageStarts[iname] = img.address; } m_logger->LogInfo("Found %d images in the shared cache", primaryCacheHeader.imagesCountOld); @@ -312,7 +317,7 @@ void SharedCache::PerformInitialLoad() std::string segNameStr = std::string(segName); stubIslandRegion.prettyName = "dyld_shared_cache_branch_islands_" + std::to_string(i) + "::" + segNameStr; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - m_stubIslandRegions.push_back(stubIslandRegion); + MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); } } } @@ -338,9 +343,9 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = mapping.fileOffset; mapRawToAddrAndSize.second.first = mapping.address; mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(mapRawToAddrAndSize); + cache.mappings.push_back(std::move(mapRawToAddrAndSize)); } - m_backingCaches.push_back(cache); + MutableState().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; @@ -348,7 +353,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + MutableState().imageStarts[iname] = img.address; } if (primaryCacheHeader.branchPoolsCount) @@ -356,7 +361,8 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < 
primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); } } std::string mainFileName = base_name(path); @@ -413,7 +419,7 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(mapRawToAddrAndSize); + subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); } if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 @@ -427,10 +433,10 @@ void SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - m_stubIslandRegions.push_back(stubIslandRegion); + MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); } - m_backingCaches.push_back(subCache); + MutableState().backingCaches.push_back(std::move(subCache)); } break; } @@ -449,9 +455,9 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = mapping.fileOffset; mapRawToAddrAndSize.second.first = mapping.address; mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(mapRawToAddrAndSize); + cache.mappings.push_back(std::move(mapRawToAddrAndSize)); } - m_backingCaches.push_back(cache); + MutableState().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; @@ -459,7 +465,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), 
sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + MutableState().imageStarts[iname] = img.address; } if (primaryCacheHeader.branchPoolsCount) @@ -467,7 +473,8 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); } } @@ -508,10 +515,10 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(mapRawToAddrAndSize); + subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); } - m_backingCaches.push_back(subCache); + MutableState().backingCaches.push_back(std::move(subCache)); if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 && subCacheHeader.imagesTextOffset == 0) @@ -524,7 +531,7 @@ void SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - m_stubIslandRegions.push_back(stubIslandRegion); + MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); } } @@ -554,10 +561,10 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(mapRawToAddrAndSize); + 
subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); } - m_backingCaches.push_back(subCache); + MutableState().backingCaches.push_back(std::move(subCache)); break; } case iOS16CacheFormat: @@ -575,10 +582,10 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = mapping.fileOffset; mapRawToAddrAndSize.second.first = mapping.address; mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(mapRawToAddrAndSize); + cache.mappings.push_back(std::move(mapRawToAddrAndSize)); } - m_backingCaches.push_back(cache); + MutableState().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; @@ -586,7 +593,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + MutableState().imageStarts[iname] = img.address; } if (primaryCacheHeader.branchPoolsCount) @@ -594,7 +601,8 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); } } @@ -657,7 +665,7 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(mapRawToAddrAndSize); + subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); if (subCachePath.find(".dylddata") != std::string::npos) { @@ -669,11 +677,11 @@ void SharedCache::PerformInitialLoad() dyldDataRegion.size = size; 
dyldDataRegion.prettyName = subCacheFilename + "::_data" + std::to_string(j); dyldDataRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable); - m_dyldDataRegions.push_back(dyldDataRegion); + MutableState().dyldDataRegions.push_back(std::move(dyldDataRegion)); } } - m_backingCaches.push_back(subCache); + MutableState().backingCaches.push_back(std::move(subCache)); if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 && subCacheHeader.imagesTextOffset == 0) @@ -686,7 +694,7 @@ void SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - m_stubIslandRegions.push_back(stubIslandRegion); + MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); } } @@ -718,10 +726,10 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(mapRawToAddrAndSize); + subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); } - m_backingCaches.push_back(subCache); + MutableState().backingCaches.push_back(std::move(subCache)); } catch (...) 
{} @@ -741,7 +749,7 @@ void SharedCache::PerformInitialLoad() m_logger->LogError("Failed to map VM pages for Shared Cache on initial load, this is fatal."); return; } - for (const auto &start : m_imageStarts) + for (const auto& start : State().imageStarts) { try { auto imageHeader = SharedCache::LoadHeaderForAddress(vm, start.second, start.first); @@ -752,7 +760,7 @@ void SharedCache::PerformInitialLoad() auto mapping = vm->MappingAtAddress(imageHeader->linkeditSegment.vmaddr); imageHeader->exportTriePath = mapping.first.filePath; } - m_headers[start.second] = imageHeader.value(); + MutableState().headers[start.second] = imageHeader.value(); CacheImage image; image.installName = start.first; image.headerLocation = start.second; @@ -781,14 +789,14 @@ void SharedCache::PerformInitialLoad() // if we're positive we have an entry point for some reason, force the segment // executable. this helps with kernel images. - for (auto &entryPoint: imageHeader->m_entryPoints) + for (auto &entryPoint : imageHeader->m_entryPoints) if (segment.vmaddr <= entryPoint && (entryPoint < (segment.vmaddr + segment.filesize))) flags |= SegmentExecutable; sectionRegion.flags = (BNSegmentFlag)flags; image.regions.push_back(sectionRegion); } - m_images.push_back(image); + MutableState().images.push_back(image); } else { @@ -801,9 +809,9 @@ void SharedCache::PerformInitialLoad() } } - m_logger->LogInfo("Loaded %d Mach-O headers", m_headers.size()); + m_logger->LogInfo("Loaded %d Mach-O headers", State().headers.size()); - for (const auto& cache : m_backingCaches) + for (const auto& cache : State().backingCaches) { size_t i = 0; for (const auto& mapping : cache.mappings) @@ -814,15 +822,15 @@ void SharedCache::PerformInitialLoad() region.prettyName = base_name(cache.path) + "::" + std::to_string(i); // FIXME flags!!! BackingCache.mapping needs refactored to store this information! 
region.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - m_nonImageRegions.push_back(region); + MutableState().nonImageRegions.push_back(std::move(region)); i++; } } // Iterate through each Mach-O header - if (!m_dyldDataRegions.empty()) + if (!State().dyldDataRegions.empty()) { - for (const auto& [headerKey, header] : m_headers) + for (const auto& [headerKey, header] : State().headers) { // Iterate through each segment of the header for (const auto& segment : header.segments) @@ -831,7 +839,7 @@ void SharedCache::PerformInitialLoad() uint64_t segmentEnd = segmentStart + segment.vmsize; // Iterate through each region in m_dyldDataRegions - for (auto it = m_dyldDataRegions.begin(); it != m_dyldDataRegions.end();) + for (auto it = State().dyldDataRegions.begin(); it != State().dyldDataRegions.end();) { uint64_t regionStart = it->start; uint64_t regionSize = it->size; @@ -864,12 +872,12 @@ void SharedCache::PerformInitialLoad() } // Erase the original region - it = m_dyldDataRegions.erase(it); + it = MutableState().dyldDataRegions.erase(it); // Insert the new regions (if any) for (const auto& newRegion : newRegions) { - it = m_dyldDataRegions.insert(it, newRegion); + it = MutableState().dyldDataRegions.insert(it, newRegion); ++it; // Move iterator to the next position } } @@ -883,9 +891,9 @@ void SharedCache::PerformInitialLoad() } // Iterate through each Mach-O header - if (!m_nonImageRegions.empty()) + if (!State().nonImageRegions.empty()) { - for (const auto& [headerKey, header] : m_headers) + for (const auto& [headerKey, header] : State().headers) { // Iterate through each segment of the header for (const auto& segment : header.segments) @@ -894,7 +902,7 @@ void SharedCache::PerformInitialLoad() uint64_t segmentEnd = segmentStart + segment.vmsize; // Iterate through each region in m_dyldDataRegions - for (auto it = m_nonImageRegions.begin(); it != m_nonImageRegions.end();) + for (auto it = 
State().nonImageRegions.begin(); it != State().nonImageRegions.end();) { uint64_t regionStart = it->start; uint64_t regionSize = it->size; @@ -927,12 +935,12 @@ void SharedCache::PerformInitialLoad() } // Erase the original region - it = m_nonImageRegions.erase(it); + it = MutableState().nonImageRegions.erase(it); // Insert the new regions (if any) for (const auto& newRegion : newRegions) { - it = m_nonImageRegions.insert(it, newRegion); + it = MutableState().nonImageRegions.insert(it, newRegion); ++it; // Move iterator to the next position } } @@ -959,7 +967,7 @@ std::shared_ptr SharedCache::GetVMMap(bool mapPages) if (mapPages) { - for (const auto& cache : m_backingCaches) + for (const auto& cache : State().backingCaches) { for (const auto& mapping : cache.mappings) { @@ -980,22 +988,10 @@ void SharedCache::DeserializeFromRawView() if (m_dscView->QueryMetadata(SharedCacheMetadataTag)) { std::unique_lock viewStateCacheLock(viewStateMutex); - if (viewStateCache.find(m_dscView->GetFile()->GetSessionId()) != viewStateCache.end()) - { - auto c = viewStateCache[m_dscView->GetFile()->GetSessionId()]; - m_imageStarts = c.m_imageStarts; - m_cacheFormat = c.m_cacheFormat; - m_backingCaches = c.m_backingCaches; - m_viewState = c.m_viewState; - m_headers = c.m_headers; - m_images = c.m_images; - m_regionsMappedIntoMemory = c.m_regionsMappedIntoMemory; - m_stubIslandRegions = c.m_stubIslandRegions; - m_dyldDataRegions = c.m_dyldDataRegions; - m_nonImageRegions = c.m_nonImageRegions; - m_baseFilePath = c.m_baseFilePath; - m_exportInfos = c.m_exportInfos; - m_symbolInfos = c.m_symbolInfos; + if (auto it = viewStateCache.find(m_dscView->GetFile()->GetSessionId()); it != viewStateCache.end()) + { + m_state = it->second; + m_stateIsShared = true; m_metadataValid = true; } else @@ -1005,14 +1001,16 @@ void SharedCache::DeserializeFromRawView() if (!m_metadataValid) { m_logger->LogError("Failed to deserialize Shared Cache metadata"); - m_viewState = DSCViewStateUnloaded; + 
WillMutateState(); + MutableState().viewState = DSCViewStateUnloaded; } } else { m_metadataValid = true; - m_viewState = DSCViewStateUnloaded; - m_images.clear(); // fixme ?? + WillMutateState(); + MutableState().viewState = DSCViewStateUnloaded; + MutableState().images.clear(); // fixme ?? } } @@ -1029,12 +1027,14 @@ void SharedCache::ParseAndApplySlideInfoForFile(std::shared_ptrSlideInfoWasApplied()) return; + + WillMutateState(); std::vector> rewrites; dyld_cache_header baseHeader; file->Read(&baseHeader, 0, sizeof(dyld_cache_header)); uint64_t base = UINT64_MAX; - for (const auto& backingCache : m_backingCaches) + for (const auto& backingCache : State().backingCaches) { for (const auto& mapping : backingCache.mappings) { @@ -1365,42 +1365,39 @@ SharedCache::SharedCache(BinaryNinja::Ref dscView) : m_ DeserializeFromRawView(); if (!m_metadataValid) return; - if (m_viewState == DSCViewStateUnloaded) + if (State().viewState == DSCViewStateUnloaded) { - if (m_viewState == DSCViewStateUnloaded) + std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); + try { + PerformInitialLoad(); + } + catch (...) { - std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); - try { - PerformInitialLoad(); - } - catch (...) 
- { - m_logger->LogError("Failed to perform initial load of Shared Cache"); - } + m_logger->LogError("Failed to perform initial load of Shared Cache"); + } - auto settings = m_dscView->GetLoadSettings(VIEW_NAME); - bool autoLoadLibsystem = true; - if (settings && settings->Contains("loader.dsc.autoLoadLibSystem")) - { - autoLoadLibsystem = settings->Get("loader.dsc.autoLoadLibSystem", m_dscView); - } - if (autoLoadLibsystem) + auto settings = m_dscView->GetLoadSettings(VIEW_NAME); + bool autoLoadLibsystem = true; + if (settings && settings->Contains("loader.dsc.autoLoadLibSystem")) + { + autoLoadLibsystem = settings->Get("loader.dsc.autoLoadLibSystem", m_dscView); + } + if (autoLoadLibsystem) + { + for (const auto& [_, header] : State().headers) { - for (const auto& [_, header] : m_headers) + if (header.installName.find("libsystem_c.dylib") != std::string::npos) { - if (header.installName.find("libsystem_c.dylib") != std::string::npos) - { - lock.unlock(); - m_logger->LogInfo("Loading core libsystem_c.dylib library"); - LoadImageWithInstallName(header.installName); - lock.lock(); - break; - } + lock.unlock(); + m_logger->LogInfo("Loading core libsystem_c.dylib library"); + LoadImageWithInstallName(header.installName); + lock.lock(); + break; } } - m_viewState = DSCViewStateLoaded; - SaveToDSCView(); } + MutableState().viewState = DSCViewStateLoaded; + SaveToDSCView(); } else { @@ -1430,7 +1427,7 @@ SharedCache* SharedCache::GetFromDSCView(BinaryNinja::Ref SharedCache::GetImageStart(std::string installName) { - for (const auto& [name, start] : m_imageStarts) + for (const auto& [name, start] : State().imageStarts) { if (name == installName) { @@ -1445,7 +1442,7 @@ std::optional SharedCache::HeaderForAddress(uint64_t add // We _could_ mark each page with the image start? :grimacing emoji: // But that'd require mapping pages :grimacing emoji: :grimacing emoji: // There's not really any other hacks that could make this faster, that I can think of... 
- for (const auto& [start, header] : m_headers) + for (const auto& [start, header] : State().headers) { for (const auto& segment : header.segments) { @@ -1460,21 +1457,21 @@ std::optional SharedCache::HeaderForAddress(uint64_t add std::string SharedCache::NameForAddress(uint64_t address) { - for (const auto& stubIsland : m_stubIslandRegions) + for (const auto& stubIsland : State().stubIslandRegions) { if (stubIsland.start <= address && stubIsland.start + stubIsland.size > address) { return stubIsland.prettyName; } } - for (const auto& dyldData : m_dyldDataRegions) + for (const auto& dyldData : State().dyldDataRegions) { if (dyldData.start <= address && dyldData.start + dyldData.size > address) { return dyldData.prettyName; } } - for (const auto& nonImageRegion : m_nonImageRegions) + for (const auto& nonImageRegion : State().nonImageRegions) { if (nonImageRegion.start <= address && nonImageRegion.start + nonImageRegion.size > address) { @@ -1508,7 +1505,7 @@ std::string SharedCache::ImageNameForAddress(uint64_t address) bool SharedCache::LoadImageContainingAddress(uint64_t address) { - for (const auto& [start, header] : m_headers) + for (const auto& [start, header] : State().headers) { for (const auto& segment : header.segments) { @@ -1526,6 +1523,8 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) { std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); DeserializeFromRawView(); + WillMutateState(); + auto vm = GetVMMap(); if (!vm) { @@ -1537,13 +1536,13 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) CacheImage* targetImage = nullptr; MemoryRegion* targetSegment = nullptr; - for (auto& image : m_images) + for (auto& image : MutableState().images) { for (auto& region : image.regions) { if (region.start <= address && region.start + region.size > address) { - targetHeader = m_headers[image.headerLocation]; + targetHeader = MutableState().headers[image.headerLocation]; 
targetImage = &image; targetSegment = &region; break; @@ -1554,7 +1553,7 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } if (!targetSegment) { - for (auto& stubIsland : m_stubIslandRegions) + for (auto& stubIsland : MutableState().stubIslandRegions) { if (stubIsland.start <= address && stubIsland.start + stubIsland.size > address) { @@ -1583,7 +1582,7 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) stubIsland.rawViewOffsetIfLoaded = rawViewEnd; - m_regionsMappedIntoMemory.push_back(stubIsland); + MutableState().regionsMappedIntoMemory.push_back(stubIsland); SaveToDSCView(); @@ -1594,7 +1593,7 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } } - for (auto& dyldData : m_dyldDataRegions) + for (auto& dyldData : MutableState().dyldDataRegions) { if (dyldData.start <= address && dyldData.start + dyldData.size > address) { @@ -1622,7 +1621,7 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) dyldData.loaded = true; dyldData.rawViewOffsetIfLoaded = rawViewEnd; - m_regionsMappedIntoMemory.push_back(dyldData); + MutableState().regionsMappedIntoMemory.push_back(dyldData); SaveToDSCView(); @@ -1633,7 +1632,7 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } } - for (auto& region : m_nonImageRegions) + for (auto& region : MutableState().nonImageRegions) { if (region.start <= address && region.start + region.size > address) { @@ -1660,7 +1659,7 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) region.loaded = true; region.rawViewOffsetIfLoaded = rawViewEnd; - m_regionsMappedIntoMemory.push_back(region); + MutableState().regionsMappedIntoMemory.push_back(region); SaveToDSCView(); @@ -1696,7 +1695,7 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) targetSegment->loaded = true; targetSegment->rawViewOffsetIfLoaded = rawViewEnd; - m_regionsMappedIntoMemory.push_back(*targetSegment); + MutableState().regionsMappedIntoMemory.push_back(*targetSegment); SaveToDSCView(); @@ -1720,12 +1719,14 @@ bool 
SharedCache::LoadImageWithInstallName(std::string installName) std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); DeserializeFromRawView(); + WillMutateState(); + m_logger->LogInfo("Loading image %s", installName.c_str()); auto vm = GetVMMap(); CacheImage* targetImage = nullptr; - for (auto& cacheImage : m_images) + for (auto& cacheImage : MutableState().images) { if (cacheImage.installName == installName) { @@ -1733,11 +1734,15 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) break; } } - - auto header = m_headers[targetImage->headerLocation]; + auto it = State().headers.find(targetImage->headerLocation); + if (it == State().headers.end()) + { + return false; + } + const auto& header = it->second; auto id = m_dscView->BeginUndoActions(); - m_viewState = DSCViewStateLoadedWithImages; + MutableState().viewState = DSCViewStateLoadedWithImages; auto reader = VMReader(vm); reader.Seek(targetImage->headerLocation); @@ -1770,7 +1775,7 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) region.loaded = true; region.rawViewOffsetIfLoaded = rawViewEnd; - m_regionsMappedIntoMemory.push_back(region); + MutableState().regionsMappedIntoMemory.push_back(region); m_dscView->GetParentView()->AddAutoSegment(rawViewEnd, region.size, rawViewEnd, region.size, region.flags); m_dscView->AddUserSegment(region.start, region.size, rawViewEnd, region.size, region.flags); @@ -2260,6 +2265,7 @@ std::optional SharedCache::LoadHeaderForAddress(std::sha void SharedCache::InitializeHeader( Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad) { + WillMutateState(); Ref settings = view->GetLoadSettings(VIEW_NAME); bool applyFunctionStarts = true; @@ -2629,7 +2635,7 @@ void SharedCache::InitializeHeader( view->DefineAutoSymbol(symbolObj); symbolInfos.push_back({sym.n_value, {type, symbol}}); } - m_symbolInfos[header.textBase] = symbolInfos; + 
MutableState().symbolInfos[header.textBase] = symbolInfos; } if (header.exportTriePresent && header.linkeditPresent && vm->AddressIsMapped(header.linkeditSegment.vmaddr)) @@ -2675,7 +2681,7 @@ void SharedCache::InitializeHeader( else view->DefineAutoSymbol(symbol); } - m_exportInfos[header.textBase] = exportMapping; + MutableState().exportInfos[header.textBase] = std::move(exportMapping); } view->EndBulkModifySymbols(); @@ -2782,7 +2788,7 @@ std::vector> SharedCache::ParseExportTrie(std::shared_ptr SharedCache::GetAvailableImages() { std::vector installNames; - for (const auto& header : m_headers) + for (const auto& header : State().headers) { installNames.push_back(header.second.installName); } @@ -2792,10 +2798,12 @@ std::vector SharedCache::GetAvailableImages() std::vector>> SharedCache::LoadAllSymbolsAndWait() { + WillMutateState(); + std::unique_lock initialLoadBlock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); std::vector>> symbols; - for (const auto& img : m_images) + for (const auto& img : State().images) { auto header = HeaderForAddress(img.headerLocation); std::shared_ptr mapping; @@ -2814,7 +2822,7 @@ std::vector>> SharedCache::LoadAllSymbolsAndW exportMapping.push_back({sym->GetAddress(), {sym->GetType(), sym->GetRawName()}}); symbols.push_back({img.installName, sym}); } - m_exportInfos[header->textBase] = exportMapping; + MutableState().exportInfos[header->textBase] = std::move(exportMapping); } SaveToDSCView(); @@ -2836,17 +2844,22 @@ std::string SharedCache::SerializedImageHeaderForAddress(uint64_t address) std::string SharedCache::SerializedImageHeaderForName(std::string name) { - auto header = HeaderForAddress(m_imageStarts[name]); - if (header) + if (auto it = State().imageStarts.find(name); it != State().imageStarts.end()) { - return header->AsString(); + if (auto header = HeaderForAddress(it->second)) + { + return header->AsString(); + } } return ""; } -void 
SharedCache::FindSymbolAtAddrAndApplyToAddr(uint64_t symbolLocation, uint64_t targetLocation, bool triggerReanalysis) +void SharedCache::FindSymbolAtAddrAndApplyToAddr( + uint64_t symbolLocation, uint64_t targetLocation, bool triggerReanalysis) { + WillMutateState(); + std::string prefix = ""; if (symbolLocation != targetLocation) prefix = "j_"; @@ -2933,7 +2946,7 @@ void SharedCache::FindSymbolAtAddrAndApplyToAddr(uint64_t symbolLocation, uint64 } { std::unique_lock _lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); - m_exportInfos[header->textBase] = exportMapping; + MutableState().exportInfos[header->textBase] = std::move(exportMapping); } m_dscView->EndBulkModifySymbols(); m_dscView->ForgetUndoActions(id); @@ -2948,22 +2961,16 @@ bool SharedCache::SaveToDSCView() auto data = AsMetadata(); m_dscView->StoreMetadata(SharedCacheMetadataTag, data); m_dscView->GetParentView()->GetParentView()->StoreMetadata(SharedCacheMetadataTag, data); + + // By moving our state to the cache we can avoid creating a copy in the case + // that no further mutations are made to `this`. If we're not done being mutated, + // the data will be copied on the first mutation. 
+ auto cachedState = std::make_shared(std::move(*m_state)); + m_state = cachedState; + m_stateIsShared = true; + std::unique_lock viewStateCacheLock(viewStateMutex); - ViewStateCacheStore c; - c.m_imageStarts = m_imageStarts; - c.m_cacheFormat = m_cacheFormat; - c.m_backingCaches = m_backingCaches; - c.m_viewState = m_viewState; - c.m_headers = m_headers; - c.m_images = m_images; - c.m_regionsMappedIntoMemory = m_regionsMappedIntoMemory; - c.m_stubIslandRegions = m_stubIslandRegions; - c.m_dyldDataRegions = m_dyldDataRegions; - c.m_nonImageRegions = m_nonImageRegions; - c.m_baseFilePath = m_baseFilePath; - c.m_exportInfos = m_exportInfos; - c.m_symbolInfos = m_symbolInfos; - viewStateCache[m_dscView->GetFile()->GetSessionId()] = c; + viewStateCache[m_dscView->GetFile()->GetSessionId()] = std::move(cachedState); m_metadataValid = true; @@ -2974,7 +2981,7 @@ bool SharedCache::SaveToDSCView() std::vector SharedCache::GetMappedRegions() const { std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); - return m_regionsMappedIntoMemory; + return State().regionsMappedIntoMemory; } extern "C" @@ -3120,7 +3127,7 @@ extern "C" { if (cache->object) { - return (BNDSCViewState)cache->object->State(); + return (BNDSCViewState)cache->object->ViewState(); } return BNDSCViewState::Unloaded; @@ -3327,14 +3334,14 @@ void SharedCache::Store(SerializationContext& context) const { Serialize(context, "metadataVersion", METADATA_VERSION); - MSS(m_viewState); - MSS_CAST(m_cacheFormat, uint8_t); - MSS(m_imageStarts); - MSS(m_baseFilePath); + Serialize(context, "m_viewState", State().viewState); + Serialize(context, "m_cacheFormat", State().cacheFormat); + Serialize(context, "m_imageStarts", State().imageStarts); + Serialize(context, "m_baseFilePath", State().baseFilePath); Serialize(context, "headers"); context.writer.StartArray(); - for (auto& [k, v] : m_headers) + for (auto& [k, v] : State().headers) { 
context.writer.StartObject(); v.Store(context); @@ -3344,7 +3351,7 @@ void SharedCache::Store(SerializationContext& context) const Serialize(context, "exportInfos"); context.writer.StartArray(); - for (const auto& pair1 : m_exportInfos) + for (const auto& pair1 : State().exportInfos) { context.writer.StartObject(); Serialize(context, "key", pair1.first); @@ -3363,12 +3370,12 @@ void SharedCache::Store(SerializationContext& context) const } context.writer.EndArray(); - Serialize(context, "backingCaches", m_backingCaches); - Serialize(context, "stubIslands", m_stubIslandRegions); - Serialize(context, "images", m_images); - Serialize(context, "regionsMappedIntoMemory", m_regionsMappedIntoMemory); - Serialize(context, "dyldDataSections", m_dyldDataRegions); - Serialize(context, "nonImageRegions", m_nonImageRegions); + Serialize(context, "backingCaches", State().backingCaches); + Serialize(context, "stubIslands", State().stubIslandRegions); + Serialize(context, "images", State().images); + Serialize(context, "regionsMappedIntoMemory", State().regionsMappedIntoMemory); + Serialize(context, "dyldDataSections", State().dyldDataRegions); + Serialize(context, "nonImageRegions", State().nonImageRegions); } void SharedCache::Load(DeserializationContext& context) @@ -3386,18 +3393,23 @@ void SharedCache::Load(DeserializationContext& context) m_logger->LogError("Shared Cache metadata version missing"); return; } - m_viewState = MSL_CAST(m_viewState, uint8_t, DSCViewState); - m_cacheFormat = MSL_CAST(m_cacheFormat, uint8_t, SharedCacheFormat); - m_headers.clear(); + + m_stateIsShared = false; + m_state = std::make_shared(); + + MutableState().viewState = static_cast(context.load("m_viewState")); + MutableState().cacheFormat = static_cast(context.load("m_cacheFormat")); + for (auto& startAndHeader : context.doc["headers"].GetArray()) { SharedCacheMachOHeader header; header.LoadFromValue(startAndHeader); - m_headers[header.textBase] = header; + 
MutableState().headers[header.textBase] = std::move(header); } - MSL(m_imageStarts); - MSL(m_baseFilePath); - m_exportInfos.clear(); + + Deserialize(context, "m_imageStarts", MutableState().imageStarts); + Deserialize(context, "m_baseFilePath", MutableState().baseFilePath); + for (const auto& obj1 : context.doc["exportInfos"].GetArray()) { std::vector>> innerVec; @@ -3408,9 +3420,9 @@ void SharedCache::Load(DeserializationContext& context) innerVec.push_back({obj2["key"].GetUint64(), innerPair}); } - m_exportInfos[obj1["key"].GetUint64()] = innerVec; + MutableState().exportInfos[obj1["key"].GetUint64()] = std::move(innerVec); } - m_symbolInfos.clear(); + for (auto& symbolInfo : context.doc["symbolInfos"].GetArray()) { std::vector>> symbolInfoVec; @@ -3419,52 +3431,94 @@ void SharedCache::Load(DeserializationContext& context) symbolInfoVec.push_back({symbolInfoPair[0].GetUint64(), {(BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString()}}); } - m_symbolInfos[symbolInfo[0].GetUint64()] = std::move(symbolInfoVec); + MutableState().symbolInfos[symbolInfo[0].GetUint64()] = std::move(symbolInfoVec); } - m_backingCaches.clear(); + for (auto& bcV : context.doc["backingCaches"].GetArray()) { BackingCache bc; bc.LoadFromValue(bcV); - m_backingCaches.push_back(std::move(bc)); + MutableState().backingCaches.push_back(std::move(bc)); } - m_images.clear(); + for (auto& imgV : context.doc["images"].GetArray()) { CacheImage img; img.LoadFromValue(imgV); - m_images.push_back(std::move(img)); + MutableState().images.push_back(std::move(img)); } - m_regionsMappedIntoMemory.clear(); + for (auto& rV : context.doc["regionsMappedIntoMemory"].GetArray()) { MemoryRegion r; r.LoadFromValue(rV); - m_regionsMappedIntoMemory.push_back(std::move(r)); + MutableState().regionsMappedIntoMemory.push_back(std::move(r)); } - m_stubIslandRegions.clear(); + for (auto& siV : context.doc["stubIslands"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - 
m_stubIslandRegions.push_back(std::move(si)); + MutableState().stubIslandRegions.push_back(std::move(si)); } - m_dyldDataRegions.clear(); + for (auto& siV : context.doc["dyldDataSections"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - m_dyldDataRegions.push_back(std::move(si)); + MutableState().dyldDataRegions.push_back(std::move(si)); } - m_nonImageRegions.clear(); + for (auto& siV : context.doc["nonImageRegions"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - m_nonImageRegions.push_back(std::move(si)); + MutableState().nonImageRegions.push_back(std::move(si)); } m_metadataValid = true; } +__attribute__((always_inline)) void SharedCache::AssertMutable() const +{ + if (m_stateIsShared) + { + abort(); + } +} + +void SharedCache::WillMutateState() +{ + if (!m_state) + { + m_state = std::make_shared(); + } + else if (m_stateIsShared) + { + m_state = std::make_shared(*m_state); + } + m_stateIsShared = false; +} + + +const std::vector& SharedCache::BackingCaches() const +{ + return State().backingCaches; +} + +DSCViewState SharedCache::ViewState() const +{ + return State().viewState; +} + +const std::unordered_map& SharedCache::AllImageStarts() const +{ + return State().imageStarts; +} + +const std::unordered_map& SharedCache::AllImageHeaders() const +{ + return State().headers; +} + } // namespace SharedCacheCore diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 4444f8473a..ac24b529f4 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -530,37 +530,21 @@ namespace SharedCacheCore { void Store(SerializationContext& context) const; void Load(DeserializationContext& context); + struct State; + private: Ref m_logger; /* VIEW STATE BEGIN -- SERIALIZE ALL OF THIS AND STORE IT IN RAW VIEW */ // Updated as the view is loaded further, more images are added, etc - DSCViewState m_viewState = DSCViewStateUnloaded; - std::unordered_map>>> - m_exportInfos; - std::unordered_map>>> 
- m_symbolInfos; - // --- + // NOTE: Access via `State()` or `MutableState()` below. + // `WillMutateState()` must be called before the first access to `MutableState()`. + std::shared_ptr m_state; + bool m_stateIsShared = false; // Serialized once by PerformInitialLoad and available after m_viewState == Loaded bool m_metadataValid = false; - std::string m_baseFilePath; - SharedCacheFormat m_cacheFormat; - - std::unordered_map m_imageStarts; - std::unordered_map m_headers; - - std::vector m_images; - - std::vector m_regionsMappedIntoMemory; - - std::vector m_backingCaches; - - std::vector m_stubIslandRegions; - std::vector m_dyldDataRegions; - std::vector m_nonImageRegions; - /* VIEWSTATE END -- NOTHING PAST THIS IS SERIALIZED */ /* API VIEW START */ @@ -592,20 +576,17 @@ namespace SharedCacheCore { std::vector>> LoadAllSymbolsAndWait(); - std::unordered_map AllImageStarts() const { return m_imageStarts; } - std::unordered_map AllImageHeaders() const { return m_headers; } + const std::unordered_map& AllImageStarts() const; + const std::unordered_map& AllImageHeaders() const; std::string SerializedImageHeaderForAddress(uint64_t address); std::string SerializedImageHeaderForName(std::string name); void FindSymbolAtAddrAndApplyToAddr(uint64_t symbolLocation, uint64_t targetLocation, bool triggerReanalysis); - std::vector BackingCaches() const { + const std::vector& BackingCaches() const; - return m_backingCaches; - } - - DSCViewState State() const { return m_viewState; } + DSCViewState ViewState() const; explicit SharedCache(BinaryNinja::Ref rawView); virtual ~SharedCache(); @@ -614,12 +595,21 @@ namespace SharedCacheCore { std::shared_ptr vm, uint64_t address, std::string installName); void InitializeHeader( Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad); - void ReadExportNode(std::vector>& symbolList, SharedCacheMachOHeader& header, DataBuffer& buffer, uint64_t textBase, - const std::string& currentText, size_t cursor, uint32_t endGuard); 
+ void ReadExportNode(std::vector>& symbolList, SharedCacheMachOHeader& header, DataBuffer& buffer, + uint64_t textBase, const std::string& currentText, size_t cursor, uint32_t endGuard); std::vector> ParseExportTrie( std::shared_ptr linkeditFile, SharedCacheMachOHeader header); - }; + const State& State() const { return *m_state; } + struct State& MutableState() { AssertMutable(); return *m_state; } + + void AssertMutable() const; + + // Ensures that the state is uniquely owned, copying it if it is not. + // Must be called before first access to `MutableState()` after the state + // is loaded from the cache. Can safely be called multiple times. + void WillMutateState(); + }; } From 59258c6a2937aca4aca07a17b108d1a6bbea8413 Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Fri, 15 Nov 2024 00:13:15 -0800 Subject: [PATCH 2/7] Fix uninitialized data in MemoryRegion instances causing bad behavior --- view/sharedcache/core/SharedCache.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 5f0a304c9d..5ab8845f4a 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -917,21 +917,19 @@ void SharedCache::PerformInitialLoad() // Part before the overlap if (regionStart < segmentStart) { - MemoryRegion newRegion; + MemoryRegion newRegion(*it); newRegion.start = regionStart; newRegion.size = segmentStart - regionStart; - newRegion.prettyName = it->prettyName; - newRegions.push_back(newRegion); + newRegions.push_back(std::move(newRegion)); } // Part after the overlap if (regionEnd > segmentEnd) { - MemoryRegion newRegion; + MemoryRegion newRegion(*it); newRegion.start = segmentEnd; newRegion.size = regionEnd - segmentEnd; - newRegion.prettyName = it->prettyName; - newRegions.push_back(newRegion); + newRegions.push_back(std::move(newRegion)); } // Erase the original region From 7637818599574dceb52b3679e59e34987b467df7 Mon Sep 17 
00:00:00 2001 From: Mark Rowe Date: Thu, 14 Nov 2024 16:04:22 -0800 Subject: [PATCH 3/7] [SharedCache] A collection of small optimizations 1. Use moves where possible to avoid unnecessary copies. 2. Remove redundant work within SymbolTableModel::updateSymbols. It calls setFilter which immediately clears then repopulates m_symbols. 3. Use unordered_map rather than map in `VM`. It is faster and the order isn't significant. 4. Avoid multiple accesses to the map within `VM` in the common cases. 5. Optimize the common case within SharedCache::HeaderForAddress. 6. Change return type of SharedCache::HeaderForAddress to avoid copying SharedCacheMachOHeaders. It is a large type that is expensive to copy. --- view/sharedcache/api/sharedcache.cpp | 28 ++++--- view/sharedcache/api/sharedcacheapi.h | 4 +- view/sharedcache/core/SharedCache.cpp | 18 +++-- view/sharedcache/core/SharedCache.h | 2 +- view/sharedcache/core/VM.cpp | 103 +++++++++++++------------- view/sharedcache/core/VM.h | 3 +- view/sharedcache/ui/dsctriage.cpp | 1 - 7 files changed, 87 insertions(+), 72 deletions(-) diff --git a/view/sharedcache/api/sharedcache.cpp b/view/sharedcache/api/sharedcache.cpp index 71d190da21..7c32835f63 100644 --- a/view/sharedcache/api/sharedcache.cpp +++ b/view/sharedcache/api/sharedcache.cpp @@ -3,6 +3,7 @@ // #include "sharedcacheapi.h" +#include namespace SharedCacheAPI { @@ -20,9 +21,9 @@ namespace SharedCacheAPI { return BNDSCViewFastGetBackingCacheCount(view->GetObject()); } - bool SharedCache::LoadImageWithInstallName(std::string installName) + bool SharedCache::LoadImageWithInstallName(std::string_view installName) { - char* str = BNAllocString(installName.c_str()); + char* str = BNAllocString(installName.data()); return BNDSCViewLoadImageWithInstallName(m_object, str); } @@ -46,6 +47,7 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { result.push_back(value[i]); @@ -65,13 +67,14 @@ namespace SharedCacheAPI { } 
std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { DSCMemoryRegion region; region.vmAddress = value[i].vmAddress; region.size = value[i].size; region.prettyName = value[i].name; - result.push_back(region); + result.push_back(std::move(region)); } BNDSCViewFreeLoadedRegions(value, count); @@ -87,20 +90,22 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { BackingCache cache; cache.path = value[i].path; cache.isPrimary = value[i].isPrimary; + cache.mappings.reserve(value[i].mappingCount); for (size_t j = 0; j < value[i].mappingCount; j++) { BackingCacheMapping mapping; mapping.vmAddress = value[i].mappings[j].vmAddress; mapping.size = value[i].mappings[j].size; mapping.fileOffset = value[i].mappings[j].fileOffset; - cache.mappings.push_back(mapping); + cache.mappings.push_back(std::move(mapping)); } - result.push_back(cache); + result.push_back(std::move(cache)); } BNDSCViewFreeBackingCaches(value, count); @@ -117,11 +122,13 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { DSCImage img; img.name = value[i].name; img.headerAddress = value[i].headerAddress; + img.mappings.reserve(value[i].mappingCount); for (size_t j = 0; j < value[i].mappingCount; j++) { DSCImageMemoryMapping mapping; @@ -131,9 +138,9 @@ namespace SharedCacheAPI { mapping.rawViewOffset = value[i].mappings[j].rawViewOffset; mapping.size = value[i].mappings[j].size; mapping.loaded = value[i].mappings[j].loaded; - img.mappings.push_back(mapping); + img.mappings.push_back(std::move(mapping)); } - result.push_back(img); + result.push_back(std::move(img)); } BNDSCViewFreeAllImages(value, count); @@ -150,13 +157,14 @@ namespace SharedCacheAPI { } std::vector result; + result.reserve(count); for (size_t i = 0; i < count; i++) { DSCSymbol sym; sym.address = value[i].address; sym.name = value[i].name; sym.image = value[i].image; - result.push_back(sym); + 
result.push_back(std::move(sym)); } BNDSCViewFreeSymbols(value, count); @@ -183,9 +191,9 @@ namespace SharedCacheAPI { return result; } - std::optional SharedCache::GetMachOHeaderForImage(std::string name) + std::optional SharedCache::GetMachOHeaderForImage(std::string_view name) { - char* str = BNAllocString(name.c_str()); + char* str = BNAllocString(name.data()); char* outputStr = BNDSCViewGetImageHeaderForName(m_object, str); if (outputStr == nullptr) return {}; diff --git a/view/sharedcache/api/sharedcacheapi.h b/view/sharedcache/api/sharedcacheapi.h index 7b049bc423..8098b3352b 100644 --- a/view/sharedcache/api/sharedcacheapi.h +++ b/view/sharedcache/api/sharedcacheapi.h @@ -257,7 +257,7 @@ namespace SharedCacheAPI { static BNDSCViewLoadProgress GetLoadProgress(Ref view); static uint64_t FastGetBackingCacheCount(Ref view); - bool LoadImageWithInstallName(std::string installName); + bool LoadImageWithInstallName(std::string_view installName); bool LoadSectionAtAddress(uint64_t addr); bool LoadImageContainingAddress(uint64_t addr); std::vector GetAvailableImages(); @@ -270,7 +270,7 @@ namespace SharedCacheAPI { std::vector GetBackingCaches(); std::vector GetImages(); - std::optional GetMachOHeaderForImage(std::string name); + std::optional GetMachOHeaderForImage(std::string_view name); std::optional GetMachOHeaderForAddress(uint64_t address); std::vector GetLoadedMemoryRegions(); diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 5ab8845f4a..d39136c9b5 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -1367,11 +1367,15 @@ SharedCache::SharedCache(BinaryNinja::Ref dscView) : m_ { std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); try { + MutableState().viewState = DSCViewStateLoaded; PerformInitialLoad(); } catch (...) 
{ m_logger->LogError("Failed to perform initial load of Shared Cache"); + + MutableState().viewState = DSCViewStateLoaded; + SaveToDSCView(); } auto settings = m_dscView->GetLoadSettings(VIEW_NAME); @@ -1394,8 +1398,6 @@ SharedCache::SharedCache(BinaryNinja::Ref dscView) : m_ } } } - MutableState().viewState = DSCViewStateLoaded; - SaveToDSCView(); } else { @@ -1435,8 +1437,13 @@ std::optional SharedCache::GetImageStart(std::string installName) return {}; } -std::optional SharedCache::HeaderForAddress(uint64_t address) +const SharedCacheMachOHeader* SharedCache::HeaderForAddress(uint64_t address) { + // It is very common for `HeaderForAddress` to be called with an address corresponding to a header. + if (auto it = State().headers.find(address)) { + return it; + } + // We _could_ mark each page with the image start? :grimacing emoji: // But that'd require mapping pages :grimacing emoji: :grimacing emoji: // There's not really any other hacks that could make this faster, that I can think of... 
@@ -1446,11 +1453,12 @@ std::optional SharedCache::HeaderForAddress(uint64_t add { if (segment.vmaddr <= address && segment.vmaddr + segment.vmsize > address) { - return header; + return &header; } } } - return {}; + + return nullptr; } std::string SharedCache::NameForAddress(uint64_t address) diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index ac24b529f4..49566d6008 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -564,7 +564,7 @@ namespace SharedCacheCore { void ParseAndApplySlideInfoForFile(std::shared_ptr file); std::optional GetImageStart(std::string installName); - std::optional HeaderForAddress(uint64_t); + const SharedCacheMachOHeader* HeaderForAddress(uint64_t); bool LoadImageWithInstallName(std::string installName); bool LoadSectionAtAddress(uint64_t address); bool LoadImageContainingAddress(uint64_t address); diff --git a/view/sharedcache/core/VM.cpp b/view/sharedcache/core/VM.cpp index c351a14609..d0ddf776d9 100644 --- a/view/sharedcache/core/VM.cpp +++ b/view/sharedcache/core/VM.cpp @@ -206,52 +206,54 @@ void MMAP::Unmap() std::shared_ptr> MMappedFileAccessor::Open(BinaryNinja::Ref dscView, const uint64_t sessionID, const std::string &path, std::function)> postAllocationRoutine) { std::scoped_lock lock(fileAccessorsMutex); - if (fileAccessors.count(path) == 0) - { - auto fileAcccessor = std::shared_ptr>(new SelfAllocatingWeakPtr( - // Allocator logic for the SelfAllocatingWeakPtr - [path=path, sessionID=sessionID, dscView](){ - std::unique_lock _lock(fileAccessorDequeMutex); - - // Iterate through held references and start removing them until we can get a file pointer - // FIXME: This could clear all currently used file pointers and still not get one. FIX! - // We should probably use a condition variable here to wait for a file pointer to be released!!! 
- for (auto& [_, fileAccessorDeque] : fileAccessorReferenceHolder) - { - if (fileAccessorSemaphore.try_acquire()) - break; - fileAccessorDeque.pop_front(); - } - - mmapCount++; - _lock.unlock(); - auto accessor = std::shared_ptr(new MMappedFileAccessor(ResolveFilePath(dscView, path)), [](MMappedFileAccessor* accessor){ - // worker thread or we can deadlock on exit here. - BinaryNinja::WorkerEnqueue([accessor](){ - fileAccessorSemaphore.release(); - mmapCount--; - if (fileAccessors.count(accessor->m_path)) - { - std::scoped_lock lock(fileAccessorsMutex); - fileAccessors.erase(accessor->m_path); - } - delete accessor; - }, "MMappedFileAccessor Destructor"); - }); - _lock.lock(); - // If some background thread has managed to try and open a file when the BV was already closed, - // we can still give them the file they want so they dont crash, but as soon as they let go it's gone. - if (!blockedSessionIDs.count(sessionID)) - fileAccessorReferenceHolder[sessionID].push_back(accessor); - return accessor; - }, - [postAllocationRoutine=postAllocationRoutine](std::shared_ptr accessor){ - if (postAllocationRoutine) - postAllocationRoutine(accessor); - })); - fileAccessors.insert_or_assign(path, fileAcccessor); + if (auto it = fileAccessors.find(path); it != fileAccessors.end()) { + return it->second; } - return fileAccessors.at(path); + + auto fileAcccessor = std::shared_ptr>(new SelfAllocatingWeakPtr( + // Allocator logic for the SelfAllocatingWeakPtr + [path=path, sessionID=sessionID, dscView](){ + std::unique_lock _lock(fileAccessorDequeMutex); + + // Iterate through held references and start removing them until we can get a file pointer + // FIXME: This could clear all currently used file pointers and still not get one. FIX! + // We should probably use a condition variable here to wait for a file pointer to be released!!! 
+ for (auto& [_, fileAccessorDeque] : fileAccessorReferenceHolder) + { + if (fileAccessorSemaphore.try_acquire()) + break; + fileAccessorDeque.pop_front(); + } + + mmapCount++; + _lock.unlock(); + auto accessor = std::shared_ptr(new MMappedFileAccessor(ResolveFilePath(dscView, path)), [](MMappedFileAccessor* accessor){ + // worker thread or we can deadlock on exit here. + BinaryNinja::WorkerEnqueue([accessor](){ + fileAccessorSemaphore.release(); + mmapCount--; + if (fileAccessors.count(accessor->m_path)) + { + std::scoped_lock lock(fileAccessorsMutex); + fileAccessors.erase(accessor->m_path); + } + delete accessor; + }, "MMappedFileAccessor Destructor"); + }); + _lock.lock(); + // If some background thread has managed to try and open a file when the BV was already closed, + // we can still give them the file they want so they dont crash, but as soon as they let go it's gone. + if (!blockedSessionIDs.count(sessionID)) + fileAccessorReferenceHolder[sessionID].push_back(accessor); + return accessor; + }, + [postAllocationRoutine=postAllocationRoutine](std::shared_ptr accessor){ + if (postAllocationRoutine) + postAllocationRoutine(accessor); + })); + + fileAccessors.insert_or_assign(path, fileAcccessor); + return fileAcccessor; } @@ -504,15 +506,12 @@ void VM::MapPages(BinaryNinja::Ref dscView, uint64_t se // Our pages will be delimited by shifting off the page size // So, 0x12345000 will become 0x12345 (assuming m_pageSize is 0x1000) auto page = (vm_address + (i)) >> m_pageSizeBits; - if (m_map.count(page) != 0) + auto [it, inserted] = m_map.insert_or_assign(page, PageMapping(filePath, MMappedFileAccessor::Open(dscView, sessionID, filePath, postAllocationRoutine), i + fileoff)); + if (m_safe && !inserted) { - if (m_safe) - { - BNLogWarn("Remapping page 0x%lx (i == 0x%lx) (a: 0x%zx, f: 0x%zx)", page, i, vm_address, fileoff); - throw MappingCollisionException(); - } + BNLogWarn("Remapping page 0x%lx (i == 0x%lx) (a: 0x%zx, f: 0x%zx)", page, i, vm_address, fileoff); + 
throw MappingCollisionException(); } - m_map.insert_or_assign(page, PageMapping(filePath, MMappedFileAccessor::Open(dscView, sessionID, filePath, postAllocationRoutine), i + fileoff)); } } diff --git a/view/sharedcache/core/VM.h b/view/sharedcache/core/VM.h index b8e5c59b81..d40797f988 100644 --- a/view/sharedcache/core/VM.h +++ b/view/sharedcache/core/VM.h @@ -6,6 +6,7 @@ #define SHAREDCACHE_VM_H #include #include +#include void VMShutdown(); @@ -215,7 +216,7 @@ class VMReader; class VM { - std::map m_map; + std::unordered_map m_map; size_t m_pageSize; size_t m_pageSizeBits; bool m_safe; diff --git a/view/sharedcache/ui/dsctriage.cpp b/view/sharedcache/ui/dsctriage.cpp index 008a4dd69e..9f456d7234 100644 --- a/view/sharedcache/ui/dsctriage.cpp +++ b/view/sharedcache/ui/dsctriage.cpp @@ -448,7 +448,6 @@ QVariant SymbolTableModel::headerData(int section, Qt::Orientation orientation, } void SymbolTableModel::updateSymbols() { - m_symbols = m_parent->m_symbols; setFilter(m_filter); } From 5e765f70a1117130bf08605c92a63779b5a00caa Mon Sep 17 00:00:00 2001 From: Mark Rowe Date: Thu, 14 Nov 2024 11:29:54 -0800 Subject: [PATCH 4/7] Use persistent data structures courtesy of immer [immer](https://github.com/arximboldi/immer) provides persistent, immutable data structures such as vectors and maps. These data structures support passing by value without copying any data and structural sharing to copy only a subset of data when a data structure is mutated. immer is published under the Boost Software License which should be compatible with its use in this context. Using these data structures eliminates a lot of the unnecessary copying of the shared cache's state when retrieving it from the view cache and beginning to mutate it. Instead of all of the vectors and maps contained within the state being copied, only the portions of the vectors or maps that are mutated end up being copied. 
The downside is that the APIs used when mutating are less ergonomic than using the native C++ types. The upside is that this cuts the time taken for the initial load and analysis of a macOS shared cache to around 45 seconds (from 70 seconds with the basic CoW implementation in #6129) and cuts the time taken to load and analyze AppKit from 14 minutes to around 8.5 minutes. --- .gitmodules | 3 + vendor/immer | 1 + view/sharedcache/CMakeLists.txt | 2 +- view/sharedcache/api/CMakeLists.txt | 2 +- view/sharedcache/core/CMakeLists.txt | 2 +- .../sharedcache/core/MetadataSerializable.cpp | 180 +++++++ .../sharedcache/core/MetadataSerializable.hpp | 40 ++ view/sharedcache/core/SharedCache.cpp | 469 +++++++++++++----- view/sharedcache/core/SharedCache.h | 61 +-- view/sharedcache/workflow/CMakeLists.txt | 2 +- 10 files changed, 596 insertions(+), 166 deletions(-) create mode 160000 vendor/immer diff --git a/.gitmodules b/.gitmodules index 13b8e29cc0..76abdfd276 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,3 +19,6 @@ [submodule "rust/examples/pdb-ng/pdb-0.8.0-patched"] path = rust/examples/pdb-ng/pdb-0.8.0-patched url = https://github.com/Vector35/pdb-rs.git +[submodule "vendor/immer"] + path = vendor/immer + url = https://github.com/arximboldi/immer.git diff --git a/vendor/immer b/vendor/immer new file mode 160000 index 0000000000..df6ef46d97 --- /dev/null +++ b/vendor/immer @@ -0,0 +1 @@ +Subproject commit df6ef46d97e1fe81f397015b9aeb32505cef653b diff --git a/view/sharedcache/CMakeLists.txt b/view/sharedcache/CMakeLists.txt index 63ba602bf4..e7f9895975 100644 --- a/view/sharedcache/CMakeLists.txt +++ b/view/sharedcache/CMakeLists.txt @@ -59,7 +59,7 @@ set_target_properties(sharedcache PROPERTIES POSITION_INDEPENDENT_CODE ON ) -target_include_directories(sharedcache PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/core ${CMAKE_CURRENT_SOURCE_DIR}/api ${CMAKE_CURRENT_SOURCE_DIR}/workflow) +target_include_directories(sharedcache PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/core ${CMAKE_CURRENT_SOURCE_DIR}/api ${CMAKE_CURRENT_SOURCE_DIR}/workflow ${BN_API_PATH}/vendor/immer) target_link_libraries(sharedcache PUBLIC sharedcacheapi binaryninjaapi sharedcachecore sharedcacheworkflow) diff --git a/view/sharedcache/api/CMakeLists.txt b/view/sharedcache/api/CMakeLists.txt index c14674a77e..eb6526aeb4 100644 --- a/view/sharedcache/api/CMakeLists.txt +++ b/view/sharedcache/api/CMakeLists.txt @@ -64,7 +64,7 @@ endfunction() get_recursive_include_dirs(binaryninjaapi INCLUDES) target_include_directories(sharedcacheapi - PUBLIC ${PROJECT_SOURCE_DIR} ${INCLUDES}) + PUBLIC ${PROJECT_SOURCE_DIR} ${INCLUDES} ${BN_API_PATH}/vendor/immer) set_target_properties(sharedcacheapi PROPERTIES CXX_STANDARD 17 diff --git a/view/sharedcache/core/CMakeLists.txt b/view/sharedcache/core/CMakeLists.txt index 22c7ea2846..db4577c7df 100644 --- a/view/sharedcache/core/CMakeLists.txt +++ b/view/sharedcache/core/CMakeLists.txt @@ -77,7 +77,7 @@ target_compile_definitions(sharedcachecore PRIVATE ${COMPILE_DEFS}) target_compile_definitions(sharedcachecore PRIVATE SHAREDCACHE_LIBRARY ${COMPILE_DEFS}) -target_include_directories(sharedcachecore PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDES}) +target_include_directories(sharedcachecore PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDES} ${BN_API_PATH}/vendor/immer) set_target_properties(sharedcachecore PROPERTIES CXX_STANDARD 17 diff --git a/view/sharedcache/core/MetadataSerializable.cpp b/view/sharedcache/core/MetadataSerializable.cpp index 0fa712b17a..65c380b5f6 100644 --- a/view/sharedcache/core/MetadataSerializable.cpp +++ b/view/sharedcache/core/MetadataSerializable.cpp @@ -140,6 +140,46 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve b.emplace_back(i.GetString()); } +void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b) +{ + auto transient = b.transient(); + for (auto& i : 
context.doc[name.data()].GetArray()) + transient.set(i.GetArray()[0].GetUint64(), i.GetArray()[1].GetString()); + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + transient.set(i.GetArray()[0].GetUint64(), i.GetArray()[1].GetUint64()); + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::map>& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + std::string key = i.GetArray()[0].GetString(); + immer::map_transient memArray; + for (auto& member : i.GetArray()[1].GetArray()) + { + memArray.set(member.GetArray()[0].GetUint64(), member.GetArray()[1].GetUint64()); + } + transient.set(key, std::move(memArray).persistent()); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + transient.set(i.GetArray()[0].GetString(), i.GetArray()[1].GetString()); + b = std::move(transient).persistent(); +} + // Note: This flattens the pair into [first, second.first, second.second] with no nested arrays. 
void Serialize(SerializationContext& context, const std::pair>& value) { @@ -203,6 +243,77 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve } } +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + transient.push_back(i.GetString()); + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>>& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + std::pair> j; + j.first = i.GetArray()[0].GetUint64(); + j.second.first = i.GetArray()[1].GetUint64(); + j.second.second = i.GetArray()[2].GetUint64(); + transient.push_back(j); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + std::pair j; + j.first = i.GetArray()[0].GetUint64(); + j.second = i.GetArray()[1].GetBool(); + transient.push_back(j); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + transient.push_back(i.GetUint64()); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { + transient.set(i.GetArray()[0].GetString(), i.GetArray()[1].GetUint64()); + } + b = std::move(transient).persistent(); +} + +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>>>& b) +{ + auto transient = b.transient(); + for (auto& i : context.doc[name.data()].GetArray()) + { 
+ std::pair>> j; + j.first = i.GetArray()[0].GetUint64(); + for (auto& k : i.GetArray()[1].GetArray()) + { + j.second.push_back({k.GetArray()[0].GetUint64(), k.GetArray()[1].GetString()}); + } + transient.push_back({j.first, std::move(j.second).persistent()}); + } + b = std::move(transient).persistent(); +} + void Serialize(SerializationContext& context, const mach_header_64& value) { context.writer.StartArray(); Serialize(context, value.magic); @@ -425,6 +536,35 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve } } +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto bArr = context.doc[name.data()].GetArray(); + auto transient = b.transient(); + for (auto& s : bArr) + { + section_64 sec; + auto s2 = s.GetArray(); + std::string sectNameStr = s2[0].GetString(); + memset(sec.sectname, 0, 16); + memcpy(sec.sectname, sectNameStr.c_str(), sectNameStr.size()); + std::string segNameStr = s2[1].GetString(); + memset(sec.segname, 0, 16); + memcpy(sec.segname, segNameStr.c_str(), segNameStr.size()); + sec.addr = s2[2].GetUint64(); + sec.size = s2[3].GetUint64(); + sec.offset = s2[4].GetUint(); + sec.align = s2[5].GetUint(); + sec.reloff = s2[6].GetUint(); + sec.nreloc = s2[7].GetUint(); + sec.flags = s2[8].GetUint(); + sec.reserved1 = s2[9].GetUint(); + sec.reserved2 = s2[10].GetUint(); + sec.reserved3 = s2[11].GetUint(); + transient.push_back(std::move(sec)); + } + b = std::move(transient).persistent(); +} + void Serialize(SerializationContext& context, const linkedit_data_command& value) { context.writer.StartArray(); @@ -498,6 +638,31 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve } } +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto bArr = context.doc[name.data()].GetArray(); + auto transient = b.transient(); + for (auto& s : bArr) + { + segment_command_64 sec; + auto s2 = s.GetArray(); + std::string 
segNameStr = s2[0].GetString(); + memset(sec.segname, 0, 16); + memcpy(sec.segname, segNameStr.c_str(), segNameStr.size()); + sec.vmaddr = s2[1].GetUint64(); + sec.vmsize = s2[2].GetUint64(); + sec.fileoff = s2[3].GetUint64(); + sec.filesize = s2[4].GetUint64(); + sec.maxprot = s2[5].GetUint(); + sec.initprot = s2[6].GetUint(); + sec.nsects = s2[7].GetUint(); + sec.flags = s2[8].GetUint(); + transient.push_back(std::move(sec)); + } + b = std::move(transient).persistent(); +} + + void Serialize(SerializationContext& context, const build_version_command& value) { context.writer.StartArray(); @@ -542,4 +707,19 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve } } +void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b) +{ + auto bArr = context.doc[name.data()].GetArray(); + auto transient = b.transient(); + for (auto& s : bArr) + { + build_tool_version sec; + auto s2 = s.GetArray(); + sec.tool = s2[0].GetUint(); + sec.version = s2[1].GetUint(); + transient.push_back(sec); + } + b = std::move(transient).persistent(); +} + } // namespace SharedCacheCore diff --git a/view/sharedcache/core/MetadataSerializable.hpp b/view/sharedcache/core/MetadataSerializable.hpp index 5a39f5f67b..2965c960a1 100644 --- a/view/sharedcache/core/MetadataSerializable.hpp +++ b/view/sharedcache/core/MetadataSerializable.hpp @@ -40,6 +40,10 @@ #include "rapidjson/prettywriter.h" #include "../api/sharedcachecore.h" #include "view/macho/machoview.h" +#include "immer/map.hpp" +#include "immer/vector.hpp" +#include "immer/vector_transient.hpp" +#include "immer/map_transient.hpp" #ifndef SHAREDCACHE_CORE_METADATASERIALIZABLE_HPP #define SHAREDCACHE_CORE_METADATASERIALIZABLE_HPP @@ -191,6 +195,29 @@ void Serialize(SerializationContext& context, const std::vector& values) context.writer.EndArray(); } +template +void Serialize(SerializationContext& context, const immer::map& value) +{ + context.writer.StartArray(); + for (auto& pair : 
value) + { + Serialize(context, pair); + } + context.writer.EndArray(); +} + +template +void Serialize(SerializationContext& context, const immer::vector& values) +{ + context.writer.StartArray(); + for (const auto& value : values) + { + Serialize(context, value); + } + context.writer.EndArray(); +} + + SHAREDCACHE_FFI_API void Serialize(SerializationContext& context, const char*); SHAREDCACHE_FFI_API void Serialize(SerializationContext& context, bool b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, bool& b); @@ -223,6 +250,16 @@ SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::strin SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, std::vector& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, std::unordered_map& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, std::vector>>>& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::map>& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::map& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>>& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, 
immer::map& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext& context, std::string_view name, immer::vector>>>& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const mach_header_64& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, mach_header_64& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const symtab_command& b); @@ -237,15 +274,18 @@ SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const function_starts_ SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, function_starts_command& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const section_64& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, std::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, immer::vector& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const linkedit_data_command& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, linkedit_data_command& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const segment_command_64& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, segment_command_64& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, std::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, immer::vector& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const build_version_command& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, build_version_command& b); SHAREDCACHE_FFI_API void Serialize(SerializationContext&, const build_tool_version& b); SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, std::vector& b); +SHAREDCACHE_FFI_API void Deserialize(DeserializationContext&, std::string_view name, 
immer::vector& b); } // namespace SharedCacheCore diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index d39136c9b5..01588044a2 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -33,6 +33,8 @@ #include #include +#include "immer/flex_vector.hpp" +#include "immer/vector_transient.hpp" using namespace BinaryNinja; using namespace SharedCacheCore; @@ -55,23 +57,23 @@ int count_trailing_zeros(uint64_t value) { struct SharedCache::State { - std::unordered_map>>> + immer::map>>> exportInfos; - std::unordered_map>>> + immer::map>>> symbolInfos; - std::unordered_map imageStarts; - std::unordered_map headers; + immer::map imageStarts; + immer::map headers; - std::vector images; + immer::vector images; - std::vector regionsMappedIntoMemory; + immer::vector regionsMappedIntoMemory; - std::vector backingCaches; + immer::vector backingCaches; - std::vector stubIslandRegions; // TODO honestly both of these should be refactored into nonImageRegions. :p - std::vector dyldDataRegions; - std::vector nonImageRegions; + immer::vector stubIslandRegions; // TODO honestly both of these should be refactored into nonImageRegions. 
:p + immer::vector dyldDataRegions; + immer::vector nonImageRegions; std::string baseFilePath; SharedCacheFormat cacheFormat; @@ -268,6 +270,7 @@ void SharedCache::PerformInitialLoad() cache.isPrimary = true; cache.path = path; + immer::vector_transient>> mappings; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); @@ -275,18 +278,20 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = mapping.fileOffset; mapRawToAddrAndSize.second.first = mapping.address; mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(mapRawToAddrAndSize); + mappings.push_back(mapRawToAddrAndSize); } - MutableState().backingCaches.push_back(std::move(cache)); + cache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; - + auto imageStarts = State().imageStarts.transient(); for (size_t i = 0; i < primaryCacheHeader.imagesCountOld; i++) { baseFile->Read(&img, primaryCacheHeader.imagesOffsetOld + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - MutableState().imageStarts[iname] = img.address; + imageStarts.set(iname, img.address); } + MutableState().imageStarts = std::move(imageStarts).persistent(); m_logger->LogInfo("Found %d images in the shared cache", primaryCacheHeader.imagesCountOld); @@ -299,6 +304,7 @@ void SharedCache::PerformInitialLoad() } baseFile.reset(); // No longer needed, we're about to remap this file into VM space so we can load these. 
uint64_t i = 0; + auto stubIslandRegions = State().stubIslandRegions.transient(); for (auto address : addresses) { i++; @@ -317,10 +323,11 @@ void SharedCache::PerformInitialLoad() std::string segNameStr = std::string(segName); stubIslandRegion.prettyName = "dyld_shared_cache_branch_islands_" + std::to_string(i) + "::" + segNameStr; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); + stubIslandRegions.push_back(std::move(stubIslandRegion)); } } } + MutableState().stubIslandRegions = std::move(stubIslandRegions).persistent(); } m_logger->LogInfo("Found %d branch pools in the shared cache", primaryCacheHeader.branchPoolsCount); @@ -335,7 +342,7 @@ void SharedCache::PerformInitialLoad() BackingCache cache; cache.isPrimary = true; cache.path = path; - + auto mappings = cache.mappings.transient(); for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); @@ -343,17 +350,19 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = mapping.fileOffset; mapRawToAddrAndSize.second.first = mapping.address; mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(std::move(mapRawToAddrAndSize)); + mappings.push_back(std::move(mapRawToAddrAndSize)); } - MutableState().backingCaches.push_back(std::move(cache)); + cache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; + auto imageStarts = State().imageStarts.transient(); for (size_t i = 0; i < primaryCacheHeader.imagesCount; i++) { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - MutableState().imageStarts[iname] = img.address; + imageStarts.set(iname, 
img.address); } if (primaryCacheHeader.branchPoolsCount) @@ -361,10 +370,12 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = - baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.set("dyld_shared_cache_branch_islands_" + std::to_string(i), + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))); } } + MutableState().imageStarts = std::move(imageStarts).persistent(); + std::string mainFileName = base_name(path); if (auto projectFile = m_dscView->GetFile()->GetProjectFile()) mainFileName = projectFile->GetName(); @@ -411,6 +422,7 @@ void SharedCache::PerformInitialLoad() subCache.isPrimary = false; subCache.path = subCachePath; + auto mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), @@ -419,8 +431,9 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + mappings.push_back(std::move(mapRawToAddrAndSize)); } + subCache.mappings = std::move(mappings).persistent(); if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 && subCacheHeader.imagesTextOffset == 0) @@ -433,10 +446,10 @@ void SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - 
MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); + MutableState().stubIslandRegions = State().stubIslandRegions.push_back(std::move(stubIslandRegion)); } - MutableState().backingCaches.push_back(std::move(subCache)); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); } break; } @@ -448,6 +461,7 @@ void SharedCache::PerformInitialLoad() cache.isPrimary = true; cache.path = path; + auto mappings = cache.mappings.transient(); for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); @@ -455,17 +469,19 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = mapping.fileOffset; mapRawToAddrAndSize.second.first = mapping.address; mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(std::move(mapRawToAddrAndSize)); + mappings.push_back(std::move(mapRawToAddrAndSize)); } - MutableState().backingCaches.push_back(std::move(cache)); + cache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; + auto imageStarts = State().imageStarts.transient(); for (size_t i = 0; i < primaryCacheHeader.imagesCount; i++) { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - MutableState().imageStarts[iname] = img.address; + imageStarts.set(iname, img.address); } if (primaryCacheHeader.branchPoolsCount) @@ -473,10 +489,11 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = - baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.set("dyld_shared_cache_branch_islands_" + 
std::to_string(i), + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))); } } + MutableState().imageStarts = std::move(imageStarts).persistent(); std::string mainFileName = base_name(path); if (auto projectFile = m_dscView->GetFile()->GetProjectFile()) @@ -507,6 +524,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info subCacheMapping {}; + auto mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), @@ -515,10 +533,10 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + mappings.push_back(std::move(mapRawToAddrAndSize)); } - - MutableState().backingCaches.push_back(std::move(subCache)); + subCache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 && subCacheHeader.imagesTextOffset == 0) @@ -531,7 +549,7 @@ void SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); + MutableState().stubIslandRegions = State().stubIslandRegions.push_back(std::move(stubIslandRegion)); } } @@ -552,6 +570,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info subCacheMapping {}; BackingCache subCache; + mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { @@ -561,10 
+580,11 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + mappings.push_back(std::move(mapRawToAddrAndSize)); } + subCache.mappings = std::move(mappings).persistent(); - MutableState().backingCaches.push_back(std::move(subCache)); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); break; } case iOS16CacheFormat: @@ -575,6 +595,7 @@ void SharedCache::PerformInitialLoad() cache.isPrimary = true; cache.path = path; + auto mappings = cache.mappings.transient(); for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) { baseFile->Read(&mapping, primaryCacheHeader.mappingOffset + (i * sizeof(mapping)), sizeof(mapping)); @@ -582,18 +603,19 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = mapping.fileOffset; mapRawToAddrAndSize.second.first = mapping.address; mapRawToAddrAndSize.second.second = mapping.size; - cache.mappings.push_back(std::move(mapRawToAddrAndSize)); + mappings.push_back(std::move(mapRawToAddrAndSize)); } - - MutableState().backingCaches.push_back(std::move(cache)); + cache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(cache)); dyld_cache_image_info img {}; + auto imageStarts = State().imageStarts.transient(); for (size_t i = 0; i < primaryCacheHeader.imagesCount; i++) { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - MutableState().imageStarts[iname] = img.address; + imageStarts.set(iname, img.address); } if (primaryCacheHeader.branchPoolsCount) @@ -601,10 +623,11 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - 
MutableState().imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = - baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.set("dyld_shared_cache_branch_islands_" + std::to_string(i), + baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))); } } + MutableState().imageStarts = std::move(imageStarts).persistent(); std::string mainFileName = base_name(path); if (auto projectFile = m_dscView->GetFile()->GetProjectFile()) @@ -655,6 +678,7 @@ void SharedCache::PerformInitialLoad() BackingCache subCache; subCache.isPrimary = false; subCache.path = subCachePath; + auto mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { @@ -665,7 +689,7 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + mappings.push_back(std::move(mapRawToAddrAndSize)); if (subCachePath.find(".dylddata") != std::string::npos) { @@ -677,11 +701,12 @@ void SharedCache::PerformInitialLoad() dyldDataRegion.size = size; dyldDataRegion.prettyName = subCacheFilename + "::_data" + std::to_string(j); dyldDataRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable); - MutableState().dyldDataRegions.push_back(std::move(dyldDataRegion)); + MutableState().dyldDataRegions = State().dyldDataRegions.push_back(std::move(dyldDataRegion)); } } + subCache.mappings = std::move(mappings).persistent(); - MutableState().backingCaches.push_back(std::move(subCache)); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); if (subCacheHeader.mappingCount == 1 && subCacheHeader.imagesCountOld == 0 && subCacheHeader.imagesCount == 0 && subCacheHeader.imagesTextOffset == 0) @@ -694,7 +719,7 @@ void 
SharedCache::PerformInitialLoad() stubIslandRegion.size = size; stubIslandRegion.prettyName = subCacheFilename + "::_stubs"; stubIslandRegion.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - MutableState().stubIslandRegions.push_back(std::move(stubIslandRegion)); + MutableState().stubIslandRegions = State().stubIslandRegions.push_back(std::move(stubIslandRegion)); } } @@ -717,7 +742,7 @@ void SharedCache::PerformInitialLoad() subCache.path = subCachePath; dyld_cache_mapping_info subCacheMapping {}; - + auto mappings = subCache.mappings.transient(); for (size_t j = 0; j < subCacheHeader.mappingCount; j++) { subCacheFile->Read(&subCacheMapping, subCacheHeader.mappingOffset + (j * sizeof(subCacheMapping)), @@ -726,10 +751,10 @@ void SharedCache::PerformInitialLoad() mapRawToAddrAndSize.first = subCacheMapping.fileOffset; mapRawToAddrAndSize.second.first = subCacheMapping.address; mapRawToAddrAndSize.second.second = subCacheMapping.size; - subCache.mappings.push_back(std::move(mapRawToAddrAndSize)); + mappings.push_back(std::move(mapRawToAddrAndSize)); } - - MutableState().backingCaches.push_back(std::move(subCache)); + subCache.mappings = std::move(mappings).persistent(); + MutableState().backingCaches = State().backingCaches.push_back(std::move(subCache)); } catch (...) 
{} @@ -749,6 +774,9 @@ void SharedCache::PerformInitialLoad() m_logger->LogError("Failed to map VM pages for Shared Cache on initial load, this is fatal."); return; } + + auto headers = State().headers.transient(); + auto images = State().images.transient(); for (const auto& start : State().imageStarts) { try { @@ -760,10 +788,11 @@ void SharedCache::PerformInitialLoad() auto mapping = vm->MappingAtAddress(imageHeader->linkeditSegment.vmaddr); imageHeader->exportTriePath = mapping.first.filePath; } - MutableState().headers[start.second] = imageHeader.value(); + headers.set(start.second, imageHeader.value()); CacheImage image; image.installName = start.first; image.headerLocation = start.second; + auto regions = image.regions.transient(); for (const auto& segment : imageHeader->segments) { char segName[17]; @@ -794,9 +823,10 @@ void SharedCache::PerformInitialLoad() flags |= SegmentExecutable; sectionRegion.flags = (BNSegmentFlag)flags; - image.regions.push_back(sectionRegion); + regions.push_back(sectionRegion); } - MutableState().images.push_back(image); + image.regions = std::move(regions).persistent(); + images.push_back(std::move(image)); } else { @@ -808,9 +838,12 @@ void SharedCache::PerformInitialLoad() m_logger->LogError("Failed to load Mach-O header for %s: %s", start.first.c_str(), ex.what()); } } + MutableState().headers = std::move(headers).persistent(); + MutableState().images = std::move(images).persistent(); m_logger->LogInfo("Loaded %d Mach-O headers", State().headers.size()); + auto nonImageRegions = State().nonImageRegions.transient(); for (const auto& cache : State().backingCaches) { size_t i = 0; @@ -822,14 +855,17 @@ void SharedCache::PerformInitialLoad() region.prettyName = base_name(cache.path) + "::" + std::to_string(i); // FIXME flags!!! BackingCache.mapping needs refactored to store this information! 
region.flags = (BNSegmentFlag)(BNSegmentFlag::SegmentReadable | BNSegmentFlag::SegmentExecutable); - MutableState().nonImageRegions.push_back(std::move(region)); + nonImageRegions.push_back(std::move(region)); i++; } } + MutableState().nonImageRegions = std::move(nonImageRegions).persistent(); // Iterate through each Mach-O header if (!State().dyldDataRegions.empty()) { + // Removal / insertion is not ergonomic with `immer::vector` so use std::vector for this instead. + std::vector dyldDataRegions(State().dyldDataRegions.begin(), State().dyldDataRegions.end()); for (const auto& [headerKey, header] : State().headers) { // Iterate through each segment of the header @@ -839,7 +875,7 @@ void SharedCache::PerformInitialLoad() uint64_t segmentEnd = segmentStart + segment.vmsize; // Iterate through each region in m_dyldDataRegions - for (auto it = State().dyldDataRegions.begin(); it != State().dyldDataRegions.end();) + for (auto it = dyldDataRegions.begin(); it != dyldDataRegions.end();) { uint64_t regionStart = it->start; uint64_t regionSize = it->size; @@ -872,12 +908,12 @@ void SharedCache::PerformInitialLoad() } // Erase the original region - it = MutableState().dyldDataRegions.erase(it); + it = dyldDataRegions.erase(it); // Insert the new regions (if any) for (const auto& newRegion : newRegions) { - it = MutableState().dyldDataRegions.insert(it, newRegion); + it = dyldDataRegions.insert(it, newRegion); ++it; // Move iterator to the next position } } @@ -888,11 +924,15 @@ void SharedCache::PerformInitialLoad() } } } + // TODO(bdash): Ideally this would move out of dyldDataRegions. + MutableState().dyldDataRegions = immer::vector(dyldDataRegions.begin(), dyldDataRegions.end()); } // Iterate through each Mach-O header if (!State().nonImageRegions.empty()) { + // Removal / insertion is not ergonomic with `immer::vector` so use std::vector for this instead. 
+ std::vector nonImageRegions(State().nonImageRegions.begin(), State().nonImageRegions.end()); for (const auto& [headerKey, header] : State().headers) { // Iterate through each segment of the header @@ -902,7 +942,7 @@ void SharedCache::PerformInitialLoad() uint64_t segmentEnd = segmentStart + segment.vmsize; // Iterate through each region in m_dyldDataRegions - for (auto it = State().nonImageRegions.begin(); it != State().nonImageRegions.end();) + for (auto it = nonImageRegions.begin(); it != nonImageRegions.end();) { uint64_t regionStart = it->start; uint64_t regionSize = it->size; @@ -933,12 +973,12 @@ void SharedCache::PerformInitialLoad() } // Erase the original region - it = MutableState().nonImageRegions.erase(it); + it = nonImageRegions.erase(it); // Insert the new regions (if any) for (const auto& newRegion : newRegions) { - it = MutableState().nonImageRegions.insert(it, newRegion); + it = nonImageRegions.insert(it, newRegion); ++it; // Move iterator to the next position } } @@ -949,6 +989,8 @@ void SharedCache::PerformInitialLoad() } } } + // TODO(bdash): Ideally this would move out of nonImageRegions. + MutableState().nonImageRegions = immer::vector(nonImageRegions.begin(), nonImageRegions.end()); } SaveToDSCView(); @@ -1008,7 +1050,7 @@ void SharedCache::DeserializeFromRawView() m_metadataValid = true; WillMutateState(); MutableState().viewState = DSCViewStateUnloaded; - MutableState().images.clear(); // fixme ?? 
+ MutableState().images = immer::vector(); } } @@ -1539,18 +1581,24 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } SharedCacheMachOHeader targetHeader; - CacheImage* targetImage = nullptr; - MemoryRegion* targetSegment = nullptr; + const CacheImage* targetImage = nullptr; + decltype(State().images.begin()) targetImageIt; + const MemoryRegion* targetSegment = nullptr; + decltype(CacheImage().regions.begin()) targetSegmentIt; - for (auto& image : MutableState().images) + for (auto imageIt = State().images.begin(); imageIt != State().images.end(); ++imageIt) { - for (auto& region : image.regions) + auto& image = *imageIt; + for (auto regionIt = image.regions.begin(); regionIt != image.regions.end(); ++regionIt) { + auto& region = *regionIt; if (region.start <= address && region.start + region.size > address) { targetHeader = MutableState().headers[image.headerLocation]; targetImage = &image; + targetImageIt = imageIt; targetSegment = &region; + targetSegmentIt = regionIt; break; } } @@ -1559,8 +1607,9 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } if (!targetSegment) { - for (auto& stubIsland : MutableState().stubIslandRegions) + for (auto it = State().stubIslandRegions.begin(); it != State().stubIslandRegions.end(); ++it) { + auto& stubIsland = *it; if (stubIsland.start <= address && stubIsland.start + stubIsland.size > address) { if (stubIsland.loaded) @@ -1584,11 +1633,11 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) m_dscView->AddUserSection(name, stubIsland.start, stubIsland.size, ReadOnlyCodeSectionSemantics); m_dscView->WriteBuffer(stubIsland.start, buff); - stubIsland.loaded = true; - - stubIsland.rawViewOffsetIfLoaded = rawViewEnd; - - MutableState().regionsMappedIntoMemory.push_back(stubIsland); + MemoryRegion newStubIsland(stubIsland); + newStubIsland.loaded = true; + newStubIsland.rawViewOffsetIfLoaded = rawViewEnd; + MutableState().regionsMappedIntoMemory = State().regionsMappedIntoMemory.push_back(newStubIsland); +
MutableState().stubIslandRegions = State().stubIslandRegions.set(it.index(), std::move(newStubIsland)); SaveToDSCView(); @@ -1599,8 +1648,9 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } } - for (auto& dyldData : MutableState().dyldDataRegions) + for (auto it = State().dyldDataRegions.begin(); it != State().dyldDataRegions.end(); ++it) { + auto& dyldData = *it; if (dyldData.start <= address && dyldData.start + dyldData.size > address) { if (dyldData.loaded) @@ -1624,10 +1674,11 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) m_dscView->AddUserSection(name, dyldData.start, dyldData.size, ReadOnlyDataSectionSemantics); m_dscView->WriteBuffer(dyldData.start, buff); - dyldData.loaded = true; - dyldData.rawViewOffsetIfLoaded = rawViewEnd; - - MutableState().regionsMappedIntoMemory.push_back(dyldData); + MemoryRegion newDyldData(dyldData); + newDyldData.loaded = true; + newDyldData.rawViewOffsetIfLoaded = rawViewEnd; + MutableState().regionsMappedIntoMemory = State().regionsMappedIntoMemory.push_back(newDyldData); + MutableState().dyldDataRegions = State().dyldDataRegions.set(it.index(), std::move(newDyldData)); SaveToDSCView(); @@ -1638,8 +1689,9 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) } } - for (auto& region : MutableState().nonImageRegions) + for (auto it = State().nonImageRegions.begin(); it != State().nonImageRegions.end(); ++it) { + auto& region = *it; if (region.start <= address && region.start + region.size > address) { if (region.loaded) @@ -1662,10 +1714,11 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) m_dscView->AddUserSection(name, region.start, region.size, ReadOnlyCodeSectionSemantics); m_dscView->WriteBuffer(region.start, buff); - region.loaded = true; - region.rawViewOffsetIfLoaded = rawViewEnd; - - MutableState().regionsMappedIntoMemory.push_back(region); + MemoryRegion newRegion(region); + newRegion.loaded = true; + newRegion.rawViewOffsetIfLoaded = rawViewEnd; + 
MutableState().regionsMappedIntoMemory = State().regionsMappedIntoMemory.push_back(newRegion); + MutableState().nonImageRegions = State().nonImageRegions.set(it.index(), std::move(newRegion)); SaveToDSCView(); @@ -1698,16 +1751,31 @@ bool SharedCache::LoadSectionAtAddress(uint64_t address) targetSegment->start, targetSegment->size, rawViewEnd, targetSegment->size, targetSegment->flags); m_dscView->WriteBuffer(targetSegment->start, buff); - targetSegment->loaded = true; - targetSegment->rawViewOffsetIfLoaded = rawViewEnd; + MemoryRegion newTargetSegment(*targetSegment); + newTargetSegment.loaded = true; + newTargetSegment.rawViewOffsetIfLoaded = rawViewEnd; + MutableState().regionsMappedIntoMemory = State().regionsMappedIntoMemory.push_back(newTargetSegment); - MutableState().regionsMappedIntoMemory.push_back(*targetSegment); + auto images = State().images; + auto regions = images[targetImageIt.index()].regions; + CacheImage newTargetImage(*targetImage); + newTargetImage.regions = regions.set(targetSegmentIt.index(), std::move(newTargetSegment)); + MutableState().images = images.set(targetImageIt.index(), std::move(newTargetImage)); SaveToDSCView(); if (!targetSegment->headerInitialized) { + targetSegment = &State().images[targetImageIt.index()].regions[targetSegmentIt.index()]; SharedCache::InitializeHeader(m_dscView, vm.get(), targetHeader, {targetSegment}); + + MemoryRegion newTargetSegment(*targetSegment); + newTargetSegment.headerInitialized = true; + auto images = State().images; + auto regions = images[targetImageIt.index()].regions; + CacheImage newTargetImage(*targetImage); + newTargetImage.regions = regions.set(targetSegmentIt.index(), std::move(newTargetSegment)); + MutableState().images = images.set(targetImageIt.index(), std::move(newTargetImage)); } m_dscView->AddAnalysisOption("linearsweep"); @@ -1730,22 +1798,25 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) m_logger->LogInfo("Loading image %s", installName.c_str()); auto vm = 
GetVMMap(); - CacheImage* targetImage = nullptr; + const CacheImage* targetImage = nullptr; + decltype(State().images.begin()) targetImageIt; - for (auto& cacheImage : MutableState().images) + for (auto it = State().images.begin(); it != State().images.end(); ++it) { - if (cacheImage.installName == installName) + if (it->installName == installName) { - targetImage = &cacheImage; + targetImage = &*it; + targetImageIt = it; break; } } + auto it = State().headers.find(targetImage->headerLocation); - if (it == State().headers.end()) + if (!it) { return false; } - const auto& header = it->second; + const auto& header = *it; auto id = m_dscView->BeginUndoActions(); MutableState().viewState = DSCViewStateLoadedWithImages; @@ -1753,10 +1824,13 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) auto reader = VMReader(vm); reader.Seek(targetImage->headerLocation); - std::vector regionsToLoad; + std::vector regionsToLoad; - for (auto& region : targetImage->regions) + auto newTargetImageRegions = targetImage->regions.transient(); + auto newRegionsMappedIntoMemory = State().regionsMappedIntoMemory.transient(); + for (auto it = targetImage->regions.begin(); it != targetImage->regions.end(); ++it) { + auto& region = *it; bool allowLoadingLinkedit = false; if (settings && settings->Contains("loader.dsc.allowLoadingLinkeditSegments")) allowLoadingLinkedit = settings->Get("loader.dsc.allowLoadingLinkeditSegments", m_dscView); @@ -1778,16 +1852,16 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) m_dscView->GetParentView()->GetParentView()->WriteBuffer(rawViewEnd, buff); m_dscView->GetParentView()->WriteBuffer(rawViewEnd, buff); - region.loaded = true; - region.rawViewOffsetIfLoaded = rawViewEnd; - - MutableState().regionsMappedIntoMemory.push_back(region); + MemoryRegion newRegion(region); + newRegion.loaded = true; + newRegion.rawViewOffsetIfLoaded = rawViewEnd; + newRegionsMappedIntoMemory.push_back(newRegion); + 
newTargetImageRegions.set(it.index(), std::move(newRegion)); + regionsToLoad.push_back(it.index()); m_dscView->GetParentView()->AddAutoSegment(rawViewEnd, region.size, rawViewEnd, region.size, region.flags); m_dscView->AddUserSegment(region.start, region.size, rawViewEnd, region.size, region.flags); m_dscView->WriteBuffer(region.start, buff); - - regionsToLoad.push_back(®ion); } if (regionsToLoad.empty()) @@ -1796,6 +1870,13 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) return false; } + MutableState().regionsMappedIntoMemory = std::move(newRegionsMappedIntoMemory).persistent(); + auto images = State().images; + CacheImage newTargetImage(*targetImage); + // newTargetImageRegions is intentionally not moved here as it is used again below. + newTargetImage.regions = newTargetImageRegions.persistent(); + MutableState().images = images.set(targetImageIt.index(), std::move(newTargetImage)); + std::unique_lock typelibLock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].typeLibraryLookupAndApplicationMutex); auto typeLib = m_dscView->GetTypeLibrary(header.installName); @@ -1820,14 +1901,25 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) return false; } - std::vector regions; - for (auto& region : regionsToLoad) - { - regions.push_back(region); + std::vector regions; + for (size_t idx : regionsToLoad) { + regions.push_back(&newTargetImageRegions[idx]); } SharedCache::InitializeHeader(m_dscView, vm.get(), *h, regions); + { + for (size_t idx : regionsToLoad) { + MemoryRegion newTargetSegment(newTargetImageRegions[idx]); + newTargetSegment.headerInitialized = true; + newTargetImageRegions.set(idx, std::move(newTargetSegment)); + } + auto images = State().images; + CacheImage newTargetImage(*targetImage); + newTargetImage.regions = std::move(newTargetImageRegions).persistent(); + MutableState().images = images.set(targetImageIt.index(), std::move(newTargetImage)); + } + try { auto objc = std::make_unique(m_dscView, this, 
false); @@ -1860,9 +1952,96 @@ bool SharedCache::LoadImageWithInstallName(std::string installName) return true; } +struct TransientSharedCacheMachOHeader +{ + uint64_t textBase = 0; + uint64_t loadCommandOffset = 0; + mach_header_64 ident {}; + std::string identifierPrefix; + std::string installName; + + immer::vector_transient> entryPoints; + immer::vector_transient m_entryPoints; // list of entrypoints + + symtab_command symtab {}; + dysymtab_command dysymtab {}; + dyld_info_command dyldInfo {}; + routines_command_64 routines64 {}; + function_starts_command functionStarts {}; + immer::vector_transient moduleInitSections; + linkedit_data_command exportTrie {}; + linkedit_data_command chainedFixups {}; + + uint64_t relocationBase = 0; + // Section and program headers, internally use 64-bit form as it is a superset of 32-bit + immer::vector_transient segments; // only three types of sections __TEXT, __DATA, __IMPORT + segment_command_64 linkeditSegment = {}; + immer::vector_transient sections; + immer::vector_transient sectionNames; + + immer::vector_transient symbolStubSections; + immer::vector_transient symbolPointerSections; + + immer::vector_transient dylibs; + + build_version_command buildVersion = {}; + immer::vector_transient buildToolVersions; + + std::string exportTriePath; + + bool linkeditPresent = false; + bool dysymPresent = false; + bool dyldInfoPresent = false; + bool exportTriePresent = false; + bool chainedFixupsPresent = false; + bool routinesPresent = false; + bool functionStartsPresent = false; + bool relocatable = false; + + SharedCacheMachOHeader persistent() && { + return SharedCacheMachOHeader { + .textBase = textBase, + .loadCommandOffset = loadCommandOffset, + .ident = ident, + .identifierPrefix = std::move(identifierPrefix), + .installName = std::move(installName), + .entryPoints = std::move(entryPoints).persistent(), + .m_entryPoints = std::move(m_entryPoints).persistent(), + .symtab = std::move(symtab), + .dysymtab = std::move(dysymtab), 
+ .dyldInfo = std::move(dyldInfo), + .routines64 = std::move(routines64), + .functionStarts = std::move(functionStarts), + .moduleInitSections = std::move(moduleInitSections).persistent(), + .exportTrie = std::move(exportTrie), + .chainedFixups = std::move(chainedFixups), + .relocationBase = relocationBase, + .segments = std::move(segments).persistent(), + .linkeditSegment = std::move(linkeditSegment), + .sections = std::move(sections).persistent(), + .sectionNames = std::move(sectionNames).persistent(), + .symbolStubSections = std::move(symbolStubSections).persistent(), + .symbolPointerSections = std::move(symbolPointerSections).persistent(), + .dylibs = std::move(dylibs).persistent(), + .buildVersion = std::move(buildVersion), + .buildToolVersions = std::move(buildToolVersions).persistent(), + .exportTriePath = std::move(exportTriePath), + .linkeditPresent = linkeditPresent, + .dysymPresent = dysymPresent, + .dyldInfoPresent = dyldInfoPresent, + .exportTriePresent = exportTriePresent, + .chainedFixupsPresent = chainedFixupsPresent, + .routinesPresent = routinesPresent, + .functionStartsPresent = functionStartsPresent, + .relocatable = relocatable, + }; + } +}; + + std::optional SharedCache::LoadHeaderForAddress(std::shared_ptr vm, uint64_t address, std::string installName) { - SharedCacheMachOHeader header; + TransientSharedCacheMachOHeader header; header.textBase = address; header.installName = installName; @@ -2265,11 +2444,11 @@ std::optional SharedCache::LoadHeaderForAddress(std::sha return {}; } - return header; + return std::move(header).persistent(); } void SharedCache::InitializeHeader( - Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad) + Ref view, VM* vm, SharedCacheMachOHeader header, const std::vector regionsToLoad) { WillMutateState(); @@ -2574,7 +2753,7 @@ void SharedCache::InitializeHeader( nlist_64 sym; memset(&sym, 0, sizeof(sym)); auto N_TYPE = 0xE; // idk - std::vector>> symbolInfos; + immer::vector_transient>> 
symbolInfos; for (size_t i = 0; i < header.symtab.nsyms; i++) { reader->Read(&sym, header.symtab.symoff + i * sizeof(nlist_64), sizeof(nlist_64)); @@ -2641,13 +2820,13 @@ void SharedCache::InitializeHeader( view->DefineAutoSymbol(symbolObj); symbolInfos.push_back({sym.n_value, {type, symbol}}); } - MutableState().symbolInfos[header.textBase] = symbolInfos; + MutableState().symbolInfos = State().symbolInfos.set(header.textBase, std::move(symbolInfos).persistent()); } if (header.exportTriePresent && header.linkeditPresent && vm->AddressIsMapped(header.linkeditSegment.vmaddr)) { auto symbols = SharedCache::ParseExportTrie(vm->MappingAtAddress(header.linkeditSegment.vmaddr).first.fileAccessor->lock(), header); - std::vector>> exportMapping; + immer::vector_transient>> exportMapping; for (const auto& symbol : symbols) { exportMapping.push_back({symbol->GetAddress(), {symbol->GetType(), symbol->GetRawName()}}); @@ -2687,14 +2866,15 @@ void SharedCache::InitializeHeader( else view->DefineAutoSymbol(symbol); } - MutableState().exportInfos[header.textBase] = std::move(exportMapping); + MutableState().exportInfos = State().exportInfos.set(header.textBase, std::move(exportMapping).persistent()); } view->EndBulkModifySymbols(); - for (auto region : regionsToLoad) - { - region->headerInitialized = true; - } + // TODO: The caller is responsible for this for now. 
+ // for (auto region : regionsToLoad) + // { + // region->headerInitialized = true; + // } } struct ExportNode @@ -2708,6 +2888,7 @@ struct ExportNode void SharedCache::ReadExportNode(std::vector>& symbolList, SharedCacheMachOHeader& header, DataBuffer& buffer, uint64_t textBase, const std::string& currentText, size_t cursor, uint32_t endGuard) { + WillMutateState(); if (cursor > endGuard) throw ReadException(); @@ -2809,6 +2990,7 @@ std::vector>> SharedCache::LoadAllSymbolsAndW std::unique_lock initialLoadBlock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); std::vector>> symbols; + auto newExportInfos = State().exportInfos.transient(); for (const auto& img : State().images) { auto header = HeaderForAddress(img.headerLocation); @@ -2822,14 +3004,15 @@ std::vector>> SharedCache::LoadAllSymbolsAndW continue; } auto exportList = SharedCache::ParseExportTrie(mapping, *header); - std::vector>> exportMapping; + immer::vector_transient>> exportMapping; for (const auto& sym : exportList) { exportMapping.push_back({sym->GetAddress(), {sym->GetType(), sym->GetRawName()}}); symbols.push_back({img.installName, sym}); } - MutableState().exportInfos[header->textBase] = std::move(exportMapping); + newExportInfos.set(header->textBase, std::move(exportMapping).persistent()); } + MutableState().exportInfos = std::move(newExportInfos).persistent(); SaveToDSCView(); @@ -2850,9 +3033,9 @@ std::string SharedCache::SerializedImageHeaderForAddress(uint64_t address) std::string SharedCache::SerializedImageHeaderForName(std::string name) { - if (auto it = State().imageStarts.find(name); it != State().imageStarts.end()) + if (auto it = State().imageStarts.find(name)) { - if (auto header = HeaderForAddress(it->second)) + if (auto header = HeaderForAddress(*it)) { return header->AsString(); } @@ -2903,7 +3086,7 @@ void SharedCache::FindSymbolAtAddrAndApplyToAddr( return; } auto exportList = SharedCache::ParseExportTrie(mapping, *header); - 
std::vector>> exportMapping; + immer::vector_transient>> exportMapping; std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].typeLibraryLookupAndApplicationMutex); auto typeLib = m_dscView->GetTypeLibrary(header->installName); if (!typeLib) @@ -2952,7 +3135,7 @@ void SharedCache::FindSymbolAtAddrAndApplyToAddr( } { std::unique_lock _lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); - MutableState().exportInfos[header->textBase] = std::move(exportMapping); + MutableState().exportInfos = State().exportInfos.set(header->textBase, std::move(exportMapping).persistent()); } m_dscView->EndBulkModifySymbols(); m_dscView->ForgetUndoActions(id); @@ -2984,7 +3167,8 @@ bool SharedCache::SaveToDSCView() } return false; } -std::vector SharedCache::GetMappedRegions() const + +immer::vector SharedCache::GetMappedRegions() const { std::unique_lock lock(viewSpecificMutexes[m_dscView->GetFile()->GetSessionId()].viewOperationsThatInfluenceMetadataMutex); return State().regionsMappedIntoMemory; @@ -3406,81 +3590,98 @@ void SharedCache::Load(DeserializationContext& context) MutableState().viewState = static_cast(context.load("m_viewState")); MutableState().cacheFormat = static_cast(context.load("m_cacheFormat")); + auto headers = State().headers.transient(); for (auto& startAndHeader : context.doc["headers"].GetArray()) { SharedCacheMachOHeader header; header.LoadFromValue(startAndHeader); - MutableState().headers[header.textBase] = std::move(header); + headers.set(header.textBase, std::move(header)); } + MutableState().headers = std::move(headers).persistent(); Deserialize(context, "m_imageStarts", MutableState().imageStarts); Deserialize(context, "m_baseFilePath", MutableState().baseFilePath); + auto exportInfos = State().exportInfos.transient(); for (const auto& obj1 : context.doc["exportInfos"].GetArray()) { - std::vector>> innerVec; + immer::vector_transient>> innerVec; for (const auto& obj2 : 
obj1["value"].GetArray()) { std::pair innerPair = { (BNSymbolType)obj2["val1"].GetUint64(), obj2["val2"].GetString()}; innerVec.push_back({obj2["key"].GetUint64(), innerPair}); } - - MutableState().exportInfos[obj1["key"].GetUint64()] = std::move(innerVec); + exportInfos.set(obj1["key"].GetUint64(), std::move(innerVec).persistent()); } + MutableState().exportInfos = std::move(exportInfos).persistent(); + auto symbolInfos = State().symbolInfos.transient(); for (auto& symbolInfo : context.doc["symbolInfos"].GetArray()) { - std::vector>> symbolInfoVec; + immer::vector_transient>> symbolInfoVec; for (auto& symbolInfoPair : symbolInfo.GetArray()) { symbolInfoVec.push_back({symbolInfoPair[0].GetUint64(), {(BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString()}}); } - MutableState().symbolInfos[symbolInfo[0].GetUint64()] = std::move(symbolInfoVec); + symbolInfos.set(symbolInfo[0].GetUint64(), std::move(symbolInfoVec).persistent()); } + MutableState().symbolInfos = std::move(symbolInfos).persistent(); + auto backingCaches = State().backingCaches.transient(); for (auto& bcV : context.doc["backingCaches"].GetArray()) { BackingCache bc; bc.LoadFromValue(bcV); - MutableState().backingCaches.push_back(std::move(bc)); + backingCaches.push_back(std::move(bc)); } + MutableState().backingCaches = std::move(backingCaches).persistent(); + auto images = State().images.transient(); for (auto& imgV : context.doc["images"].GetArray()) { CacheImage img; img.LoadFromValue(imgV); - MutableState().images.push_back(std::move(img)); + images.push_back(std::move(img)); } + MutableState().images = std::move(images).persistent(); + auto regionsMappedIntoMemory = State().regionsMappedIntoMemory.transient(); for (auto& rV : context.doc["regionsMappedIntoMemory"].GetArray()) { MemoryRegion r; r.LoadFromValue(rV); - MutableState().regionsMappedIntoMemory.push_back(std::move(r)); + regionsMappedIntoMemory.push_back(std::move(r)); } + MutableState().regionsMappedIntoMemory = 
std::move(regionsMappedIntoMemory).persistent(); + auto stubIslandRegions = State().stubIslandRegions.transient(); for (auto& siV : context.doc["stubIslands"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - MutableState().stubIslandRegions.push_back(std::move(si)); + stubIslandRegions.push_back(std::move(si)); } + MutableState().stubIslandRegions = std::move(stubIslandRegions).persistent(); + auto dyldDataRegions = State().dyldDataRegions.transient(); for (auto& siV : context.doc["dyldDataSections"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - MutableState().dyldDataRegions.push_back(std::move(si)); + dyldDataRegions.push_back(std::move(si)); } + MutableState().dyldDataRegions = std::move(dyldDataRegions).persistent(); + auto nonImageRegions = State().nonImageRegions.transient(); for (auto& siV : context.doc["nonImageRegions"].GetArray()) { MemoryRegion si; si.LoadFromValue(siV); - MutableState().nonImageRegions.push_back(std::move(si)); + nonImageRegions.push_back(std::move(si)); } + MutableState().nonImageRegions = std::move(nonImageRegions).persistent(); m_metadataValid = true; } @@ -3507,7 +3708,7 @@ void SharedCache::WillMutateState() } -const std::vector& SharedCache::BackingCaches() const +const immer::vector& SharedCache::BackingCaches() const { return State().backingCaches; } @@ -3517,12 +3718,12 @@ DSCViewState SharedCache::ViewState() const return State().viewState; } -const std::unordered_map& SharedCache::AllImageStarts() const +const immer::map& SharedCache::AllImageStarts() const { return State().imageStarts; } -const std::unordered_map& SharedCache::AllImageHeaders() const +const immer::map& SharedCache::AllImageHeaders() const { return State().headers; } diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 49566d6008..5ed04110c8 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -8,6 +8,10 @@ #include "view/macho/machoview.h" #include 
"MetadataSerializable.hpp" #include "../api/sharedcachecore.h" +#include "immer/map.hpp" +#include "immer/vector.hpp" +#include "immer/vector_transient.hpp" +#include "immer/map_transient.hpp" #ifndef SHAREDCACHE_SHAREDCACHE_H #define SHAREDCACHE_SHAREDCACHE_H @@ -61,7 +65,7 @@ namespace SharedCacheCore { { std::string installName; uint64_t headerLocation; - std::vector regions; + immer::vector regions; void Store(SerializationContext& context) const { @@ -81,13 +85,14 @@ namespace SharedCacheCore { MSL(installName); MSL(headerLocation); auto bArr = context.doc["regions"].GetArray(); - regions.clear(); + auto local_regions = immer::vector_transient(); for (auto& region : bArr) { MemoryRegion r; r.LoadFromString(region.GetString()); - regions.push_back(r); + local_regions.push_back(r); } + regions = local_regions.persistent(); } }; @@ -95,7 +100,7 @@ namespace SharedCacheCore { { std::string path; bool isPrimary = false; - std::vector>> mappings; + immer::vector>> mappings; void Store(SerializationContext& context) const { @@ -361,36 +366,36 @@ namespace SharedCacheCore { { uint64_t textBase = 0; uint64_t loadCommandOffset = 0; - mach_header_64 ident; + mach_header_64 ident {}; std::string identifierPrefix; std::string installName; - std::vector> entryPoints; - std::vector m_entryPoints; // list of entrypoints + immer::vector> entryPoints; + immer::vector m_entryPoints; // list of entrypoints - symtab_command symtab; - dysymtab_command dysymtab; - dyld_info_command dyldInfo; - routines_command_64 routines64; - function_starts_command functionStarts; - std::vector moduleInitSections; - linkedit_data_command exportTrie; + symtab_command symtab {}; + dysymtab_command dysymtab {}; + dyld_info_command dyldInfo {}; + routines_command_64 routines64 {}; + function_starts_command functionStarts {}; + immer::vector moduleInitSections; + linkedit_data_command exportTrie {}; linkedit_data_command chainedFixups {}; uint64_t relocationBase; // Section and program headers, 
internally use 64-bit form as it is a superset of 32-bit - std::vector segments; // only three types of sections __TEXT, __DATA, __IMPORT - segment_command_64 linkeditSegment; - std::vector sections; - std::vector sectionNames; + immer::vector segments; // only three types of sections __TEXT, __DATA, __IMPORT + segment_command_64 linkeditSegment {}; + immer::vector sections; + immer::vector sectionNames; - std::vector symbolStubSections; - std::vector symbolPointerSections; + immer::vector symbolStubSections; + immer::vector symbolPointerSections; - std::vector dylibs; + immer::vector dylibs; - build_version_command buildVersion; - std::vector buildToolVersions; + build_version_command buildVersion {}; + immer::vector buildToolVersions; std::string exportTriePath; @@ -572,19 +577,19 @@ namespace SharedCacheCore { std::string ImageNameForAddress(uint64_t address); std::vector GetAvailableImages(); - std::vector GetMappedRegions() const; + immer::vector GetMappedRegions() const; std::vector>> LoadAllSymbolsAndWait(); - const std::unordered_map& AllImageStarts() const; - const std::unordered_map& AllImageHeaders() const; + const immer::map& AllImageStarts() const; + const immer::map& AllImageHeaders() const; std::string SerializedImageHeaderForAddress(uint64_t address); std::string SerializedImageHeaderForName(std::string name); void FindSymbolAtAddrAndApplyToAddr(uint64_t symbolLocation, uint64_t targetLocation, bool triggerReanalysis); - const std::vector& BackingCaches() const; + const immer::vector& BackingCaches() const; DSCViewState ViewState() const; @@ -594,7 +599,7 @@ namespace SharedCacheCore { std::optional LoadHeaderForAddress( std::shared_ptr vm, uint64_t address, std::string installName); void InitializeHeader( - Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad); + Ref view, VM* vm, SharedCacheMachOHeader header, const std::vector regionsToLoad); void ReadExportNode(std::vector>& symbolList, SharedCacheMachOHeader& header, 
DataBuffer& buffer, uint64_t textBase, const std::string& currentText, size_t cursor, uint32_t endGuard); std::vector> ParseExportTrie( diff --git a/view/sharedcache/workflow/CMakeLists.txt b/view/sharedcache/workflow/CMakeLists.txt index 7a9c157aa4..2310abe633 100644 --- a/view/sharedcache/workflow/CMakeLists.txt +++ b/view/sharedcache/workflow/CMakeLists.txt @@ -65,7 +65,7 @@ message(STATUS "RCD: ${BN_REF_COUNT_DEBUG}") get_recursive_include_dirs(binaryninjaapi INCLUDES) target_include_directories(sharedcacheworkflow - PUBLIC ${PROJECT_SOURCE_DIR} ${INCLUDES}) + PUBLIC ${PROJECT_SOURCE_DIR} ${INCLUDES} ${BN_API_PATH}/vendor/immer) set_target_properties(sharedcacheworkflow PROPERTIES CXX_STANDARD 17 From cff089fb7a068f51ce95950c43c6c01860176d2e Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Mon, 18 Nov 2024 20:49:31 +0000 Subject: [PATCH 5/7] Fix serialization deserialization type mismatch for small integers --- view/sharedcache/core/MetadataSerializable.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/view/sharedcache/core/MetadataSerializable.cpp b/view/sharedcache/core/MetadataSerializable.cpp index 65c380b5f6..0101c956c1 100644 --- a/view/sharedcache/core/MetadataSerializable.cpp +++ b/view/sharedcache/core/MetadataSerializable.cpp @@ -53,17 +53,17 @@ void Deserialize(DeserializationContext& context, std::string_view name, bool& b void Deserialize(DeserializationContext& context, std::string_view name, uint8_t& b) { - b = static_cast(context.doc[name.data()].GetUint64()); + b = static_cast(context.doc[name.data()].GetUint()); } void Deserialize(DeserializationContext& context, std::string_view name, uint16_t& b) { - b = static_cast(context.doc[name.data()].GetUint64()); + b = static_cast(context.doc[name.data()].GetUint()); } void Deserialize(DeserializationContext& context, std::string_view name, uint32_t& b) { - b = static_cast(context.doc[name.data()].GetUint64()); + b = static_cast(context.doc[name.data()].GetUint()); } void 
Deserialize(DeserializationContext& context, std::string_view name, uint64_t& b) @@ -73,12 +73,12 @@ void Deserialize(DeserializationContext& context, std::string_view name, uint64_ void Deserialize(DeserializationContext& context, std::string_view name, int8_t& b) { - b = context.doc[name.data()].GetInt64(); + b = context.doc[name.data()].GetInt(); } void Deserialize(DeserializationContext& context, std::string_view name, int16_t& b) { - b = context.doc[name.data()].GetInt64(); + b = context.doc[name.data()].GetInt(); } void Deserialize(DeserializationContext& context, std::string_view name, int32_t& b) From 0058ea0605ddc134af1f74a78706450a224ce774 Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Mon, 18 Nov 2024 20:52:36 +0000 Subject: [PATCH 6/7] Improve deserialization typing and fix some bugs There was an issue when deserializing a `mach_header_64` where `cpusubtype` (and another field I think) were being deserialized to the wrong integer type and causing an assertion failure. I've added a macro to avoid these simple issues in the future. Additionally there was another bug with deserializing `std::pair>` where it was not being serialized in an expected way. The deserialization has been corrected and the unused serialization function for it has been removed. 
--- .../sharedcache/core/MetadataSerializable.cpp | 259 +++++++++--------- 1 file changed, 127 insertions(+), 132 deletions(-) diff --git a/view/sharedcache/core/MetadataSerializable.cpp b/view/sharedcache/core/MetadataSerializable.cpp index 0101c956c1..0acef32d29 100644 --- a/view/sharedcache/core/MetadataSerializable.cpp +++ b/view/sharedcache/core/MetadataSerializable.cpp @@ -2,6 +2,9 @@ namespace SharedCacheCore { +#define DESERIALIZE_FIELD(field, container) \ + field = container.Get(); + void Serialize(SerializationContext& context, std::string_view str) { context.writer.String(str.data(), str.length()); } @@ -180,24 +183,14 @@ void Deserialize(DeserializationContext& context, std::string_view name, immer:: b = std::move(transient).persistent(); } -// Note: This flattens the pair into [first, second.first, second.second] with no nested arrays. -void Serialize(SerializationContext& context, const std::pair>& value) -{ - context.writer.StartArray(); - Serialize(context, value.first); - Serialize(context, value.second.first); - Serialize(context, value.second.second); - context.writer.EndArray(); -} - void Deserialize(DeserializationContext& context, std::string_view name, std::vector>>& b) { for (auto& i : context.doc[name.data()].GetArray()) { std::pair> j; - j.first = i.GetArray()[0].GetUint64(); - j.second.first = i.GetArray()[1].GetUint64(); - j.second.second = i.GetArray()[2].GetUint64(); + DESERIALIZE_FIELD(j.first, i.GetArray()[0]) + DESERIALIZE_FIELD(j.second.first, i.GetArray()[1].GetArray()[0]) + DESERIALIZE_FIELD(j.second.second, i.GetArray()[1].GetArray()[1]) b.push_back(j); } } @@ -207,8 +200,8 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve for (auto& i : context.doc[name.data()].GetArray()) { std::pair j; - j.first = i.GetArray()[0].GetUint64(); - j.second = i.GetArray()[1].GetBool(); + DESERIALIZE_FIELD(j.first, i.GetArray()[0]) + DESERIALIZE_FIELD(j.second, i.GetArray()[1]) b.push_back(j); } } @@ -257,9 +250,9 
@@ void Deserialize(DeserializationContext& context, std::string_view name, immer:: for (auto& i : context.doc[name.data()].GetArray()) { std::pair> j; - j.first = i.GetArray()[0].GetUint64(); - j.second.first = i.GetArray()[1].GetUint64(); - j.second.second = i.GetArray()[2].GetUint64(); + DESERIALIZE_FIELD(j.first, i.GetArray()[0]) + DESERIALIZE_FIELD(j.second.first, i.GetArray()[1].GetArray()[0]) + DESERIALIZE_FIELD(j.second.second, i.GetArray()[1].GetArray()[1]) transient.push_back(j); } b = std::move(transient).persistent(); @@ -271,8 +264,8 @@ void Deserialize(DeserializationContext& context, std::string_view name, immer:: for (auto& i : context.doc[name.data()].GetArray()) { std::pair j; - j.first = i.GetArray()[0].GetUint64(); - j.second = i.GetArray()[1].GetBool(); + DESERIALIZE_FIELD(j.first, i.GetArray()[0]) + DESERIALIZE_FIELD(j.second, i.GetArray()[1]) transient.push_back(j); } b = std::move(transient).persistent(); @@ -330,14 +323,14 @@ void Serialize(SerializationContext& context, const mach_header_64& value) { void Deserialize(DeserializationContext& context, std::string_view name, mach_header_64& b) { auto bArr = context.doc[name.data()].GetArray(); - b.magic = bArr[0].GetUint(); - b.cputype = bArr[1].GetUint(); - b.cpusubtype = bArr[2].GetUint(); - b.filetype = bArr[3].GetUint(); - b.ncmds = bArr[4].GetUint(); - b.sizeofcmds = bArr[5].GetUint(); - b.flags = bArr[6].GetUint(); - b.reserved = bArr[7].GetUint(); + DESERIALIZE_FIELD(b.magic, bArr[0]) + DESERIALIZE_FIELD(b.cputype, bArr[1]) + DESERIALIZE_FIELD(b.cpusubtype, bArr[2]) + DESERIALIZE_FIELD(b.filetype, bArr[3]) + DESERIALIZE_FIELD(b.ncmds, bArr[4]) + DESERIALIZE_FIELD(b.sizeofcmds, bArr[5]) + DESERIALIZE_FIELD(b.flags, bArr[6]) + DESERIALIZE_FIELD(b.reserved, bArr[7]) } void Serialize(SerializationContext& context, const symtab_command& value) @@ -355,12 +348,12 @@ void Serialize(SerializationContext& context, const symtab_command& value) void Deserialize(DeserializationContext& context, 
std::string_view name, symtab_command& b) { auto bArr = context.doc[name.data()].GetArray(); - b.cmd = bArr[0].GetUint(); - b.cmdsize = bArr[1].GetUint(); - b.symoff = bArr[2].GetUint(); - b.nsyms = bArr[3].GetUint(); - b.stroff = bArr[4].GetUint(); - b.strsize = bArr[5].GetUint(); + DESERIALIZE_FIELD(b.cmd, bArr[0]) + DESERIALIZE_FIELD(b.cmdsize, bArr[1]) + DESERIALIZE_FIELD(b.symoff, bArr[2]) + DESERIALIZE_FIELD(b.nsyms, bArr[3]) + DESERIALIZE_FIELD(b.stroff, bArr[4]) + DESERIALIZE_FIELD(b.strsize, bArr[5]) } void Serialize(SerializationContext& context, const dysymtab_command& value) @@ -392,26 +385,26 @@ void Serialize(SerializationContext& context, const dysymtab_command& value) void Deserialize(DeserializationContext& context, std::string_view name, dysymtab_command& b) { auto bArr = context.doc[name.data()].GetArray(); - b.cmd = bArr[0].GetUint(); - b.cmdsize = bArr[1].GetUint(); - b.ilocalsym = bArr[2].GetUint(); - b.nlocalsym = bArr[3].GetUint(); - b.iextdefsym = bArr[4].GetUint(); - b.nextdefsym = bArr[5].GetUint(); - b.iundefsym = bArr[6].GetUint(); - b.nundefsym = bArr[7].GetUint(); - b.tocoff = bArr[8].GetUint(); - b.ntoc = bArr[9].GetUint(); - b.modtaboff = bArr[10].GetUint(); - b.nmodtab = bArr[11].GetUint(); - b.extrefsymoff = bArr[12].GetUint(); - b.nextrefsyms = bArr[13].GetUint(); - b.indirectsymoff = bArr[14].GetUint(); - b.nindirectsyms = bArr[15].GetUint(); - b.extreloff = bArr[16].GetUint(); - b.nextrel = bArr[17].GetUint(); - b.locreloff = bArr[18].GetUint(); - b.nlocrel = bArr[19].GetUint(); + b.cmd = bArr[0].Get(); + DESERIALIZE_FIELD(b.cmdsize, bArr[1]) + DESERIALIZE_FIELD(b.ilocalsym, bArr[2]) + DESERIALIZE_FIELD(b.nlocalsym, bArr[3]) + DESERIALIZE_FIELD(b.iextdefsym, bArr[4]) + DESERIALIZE_FIELD(b.nextdefsym, bArr[5]) + DESERIALIZE_FIELD(b.iundefsym, bArr[6]) + DESERIALIZE_FIELD(b.nundefsym, bArr[7]) + DESERIALIZE_FIELD(b.tocoff, bArr[8]) + DESERIALIZE_FIELD(b.ntoc, bArr[9]) + DESERIALIZE_FIELD(b.modtaboff, bArr[10]) + 
DESERIALIZE_FIELD(b.nmodtab, bArr[11]) + DESERIALIZE_FIELD(b.extrefsymoff, bArr[12]) + DESERIALIZE_FIELD(b.nextrefsyms, bArr[13]) + DESERIALIZE_FIELD(b.indirectsymoff, bArr[14]) + DESERIALIZE_FIELD(b.nindirectsyms, bArr[15]) + DESERIALIZE_FIELD(b.extreloff, bArr[16]) + DESERIALIZE_FIELD(b.nextrel, bArr[17]) + DESERIALIZE_FIELD(b.locreloff, bArr[18]) + DESERIALIZE_FIELD(b.nlocrel, bArr[19]) } void Serialize(SerializationContext& context, const dyld_info_command& value) @@ -435,18 +428,18 @@ void Serialize(SerializationContext& context, const dyld_info_command& value) void Deserialize(DeserializationContext& context, std::string_view name, dyld_info_command& b) { auto bArr = context.doc[name.data()].GetArray(); - b.cmd = bArr[0].GetUint(); - b.cmdsize = bArr[1].GetUint(); - b.rebase_off = bArr[2].GetUint(); - b.rebase_size = bArr[3].GetUint(); - b.bind_off = bArr[4].GetUint(); - b.bind_size = bArr[5].GetUint(); - b.weak_bind_off = bArr[6].GetUint(); - b.weak_bind_size = bArr[7].GetUint(); - b.lazy_bind_off = bArr[8].GetUint(); - b.lazy_bind_size = bArr[9].GetUint(); - b.export_off = bArr[10].GetUint(); - b.export_size = bArr[11].GetUint(); + DESERIALIZE_FIELD(b.cmd, bArr[0]) + DESERIALIZE_FIELD(b.cmdsize, bArr[1]) + DESERIALIZE_FIELD(b.rebase_off, bArr[2]) + DESERIALIZE_FIELD(b.rebase_size, bArr[3]) + DESERIALIZE_FIELD(b.bind_off, bArr[4]) + DESERIALIZE_FIELD(b.bind_size, bArr[5]) + DESERIALIZE_FIELD(b.weak_bind_off, bArr[6]) + DESERIALIZE_FIELD(b.weak_bind_size, bArr[7]) + DESERIALIZE_FIELD(b.lazy_bind_off, bArr[8]) + DESERIALIZE_FIELD(b.lazy_bind_size, bArr[9]) + DESERIALIZE_FIELD(b.export_off, bArr[10]) + DESERIALIZE_FIELD(b.export_size, bArr[11]) } void Serialize(SerializationContext& context, const routines_command_64& value) @@ -462,10 +455,10 @@ void Serialize(SerializationContext& context, const routines_command_64& value) void Deserialize(DeserializationContext& context, std::string_view name, routines_command_64& b) { auto bArr = 
context.doc[name.data()].GetArray(); - b.cmd = bArr[0].GetUint(); - b.cmdsize = bArr[1].GetUint(); - b.init_address = bArr[2].GetUint(); - b.init_module = bArr[3].GetUint(); + DESERIALIZE_FIELD(b.cmd, bArr[0]) + DESERIALIZE_FIELD(b.cmdsize, bArr[1]) + DESERIALIZE_FIELD(b.init_address, bArr[2]) + DESERIALIZE_FIELD(b.init_module, bArr[3]) } void Serialize(SerializationContext& context, const function_starts_command& value) @@ -481,10 +474,10 @@ void Serialize(SerializationContext& context, const function_starts_command& val void Deserialize(DeserializationContext& context, std::string_view name, function_starts_command& b) { auto bArr = context.doc[name.data()].GetArray(); - b.cmd = bArr[0].GetUint(); - b.cmdsize = bArr[1].GetUint(); - b.funcoff = bArr[2].GetUint(); - b.funcsize = bArr[3].GetUint(); + DESERIALIZE_FIELD(b.cmd, bArr[0]) + DESERIALIZE_FIELD(b.cmdsize, bArr[1]) + DESERIALIZE_FIELD(b.funcoff, bArr[2]) + DESERIALIZE_FIELD(b.funcsize, bArr[3]) } void Serialize(SerializationContext& context, const section_64& value) @@ -522,16 +515,16 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve std::string segNameStr = s2[1].GetString(); memset(sec.segname, 0, 16); memcpy(sec.segname, segNameStr.c_str(), segNameStr.size()); - sec.addr = s2[2].GetUint64(); - sec.size = s2[3].GetUint64(); - sec.offset = s2[4].GetUint(); - sec.align = s2[5].GetUint(); - sec.reloff = s2[6].GetUint(); - sec.nreloc = s2[7].GetUint(); - sec.flags = s2[8].GetUint(); - sec.reserved1 = s2[9].GetUint(); - sec.reserved2 = s2[10].GetUint(); - sec.reserved3 = s2[11].GetUint(); + DESERIALIZE_FIELD(sec.addr, s2[2]) + DESERIALIZE_FIELD(sec.size, s2[3]) + DESERIALIZE_FIELD(sec.offset, s2[4]) + DESERIALIZE_FIELD(sec.align, s2[5]) + DESERIALIZE_FIELD(sec.reloff, s2[6]) + DESERIALIZE_FIELD(sec.nreloc, s2[7]) + DESERIALIZE_FIELD(sec.flags, s2[8]) + DESERIALIZE_FIELD(sec.reserved1, s2[9]) + DESERIALIZE_FIELD(sec.reserved2, s2[10]) + DESERIALIZE_FIELD(sec.reserved3, s2[11]) 
b.push_back(std::move(sec)); } } @@ -550,16 +543,16 @@ void Deserialize(DeserializationContext& context, std::string_view name, immer:: std::string segNameStr = s2[1].GetString(); memset(sec.segname, 0, 16); memcpy(sec.segname, segNameStr.c_str(), segNameStr.size()); - sec.addr = s2[2].GetUint64(); - sec.size = s2[3].GetUint64(); - sec.offset = s2[4].GetUint(); - sec.align = s2[5].GetUint(); - sec.reloff = s2[6].GetUint(); - sec.nreloc = s2[7].GetUint(); - sec.flags = s2[8].GetUint(); - sec.reserved1 = s2[9].GetUint(); - sec.reserved2 = s2[10].GetUint(); - sec.reserved3 = s2[11].GetUint(); + DESERIALIZE_FIELD(sec.addr, s2[2]) + DESERIALIZE_FIELD(sec.size, s2[3]) + DESERIALIZE_FIELD(sec.offset, s2[4]) + DESERIALIZE_FIELD(sec.align, s2[5]) + DESERIALIZE_FIELD(sec.reloff, s2[6]) + DESERIALIZE_FIELD(sec.nreloc, s2[7]) + DESERIALIZE_FIELD(sec.flags, s2[8]) + DESERIALIZE_FIELD(sec.reserved1, s2[9]) + DESERIALIZE_FIELD(sec.reserved2, s2[10]) + DESERIALIZE_FIELD(sec.reserved3, s2[11]) transient.push_back(std::move(sec)); } b = std::move(transient).persistent(); @@ -578,10 +571,10 @@ void Serialize(SerializationContext& context, const linkedit_data_command& value void Deserialize(DeserializationContext& context, std::string_view name, linkedit_data_command& b) { auto bArr = context.doc[name.data()].GetArray(); - b.cmd = bArr[0].GetUint(); - b.cmdsize = bArr[1].GetUint(); - b.dataoff = bArr[2].GetUint(); - b.datasize = bArr[3].GetUint(); + DESERIALIZE_FIELD(b.cmd, bArr[0]) + DESERIALIZE_FIELD(b.cmdsize, bArr[1]) + DESERIALIZE_FIELD(b.dataoff, bArr[2]) + DESERIALIZE_FIELD(b.datasize, bArr[3]) } void Serialize(SerializationContext& context, const segment_command_64& value) @@ -606,14 +599,14 @@ void Deserialize(DeserializationContext& context, std::string_view name, segment std::string segNameStr = bArr[0].GetString(); memset(b.segname, 0, 16); memcpy(b.segname, segNameStr.c_str(), segNameStr.size()); - b.vmaddr = bArr[1].GetUint64(); - b.vmsize = bArr[2].GetUint64(); - 
b.fileoff = bArr[3].GetUint64(); - b.filesize = bArr[4].GetUint64(); - b.maxprot = bArr[5].GetUint(); - b.initprot = bArr[6].GetUint(); - b.nsects = bArr[7].GetUint(); - b.flags = bArr[8].GetUint(); + DESERIALIZE_FIELD(b.vmaddr, bArr[1]) + DESERIALIZE_FIELD(b.vmsize, bArr[2]) + DESERIALIZE_FIELD(b.fileoff, bArr[3]) + DESERIALIZE_FIELD(b.filesize, bArr[4]) + DESERIALIZE_FIELD(b.maxprot, bArr[5]) + DESERIALIZE_FIELD(b.initprot, bArr[6]) + DESERIALIZE_FIELD(b.nsects, bArr[7]) + DESERIALIZE_FIELD(b.flags, bArr[8]) } void Deserialize(DeserializationContext& context, std::string_view name, std::vector& b) @@ -626,14 +619,14 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve std::string segNameStr = s2[0].GetString(); memset(sec.segname, 0, 16); memcpy(sec.segname, segNameStr.c_str(), segNameStr.size()); - sec.vmaddr = s2[1].GetUint64(); - sec.vmsize = s2[2].GetUint64(); - sec.fileoff = s2[3].GetUint64(); - sec.filesize = s2[4].GetUint64(); - sec.maxprot = s2[5].GetUint(); - sec.initprot = s2[6].GetUint(); - sec.nsects = s2[7].GetUint(); - sec.flags = s2[8].GetUint(); + DESERIALIZE_FIELD(sec.vmaddr, s2[1]) + DESERIALIZE_FIELD(sec.vmsize, s2[2]) + DESERIALIZE_FIELD(sec.fileoff, s2[3]) + DESERIALIZE_FIELD(sec.filesize, s2[4]) + DESERIALIZE_FIELD(sec.maxprot, s2[5]) + DESERIALIZE_FIELD(sec.initprot, s2[6]) + DESERIALIZE_FIELD(sec.nsects, s2[7]) + DESERIALIZE_FIELD(sec.flags, s2[8]) b.push_back(std::move(sec)); } } @@ -649,14 +642,14 @@ void Deserialize(DeserializationContext& context, std::string_view name, immer:: std::string segNameStr = s2[0].GetString(); memset(sec.segname, 0, 16); memcpy(sec.segname, segNameStr.c_str(), segNameStr.size()); - sec.vmaddr = s2[1].GetUint64(); - sec.vmsize = s2[2].GetUint64(); - sec.fileoff = s2[3].GetUint64(); - sec.filesize = s2[4].GetUint64(); - sec.maxprot = s2[5].GetUint(); - sec.initprot = s2[6].GetUint(); - sec.nsects = s2[7].GetUint(); - sec.flags = s2[8].GetUint(); + DESERIALIZE_FIELD(sec.vmaddr, 
s2[1]) + DESERIALIZE_FIELD(sec.vmsize, s2[2]) + DESERIALIZE_FIELD(sec.fileoff, s2[3]) + DESERIALIZE_FIELD(sec.filesize, s2[4]) + DESERIALIZE_FIELD(sec.maxprot, s2[5]) + DESERIALIZE_FIELD(sec.initprot, s2[6]) + DESERIALIZE_FIELD(sec.nsects, s2[7]) + DESERIALIZE_FIELD(sec.flags, s2[8]) transient.push_back(std::move(sec)); } b = std::move(transient).persistent(); @@ -678,12 +671,12 @@ void Serialize(SerializationContext& context, const build_version_command& value void Deserialize(DeserializationContext& context, std::string_view name, build_version_command& b) { auto bArr = context.doc[name.data()].GetArray(); - b.cmd = bArr[0].GetUint(); - b.cmdsize = bArr[1].GetUint(); - b.platform = bArr[2].GetUint(); - b.minos = bArr[3].GetUint(); - b.sdk = bArr[4].GetUint(); - b.ntools = bArr[5].GetUint(); + DESERIALIZE_FIELD(b.cmd, bArr[0]) + DESERIALIZE_FIELD(b.cmdsize, bArr[1]) + DESERIALIZE_FIELD(b.platform, bArr[2]) + DESERIALIZE_FIELD(b.minos, bArr[3]) + DESERIALIZE_FIELD(b.sdk, bArr[4]) + DESERIALIZE_FIELD(b.ntools, bArr[5]) } void Serialize(SerializationContext& context, const build_tool_version& value) @@ -701,8 +694,8 @@ void Deserialize(DeserializationContext& context, std::string_view name, std::ve { build_tool_version sec; auto s2 = s.GetArray(); - sec.tool = s2[0].GetUint(); - sec.version = s2[1].GetUint(); + DESERIALIZE_FIELD(sec.tool, s2[0]) + DESERIALIZE_FIELD(sec.version, s2[1]) b.push_back(sec); } } @@ -715,11 +708,13 @@ void Deserialize(DeserializationContext& context, std::string_view name, immer:: { build_tool_version sec; auto s2 = s.GetArray(); - sec.tool = s2[0].GetUint(); - sec.version = s2[1].GetUint(); + DESERIALIZE_FIELD(sec.tool, s2[0]) + DESERIALIZE_FIELD(sec.version, s2[1]) transient.push_back(sec); } b = std::move(transient).persistent(); } +#undef DESERIALIZE_FIELD + } // namespace SharedCacheCore From ea5fb17a863db49fe77d3033bc3729d96d70f808 Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Mon, 18 Nov 2024 20:54:02 +0000 Subject: [PATCH 7/7] Add 
`symbolInfos` serialization The `symbolInfos` field was not being serialized, but on load the code attempts to deserialize it, which fails because it's missing. This adds the serialization. --- view/sharedcache/core/SharedCache.cpp | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index 01588044a2..b87c10d234 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -3560,6 +3560,27 @@ void SharedCache::Store(SerializationContext& context) const } context.writer.EndArray(); + Serialize(context, "symbolInfos"); + context.writer.StartArray(); + for (const auto& pair1 : State().symbolInfos) + { + context.writer.StartObject(); + Serialize(context, "key", pair1.first); + Serialize(context, "value"); + context.writer.StartArray(); + for (const auto& pair2 : pair1.second) + { + context.writer.StartArray(); + Serialize(context, pair2.first); + Serialize(context, pair2.second.first); + Serialize(context, pair2.second.second); + context.writer.EndArray(); + } + context.writer.EndArray(); + context.writer.EndObject(); + } + context.writer.EndArray(); + Serialize(context, "backingCaches", State().backingCaches); Serialize(context, "stubIslands", State().stubIslandRegions); Serialize(context, "images", State().images); @@ -3620,12 +3641,12 @@ void SharedCache::Load(DeserializationContext& context) for (auto& symbolInfo : context.doc["symbolInfos"].GetArray()) { immer::vector_transient>> symbolInfoVec; - for (auto& symbolInfoPair : symbolInfo.GetArray()) + for (auto& symbolInfoPair : symbolInfo["value"].GetArray()) { symbolInfoVec.push_back({symbolInfoPair[0].GetUint64(), {(BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString()}}); } - symbolInfos.set(symbolInfo[0].GetUint64(), std::move(symbolInfoVec).persistent()); + symbolInfos.set(symbolInfo["key"].GetUint64(), std::move(symbolInfoVec).persistent()); }
MutableState().symbolInfos = std::move(symbolInfos).persistent();