diff --git a/src/elf/elf_image.cpp b/src/elf/elf_image.cpp new file mode 100644 index 0000000..23dd86f --- /dev/null +++ b/src/elf/elf_image.cpp @@ -0,0 +1,213 @@ +#include "elf_image.h" +#include "common/timer.h" +#include "common/utils.h" + +#include +#include +#include + +namespace { + // Helper functions to read little-endian values from a buffer safely. + template + T read_from_buffer(std::span buffer, size_t offset) { + T value; + std::memcpy(&value, buffer.data() + offset, sizeof(T)); + return value; + } + + uint16_t read_u16(std::span b, size_t off) { return read_from_buffer(b, off); } + uint32_t read_u32(std::span b, size_t off) { return read_from_buffer(b, off); } + uint64_t read_u64(std::span b, size_t off) { return read_from_buffer(b, off); } + + // ELF constants + constexpr uint16_t ET_EXEC = 2; + constexpr uint16_t ET_DYN = 3; +} + +ELFImage::ELFImage(std::span data) : m_data(data) {} + +std::unique_ptr ELFImage::parse(std::span data) { + Timer timer("ELF parse"); + + // --- ELF Header Checks (64 bytes minimum) --- + if (data.size() < 64) return nullptr; + + // Check ELF magic: 0x7F 'E' 'L' 'F' + if (data[0] != 0x7F || data[1] != 'E' || data[2] != 'L' || data[3] != 'F') { + return nullptr; + } + + // Check for 64-bit class (EI_CLASS at offset 0x04) + if (data[4] != 2) { // ELFCLASS64 + DBG("[ELF] Not a 64-bit ELF file. Class: ", static_cast(data[4])); + return nullptr; + } + + // Check for little-endian (EI_DATA at offset 0x05) + if (data[5] != 1) { // ELFDATA2LSB + DBG("[ELF] Not a little-endian ELF file. 
Data encoding: ", static_cast(data[5])); + return nullptr; + } + + // Read ELF type (ET_EXEC or ET_DYN) at offset 0x10 + uint16_t e_type = read_u16(data, 0x10); + + // Read section header table offset (e_shoff) at offset 0x28 + uint64_t e_shoff = read_u64(data, 0x28); + + // Read section header entry size (e_shentsize) at offset 0x3A + uint16_t e_shentsize = read_u16(data, 0x3A); + + // Read number of section headers (e_shnum) at offset 0x3C + uint16_t e_shnum = read_u16(data, 0x3C); + + // Read section header string table index (e_shstrndx) at offset 0x3E + uint16_t e_shstrndx = read_u16(data, 0x3E); + + DBG("[ELF] e_type=", e_type, " e_shoff=", e_shoff, " e_shnum=", e_shnum, " e_shstrndx=", e_shstrndx); + + // Validate section header table; compare against the bytes remaining after e_shoff so a huge e_shoff cannot wrap the sum past the check + if (e_shoff > data.size() || static_cast<uint64_t>(e_shnum) * e_shentsize > data.size() - e_shoff) { + DBG("[ELF] Section header table extends beyond file size."); + return nullptr; + } + + // --- Create and Populate Image --- + auto img = std::unique_ptr(new ELFImage(data)); + img->m_is_elf64 = true; + + // Set ELF type + // Note: ELF section sh_addr values are already absolute VAs, so base_address + // should be 0. Unlike PE where sections have RVAs relative to ImageBase, + // ELF section headers contain the actual virtual addresses. 
+ if (e_type == ET_EXEC) { + img->m_elf_type = ELFType::EXEC; + img->m_base_address = 0; // Section addresses are already absolute VAs + } else if (e_type == ET_DYN) { + img->m_elf_type = ELFType::DYN; + img->m_base_address = 0; // PIE - use section addresses directly + } else { + DBG("[ELF] Unknown e_type: ", e_type); + return nullptr; + } + + // Read the section header string table section header + if (e_shstrndx >= e_shnum) { + DBG("[ELF] Invalid e_shstrndx: ", e_shstrndx); + return nullptr; + } + + uint64_t shstrtab_shdr_offset = e_shoff + static_cast(e_shstrndx) * e_shentsize; + if (shstrtab_shdr_offset + 64 > data.size()) { + DBG("[ELF] String table section header out of bounds."); + return nullptr; + } + + // Read string table section offset and size + img->m_shstrtab_offset = read_u64(data, shstrtab_shdr_offset + 0x18); // sh_offset + img->m_shstrtab_size = read_u64(data, shstrtab_shdr_offset + 0x20); // sh_size + + if (img->m_shstrtab_offset > data.size() || img->m_shstrtab_size > data.size() - img->m_shstrtab_offset) { // overflow-safe: offset + size could wrap + DBG("[ELF] String table extends beyond file size."); + return nullptr; + } + + DBG("[ELF] String table at offset ", img->m_shstrtab_offset, " size ", img->m_shstrtab_size); + + // --- Parse Section Headers --- + for (uint16_t i = 0; i < e_shnum; ++i) { + uint64_t shdr_offset = e_shoff + static_cast(i) * e_shentsize; + if (shdr_offset + 64 > data.size()) break; + + uint32_t sh_name = read_u32(data, shdr_offset + 0x00); // Name offset in string table + uint64_t sh_addr = read_u64(data, shdr_offset + 0x10); // Virtual address + uint64_t sh_offset = read_u64(data, shdr_offset + 0x18); // File offset + uint64_t sh_size = read_u64(data, shdr_offset + 0x20); // Section size + + std::string section_name = img->read_section_name(sh_name); + + // Skip empty sections + if (section_name.empty() || sh_size == 0) continue; + + img->m_sections.emplace_back(Section{ + section_name, + static_cast(sh_addr), + static_cast(sh_size), + static_cast(sh_offset), + static_cast(sh_size) + }); + + 
DBG("[ELF] Section: ", section_name, " addr=0x", std::hex, sh_addr, " offset=0x", sh_offset, " size=0x", sh_size, std::dec); + } + + return img; +} + +std::string ELFImage::read_section_name(uint32_t name_offset) const { + if (name_offset >= m_shstrtab_size) { + return ""; + } + + // Find null terminator + uint64_t start = m_shstrtab_offset + name_offset; + uint64_t end = m_shstrtab_offset + m_shstrtab_size; + + const uint8_t* name_start = m_data.data() + start; + const uint8_t* name_end = m_data.data() + end; + const uint8_t* null_pos = static_cast(std::memchr(name_start, '\0', name_end - name_start)); + + if (null_pos) { + return std::string(reinterpret_cast(name_start), null_pos - name_start); + } + + return ""; +} + +const Section* ELFImage::get_section(const std::string& name) const { + auto it = std::find_if(m_sections.begin(), m_sections.end(), [&](const Section& s) { + return s.name == name; + }); + return (it != m_sections.end()) ? &(*it) : nullptr; +} + +int64_t ELFImage::va_to_file_offset(uint64_t va) const { + // For PIE (ET_DYN), va is already relative to base 0 + // For non-PIE (ET_EXEC), subtract the base address + uint64_t adjusted_va = va; + if (m_elf_type == ELFType::EXEC && va >= m_base_address) { + adjusted_va = va - m_base_address; + } + + for (const auto& s : m_sections) { + if (adjusted_va >= s.virtual_address && adjusted_va < s.virtual_address + s.virtual_size) { + uint32_t delta = adjusted_va - s.virtual_address; + int64_t offset = static_cast(s.file_offset) + delta; + + // Sanity check + if (offset >= 0 && static_cast(offset) < m_data.size()) { + return offset; + } + } + } + return -1; +} + +std::optional> ELFImage::read_va(uint64_t va, size_t size) const { + int64_t offset = va_to_file_offset(va); + if (offset < 0 || static_cast(offset) + size > m_data.size()) { + DBG("[READ] read_va(0x", std::hex, va, ", ", std::dec, size, ") failed: out of bounds."); + return std::nullopt; + } + + auto start_it = m_data.begin() + offset; + auto end_it = 
start_it + size; + return std::vector(start_it, end_it); +} + +std::optional ELFImage::read_u64_va(uint64_t va) const { + auto buf_opt = read_va(va, 8); + if (!buf_opt) { + return std::nullopt; + } + return read_u64(*buf_opt, 0); +} diff --git a/src/elf/elf_patterns.cpp b/src/elf/elf_patterns.cpp new file mode 100644 index 0000000..f803040 --- /dev/null +++ b/src/elf/elf_patterns.cpp @@ -0,0 +1,205 @@ +#include "elf_patterns.h" +#include "common/timer.h" +#include "common/utils.h" + +#include +#include +#include +#include + +bool is_va_in_section(uint64_t va, const ELFImage& elf, const Section& section) { + uint64_t start_va = elf.get_base_address() + section.virtual_address; + uint64_t end_va = start_va + section.virtual_size; + bool in_section = (va >= start_va && va < end_va); + + // DBG call is useful but can be very noisy, so it's good to have it conditional + if (is_debug_enabled()) { + DBG("[is_va_in_section] VA=0x", std::hex, va, " section=", section.name, + " range=[0x", start_va, ", 0x", end_va, ") -> ", std::boolalpha, in_section, std::dec); + } + return in_section; +} + +uint64_t find_lea_to_target_va(const ELFImage& elf, const Section& text_sec, uint64_t target_va) { + Timer timer("find_lea_to_target_va"); + DBG("[find_lea] target_va=0x", std::hex, target_va, std::dec); + + auto text_data = elf.get_raw_data().subspan(text_sec.file_offset, text_sec.file_size); + if (text_data.size() < 7) return 0; + + const uint64_t text_va_base = elf.get_base_address() + text_sec.virtual_address; + + // A set of valid ModR/M bytes for [RIP + disp32] addressing with any register operand. + // The format is 00_REG_101. + static const std::unordered_set valid_modrm = { + 0x05, 0x0D, 0x15, 0x1D, 0x25, 0x2D, 0x35, 0x3D + }; + + // First, try LEA with RIP-relative addressing (7 bytes: REX.W + 8D + ModR/M + disp32) + // This is what MSVC typically generates on Windows. 
+ for (size_t i = 1; i < text_data.size() - 6; ++i) { + if (text_data[i] == 0x8D) { // LEA opcode + uint8_t rex = text_data[i - 1]; + if ((rex & 0xF8) == 0x48) { // REX.W prefix + uint8_t modrm = text_data[i + 1]; + if (valid_modrm.count(modrm)) { + int32_t disp; + std::memcpy(&disp, &text_data[i + 2], sizeof(disp)); + + uint64_t instr_va = text_va_base + (i - 1); + uint64_t rip_after = instr_va + 7; + uint64_t calculated_target = rip_after + disp; + + if (calculated_target == target_va) { + DBG("[find_lea] Found LEA at VA=0x", std::hex, instr_va); + return instr_va; + } + } + } + } + } + + // Second, try MOV r32, imm32 with 32-bit immediate (GCC on Linux often uses this). + // For non-PIE binaries where addresses fit in 32 bits, GCC uses: + // MOV EAX-EDI, imm32: B8-BF + imm32 (5 bytes) + // MOV R8D-R15D, imm32: 41 B8-BF + imm32 (6 bytes, REX.B prefix) + // The 32-bit value is zero-extended to 64 bits. + if (target_va <= 0xFFFFFFFF) { + uint32_t target_imm = static_cast(target_va); + + for (size_t i = 0; i < text_data.size() - 5; ++i) { + uint8_t byte = text_data[i]; + + // Check for MOV EAX-EDI, imm32 (B8-BF) + if (byte >= 0xB8 && byte <= 0xBF) { + uint32_t imm; + std::memcpy(&imm, &text_data[i + 1], sizeof(imm)); + if (imm == target_imm) { + uint64_t instr_va = text_va_base + i; + DBG("[find_lea] Found MOV r32,imm32 at VA=0x", std::hex, instr_va); + return instr_va; + } + } + + // Check for MOV R8D-R15D, imm32 (41 B8-BF) + if (byte == 0x41 && i + 6 <= text_data.size()) { + uint8_t opcode = text_data[i + 1]; + if (opcode >= 0xB8 && opcode <= 0xBF) { + uint32_t imm; + std::memcpy(&imm, &text_data[i + 2], sizeof(imm)); + if (imm == target_imm) { + uint64_t instr_va = text_va_base + i; + DBG("[find_lea] Found MOV r32,imm32 (REX.B) at VA=0x", std::hex, instr_va); + return instr_va; + } + } + } + } + } + + DBG("[find_lea] No matching instruction found."); + return 0; +} + +std::optional find_rip_relative_load_in_window( + const ELFImage& elf, const Section& text_sec, 
uint64_t from_va, size_t window) +{ + Timer timer("find_rip_relative_load_in_window"); + DBG("[LOAD_SCAN] from_va=0x", std::hex, from_va, " window=", std::dec, window); + + int64_t start_offset = elf.va_to_file_offset(from_va); + if (start_offset < 0 || static_cast<uint64_t>(start_offset) < text_sec.file_offset) { // also reject offsets before .text: the subtraction below would wrap + DBG("[LOAD_SCAN] from_va is not a valid address."); + return std::nullopt; + } + + auto text_data = elf.get_raw_data().subspan(text_sec.file_offset, text_sec.file_size); + size_t search_start = start_offset - text_sec.file_offset; + size_t search_end = std::min(search_start + window, text_data.size()); + + const uint64_t text_va_base = elf.get_base_address() + text_sec.virtual_address; // same VA convention as find_lea_to_target_va / is_va_in_section + + // Helper lambda to check if an address is in a data section + // Note: We prioritize .data over .bss because encryption keys are in initialized data. + // .bss is uninitialized and often doesn't exist in the file. + auto is_in_data_section = [&](uint64_t va) -> bool { + for (const auto& s : elf.get_sections()) { + if (s.name == ".data" || s.name == ".data.rel.ro") { + if (is_va_in_section(va, elf, s)) { + return true; + } + } + } + return false; + }; + + for (size_t i = search_start; i + 6 < search_end; ++i) { + // Pattern 1: REX.W + MOV/LEA with RIP-relative addressing (7 bytes) + if ((text_data[i] & 0xF8) == 0x48) { + uint8_t opcode = text_data[i + 1]; + + if (opcode == 0x8B || opcode == 0x8D) { // MOV or LEA + uint8_t modrm = text_data[i + 2]; + if ((modrm & 0xC7) == 0x05) { // RIP-relative addressing + int32_t disp; + std::memcpy(&disp, &text_data[i + 3], sizeof(disp)); + + uint64_t instr_va = text_va_base + i; + uint64_t rip_after = instr_va + 7; + uint64_t target_va = rip_after + disp; + + uint64_t final_blob_va = 0; + + if (opcode == 0x8B) { // MOV - dereference pointer + auto ptr_opt = elf.read_u64_va(target_va); + if (!ptr_opt) continue; + final_blob_va = *ptr_opt; + } else { // LEA - direct address + final_blob_va = target_va; + } + + if (is_in_data_section(final_blob_va)) { + LoadType type = (opcode == 0x8B) ? 
LoadType::MOV_DEREF : LoadType::LEA_ADDRESS; + DBG("[LOAD_SCAN] Found valid ", (type == LoadType::MOV_DEREF ? "MOV" : "LEA"), + " at VA=0x", std::hex, instr_va, " -> VA=0x", final_blob_va, std::dec); + return RipRelativeLoad{instr_va, target_va, type}; + } + } + } + } + + // Pattern 2: MOV r32, imm32 (5 bytes) - GCC on Linux uses this for addresses < 2GB + // Format: B8-BF + imm32 (loads into EAX-EDI) + if (text_data[i] >= 0xB8 && text_data[i] <= 0xBF && i + 5 <= search_end) { + uint32_t imm; + std::memcpy(&imm, &text_data[i + 1], sizeof(imm)); + + if (is_in_data_section(imm)) { + uint64_t instr_va = text_va_base + i; + DBG("[LOAD_SCAN] Found MOV r32,imm32 at VA=0x", std::hex, instr_va, + " loading addr=0x", imm, std::dec); + return RipRelativeLoad{instr_va, imm, LoadType::LEA_ADDRESS}; + } + } + + // Pattern 3: REX.B + MOV r32, imm32 (6 bytes) + // Format: 41 B8-BF + imm32 (loads into R8D-R15D) + if (text_data[i] == 0x41 && i + 6 <= search_end) { + uint8_t opcode = text_data[i + 1]; + if (opcode >= 0xB8 && opcode <= 0xBF) { + uint32_t imm; + std::memcpy(&imm, &text_data[i + 2], sizeof(imm)); + + if (is_in_data_section(imm)) { + uint64_t instr_va = text_va_base + i; + DBG("[LOAD_SCAN] Found MOV r32,imm32 (REX.B) at VA=0x", std::hex, instr_va, + " loading addr=0x", imm, std::dec); + return RipRelativeLoad{instr_va, imm, LoadType::LEA_ADDRESS}; + } + } + } + } + + DBG("[LOAD_SCAN] No valid MOV/LEA found in window."); + return std::nullopt; +} diff --git a/src/elf/elf_scanner.cpp b/src/elf/elf_scanner.cpp new file mode 100644 index 0000000..fa987e2 --- /dev/null +++ b/src/elf/elf_scanner.cpp @@ -0,0 +1,269 @@ +#include "keydot/elf_scanner.h" +#include "common/mapped_file.h" +#include "common/timer.h" +#include "common/utils.h" +#include "elf/elf_image.h" +#include "elf/elf_patterns.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +std::vector find_subsequence( + std::span haystack, + size_t start, + size_t length, + 
std::string_view needle) +{ + Timer timer("find_subsequence '" + std::string(needle) + "'", false); + // Clamp the window to the buffer: start may lie past the end, and start + length may wrap. + start = std::min(start, haystack.size()); + length = std::min(length, haystack.size() - start); + + std::vector found_indices; + auto search_area = haystack.subspan(start, length); + std::span needle_span( + reinterpret_cast(needle.data()), + needle.size() + ); + + auto it = search_area.begin(); + while (true) { + it = std::search(it, search_area.end(), needle_span.begin(), needle_span.end()); + if (it == search_area.end()) { + break; + } + // Calculate offset relative to the full haystack, not the subspan (only iterators of the same span are subtracted) + size_t absolute_offset = start + static_cast<size_t>(it - search_area.begin()); + found_indices.push_back(absolute_offset); + ++it; // Continue search after the found occurrence + } + + timer.print_manual(std::string(needle), needle.length()); + return found_indices; +} + +// Extract a bounded C-string view from [start, end). +// Returns a view from start to the first '\0' or end if no '\0' found. +inline std::string_view bounded_cstr_view(const char* start, const char* end) { + const char* nul = std::find(start, end, '\0'); + return std::string_view(start, static_cast(nul - start)); +} + +// Parse version substring "v" from a string_view. +// Returns only the version part (without the 'v'), up to whitespace/end. +inline std::optional parse_version_from_view(std::string_view s) { + size_t pos = s.find('v'); + while (pos != std::string_view::npos) { + if (pos + 1 < s.size() && std::isdigit(static_cast(s[pos + 1]))) { + size_t end = s.find_first_of(" \t", pos); + const size_t start = pos + 1; + const size_t count = (end == std::string_view::npos ? 
s.size() : end) - start; + return std::string(s.substr(start, count)); + } + pos = s.find('v', pos + 1); + } + return std::nullopt; +} + +std::optional find_godot_version_in_elf(const ELFImage& elf) { + Timer timer("find_godot_version_in_elf"); + + const Section* rodata = elf.get_section(".rodata"); + if (!rodata) { + DBG("[GodotVer] .rodata section not found"); + return std::nullopt; + } + + const uint8_t* base = elf.get_raw_data().data(); + const char* seg_begin = reinterpret_cast(base + rodata->file_offset); + const char* seg_end = seg_begin + rodata->file_size; + + static const std::string needle = "Godot Engine"; + auto searcher = std::boyer_moore_searcher(needle.begin(), needle.end()); + + DBG("[GodotVer] Scanning .rodata for '", needle, "' (", rodata->file_size, " bytes)"); + + const char* pos = seg_begin; + size_t occ_idx = 0; + while (true) { + auto it = std::search(pos, seg_end, searcher); + if (it == seg_end) break; // no more matches + ++occ_idx; + + std::string_view full_sv = bounded_cstr_view(it, seg_end); + DBG("[GodotVer] Occurrence ", occ_idx, ": ", std::string(full_sv)); + + if (auto ver = parse_version_from_view(full_sv)) { + DBG("[GodotVer] Parsed version: ", *ver); + return ver; + } + + pos = it + needle.size(); + } + + DBG("[GodotVer] No occurrence contained a version pattern"); + return std::nullopt; +} + +} + +int scan_elf_file(const std::string& path) { + // --- Stage 1: Memory Map the file --- + MappedFile mapped_file(path); + if (!mapped_file.is_valid()) { + return 1; // MappedFile constructor already printed the error + } + + // --- Stage 2: ELF parse --- + // Time only the parse: the Timer lives in the lambda's scope and is destroyed exactly once when it returns. + auto elf = [&] { Timer elf_parse_timer("ELFImage::parse"); return ELFImage::parse(mapped_file.get_data()); }(); + // Do not call ~Timer() by hand on a local: it would be destroyed a second time at scope exit. + + if (!elf || !elf->is_elf64()) { + std::cerr << "Error: Not a valid ELF64 (x64) image." << std::endl; + return 2; + } + + DBG("[ELF] BaseAddress=0x", std::hex, elf->get_base_address(), std::dec); + DBG("[ELF] Type=", (elf->get_elf_type() == ELFType::EXEC ? 
"ET_EXEC" : "ET_DYN")); + DBG("[ELF] Section count: ", elf->get_sections().size()); + + // --- Stage 3: section lookups --- + const Section *text, *rodata, *data; + { + Timer section_lookup_timer("Section lookups"); + text = elf->get_section(".text"); + rodata = elf->get_section(".rodata"); + if (!rodata) { + rodata = elf->get_section(".data.rel.ro"); + } + data = elf->get_section(".data"); + } + if (!text || !rodata || !data) { + std::cerr << "Error: Required sections .text/.rodata/.data not found." << std::endl; + return 3; + } + + DBG("[SECT] .text VA=0x", std::hex, text->virtual_address, " size=0x", text->virtual_size, std::dec); + DBG("[SECT] rodata (", rodata->name, ") VA=0x", std::hex, rodata->virtual_address, " size=0x", rodata->virtual_size, std::dec); + DBG("[SECT] .data VA=0x", std::hex, data->virtual_address, " size=0x", data->virtual_size, std::dec); + + // Optional: Godot version extraction + auto godot_ver = find_godot_version_in_elf(*elf); + if (godot_ver) { + std::cout << "Godot Engine version: " << *godot_ver << std::endl; + } else { + std::cout << "Could not determine Godot Engine version from ELF." << std::endl; + } + + // --- Stage 4: anchor search loop --- + const std::vector anchors = { + "Can't open encrypted pack directory.", + "Can't open encrypted pack-referenced file '%s'.", + "Condition \"fae.is_null()\" is true." 
+ }; + + bool found = false; + for (const auto& anchor_str : anchors) { + Timer anchor_timer("Anchor '" + anchor_str + "' search"); + DBG("[ANCHOR] Searching for: '", anchor_str, "'"); + + // 4a: Find the anchor string in the .rodata section + auto hits = find_subsequence(elf->get_raw_data(), rodata->file_offset, rodata->file_size, anchor_str); + DBG("[ANCHOR] Hits: ", hits.size()); + + for (const auto& hit : hits) { + uint32_t anchor_rva = rodata->virtual_address + static_cast(hit - rodata->file_offset); + uint64_t anchor_va = elf->get_base_address() + anchor_rva; + DBG("[ANCHOR] VA=0x", std::hex, anchor_va, std::dec); + + // 4b: Find a `LEA` instruction in the .text section that points to our string + uint64_t lea_site = find_lea_to_target_va(*elf, *text, anchor_va); + if (lea_site == 0) { + DBG("[LEA] Not found for anchor VA=0x", std::hex, anchor_va, std::dec); + continue; + } + DBG("[LEA] Site=0x", std::hex, lea_site, std::dec); + + // 4c: Scan forward from AFTER the anchor instruction for the key blob load + // GCC on Linux may place the key load far from the error string (up to 64KB away) + auto load_instr_opt = find_rip_relative_load_in_window(*elf, *text, lea_site + 1, 0x600); + if (!load_instr_opt) { + DBG("[LOAD_SCAN] Not found in 0x600 window. 
Expanding to 0x10000..."); + load_instr_opt = find_rip_relative_load_in_window(*elf, *text, lea_site + 1, 0x10000); + if (!load_instr_opt) { + DBG("[LOAD_SCAN] Not found in 0x10000 window either."); + continue; + } + } + const auto& load_instr = *load_instr_opt; + + // 4d: Get the blob pointer VA, handling MOV vs LEA difference + uint64_t ptr_to_blob_va = 0; + if (load_instr.type == LoadType::MOV_DEREF) { + // For MOV, the target_va is a pointer we must read to get the final address + DBG("[SCAN] Instruction is MOV, reading pointer from 0x", std::hex, load_instr.target_va, std::dec); + auto ptr_opt = elf->read_u64_va(load_instr.target_va); + if (!ptr_opt) { + DBG("[READ] Failed to read pointer for MOV at VA=0x", std::hex, load_instr.target_va, std::dec); + continue; + } + ptr_to_blob_va = *ptr_opt; + } else { // LoadType::LEA_ADDRESS + DBG("[SCAN] Instruction is LEA, target VA is the pointer."); + ptr_to_blob_va = load_instr.target_va; + } + DBG("[READ] Final Blob pointer VA=0x", std::hex, ptr_to_blob_va, std::dec); + + // 4e: Validate that the blob pointer is in a valid data section + const Section* blob_data_section = nullptr; + for (const auto& s : elf->get_sections()) { + if ((s.name == ".data" || s.name == ".bss" || s.name == ".data.rel.ro") && is_va_in_section(ptr_to_blob_va, *elf, s)) { + blob_data_section = &s; + break; + } + } + if (!blob_data_section) { + DBG("[SECT] Final blob VA 0x", std::hex, ptr_to_blob_va, " not in any data section.", std::dec); + continue; + } + DBG("[SECT] Blob VA is in section '", blob_data_section->name, "'."); + + // 4f: Read the final 32-byte key blob + auto blob = elf->read_va(ptr_to_blob_va, 32); + if (!blob || blob->size() != 32) { + DBG("[READ] Blob read failed or not 32 bytes."); + continue; + } + + std::cout << std::left << std::setw(17) << "Anchor" << ": " << anchor_str << std::endl; + std::cout << std::hex << std::uppercase << std::setfill('0'); + std::cout << std::left << std::setw(17) << "String VA" << ": 0x" << 
anchor_va << std::endl; + std::cout << std::left << std::setw(17) << "LEA at" << ": 0x" << lea_site << std::endl; + std::cout << std::left << std::setw(17) << "off_* qword VA" << ": 0x" << load_instr.target_va << std::endl; + std::cout << std::left << std::setw(17) << "Blob VA" << ": 0x" << ptr_to_blob_va << std::endl; + std::cout << std::dec << std::setfill(' '); + std::cout << std::left << std::setw(17) << "32-byte (hex)" << ": " << hex_string(*blob) << std::endl; + + found = true; + break; + } + + if (found) break; + } + + if (!found) { + std::cerr << "Failed to locate the 32-byte key blob using the provided anchors." << std::endl; + return 4; + } + + return 0; +} diff --git a/src/pe/pe_patterns.cpp b/src/pe/pe_patterns.cpp index 1b9a2b5..80da67c 100644 --- a/src/pe/pe_patterns.cpp +++ b/src/pe/pe_patterns.cpp @@ -21,7 +21,7 @@ static const std::array VALID_MODRM_BYTES = { 0x05, 0x0D, 0x15, 0x1D, 0x25, 0x2D, 0x35, 0x3D }; -static bool is_va_in_named_section(uint64_t va, const PEImage& pe, +static bool is_va_in_named_section(uint64_t va, const PEImage& pe, const std::vector& section_prefixes) { for (const auto& section : pe.get_sections()) { for (const auto& prefix : section_prefixes) { @@ -40,7 +40,7 @@ static std::optional match_rip_relative_instruction( const uint8_t* data, size_t data_size, size_t offset, uint64_t base_va, const PEImage& pe, const std::vector& allowed_sections) { - + if (offset + 6 >= data_size) { return std::nullopt; } @@ -86,7 +86,7 @@ static std::optional match_rip_relative_instruction( final_va = *ptr; } - if (!allowed_sections.empty() && + if (!allowed_sections.empty() && !is_va_in_named_section(final_va, pe, allowed_sections)) { return std::nullopt; } @@ -101,9 +101,9 @@ static std::optional match_rip_relative_instruction( static std::vector find_pattern_in_span( const uint8_t* haystack, size_t haystack_size, const uint8_t* needle, size_t needle_size) { - + std::vector indices; - + for (size_t i = 0; i + needle_size <= haystack_size; 
++i) { bool found = true; for (size_t j = 0; j < needle_size; ++j) { @@ -116,7 +116,7 @@ static std::vector find_pattern_in_span( indices.push_back(i); } } - + return indices; } @@ -125,25 +125,25 @@ std::vector find_subsequence( size_t start, size_t length, std::string_view needle) { - + Timer timer("find_subsequence", false); - + if (start >= haystack.size()) { return {}; } - + length = std::min(length, haystack.size() - start); - + const uint8_t* haystack_ptr = haystack.data() + start; const uint8_t* needle_ptr = reinterpret_cast(needle.data()); size_t needle_size = needle.size(); - + auto indices = find_pattern_in_span(haystack_ptr, length, needle_ptr, needle_size); - + for (auto& idx : indices) { idx += start; } - + timer.print_manual(std::string(needle), needle.length()); return indices; } @@ -152,27 +152,27 @@ bool is_va_in_section(uint64_t va, const PEImage& pe, const Section& section) { const uint64_t start_va = pe.get_image_base() + section.virtual_address; const uint64_t end_va = start_va + section.virtual_size; const bool in_section = (va >= start_va && va < end_va); - + if (is_debug_enabled()) { DBG("[is_va_in_section] VA=0x", std::hex, va, " section=", section.name, - " range=[0x", start_va, ", 0x", end_va, ") -> ", + " range=[0x", start_va, ", 0x", end_va, ") -> ", std::boolalpha, in_section, std::dec); } - + return in_section; } uint64_t find_lea_to_target_va(const PEImage& pe, const Section& text_sec, uint64_t target_va) { Timer timer("find_lea_to_target_va"); DBG("[find_lea] target_va=0x", std::hex, target_va, std::dec); - + auto text_data = pe.get_raw_data().subspan(text_sec.file_offset, text_sec.file_size); if (text_data.size() < 7) return 0; - + const uint64_t text_va_base = pe.get_image_base() + text_sec.virtual_address; const uint8_t* data = text_data.data(); size_t data_size = text_data.size(); - + // Search for REX.W (0x48) + LEA (0x8D) pattern for (size_t i = 1; i < data_size - 6; ++i) { if (data[i] == LEA_OPCODE) { @@ -188,15 +188,15 @@ 
uint64_t find_lea_to_target_va(const PEImage& pe, const Section& text_sec, uint6 break; } } - + if (valid_modrm) { int32_t disp; std::memcpy(&disp, &data[i + 2], sizeof(disp)); - + const uint64_t instr_va = text_va_base + (i - 1); const uint64_t rip_after = instr_va + 7; const uint64_t calculated_target = rip_after + disp; - + if (calculated_target == target_va) { DBG("[find_lea] Found LEA at VA=0x", std::hex, instr_va); return instr_va; @@ -204,50 +204,50 @@ uint64_t find_lea_to_target_va(const PEImage& pe, const Section& text_sec, uint6 } } } - + DBG("[find_lea] No matching LEA instruction found."); return 0; } std::optional find_key_load_near_mov_edx_20h( const PEImage& pe, const Section& text_sec, uint64_t lea_site, size_t search_radius) { - + Timer timer("find_key_load_near_mov_edx_20h"); - DBG("[EDX_SEARCH] Starting search near LEA at 0x", std::hex, lea_site, + DBG("[EDX_SEARCH] Starting search near LEA at 0x", std::hex, lea_site, " radius=0x", search_radius, std::dec); - + auto text_data = pe.get_raw_data().subspan(text_sec.file_offset, text_sec.file_size); const uint64_t text_va_base = pe.get_image_base() + text_sec.virtual_address; const uint8_t* data = text_data.data(); size_t data_size = text_data.size(); - + const int64_t lea_offset = pe.va_to_file_offset(lea_site); if (lea_offset < 0) { DBG("[EDX_SEARCH] Invalid LEA VA"); return std::nullopt; } - + const size_t lea_in_text = lea_offset - text_sec.file_offset; const size_t search_start = (lea_in_text > search_radius) ? 
lea_in_text - search_radius : 0; const size_t search_end = std::min(lea_in_text + search_radius, data_size); - - DBG("[EDX_SEARCH] Searching in text range [0x", std::hex, + + DBG("[EDX_SEARCH] Searching in text range [0x", std::hex, text_va_base + search_start, "-0x", text_va_base + search_end, ")", std::dec); - + // Look for "mov edx, 20h" pattern (BA 20 00 00 00) for (size_t i = search_start; i + 4 < search_end; ++i) { - if (data[i] == 0xBA && + if (data[i] == 0xBA && data[i + 1] == 0x20 && data[i + 2] == 0x00 && data[i + 3] == 0x00 && data[i + 4] == 0x00) { - + const uint64_t edx_instr_va = text_va_base + i; DBG("[EDX_SEARCH] Found 'mov edx, 20h' at VA=0x", std::hex, edx_instr_va, std::dec); - + const size_t key_search_start = i + 5; const size_t key_search_end = std::min(key_search_start + 0x200, data_size); - + for (size_t j = key_search_start; j + 6 < key_search_end; ++j) { auto match = match_rip_relative_instruction(data, data_size, j, text_va_base, pe, @@ -259,25 +259,25 @@ std::optional find_key_load_near_mov_edx_20h( break; } } - + DBG("[EDX_SEARCH] No 'mov edx, 20h' pattern found in search radius"); return std::nullopt; } std::optional find_rip_relative_in_range( - const PEImage& pe, const Section& text_sec, + const PEImage& pe, const Section& text_sec, size_t start_offset, size_t end_offset, uint64_t reference_va, bool require_data_section) { - + auto text_data = pe.get_raw_data().subspan(text_sec.file_offset, text_sec.file_size); const uint64_t text_va_base = pe.get_image_base() + text_sec.virtual_address; const uint8_t* data = text_data.data(); size_t data_size = text_data.size(); - + const std::vector allowed_sections = require_data_section ? 
std::vector{".data"} : std::vector{".data", ".rdata"}; - + for (size_t i = start_offset; i + 6 < end_offset && i + 6 < data_size; ++i) { auto match = match_rip_relative_instruction(data, data_size, i, text_va_base, pe, @@ -286,35 +286,35 @@ std::optional find_rip_relative_in_range( return RipRelativeLoad{match->instruction_va, match->target_va, match->type}; } } - + return std::nullopt; } std::optional find_rip_relative_load_around_va( const PEImage& pe, const Section& text_sec, uint64_t anchor_va, size_t radius) { - + Timer timer("find_rip_relative_load_around_va"); DBG("[AROUND_SCAN] anchor_va=0x", std::hex, anchor_va, " radius=", std::dec, radius); - + const int64_t anchor_offset = pe.va_to_file_offset(anchor_va); if (anchor_offset < 0) { DBG("[AROUND_SCAN] anchor_va is not a valid address."); return std::nullopt; } - + auto text_data = pe.get_raw_data().subspan(text_sec.file_offset, text_sec.file_size); const uint64_t text_va_base = pe.get_image_base() + text_sec.virtual_address; const uint8_t* data = text_data.data(); size_t data_size = text_data.size(); - + const size_t anchor_in_text = anchor_offset - text_sec.file_offset; const size_t search_start = (anchor_in_text > radius) ? anchor_in_text - radius : 0; const size_t search_end = std::min(anchor_in_text + radius, data_size); - - DBG("[AROUND_SCAN] Searching in range [0x", std::hex, - text_va_base + search_start, "-0x", text_va_base + search_end, + + DBG("[AROUND_SCAN] Searching in range [0x", std::hex, + text_va_base + search_start, "-0x", text_va_base + search_end, ") relative to anchor at 0x", anchor_va, std::dec); - + // Pattern 1: Key-loading loop pattern (LEA + MOVZX) for (size_t i = search_start; i + 11 < search_end; ++i) { // Check for LEA pattern: 48 8D 05 ?? ?? ?? ?? 
@@ -327,20 +327,20 @@ std::optional find_rip_relative_load_around_va( break; } } - + if (valid_modrm) { // Check for MOVZX pattern: 41 0F B6 04 07 if (data[i + 7] == 0x41 && data[i + 8] == MOVZX_OPCODE_1 && data[i + 9] == MOVZX_OPCODE_2 && data[i + 10] == 0x04 && data[i + 11] == 0x07) { - + int32_t disp; std::memcpy(&disp, &data[i + 3], sizeof(disp)); - + const uint64_t instr_va = text_va_base + i; const uint64_t rip_after = instr_va + 7; const uint64_t target_va = rip_after + disp; - + if (pe.read_va(target_va, 32)) { DBG("[AROUND_SCAN] Found PATTERN 1 at VA=0x", std::hex, instr_va); return RipRelativeLoad{instr_va, target_va, LoadType::LEA_ADDRESS}; @@ -349,18 +349,18 @@ std::optional find_rip_relative_load_around_va( } } } - + // Pattern 2: MOV EDX, 20h followed by RIP-relative instruction for (size_t i = search_start; i + 4 < search_end; ++i) { - if (data[i] == 0xBA && + if (data[i] == 0xBA && data[i + 1] == 0x20 && data[i + 2] == 0x00 && data[i + 3] == 0x00 && data[i + 4] == 0x00) { - + const size_t pattern_start = i + 5; const size_t pattern_end = std::min(i + 0x100, search_end); - + for (size_t j = pattern_start; j + 6 < pattern_end; ++j) { auto match = match_rip_relative_instruction(data, data_size, j, text_va_base, pe, @@ -369,7 +369,7 @@ std::optional find_rip_relative_load_around_va( // Check if it points to valid 32-byte data uint64_t final_va = match->type == LoadType::MOV_DEREF ? 
*pe.read_u64_va(match->target_va) : match->target_va; - + if (pe.read_va(final_va, 32)) { DBG("[AROUND_SCAN] Found PATTERN 2 at VA=0x", std::hex, match->instruction_va); return RipRelativeLoad{match->instruction_va, match->target_va, match->type}; @@ -378,11 +378,11 @@ std::optional find_rip_relative_load_around_va( } } } - + // Pattern 3: General search with filtering std::optional best_match; size_t best_distance = std::numeric_limits::max(); - + for (size_t i = search_start; i + 6 < search_end; ++i) { auto match = match_rip_relative_instruction(data, data_size, i, text_va_base, pe, @@ -391,25 +391,25 @@ std::optional find_rip_relative_load_around_va( if (match->instruction_va == anchor_va) { continue; } - + const size_t distance = (i > anchor_in_text) ? (i - anchor_in_text) : (anchor_in_text - i); - + uint64_t final_va = match->type == LoadType::MOV_DEREF ? *pe.read_u64_va(match->target_va) : match->target_va; - + if (pe.read_va(final_va, 32) && distance < best_distance) { best_match = match; best_distance = distance; } } } - + if (best_match) { DBG("[AROUND_SCAN] Selected candidate at VA=0x", std::hex, best_match->instruction_va, " distance=0x", best_distance, std::dec); return RipRelativeLoad{best_match->instruction_va, best_match->target_va, best_match->type}; } - + DBG("[AROUND_SCAN] No valid MOV/LEA found in radius."); return std::nullopt; } \ No newline at end of file