From bd8ec289406356c6d23b2e8e62da15a540a3918f Mon Sep 17 00:00:00 2001
From: Luke Craig
Date: Thu, 14 May 2026 22:23:04 -0400
Subject: [PATCH 1/3] dffi: add load_elf and load_elf_bytes

---
 src/dwarffi/dffi.py | 133 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 132 insertions(+), 1 deletion(-)

diff --git a/src/dwarffi/dffi.py b/src/dwarffi/dffi.py
index 2aad0ad..028b0f0 100644
--- a/src/dwarffi/dffi.py
+++ b/src/dwarffi/dffi.py
@@ -1359,4 +1359,135 @@ def find_types_with_member(self, member_name: str) -> Dict[str, "VtypeUserType"]
         for name, t in self.types.items():
             if member_name in t.get_flattened_fields(self):
                 results[name] = t
-        return results
\ No newline at end of file
+        return results
+
+    def load_elf(
+        self,
+        elf_path: str,
+        dwarf2json_cmd: Optional[str] = None,
+        save_isf_to: Optional[str] = None,
+    ) -> None:
+        """
+        Extracts DWARF info from an existing ELF/Mach-O binary via dwarf2json,
+        and dynamically loads the resulting types and symbols into this DFFI instance.
+
+        The dwarf2json mode is chosen from the binary's magic bytes, so an ELF
+        can be processed on macOS and a Mach-O on Linux; the host platform is
+        only used as a fallback when the magic is not recognised.
+
+        Args:
+            elf_path: Path to the ELF or Mach-O file.
+            dwarf2json_cmd: Path to the dwarf2json executable.
+            save_isf_to: Optional file path to cache the generated ISF
+                (must end with '.json' or '.json.xz').
+
+        Raises:
+            RuntimeError: If dwarf2json is missing, fails, or emits invalid JSON.
+            FileNotFoundError: If elf_path does not exist.
+            ValueError: If save_isf_to has an unsupported extension.
+        """
+        dwarf2json_cmd = dwarf2json_cmd or get_dwarf2json_path()
+        if dwarf2json_cmd is None:
+            raise RuntimeError(
+                "'dwarf2json' not found in PATH.\n"
+                "dwarffi requires dwarf2json to extract type info from compiled C code.\n"
+                "Please download or build it from: https://github.com/volatilityfoundation/dwarf2json"
+            )
+
+        if not os.path.exists(elf_path):
+            raise FileNotFoundError(f"Binary file not found: {elf_path}")
+
+        # Validate the cache path up front so a bad extension fails before the
+        # (potentially slow) dwarf2json run instead of after it.
+        out_path = os.path.abspath(save_isf_to) if save_isf_to else None
+        if out_path is not None and not out_path.endswith((".json", ".json.xz")):
+            raise ValueError("save_isf_to must end with '.json' or '.json.xz'")
+
+        # Pick the dwarf2json mode from the binary itself rather than from the
+        # host OS; sys.platform is only a fallback for unrecognised headers.
+        try:
+            with open(elf_path, "rb") as bf:
+                magic = bf.read(4)
+        except OSError:
+            magic = b""  # unreadable here; let dwarf2json report the real error
+        macho_magics = (
+            b"\xfe\xed\xfa\xce",  # 32-bit, big-endian
+            b"\xce\xfa\xed\xfe",  # 32-bit, little-endian
+            b"\xfe\xed\xfa\xcf",  # 64-bit, big-endian
+            b"\xcf\xfa\xed\xfe",  # 64-bit, little-endian
+        )
+        if magic == b"\x7fELF":
+            is_macho = False
+        elif magic in macho_magics:
+            is_macho = True
+        else:
+            is_macho = sys.platform == "darwin"
+
+        # Run dwarf2json (using --elf / --macho to get both types and symbols)
+        if is_macho:
+            cmd = [dwarf2json_cmd, "mac", "--macho", elf_path]
+        else:
+            cmd = [dwarf2json_cmd, "linux", "--elf", elf_path]
+
+        try:
+            res = subprocess.run(cmd, check=True, capture_output=True, text=True)
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(
+                f"dwarf2json failed:\nCommand: {' '.join(cmd)}\nStderr: {e.stderr}"
+            ) from e
+
+        # Parse the JSON output
+        try:
+            isf_dict = json.loads(res.stdout)
+        except json.JSONDecodeError as e:
+            raise RuntimeError(
+                f"Failed to parse dwarf2json output: {e}\nOutput head: {res.stdout[:500]}"
+            ) from e
+
+        # Optionally save the ISF to disk
+        if out_path is not None:
+            os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
+            opener = lzma.open if out_path.endswith(".json.xz") else open
+            with opener(out_path, "wt", encoding="utf-8") as isf_file:
+                json.dump(isf_dict, isf_file, indent=2, sort_keys=True)
+
+        # Load into this DFFI instance under a per-binary pseudo path
+        vtype_obj = VtypeJson(isf_dict)
+        pseudo_path = f"<elf:{elf_path}>"
+        self._add_vtypejson(pseudo_path, vtype_obj)
+
+    def load_elf_bytes(
+        self,
+        elf_bytes: Union[bytes, bytearray],
+        dwarf2json_cmd: Optional[str] = None,
+        save_isf_to: Optional[str] = None,
+    ) -> None:
+        """
+        Extracts DWARF info from an in-memory ELF/Mach-O binary via dwarf2json,
+        and dynamically loads the resulting types and symbols into this DFFI instance.
+
+        Ideal for handling API uploads without managing manual cleanup.
+
+        Args:
+            elf_bytes: Raw bytes of the uploaded ELF/Mach-O file.
+            dwarf2json_cmd: Path to the dwarf2json executable.
+            save_isf_to: Optional file path to cache the generated ISF.
+        """
+        tmp_elf_path = None
+        try:
+            # Created inside the try so a failed write cannot leak the file
+            with tempfile.NamedTemporaryFile(delete=False) as tmp_elf:
+                tmp_elf_path = tmp_elf.name
+                tmp_elf.write(elf_bytes)
+
+            self.load_elf(
+                tmp_elf_path,
+                dwarf2json_cmd=dwarf2json_cmd,
+                save_isf_to=save_isf_to,
+            )
+        finally:
+            if tmp_elf_path is not None:
+                try:
+                    os.remove(tmp_elf_path)
+                except FileNotFoundError:
+                    pass

From 862f63de3d2bd19139cd62c1a7a7fbea70d11d45 Mon Sep 17 00:00:00 2001
From: Luke Craig
Date: Thu, 14 May 2026 22:30:18 -0400
Subject: [PATCH 2/3] test: elf load

---
 tests/test_elf_load.py | 294 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 294 insertions(+)
 create mode 100644 tests/test_elf_load.py

diff --git a/tests/test_elf_load.py b/tests/test_elf_load.py
new file mode 100644
index 0000000..0feb3e6
--- /dev/null
+++ b/tests/test_elf_load.py
@@ -0,0 +1,294 @@
+import json
+import os
+import subprocess
+from unittest import mock
+
+import pytest
+
+from dwarffi import DFFI
+
+def test_load_elf_success(tmp_path):
+    ffi = DFFI()
+
+    def mock_subprocess_run(cmd, **kwargs):
+        class MockCompletedProcess:
+            def __init__(self, stdout):
+                self.stdout = stdout
+
+        if "dwarf2json" in cmd[0]:
+            fake_isf = {
+                "metadata": {},
+                "base_types": {
+                    "custom_int": {"kind": "int", "size": 4, "signed": True, "endian": "little"}
+                },
+                "user_types": {},
+                "enums": {},
+                "symbols": {},
+                "typedefs": {},
+            }
+            return MockCompletedProcess(json.dumps(fake_isf))
+
+        raise ValueError(f"Unexpected command: {cmd}")
+
+    # Create a dummy ELF file on disk to pass the os.path.exists check
+    fake_elf = tmp_path / "test_binary.elf"
+    fake_elf.write_bytes(b"\x7fELF...")
+
+    out_file = tmp_path / "types.json.xz"
+
+    with mock.patch("dwarffi.dffi.get_dwarf2json_path", return_value="/usr/bin/dwarf2json"):
+        with mock.patch("subprocess.run", side_effect=mock_subprocess_run):
+            ffi.load_elf(
+                str(fake_elf),
+                save_isf_to=str(out_file),
+            )
+
+    # The ISF dictionary returned by our mock should be successfully loaded
+    assert ffi.sizeof("custom_int") == 4
+
+    # The resulting file should have been written and compressed
+    assert out_file.exists()
+
+    # Verify we can load the newly generated compressed file
+    ffi_new = DFFI(str(out_file))
+    assert ffi_new.sizeof("custom_int") == 4
+
+
+def test_load_elf_missing_dwarf2json(tmp_path):
+    ffi = DFFI()
+    fake_elf = tmp_path / "test_binary.elf"
+    fake_elf.write_bytes(b"\x7fELF...")
+
+    # Simulate get_dwarf2json_path failing to find the executable
+    with mock.patch("dwarffi.dffi.get_dwarf2json_path", return_value=None):
+        with pytest.raises(RuntimeError, match="'dwarf2json' not found in PATH"):
+            ffi.load_elf(str(fake_elf))
+
+
+def test_load_elf_file_not_found():
+    ffi = DFFI()
+
+    with mock.patch("dwarffi.dffi.get_dwarf2json_path", return_value="/usr/bin/dwarf2json"):
+        with pytest.raises(FileNotFoundError, match="Binary file not found"):
+            ffi.load_elf("/this/does/not/exist.elf")
+
+
+def test_load_elf_invalid_binary(tmp_path):
+    ffi = DFFI()
+    fake_elf = tmp_path / "bad_binary.elf"
+    fake_elf.write_bytes(b"BAD DATA")
+
+    def mock_subprocess_run(cmd, **kwargs):
+        raise subprocess.CalledProcessError(
+            returncode=1,
+            cmd=cmd,
+            stderr="fatal: invalid ELF magic"
+        )
+
+    with mock.patch("dwarffi.dffi.get_dwarf2json_path", return_value="/usr/bin/dwarf2json"):
+        with mock.patch("subprocess.run", side_effect=mock_subprocess_run):
+            # Added (?s) so the .* will successfully jump over the \n in the error string
+            with pytest.raises(RuntimeError, match=r"(?s)dwarf2json failed:.*invalid ELF magic"):
+                ffi.load_elf(str(fake_elf))
+
+
+def test_load_elf_invalid_json(tmp_path):
+    ffi = DFFI()
+    fake_elf = tmp_path / "test_binary.elf"
+    fake_elf.write_bytes(b"\x7fELF...")
+
+    def mock_subprocess_run(cmd, **kwargs):
+        class MockCompletedProcess:
+            def __init__(self, stdout):
+                self.stdout = stdout
+        return MockCompletedProcess("This is not valid JSON data")
+
+    with mock.patch("dwarffi.dffi.get_dwarf2json_path", return_value="/usr/bin/dwarf2json"):
+        with mock.patch("subprocess.run", side_effect=mock_subprocess_run):
+            with pytest.raises(RuntimeError, match="Failed to parse dwarf2json output"):
+                ffi.load_elf(str(fake_elf))
+
+
+def test_load_elf_rejects_bad_isf_suffix(tmp_path):
+    """A bad save_isf_to extension must fail before dwarf2json is run."""
+    ffi = DFFI()
+    fake_elf = tmp_path / "test_binary.elf"
+    fake_elf.write_bytes(b"\x7fELF...")
+
+    with mock.patch("dwarffi.dffi.get_dwarf2json_path", return_value="/usr/bin/dwarf2json"):
+        with mock.patch("subprocess.run") as mock_run:
+            with pytest.raises(ValueError, match="save_isf_to must end with"):
+                ffi.load_elf(str(fake_elf), save_isf_to=str(tmp_path / "types.txt"))
+
+    mock_run.assert_not_called()
+
+
+def test_load_elf_architectures(tmp_path):
+    """Ensure --elf / --macho follow the binary's magic bytes, not the host OS."""
+    elf_file = tmp_path / "test_binary.elf"
+    elf_file.write_bytes(b"\x7fELF...")
+    macho_file = tmp_path / "test_binary.macho"
+    macho_file.write_bytes(b"\xcf\xfa\xed\xfe...")
+    unknown_file = tmp_path / "test_binary.bin"
+    unknown_file.write_bytes(b"????...")
+
+    executed_cmds = []
+
+    def mock_subprocess_run(cmd, **kwargs):
+        class MockCompletedProcess:
+            def __init__(self, stdout):
+                self.stdout = stdout
+        executed_cmds.append(cmd)
+
+        fake_isf = {"base_types": {}, "user_types": {}, "symbols": {}, "enums": {}, "typedefs": {}}
+        return MockCompletedProcess(json.dumps(fake_isf))
+
+    cases = [
+        # (host platform, binary, expected dwarf2json mode and flag)
+        ("darwin", elf_file, ["linux", "--elf"]),
+        ("linux", macho_file, ["mac", "--macho"]),
+        # Unrecognised magic falls back to the host platform
+        ("linux", unknown_file, ["linux", "--elf"]),
+        ("darwin", unknown_file, ["mac", "--macho"]),
+    ]
+
+    for platform, binary, expected_mode in cases:
+        executed_cmds.clear()
+        ffi = DFFI()
+
+        with mock.patch("dwarffi.dffi.get_dwarf2json_path", return_value="/usr/bin/dwarf2json"):
+            with mock.patch("sys.platform", platform):
+                with mock.patch("subprocess.run", side_effect=mock_subprocess_run):
+                    ffi.load_elf(str(binary))
+
+        assert executed_cmds == [["/usr/bin/dwarf2json", *expected_mode, str(binary)]]
+
+
+def test_load_elf_bytes_success():
+    """Test that the byte wrapper correctly manages the temp file lifecycle."""
+    ffi = DFFI()
+    fake_elf_data = b"\x7fELF..."
+
+    # Mock the internal load_elf call so we don't have to stub subprocess again
+    with mock.patch.object(ffi, "load_elf") as mock_load_elf:
+        ffi.load_elf_bytes(fake_elf_data, dwarf2json_cmd="/custom/dwarf2json")
+
+        mock_load_elf.assert_called_once()
+        args, kwargs = mock_load_elf.call_args
+
+        # Grab the temporary path it generated
+        tmp_path = args[0]
+        assert os.path.isabs(tmp_path)
+        assert kwargs.get("dwarf2json_cmd") == "/custom/dwarf2json"
+
+    # Verify the file was cleaned up upon exit
+    assert not os.path.exists(tmp_path)
+
+
+@pytest.mark.parametrize(
+    "compiler, compiler_flags",
+    [
+        # Architectures with debug symbols
+        ("gcc", ["-g", "-O0"]),
+        ("clang", ["-g", "-O0", "-target", "x86_64-linux-gnu"]),
+        ("arm-none-eabi-gcc", ["-g", "-mthumb", "-O1"]),
+        ("aarch64-linux-gnu-gcc", ["-g", "-mabi=lp64"]),
+        ("powerpc-linux-gnu-gcc", ["-g", "-m32"]),
+
+        # Architectures without debug symbols (stripped / optimized)
+        ("gcc", ["-O2", "-s"]),
+        ("clang", ["-O3"]),
+        ("arm-none-eabi-gcc", ["-O3", "-fomit-frame-pointer"]),
+    ]
+)
+def test_cdef_hello_world_architectures(tmp_path, compiler, compiler_flags):
+    """
+    Tests compiling a simple hello world C program across various architectures
+    and verifies that dwarffi correctly handles binaries with and without debug symbols.
+    """
+    ffi = DFFI()
+    source_code = """
+    #include <stdio.h>
+    int hello_world_value = 42;
+    int main() {
+        printf("Hello World\\n");
+        return 0;
+    }
+    """
+
+    # Determine if this parameter configuration includes debug symbols
+    has_debug_symbols = "-g" in compiler_flags
+
+    def mock_which(cmd, *args, **kwargs):
+        if cmd == "dwarf2json":
+            return "/usr/bin/dwarf2json"
+        if cmd == compiler.split()[0]:
+            return f"/usr/bin/{cmd}"
+        return None
+
+    def mock_subprocess_run(cmd, **kwargs):
+        class MockCompletedProcess:
+            def __init__(self, stdout):
+                self.stdout = stdout
+
+        cmd_str = " ".join(cmd)
+
+        # 1. Simulate the C compiler succeeding
+        if compiler in cmd_str:
+            return MockCompletedProcess("")
+
+        # 2. Simulate dwarf2json's reaction to the compiled object file
+        if "dwarf2json" in cmd_str:
+            if has_debug_symbols:
+                # With -g, dwarf2json successfully extracts types and symbols
+                fake_isf = {
+                    "metadata": {},
+                    "base_types": {
+                        "int": {"kind": "int", "size": 4, "signed": True, "endian": "little"}
+                    },
+                    "user_types": {},
+                    "enums": {},
+                    "symbols": {
+                        "hello_world_value": {"address": 0x1000, "type_info": {"kind": "base", "name": "int"}},
+                        "main": {"address": 0x1020, "type_info": {"kind": "function"}}
+                    },
+                    "typedefs": {},
+                }
+                return MockCompletedProcess(json.dumps(fake_isf))
+            else:
+                # Without -g, dwarf2json returns empty datasets (no DWARF info found)
+                fake_empty_isf = {
+                    "metadata": {},
+                    "base_types": {},
+                    "user_types": {},
+                    "enums": {},
+                    "symbols": {},
+                    "typedefs": {},
+                }
+                return MockCompletedProcess(json.dumps(fake_empty_isf))
+
+        raise ValueError(f"Unexpected command: {cmd}")
+
+    with mock.patch("shutil.which", side_effect=mock_which):
+        # Mock get_dwarf2json_path specifically if your dffi.py relies on it internally
+        with mock.patch("dwarffi.dffi.get_dwarf2json_path", return_value="/usr/bin/dwarf2json"):
+            with mock.patch("subprocess.run", side_effect=mock_subprocess_run):
+
+                # Append -c so it creates an object file (as cdef does)
+                ffi.cdef(
+                    source=source_code,
+                    compiler=compiler,
+                    compiler_flags=compiler_flags + ["-c"]
+                )
+
+    # Assertions based on whether debug info was compiled in
+    if has_debug_symbols:
+        assert ffi.sizeof("int") == 4
+        assert "hello_world_value" in ffi.symbols
+        assert "main" in ffi.symbols
+    else:
+        # If no debug symbols were extracted, the types and symbols dicts should be empty
+        with pytest.raises(KeyError, match="Unknown type 'int'"):
+            ffi.sizeof("int")
+        assert "hello_world_value" not in ffi.symbols
+        assert "main" not in ffi.symbols

From e2a1fab2ab9e16cc165223153a2bc0d0036cc049 Mon Sep 17 00:00:00 2001
From: Luke Craig
Date: Thu, 14 May 2026 22:32:45 -0400
Subject: [PATCH 3/3] ruff

---
 tests/test_elf_load.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_elf_load.py b/tests/test_elf_load.py
index 0feb3e6..8e43f17 100644
--- a/tests/test_elf_load.py
+++ b/tests/test_elf_load.py
@@ -7,6 +7,7 @@
 
 from dwarffi import DFFI
 
+
 def test_load_elf_success(tmp_path):
     ffi = DFFI()
 