From 7b2c682e211f80de316b9ca74e572bd15239f660 Mon Sep 17 00:00:00 2001 From: Luke Craig Date: Thu, 28 May 2026 22:47:18 -0400 Subject: [PATCH 1/2] Add Ghidra ISF import and export scripts --- .github/workflows/ci.yml | 83 ++- README.md | 41 ++ pyproject.toml | 6 +- src/dwarffi/ghidra_scripts/Ghidra2ISF.java | 672 ++++++++++++++++++ src/dwarffi/ghidra_scripts/ISF2Ghidra.java | 768 +++++++++++++++++++++ src/dwarffi/ghidra_scripts/__init__.py | 1 + tests/test_ghidra2isf.py | 359 ++++++++++ 7 files changed, 1927 insertions(+), 3 deletions(-) create mode 100644 src/dwarffi/ghidra_scripts/Ghidra2ISF.java create mode 100644 src/dwarffi/ghidra_scripts/ISF2Ghidra.java create mode 100644 src/dwarffi/ghidra_scripts/__init__.py create mode 100644 tests/test_ghidra2isf.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2a74e6e..b16fb62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,4 +56,85 @@ jobs: run: mypy src - name: Pytest - run: pytest -q \ No newline at end of file + run: pytest -q + + ghidra-script-compile: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + ghidra: + - version: "11.0" + tag: "Ghidra_11.0_build" + asset: "ghidra_11.0_PUBLIC_20231222.zip" + - version: "11.4.2" + tag: "Ghidra_11.4.2_build" + asset: "ghidra_11.4.2_PUBLIC_20250826.zip" + - version: "12.1" + tag: "Ghidra_12.1_build" + asset: "ghidra_12.1_PUBLIC_20260513.zip" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check Ghidra script changes + id: changes + shell: bash + run: | + set -euxo pipefail + BASE_SHA="${{ github.event.pull_request.base.sha }}" + HEAD_SHA="${{ github.event.pull_request.head.sha }}" + if git diff --name-only "$BASE_SHA" "$HEAD_SHA" | grep -E '^(src/dwarffi/ghidra_scripts/|tests/test_ghidra2isf.py|\.github/workflows/ci\.yml)'; then + echo "changed=true" >> "$GITHUB_OUTPUT" + else + echo "changed=false" >> "$GITHUB_OUTPUT" + fi + + - uses: actions/setup-java@v4 + if: 
steps.changes.outputs.changed == 'true' + with: + distribution: temurin + java-version: "21" + + - name: Cache Ghidra ${{ matrix.ghidra.version }} + if: steps.changes.outputs.changed == 'true' + uses: actions/cache@v4 + with: + path: .cache/ghidra/${{ matrix.ghidra.asset }} + key: ghidra-${{ matrix.ghidra.asset }} + + - name: Download Ghidra ${{ matrix.ghidra.version }} + if: steps.changes.outputs.changed == 'true' + shell: bash + run: | + set -euxo pipefail + mkdir -p .cache/ghidra + if [ ! -f ".cache/ghidra/${{ matrix.ghidra.asset }}" ]; then + curl -L \ + -o ".cache/ghidra/${{ matrix.ghidra.asset }}" \ + "https://github.com/NationalSecurityAgency/ghidra/releases/download/${{ matrix.ghidra.tag }}/${{ matrix.ghidra.asset }}" + fi + + - name: Compile Ghidra scripts against ${{ matrix.ghidra.version }} + if: steps.changes.outputs.changed == 'true' + shell: bash + run: | + set -euxo pipefail + rm -rf /tmp/ghidra-ci /tmp/ghidra-script-classes + mkdir -p /tmp/ghidra-ci /tmp/ghidra-script-classes + unzip -q ".cache/ghidra/${{ matrix.ghidra.asset }}" -d /tmp/ghidra-ci + GHIDRA_HOME="$(find /tmp/ghidra-ci -maxdepth 1 -type d -name 'ghidra_*' | head -n 1)" + test -n "$GHIDRA_HOME" + find "$GHIDRA_HOME" -name '*.jar' -print > /tmp/ghidra-jars.txt + test -s /tmp/ghidra-jars.txt + javac -proc:none \ + -cp "$(paste -sd: /tmp/ghidra-jars.txt)" \ + -d /tmp/ghidra-script-classes \ + src/dwarffi/ghidra_scripts/Ghidra2ISF.java \ + src/dwarffi/ghidra_scripts/ISF2Ghidra.java + + - name: Skip Ghidra compile + if: steps.changes.outputs.changed != 'true' + run: echo "No Ghidra script or workflow changes detected; skipping Ghidra compile matrix." diff --git a/README.md b/README.md index 8172c73..b4202a6 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ as defined by the toolchain and target architecture. - **Linux / embedded workflows:** ISF generated from **DWARF** in ELF binaries (e.g., via `dwarf2json`). 
- **Windows workflows:** ISF generated from **PDB** symbols (e.g., Volatility3-style Windows ISFs generated from PDBs). - **MacOS / Mach-O workflows:** ISF generated from DWARF in Mach-O binaries. +- **Ghidra workflows:** ISF generated from the active Ghidra program using the bundled `Ghidra2ISF.java` script. Read more about `dwarf2json` and ISF in the [dwarf2json README](https://github.com/volatilityfoundation/dwarf2json). @@ -138,6 +139,46 @@ ffi.inspect_layout("struct _UNICODE_STRING") --- +## Ghidra ISF scripts + +`dwarffi` includes Ghidra scripts under `src/dwarffi/ghidra_scripts`. + +Export the active Ghidra program to ISF: + +```bash +analyzeHeadless /tmp/ghidra-project DffiExport \ + -import ./firmware.elf \ + -scriptPath ./src/dwarffi/ghidra_scripts \ + -postScript Ghidra2ISF.java ./firmware.isf.json \ + -deleteProject +``` + +The exporter writes `base_types`, `user_types`, `enums`, `typedefs`, `symbols`, +and `functions` in the same ISF shape consumed by `DFFI`. + +Import an ISF into the active Ghidra program: + +```bash +analyzeHeadless /tmp/ghidra-project DffiImport \ + -import ./firmware.elf \ + -scriptPath ./src/dwarffi/ghidra_scripts \ + -postScript ISF2Ghidra.java ./firmware.isf.json +``` + +The importer creates a `/ISF` data type category, imports base types, structs, +unions, enums, typedefs, arrays, pointers, and bitfields, then applies labels, +data types, and function signatures when addresses are present. + +Optional script arguments for both scripts: + +```bash +--types-only +--no-symbols +--no-functions +``` + +--- + ## CFFI-style `cdef` We do support inline C definitions that compile down to DWARF and ISF on the fly. This is ideal for quick prototyping or when you have a small struct definition that isn't already in your ISF. 
diff --git a/pyproject.toml b/pyproject.toml index a4ed5b6..54d2f90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,9 @@ version-file = "src/dwarffi/_version.py" [tool.hatch.build] artifacts = [ "src/dwarffi/bin/dwarf2json", - "src/dwarffi/bin/dwarf2json.exe" + "src/dwarffi/bin/dwarf2json.exe", + "src/dwarffi/ghidra_scripts/Ghidra2ISF.java", + "src/dwarffi/ghidra_scripts/ISF2Ghidra.java" ] [tool.hatch.version.raw-options] @@ -88,4 +90,4 @@ module = "dwarffi._version" ignore_missing_imports = true [project.scripts] -dwarf2json = "dwarffi.cli:main" \ No newline at end of file +dwarf2json = "dwarffi.cli:main" diff --git a/src/dwarffi/ghidra_scripts/Ghidra2ISF.java b/src/dwarffi/ghidra_scripts/Ghidra2ISF.java new file mode 100644 index 0000000..17cf2ef --- /dev/null +++ b/src/dwarffi/ghidra_scripts/Ghidra2ISF.java @@ -0,0 +1,672 @@ +// Exports the current Ghidra program's data types, symbols, and functions to +// Volatility-style Intermediate Symbol File (ISF) JSON. +// +// Usage from Ghidra Script Manager: +// Run the script and choose an output .json file when prompted. 
+// +// Usage from analyzeHeadless: +// analyzeHeadless /tmp/proj Proj -import sample.bin \ +// -scriptPath /path/to/src/dwarffi/ghidra_scripts \ +// -postScript Ghidra2ISF.java /tmp/sample.isf.json +// +// Optional arguments: +// --types-only Export only type sections +// --no-symbols Do not export symbols +// --no-functions Do not export functions + +import ghidra.app.script.GhidraScript; +import ghidra.program.model.address.Address; +import ghidra.program.model.data.Array; +import ghidra.program.model.data.BitFieldDataType; +import ghidra.program.model.data.BooleanDataType; +import ghidra.program.model.data.CharDataType; +import ghidra.program.model.data.DataType; +import ghidra.program.model.data.DataTypeComponent; +import ghidra.program.model.data.DataTypeManager; +import ghidra.program.model.data.DefaultDataType; +import ghidra.program.model.data.Enum; +import ghidra.program.model.data.FloatDataType; +import ghidra.program.model.data.FunctionDefinition; +import ghidra.program.model.data.ParameterDefinition; +import ghidra.program.model.data.Pointer; +import ghidra.program.model.data.Structure; +import ghidra.program.model.data.TypeDef; +import ghidra.program.model.data.Union; +import ghidra.program.model.data.VoidDataType; +import ghidra.program.model.listing.Data; +import ghidra.program.model.listing.Function; +import ghidra.program.model.listing.FunctionIterator; +import ghidra.program.model.listing.Listing; +import ghidra.program.model.listing.Parameter; +import ghidra.program.model.listing.Program; +import ghidra.program.model.symbol.Symbol; +import ghidra.program.model.symbol.SymbolIterator; +import ghidra.program.model.symbol.SymbolTable; +import ghidra.program.model.symbol.SymbolType; +import ghidra.util.task.TaskMonitor; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Method; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import 
java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.TreeMap; + +public class Ghidra2ISF extends GhidraScript { + private static final String TOOL_NAME = "ghidra2isf"; + private static final String TOOL_VERSION = "0.1.0"; + private static final String FORMAT_VERSION = "6.2.0"; + + @Override + protected void run() throws Exception { + if (currentProgram == null) { + printerr("No current program is open."); + return; + } + + ExportOptions options = ExportOptions.fromArgs(getScriptArgs()); + if (options.outputFile == null) { + options.outputFile = askFile("Save ISF JSON", "Save"); + } + + IsfExporter exporter = new IsfExporter(currentProgram, monitor); + Map document = exporter.export(options); + writeJson(options.outputFile, document); + + println(String.format( + "Wrote ISF to %s (%d base types, %d user types, %d enums, %d typedefs, %d symbols, %d functions)", + options.outputFile.getAbsolutePath(), + exporter.baseTypes.size(), + exporter.userTypes.size(), + exporter.enums.size(), + exporter.typedefs.size(), + exporter.symbols.size(), + exporter.functions.size() + )); + } + + private static void writeJson(File outputFile, Map document) throws IOException { + File parent = outputFile.getAbsoluteFile().getParentFile(); + if (parent != null) { + Files.createDirectories(parent.toPath()); + } + + try (BufferedWriter writer = Files.newBufferedWriter(outputFile.toPath(), StandardCharsets.UTF_8)) { + JsonWriter.write(document, writer); + writer.write('\n'); + } + } + + private static final class ExportOptions { + File outputFile; + boolean exportSymbols = true; + boolean exportFunctions = true; + + static ExportOptions fromArgs(String[] args) { + ExportOptions options = new ExportOptions(); + if (args == null) { + return options; + } + + for (String arg : args) { + if ("--types-only".equals(arg)) { + options.exportSymbols = false; + options.exportFunctions = false; + } else if 
("--no-symbols".equals(arg)) { + options.exportSymbols = false; + } else if ("--no-functions".equals(arg)) { + options.exportFunctions = false; + } else if (options.outputFile == null) { + options.outputFile = new File(arg); + } else { + throw new IllegalArgumentException("Unexpected argument: " + arg); + } + } + return options; + } + } + + private static final class IsfExporter { + private final Program program; + private final TaskMonitor monitor; + private final String endian; + private final int pointerSize; + + final TreeMap baseTypes = new TreeMap<>(); + final TreeMap userTypes = new TreeMap<>(); + final TreeMap enums = new TreeMap<>(); + final TreeMap typedefs = new TreeMap<>(); + final TreeMap symbols = new TreeMap<>(); + final TreeMap functions = new TreeMap<>(); + + IsfExporter(Program program, TaskMonitor monitor) { + this.program = program; + this.monitor = monitor; + this.endian = program.getLanguage().isBigEndian() ? "big" : "little"; + this.pointerSize = Math.max(program.getDefaultPointerSize(), 1); + } + + Map export(ExportOptions options) throws Exception { + exportTypes(); + if (options.exportSymbols) { + exportSymbols(); + } + if (options.exportFunctions) { + exportFunctions(); + } + + LinkedHashMap document = new LinkedHashMap<>(); + document.put("metadata", metadata()); + document.put("base_types", baseTypes); + document.put("user_types", userTypes); + document.put("enums", enums); + document.put("symbols", symbols); + document.put("functions", functions); + document.put("typedefs", typedefs); + return document; + } + + private Map metadata() { + LinkedHashMap producer = new LinkedHashMap<>(); + producer.put("name", TOOL_NAME); + producer.put("version", TOOL_VERSION); + producer.put("ghidra_version", System.getProperty("application.version", "unknown")); + + LinkedHashMap source = new LinkedHashMap<>(); + source.put("kind", "ghidra_program"); + source.put("name", program.getName()); + + LinkedHashMap ghidra = new LinkedHashMap<>(); + 
ghidra.put("types", listOf(source)); + ghidra.put("symbols", listOf(source)); + + LinkedHashMap metadata = new LinkedHashMap<>(); + metadata.put("producer", producer); + metadata.put("format", FORMAT_VERSION); + metadata.put("ghidra", ghidra); + return metadata; + } + + private static List listOf(Object value) { + ArrayList values = new ArrayList<>(); + values.add(value); + return values; + } + + private void exportTypes() throws Exception { + ensureVoid(); + ensurePointer(); + + DataTypeManager dataTypeManager = program.getDataTypeManager(); + Iterator iterator = dataTypeManager.getAllDataTypes(); + while (iterator.hasNext()) { + monitor.checkCancelled(); + DataType dataType = iterator.next(); + if (dataType == null || dataType instanceof DefaultDataType) { + continue; + } + exportDataType(dataType); + } + } + + private void exportDataType(DataType dataType) { + if (dataType instanceof Structure) { + exportComposite((Structure) dataType, "struct"); + } else if (dataType instanceof Union) { + exportComposite((Union) dataType, "union"); + } else if (dataType instanceof Enum) { + exportEnum((Enum) dataType); + } else if (dataType instanceof TypeDef) { + exportTypedef((TypeDef) dataType); + } else if (isBaseLike(dataType)) { + ensureBase(dataType); + } else if (dataType instanceof Pointer) { + ensurePointer(); + typeRef(((Pointer) dataType).getDataType()); + } else if (dataType instanceof Array) { + typeRef(((Array) dataType).getDataType()); + } + } + + private void exportComposite(DataType composite, String kind) { + String name = typeName(composite); + if (userTypes.containsKey(name)) { + return; + } + + TreeMap fields = new TreeMap<>(); + LinkedHashMap record = new LinkedHashMap<>(); + record.put("size", Math.max(composite.getLength(), 0)); + record.put("fields", fields); + record.put("kind", kind); + userTypes.put(name, record); // Registered before fields are resolved: typeRef() re-enters this method for self-referential composites and must hit the containsKey() guard above. + + DataTypeComponent[] components; + if (composite instanceof Structure) { + components = ((Structure) composite).getComponents(); + } else { + components = ((Union) composite).getComponents(); + } + + int anonymousCount = 0; + for (DataTypeComponent component : components) { + if (component == null) { + continue; + } + + DataType 
fieldType = component.getDataType(); + if (fieldType == null || fieldType instanceof DefaultDataType) { + continue; + } + + String fieldName = component.getFieldName(); + boolean anonymous = fieldName == null || fieldName.isEmpty(); + if (anonymous) { + fieldName = "unnamed_field_" + anonymousCount; + anonymousCount += 1; + } + + LinkedHashMap field = new LinkedHashMap<>(); + field.put("type", fieldTypeRef(component)); + field.put("offset", Math.max(component.getOffset(), 0)); + if (anonymous) { + field.put("anonymous", true); + } + fields.put(fieldName, field); // Fills the map already attached to the registered record. + } + } + + private Map fieldTypeRef(DataTypeComponent component) { + DataType dataType = component.getDataType(); + if (dataType instanceof BitFieldDataType) { + BitFieldDataType bitField = (BitFieldDataType) dataType; + LinkedHashMap wrapper = new LinkedHashMap<>(); + wrapper.put("kind", "bitfield"); + wrapper.put("bit_length", firstPositiveInt(bitField, 0, "getDeclaredBitSize", "getBitSize")); + wrapper.put("bit_position", firstPositiveInt(bitField, 0, "getBitOffset")); // Looked up on the bitfield type; the lookup on the component always fell back to 0. + wrapper.put("type", typeRef(bitField.getBaseDataType())); + return wrapper; + } + return typeRef(dataType); + } + + private void exportEnum(Enum enumType) { + String name = typeName(enumType); + if (enums.containsKey(name)) { + return; + } + + TreeMap constants = new TreeMap<>(); + for (String constantName : enumType.getNames()) { + constants.put(constantName, enumType.getValue(constantName)); + } + + LinkedHashMap record = new LinkedHashMap<>(); + record.put("size", Math.max(enumType.getLength(), 0)); + record.put("base", enumBaseName(enumType)); + record.put("constants", constants); + enums.put(name, record); + } + + private String enumBaseName(Enum enumType) { + int length = Math.max(enumType.getLength(), 0); + boolean signed = false; + 
for (String constantName : enumType.getNames()) { + if (enumType.getValue(constantName) < 0) { + signed = true; + break; + } + } + String name = (signed ? "int" : "uint") + (length * 8) + "_t"; + ensureSyntheticBase(name, length, "int", signed); + return name; + } + + private void exportTypedef(TypeDef typeDef) { + String name = typeName(typeDef); + if (typedefs.containsKey(name)) { + return; + } + typedefs.put(name, typeRef(typeDef.getBaseDataType())); + } + + private void exportSymbols() throws Exception { + SymbolTable symbolTable = program.getSymbolTable(); + Listing listing = program.getListing(); + SymbolIterator iterator = symbolTable.getAllSymbols(true); + while (iterator.hasNext()) { + monitor.checkCancelled(); + Symbol symbol = iterator.next(); + if (symbol == null || symbol.isExternal() || symbol.getAddress() == null) { + continue; + } + if (symbol.getSymbolType() == SymbolType.FUNCTION) { + continue; + } + Address address = symbol.getAddress(); + if (!program.getMemory().contains(address)) { + continue; + } + + LinkedHashMap record = new LinkedHashMap<>(); + Data data = listing.getDataAt(address); + if (data != null && data.getDataType() != null) { + record.put("type", typeRef(data.getDataType())); + } + record.put("address", address.getOffset()); + symbols.put(symbol.getName(true), record); + } + } + + private void exportFunctions() throws Exception { + FunctionIterator iterator = program.getListing().getFunctions(true); + while (iterator.hasNext()) { + monitor.checkCancelled(); + Function function = iterator.next(); + if (function == null || function.isExternal()) { + continue; + } + + ArrayList parameters = new ArrayList<>(); + for (Parameter parameter : function.getParameters()) { + LinkedHashMap p = new LinkedHashMap<>(); + p.put("name", parameter.getName()); + p.put("type", typeRef(parameter.getDataType())); + parameters.add(p); + } + + LinkedHashMap record = new LinkedHashMap<>(); + record.put("address", function.getEntryPoint().getOffset()); + 
record.put("return_type", typeRef(function.getReturnType())); + record.put("parameters", parameters); + functions.put(function.getName(true), record); + } + } + + private Map typeRef(DataType dataType) { + LinkedHashMap result = new LinkedHashMap<>(); + if (dataType == null || dataType instanceof VoidDataType) { + ensureVoid(); + result.put("kind", "base"); + result.put("name", "void"); + return result; + } + + if (dataType instanceof TypeDef) { + exportTypedef((TypeDef) dataType); + result.put("kind", "typedef"); + result.put("name", typeName(dataType)); + return result; + } + + if (dataType instanceof Pointer) { + ensurePointer(); + result.put("kind", "pointer"); + result.put("subtype", typeRef(((Pointer) dataType).getDataType())); + return result; + } + + if (dataType instanceof Array) { + Array array = (Array) dataType; + result.put("kind", "array"); + result.put("count", Math.max(array.getNumElements(), 0)); + result.put("subtype", typeRef(array.getDataType())); + return result; + } + + if (dataType instanceof Structure) { + exportComposite((Structure) dataType, "struct"); + result.put("kind", "struct"); + result.put("name", typeName(dataType)); + return result; + } + + if (dataType instanceof Union) { + exportComposite((Union) dataType, "union"); + result.put("kind", "union"); + result.put("name", typeName(dataType)); + return result; + } + + if (dataType instanceof Enum) { + exportEnum((Enum) dataType); + result.put("kind", "enum"); + result.put("name", typeName(dataType)); + return result; + } + + if (dataType instanceof FunctionDefinition) { + FunctionDefinition functionDefinition = (FunctionDefinition) dataType; + ArrayList parameters = new ArrayList<>(); + for (ParameterDefinition parameter : functionDefinition.getArguments()) { + LinkedHashMap p = new LinkedHashMap<>(); + p.put("name", parameter.getName()); + p.put("type", typeRef(parameter.getDataType())); + parameters.add(p); + } + result.put("kind", "function"); + result.put("return_type", 
typeRef(functionDefinition.getReturnType())); + result.put("parameters", parameters); + return result; + } + + String baseName = ensureBase(dataType); + result.put("kind", "base"); + result.put("name", baseName); + return result; + } + + private boolean isBaseLike(DataType dataType) { + return !(dataType instanceof Structure) + && !(dataType instanceof Union) + && !(dataType instanceof Enum) + && !(dataType instanceof TypeDef) + && !(dataType instanceof Pointer) + && !(dataType instanceof Array) + && !(dataType instanceof FunctionDefinition); + } + + private String ensureBase(DataType dataType) { + if (dataType == null || dataType instanceof VoidDataType) { + ensureVoid(); + return "void"; + } + + String name = typeName(dataType); + String kind = baseKind(dataType); + int size = Math.max(dataType.getLength(), 0); + boolean signed = isSignedBase(dataType); + + if (size == 0 && !"void".equals(name)) { + name = "opaque_0"; + } + ensureSyntheticBase(name, size, kind, signed); + return name; + } + + private void ensureVoid() { + ensureSyntheticBase("void", 0, "void", false); + } + + private void ensurePointer() { + ensureSyntheticBase("pointer", pointerSize, "pointer", false); + } + + private void ensureSyntheticBase(String name, int size, String kind, boolean signed) { + if (baseTypes.containsKey(name)) { + return; + } + LinkedHashMap record = new LinkedHashMap<>(); + record.put("size", Math.max(size, 0)); + record.put("signed", signed); + record.put("kind", kind); + record.put("endian", endian); + baseTypes.put(name, record); + } + + private String baseKind(DataType dataType) { + String lower = dataType.getName().toLowerCase(Locale.ROOT); + if (dataType instanceof VoidDataType || dataType.getLength() == 0 || "void".equals(lower)) { + return "void"; + } + if (dataType instanceof BooleanDataType || lower.contains("bool")) { + return "bool"; + } + if (dataType instanceof CharDataType || lower.equals("char") || lower.endsWith(" char")) { + return "char"; + } + if 
(dataType instanceof FloatDataType || lower.contains("float") || lower.contains("double")) { + return "float"; + } + return "int"; + } + + private boolean isSignedBase(DataType dataType) { + String lower = dataType.getName().toLowerCase(Locale.ROOT); + if (lower.contains("unsigned") || lower.startsWith("u") || lower.startsWith("uint") + || lower.contains("byte")) { + return false; + } + return !"bool".equals(baseKind(dataType)) && !"void".equals(baseKind(dataType)); + } + + private String typeName(DataType dataType) { + String name = dataType.getName(); + if (name == null || name.isEmpty() || name.startsWith("undefined")) { + String category = dataType.getCategoryPath() == null + ? "root" + : dataType.getCategoryPath().getPath(); + name = "unnamed_" + Integer.toHexString((category + ":" + dataType.getPathName()).hashCode()); + } + return name; + } + } + + private static int firstPositiveInt(Object target, int fallback, String... methodNames) { + for (String methodName : methodNames) { + try { + Method method = target.getClass().getMethod(methodName); + Object value = method.invoke(target); + if (value instanceof Number) { + int intValue = ((Number) value).intValue(); + if (intValue >= 0) { + return intValue; + } + } + } catch (Exception ignored) { + // Try the next method name for compatibility across Ghidra versions. 
+ } + } + return fallback; + } + + private static final class JsonWriter { + static void write(Object value, Appendable out) throws IOException { + writeValue(value, out, 0); + } + + @SuppressWarnings("unchecked") + private static void writeValue(Object value, Appendable out, int indent) throws IOException { + if (value == null) { + out.append("null"); + } else if (value instanceof String) { + writeString((String) value, out); + } else if (value instanceof Number || value instanceof Boolean) { + out.append(String.valueOf(value)); + } else if (value instanceof Map) { + writeMap((Map) value, out, indent); + } else if (value instanceof Iterable) { + writeIterable((Iterable) value, out, indent); + } else { + writeString(String.valueOf(value), out); + } + } + + private static void writeMap(Map map, Appendable out, int indent) throws IOException { + out.append('{'); + if (!map.isEmpty()) { + boolean first = true; + for (Map.Entry entry : map.entrySet()) { + if (!first) { + out.append(','); + } + newline(out, indent + 2); + writeString(entry.getKey(), out); + out.append(": "); + writeValue(entry.getValue(), out, indent + 2); + first = false; + } + newline(out, indent); + } + out.append('}'); + } + + private static void writeIterable(Iterable values, Appendable out, int indent) throws IOException { + out.append('['); + boolean first = true; + for (Object value : values) { + if (!first) { + out.append(','); + } + newline(out, indent + 2); + writeValue(value, out, indent + 2); + first = false; + } + if (!first) { + newline(out, indent); + } + out.append(']'); + } + + private static void newline(Appendable out, int indent) throws IOException { + out.append('\n'); + for (int i = 0; i < indent; i++) { + out.append(' '); + } + } + + private static void writeString(String value, Appendable out) throws IOException { + out.append('"'); + for (int i = 0; i < value.length(); i++) { + char c = value.charAt(i); + switch (c) { + case '"': + out.append("\\\""); + break; + case '\\': + 
out.append("\\\\"); + break; + case '\b': + out.append("\\b"); + break; + case '\f': + out.append("\\f"); + break; + case '\n': + out.append("\\n"); + break; + case '\r': + out.append("\\r"); + break; + case '\t': + out.append("\\t"); + break; + default: + if (c < 0x20) { + out.append(String.format("\\u%04x", (int) c)); + } else { + out.append(c); + } + } + } + out.append('"'); + } + } +} diff --git a/src/dwarffi/ghidra_scripts/ISF2Ghidra.java b/src/dwarffi/ghidra_scripts/ISF2Ghidra.java new file mode 100644 index 0000000..234ab6b --- /dev/null +++ b/src/dwarffi/ghidra_scripts/ISF2Ghidra.java @@ -0,0 +1,768 @@ +// Imports Volatility-style Intermediate Symbol File (ISF) JSON into the +// current Ghidra program. +// +// Usage from Ghidra Script Manager: +// Run the script and choose an input .json file when prompted. +// +// Usage from analyzeHeadless: +// analyzeHeadless /tmp/proj Proj -import sample.bin \ +// -scriptPath /path/to/src/dwarffi/ghidra_scripts \ +// -postScript ISF2Ghidra.java /tmp/sample.isf.json +// +// Optional arguments: +// --types-only Import only data types +// --no-symbols Do not create labels/data from ISF symbols +// --no-functions Do not create/update functions from ISF functions + +import ghidra.app.script.GhidraScript; +import ghidra.program.database.function.OverlappingFunctionException; +import ghidra.program.model.address.Address; +import ghidra.program.model.address.AddressSet; +import ghidra.program.model.data.AbstractIntegerDataType; +import ghidra.program.model.data.ArrayDataType; +import ghidra.program.model.data.BooleanDataType; +import ghidra.program.model.data.ByteDataType; +import ghidra.program.model.data.CategoryPath; +import ghidra.program.model.data.CharDataType; +import ghidra.program.model.data.DataType; +import ghidra.program.model.data.DataTypeConflictHandler; +import ghidra.program.model.data.DataTypeManager; +import ghidra.program.model.data.DoubleDataType; +import ghidra.program.model.data.EnumDataType; +import 
ghidra.program.model.data.Float4DataType; +import ghidra.program.model.data.Float8DataType; +import ghidra.program.model.data.FloatDataType; +import ghidra.program.model.data.InvalidDataTypeException; +import ghidra.program.model.data.PointerDataType; +import ghidra.program.model.data.StructureDataType; +import ghidra.program.model.data.TypedefDataType; +import ghidra.program.model.data.UnionDataType; +import ghidra.program.model.data.VoidDataType; +import ghidra.program.model.listing.Function; +import ghidra.program.model.listing.Listing; +import ghidra.program.model.listing.ParameterImpl; +import ghidra.program.model.listing.Variable; +import ghidra.program.model.symbol.SourceType; +import ghidra.program.model.symbol.SymbolTable; +import ghidra.program.model.util.CodeUnitInsertionException; +import ghidra.util.exception.DuplicateNameException; +import ghidra.util.exception.InvalidInputException; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Method; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +public class ISF2Ghidra extends GhidraScript { + private static final CategoryPath ISF_CATEGORY = new CategoryPath("/ISF"); + + @Override + protected void run() throws Exception { + if (currentProgram == null) { + printerr("No current program is open."); + return; + } + + ImportOptions options = ImportOptions.fromArgs(getScriptArgs()); + if (options.inputFile == null) { + options.inputFile = askFile("Open ISF JSON", "Import"); + } + + Object parsed = new JsonParser(Files.readString(options.inputFile.toPath(), StandardCharsets.UTF_8)).parse(); + if (!(parsed instanceof Map)) { + throw new IllegalArgumentException("ISF root must be a JSON object"); + } + + int tx = 
currentProgram.startTransaction("Import ISF"); + boolean commit = false; + try { + IsfImporter importer = new IsfImporter(asMap(parsed), currentProgram.getDataTypeManager()); + importer.importTypes(); + if (options.importSymbols) { + importer.importSymbols(currentProgram.getListing(), currentProgram.getSymbolTable()); + } + if (options.importFunctions) { + importer.importFunctions(); + } + commit = true; + println(String.format( + "Imported ISF from %s (%d base types, %d user types, %d enums, %d typedefs, %d symbols, %d functions)", + options.inputFile.getAbsolutePath(), + importer.baseCount, + importer.userTypeCount, + importer.enumCount, + importer.typedefCount, + importer.symbolCount, + importer.functionCount + )); + } finally { + currentProgram.endTransaction(tx, commit); + } + } + + private static final class ImportOptions { + File inputFile; + boolean importSymbols = true; + boolean importFunctions = true; + + static ImportOptions fromArgs(String[] args) { + ImportOptions options = new ImportOptions(); + if (args == null) { + return options; + } + for (String arg : args) { + if ("--types-only".equals(arg)) { + options.importSymbols = false; + options.importFunctions = false; + } else if ("--no-symbols".equals(arg)) { + options.importSymbols = false; + } else if ("--no-functions".equals(arg)) { + options.importFunctions = false; + } else if (options.inputFile == null) { + options.inputFile = new File(arg); + } else { + throw new IllegalArgumentException("Unexpected argument: " + arg); + } + } + return options; + } + } + + private final class IsfImporter { + private final Map isf; + private final DataTypeManager dataTypeManager; + private final Map baseTypes = new TreeMap<>(); + private final Map userTypes = new TreeMap<>(); + private final Map enums = new TreeMap<>(); + private final Map typedefs = new TreeMap<>(); + private final Set resolvingTypedefs = new HashSet<>(); + + int baseCount; + int userTypeCount; + int enumCount; + int typedefCount; + int 
symbolCount; + int functionCount; + + IsfImporter(Map isf, DataTypeManager dataTypeManager) { + this.isf = isf; + this.dataTypeManager = dataTypeManager; + } + + void importTypes() throws Exception { + importBaseTypes(); + predeclareUserTypes(); + importEnums(); + importTypedefs(); + populateUserTypes(); + } + + private void importBaseTypes() { + for (Map.Entry entry : objectMap("base_types").entrySet()) { + Map spec = asMap(entry.getValue()); + DataType dataType = baseDataType(entry.getKey(), spec); + baseTypes.put(entry.getKey(), dataType); + + if (!isBuiltInName(entry.getKey(), dataType)) { + DataType alias = new TypedefDataType(ISF_CATEGORY, entry.getKey(), dataType); + dataTypeManager.resolve(alias, DataTypeConflictHandler.REPLACE_HANDLER); + } + baseCount += 1; + } + baseTypes.putIfAbsent("void", VoidDataType.dataType); + baseTypes.putIfAbsent("pointer", new PointerDataType(dataTypeManager)); + } + + private DataType baseDataType(String name, Map spec) { + String kind = stringValue(spec.get("kind"), "int"); + int size = intValue(spec.get("size"), 1); + boolean signed = boolValue(spec.get("signed"), true); + + if ("void".equals(kind) || size == 0) { + return VoidDataType.dataType; + } + if ("pointer".equals(kind) || "pointer".equals(name)) { + return new PointerDataType(dataTypeManager); + } + if ("bool".equals(kind)) { + return BooleanDataType.dataType; + } + if ("char".equals(kind)) { + return CharDataType.dataType; + } + if ("float".equals(kind)) { + if (size == 4) { + return Float4DataType.dataType; + } + if (size == 8) { + return Float8DataType.dataType; + } + if (size == 8 && name.toLowerCase(Locale.ROOT).contains("double")) { + return DoubleDataType.dataType; + } + return FloatDataType.dataType; + } + DataType integer = signed + ? AbstractIntegerDataType.getSignedDataType(size, dataTypeManager) + : AbstractIntegerDataType.getUnsignedDataType(size, dataTypeManager); + return integer == null ? 
ByteDataType.dataType : integer; + } + + private boolean isBuiltInName(String name, DataType dataType) { + return name.equals(dataType.getName()) + || "void".equals(name) + || "pointer".equals(name) + || dataType instanceof PointerDataType; + } + + private void predeclareUserTypes() { + for (Map.Entry entry : objectMap("user_types").entrySet()) { + String name = entry.getKey(); + Map spec = asMap(entry.getValue()); + String kind = stringValue(spec.get("kind"), "struct"); + int size = intValue(spec.get("size"), 0); + DataType dataType = "union".equals(kind) + ? new UnionDataType(ISF_CATEGORY, name, dataTypeManager) + : new StructureDataType(ISF_CATEGORY, name, Math.max(size, 0), dataTypeManager); + userTypes.put(name, dataType); + userTypeCount += 1; + } + } + + private void importEnums() { + for (Map.Entry entry : objectMap("enums").entrySet()) { + String name = entry.getKey(); + Map spec = asMap(entry.getValue()); + int size = Math.max(intValue(spec.get("size"), 4), 1); + EnumDataType enumType = new EnumDataType(ISF_CATEGORY, name, size, dataTypeManager); + for (Map.Entry constant : asMap(spec.get("constants")).entrySet()) { + enumType.add(constant.getKey(), longValue(constant.getValue(), 0)); + } + enums.put(name, dataTypeManager.resolve(enumType, DataTypeConflictHandler.REPLACE_HANDLER)); + enumCount += 1; + } + } + + private void populateUserTypes() throws InvalidDataTypeException { + for (Map.Entry entry : objectMap("user_types").entrySet()) { + String name = entry.getKey(); + Map spec = asMap(entry.getValue()); + DataType dataType = userTypes.get(name); + List fields = fields(spec); + if (dataType instanceof StructureDataType) { + StructureDataType structure = (StructureDataType) dataType; + structure.deleteAll(); + setStructureLength(structure, Math.max(intValue(spec.get("size"), 0), 0)); + for (FieldSpec field : fields) { + addStructureField(structure, field); + } + userTypes.put(name, dataTypeManager.resolve(structure, 
DataTypeConflictHandler.REPLACE_HANDLER)); + } else if (dataType instanceof UnionDataType) { + UnionDataType union = (UnionDataType) dataType; + for (FieldSpec field : fields) { + addUnionField(union, field); + } + userTypes.put(name, dataTypeManager.resolve(union, DataTypeConflictHandler.REPLACE_HANDLER)); + } + } + } + + private void setStructureLength(StructureDataType structure, int length) { + try { + Method setLength = structure.getClass().getMethod("setLength", int.class); + setLength.invoke(structure, length); + return; + } catch (Exception ignored) { + // Ghidra 11.0 lacks setLength; grow the empty structure instead. + } + int delta = length - Math.max(structure.getLength(), 0); + if (delta > 0) { + structure.growStructure(delta); + } + } + + private List fields(Map spec) { + ArrayList result = new ArrayList<>(); + for (Map.Entry fieldEntry : asMap(spec.get("fields")).entrySet()) { + Map field = asMap(fieldEntry.getValue()); + result.add(new FieldSpec( + fieldEntry.getKey(), + intValue(field.get("offset"), 0), + asMap(field.get("type")), + boolValue(field.get("anonymous"), false) + )); + } + result.sort(Comparator.comparingInt((FieldSpec f) -> f.offset).thenComparing(f -> f.name)); + return result; + } + + private void addStructureField(StructureDataType structure, FieldSpec field) + throws InvalidDataTypeException { + Map typeSpec = field.typeSpec; + String name = field.anonymous ? 
null : field.name; + if ("bitfield".equals(stringValue(typeSpec.get("kind"), ""))) { + DataType baseType = typeFromSpec(asMap(typeSpec.get("type"))); + int bitLength = intValue(typeSpec.get("bit_length"), 0); + int bitPosition = intValue(typeSpec.get("bit_position"), 0); + int byteWidth = Math.max(baseType.getLength(), 1); + structure.insertBitFieldAt(field.offset, byteWidth, bitPosition, baseType, bitLength, name, null); + return; + } + + DataType fieldType = typeFromSpec(typeSpec); + int length = Math.max(fieldType.getLength(), 1); + structure.insertAtOffset(field.offset, fieldType, length, name, null); + } + + private void addUnionField(UnionDataType union, FieldSpec field) throws InvalidDataTypeException { + Map typeSpec = field.typeSpec; + String name = field.anonymous ? null : field.name; + if ("bitfield".equals(stringValue(typeSpec.get("kind"), ""))) { + DataType baseType = typeFromSpec(asMap(typeSpec.get("type"))); + union.addBitField(baseType, intValue(typeSpec.get("bit_length"), 0), name, null); + return; + } + DataType fieldType = typeFromSpec(typeSpec); + union.add(fieldType, Math.max(fieldType.getLength(), 1), name, null); + } + + private void importTypedefs() { + for (Map.Entry entry : objectMap("typedefs").entrySet()) { + ensureTypedef(entry.getKey()); + } + } + + private DataType ensureTypedef(String name) { + DataType existing = typedefs.get(name); + if (existing != null) { + return existing; + } + if (resolvingTypedefs.contains(name)) { + return ByteDataType.dataType; + } + Object targetSpec = objectMap("typedefs").get(name); + if (targetSpec == null) { + return null; + } + + resolvingTypedefs.add(name); + DataType target = typeFromSpec(asMap(targetSpec)); + resolvingTypedefs.remove(name); + + DataType typedef = new TypedefDataType(ISF_CATEGORY, name, target, dataTypeManager); + DataType resolved = dataTypeManager.resolve(typedef, DataTypeConflictHandler.REPLACE_HANDLER); + typedefs.put(name, resolved); + typedefCount += 1; + return resolved; + } 
+ + void importSymbols(Listing listing, SymbolTable symbolTable) { + for (Map.Entry entry : objectMap("symbols").entrySet()) { + Map spec = asMap(entry.getValue()); + Address address = address(spec.get("address")); + if (address == null) { + continue; + } + try { + if (symbolTable.getGlobalSymbol(entry.getKey(), address) == null) { + symbolTable.createLabel(address, entry.getKey(), SourceType.IMPORTED); + } + if (spec.containsKey("type")) { + DataType dataType = typeFromSpec(asMap(spec.get("type"))); + if (dataType != VoidDataType.dataType && listing.getDataAt(address) == null) { + listing.createData(address, dataType); + } + } + symbolCount += 1; + } catch (InvalidInputException | CodeUnitInsertionException ignored) { + // Keep importing the rest of the ISF if one address is invalid or occupied. + } + } + } + + void importFunctions() { + for (Map.Entry entry : objectMap("functions").entrySet()) { + Map spec = asMap(entry.getValue()); + Address address = address(spec.get("address")); + if (address == null) { + continue; + } + try { + Function function = currentProgram.getFunctionManager().getFunctionAt(address); + if (function == null) { + function = currentProgram.getFunctionManager().createFunction( + entry.getKey(), + address, + new AddressSet(address), + SourceType.IMPORTED + ); + } + function.setReturnType(typeFromSpec(asMap(spec.get("return_type"))), SourceType.IMPORTED); + + ArrayList parameters = new ArrayList<>(); + int ordinal = 0; + for (Object parameterObject : asList(spec.get("parameters"))) { + Map parameter = asMap(parameterObject); + String paramName = stringValue(parameter.get("name"), "param_" + ordinal); + parameters.add(new ParameterImpl( + paramName, + typeFromSpec(asMap(parameter.get("type"))), + ordinal, + currentProgram, + SourceType.IMPORTED + )); + ordinal += 1; + } + function.replaceParameters( + parameters, + Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, + true, + SourceType.IMPORTED + ); + functionCount += 1; + } catch 
(InvalidInputException | DuplicateNameException | OverlappingFunctionException ignored) { + // Keep importing remaining functions if this address cannot host a function. + } + } + } + + private DataType typeFromSpec(Map spec) { + String kind = stringValue(spec.get("kind"), "base"); + String name = stringValue(spec.get("name"), ""); + + if ("base".equals(kind)) { + DataType dataType = typedefs.get(name); + if (dataType == null) { + dataType = dataTypeManager.getDataType(ISF_CATEGORY, name); + } + if (dataType == null) { + dataType = baseTypes.get(name); + } + return dataType == null ? ByteDataType.dataType : dataType; + } + if ("typedef".equals(kind)) { + DataType dataType = typedefs.get(name); + if (dataType == null) { + dataType = ensureTypedef(name); + } + if (dataType == null) { + dataType = dataTypeManager.getDataType(ISF_CATEGORY, name); + } + return dataType == null ? ByteDataType.dataType : dataType; + } + if ("struct".equals(kind) || "union".equals(kind)) { + DataType dataType = userTypes.get(name); + if (dataType == null) { + dataType = dataTypeManager.getDataType(ISF_CATEGORY, name); + } + return dataType == null ? ByteDataType.dataType : dataType; + } + if ("enum".equals(kind)) { + DataType dataType = enums.get(name); + if (dataType == null) { + dataType = dataTypeManager.getDataType(ISF_CATEGORY, name); + } + return dataType == null ? 
ByteDataType.dataType : dataType; + } + if ("pointer".equals(kind)) { + return new PointerDataType(typeFromSpec(asMap(spec.get("subtype"))), dataTypeManager); + } + if ("array".equals(kind)) { + DataType subtype = typeFromSpec(asMap(spec.get("subtype"))); + return new ArrayDataType(subtype, intValue(spec.get("count"), 0), Math.max(subtype.getLength(), 1), dataTypeManager); + } + if ("function".equals(kind)) { + return new PointerDataType(VoidDataType.dataType, dataTypeManager); + } + return ByteDataType.dataType; + } + + private Address address(Object value) { + try { + return currentProgram.getAddressFactory() + .getDefaultAddressSpace() + .getAddress(longValue(value, 0)); + } catch (Exception e) { + return null; + } + } + + private Map objectMap(String key) { + return asMap(isf.get(key)); + } + } + + private static final class FieldSpec { + final String name; + final int offset; + final Map typeSpec; + final boolean anonymous; + + FieldSpec(String name, int offset, Map typeSpec, boolean anonymous) { + this.name = name; + this.offset = offset; + this.typeSpec = typeSpec; + this.anonymous = anonymous; + } + } + + @SuppressWarnings("unchecked") + private static Map asMap(Object value) { + if (value instanceof Map) { + return (Map) value; + } + return new LinkedHashMap<>(); + } + + private static List asList(Object value) { + if (value instanceof List) { + return (List) value; + } + return new ArrayList<>(); + } + + private static String stringValue(Object value, String fallback) { + return value == null ? fallback : String.valueOf(value); + } + + private static int intValue(Object value, int fallback) { + if (value instanceof Number) { + return ((Number) value).intValue(); + } + try { + return value == null ? 
fallback : Integer.parseInt(String.valueOf(value)); + } catch (NumberFormatException e) { + return fallback; + } + } + + private static long longValue(Object value, long fallback) { + if (value instanceof Number) { + return ((Number) value).longValue(); + } + try { + return value == null ? fallback : Long.parseLong(String.valueOf(value)); + } catch (NumberFormatException e) { + return fallback; + } + } + + private static boolean boolValue(Object value, boolean fallback) { + if (value instanceof Boolean) { + return (Boolean) value; + } + return value == null ? fallback : Boolean.parseBoolean(String.valueOf(value)); + } + + private static final class JsonParser { + private final String text; + private int index; + + JsonParser(String text) { + this.text = text; + } + + Object parse() { + Object value = parseValue(); + skipWhitespace(); + if (index != text.length()) { + throw error("Unexpected trailing input"); + } + return value; + } + + private Object parseValue() { + skipWhitespace(); + if (index >= text.length()) { + throw error("Unexpected end of input"); + } + char c = text.charAt(index); + if (c == '{') { + return parseObject(); + } + if (c == '[') { + return parseArray(); + } + if (c == '"') { + return parseString(); + } + if (c == 't') { + consumeLiteral("true"); + return Boolean.TRUE; + } + if (c == 'f') { + consumeLiteral("false"); + return Boolean.FALSE; + } + if (c == 'n') { + consumeLiteral("null"); + return null; + } + if (c == '-' || Character.isDigit(c)) { + return parseNumber(); + } + throw error("Unexpected character: " + c); + } + + private Map parseObject() { + expect('{'); + LinkedHashMap result = new LinkedHashMap<>(); + skipWhitespace(); + if (peek('}')) { + expect('}'); + return result; + } + while (true) { + String key = parseString(); + skipWhitespace(); + expect(':'); + result.put(key, parseValue()); + skipWhitespace(); + if (peek('}')) { + expect('}'); + return result; + } + expect(','); + } + } + + private List parseArray() { + 
expect('['); + ArrayList result = new ArrayList<>(); + skipWhitespace(); + if (peek(']')) { + expect(']'); + return result; + } + while (true) { + result.add(parseValue()); + skipWhitespace(); + if (peek(']')) { + expect(']'); + return result; + } + expect(','); + } + } + + private String parseString() { + expect('"'); + StringBuilder builder = new StringBuilder(); + while (index < text.length()) { + char c = text.charAt(index++); + if (c == '"') { + return builder.toString(); + } + if (c != '\\') { + builder.append(c); + continue; + } + if (index >= text.length()) { + throw error("Unterminated escape"); + } + char escaped = text.charAt(index++); + switch (escaped) { + case '"': + case '\\': + case '/': + builder.append(escaped); + break; + case 'b': + builder.append('\b'); + break; + case 'f': + builder.append('\f'); + break; + case 'n': + builder.append('\n'); + break; + case 'r': + builder.append('\r'); + break; + case 't': + builder.append('\t'); + break; + case 'u': + if (index + 4 > text.length()) { + throw error("Invalid unicode escape"); + } + builder.append((char) Integer.parseInt(text.substring(index, index + 4), 16)); + index += 4; + break; + default: + throw error("Invalid escape: " + escaped); + } + } + throw error("Unterminated string"); + } + + private Number parseNumber() { + int start = index; + if (peek('-')) { + index += 1; + } + while (index < text.length() && Character.isDigit(text.charAt(index))) { + index += 1; + } + boolean isFloating = false; + if (peek('.')) { + isFloating = true; + index += 1; + while (index < text.length() && Character.isDigit(text.charAt(index))) { + index += 1; + } + } + if (peek('e') || peek('E')) { + isFloating = true; + index += 1; + if (peek('+') || peek('-')) { + index += 1; + } + while (index < text.length() && Character.isDigit(text.charAt(index))) { + index += 1; + } + } + String number = text.substring(start, index); + return isFloating ? 
Double.parseDouble(number) : Long.parseLong(number); + } + + private void consumeLiteral(String literal) { + if (!text.startsWith(literal, index)) { + throw error("Expected " + literal); + } + index += literal.length(); + } + + private boolean peek(char c) { + return index < text.length() && text.charAt(index) == c; + } + + private void expect(char c) { + skipWhitespace(); + if (!peek(c)) { + throw error("Expected '" + c + "'"); + } + index += 1; + } + + private void skipWhitespace() { + while (index < text.length() && Character.isWhitespace(text.charAt(index))) { + index += 1; + } + } + + private IllegalArgumentException error(String message) { + return new IllegalArgumentException(message + " at byte " + index); + } + } +} diff --git a/src/dwarffi/ghidra_scripts/__init__.py b/src/dwarffi/ghidra_scripts/__init__.py new file mode 100644 index 0000000..d82bdb1 --- /dev/null +++ b/src/dwarffi/ghidra_scripts/__init__.py @@ -0,0 +1 @@ +"""Bundled Ghidra scripts for exporting ISF data.""" diff --git a/tests/test_ghidra2isf.py b/tests/test_ghidra2isf.py new file mode 100644 index 0000000..aaed449 --- /dev/null +++ b/tests/test_ghidra2isf.py @@ -0,0 +1,359 @@ +from __future__ import annotations + +import json +import os +import shutil +import stat +import subprocess +import urllib.request +import zipfile +from pathlib import Path + +import pytest + +from dwarffi import DFFI, VtypeJson + + +GHIDRA_SCRIPT_DIR = ( + Path(__file__).resolve().parents[1] + / "src" + / "dwarffi" + / "ghidra_scripts" +) +EXPORT_SCRIPT_PATH = GHIDRA_SCRIPT_DIR / "Ghidra2ISF.java" +IMPORT_SCRIPT_PATH = GHIDRA_SCRIPT_DIR / "ISF2Ghidra.java" + + +def _sample_full_profile_isf() -> dict: + return { + "metadata": { + "producer": {"name": "ghidra2isf", "version": "0.1.0"}, + "format": "6.2.0", + }, + "base_types": { + "void": {"size": 0, "signed": False, "kind": "void", "endian": "little"}, + "pointer": {"size": 8, "signed": False, "kind": "pointer", "endian": "little"}, + "uint8_t": {"size": 1, 
"signed": False, "kind": "int", "endian": "little"}, + "uint16_t": {"size": 2, "signed": False, "kind": "int", "endian": "little"}, + "uint32_t": {"size": 4, "signed": False, "kind": "int", "endian": "little"}, + "int": {"size": 4, "signed": True, "kind": "int", "endian": "little"}, + }, + "user_types": { + "Inner": { + "size": 4, + "kind": "struct", + "fields": { + "a": {"offset": 0, "type": {"kind": "base", "name": "uint16_t"}}, + "b": {"offset": 2, "type": {"kind": "base", "name": "uint8_t"}}, + }, + }, + "Value": { + "size": 4, + "kind": "union", + "fields": { + "word": {"offset": 0, "type": {"kind": "base", "name": "uint32_t"}}, + "bytes": { + "offset": 0, + "type": { + "kind": "array", + "count": 4, + "subtype": {"kind": "base", "name": "uint8_t"}, + }, + }, + }, + }, + "Packet": { + "size": 16, + "kind": "struct", + "fields": { + "id": {"offset": 0, "type": {"kind": "typedef", "name": "my_u32"}}, + "inner": {"offset": 4, "type": {"kind": "struct", "name": "Inner"}}, + "value": {"offset": 8, "type": {"kind": "union", "name": "Value"}}, + "flags": { + "offset": 12, + "type": { + "kind": "bitfield", + "bit_length": 3, + "bit_position": 0, + "type": {"kind": "base", "name": "uint8_t"}, + }, + }, + }, + }, + }, + "enums": { + "Color": { + "size": 4, + "base": "int", + "constants": {"RED": 1, "BLUE": 2}, + } + }, + "symbols": { + "global_counter": { + "address": 0x4010, + "type": {"kind": "base", "name": "int"}, + } + }, + "functions": { + "add_packet": { + "address": 0x1000, + "return_type": {"kind": "base", "name": "int"}, + "parameters": [ + { + "name": "p", + "type": { + "kind": "pointer", + "subtype": {"kind": "struct", "name": "Packet"}, + }, + }, + {"name": "x", "type": {"kind": "base", "name": "int"}}, + ], + } + }, + "typedefs": { + "my_u32": {"kind": "base", "name": "uint32_t"}, + }, + } + + +def test_ghidra2isf_script_is_bundled() -> None: + source = EXPORT_SCRIPT_PATH.read_text(encoding="utf-8") + + assert "public class Ghidra2ISF extends GhidraScript" 
in source + assert '"base_types"' in source + assert '"functions"' in source + assert "--types-only" in source + + +def test_isf2ghidra_script_is_bundled() -> None: + source = IMPORT_SCRIPT_PATH.read_text(encoding="utf-8") + + assert "public class ISF2Ghidra extends GhidraScript" in source + assert 'new CategoryPath("/ISF")' in source + assert "importTypedefs" in source + assert "--no-functions" in source + + +def test_ghidra2isf_full_profile_shape_loads_in_dwarffi(tmp_path: Path) -> None: + isf_path = tmp_path / "ghidra_sample.isf.json" + isf_path.write_text(json.dumps(_sample_full_profile_isf()), encoding="utf-8") + + parsed = VtypeJson(str(isf_path)) + assert parsed.get_type("Packet") is not None + assert parsed.get_enum("Color") is not None + assert parsed.get_symbol("global_counter") is not None + assert parsed.get_function("add_packet") is not None + + ffi = DFFI(str(isf_path)) + assert ffi.sizeof("Packet") == 16 + assert ffi.sizeof("my_u32") == 4 + + +def _download_ghidra(cache_dir: Path) -> Path: + version = os.environ.get("DFFI_GHIDRA_VERSION", "12.1_PUBLIC_20260513") + build = os.environ.get("DFFI_GHIDRA_BUILD", "Ghidra_12.1_build") + dirname = f"ghidra_{version}" + install_dir = cache_dir / dirname + if (install_dir / "support" / "analyzeHeadless").exists(): + _ensure_executable(install_dir / "support" / "analyzeHeadless") + return install_dir + for candidate in cache_dir.glob("ghidra_*"): + if (candidate / "support" / "analyzeHeadless").exists(): + _ensure_executable(candidate / "support" / "analyzeHeadless") + return candidate + + url = os.environ.get( + "DFFI_GHIDRA_URL", + f"https://github.com/NationalSecurityAgency/ghidra/releases/download/{build}/{dirname}.zip", + ) + zip_path = cache_dir / f"{dirname}.zip" + cache_dir.mkdir(parents=True, exist_ok=True) + if not zip_path.exists(): + urllib.request.urlretrieve(url, zip_path) + + with zipfile.ZipFile(zip_path) as archive: + archive.extractall(cache_dir) + if (install_dir / "support" / 
"analyzeHeadless").exists(): + _ensure_executable(install_dir / "support" / "analyzeHeadless") + return install_dir + for candidate in cache_dir.glob("ghidra_*"): + if (candidate / "support" / "analyzeHeadless").exists(): + _ensure_executable(candidate / "support" / "analyzeHeadless") + return candidate + raise FileNotFoundError("Downloaded Ghidra archive did not contain support/analyzeHeadless") + + +def _ensure_executable(path: Path) -> None: + path.chmod(path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + for helper in path.parent.glob("*.sh"): + helper.chmod(helper.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + + +def _ghidra_home() -> Path | None: + env_home = os.environ.get("GHIDRA_HOME") + if env_home: + return Path(env_home) + + analyze_headless = shutil.which("analyzeHeadless") + if analyze_headless: + return Path(analyze_headless).resolve().parents[1] + + if os.environ.get("DFFI_GHIDRA_DOWNLOAD") == "1": + return _download_ghidra(Path(os.environ.get("DFFI_GHIDRA_CACHE", "/tmp/dwarffi-ghidra"))) + + return None + + +@pytest.mark.skipif( + os.environ.get("DFFI_GHIDRA_TEST") != "1", + reason="set DFFI_GHIDRA_TEST=1 to run the Ghidra integration test", +) +def test_ghidra2isf_exports_real_program_with_analyze_headless(tmp_path: Path) -> None: + ghidra_home = _ghidra_home() + if ghidra_home is None: + pytest.skip("GHIDRA_HOME/analyzeHeadless not found; set DFFI_GHIDRA_DOWNLOAD=1 to download") + + analyze_headless = ghidra_home / "support" / "analyzeHeadless" + gcc = shutil.which("gcc") + if gcc is None: + pytest.skip("gcc is required for the Ghidra integration fixture") + + source_path = tmp_path / "fixture.c" + binary_path = tmp_path / "fixture" + isf_path = tmp_path / "fixture.isf.json" + project_dir = tmp_path / "ghidra_project" + ghidra_user_home = tmp_path / "ghidra_home" + source_path.write_text( + """ + #include + + enum Color { RED = 1, BLUE = 2 }; + typedef uint32_t my_u32; + + struct Inner { + uint16_t a; + uint8_t b; 
+ }; + + union Value { + uint32_t word; + uint8_t bytes[4]; + }; + + struct Packet { + my_u32 id; + struct Inner inner; + union Value value; + uint8_t flags; + }; + + int global_counter = 7; + + int add_packet(struct Packet *p, int x) { + return (int)p->id + x + global_counter; + } + """, + encoding="utf-8", + ) + + subprocess.run( + [ + gcc, + "-g", + "-O0", + "-fno-eliminate-unused-debug-types", + "-c", + str(source_path), + "-o", + str(binary_path), + ], + check=True, + text=True, + capture_output=True, + ) + + env = os.environ.copy() + env["HOME"] = str(ghidra_user_home) + env["XDG_CONFIG_HOME"] = str(ghidra_user_home / ".config") + env["XDG_CACHE_HOME"] = str(ghidra_user_home / ".cache") + env["XDG_DATA_HOME"] = str(ghidra_user_home / ".local" / "share") + java_path = shutil.which("java") + if java_path and "JAVA_HOME" not in env: + env["JAVA_HOME"] = str(Path(java_path).resolve().parents[1]) + java_options = env.get("JAVA_TOOL_OPTIONS", "") + localhost_options = "-Djava.net.preferIPv4Stack=true -Djava.rmi.server.hostname=localhost" + env["JAVA_TOOL_OPTIONS"] = f"{java_options} {localhost_options}".strip() + + result = subprocess.run( + [ + str(analyze_headless), + str(project_dir), + "DffiGhidraTest", + "-import", + str(binary_path), + "-scriptPath", + str(GHIDRA_SCRIPT_DIR), + "-postScript", + "Ghidra2ISF.java", + str(isf_path), + "-deleteProject", + ], + text=True, + capture_output=True, + timeout=180, + env=env, + ) + if result.returncode != 0: + output = result.stdout + result.stderr + if "InetAddress.getLocalHost" in output or "Name or service not known" in output: + pytest.skip("Ghidra cannot resolve the container hostname in this environment") + raise subprocess.CalledProcessError( + result.returncode, + result.args, + output=result.stdout, + stderr=result.stderr, + ) + + exported = json.loads(isf_path.read_text(encoding="utf-8")) + assert exported["metadata"]["producer"]["name"] == "ghidra2isf" + assert exported["user_types"] + assert 
exported["symbols"] + assert exported["functions"] + + parsed = VtypeJson(str(isf_path)) + assert parsed.get_function("add_packet") is not None + + +@pytest.mark.skipif( + os.environ.get("DFFI_GHIDRA_TEST") != "1", + reason="set DFFI_GHIDRA_TEST=1 to compile bundled Ghidra scripts", +) +def test_bundled_ghidra_scripts_compile_against_ghidra(tmp_path: Path) -> None: + ghidra_home = _ghidra_home() + if ghidra_home is None: + pytest.skip("GHIDRA_HOME/analyzeHeadless not found; set DFFI_GHIDRA_DOWNLOAD=1 to download") + javac = shutil.which("javac") + if javac is None: + pytest.skip("javac is required to compile bundled Ghidra scripts") + + jars = [str(path) for path in ghidra_home.rglob("*.jar")] + if not jars: + pytest.skip("No Ghidra jars found") + + classes_dir = tmp_path / "classes" + classes_dir.mkdir() + subprocess.run( + [ + javac, + "-proc:none", + "-cp", + os.pathsep.join(jars), + "-d", + str(classes_dir), + str(EXPORT_SCRIPT_PATH), + str(IMPORT_SCRIPT_PATH), + ], + check=True, + text=True, + capture_output=True, + ) From 425c5bc507f1444ab286211481eafb6940a35cc3 Mon Sep 17 00:00:00 2001 From: Luke Craig Date: Thu, 28 May 2026 22:50:20 -0400 Subject: [PATCH 2/2] ruff --- tests/test_ghidra2isf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_ghidra2isf.py b/tests/test_ghidra2isf.py index aaed449..608102b 100644 --- a/tests/test_ghidra2isf.py +++ b/tests/test_ghidra2isf.py @@ -13,7 +13,6 @@ from dwarffi import DFFI, VtypeJson - GHIDRA_SCRIPT_DIR = ( Path(__file__).resolve().parents[1] / "src"