From 66692042a9ab2df6719032cdc472a7f8b2a5ed29 Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Mon, 25 May 2026 17:57:10 +0530 Subject: [PATCH 01/14] Enhance ghidra backend with existing project feature --- capa/ghidra/helpers.py | 49 +++++++++++++++++++++++++++++++++++++++++- capa/loader.py | 35 ++++++++++++++++++++++-------- capa/main.py | 22 +++++++++++++++++-- rules | 2 +- tests/data | 2 +- tests/test_main.py | 14 ++++++++++++ 6 files changed, 110 insertions(+), 14 deletions(-) diff --git a/capa/ghidra/helpers.py b/capa/ghidra/helpers.py index c71fbd88fc..b0b48479de 100644 --- a/capa/ghidra/helpers.py +++ b/capa/ghidra/helpers.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import logging import datetime import contextlib @@ -28,7 +29,11 @@ logger = logging.getLogger("capa") # file type as returned by Ghidra -SUPPORTED_FILE_TYPES = ("Executable and Linking Format (ELF)", "Portable Executable (PE)", "Raw Binary") +SUPPORTED_FILE_TYPES = ( + "Executable and Linking Format (ELF)", + "Portable Executable (PE)", + "Raw Binary", +) def get_current_program(): @@ -43,6 +48,48 @@ def get_monitor(): return ghidra_context.get_context().monitor +def iter_program_files(folder): + yield from folder.getFiles() + + for child_folder in folder.getFolders(): + yield from iter_program_files(child_folder) + + +# Programs within a Ghidra project +def list_project_files(project): + project_data = project.getProjectData() + root_folder = project_data.getRootFolder() + return list(iter_program_files(root_folder)) + + +def select_project_file(project): + programs = list_project_files(project) + + if not programs: + raise ValueError("no programs found in Ghidra project") + + if len(programs) == 1: + return programs[0] + + requested_path = os.environ.get("CAPA_GHIDRA_PROGRAM_PATH") + if requested_path: + for program in programs: + if program.getPathname() == requested_path: + return program + + available = "\n".join(f"- {program.getPathname()}" for program in programs) + raise ValueError( + f"""CAPA_GHIDRA_PROGRAM_PATH did not match any program in the Ghidra project\n + available programs:\n{available}""" + ) + + available = "\n".join(f"- {program.getPathname()}" for program in programs) + raise ValueError( + f"""multiple programs found in the Ghidra project\n available programs:\n{available}\n + set CAPA_GHIDRA_PROGRAM_PATH to select one""" + ) + + class GHIDRAIO: """ An object that acts as a file-like object, diff --git a/capa/loader.py b/capa/loader.py index b0895b2524..b31f2497d7 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -32,6 +32,7 @@ from capa.engine import MatchResults from capa.helpers import assert_never from capa.exceptions import ( + InvalidArgument, UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, @@ -434,24 +435,39 @@ def get_extractor( if not capa.ghidra.helpers.is_supported_ghidra_version(): raise RuntimeError("unsupported Ghidra version") - import tempfile + project_path = input_path + tmpdir = None + if input_path.suffix.lower() == ".gpr": + project_cm = pyghidra.open_project(str(project_path.parent), project_path.stem, create=False) + else: + import tempfile - tmpdir = tempfile.TemporaryDirectory() + tmpdir = tempfile.TemporaryDirectory() + project_cm = pyghidra.open_project(tmpdir.name, "CapaProject", create=True) - project_cm = pyghidra.open_project(tmpdir.name, "CapaProject", create=True) project = project_cm.__enter__() try: from ghidra.util.task import TaskMonitor monitor = TaskMonitor.DUMMY - # Import file - loader = pyghidra.program_loader().project(project).source(str(input_path)).name(input_path.name) - with loader.load() as load_results: - load_results.save(monitor) + if input_path.suffix.lower() == ".gpr": + try: + selected_program = capa.ghidra.helpers.select_project_file(project) + except ValueError as e: + raise InvalidArgument(str(e)) from e + program_path = selected_program.getPathname() + logger.debug("ghidra: selected program path: %s", program_path) + else: + # Import file + loader = pyghidra.program_loader().project(project).source(str(input_path)).name(input_path.name) + with loader.load() as load_results: + load_results.save(monitor) + + program_path = "/" + input_path.name # Open program - program, consumer = pyghidra.consume_program(project, "/" + input_path.name) + program, consumer = pyghidra.consume_program(project, program_path) # Analyze pyghidra.analyze(program, monitor) @@ -479,7 +495,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): except Exception: project_cm.__exit__(None, None, None) - tmpdir.cleanup() + if tmpdir: + tmpdir.cleanup() raise import capa.features.extractors.ghidra.extractor diff --git a/capa/main.py b/capa/main.py index 837974f54c..615f29f8ec 100644 --- a/capa/main.py +++ b/capa/main.py @@ -555,6 +555,9 @@ def get_input_format_from_cli(args) -> str: if format_ != FORMAT_AUTO: return format_ + if args.input_file.suffix.lower() == ".gpr": + return FORMAT_AUTO + try: return get_auto_format(args.input_file) except PEFormatError as e: @@ -580,6 +583,9 @@ def get_backend_from_cli(args, input_format: str) -> str: if args.backend != BACKEND_AUTO: return args.backend + if args.input_file.suffix.lower() == ".gpr": + return BACKEND_GHIDRA + if input_format == FORMAT_CAPE: return BACKEND_CAPE @@ -618,6 +624,8 @@ def get_sample_path_from_cli(args, backend: str) -> Optional[Path]: """ if backend in (BACKEND_CAPE, BACKEND_DRAKVUF, BACKEND_VMRAY): return None + elif backend == BACKEND_GHIDRA: + return None elif backend == BACKEND_BINEXPORT2: import capa.features.extractors.binexport2 @@ -726,6 +734,10 @@ def get_file_extractors_from_cli(args, input_format: str) -> list[FeatureExtract # # this pass can inspect multiple file extractors, e.g., dotnet and pe to identify # various limitations + if args.input_file.suffix.lower() == ".gpr": + logger.debug("skipping generic file extractor probe for Ghidra project input") + return [] + try: return capa.loader.get_file_extractors(args.input_file, input_format) except PEFormatError as e: @@ -869,7 +881,7 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr os_ = get_os_from_cli(args, backend) sample_path = get_sample_path_from_cli(args, backend) - extractor_filters = get_extractor_filters_from_cli(args, input_format) + extractor_filters = get_extractor_filters_from_cli(args, input_format, backend) logger.debug("format: %s", input_format) logger.debug("backend: %s", backend) @@ -886,6 +898,9 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr sample_path=sample_path, ) return apply_extractor_filters(extractor, extractor_filters) + except InvalidArgument as e: + logger.error("%s", str(e)) + raise ShouldExitError(E_INVALID_INPUT_FORMAT) from e except UnsupportedFormatError as e: if input_format == FORMAT_CAPE: log_unsupported_cape_report_error(str(e)) @@ -907,11 +922,14 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr raise ShouldExitError(E_CORRUPT_FILE) from e -def get_extractor_filters_from_cli(args, input_format) -> FilterConfig: +def get_extractor_filters_from_cli(args, input_format, backend: Optional[str] = None) -> FilterConfig: if not hasattr(args, "restrict_to_processes") and not hasattr(args, "restrict_to_functions"): # no processes or function filters were installed in the args return {} + if backend == BACKEND_GHIDRA: + return {} + if input_format in STATIC_FORMATS: if args.restrict_to_processes: raise InvalidArgument("Cannot filter processes with static analysis.") diff --git a/rules b/rules index aed45e2571..e240160da4 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit aed45e2571ebf7d2330e3daddbb5c472cc54966e +Subproject commit e240160da4e38238afc8fd6bbdf47be7a454df2d diff --git a/tests/data b/tests/data index ef87fcedcc..a24fe4e48c 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit ef87fcedcc69fa18acf669d8cf194f11a03b26ff +Subproject commit a24fe4e48c130d58fee33832339372fc1840bcc8 diff --git a/tests/test_main.py b/tests/test_main.py index 10a316a2c4..6b00f385c1 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -15,6 +15,7 @@ import gzip import json import textwrap +from types import SimpleNamespace from pathlib import Path import fixtures @@ -352,3 +353,16 @@ def test_main_cape_gzip(): / "./data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz" ) assert capa.main.main([path]) == 0 + + +def test_gpr_uses_ghidra_backend(tmp_path): + args = SimpleNamespace(input_file=tmp_path / "sample.gpr", backend=capa.main.BACKEND_AUTO) + + assert capa.main.get_backend_from_cli(args, FORMAT_AUTO) == capa.main.BACKEND_GHIDRA + assert capa.main.get_sample_path_from_cli(args, capa.main.BACKEND_GHIDRA) is None + + +def test_gpr_skips_generic_file_extractor_probe(tmp_path): + args = SimpleNamespace(input_file=tmp_path / "sample.gpr") + + assert capa.main.get_file_extractors_from_cli(args, FORMAT_AUTO) == [] From b388cb161141f33deef4b1b5015b55153f1943bd Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Mon, 25 May 2026 18:28:28 +0530 Subject: [PATCH 02/14] Update changelog with new feature --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1381441c2..adc89d3e61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,6 +147,7 @@ This release includes Ghidra PyGhidra support, performance improvements, depende - ghidra: support PyGhidra @mike-hunhoff #2788 - vmray: extract number features from whitelisted void_ptr parameters (hKey, hKeyRoot) @adeboyedn #2835 +- ghidra: support analyzing existing Ghidra projects via .gpr:program input syntax @saniyafatima07 #3087 ### Breaking Changes From 1525906724e8eb401728e3ed2dfd137e3ef0f1e2 Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Mon, 25 May 2026 18:40:30 +0530 Subject: [PATCH 03/14] Fix errors --- capa/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/loader.py b/capa/loader.py index b31f2497d7..2c8486d676 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -16,7 +16,7 @@ import logging import datetime import contextlib -from typing import Optional +from typing import Optional, assert_never from pathlib import Path from rich.console import Console From 957b854b3e414cfa4530dd6b4f7c664fdc119bdf Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Mon, 25 May 2026 19:09:58 +0530 Subject: [PATCH 04/14] Update documentation --- doc/usage.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/usage.md b/doc/usage.md index d8ec6c9587..4f381fe577 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -43,6 +43,9 @@ which threads perform what actions (encrypt/decrypt data, initiate a connection, ### IDA Pro plugin: capa explorer Please check out the [capa explorer documentation](/capa/ida/plugin/README.md). +### Ghidra project support +Capa can analyze programs directly from Ghidra projects by specifying the project file path (`.gpr`). If the project contains multiple programs, set the `CAPA_GHIDRA_PROGRAM_PATH` environment variable to specify which program to analyze. For example: `CAPA_GHIDRA_PROGRAM_PATH=/myprogram capa /path/to/project.gpr`. + ### save time by reusing .viv files Set the environment variable `CAPA_SAVE_WORKSPACE` to instruct the underlying analysis engine to cache its intermediate results to the file system. For example, vivisect will create `.viv` files. From 45fbf0bbcc4da61ffda9f8e10675031c61ec174c Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Tue, 26 May 2026 20:10:51 +0530 Subject: [PATCH 05/14] Minor fix --- capa/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/loader.py b/capa/loader.py index 2c8486d676..25a0d71d8f 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -438,7 +438,7 @@ def get_extractor( project_path = input_path tmpdir = None if input_path.suffix.lower() == ".gpr": - project_cm = pyghidra.open_project(str(project_path.parent), project_path.stem, create=False) + project_cm = pyghidra.open_project(str(project_path.parent.resolve()), project_path.stem, create=False) else: import tempfile From 93385325b9cda30ca3cf56d10fe5f79e83f57457 Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Thu, 28 May 2026 23:22:40 +0530 Subject: [PATCH 06/14] Fix: Changelog and doc --- CHANGELOG.md | 2 +- doc/usage.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index adc89d3e61..5546d20700 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,7 +147,7 @@ This release includes Ghidra PyGhidra support, performance improvements, depende - ghidra: support PyGhidra @mike-hunhoff #2788 - vmray: extract number features from whitelisted void_ptr parameters (hKey, hKeyRoot) @adeboyedn #2835 -- ghidra: support analyzing existing Ghidra projects via .gpr:program input syntax @saniyafatima07 #3087 +- ghidra: support analyzing existing Ghidra projects via .gpr input files @saniyafatima07 #3087 ### Breaking Changes diff --git a/doc/usage.md b/doc/usage.md index 4f381fe577..e5482e9112 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -44,7 +44,7 @@ which threads perform what actions (encrypt/decrypt data, initiate a connection, Please check out the [capa explorer documentation](/capa/ida/plugin/README.md). ### Ghidra project support -Capa can analyze programs directly from Ghidra projects by specifying the project file path (`.gpr`). If the project contains multiple programs, set the `CAPA_GHIDRA_PROGRAM_PATH` environment variable to specify which program to analyze. For example: `CAPA_GHIDRA_PROGRAM_PATH=/myprogram capa /path/to/project.gpr`. +capa can analyze programs directly from Ghidra projects by specifying the project file path (`.gpr`). If the project contains multiple programs, set the `CAPA_GHIDRA_PROGRAM_PATH` environment variable to specify which program to analyze. For example: `CAPA_GHIDRA_PROGRAM_PATH=/myprogram capa /path/to/project.gpr`. ### save time by reusing .viv files Set the environment variable `CAPA_SAVE_WORKSPACE` to instruct the underlying analysis engine to From c58bb2bd7cdc45f4595d1d745e161f28dd72ccc4 Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Thu, 28 May 2026 23:23:29 +0530 Subject: [PATCH 07/14] Fix: Formatting issues --- capa/ghidra/helpers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/capa/ghidra/helpers.py b/capa/ghidra/helpers.py index b0b48479de..23ae2b2d1d 100644 --- a/capa/ghidra/helpers.py +++ b/capa/ghidra/helpers.py @@ -79,14 +79,15 @@ def select_project_file(project): available = "\n".join(f"- {program.getPathname()}" for program in programs) raise ValueError( - f"""CAPA_GHIDRA_PROGRAM_PATH did not match any program in the Ghidra project\n - available programs:\n{available}""" + "CAPA_GHIDRA_PROGRAM_PATH did not match any program in the Ghidra project.\n" + + f"available programs:\n{available}" ) available = "\n".join(f"- {program.getPathname()}" for program in programs) raise ValueError( - f"""multiple programs found in the Ghidra project\n available programs:\n{available}\n - set CAPA_GHIDRA_PROGRAM_PATH to select one""" + "multiple programs found in the Ghidra project.\n" + + f"available programs:\n{available}\n" + + "set CAPA_GHIDRA_PROGRAM_PATH to select one" ) From 9f7ee5aeb961f240f4da4381974bb131804d24aa Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Thu, 28 May 2026 23:25:12 +0530 Subject: [PATCH 08/14] Handle cleanup during exception --- capa/loader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/capa/loader.py b/capa/loader.py index 25a0d71d8f..b3194a9dda 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -446,6 +446,7 @@ def get_extractor( project_cm = pyghidra.open_project(tmpdir.name, "CapaProject", create=True) project = project_cm.__enter__() + program, consumer = None, None try: from ghidra.util.task import TaskMonitor @@ -494,6 +495,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): cm = GhidraContextWrapper(project_cm, program, consumer) except Exception: + if program is not None: + program.release(consumer) project_cm.__exit__(None, None, None) if tmpdir: tmpdir.cleanup() From 58f211fd639e7ba2f7152e709084eb6a722bd875 Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Thu, 28 May 2026 23:51:59 +0530 Subject: [PATCH 09/14] Add additional logs for lock exception handling --- capa/main.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/capa/main.py b/capa/main.py index 615f29f8ec..e787863787 100644 --- a/capa/main.py +++ b/capa/main.py @@ -201,12 +201,21 @@ def simple_message_exception_handler( if exctype is KeyboardInterrupt: print("KeyboardInterrupt detected, program terminated", file=sys.stderr) else: - print( - f"Unexpected exception raised: {exctype}. Please run capa in debug mode (-d/--debug) " - + "to see the stack trace.\nPlease also report your issue on the capa GitHub page so we " - + "can improve the code! (https://github.com/mandiant/capa/issues)", - file=sys.stderr, - ) + exctype_str = str(exctype) + # Give a targeted message when the Ghidra project DB is locked. + if "LockException" in exctype_str or "ghidra.framework.store.LockException" in exctype_str: + print( + f"Unexpected exception raised: {exctype}.\n It looks like the Ghidra project database is locked. " + "Please close the project in the Ghidra GUI (or other process) and try again. For details, run in debug mode (-d/--debug).", + file=sys.stderr, + ) + else: + print( + f"Unexpected exception raised: {exctype}. Please run capa in debug mode (-d/--debug) " + + "to see the stack trace.\nPlease also report your issue on the capa GitHub page so we " + + "can improve the code! (https://github.com/mandiant/capa/issues)", + file=sys.stderr, + ) def install_common_args(parser, wanted=None): From c4b5ab5a03557640365056c2e66a35ae26062f3e Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Mon, 1 Jun 2026 19:30:52 +0530 Subject: [PATCH 10/14] Fix: import assert_never error --- capa/loader.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/capa/loader.py b/capa/loader.py index b3194a9dda..ebbacf5111 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -16,7 +16,7 @@ import logging import datetime import contextlib -from typing import Optional, assert_never +from typing import Optional from pathlib import Path from rich.console import Console @@ -31,12 +31,7 @@ from capa.rules import RuleSet from capa.engine import MatchResults from capa.helpers import assert_never -from capa.exceptions import ( - InvalidArgument, - UnsupportedOSError, - UnsupportedArchError, - UnsupportedFormatError, -) +from capa.exceptions import InvalidArgument, UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError from capa.features.common import ( OS_AUTO, FORMAT_PE, From b8622c926207e10d9b557269ea4c37b4212c232a Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Fri, 5 Jun 2026 19:19:08 +0530 Subject: [PATCH 11/14] Add exception and new return value --- capa/exceptions.py | 4 ++++ capa/loader.py | 22 ++++++++++++++++++++-- capa/main.py | 10 ++++++---- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/capa/exceptions.py b/capa/exceptions.py index c00420e100..773a7ba4f2 100644 --- a/capa/exceptions.py +++ b/capa/exceptions.py @@ -43,3 +43,7 @@ class NonExistantFunctionError(ValueError): class NonExistantProcessError(ValueError): pass + + +class LockedProjectDatabaseError(RuntimeError): + pass diff --git a/capa/loader.py b/capa/loader.py index ebbacf5111..2d03668870 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -31,7 +31,13 @@ from capa.rules import RuleSet from capa.engine import MatchResults from capa.helpers import assert_never -from capa.exceptions import InvalidArgument, UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError +from capa.exceptions import ( + InvalidArgument, + UnsupportedOSError, + UnsupportedArchError, + UnsupportedFormatError, + LockedProjectDatabaseError, +) from capa.features.common import ( OS_AUTO, FORMAT_PE, @@ -433,7 +439,19 @@ def get_extractor( project_path = input_path tmpdir = None if input_path.suffix.lower() == ".gpr": - project_cm = pyghidra.open_project(str(project_path.parent.resolve()), project_path.stem, create=False) + try: + project_cm = pyghidra.open_project( + str(project_path.parent.resolve()), project_path.stem, create=False + ) + except Exception as e: + err = str(e) + if "LockException" in err or "Database is locked" in err: + msg = ( + f"Ghidra project database is locked. Ensure all programs accessing " + f"{project_path.name} are closed before proceeding." + ) + raise LockedProjectDatabaseError(msg) from e + raise else: import tempfile diff --git a/capa/main.py b/capa/main.py index e787863787..5fe13fbd23 100644 --- a/capa/main.py +++ b/capa/main.py @@ -72,6 +72,7 @@ UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, + LockedProjectDatabaseError, ) from capa.features.common import ( OS_AUTO, @@ -130,6 +131,7 @@ E_UNSUPPORTED_GHIDRA_EXECUTION_MODE = 24 E_INVALID_INPUT_FORMAT = 25 E_INVALID_FEATURE_EXTRACTOR = 26 +E_GHIDRA_DB_LOCKED = 27 logger = logging.getLogger("capa") @@ -929,6 +931,9 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr except capa.loader.CorruptFile as e: logger.error("Input file '%s' is not a valid file: %s", args.input_file, str(e)) raise ShouldExitError(E_CORRUPT_FILE) from e + except LockedProjectDatabaseError as e: + logger.error("%s", str(e)) + raise ShouldExitError(E_GHIDRA_DB_LOCKED) from e def get_extractor_filters_from_cli(args, input_format, backend: Optional[str] = None) -> FilterConfig: @@ -936,10 +941,7 @@ def get_extractor_filters_from_cli(args, input_format, backend: Optional[str] = # no processes or function filters were installed in the args return {} - if backend == BACKEND_GHIDRA: - return {} - - if input_format in STATIC_FORMATS: + if input_format in STATIC_FORMATS or backend == BACKEND_GHIDRA: if args.restrict_to_processes: raise InvalidArgument("Cannot filter processes with static analysis.") return {"functions": {int(addr, 0) for addr in args.restrict_to_functions}} From ef70aa8620047c698ca06452fc95e18a5a5c023f Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Sat, 6 Jun 2026 16:46:01 +0530 Subject: [PATCH 12/14] Improve cleanup, add ghidra to static format --- capa/features/common.py | 2 ++ capa/loader.py | 13 ++++++++++--- capa/main.py | 27 ++++++++++----------------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/capa/features/common.py b/capa/features/common.py index fa2e29f926..4d2a00a51a 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -502,6 +502,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True): FORMAT_FREEZE = "freeze" FORMAT_RESULT = "result" FORMAT_BINJA_DB = "binja_database" +FORMAT_GHIDRA_PROJECT = "ghidra_project" STATIC_FORMATS = { FORMAT_SC32, FORMAT_SC64, @@ -512,6 +513,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True): FORMAT_RESULT, FORMAT_BINEXPORT2, FORMAT_BINJA_DB, + FORMAT_GHIDRA_PROJECT, } DYNAMIC_FORMATS = { FORMAT_CAPE, diff --git a/capa/loader.py b/capa/loader.py index 2d03668870..8099c67197 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -509,10 +509,17 @@ def __exit__(self, exc_type, exc_val, exc_tb): except Exception: if program is not None: - program.release(consumer) - project_cm.__exit__(None, None, None) + try: + program.release(consumer) + except Exception: + logger.warning("failed to release program handle", exc_info=True) + try: + project_cm.__exit__(None, None, None) + except Exception: + logger.warning("failed to close Ghidra project", exc_info=True) if tmpdir: - tmpdir.cleanup() + with contextlib.suppress(Exception): + tmpdir.cleanup() raise import capa.features.extractors.ghidra.extractor diff --git a/capa/main.py b/capa/main.py index 5fe13fbd23..9c188a8d5f 100644 --- a/capa/main.py +++ b/capa/main.py @@ -94,6 +94,7 @@ DYNAMIC_FORMATS, FORMAT_BINJA_DB, FORMAT_BINEXPORT2, + FORMAT_GHIDRA_PROJECT, ) from capa.capabilities.common import ( Capabilities, @@ -203,21 +204,12 @@ def simple_message_exception_handler( if exctype is KeyboardInterrupt: print("KeyboardInterrupt detected, program terminated", file=sys.stderr) else: - exctype_str = str(exctype) - # Give a targeted message when the Ghidra project DB is locked. - if "LockException" in exctype_str or "ghidra.framework.store.LockException" in exctype_str: - print( - f"Unexpected exception raised: {exctype}.\n It looks like the Ghidra project database is locked. " - "Please close the project in the Ghidra GUI (or other process) and try again. For details, run in debug mode (-d/--debug).", - file=sys.stderr, - ) - else: - print( - f"Unexpected exception raised: {exctype}. Please run capa in debug mode (-d/--debug) " - + "to see the stack trace.\nPlease also report your issue on the capa GitHub page so we " - + "can improve the code! (https://github.com/mandiant/capa/issues)", - file=sys.stderr, - ) + print( + f"Unexpected exception raised: {exctype}. Please run capa in debug mode (-d/--debug) " + + "to see the stack trace.\nPlease also report your issue on the capa GitHub page so we " + + "can improve the code! (https://github.com/mandiant/capa/issues)", + file=sys.stderr, + ) def install_common_args(parser, wanted=None): @@ -290,6 +282,7 @@ def install_common_args(parser, wanted=None): (FORMAT_FREEZE, "features previously frozen by capa"), (FORMAT_BINEXPORT2, "BinExport2"), (FORMAT_BINJA_DB, "Binary Ninja Database"), + (FORMAT_GHIDRA_PROJECT, "Ghidra project"), ] format_help = ", ".join([f"{f[0]}: {f[1]}" for f in formats]) @@ -567,7 +560,7 @@ def get_input_format_from_cli(args) -> str: return format_ if args.input_file.suffix.lower() == ".gpr": - return FORMAT_AUTO + return FORMAT_GHIDRA_PROJECT try: return get_auto_format(args.input_file) @@ -941,7 +934,7 @@ def get_extractor_filters_from_cli(args, input_format, backend: Optional[str] = # no processes or function filters were installed in the args return {} - if input_format in STATIC_FORMATS or backend == BACKEND_GHIDRA: + if input_format in STATIC_FORMATS: if args.restrict_to_processes: raise InvalidArgument("Cannot filter processes with static analysis.") return {"functions": {int(addr, 0) for addr in args.restrict_to_functions}} From 4ba5dd78fd7a64a6854c82115d5da13767a9e267 Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Wed, 24 Jun 2026 00:53:06 +0530 Subject: [PATCH 13/14] Address comments --- capa/ghidra/README.md | 12 ++++++++++++ capa/helpers.py | 4 ++++ capa/main.py | 27 +++++++++++---------------- doc/usage.md | 3 --- tests/test_main.py | 14 -------------- 5 files changed, 27 insertions(+), 33 deletions(-) diff --git a/capa/ghidra/README.md b/capa/ghidra/README.md index 41da474a99..df4d4e7435 100644 --- a/capa/ghidra/README.md +++ b/capa/ghidra/README.md @@ -65,6 +65,18 @@ To use the Ghidra backend, specify it with the `-b` or `--backend` flag: $ capa -b ghidra /path/to/sample ``` +capa can also analyze programs directly from Ghidra projects by specifying the project file path (`.gpr`): + +```bash +$ capa /path/to/project.gpr +``` + +If the project contains multiple programs, set the `CAPA_GHIDRA_PROGRAM_PATH` environment variable to specify which program to analyze: + +```bash +$ CAPA_GHIDRA_PROGRAM_PATH=/myprogram capa /path/to/project.gpr +``` + capa will: 1. Initialize a headless Ghidra instance. 2. Create a temporary project. diff --git a/capa/helpers.py b/capa/helpers.py index 6d723c378f..ec2c12d081 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -56,6 +56,7 @@ FORMAT_UNKNOWN, FORMAT_BINJA_DB, FORMAT_BINEXPORT2, + FORMAT_GHIDRA_PROJECT, Format, ) @@ -69,6 +70,7 @@ EXTENSIONS_ELF = ".elf_" EXTENSIONS_FREEZE = ".frz" EXTENSIONS_BINJA_DB = ".bndb" +EXTENSIONS_GHIDRA = ".gpr" logger = logging.getLogger("capa") @@ -236,6 +238,8 @@ def get_format_from_extension(sample: Path) -> str: format_ = FORMAT_BINEXPORT2 elif sample.name.endswith(EXTENSIONS_BINJA_DB): format_ = FORMAT_BINJA_DB + elif sample.name.endswith(EXTENSIONS_GHIDRA): + format_ = FORMAT_GHIDRA_PROJECT return format_ diff --git a/capa/main.py b/capa/main.py index 9c188a8d5f..73a51131fb 100644 --- a/capa/main.py +++ b/capa/main.py @@ -559,9 +559,6 @@ def get_input_format_from_cli(args) -> str: if format_ != FORMAT_AUTO: return format_ - if args.input_file.suffix.lower() == ".gpr": - return FORMAT_GHIDRA_PROJECT - try: return get_auto_format(args.input_file) except PEFormatError as e: @@ -587,7 +584,7 @@ def get_backend_from_cli(args, input_format: str) -> str: if args.backend != BACKEND_AUTO: return args.backend - if args.input_file.suffix.lower() == ".gpr": + if input_format == FORMAT_GHIDRA_PROJECT: return BACKEND_GHIDRA if input_format == FORMAT_CAPE: @@ -612,7 +609,7 @@ def get_backend_from_cli(args, input_format: str) -> str: return BACKEND_VIV -def get_sample_path_from_cli(args, backend: str) -> Optional[Path]: +def get_sample_path_from_cli(args, input_format, backend) -> Optional[Path]: """ Determine the path to the underlying sample, if it exists. @@ -621,6 +618,7 @@ def get_sample_path_from_cli(args, backend: str) -> Optional[Path]: args: args: The parsed command line arguments from `install_common_args`. + input_format: The file format of the input file. backend: The backend that will handle the input file. raises: @@ -628,7 +626,7 @@ def get_sample_path_from_cli(args, backend: str) -> Optional[Path]: """ if backend in (BACKEND_CAPE, BACKEND_DRAKVUF, BACKEND_VMRAY): return None - elif backend == BACKEND_GHIDRA: + elif input_format == FORMAT_GHIDRA_PROJECT: return None elif backend == BACKEND_BINEXPORT2: import capa.features.extractors.binexport2 @@ -641,7 +639,7 @@ def get_sample_path_from_cli(args, backend: str) -> Optional[Path]: return args.input_file -def get_os_from_cli(args, backend) -> str: +def get_os_from_cli(args, input_format, backend) -> str: """ Determine the OS for the given sample. Respects an override provided by the user, otherwise, use heuristics and @@ -649,6 +647,7 @@ def get_os_from_cli(args, backend) -> str: args: args: The parsed command line arguments from `install_common_args`. + input_format: The file format of the input file. backend: The backend that will handle the input file. raises: @@ -657,7 +656,7 @@ def get_os_from_cli(args, backend) -> str: if args.os: return args.os - sample_path = get_sample_path_from_cli(args, backend) + sample_path = get_sample_path_from_cli(args, input_format, backend) if sample_path is None: return "unknown" return capa.loader.get_os(sample_path) @@ -738,10 +737,6 @@ def get_file_extractors_from_cli(args, input_format: str) -> list[FeatureExtract # # this pass can inspect multiple file extractors, e.g., dotnet and pe to identify # various limitations - if args.input_file.suffix.lower() == ".gpr": - logger.debug("skipping generic file extractor probe for Ghidra project input") - return [] - try: return capa.loader.get_file_extractors(args.input_file, input_format) except PEFormatError as e: @@ -883,9 +878,9 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr None, ) - os_ = get_os_from_cli(args, backend) - sample_path = get_sample_path_from_cli(args, backend) - extractor_filters = get_extractor_filters_from_cli(args, input_format, backend) + os_ = get_os_from_cli(args, input_format, backend) + sample_path = get_sample_path_from_cli(args, input_format, backend) + extractor_filters = get_extractor_filters_from_cli(args, input_format) logger.debug("format: %s", input_format) logger.debug("backend: %s", backend) @@ -929,7 +924,7 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr raise ShouldExitError(E_GHIDRA_DB_LOCKED) from e -def get_extractor_filters_from_cli(args, input_format, backend: Optional[str] = None) -> FilterConfig: +def get_extractor_filters_from_cli(args, input_format) -> FilterConfig: if not hasattr(args, "restrict_to_processes") and not hasattr(args, "restrict_to_functions"): # no processes or function filters were installed in the args return {} diff --git a/doc/usage.md b/doc/usage.md index e5482e9112..d8ec6c9587 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -43,9 +43,6 @@ which threads perform what actions (encrypt/decrypt data, initiate a connection, ### IDA Pro plugin: capa explorer Please check out the [capa explorer documentation](/capa/ida/plugin/README.md). -### Ghidra project support -capa can analyze programs directly from Ghidra projects by specifying the project file path (`.gpr`). If the project contains multiple programs, set the `CAPA_GHIDRA_PROGRAM_PATH` environment variable to specify which program to analyze. For example: `CAPA_GHIDRA_PROGRAM_PATH=/myprogram capa /path/to/project.gpr`. - ### save time by reusing .viv files Set the environment variable `CAPA_SAVE_WORKSPACE` to instruct the underlying analysis engine to cache its intermediate results to the file system. For example, vivisect will create `.viv` files. diff --git a/tests/test_main.py b/tests/test_main.py index 6b00f385c1..10a316a2c4 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -15,7 +15,6 @@ import gzip import json import textwrap -from types import SimpleNamespace from pathlib import Path import fixtures @@ -353,16 +352,3 @@ def test_main_cape_gzip(): / "./data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz" ) assert capa.main.main([path]) == 0 - - -def test_gpr_uses_ghidra_backend(tmp_path): - args = SimpleNamespace(input_file=tmp_path / "sample.gpr", backend=capa.main.BACKEND_AUTO) - - assert capa.main.get_backend_from_cli(args, FORMAT_AUTO) == capa.main.BACKEND_GHIDRA - assert capa.main.get_sample_path_from_cli(args, capa.main.BACKEND_GHIDRA) is None - - -def test_gpr_skips_generic_file_extractor_probe(tmp_path): - args = SimpleNamespace(input_file=tmp_path / "sample.gpr") - - assert capa.main.get_file_extractors_from_cli(args, FORMAT_AUTO) == [] From 5ee451bf17f3c0fc0fb87de14a6329ce26f86c2d Mon Sep 17 00:00:00 2001 From: saniyafatima07 Date: Wed, 24 Jun 2026 01:19:27 +0530 Subject: [PATCH 14/14] Update tests data submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index a24fe4e48c..905e00e91f 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit a24fe4e48c130d58fee33832339372fc1840bcc8 +Subproject commit 905e00e91fb617954dedad099668f4706aea3d09