diff --git a/.serena/memories/project_overview.md b/.serena/memories/project_overview.md new file mode 100644 index 00000000..4875964f --- /dev/null +++ b/.serena/memories/project_overview.md @@ -0,0 +1,44 @@ +# Project Overview + +## Purpose +Trustify Dependency Analytics is a Java/Quarkus backend service for dependency analysis. +It accepts SBOMs (CycloneDX/SPDX), queries vulnerability providers (Trustify), resolves +licenses via deps.dev, and returns analysis results as JSON, HTML (self-contained React app +via FreeMarker), or multipart/mixed. + +## Tech Stack +- **Language**: Java 21 +- **Framework**: Quarkus 3.31.3 +- **Routing**: Apache Camel for Quarkus +- **REST**: JAX-RS / Jakarta RESTful Web Services with Jackson +- **ORM**: Hibernate ORM with Panache, PostgreSQL, Flyway +- **Cache**: Redis +- **Build**: Maven, Spotless (Google Java Format), Frontend Maven Plugin (Node/Yarn for UI) +- **Testing**: JUnit 5, REST Assured, WireMock 3.4.2, HtmlUnit +- **API Models**: `trustify-da-api-model` artifact (version 2.0.7) provides generated model classes +- **UI**: React 18 + PatternFly 5 + TypeScript (compiled into FreeMarker template) +- **Monitoring**: Sentry 7.8.0 + +## Package Structure +Base package: `io.github.guacsec.trustifyda` +- `integration/backend/` — Camel REST routes (ExhortIntegration) +- `integration/providers/trustify/` — Trustify provider (OAuth2, vulnerabilities, recommendations) +- `integration/licenses/` — deps.dev license integration +- `integration/sbom/` — SBOM parsing (CycloneDX, SPDX) +- `integration/report/` — Report generation (JSON, HTML, multipart) +- `integration/cache/` — Redis caching +- `model/` — Domain models (DependencyTree, etc.) 
+- `config/` — Configuration, exception handling +- `service/` — OIDC, auth services +- `monitoring/` — Sentry + +## API Versions +- v4: `/api/v4/analysis`, `/api/v4/batch-analysis`, `/api/v4/token` +- v5: `/api/v5/analysis`, `/api/v5/batch-analysis`, `/api/v5/licenses`, `/api/v5/token` + +## Data Flow +Client Request (SBOM) -> ExhortIntegration -> SbomParser -> DependencyTree + -> analyzeSbom multicast: [findVulnerabilities, getLicensesFromSbom] in parallel + -> findVulnerabilities -> split by provider -> trustifyScan -> trustifyRequest + -> multicast: [vulnerabilities, recommendations] in parallel + -> ReportIntegration (JSON/HTML/Multipart) -> Response diff --git a/.serena/memories/suggested_commands.md b/.serena/memories/suggested_commands.md new file mode 100644 index 00000000..d624553d --- /dev/null +++ b/.serena/memories/suggested_commands.md @@ -0,0 +1,26 @@ +# Suggested Commands + +## Build & Verify +- `mvn spotless:apply` — Auto-format all Java sources (Google Java Format) +- `mvn test` — Run unit tests +- `mvn verify` — Run all tests including integration tests +- `mvn compile` — Compile (also builds UI if ui/ changed) +- `mvn clean install` — Full build + +## UI (in ui/ directory) +- `yarn install` — Install dependencies +- `yarn build` — Build React app (output goes to freemarker/templates/generated/) +- `yarn lint` / `yarn lint:fix` — ESLint + Prettier checks + +## Pre-Commit Checklist +1. `mvn spotless:apply` +2. If ui/ changed: `cd ui && yarn lint && yarn build` +3. 
`mvn verify` + +## Development +- `mvn quarkus:dev` — Run in dev mode +- Dev profile: `%dev.quarkus.log.level=DEBUG` + +## System Utilities (macOS / Darwin) +- `git`, `ls`, `find`, `grep` — Standard Unix commands +- `mvn` — Maven wrapper or system Maven diff --git a/.serena/project.yml b/.serena/project.yml index 7a4a4268..948eb279 100644 --- a/.serena/project.yml +++ b/.serena/project.yml @@ -3,21 +3,26 @@ project_name: "trustify-dependency-analytics" # list of languages for which language servers are started; choose from: -# al bash clojure cpp csharp -# csharp_omnisharp dart elixir elm erlang -# fortran fsharp go groovy haskell -# java julia kotlin lua markdown -# matlab nix pascal perl php -# php_phpactor powershell python python_jedi r -# rego ruby ruby_solargraph rust scala -# swift terraform toml typescript typescript_vts -# vue yaml zig +# al angular ansible bash clojure +# cpp cpp_ccls crystal csharp csharp_omnisharp +# dart elixir elm erlang fortran +# fsharp go groovy haskell haxe +# hlsl html java json julia +# kotlin lean4 lua luau markdown +# matlab msl nix ocaml pascal +# perl php php_phpactor powershell python +# python_jedi python_ty r rego ruby +# ruby_solargraph rust scala scss solidity +# swift systemverilog terraform toml typescript +# typescript_vts vue yaml zig # (This list may be outdated. For the current list, see values of Language enum here: # https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py # For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.) # Note: # - For C, use cpp # - For JavaScript, use typescript +# - For Angular projects, use angular (subsumes typescript+html; requires `npm install` in the project root) +# - For SCSS / Sass / plain CSS, use scss (some-sass-language-server handles all three) # - For Free Pascal/Lazarus, use pascal # Special requirements: # Some languages require additional setup/installations. 
@@ -61,53 +66,17 @@ read_only: false # list of tool names to exclude. # This extends the existing exclusions (e.g. from the global configuration) -# -# Below is the complete list of tools for convenience. -# To make sure you have the latest list of tools, and to view their descriptions, -# execute `uv run scripts/print_tool_overview.py`. -# -# * `activate_project`: Activates a project by name. -# * `check_onboarding_performed`: Checks whether project onboarding was already performed. -# * `create_text_file`: Creates/overwrites a file in the project directory. -# * `delete_lines`: Deletes a range of lines within a file. -# * `delete_memory`: Deletes a memory from Serena's project-specific memory store. -# * `execute_shell_command`: Executes a shell command. -# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced. -# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type). -# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type). -# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. -# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. -# * `initial_instructions`: Gets the initial instructions for the current project. -# Should only be used in settings where the system prompt cannot be set, -# e.g. in clients you have no control over, like Claude Desktop. -# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. -# * `insert_at_line`: Inserts content at a given line in a file. -# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. -# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). 
-# * `list_memories`: Lists memories in Serena's project-specific memory store. -# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). -# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context). -# * `read_file`: Reads a file within the project directory. -# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store. -# * `remove_project`: Removes a project from the Serena configuration. -# * `replace_lines`: Replaces a range of lines within a file with new content. -# * `replace_symbol_body`: Replaces the full definition of a symbol. -# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen. -# * `search_for_pattern`: Performs a search for a pattern in the project. -# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase. -# * `switch_modes`: Activates modes by providing a list of their names -# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information. -# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task. -# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed. -# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store. +# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html excluded_tools: [] # list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default). # This extends the existing inclusions (e.g. from the global configuration). 
+# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html included_optional_tools: [] # fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools. # This cannot be combined with non-empty excluded_tools or included_optional_tools. +# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html fixed_tools: [] # list of mode names to that are always to be included in the set of active modes @@ -118,16 +87,23 @@ fixed_tools: [] # Set this to a list of mode names to always include the respective modes for this project. base_modes: -# list of mode names that are to be activated by default. -# The full set of modes to be activated is base_modes + default_modes. -# If the setting is undefined, the default_modes from the global configuration (serena_config.yml) apply. +# list of mode names that are to be activated by default, overriding the setting in the global configuration. +# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes. +# If the setting is undefined/empty, the default_modes from the global configuration (serena_config.yml) apply. # Otherwise, this overrides the setting from the global configuration (serena_config.yml). +# Therefore, you can set this to [] if you do not want the default modes defined in the global config to apply +# for this project. # This setting can, in turn, be overridden by CLI parameters (--mode). +# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes default_modes: # initial prompt for the project. It will always be given to the LLM upon activating the project # (contrary to the memories, which are loaded on demand). -initial_prompt: "Trustify Dependency Analytics is a Java/Quarkus backend service for dependency analysis. It accepts SBOMs (CycloneDX/SPDX), queries vulnerability providers, and returns analysis results as JSON or a self-contained HTML report. 
The HTML report is a React/TypeScript app (under ui/) compiled and inlined into a Freemarker template. Routing uses Apache Camel." +initial_prompt: "Trustify Dependency Analytics is a Java/Quarkus backend service for + dependency analysis. It accepts SBOMs (CycloneDX/SPDX), queries vulnerability providers, + and returns analysis results as JSON or a self-contained HTML report. The HTML report + is a React/TypeScript app (under ui/) compiled and inlined into a Freemarker template. + Routing uses Apache Camel." # time budget (seconds) per tool call for the retrieval of additional symbol information # such as docstrings or parameter information. @@ -152,3 +128,19 @@ ignored_memory_patterns: [] # Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available. # No documentation on options means no options are available. ls_specific_settings: {} + +# list of mode names to be activated additionally for this project, e.g. ["query-projects"] +# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes. +# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes +added_modes: + +# list of additional workspace folder paths for cross-package reference support (e.g. in monorepos). +# Paths can be absolute or relative to the project root. +# Each folder is registered as an LSP workspace folder, enabling language servers to discover +# symbols and references across package boundaries. +# Currently supported for: TypeScript. 
+# Example: +# additional_workspace_folders: +# - ../sibling-package +# - ../shared-lib +additional_workspace_folders: [] diff --git a/CONVENTIONS.md b/CONVENTIONS.md index cd97c23c..b851eb9a 100644 --- a/CONVENTIONS.md +++ b/CONVENTIONS.md @@ -30,11 +30,12 @@ ## Naming Conventions - **Packages**: `io.github.guacsec.trustifyda.` (domain-driven) - - Subpackages: `integration.providers`, `integration.licenses`, `integration.backend`, `integration.sbom` + - Subpackages: `integration.providers`, `integration.licenses`, `integration.backend`, `integration.sbom`, `integration.registry` - **Classes**: PascalCase - Services: `*Service` (`ModelCardService`, `CacheService`, `SpdxLicenseService`) - Repositories: `*Repository` (`ModelCardRepository`, `GuardrailRepository`) - - Route Builders: `*Integration` (`ExhortIntegration`, `LicensesIntegration`) + - Route Builders: `*Integration` (`ExhortIntegration`, `LicensesIntegration`, `Pep691Integration`) + - Enrichment Services: `*EnrichmentService` — stateless helpers instantiated directly, not CDI-managed (e.g., `RegistryEnrichmentService`) - Exceptions: `*Exception` (`DetailedException`, `SbomValidationException`) - Utility: Private constructor, `final` class (e.g., `ExceptionUtils`) - **Methods**: camelCase, verb-first (`get*`, `find*`, `process*`, `validate*`, `is*`) @@ -57,6 +58,7 @@ src/main/java/io/github/guacsec/trustifyda/ │ ├── providers/ # Vulnerability providers │ │ └── trustify/ │ ├── report/ # Report generation +│ ├── registry/ # Ecosystem registry integrations (PEP 691, etc.) │ ├── sbom/ # SBOM parsing │ │ ├── cyclonedx/ │ │ └── spdx/ @@ -64,6 +66,7 @@ src/main/java/io/github/guacsec/trustifyda/ ├── model/ # Domain models │ ├── trustify/ │ ├── modelcards/ +│ ├── registry/ # Registry response records (Pep691Response, etc.) 
│ └── licenses/ ├── modelcards/ # Model card service layer ├── monitoring/ # Sentry, observability @@ -87,6 +90,53 @@ ui/ # React frontend for HTML report - Feature/capability-based organization, not layered - Integration routes separated from business logic +## Configuration Properties + +- **Required properties**: Use plain `String` or typed field. Quarkus throws `DeploymentException` at startup if the value is missing. +- **Optional properties**: Use `Optional<String>` **without** `defaultValue`. This allows the application to start when the environment variable is unset. Do **not** use `String` with `defaultValue = ""` — it prevents distinguishing "unconfigured" from "explicitly empty". Example: + ```java + @ConfigProperty(name = "api.pypi.registry.host") + Optional<String> registryHost; + ``` + Check with `registryHost.isPresent() && !registryHost.get().isBlank()`. +- **Timeout properties**: Use `String` type with a duration suffix (e.g., `"10s"`), passed to Camel fault tolerance configuration. + +## CDI Extensibility Pattern + +When a feature needs to support multiple ecosystem implementations (e.g., registry lookups for pypi, maven, npm), use CDI `Instance<T>` discovery: + +1. **Define a package-private interface** (not public) with `isEnabled()` and the operation method: + ```java + interface RegistryIntegration { + boolean isEnabled(); + void enrich(AnalysisReport report, DependencyTree tree); + } + ``` +2. **Implement per-ecosystem** as `@ApplicationScoped` beans extending `EndpointRouteBuilder` and implementing the interface. Each implementation owns its own Camel routes and config properties. +3. **Orchestrate via `Instance<RegistryIntegration>`** in a single orchestrator class that iterates all discovered implementations, calls only enabled ones, and isolates exceptions: + ```java + @Inject Instance<RegistryIntegration> registryIntegrations; + ``` +4. **Keep Camel concerns out of the interface** — the interface methods accept domain objects (`AnalysisReport`, `DependencyTree`), not `Exchange`. 
The orchestrator handles Exchange extraction. +5. **Run sequentially** — enrichment services mutate shared report structures that are not thread-safe. +6. **Adding a new ecosystem** requires only one new class implementing the interface. No changes to the orchestrator or main route. + +## Stateless Helper Services + +For reusable business logic shared across multiple CDI beans (e.g., report enrichment), use package-private stateless classes instantiated directly (not CDI-managed): + +```java +class RegistryEnrichmentService { + void enrichReport(AnalysisReport report, DependencyTree tree, + String packagePrefix, + BiFunction> registryQuery) { ... } +} +``` + +Instantiate in the field initializer of the owning bean: `private final RegistryEnrichmentService enrichmentService = new RegistryEnrichmentService();` + +Use this pattern when the helper has no injected dependencies and serves as a pure function container. If the helper needs CDI injection, make it `@ApplicationScoped` instead. + ## Error Handling - **Exception hierarchy**: @@ -99,6 +149,15 @@ ui/ # React frontend for HTML report - **Error responses**: `text/plain` for validation errors, JSON for complex errors, all include `ex-request-id` header - **Utilities**: `ExceptionUtils.findInChain()`, `ExceptionUtils.getLongestMessage()` +## Camel Integration Patterns + +- **Circuit breaker**: Use MicroProfile Fault Tolerance via Camel for external HTTP calls. Configure `timeoutEnabled(true)` and `timeoutDuration()` from a config property. +- **Fallback handling**: Define `.onFallback().process(this::handleLookupFallback)` to return a safe default (e.g., 504 status, null body) when a circuit breaker trips. +- **HTTP header cleanup**: Before making outbound HTTP calls, remove stale headers (`HTTP_RAW_QUERY`, `HTTP_QUERY`, `HTTP_URI`, `HTTP_PATH`, `HTTP_HOST`, `ACCEPT_ENCODING`, `CONTENT_TYPE`) to prevent header leakage between requests. 
+- **Route naming**: Route IDs must match the method/direct endpoint name (e.g., `direct("pep691Lookup")` → `.routeId("pep691Lookup")`). +- **Dynamic URLs**: Use `.toD("${exchangeProperty.propertyName}?throwExceptionOnFailure=false")` for URLs resolved at runtime from exchange properties. +- **Single entry point**: The main analysis route (`ExhortIntegration`) calls `direct:enrichTrustedLibraries` as a single entry point. The orchestrator (`TrustedLibrariesIntegration`) discovers and runs all registry integrations. Never add ecosystem-specific routes directly to the main analysis route. + ## Testing Conventions - **Frameworks**: JUnit 5 (Jupiter), Quarkus Test, Mockito (via `quarkus-junit5-mockito`) @@ -109,8 +168,18 @@ ui/ # React frontend for HTML report - **Custom extensions**: `WiremockExtension`, `OidcWiremockExtension`, `@InjectWireMock` - **REST Assured pattern**: `given().header(...).body(...).when().post(...).then().assertThat().statusCode(...)` - **Cache testing**: Two-request pattern to verify cache hits; `server.resetRequests()` between tests -- **Test data**: JSON fixtures in `src/test/resources/{format}/` +- **Test data**: JSON fixtures in `src/test/resources/{format}/` (e.g., `pypi-registry/`, `depsdev/`, `trustify/`, `reports/`) - **Assertions**: JUnit static imports + Hamcrest matchers +- **Unit testing CDI beans**: For non-Quarkus unit tests, instantiate beans directly and set `@Inject`/`@ConfigProperty` fields manually (package-private visibility). Mock CDI `Instance` with Mockito. +- **Unit testing Camel routes**: For route builder tests that don't need full Camel context, test the `process()` methods directly by constructing mock `Exchange` and `Message` objects. +- **Integration testing with WireMock**: Register WireMock stubs for external registries in test setup. Use `AbstractAnalysisTest` helpers like `replaceMockedRegistryUrl()` to inject WireMock URLs into test fixtures. 
+ +## PURL Construction + +When constructing Package URL (PURL) strings with qualifiers: +- **URL-encode qualifier values** using `URLEncoder.encode(value, StandardCharsets.UTF_8)`, especially for `repository_url` which contains full URLs with `://` and path separators. +- **Use `PackageRef.builder().purl(...)` pattern** for constructing PURL-based references. +- **Normalize package names** for registry lookups: lowercase, and replace each run of `-`, `_`, or `.` with a single `-` (PEP 503 name normalization, which the PEP 691 Simple API uses for pypi). ## Commit Messages diff --git a/src/main/java/io/github/guacsec/trustifyda/integration/registry/RegistryEnrichmentService.java b/src/main/java/io/github/guacsec/trustifyda/integration/registry/RegistryEnrichmentService.java index 2f752b45..dfc754e8 100644 --- a/src/main/java/io/github/guacsec/trustifyda/integration/registry/RegistryEnrichmentService.java +++ b/src/main/java/io/github/guacsec/trustifyda/integration/registry/RegistryEnrichmentService.java @@ -17,6 +17,8 @@ package io.github.guacsec.trustifyda.integration.registry; +import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Optional; @@ -29,6 +31,8 @@ import io.github.guacsec.trustifyda.api.v5.ProviderReport; import io.github.guacsec.trustifyda.api.v5.Remediation; import io.github.guacsec.trustifyda.api.v5.RemediationTrustedContent; +import io.github.guacsec.trustifyda.api.v5.Source; +import io.github.guacsec.trustifyda.api.v5.SourceSummary; import io.github.guacsec.trustifyda.model.DependencyTree; class RegistryEnrichmentService { @@ -141,11 +145,18 @@ private void enrichUnreportedDependencies( for (var providerEntry : providers.entrySet()) { var providerReport = providerEntry.getValue(); - if (providerReport == null - || providerReport.getSources() == null - || providerReport.getSources().isEmpty()) { + if (providerReport == null) { continue; } + if (providerReport.getSources() == null) { + providerReport.sources(new HashMap<>()); + } + if 
(providerReport.getSources().isEmpty()) { + var defaultSource = new Source(); + defaultSource.dependencies(new ArrayList<>()); + defaultSource.summary(new SourceSummary()); + providerReport.getSources().put(providerEntry.getKey(), defaultSource); + } for (var sourceEntry : providerReport.getSources().entrySet()) { var sourceReport = sourceEntry.getValue(); if (sourceReport != null) { diff --git a/src/test/java/io/github/guacsec/trustifyda/integration/registry/RegistryEnrichmentServiceTest.java b/src/test/java/io/github/guacsec/trustifyda/integration/registry/RegistryEnrichmentServiceTest.java index 87530170..bdab1041 100644 --- a/src/test/java/io/github/guacsec/trustifyda/integration/registry/RegistryEnrichmentServiceTest.java +++ b/src/test/java/io/github/guacsec/trustifyda/integration/registry/RegistryEnrichmentServiceTest.java @@ -18,6 +18,7 @@ package io.github.guacsec.trustifyda.integration.registry; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; @@ -211,6 +212,41 @@ void passHashToRegistryQuery() { assertEquals("expected-hash", capturedHash[0]); } + @Test + void enrichCreatesSourceWhenProvidersHaveEmptySources() { + var providerReport = new ProviderReport(); + providerReport.sources(new HashMap<>()); + var report = new AnalysisReport(); + report.providers(new HashMap<>(Map.of("provider1", providerReport))); + + var tree = buildTree("pkg:pypi/amqp@5.3.1", Map.of("SHA-256", "abc123")); + + service.enrichReport(report, tree, PKG_PYPI_PREFIX, alwaysRecommend); + + assertFalse(providerReport.getSources().isEmpty()); + var deps = providerReport.getSources().values().iterator().next().getDependencies(); + assertEquals(1, deps.size()); + assertNotNull(deps.get(0).getRecommendation()); + } + + @Test + void enrichCreatesSourceWhenSourcesIsNull() { + var providerReport = new 
ProviderReport(); + var report = new AnalysisReport(); + report.providers(new HashMap<>(Map.of("provider1", providerReport))); + + var tree = buildTree("pkg:pypi/amqp@5.3.1", Map.of("SHA-256", "abc123")); + + service.enrichReport(report, tree, PKG_PYPI_PREFIX, alwaysRecommend); + + assertNotNull(providerReport.getSources()); + assertFalse(providerReport.getSources().isEmpty()); + + var deps = providerReport.getSources().values().iterator().next().getDependencies(); + assertEquals(1, deps.size()); + assertNotNull(deps.get(0).getRecommendation()); + } + private AnalysisReport buildReportWithPypiDep(String purl) { return buildReportWithDep(purl); }