From e6d925ff4377f1b8ef7a094e822f5eae9276868a Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:14:06 +0300 Subject: [PATCH 01/54] Add skills --- .gitignore | 1 + Makefile | 62 + agent-mode/design/agent-mode-design.md | 1407 +++++++++++ agent-mode/impl/agent-mode-impl.md | 1275 ++++++++++ agent-mode/info/agent-pipeline.md | 672 ++++++ agent-mode/info/approximations-config.md | 487 ++++ agent-mode/info/pattern-rules.md | 313 +++ agent-mode/mismatch.md | 358 +++ agent-mode/mitigation-plan.md | 407 ++++ agent-mode/plan.md | 303 +++ agent-mode/test-status.md | 95 + agent-mode/test/agent-mode-test.md | 2085 +++++++++++++++++ agent-mode/test/conftest.py | 497 ++++ .../java/PdfBoxDocumentApprox.java | 50 + .../yaml/custom-propagators.yaml | 37 + .../rules/java/lib/stirling-source.yaml | 10 + .../security/stirling-path-traversal.yaml | 18 + .../src/main/java/test/PathTraversalTest.java | 38 + agent-mode/test/pytest.ini | 5 + agent-mode/test/test_approximations.py | 358 +++ agent-mode/test/test_build.py | 152 ++ agent-mode/test/test_external_methods.py | 260 ++ agent-mode/test/test_full_loop.py | 290 +++ agent-mode/test/test_rules.py | 409 ++++ agent/meta-prompt.md | 119 + agent/skills/analyze-findings.md | 95 + agent/skills/build-project.md | 77 + agent/skills/create-approximation.md | 120 + agent/skills/create-rule.md | 150 ++ agent/skills/create-yaml-config.md | 157 ++ agent/skills/debug-rule-reachability.md | 60 + agent/skills/discover-entry-points.md | 45 + agent/skills/generate-poc.md | 83 + agent/skills/opentaint-issue-investigation.md | 169 ++ agent/skills/run-analysis.md | 94 + agent/skills/test-rule.md | 160 ++ cli/Makefile | 27 + cli/cmd/agent.go | 16 + cli/cmd/agent_init_test_project.go | 178 ++ cli/cmd/agent_prompt.go | 26 + cli/cmd/agent_rules_path.go | 41 + cli/cmd/agent_skills.go | 26 + cli/cmd/agent_test_rules.go | 153 ++ cli/cmd/analyzer_exit.go | 68 + cli/cmd/command_builder.go | 60 +- cli/cmd/scan.go | 35 +- cli/internal/agent/.gitignore | 2 + cli/internal/agent/agent.go | 140 ++ cli/internal/testutil/.gitignore | 2 + cli/internal/testutil/testutil.go | 69 + task.md | 86 + 51 files changed, 11813 insertions(+), 34 deletions(-) create mode 100644 Makefile create mode 100644 agent-mode/design/agent-mode-design.md create mode 100644 agent-mode/impl/agent-mode-impl.md create mode 100644 agent-mode/info/agent-pipeline.md create mode 100644 agent-mode/info/approximations-config.md create mode 100644 agent-mode/info/pattern-rules.md create mode 100644 agent-mode/mismatch.md create mode 100644 agent-mode/mitigation-plan.md create mode 100644 agent-mode/plan.md create mode 100644 agent-mode/test-status.md create mode 100644 agent-mode/test/agent-mode-test.md create mode 100644 agent-mode/test/conftest.py create mode 100644 agent-mode/test/fixtures/approximations/java/PdfBoxDocumentApprox.java create mode 100644 agent-mode/test/fixtures/approximations/yaml/custom-propagators.yaml create mode 100644 agent-mode/test/fixtures/rules/java/lib/stirling-source.yaml create mode 100644 agent-mode/test/fixtures/rules/java/security/stirling-path-traversal.yaml create mode 100644 agent-mode/test/fixtures/test-samples/src/main/java/test/PathTraversalTest.java create mode 100644 agent-mode/test/pytest.ini create mode 100644 agent-mode/test/test_approximations.py create mode 100644 agent-mode/test/test_build.py create mode 100644 agent-mode/test/test_external_methods.py create mode 100644 agent-mode/test/test_full_loop.py create mode 100644 agent-mode/test/test_rules.py create mode 100644 agent/meta-prompt.md create mode 100644 agent/skills/analyze-findings.md create mode 100644 agent/skills/build-project.md create mode 100644 agent/skills/create-approximation.md create mode 100644 agent/skills/create-rule.md create mode 100644 agent/skills/create-yaml-config.md create mode 100644 agent/skills/debug-rule-reachability.md create mode 100644 agent/skills/discover-entry-points.md create mode 100644 agent/skills/generate-poc.md create mode 100644 agent/skills/opentaint-issue-investigation.md create mode 100644 agent/skills/run-analysis.md create mode 100644 agent/skills/test-rule.md create mode 100644 cli/Makefile create mode 100644 cli/cmd/agent.go create mode 100644 cli/cmd/agent_init_test_project.go create mode 100644 cli/cmd/agent_prompt.go create mode 100644 cli/cmd/agent_rules_path.go create mode 100644 cli/cmd/agent_skills.go create mode 100644 cli/cmd/agent_test_rules.go create mode 100644 cli/cmd/analyzer_exit.go create mode 100644 cli/internal/agent/.gitignore create mode 100644 cli/internal/agent/agent.go create mode 100644 cli/internal/testutil/.gitignore create mode 100644 cli/internal/testutil/testutil.go create mode 100644 task.md diff --git a/.gitignore b/.gitignore index 27bf36d6d..2d3487922 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ config.local.* **/.gradle **/build +core/**/bin/ # Ignore all hidden files and directories .* diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..87c7ad91e --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +MAKE ?= make +GRADLEW := $(CURDIR)/core/gradlew +INSTALL ?= install + +PREFIX ?= /usr/local +BINDIR ?= $(PREFIX)/bin +LIBDIR ?= $(PREFIX)/lib + +CORE_DIR := core +CLI_DIR := cli + +CLI_BINARY_NAME := opentaint +CLI_DEV_BINARY_NAME := opentaint-dev +ANALYZER_TASK := :projectAnalyzerJar +AUTOBUILDER_TASK := opentaint-jvm-autobuilder:projectAutoBuilderJar +TEST_UTIL_TASK := :opentaint-sast-test-util:jar + +ANALYZER_JAR := $(CORE_DIR)/build/libs/opentaint-project-analyzer.jar +AUTOBUILDER_JAR := $(CORE_DIR)/opentaint-jvm-autobuilder/build/libs/opentaint-project-auto-builder.jar +TEST_UTIL_JAR := $(CORE_DIR)/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar +INSTALLED_ANALYZER_JAR := $(LIBDIR)/$(notdir $(ANALYZER_JAR)) +INSTALLED_AUTOBUILDER_JAR := $(LIBDIR)/$(notdir $(AUTOBUILDER_JAR)) +INSTALLED_CLI_BINARY := $(BINDIR)/$(CLI_BINARY_NAME) +INSTALLED_DEV_BINARY := $(BINDIR)/$(CLI_DEV_BINARY_NAME) + +.PHONY: all core projectAnalyzerJar core/autobuilder core/opentaint-sast-test-util cli install clean + +all: core cli + +core: projectAnalyzerJar core/autobuilder core/opentaint-sast-test-util + +projectAnalyzerJar: + cd $(CORE_DIR) && $(GRADLEW) $(ANALYZER_TASK) + +core/autobuilder: + cd $(CORE_DIR) && $(GRADLEW) $(AUTOBUILDER_TASK) + +core/opentaint-sast-test-util: + cd $(CORE_DIR) && $(GRADLEW) $(TEST_UTIL_TASK) + +cli: + $(MAKE) -C $(CLI_DIR) build + +install: core cli + mkdir -p $(BINDIR) $(LIBDIR) + $(MAKE) -C $(CLI_DIR) install PREFIX=$(PREFIX) BINDIR=$(BINDIR) + $(INSTALL) -m 0644 $(ANALYZER_JAR) $(INSTALLED_ANALYZER_JAR) + $(INSTALL) -m 0644 $(AUTOBUILDER_JAR) $(INSTALLED_AUTOBUILDER_JAR) + $(INSTALL) -m 0644 $(TEST_UTIL_JAR) $(LIBDIR)/$(notdir $(TEST_UTIL_JAR)) + printf '%s\n' \ + '#!/bin/sh' \ + 'set -eu' \ + 'BIN_DIR=$$(CDPATH= cd -- "$$(dirname -- "$$0")" && pwd)' \ + 'PREFIX_DIR=$$(CDPATH= cd -- "$$BIN_DIR/.." && pwd)' \ + 'LIB_DIR="$$PREFIX_DIR/lib"' \ + 'exec "$$BIN_DIR/$(CLI_BINARY_NAME)" --experimental --analyzer-jar "$$LIB_DIR/$(notdir $(ANALYZER_JAR))" --autobuilder-jar "$$LIB_DIR/$(notdir $(AUTOBUILDER_JAR))" "$$@"' \ + > $(INSTALLED_DEV_BINARY) + chmod 0755 $(INSTALLED_DEV_BINARY) + +clean: + $(MAKE) -C $(CLI_DIR) clean + cd $(CORE_DIR) && $(GRADLEW) clean diff --git a/agent-mode/design/agent-mode-design.md b/agent-mode/design/agent-mode-design.md new file mode 100644 index 000000000..ad6ee61ef --- /dev/null +++ b/agent-mode/design/agent-mode-design.md @@ -0,0 +1,1407 @@ +# Agent Mode Design + +## Table of Contents + +1. [Required Engine Changes](#1-required-engine-changes) +2. [Go CLI API Design](#2-go-cli-api-design) +3. [Agent Skills](#3-agent-skills) +4. [Meta Prompt](#4-meta-prompt) + +--- + +## 1. Required Engine Changes + +### 1.1 External Methods List Output + +**Problem**: The engine currently performs call-to-return passthrough for unresolved external methods — taint is silently preserved. There is no reporting of which external methods were encountered, making it impossible for the agent to know where taint propagation models are missing. + +**Current behavior** (in `JIRMethodCallFlowFunction.applyPassRulesOrCallSkip()`): +1. Taint fact arrives at a call to an unresolved method +2. `unresolvedCallDefaultFactPropagation()` copies the fact unchanged to the return site +3. If YAML pass-through rules exist for the method, those are also applied +4. No record is kept of this event + +**Required change**: Collect external method call information during analysis and output it as a YAML file. + +**Collection architecture**: Follow the `TaintSinkTracker` / `TaintAnalysisUnitStorage` pattern: + +``` +ExternalMethodTracker (like TaintSinkTracker) + └── backed by per-unit storage in TaintAnalysisUnitStorage + └── ConcurrentLinkedQueue + +Wiring: + TaintAnalysisContext (already carries TaintSinkTracker) + └── + val externalMethodTracker: ExternalMethodTracker + + TaintAnalysisUnitRunnerManager + └── spawnNewRunner() + ├── creates ExternalMethodTracker(storage) per unit + └── passes it into TaintAnalysisContext + └── getExternalMethods() (aggregates across all units, like getVulnerabilities()) +``` + +**Collection point**: `JIRMethodCallFlowFunction.applyPassRulesOrCallSkip()` — this is called for every taint fact that encounters an unresolved method. At this point we know: +- The called method (class, name, signature) +- The taint fact position that was passthrough-ed (the `factReader`/`factAp` tells us `this`, `arg(N)`, etc.) +- Whether YAML pass-through rules were found for this method + +The tracker records each encounter. Deduplication (by method identity) and aggregation (merging fact positions, counting call sites) happen at collection time via `ConcurrentHashMap`, same pattern as `TaintSinkTracker`'s `reportedVulnerabilities`. + +**Output format** (`external-methods.yaml`): + +Two separate lists — methods without rules (agent's priority list) and methods with rules (already modeled, for review): + +```yaml +withoutRules: + - method: com.example.lib.DataWrapper#getValue + signature: "() java.lang.String" + factPositions: + - this + callSites: 5 + + - method: com.example.lib.Processor#transform + signature: "(java.lang.Object) java.lang.Object" + factPositions: + - arg(0) + - this + callSites: 12 + +withRules: + - method: java.lang.StringBuilder#append + signature: "(java.lang.String) java.lang.StringBuilder" + factPositions: + - arg(0) + callSites: 87 +``` + +Fields: +- `method`: Fully qualified `Class#method` (class and method name are derivable from this, no need to store separately) +- `signature`: JVM-style `(paramTypes) returnType` +- `factPositions`: Deduplicated list of taint positions that were passthrough-ed at this method +- `callSites`: Number of distinct call sites where this method was encountered with taint + +The split into `withoutRules` / `withRules` reduces the agent's effort — it can focus on `withoutRules` first (methods with no propagation model at all), and only review `withRules` if specific traces look suspicious. + +**Kotlin CLI flag**: `--track-external-methods` (boolean, on `ProjectAnalyzerRunner`). Output filenames are fixed: `<--output-dir>/external-methods-without-rules.yaml` and `<--output-dir>/external-methods-with-rules.yaml`. The path is not configurable. +**Go CLI flag**: `--track-external-methods` (boolean, on `scan`). The two YAMLs are written into the same directory as the SARIF file specified by `-o`. + +### 1.2 Allow `--approximations-config` + `--semgrep-rule-set` Together + +**Problem**: `--config` and `--semgrep-rule-set` are mutually exclusive (`check(options.customConfig == null)` in `ProjectAnalyzer.preloadRules()`). The agent needs both: +- `--semgrep-rule-set` for pattern rules (sources, sinks, vulnerability patterns) +- `--approximations-config` for YAML propagation rules (passThrough) + +**Required change**: Rename the existing `--config` flag to `--approximations-config` to clarify its purpose. When both `--approximations-config` and `--semgrep-rule-set` are provided, load Semgrep rules as the pattern-matching layer and use the custom config to **override** the default propagation config. + +**Implementation**: In `ProjectAnalyzer.preloadRules()`, add a fourth branch: + +```kotlin +if (options.semgrepRuleSet.isNotEmpty() && options.approximationsConfig != null) { + val semgrepRules = loadSemgrepRules(...) + val customConfig = loadSerializedTaintConfig(options.approximationsConfig) + return PreloadedRules.SemgrepRulesWithCustomConfig(semgrepRules, customConfig) +} +``` + +In `loadTaintConfig()`, the new `SemgrepRulesWithCustomConfig` case should: +1. Load default pass-through rules into a `TaintConfiguration` +2. Load the custom config into another `TaintConfiguration` +3. Merge via `JIRCombinedTaintRulesProvider(defaultRules, customRules)` with **OVERRIDE** mode for all categories + +The agent's custom config intentionally overrides the default config — when the agent provides rules for a method, it means the agent has determined the correct behavior and the default should be replaced, not merged. Using EXTEND would mix the agent's corrections with the (possibly wrong) defaults, defeating the purpose. + +**Note**: Despite the YAML config schema supporting a `cleaner` section, the analyzer currently cannot use sanitizers from the config. The `--approximations-config` is used exclusively for `passThrough` rules. + +**Kotlin CLI**: `--approximations-config` is repeatable (`List`). Every occurrence is OVERRIDE-merged with the default config. +**Go CLI**: Exposes `--approximations-config ` on the `scan` command as a repeatable flag; each occurrence is forwarded to the analyzer. + +### 1.3 Custom Code-Based Approximations via CLI + +**Problem**: There is no way to pass custom approximation source code via CLI. The agent needs to provide code-based approximations for complex methods (lambdas, async, callbacks). + +**Required change**: The `--dataflow-approximations ` flag on `scan` accepts a directory of Java source files. The CLI automatically compiles them during scan and passes the resulting `.class` files to the analyzer. + +**Design**: Custom approximations are **dataflow approximations** — they go through the same `useDataflowApproximation` path as the built-in ones (Stream, CompletableFuture, etc.), not through the separate `useOpentaintApproximations` / environment variable mechanism. + +**Implementation in `DataFlowApproximationLoader`**: + +1. Add `customApproximationPaths: List = emptyList()` to `Options` +2. In `approximationFiles()`, append custom paths **after** built-in ones: + +```kotlin +private fun approximationFiles(options: Options): List { + val result = mutableListOf() + if (options.useDataflowApproximation) { + result += listOfNotNull(dataflowApproximationsPath?.toFile()) + } + result += options.customApproximationPaths.map { it.toFile() } + return result +} +``` + +No changes needed to `installApproximations()` or `createCpWithApproximations()` — they already consume whatever `approximationFiles()` returns. The `Approximations` feature indexes `@Approximate` annotations from all paths uniformly. + +**Conflict behavior**: If a custom approximation targets the same class as a built-in one, the `ApproximationIndexer`'s bijection `require()` assertions will fire and **report an error**. This is intentional — the agent must not silently override built-in approximations. If the agent needs different behavior for a class that already has a built-in approximation, this indicates a design problem that should be escalated, not silently resolved. + +**Kotlin CLI flag**: `--dataflow-approximations ` (repeatable, accepts directories of compiled `.class` files) +**Go CLI flag**: `--dataflow-approximations ` on `scan`, accepts source directory, compiles automatically (see 1.4) + +### 1.4 Automatic Approximation Compilation During Scan + +**Problem**: The agent writes Java source files for approximations. These need to be compiled to `.class` files before the analyzer can use them. This should be seamless. + +**Design**: The Go CLI's `--dataflow-approximations ` flag: + +1. Scans the directory for `.java` files +2. If `.java` files are found, compiles them automatically: + - Resolves `opentaint-analyzer.jar` (same tier resolution as `scan`) + - Resolves `javac` from managed JRE + - Resolves additional classpath from the target project's dependencies (from `project.yaml`) + - Runs: `javac -source 8 -target 8 -cp : -d ` +3. If compilation fails, reports errors to the agent and aborts scan +4. If compilation succeeds, passes the compiled `.class` directory to the analyzer via `--dataflow-approximations` +5. If only `.class` files are found (no `.java`), passes them directly (pre-compiled) + +**Why this is better than a separate command**: The agent writes source → runs scan → gets results. One command. No intermediate compile step to manage. If compilation fails, the error is reported in the context of the scan attempt. + +**Error reporting**: The CLI captures `javac` stderr and presents compilation errors clearly: +``` +Approximation compilation failed: + agent-approximations/src/ReactiveProcessor.java:12: error: cannot find symbol + com.example.lib.ReactiveProcessor self = ... + ^ + symbol: class ReactiveProcessor + +Hint: Ensure the library being approximated is in the project's dependencies. +``` + +### 1.5 Rule Test Command via Go CLI + +**Problem**: Running rule tests currently requires invoking the Kotlin analyzer JAR directly with `--debug-run-rule-tests`. The Go CLI doesn't expose this capability. + +**Required change**: Add a `test-rules` command to the Go CLI. + +**Go CLI**: +``` +opentaint agent test-rules \ + --ruleset # required, rule files to test + --output # output directory for test-result.json +``` + +The positional argument is the directory produced by `opentaint compile` (contains `project.yaml`). + +**Behavior**: +1. If input is a project directory (not project.yaml), auto-compile via autobuilder +2. Invoke analyzer JAR with `--debug-run-rule-tests --semgrep-rule-set ` +3. Parse and display `test-result.json` summary +4. Exit with non-zero code if any `falsePositive` or `falseNegative` entries exist + +### 1.6 Rule ID Filter + +**Problem**: The agent creates its own rules and may reference built-in library rules. When running analysis, the agent wants to execute **only its rules** (plus the referenced built-in library rules they depend on), without all other built-in security rules firing and producing noise. + +**Current state**: The `--semgrep-rule-severity` flag filters rules by severity. There is no way to filter by rule ID. When `--ruleset builtin --ruleset ./agent-rules` is used, ALL rules from both rulesets are active. + +**Required change**: Add a `--semgrep-rule-id` filter flag (repeatable) that restricts which rules are active. Only rules whose **full** ID is in the filter are kept; every other rule (including library rules referenced via `refs`) is dropped. The filter is intentionally exact: callers must list every rule they want active. + +The full rule ID has the form `.yaml:`, e.g. +`java/security/my-vuln.yaml:my-vulnerability`. + +**Kotlin CLI flag**: `--semgrep-rule-id ` (repeatable) +**Go CLI flag**: `--rule-id ` (repeatable, on `scan` command) + +**Example**: +```bash +# The agent's own rule plus every library rule it depends on must be listed explicitly. +opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin \ + --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --rule-id java/lib/generic/servlet-untrusted-data-source.yaml:java-servlet-untrusted-data-source \ + --rule-id java/lib/generic/jdbc-sql-sink.yaml:java-jdbc-sql-sink +``` + +**Implementation**: In `SemgrepRuleLoader.loadRules`, the filter is applied per-rule via +`ruleIdAllow(rule, filter)`: a rule is kept iff the filter is empty or +`rule.info.ruleId in filter`. Referenced library rules are NOT auto-included. If +`--semgrep-rule-id` is not provided, all loaded rules are active (current behavior preserved). + +### 1.7 Hidden Local JAR Path Flags (Development) + +**Problem**: The CLI resolves analyzer and autobuilder JARs via a 3-tier path system (bundled > install > cache), tied to a version string embedded at compile time. For development, this means the CLI is unusable without publishing the JARs to GitHub Releases. Developers building the analyzer locally cannot test through the CLI. + +**Current state**: Hidden `--analyzer-version` and `--autobuilder-version` flags exist but only change the version tag used for resolution/download — they still require the JAR to be published. + +**Required change**: Add hidden flags that accept a direct filesystem path to the JAR, bypassing version-based resolution entirely. + +**New hidden persistent flags on root command** (`root.go`): + +| Flag | Type | Viper Key | Description | +|---|---|---|---| +| `--analyzer-jar` | string | `analyzer.jar` | Direct path to analyzer JAR (bypasses version resolution) | +| `--autobuilder-jar` | string | `autobuilder.jar` | Direct path to autobuilder JAR (bypasses version resolution) | + +**Implementation in `scan.go`**: +```go +func ensureAnalyzerAvailable() (string, error) { + // Direct path takes priority — skip all resolution and download + if directPath := globals.Config.Analyzer.Jar; directPath != "" { + if _, err := os.Stat(directPath); err != nil { + return "", fmt.Errorf("analyzer JAR not found at %s", directPath) + } + return directPath, nil + } + // Fall back to version-based resolution + analyzerJarPath, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version) + // ...existing logic... +} +``` + +Identical pattern in `compile.go` for `ensureAutobuilderAvailable()`. + +**Usage**: +```bash +# Use locally-built analyzer +opentaint scan --project-model ./opentaint-project -o report.sarif \ + --analyzer-jar ./core/build/libs/opentaint-project-analyzer.jar + +# Use locally-built autobuilder +opentaint compile ./project -o ./opentaint-project \ + --autobuilder-jar ./autobuilder/build/libs/opentaint-project-auto-builder.jar + +# Both +opentaint scan --project-model ./opentaint-project -o report.sarif \ + --analyzer-jar /path/to/local/analyzer.jar \ + --autobuilder-jar /path/to/local/autobuilder.jar + +# Via environment variables (viper binding) +export OPENTAINT_ANALYZER_JAR=/path/to/local/analyzer.jar +opentaint scan --project-model ./opentaint-project -o report.sarif +``` + +**Note**: These flags are hidden (not shown in `--help`) — they are for development use only. When set, no download is attempted. + +### 1.8 Builtin Rules Path Command + +**Problem**: The agent needs to read built-in rules (to understand existing sources/sinks/patterns, to reference them via `refs`, and to decide whether custom rules are needed). Rules are a separate artifact (`opentaint-rules.tar.gz`) resolved via a 3-tier path system (bundled > install > cache) and downloaded lazily. The agent has no way to discover where the rules directory is on disk. + +**Required change**: Add a `rules-path` command to the Go CLI that prints the resolved filesystem path to the built-in rules directory, downloading the rules if not already present. + +**Go CLI**: +``` +opentaint agent rules-path +``` + +**Behavior**: +1. Resolves the rules path using the same 3-tier logic as `scan --ruleset builtin` +2. If rules are not present on disk, downloads `opentaint-rules.tar.gz` from GitHub Releases and extracts +3. Prints the absolute path to stdout (e.g., `/home/user/.opentaint/install/lib/rules`) +4. Exit code 0 on success + +**Usage by the agent**: +```bash +# Get the rules path +RULES_DIR=$(opentaint agent rules-path) + +# Read builtin rules to understand available sources/sinks +ls $RULES_DIR/java/lib/generic/ +cat $RULES_DIR/java/lib/generic/servlet-untrusted-data-source.yaml + +# Read builtin security rules to check coverage +ls $RULES_DIR/java/security/ +``` + +**Implementation**: New command in `cli/cmd/rules_path.go`. Reuses `utils.GetRulesPath()` and the existing download logic from `scan.go:214-224`. + +### 1.8 Test Project Bootstrap Command + +**Problem**: Creating a test project for rule testing requires setting up a Gradle project with the correct `opentaint-sast-test-util` dependency. The agent needs to know how to obtain this JAR and wire it into the build script. This is error-prone. + +**Required change**: Add an `init-test-project` command to the Go CLI that bootstraps a ready-to-use test project. + +**Go CLI**: +``` +opentaint agent init-test-project \ + [--dependency ] ... # additional maven dependencies for test code +``` + +**Behavior**: +1. Creates the directory structure: + ``` + / + ├── build.gradle.kts + ├── settings.gradle.kts + ├── libs/ + │ └── opentaint-sast-test-util.jar + └── src/main/java/test/ + └── .gitkeep + ``` +2. Downloads `opentaint-sast-test-util.jar` from the same artifact source as the analyzer (GitHub releases, tiered resolution: bundled > install > cache). Alternatively, extracts it from the `opentaint-analyzer.jar` if bundled inside. +3. Generates `build.gradle.kts` referencing the local JAR: + ```kotlin + plugins { java } + java { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } + repositories { mavenCentral() } + dependencies { + compileOnly(files("libs/opentaint-sast-test-util.jar")) + // User-requested dependencies: + compileOnly("javax.servlet:javax.servlet-api:4.0.1") + } + ``` +4. Generates `settings.gradle.kts` with a project name derived from the directory. +5. Prints next steps: + ``` + Test project created at ./agent-test-project + + Next steps: + 1. Add test samples in src/main/java/test/ + 2. Build: opentaint compile ./agent-test-project -o ./agent-test-compiled + 3. Test: opentaint agent test-rules ./agent-test-compiled --ruleset -o ./test-output + ``` + +--- + +## 2. Go CLI API Design + +All agent operations flow through the Go CLI (`opentaint`). The design adds 4 new commands and 4 new flags to existing commands. + +### 2.1 Complete Command Reference (Existing + New) + +#### `opentaint compile` (existing) +Build project and create project model. +``` +opentaint compile -o [--dry-run] +``` + +#### `opentaint project` (existing) +Create project model from precompiled artifacts. +``` +opentaint project \ + --output \ + --source-root \ + --classpath ... \ + --package ... \ + [--dependency ...] +``` + +#### `opentaint scan` (existing, extended) +Run analysis. **New flags** marked with ★. +``` +opentaint scan [] \ + [--project-model ] \ + -o \ + [--ruleset builtin] \ + [--ruleset ] \ + [--rule-id ] ★ filter: only run these rule IDs (repeatable) + [--approximations-config ] ★ YAML passThrough config, OVERRIDE mode (repeatable) + [--dataflow-approximations ] ★ approximation source/class dir (auto-compiles .java) + [--track-external-methods] ★ write external-methods-{without,with}-rules.yaml next to SARIF + [--timeout ] \ + [--max-memory ] \ + [--severity ] \ + [--code-flow-limit ] +``` + +Flag interactions: +- Pass either the source project as a positional argument (will be compiled) or a pre-compiled project model via `--project-model ` (contains `project.yaml`). Not both. +- `--ruleset` and `--approximations-config` can be used together (engine change 1.2). +- `--dataflow-approximations` accepts `.java` source dir (auto-compiled) or `.class` dir (passed directly). +- `--track-external-methods` is a boolean; output filenames and directory are fixed (next to the SARIF). +- `--rule-id` takes the FULL rule ID `.yaml:`; rules whose full ID is not listed are dropped, including library rules referenced via join-mode `refs`. + +#### `opentaint agent test-rules` ★ NEW +Run rule tests against a test project. Registered under the `agent` command group. +``` +opentaint agent test-rules \ + --ruleset \ + -o \ + [--timeout ] \ + [--max-memory ] +``` + +The positional argument is the **directory** that contains `project.yaml` (e.g. +`./agent-test-compiled`), not the `project.yaml` file path. + +Output: `/test-result.json` with verdicts per test sample. + +Exit codes: +- `0`: All tests pass (only `success` and `disabled` entries) +- `1`: Test failures exist (`falsePositive`, `falseNegative`, or `skipped` entries) + +Prints a summary table: +``` +Rule Tests Summary: + ✓ success: 12 + ✗ false positive: 1 + ✗ false negative: 2 + - skipped: 0 + - disabled: 1 +``` + +#### `opentaint agent rules-path` ★ NEW +Print the resolved filesystem path to built-in rules (downloads if needed). +Registered under the `agent` command group. +``` +opentaint agent rules-path +``` + +Prints absolute path to stdout. The agent uses this to read built-in rule YAML files. + +#### `opentaint agent init-test-project` ★ NEW +Bootstrap a test project for rule testing. +Registered under the `agent` command group. +``` +opentaint agent init-test-project \ + [--dependency ] ... +``` + +Downloads `opentaint-sast-test-util.jar`, generates `build.gradle.kts` and directory structure. + +#### `opentaint summary` (existing) +Print SARIF results. +``` +opentaint summary \ + [--show-findings] \ + [--show-code-snippets] \ + [--verbose-flow] +``` + +### 2.2 Command Builder Changes + +The `AnalyzerBuilder` in `command_builder.go` needs new methods for the new flags: + +```go +func (b *AnalyzerBuilder) SetApproximationsConfig(configPath string) *AnalyzerBuilder +func (b *AnalyzerBuilder) AddDataflowApproximations(approxPath string) *AnalyzerBuilder +func (b *AnalyzerBuilder) SetExternalMethodsOutput(path string) *AnalyzerBuilder +func (b *AnalyzerBuilder) SetDebugRunRuleTests(enabled bool) *AnalyzerBuilder +func (b *AnalyzerBuilder) AddRuleIdFilter(ruleId string) *AnalyzerBuilder +``` + +These translate to: +| Go CLI flag | Analyzer CLI flag | +|---|---| +| `--approximations-config ` (repeatable) | `--approximations-config ` (repeatable) | +| `--dataflow-approximations ` | `--dataflow-approximations ` (compiled classes dir) | +| `--track-external-methods` | `--track-external-methods` | +| `--rule-id ` | `--semgrep-rule-id ` | +| (`opentaint agent test-rules` command) | `--debug-run-rule-tests` | + +The Go CLI `AnalyzerBuilder` methods: `AddApproximationsConfig(path)`, +`AddDataflowApproximations(path)`, `SetTrackExternalMethods(bool)`, `AddRuleID(id)`, +`EnableRunRuleTests()`. + +--- + +## 3. Agent Skills + +Skills are self-contained instruction sets the agent loads to perform specific operations. Each skill contains: purpose, prerequisites, step-by-step instructions, CLI commands with examples, expected outputs, and error handling. + +### 3.1 Skill: `build-project` + +**Purpose**: Build a target project and prepare it for analysis. + +**Instructions**: + +1. Determine the project type by examining the project directory: + - Look for `build.gradle`, `build.gradle.kts` → Gradle project + - Look for `pom.xml` → Maven project + - Look for pre-compiled JARs → classpath mode + +2. For Gradle/Maven projects, use the autobuilder: + ```bash + opentaint compile /path/to/project -o ./opentaint-project + ``` + +3. For pre-compiled artifacts, use the project command: + ```bash + opentaint project \ + --output ./opentaint-project \ + --source-root /path/to/src \ + --classpath /path/to/app.jar \ + --package com.example.app + ``` + +4. Verify `./opentaint-project/project.yaml` was created. + +5. If compilation fails: + - Check build tool is installed and project builds independently + - Check Java version compatibility (OpenTaint uses Java 21) + - Examine the autobuilder log for specific errors + - Fall back to `opentaint project` with pre-compiled artifacts + +**Expected output**: A directory containing `project.yaml` and compiled class files. + +### 3.2 Skill: `discover-entry-points` + +**Purpose**: Identify entry points and attack surface of the target project by reading source code and analyzing project structure. + +**Instructions**: + +The agent discovers entry points itself — no special CLI command is needed. The analysis engine automatically selects entry points (all public/protected methods for generic projects, Spring endpoints for Spring projects). The agent's role is to **understand** the attack surface to plan rules effectively. + +1. Read the project's source code and identify: + - **Spring controllers**: Search for `@RestController`, `@Controller` annotations. Read `@RequestMapping`, `@GetMapping`, `@PostMapping`, `@PutMapping`, `@DeleteMapping` to understand routes and parameters. + - **Servlet handlers**: Search for classes extending `HttpServlet` with `doGet`, `doPost`, `doPut`, `doDelete` methods. + - **JAX-RS endpoints**: Search for `@Path`, `@GET`, `@POST`, `@PUT`, `@DELETE` annotations. + - **Message handlers**: Search for `@JmsListener`, `@KafkaListener`, `@RabbitListener` annotations. + - **CLI entry points**: Find `main(String[])` methods that process external input (command-line args, stdin, files). + - **Scheduled tasks**: Search for `@Scheduled` methods that read external state (files, DB, network). + +2. For each entry point, determine: + - What external data it receives (HTTP params, headers, body, path variables, message payloads) + - What operations it performs (DB queries, file I/O, command execution, HTTP responses, serialization) + - Which vulnerability classes are relevant (SQLi, XSS, command injection, path traversal, SSRF, XXE, etc.) + +3. Examine the project's dependencies (from `build.gradle`, `pom.xml`, or `project.yaml`) to understand: + - Which frameworks are used (Spring, Servlets, JAX-RS, etc.) + - Which database libraries (JDBC, JPA, MyBatis, etc.) + - Which template engines (Thymeleaf, JSP, Freemarker) + - Which HTTP clients (OkHttp, Apache HttpClient, RestTemplate) + +4. Record findings in `opentaint-analysis-plan.md`. + +**Note**: The engine handles entry point selection automatically during analysis: +- For `--project-kind spring-web`: Uses Spring endpoint discovery (`SpringWebProject.kt`) +- For `--project-kind unknown` (default): Uses all public/protected methods from public project classes +- For targeted analysis: Agent can use `--debug-run-analysis-on-selected-entry-points "com.example.Class#method"` via the Kotlin CLI directly + +### 3.3 Skill: `create-rule` + +**Purpose**: Create a pattern rule for detecting a specific vulnerability class. + +**Instructions**: + +1. Determine the rule architecture: + - **Source**: Where does untrusted data enter? (HTTP params, headers, body, etc.) + - **Sink**: Where is the data dangerous? (SQL query, command exec, file path, HTML output, etc.) + - **Sanitizers**: What makes the data safe? (encoding, escaping, parameterized queries, etc.) + +2. Read built-in rules to check existing coverage: + ```bash + RULES_DIR=$(opentaint agent rules-path) + # List available source/sink library rules + ls $RULES_DIR/java/lib/generic/ + ls $RULES_DIR/java/lib/spring/ + # Read specific rules to understand their patterns and IDs + cat $RULES_DIR/java/lib/generic/servlet-untrusted-data-source.yaml + cat $RULES_DIR/java/lib/generic/jdbc-sql-sink.yaml + # List existing security rules to check what's already covered + ls $RULES_DIR/java/security/ + ``` + - Sources: `$RULES_DIR/java/lib/generic/` and `$RULES_DIR/java/lib/spring/` + - Sinks: Same directories + - If existing rules cover the needed source/sink, skip to step 4 (join-mode composition referencing built-in rules) + +3. If new source/sink patterns are needed, create library rules: + + **Source library rule** (`agent-rules/java/lib/my-source.yaml`): + ```yaml + rules: + - id: my-custom-source + options: + lib: true + severity: NOTE + message: Custom untrusted data source + languages: [java] + patterns: + - pattern-either: + - patterns: + - pattern: | + $RETURNTYPE $METHOD(HttpServletRequest $UNTRUSTED, ...) { ... } + - metavariable-pattern: + metavariable: $METHOD + pattern-either: + - pattern: doGet + - pattern: doPost + ``` + + **Sink library rule** (`agent-rules/java/lib/my-sink.yaml`): + ```yaml + rules: + - id: my-custom-sink + options: + lib: true + severity: NOTE + message: Custom dangerous operation + languages: [java] + mode: taint + pattern-sinks: + - patterns: + - pattern-either: + - pattern: (java.sql.Statement $S).executeQuery($UNTRUSTED) + - pattern: (java.sql.Statement $S).execute($UNTRUSTED) + - focus-metavariable: $UNTRUSTED + ``` + +4. Create the join-mode security rule (`agent-rules/java/security/my-vuln.yaml`): + ```yaml + rules: + - id: my-vulnerability + severity: ERROR + message: >- + Untrusted data flows to dangerous operation + metadata: + cwe: CWE-89 + short-description: SQL Injection via untrusted input + languages: [java] + mode: join + join: + refs: + - rule: java/lib/my-source.yaml#my-custom-source + as: source + - rule: java/lib/my-sink.yaml#my-custom-sink + as: sink + on: + - 'source.$UNTRUSTED -> sink.$UNTRUSTED' + ``` + + You can reference built-in library rules — they will be auto-included when the agent's rule is active: + ```yaml + refs: + - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + as: servlet-source + - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source + as: spring-source + ``` + +5. For simple structural patterns (no dataflow), use default mode: + ```yaml + rules: + - id: weak-crypto + severity: WARNING + message: Use of weak cryptographic algorithm + metadata: + cwe: CWE-327 + short-description: Weak cryptography + languages: [java] + patterns: + - pattern: Cipher.getInstance("DES") + ``` + +6. When running analysis, use `--rule-id` to activate only the agent's rules. The flag + takes the FULL rule ID (`.yaml:`). Library rules referenced + via `refs` are NOT auto-included — the filter drops every rule whose full ID is missing, + so either list every library rule explicitly or omit `--rule-id` to keep all loaded rules active. + ```bash + opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --rule-id java/security/weak-crypto.yaml:weak-crypto + ``` + +**Constraints**: +- Rule IDs must be globally unique +- Library rules must have `options.lib: true` and `severity: NOTE` +- Security rules must have `metadata.cwe` and `metadata.short-description` +- Source/sink metavariable names must match across `refs` + `on` clauses (convention: `$UNTRUSTED`) +- The `rule:` path in `refs` is relative to the ruleset root; when using `--ruleset`, the root is the ruleset directory +- `--rule-id` does not auto-include rules referenced via `refs`; list every library rule explicitly or omit the flag + +### 3.4 Skill: `test-rule` + +**Purpose**: Create test samples for a rule and verify it works correctly. + +**Instructions**: + +1. Bootstrap a test project: + ```bash + opentaint agent init-test-project ./agent-test-project \ + --dependency "javax.servlet:javax.servlet-api:4.0.1" + ``` + + This creates the directory structure with `build.gradle.kts`, `settings.gradle.kts`, and the `opentaint-sast-test-util.jar` in `libs/`. Add more `--dependency` flags for additional libraries your test code needs (e.g., Spring, JDBC drivers). + +2. Create test samples in `src/main/java/test/MyVulnTest.java`: + ```java + package test; + + import org.opentaint.sast.test.util.PositiveRuleSample; + import org.opentaint.sast.test.util.NegativeRuleSample; + import javax.servlet.http.HttpServletRequest; + import java.sql.Connection; + import java.sql.Statement; + + public class MyVulnTest { + + private Connection db; + + @PositiveRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") + public void vulnerable(HttpServletRequest req) throws Exception { + String input = req.getParameter("id"); + Statement stmt = db.createStatement(); + stmt.executeQuery("SELECT * FROM users WHERE id = " + input); + } + + @NegativeRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") + public void safe(HttpServletRequest req) throws Exception { + String input = req.getParameter("id"); + var pstmt = db.prepareStatement("SELECT * FROM users WHERE id = ?"); + pstmt.setString(1, input); + pstmt.executeQuery(); + } + } + ``` + + Annotation fields: + - `value`: Path to the rule YAML file, relative to the ruleset root + - `id`: The rule ID within that file + +3. Build the test project: + ```bash + opentaint compile ./agent-test-project -o ./agent-test-compiled + ``` + +4. Run rule tests (positional argument is the project-model **directory**, not `project.yaml`): + ```bash + opentaint agent test-rules ./agent-test-compiled \ + --ruleset ./agent-rules \ + -o ./test-output + ``` + +5. Check results in `./test-output/test-result.json`: + ```json + { + "success": [ + {"className": "test.MyVulnTest", "methodName": "vulnerable", + "rule": {"rulePath": "java/security/my-vuln.yaml", "ruleId": "my-vulnerability"}}, + {"className": "test.MyVulnTest", "methodName": "safe", + "rule": {"rulePath": "java/security/my-vuln.yaml", "ruleId": "my-vulnerability"}} + ], + "falsePositive": [], + "falseNegative": [], + "skipped": [], + "disabled": [] + } + ``` + +6. If tests fail: + - `falseNegative` (positive sample didn't trigger): Rule patterns too narrow, or missing source/sink patterns + - `falsePositive` (negative sample triggered): Rule patterns too broad, need `pattern-not` or sanitizer exclusion + - `skipped` (rule not found): Check that `value` path and `id` in annotations match the rule file + +7. Fix the rule or test samples and repeat from step 3. + +### 3.5 Skill: `run-analysis` + +**Purpose**: Run OpenTaint analysis on the target project and collect results. + +**Instructions**: + +1. Run analysis with the agent's rules. Pass the pre-compiled model via `--project-model`: + ```bash + opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin \ + --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --track-external-methods \ + --timeout 900s \ + --severity warning,error + ``` + + If you have custom passThrough config: + ```bash + opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --approximations-config ./agent-config/custom-propagators.yaml \ + --track-external-methods + ``` + + If you have approximation source files: + ```bash + opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --approximations-config ./agent-config/custom-propagators.yaml \ + --dataflow-approximations ./agent-approximations/src \ + --track-external-methods + ``` + + The `--dataflow-approximations` flag accepts a directory. If it contains `.java` files, the CLI auto-compiles them using `opentaint-analyzer.jar` as the classpath (which contains `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) plus the target project's dependencies. Compilation errors are reported before analysis starts. + +2. View results summary: + ```bash + opentaint summary ./results/report.sarif --show-findings --verbose-flow + ``` + +3. Collect outputs for the decision loop (all next to the SARIF file): + - `./results/report.sarif` — vulnerability findings with traces + - `./results/external-methods-without-rules.yaml` — priority list (killed dataflow) + - `./results/external-methods-with-rules.yaml` — already modeled (for review) + + The `--track-external-methods` flag is a boolean; filenames and directory are fixed. + +### 3.6 Skill: `analyze-findings` + +**Purpose**: Interpret SARIF findings and decide on TP/FP/FN actions. + +**Instructions**: + +For each finding in the SARIF report: + +1. **Read the trace** (codeFlows in SARIF): + - First location = source (where tainted data enters) + - Last location = sink (where tainted data is used dangerously) + - Intermediate locations = dataflow path + +2. **Classify the finding**: + + **TRUE POSITIVE (TP)**: The trace represents a real vulnerability. + - The source genuinely provides attacker-controlled data + - The sink genuinely performs a dangerous operation with that data + - No sanitization occurs between source and sink + - Action: Generate a proof-of-concept, document in `vulnerabilities.md` + + **FALSE POSITIVE (FP) — fixable via Rule**: The trace is invalid due to over-broad pattern matching. + - The sink pattern is too broad (matches safe methods) + - A sanitizer is not recognized by the pattern + - The source pattern matches non-attacker-controlled data + - Action: Add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or narrow `metavariable-regex`. Update tests. Re-run. + + **FALSE POSITIVE (FP) — fixable via Approximation** (non-preferred): The trace is invalid due to imprecise taint propagation modeling. + - A library method is modeled as propagating taint when it actually transforms data in a way that neutralizes the threat + - Action: Override the passThrough approximation to remove the incorrect propagation. Re-run. + +3. **For external methods list** (FN discovery): + + Focus on the `withoutRules` section first — these methods have no propagation model at all. Classify each: + + **PROPAGATOR**: The method passes taint from input to output. + - Example: `DataWrapper#getValue()` — taint on `this` flows to `result` + - Action: Create a `passThrough` YAML rule via `--approximations-config` + + **TRANSFORMER with lambdas**: The method invokes callbacks/lambdas. + - Example: `ReactiveStream#map(Function)` — taint flows through the function + - Action: Create a code-based approximation via `--dataflow-approximations` + + **NEUTRAL**: The method is irrelevant to taint flow (logging, metrics, sanitizers, etc.) + - Action: Skip — the default call-to-return passthrough is correct + + The `withRules` section can be reviewed if specific traces look suspicious (existing rules may be incorrect or incomplete). + +### 3.7 Skill: `create-yaml-config` + +**Purpose**: Create YAML propagation rules (passThrough) for library methods. + +**Instructions**: + +1. Create a YAML config file (`agent-config/custom-propagators.yaml`): + + **Simple getter propagation** (taint on `this` → `result`): + ```yaml + passThrough: + - function: com.example.lib.DataWrapper#getValue + copy: + - from: this + to: result + ``` + + **Argument-to-result propagation**: + ```yaml + passThrough: + - function: com.example.lib.Converter#convert + copy: + - from: arg(0) + to: result + ``` + + **Builder pattern** (taint flows through builder chain): + ```yaml + passThrough: + - function: com.example.lib.Builder#withName + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + ``` + + **Object with internal state** (using ``): + ```yaml + passThrough: + # Store taint + - function: com.example.lib.Container#put + copy: + - from: arg(0) + to: + - this + - .com.example.lib.Container##java.lang.Object + # Retrieve taint + - function: com.example.lib.Container#get + copy: + - from: + - this + - .com.example.lib.Container##java.lang.Object + to: result + ``` + + **Package-wide getter pattern** (all getters in a package): + ```yaml + passThrough: + - function: + package: com.example.dto + class: + pattern: .* + name: + pattern: get.* + copy: + - from: this + to: result + ``` + + **Conditional propagation**: + ```yaml + passThrough: + - function: com.example.lib.Parser#parse + condition: + typeIs: + position: arg(0) + type: java.lang.String + copy: + - from: arg(0) + to: result + ``` + +2. Use with analysis (`--approximations-config` is repeatable, each OVERRIDE-merged): + ```bash + opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --approximations-config ./agent-config/custom-propagators.yaml + ``` + +**Constraints**: +- The `function` field format is `package.Class#method` (simple) or `{package, class, name}` (complex with patterns) +- Position values: `this`, `result`, `arg(0)`, `arg(1)`, ..., `arg(*)`, `any(classifier)` +- Position modifiers (YAML list): `.[*]` (array element), `.ClassName#fieldName#fieldType` (field access), `.` (synthetic internal state) +- `overrides: true` (default) means the rule applies to subclasses too +- Custom config rules **override** the default config when passed via `--approximations-config` +- Only `passThrough` rules are supported; the analyzer cannot use sanitizers from the config + +### 3.8 Skill: `create-approximation` + +**Purpose**: Create code-based approximations for complex library methods (lambdas, async, callbacks). + +**Instructions**: + +1. Create a Java source file for the approximation in `agent-approximations/src/`: + + ```java + package agent.approximations; + + import org.opentaint.ir.approximation.annotation.Approximate; + // For methods with lambda parameters: + import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + // For non-deterministic branching: + import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + + import java.util.function.Function; + + @Approximate(com.example.lib.ReactiveProcessor.class) + public class ReactiveProcessor { + + // Model: taint on this flows through the function to the result + public Object transform(@ArgumentTypeContext Function fn) throws Throwable { + com.example.lib.ReactiveProcessor self = + (com.example.lib.ReactiveProcessor) (Object) this; + if (OpentaintNdUtil.nextBool()) return null; // async failure path + Object input = self.getValue(); + return fn.apply(input); + } + + // Model: taint on this flows to the consumer argument + public void subscribe(@ArgumentTypeContext java.util.function.Consumer consumer) { + com.example.lib.ReactiveProcessor self = + (com.example.lib.ReactiveProcessor) (Object) this; + if (OpentaintNdUtil.nextBool()) { + consumer.accept(self.getValue()); + } + } + } + ``` + + **Key patterns**: + - `@Approximate(TargetClass.class)` or `@ApproximateByName("fqn")` on the class + - `(TargetClass) (Object) this` cast to access the real object's methods + - `@ArgumentTypeContext` on lambda/functional interface parameters + - `OpentaintNdUtil.nextBool()` for non-deterministic branching (models both success and failure paths) + - Java 8 source compatibility + - One approximation class per target class (strict bijection) + - Must NOT target a class that already has a built-in approximation (will error) + +2. Use with analysis — compilation is automatic: + ```bash + opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --dataflow-approximations ./agent-approximations/src + ``` + + The `--dataflow-approximations` flag detects `.java` files and auto-compiles them using: + - `opentaint-analyzer.jar` as classpath (contains `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) + - Target project's dependencies from `project.yaml` (so `javac` can resolve the library being approximated) + + If compilation fails, errors are reported before analysis starts. + If a custom approximation targets a class that already has a built-in approximation, the analyzer reports an error and aborts. + +**When to use code-based approximations vs YAML config**: +- Lambda/callback invocation → code-based (YAML cannot model lambda calls) +- Non-deterministic branching (async paths) → code-based (`OpentaintNdUtil.nextBool()`) +- Complex internal state with multiple method interactions → code-based (more expressive) +- Simple from→to propagation → YAML passThrough (simpler, faster to write) + +### 3.9 Skill: `generate-poc` + +**Purpose**: Generate a proof-of-concept exploit for a confirmed true positive vulnerability. + +**Instructions**: + +1. Extract the vulnerability trace from SARIF: + - Source: entry point method and parameter (e.g., HTTP request parameter `id`) + - Path: sequence of method calls through which taint flows + - Sink: dangerous operation (e.g., SQL query execution) + +2. Construct a PoC: + - For HTTP-based sources: a `curl` command or HTTP request demonstrating the attack + - For command injection: the payload that achieves command execution + - For SQL injection: the payload that demonstrates data extraction + - For path traversal: the payload that reads/writes unauthorized files + - For XSS: the payload that executes JavaScript in the browser + +3. Document in `vulnerabilities.md`: + ```markdown + ## VULN-001: SQL Injection in UserController.getUser + + **Severity**: Critical (CWE-89) + **Location**: `src/main/java/com/example/controller/UserController.java:45` + **Rule**: `my-vulnerability` + + ### Description + User-controlled input from HTTP parameter `id` flows unsanitized into + a SQL query via `Statement.executeQuery()`. + + ### Trace + 1. **Source**: `UserController.getUser()` — `request.getParameter("id")` (line 42) + 2. **Flow**: String concatenation `"SELECT * FROM users WHERE id = " + input` (line 44) + 3. **Sink**: `Statement.executeQuery(query)` (line 45) + + ### Proof of Concept + ``` + curl "http://target:8080/api/users/1' OR '1'='1" + ``` + + ### Remediation + Use parameterized queries: + ```java + PreparedStatement pstmt = conn.prepareStatement("SELECT * FROM users WHERE id = ?"); + pstmt.setString(1, input); + ``` + ``` + +--- + +## 4. Meta Prompt + +The meta prompt orchestrates the agent through the complete workflow. It references skills and implements the decision loop from task.md steps 1-10. + +``` +You are a security analysis agent using OpenTaint, a dataflow-based SAST analyzer for JVM projects. +OpenTaint is available on PATH as `opentaint`. + +Your goal: Perform comprehensive security analysis of a target project, discovering all vulnerabilities +and minimizing false positives and false negatives. + +## Your Capabilities + +You can: +- Generate pattern rules (YAML) defining vulnerability patterns (sources, sinks, sanitizers) +- Generate YAML passThrough config for library methods +- Generate code-based approximations (Java stubs) for complex methods with lambdas/callbacks +- Test rules against sample code +- Run analysis and interpret results +- Override existing passThrough rules via --approximations-config + +You cannot: +- Modify framework support (Spring detection is automatic) +- Change the analysis algorithm itself +- Add sanitizers via YAML config (sanitizers are handled via pattern rules only) +- Override built-in code-based approximations (will error on conflict) + +## Available Skills + +Load these skills as needed during your workflow: +- `build-project` — Build and prepare the target project +- `discover-entry-points` — Analyze source code to find entry points and attack surface +- `create-rule` — Create pattern rules for vulnerability detection +- `test-rule` — Test rules with annotated samples +- `run-analysis` — Run OpenTaint and collect results +- `analyze-findings` — Interpret SARIF findings and external methods list +- `create-yaml-config` — Create YAML passThrough rules +- `create-approximation` — Create code-based approximations for complex methods +- `generate-poc` — Generate proof-of-concept for confirmed vulnerabilities + +## Workflow + +### Phase 1: Project Setup + +1. Load `build-project` skill. Build the target project: + ```bash + opentaint compile -o ./opentaint-project + ``` + +2. Load `discover-entry-points` skill. Read source code, analyze project structure, identify: + - Framework in use (Spring, Servlets, JAX-RS, etc.) + - Entry points (controllers, servlets, listeners, CLI entry points) + - Attack surface (what external data enters, what dangerous operations are performed) + - Relevant vulnerability classes to test + +3. Create `opentaint-analysis-plan.md` with: + - Project description and technology stack + - Identified entry points and attack surface + - Relevant vulnerability classes to test + - Plan for rule creation + +### Phase 2: Rule Creation + +4. For each relevant vulnerability class (SQLi, XSS, command injection, path traversal, etc.): + + a. Load `create-rule` skill. Read built-in rules to check coverage: + ```bash + RULES_DIR=$(opentaint agent rules-path) + ls $RULES_DIR/java/security/ # existing security rules + ls $RULES_DIR/java/lib/generic/ # available source/sink libraries + ``` + + b. Create rules in `./agent-rules/`: + - Library rules in `./agent-rules/java/lib/` + - Security rules in `./agent-rules/java/security/` + - Reference built-in library rules where applicable + + c. Load `test-rule` skill. Bootstrap and test: + ```bash + opentaint agent init-test-project ./agent-test-project --dependency "javax.servlet:javax.servlet-api:4.0.1" + ``` + - Add `@PositiveRuleSample` and `@NegativeRuleSample` test methods + - Run: `opentaint agent test-rules ./agent-test-compiled --ruleset ./agent-rules -o ./test-output` + - Fix until `test-result.json` shows zero failures + +### Phase 3: Analysis Loop + +5. Load `run-analysis` skill. Run initial analysis: + ```bash + opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --track-external-methods + ``` + +6. Load `analyze-findings` skill. For each SARIF finding: + + **If TRUE POSITIVE**: + - Load `generate-poc` skill + - Generate proof-of-concept exploit + - Document in `vulnerabilities.md` + + **If FALSE POSITIVE (fixable via rule)**: + - Load `create-rule` skill + - Add `pattern-not`, `pattern-sanitizers`, or narrow patterns + - Load `test-rule` skill — add `@NegativeRuleSample` for the FP case + - Re-run tests, then goto step 5 + + **If FALSE POSITIVE (fixable via approximation)** (non-preferred): + - Load `create-yaml-config` skill + - Override the passThrough approximation to remove incorrect propagation + - Goto step 5 + +7. For each entry in `external-methods.yaml` (focus on `withoutRules` section): + + Classify the method (propagator / transformer / neutral): + + **If PROPAGATOR** (simple taint flow): + - Load `create-yaml-config` skill + - Create passThrough rule + - Goto step 5 + + **If TRANSFORMER** (involves lambdas/callbacks): + - Load `create-approximation` skill + - Create approximation source file in `./agent-approximations/src/` + - Goto step 5 + + **If NEUTRAL** (logging, metrics, sanitizers, irrelevant): + - Skip — default passthrough is correct + +### Phase 4: Finalization + +8. When the agent determines analysis is complete: + - All traces have been reviewed and classified + - All identified FP have been fixed + - All relevant external methods have been addressed + - Remaining external methods are classified as NEUTRAL + +9. Update `opentaint-analysis-plan.md` with final status. + +10. Deliver: + - `vulnerabilities.md` — confirmed vulnerabilities with PoCs + - `opentaint-analysis-plan.md` — analysis log + - `./agent-rules/` — custom pattern rules + - `./agent-config/` — custom YAML passThrough rules (if any) + - `./agent-approximations/src/` — custom code-based approximation sources (if any) + +## Working Directory Layout + +``` +/ +├── opentaint-analysis-plan.md # Analysis progress tracking +├── vulnerabilities.md # Confirmed vulnerabilities +├── opentaint-project/ # Compiled project model +│ └── project.yaml +├── agent-rules/ # Agent-created pattern rules +│ └── java/ +│ ├── security/ # Executable security rules +│ └── lib/ # Reusable library rules +├── agent-config/ # Agent-created YAML passThrough config +│ └── custom-propagators.yaml +├── agent-approximations/ # Agent-created code-based approximations +│ └── src/ # Java source files (auto-compiled by CLI) +├── agent-test-project/ # Test project (bootstrapped via init-test-project) +│ ├── build.gradle.kts +│ ├── libs/opentaint-sast-test-util.jar +│ └── src/main/java/test/ +└── results/ # Analysis outputs + ├── report.sarif + └── external-methods.yaml +``` + +## Decision Priorities + +When fixing FN: +1. YAML passThrough rule (simplest, covers most cases) +2. Code-based approximation (for lambdas/callbacks only) +3. Rule pattern fix (only if FN is due to missing source/sink pattern, not missing propagation) + +When fixing FP: +1. Rule fix via `pattern-not` / `pattern-sanitizers` (preferred, scoped to one rule) +2. PassThrough override (non-preferred, affects all rules globally) + +## Iteration Strategy + +- Process findings batch by batch (don't try to fix everything at once) +- After each batch of fixes, re-run analysis and check for regressions +- Group external methods by library/package for efficient batch processing +- Stop when the external methods list stabilizes (no new entries between iterations) + and all SARIF findings are classified +``` + +--- + +## Appendix A: Sample Test Project Bootstrap + +```bash +# Bootstrap test project with servlet API dependency +opentaint agent init-test-project ./agent-test-project \ + --dependency "javax.servlet:javax.servlet-api:4.0.1" + +# Add test samples +cat > ./agent-test-project/src/main/java/test/SampleTest.java << 'EOF' +package test; + +import org.opentaint.sast.test.util.PositiveRuleSample; +import org.opentaint.sast.test.util.NegativeRuleSample; + +public class SampleTest { + + @PositiveRuleSample(value = "java/security/my-rule.yaml", id = "my-rule-id") + public void vulnerableMethod() { + // Write code that demonstrates the vulnerability pattern + } + + @NegativeRuleSample(value = "java/security/my-rule.yaml", id = "my-rule-id") + public void safeMethod() { + // Write code that is safe (sanitized, parameterized, etc.) + } +} +EOF + +# Build and test — the test-rules argument is the project-model directory +opentaint compile ./agent-test-project -o ./agent-test-compiled +opentaint agent test-rules ./agent-test-compiled \ + --ruleset ./agent-rules -o ./test-output +cat ./test-output/test-result.json +``` + +--- + +## Appendix B: SARIF Output Structure (Quick Reference) + +```json +{ + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json", + "version": "2.1.0", + "runs": [{ + "tool": { "driver": { "name": "OpenTaint", "rules": [...] } }, + "results": [{ + "ruleId": "my-vulnerability", + "level": "error", + "message": { "text": "Untrusted data flows to SQL query" }, + "locations": [{ + "physicalLocation": { + "artifactLocation": { "uri": "src/main/java/com/example/UserController.java" }, + "region": { "startLine": 45, "startColumn": 9 } + } + }], + "codeFlows": [{ + "threadFlows": [{ + "locations": [ + { "location": { "physicalLocation": { "region": { "startLine": 42 } }, "message": { "text": "source" } } }, + { "location": { "physicalLocation": { "region": { "startLine": 44 } }, "message": { "text": "flow" } } }, + { "location": { "physicalLocation": { "region": { "startLine": 45 } }, "message": { "text": "sink" } } } + ] + }] + }], + "relatedLocations": [...] + }] + }] +} +``` + +--- + +## Appendix C: External Methods Output Structure (Quick Reference) + +```yaml +withoutRules: + - method: com.example.lib.DataWrapper#getValue + signature: "() java.lang.String" + factPositions: + - this + callSites: 5 + + - method: com.example.lib.Processor#transform + signature: "(java.lang.Object) java.lang.Object" + factPositions: + - arg(0) + - this + callSites: 12 + +withRules: + - method: java.lang.StringBuilder#append + signature: "(java.lang.String) java.lang.StringBuilder" + factPositions: + - arg(0) + callSites: 87 +``` diff --git a/agent-mode/impl/agent-mode-impl.md b/agent-mode/impl/agent-mode-impl.md new file mode 100644 index 000000000..bcb5cc3da --- /dev/null +++ b/agent-mode/impl/agent-mode-impl.md @@ -0,0 +1,1275 @@ +# Agent Mode — Implementation Plan + +This document translates the design in `agent-mode/design/agent-mode-design.md` into a concrete, file-level implementation plan. It covers every module that needs modification, where skills and meta-prompt live, how they're distributed, and how to test without the CLI installed on PATH. + +--- + +## Table of Contents + +1. [Implementation Overview](#1-implementation-overview) +2. [Kotlin Analyzer Changes](#2-kotlin-analyzer-changes) + - 2.1 [External Methods Tracker](#21-external-methods-tracker) + - 2.2 [Rule ID Filter](#22-rule-id-filter) + - 2.3 [Approximations Config + Semgrep Rules Together](#23-approximations-config--semgrep-rules-together) + - 2.4 [Custom Dataflow Approximations Path](#24-custom-dataflow-approximations-path) + - 2.5 [New CLI Flags Wiring](#25-new-cli-flags-wiring) +3. [Go CLI Changes](#3-go-cli-changes) + - 3.1 [New Flags on `scan` Command](#31-new-flags-on-scan-command) + - 3.2 [Approximation Auto-Compilation](#32-approximation-auto-compilation) + - 3.3 [`opentaint agent` Command Group](#33-opentaint-agent-command-group) + - 3.4 [Hidden Dev Flags](#34-hidden-dev-flags) + - 3.5 [AnalyzerBuilder Extensions](#35-analyzerbuilder-extensions) +4. [Skills and Meta-Prompt Location](#4-skills-and-meta-prompt-location) + - 4.1 [Source Layout](#41-source-layout) + - 4.2 [Bundling and Distribution](#42-bundling-and-distribution) + - 4.3 [Runtime Access (Direct File Read)](#43-runtime-access-direct-file-read) +5. [Testing Without CLI on PATH](#5-testing-without-cli-on-path) + - 5.1 [Hidden `--analyzer-jar` / `--autobuilder-jar` Flags](#51-hidden---analyzer-jar----autobuilder-jar-flags) + - 5.2 [Environment Variables](#52-environment-variables) + - 5.3 [Python Test Infrastructure (conftest.py)](#53-python-test-infrastructure-conftestpy) + - 5.4 [Local Dev Workflow](#54-local-dev-workflow) +6. [Implementation Order](#6-implementation-order) +7. [File Change Summary](#7-file-change-summary) + +--- + +## 1. Implementation Overview + +The implementation spans two main codebases (Kotlin analyzer, Go CLI) plus a new `agent/` directory for distributable agent artifacts (skills, meta-prompt). The `agent-mode/` directory remains for design docs and tests only — it is not distributed. + +| Area | Scope | Effort | +|------|-------|--------| +| Kotlin analyzer | 4 features: external methods tracker, rule ID filter, combined config+rules, custom approximations path | Medium-Large | +| Go CLI | 4 new flags on `scan`, `opentaint agent` command group (5 subcommands), hidden dev flags, auto-compilation logic | Medium | +| Skills + Meta-prompt | 9 skill files + 1 meta-prompt in `agent/`, bundled into CLI distribution | Small | +| Distribution | Release pipeline changes to bundle `lib/agent/` | Small | +| Test infrastructure | Already built in Phase 3; needs hidden flag support | Small | + +--- + +## 2. Kotlin Analyzer Changes + +### 2.1 External Methods Tracker + +**Goal**: Collect all external (unresolved) method calls during taint analysis and output them as YAML. + +#### New files + +**`core/opentaint-dataflow-core/opentaint-dataflow/src/main/kotlin/org/opentaint/dataflow/ap/ifds/taint/ExternalMethodTracker.kt`** + +```kotlin +package org.opentaint.dataflow.ap.ifds.taint + +import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.ConcurrentLinkedQueue + +data class ExternalMethodRecord( + val method: String, // "com.example.Foo#bar" + val signature: String, // JVM-style: "(Ljava/lang/String;)V" + val factPositions: Set, // "this", "arg(0)", "arg(1)", "result" + val passRulesApplied: Boolean, // true if passThrough rules were actually applied for this method +) + +class ExternalMethodTracker { + // Dedup key: method+signature+factPosition + private val seen = ConcurrentHashMap.newKeySet() + + // Per-method aggregation: method+signature → (factPositions, passRulesApplied, callSiteCount) + private val records = ConcurrentHashMap() + + fun report( + method: String, + signature: String, + factPosition: String, + passRulesApplied: Boolean, + ) { + val key = "$method|$signature|$factPosition" + if (!seen.add(key)) return + + records.computeIfAbsent("$method|$signature") { + ExternalMethodAggregation(method, signature, passRulesApplied) + }.apply { + addFactPosition(factPosition) + if (passRulesApplied) markPassRulesApplied() + } + } + + fun reportCallSite(method: String, signature: String) { + records.computeIfAbsent("$method|$signature") { + ExternalMethodAggregation(method, signature, false) + }.incrementCallSites() + } + + fun getResults(): ExternalMethodResults { + val withoutRules = mutableListOf() + val withRules = mutableListOf() + + for (agg in records.values) { + val record = agg.toRecord() + if (record.passRulesApplied) withRules.add(record) else withoutRules.add(record) + } + + return ExternalMethodResults( + withoutRules.sortedByDescending { it.callSites }, + withRules.sortedByDescending { it.callSites }, + ) + } +} +``` + +**Pattern**: Modeled after `TaintSinkTracker` (same file location, same `ConcurrentHashMap` dedup pattern, same wiring through storage). + +#### Modified files + +| File | Change | +|------|--------| +| `core/opentaint-dataflow-core/opentaint-dataflow/.../taint/TaintAnalysisUnitStorage.kt` | Add `externalMethodTracker: ExternalMethodTracker` field | +| `core/opentaint-dataflow-core/opentaint-dataflow/.../taint/TaintAnalysisContext.kt` | Expose `externalMethodTracker` from storage | +| `core/opentaint-dataflow-core/opentaint-jvm-dataflow/.../JIRMethodCallFlowFunction.kt` | In `applyPassRulesOrCallSkip()` at line ~617: after resolving `callExpr.callee`, call `externalMethodTracker.report(...)` | +| `core/opentaint-jvm-sast-dataflow/.../TaintAnalysisUnitRunnerManager.kt` | Wire `ExternalMethodTracker` into unit storage creation (same pattern as `TaintSinkTracker`) | +| `core/src/main/kotlin/.../project/ProjectAnalyzer.kt` | After analysis completes, if `externalMethodsOutput` path is set, serialize tracker results to YAML | +| `core/src/main/kotlin/.../project/ProjectAnalysisOptions.kt` | Add `externalMethodsOutput: Path? = null` field | + +#### Integration point in `JIRMethodCallFlowFunction` + +The key insertion point is `applyPassRulesOrCallSkip()`. The existing code already computes whether pass-through rules were applied via `passThroughFacts.onSome { ... }` (line 651). We use this result directly — no separate lookup needed. + +```kotlin +// EXISTING: line 617 +val method = callExpr.callee + +// EXISTING: lines 642-649 +val passThroughFacts = applyPassThrough( + config, method, statement, + fact = passFactReader.factAp, + simpleConditionEvaluator, passEvaluator +) + +// NEW: report to tracker using the actual applyPassThrough result +val tracker = analysisContext.taint.externalMethodTracker +if (tracker != null) { + val methodName = "${method.declaringClass.name}#${method.name}" + val signature = method.jvmSignature + val factPosition = resolveFactPosition(factAp) // "this", "arg(0)", etc. + val passRulesApplied = passThroughFacts.isSome + tracker.report(methodName, signature, factPosition, passRulesApplied) +} +``` + +The `resolveFactPosition` helper maps `FinalFactAp` base to a human-readable position string. The `passRulesApplied` boolean comes directly from checking whether `applyPassThrough` returned `Some` (rules matched and were applied) vs `None` (no matching rules). This is more accurate than checking whether rules *exist* for the method — it reflects whether rules actually *fired* for the given fact position. + +#### Output format + +YAML file written by `ProjectAnalyzer` after analysis: + +```yaml +withoutRules: + - method: "org.apache.pdfbox.pdmodel.PDDocument#save" + signature: "(Ljava/io/OutputStream;)V" + factPositions: ["arg(0)", "this"] + callSites: 12 + - method: "com.fasterxml.jackson.databind.ObjectMapper#readValue" + signature: "(Ljava/lang/String;Ljava/lang/Class;)Ljava/lang/Object;" + factPositions: ["arg(0)", "result"] + callSites: 7 + +withRules: + - method: "java.lang.String#substring" + signature: "(I)Ljava/lang/String;" + factPositions: ["this", "result"] + callSites: 45 +``` + +Serialization uses `kaml` (already a dependency) or `snakeyaml` — consistent with `Project.kt` pattern. + +--- + +### 2.2 Rule ID Filter + +**Goal**: Filter loaded rules by ID. Same mechanism as the existing severity filter. + +#### Modified files + +| File | Change | +|------|--------| +| `core/opentaint-java-querylang/.../SemgrepRuleLoader.kt` | Add `ruleIdFilter` parameter to `loadRules()`, add ID check to `skip()` predicate | +| `core/src/main/kotlin/.../project/ProjectAnalysisOptions.kt` | Add `semgrepRuleId: List = emptyList()` field | +| `core/src/main/kotlin/.../runner/ProjectAnalyzerRunner.kt` | Add `--semgrep-rule-id` Clikt option, wire to `ProjectAnalysisOptions` | + +#### Implementation in `SemgrepRuleLoader.loadRules()` + +The existing `loadRules()` (line 106) already has a `skip()` predicate that filters by severity and library/disabled status: + +```kotlin +fun loadRules(severity: List = emptyList()): RuleLoadResult { + fun Rule<*>.skip(): Boolean = + info.isDisabled || info.isLibraryRule || !ruleSeverityAllow(this, severity) +``` + +The rule ID filter works the same way — just another predicate in `skip()`: + +```kotlin +fun loadRules( + severity: List = emptyList(), + ruleIdFilter: List = emptyList(), +): RuleLoadResult { + fun Rule<*>.skip(): Boolean = + info.isDisabled || info.isLibraryRule + || !ruleSeverityAllow(this, severity) + || !ruleIdAllow(this, ruleIdFilter) + // ... rest unchanged +} + +private fun ruleIdAllow(rule: Rule<*>, ruleIdFilter: List): Boolean = + ruleIdFilter.isEmpty() || rule.id in ruleIdFilter +``` + +Library rules (`isLibraryRule = true`) are already excluded by the existing `skip()` logic — they are loaded but not run directly. They only participate when referenced by join-mode rules. The ID filter does not need to walk `refs` because the existing rule resolution pipeline already handles library rule inclusion for join-mode rules independently of the skip filter. + +--- + +### 2.3 Approximations Config + Semgrep Rules Together + +**Goal**: Remove the mutual exclusion between `--config` (approximations) and `--semgrep-rule-set`. + +#### Modified files + +| File | Change | +|------|--------| +| `core/src/main/kotlin/.../project/ProjectAnalyzer.kt` | Modify `preloadRules()` at lines 54-80: add 4th variant, remove `check()` at line 62 | +| `core/src/main/kotlin/.../runner/ProjectAnalyzerRunner.kt` | Rename `--config` flag to `--approximations-config` | + +#### Implementation in `ProjectAnalyzer.preloadRules()` + +Current code (lines 54-80): +```kotlin +private sealed interface PreloadedRules { + data class SemgrepRules(val rules: List) : PreloadedRules + data class Custom(val config: SerializedTaintConfig) : PreloadedRules + data object DefaultRules : PreloadedRules +} + +private fun preloadRules(): PreloadedRules { + if (options.semgrepRuleSet.isNotEmpty()) { + check(options.customConfig == null) { "Unsupported custom config" } // ← REMOVE THIS + val loadedRules = options.loadSemgrepRules() + ruleMetadatas += loadedRules.rulesWithMeta.map { it.second } + return PreloadedRules.SemgrepRules(loadedRules.rulesWithMeta.map { it.first }) + } + // ... +} +``` + +New code: +```kotlin +private sealed interface PreloadedRules { + data class SemgrepRules(val rules: List) : PreloadedRules + data class Custom(val config: SerializedTaintConfig) : PreloadedRules + data class SemgrepRulesWithCustomConfig( + val rules: List, + val config: SerializedTaintConfig, + ) : PreloadedRules + data object DefaultRules : PreloadedRules +} + +private fun preloadRules(): PreloadedRules { + val customConfig = options.customConfig?.let { cfg -> + cfg.inputStream().use { loadSerializedTaintConfig(it) } + } + + if (options.semgrepRuleSet.isNotEmpty()) { + val loadedRules = options.loadSemgrepRules(ruleIdFilter = options.semgrepRuleId) + ruleMetadatas += loadedRules.rulesWithMeta.map { it.second } + val rules = loadedRules.rulesWithMeta.map { it.first } + + return if (customConfig != null) { + PreloadedRules.SemgrepRulesWithCustomConfig(rules, customConfig) + } else { + PreloadedRules.SemgrepRules(rules) + } + } + + if (customConfig != null) { + return PreloadedRules.Custom(customConfig) + } + + return PreloadedRules.DefaultRules +} +``` + +Then in `loadTaintConfig()` (lines 82-103), add a branch for `SemgrepRulesWithCustomConfig`: + +```kotlin +is PreloadedRules.SemgrepRulesWithCustomConfig -> { + // Load default config, override with custom, then layer semgrep rules on top + val defaultConfig = loadDefaultConfig() + val mergedConfig = JIRCombinedTaintRulesProvider(defaultConfig, rules.config) // OVERRIDE mode + // Then apply semgrep rules to mergedConfig + // ... (same as SemgrepRules branch but with mergedConfig as base) +} +``` + +The OVERRIDE semantics means: custom config entries for the same method signature replace (not extend) the default config entries. This is already how `JIRCombinedTaintRulesProvider` works — later entries take precedence. + +--- + +### 2.4 Custom Dataflow Approximations Path + +**Goal**: Accept external directories of compiled approximation `.class` files via CLI flag. + +#### Modified files + +| File | Change | +|------|--------| +| `core/opentaint-jvm-sast-dataflow/.../DataFlowApproximationLoader.kt` | Add `customApproximationPaths: List` to `Options`, append in `approximationFiles()` | +| `core/src/main/kotlin/.../project/ProjectAnalysisOptions.kt` | Already has `approximationOptions: DataFlowApproximationLoader.Options` — new paths flow through this | +| `core/src/main/kotlin/.../runner/ProjectAnalyzerRunner.kt` | Add `--dataflow-approximations` Clikt option | + +#### Implementation in `DataFlowApproximationLoader` + +Current `Options` (line 20-23): +```kotlin +data class Options( + val useDataflowApproximation: Boolean = true, + val useOpentaintApproximations: Boolean = false, +) +``` + +New `Options`: +```kotlin +data class Options( + val useDataflowApproximation: Boolean = true, + val useOpentaintApproximations: Boolean = false, + val customApproximationPaths: List = emptyList(), +) +``` + +Modified `approximationFiles()` (lines 52-63): +```kotlin +private fun approximationFiles(options: Options): List { + val result = mutableListOf() + if (options.useDataflowApproximation) { + result += listOfNotNull(dataflowApproximationsPath?.toFile()) + } + if (options.useOpentaintApproximations) { + result += approximationPaths.presentPaths.map { File(it) } + } + // NEW: append custom paths AFTER built-in ones + result += options.customApproximationPaths.map { it.toFile() } + return result +} +``` + +Custom paths are appended **after** built-in ones. The `ApproximationIndexer` (which scans `@Approximate` annotations) maintains a bijection map from target class → approximation class. If a custom approximation targets the same class as a built-in one, the bijection's `require()` will throw — this is intentional (no silent override of built-in approximations). + +--- + +### 2.5 New CLI Flags Wiring + +**File**: `core/src/main/kotlin/org/opentaint/jvm/sast/runner/ProjectAnalyzerRunner.kt` + +Current flag definitions (lines 23-52): + +```kotlin +class ProjectAnalyzerRunner : AbstractAnalyzerRunner(name = "analyze") { + // existing flags... + private val config: Path? by option("--config").file(mustExist = true) + private val semgrepRuleSet: List by option("--semgrep-rule-set").file(mustExist = true).multiple() + private val semgrepSeverity: List by option("--semgrep-rule-severity").enum().multiple() + // ... +} +``` + +New flags to add: + +```kotlin +// Rename --config → --approximations-config (keep --config as hidden alias for backward compat) +private val approximationsConfig: Path? by option("--approximations-config", "--config") + .file(mustExist = true) + +// New: Rule ID filter +private val semgrepRuleId: List by option("--semgrep-rule-id") + .multiple() + +// New: External methods output path +private val externalMethodsOutput: Path? by option("--external-methods-output") + .newFile() + +// New: Custom dataflow approximation directories +private val dataflowApproximations: List by option("--dataflow-approximations") + .file(mustExist = true, canBeDir = true) + .multiple() +``` + +In `analyzeProject()` (line ~80), wire to `ProjectAnalysisOptions`: + +```kotlin +val options = ProjectAnalysisOptions( + customConfig = approximationsConfig, // was: config + semgrepRuleSet = semgrepRuleSet, + semgrepRuleId = semgrepRuleId, // NEW + externalMethodsOutput = externalMethodsOutput, // NEW + // ... + approximationOptions = DataFlowApproximationLoader.Options( + useDataflowApproximation = true, + customApproximationPaths = dataflowApproximations, // NEW + ), +) +``` + +--- + +## 3. Go CLI Changes + +### 3.1 New Flags on `scan` Command + +**File**: `cli/cmd/scan.go` + +Add to `init()`: + +```go +// New flags +var RuleId []string +var ApproximationsConfig string +var DataflowApproximations string +var ExternalMethods string + +func init() { + rootCmd.AddCommand(scanCmd) + // ... existing flags ... + + // NEW + scanCmd.Flags().StringArrayVar(&RuleId, "rule-id", nil, + "Filter active rules by ID (repeatable)") + scanCmd.Flags().StringVar(&ApproximationsConfig, "approximations-config", "", + "Path to YAML passThrough approximations config (OVERRIDE mode)") + scanCmd.Flags().StringVar(&DataflowApproximations, "dataflow-approximations", "", + "Directory of .java or .class approximation files") + scanCmd.Flags().StringVar(&ExternalMethods, "external-methods", "", + "Output path for external methods YAML list") +} +``` + +In the `scan` command's `Run` function, before building the analyzer command: + +```go +// Handle --dataflow-approximations auto-compilation +compiledApproxDir := "" +if DataflowApproximations != "" { + compiledApproxDir = compileApproximationsIfNeeded(DataflowApproximations, projectPath) +} + +// Build analyzer command +nativeBuilder := NewAnalyzerBuilder(). + // ... existing ... + +// NEW: wire flags +for _, id := range RuleId { + nativeBuilder.AddRuleIdFilter(id) +} +if ApproximationsConfig != "" { + nativeBuilder.SetApproximationsConfig(ApproximationsConfig) +} +if compiledApproxDir != "" { + nativeBuilder.AddDataflowApproximations(compiledApproxDir) +} else if DataflowApproximations != "" { + nativeBuilder.AddDataflowApproximations(DataflowApproximations) +} +if ExternalMethods != "" { + nativeBuilder.SetExternalMethodsOutput(ExternalMethods) +} +``` + +--- + +### 3.2 Approximation Auto-Compilation + +**New file**: `cli/cmd/compile_approximations.go` + +This function handles the `--dataflow-approximations` flag when the directory contains `.java` sources: + +```go +package cmd + +func compileApproximationsIfNeeded(approxDir string, projectPath string) string { + // 1. Scan dir for .java files + javaFiles := findJavaFiles(approxDir) + if len(javaFiles) == 0 { + return approxDir // .class files only — pass directly + } + + // 2. Resolve javac (from managed JRE or system) + javaRunner := java.NewJavaRunner().TrySystem().TrySpecificVersion(globals.DefaultJavaVersion) + javacPath := resolveJavac(javaRunner) + + // 3. Resolve analyzer JAR (for @Approximate annotation classes) + analyzerJar := resolveAnalyzerJar() + + // 4. Resolve project dependencies (from project.yaml) + projectDeps := resolveProjectDeps(projectPath) + + // 5. Create temp output dir + outputDir := createTempDir("opentaint-approx-compiled-") + + // 6. Build classpath: analyzer.jar + project deps + classpath := analyzerJar + string(os.PathListSeparator) + strings.Join(projectDeps, string(os.PathListSeparator)) + + // 7. Run javac + args := []string{ + "-source", "8", "-target", "8", + "-cp", classpath, + "-d", outputDir, + } + args = append(args, javaFiles...) + + err := exec.Command(javacPath, args...).Run() + if err != nil { + // Report javac stderr, abort + out.Fatalf("Failed to compile approximations: %v\n%s", err, stderr) + } + + return outputDir +} +``` + +**Dependencies**: Uses `java.NewJavaRunner()` (existing) for JDK resolution. Uses `utils.GetAnalyzerJarPath()` (existing) for analyzer JAR. Reads `project.yaml` via `utils/project/config.go` (existing) for dependency classpath. + +**Key detail**: The `-source 8 -target 8` ensures compatibility with the analyzer's classloader. The analyzer JAR is needed on the compilation classpath because it contains `@Approximate`, `@ApproximateByName`, `@ArgumentTypeContext`, and `OpentaintNdUtil` classes. + +--- + +### 3.3 `opentaint agent` Command Group + +All agent-related commands are grouped under `opentaint agent`: + +| Command | Purpose | +|---------|---------| +| `opentaint agent skills` | Print resolved path to bundled skill files | +| `opentaint agent prompt` | Print resolved path to the meta-prompt file | +| `opentaint agent rules-path` | Print resolved path to builtin rules (downloads on demand) | +| `opentaint agent test-rules` | Run rule tests against annotated test samples | +| `opentaint agent init-test-project` | Bootstrap a rule test project with build.gradle.kts and test utility JAR | + +#### Parent command + +**New file**: `cli/cmd/agent.go` + +```go +package cmd + +import "github.com/spf13/cobra" + +var agentCmd = &cobra.Command{ + Use: "agent", + Short: "Agent mode commands: skills, prompts, rule testing", +} + +func init() { + rootCmd.AddCommand(agentCmd) +} +``` + +#### `opentaint agent skills` + +**New file**: `cli/cmd/agent_skills.go` + +```go +package cmd + +var agentSkillsCmd = &cobra.Command{ + Use: "skills", + Short: "Print the resolved path to bundled agent skill files", + Args: cobra.NoArgs, + Run: func(cmd *cobra.Command, args []string) { + skillsDir, err := utils.GetAgentPath("skills") + if err != nil { + out.Fatalf("Skills not found: %v", err) + } + fmt.Println(skillsDir) + }, +} + +func init() { + agentCmd.AddCommand(agentSkillsCmd) +} +``` + +#### `opentaint agent prompt` + +**New file**: `cli/cmd/agent_prompt.go` + +```go +package cmd + +var agentPromptCmd = &cobra.Command{ + Use: "prompt", + Short: "Print the resolved path to the agent meta-prompt", + Args: cobra.NoArgs, + Run: func(cmd *cobra.Command, args []string) { + promptPath, err := utils.GetAgentPath("meta-prompt.md") + if err != nil { + out.Fatalf("Meta-prompt not found: %v", err) + } + fmt.Println(promptPath) + }, +} + +func init() { + agentCmd.AddCommand(agentPromptCmd) +} +``` + +#### `opentaint agent rules-path` + +**New file**: `cli/cmd/agent_rules_path.go` + +```go +package cmd + +var agentRulesPathCmd = &cobra.Command{ + Use: "rules-path", + Short: "Print the resolved path to builtin rules", + Args: cobra.NoArgs, + Run: func(cmd *cobra.Command, args []string) { + version := globals.Config.Rules.Version + if version == "" { + version = globals.RulesBindVersion + } + + rulesPath, err := utils.GetRulesPath(version) + if err != nil { + err = ensureArtifactAvailable("rules", version, rulesPath, downloadRules) + if err != nil { + out.Fatalf("Failed to resolve rules: %v", err) + } + rulesPath, _ = utils.GetRulesPath(version) + } + + fmt.Println(rulesPath) + }, +} + +func init() { + agentCmd.AddCommand(agentRulesPathCmd) +} +``` + +Reuses `utils.GetRulesPath()` and download logic already present in `scan.go:214-224`. Downloads rules on demand (same 3-tier resolution: bundled > install > cache). + +#### `opentaint agent test-rules` + +**New file**: `cli/cmd/agent_test_rules.go` + +```go +package cmd + +var TestRulesRuleset string +var TestRulesOutput string + +var agentTestRulesCmd = &cobra.Command{ + Use: "test-rules ", + Short: "Run rule tests against annotated test samples", + Args: cobra.ExactArgs(1), + Annotations: map[string]string{"PrintConfig": "true"}, + Run: func(cmd *cobra.Command, args []string) { + projectPath := args[0] + + // 1. If projectPath is a directory (not project.yaml), auto-compile + if isDirectory(projectPath) { + projectPath = autoCompile(projectPath, TestRulesOutput) + } + + // 2. Build analyzer command with --debug-run-rule-tests + builder := NewAnalyzerBuilder(). + SetProject(projectPath). + SetOutputDir(TestRulesOutput). + SetDebugRunRuleTests(true) + + if TestRulesRuleset != "" { + builder.AddRuleSet(resolveRuleset(TestRulesRuleset)) + } + + // 3. Execute + err := executeAnalyzer(builder) + + // 4. Parse test-result.json + result := parseTestResult(filepath.Join(TestRulesOutput, "test-result.json")) + + // 5. Print summary table + printTestSummary(result) + + // 6. Exit code: 0 if only success/disabled, 1 if any falsePositive/falseNegative/skipped + if result.HasFailures() { + os.Exit(1) + } + }, +} + +func init() { + agentCmd.AddCommand(agentTestRulesCmd) + agentTestRulesCmd.Flags().StringVar(&TestRulesRuleset, "ruleset", "", "Path to rules directory") + agentTestRulesCmd.Flags().StringVarP(&TestRulesOutput, "output", "o", "", "Output directory") + _ = agentTestRulesCmd.MarkFlagRequired("output") +} +``` + +#### `opentaint agent init-test-project` + +**New file**: `cli/cmd/agent_init_test_project.go` + +```go +package cmd + +var InitTestProjectDeps []string + +var agentInitTestProjectCmd = &cobra.Command{ + Use: "init-test-project ", + Short: "Bootstrap a rule test project with build.gradle.kts and test utility JAR", + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + outputDir := args[0] + + // 1. Create directory structure + os.MkdirAll(filepath.Join(outputDir, "libs"), 0755) + os.MkdirAll(filepath.Join(outputDir, "src", "main", "java", "test"), 0755) + + // 2. Resolve and copy opentaint-sast-test-util.jar + testUtilJar := resolveTestUtilJar() + copyFile(testUtilJar, filepath.Join(outputDir, "libs", "opentaint-sast-test-util.jar")) + + // 3. Generate build.gradle.kts + generateBuildGradle(outputDir, InitTestProjectDeps) + + // 4. Generate settings.gradle.kts + generateSettingsGradle(outputDir) + }, +} + +func init() { + agentCmd.AddCommand(agentInitTestProjectCmd) + agentInitTestProjectCmd.Flags().StringArrayVar(&InitTestProjectDeps, "dependency", nil, + "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") +} +``` + +The `opentaint-sast-test-util.jar` is bundled in the CLI distribution as `lib/opentaint-sast-test-util.jar`. It's tiny (just 2 annotation classes). The release workflow's "Download bundled artifacts" step fetches it alongside the analyzer and autobuilder JARs. + +Generated `build.gradle.kts`: +```kotlin +plugins { + java +} + +java { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 +} + +repositories { + mavenCentral() +} + +dependencies { + compileOnly(files("libs/opentaint-sast-test-util.jar")) + // User-specified dependencies: + // compileOnly("javax.servlet:javax.servlet-api:4.0.1") +} +``` + +#### Resolution logic + +**File**: `cli/internal/utils/opentaint_home.go` + +```go +// GetAgentPath resolves a path within the bundled agent directory. +// Checks bundled tier (exe-dir/lib/agent/) then install tier (~/.opentaint/install/lib/agent/). +func GetAgentPath(subpath string) (string, error) { + exeDir := getExeDir() + bundled := filepath.Join(exeDir, "lib", "agent", subpath) + if _, err := os.Stat(bundled); err == nil { + return bundled, nil + } + + install := filepath.Join(OpentaintHome(), "install", "lib", "agent", subpath) + if _, err := os.Stat(install); err == nil { + return install, nil + } + + return "", fmt.Errorf("agent resource '%s' not found; reinstall opentaint or run 'opentaint pull'", subpath) +} +``` + +--- + +### 3.4 Hidden Dev Flags + +**File**: `cli/cmd/root.go` + +Add persistent flags (hidden): + +```go +func init() { + // ... existing PersistentFlags ... + + // Hidden: direct JAR paths for development + rootCmd.PersistentFlags().StringVar(&globals.Config.Analyzer.JarPath, "analyzer-jar", "", + "Direct path to analyzer JAR (bypasses version resolution)") + rootCmd.PersistentFlags().StringVar(&globals.Config.Autobuilder.JarPath, "autobuilder-jar", "", + "Direct path to autobuilder JAR (bypasses version resolution)") + + rootCmd.PersistentFlags().MarkHidden("analyzer-jar") + rootCmd.PersistentFlags().MarkHidden("autobuilder-jar") + + _ = viper.BindPFlag("analyzer.jar", rootCmd.PersistentFlags().Lookup("analyzer-jar")) + _ = viper.BindPFlag("autobuilder.jar", rootCmd.PersistentFlags().Lookup("autobuilder-jar")) +} +``` + +**File**: `cli/internal/globals/global.go` + +Add fields to `ConfigType`: + +```go +type ConfigType struct { + // ... existing ... + Analyzer struct { + Version string `mapstructure:"version"` + JarPath string `mapstructure:"jar"` // NEW + } + Autobuilder struct { + Version string `mapstructure:"version"` + JarPath string `mapstructure:"jar"` // NEW + } +} +``` + +**File**: `cli/cmd/artifacts.go` or `scan.go` + +In `ensureAnalyzerAvailable()` or wherever the JAR path is resolved: + +```go +func resolveAnalyzerJar() string { + // NEW: check direct path first + if globals.Config.Analyzer.JarPath != "" { + if _, err := os.Stat(globals.Config.Analyzer.JarPath); err == nil { + return globals.Config.Analyzer.JarPath + } + out.Fatalf("Analyzer JAR not found at specified path: %s", globals.Config.Analyzer.JarPath) + } + + // Existing: 3-tier resolution + return existingResolutionLogic() +} +``` + +**Environment variables**: Via viper's env binding, these are also settable as: +- `OPENTAINT_ANALYZER_JAR=/path/to/jar` +- `OPENTAINT_AUTOBUILDER_JAR=/path/to/jar` + +--- + +### 3.5 AnalyzerBuilder Extensions + +**File**: `cli/cmd/command_builder.go` + +Add fields to `AnalyzerBuilder`: + +```go +type AnalyzerBuilder struct { + *BaseCommandBuilder + // ... existing fields ... + + // NEW + ruleIdFilters []string + approximationsConfig string + dataflowApproximations []string + externalMethodsOutput string + debugRunRuleTests bool +} +``` + +Add setter methods: + +```go +func (a *AnalyzerBuilder) AddRuleIdFilter(ruleId string) *AnalyzerBuilder { + a.ruleIdFilters = append(a.ruleIdFilters, ruleId) + return a +} + +func (a *AnalyzerBuilder) SetApproximationsConfig(path string) *AnalyzerBuilder { + a.approximationsConfig = path + return a +} + +func (a *AnalyzerBuilder) AddDataflowApproximations(path string) *AnalyzerBuilder { + a.dataflowApproximations = append(a.dataflowApproximations, path) + return a +} + +func (a *AnalyzerBuilder) SetExternalMethodsOutput(path string) *AnalyzerBuilder { + a.externalMethodsOutput = path + return a +} + +func (a *AnalyzerBuilder) SetDebugRunRuleTests(enabled bool) *AnalyzerBuilder { + a.debugRunRuleTests = enabled + return a +} +``` + +Modify `BuildNativeCommand()`: + +```go +func (a *AnalyzerBuilder) BuildNativeCommand() []string { + flags := []string{...} // existing + + // NEW: append new flags + for _, id := range a.ruleIdFilters { + flags = append(flags, "--semgrep-rule-id", id) + } + if a.approximationsConfig != "" { + flags = append(flags, "--approximations-config", a.approximationsConfig) + } + for _, path := range a.dataflowApproximations { + flags = append(flags, "--dataflow-approximations", path) + } + if a.externalMethodsOutput != "" { + flags = append(flags, "--external-methods-output", a.externalMethodsOutput) + } + if a.debugRunRuleTests { + flags = append(flags, "--debug-run-rule-tests") + } + + return flags +} +``` + +--- + +## 4. Skills and Meta-Prompt Location + +### 4.1 Source Layout + +Skills and meta-prompt are Markdown files in a dedicated `agent/` directory at the repository root. This directory contains **distributable artifacts only** — design docs and tests remain in `agent-mode/`. + +``` +opentaint/ +├── agent/ # Distributable agent artifacts +│ ├── meta-prompt.md # The system prompt for the agent +│ └── skills/ # Individual skill files +│ ├── build-project.md # Skill 3.1 +│ ├── discover-entry-points.md # Skill 3.2 +│ ├── create-rule.md # Skill 3.3 +│ ├── test-rule.md # Skill 3.4 +│ ├── run-analysis.md # Skill 3.5 +│ ├── analyze-findings.md # Skill 3.6 +│ ├── create-yaml-config.md # Skill 3.7 +│ ├── create-approximation.md # Skill 3.8 +│ └── generate-poc.md # Skill 3.9 +├── agent-mode/ # Design docs and tests (NOT distributed) +│ ├── design/ +│ ├── impl/ +│ ├── info/ +│ └── test/ +└── ... +``` + +Each skill file is a self-contained Markdown document with: +- **Title and purpose** — what the skill does +- **Prerequisites** — what must be true before using this skill +- **Procedure** — step-by-step instructions with CLI commands +- **Examples** — concrete YAML/Java/command examples +- **Troubleshooting** — common errors and fixes + +The meta-prompt (`meta-prompt.md`) is the top-level system prompt that references skills by name and defines the 4-phase agent workflow. + +### 4.2 Bundling and Distribution + +Skills are bundled into the CLI distribution archive as `lib/agent/`, following the same pattern as rules. + +**Archive layout**: + +``` +opentaint_linux_amd64.tar.gz +├── opentaint # Go binary +└── lib/ + ├── opentaint-project-analyzer.jar + ├── opentaint-project-auto-builder.jar + ├── opentaint-sast-test-util.jar # NEW + ├── rules/ # Extracted rules + └── agent/ # NEW + ├── meta-prompt.md + └── skills/ + ├── build-project.md + ├── create-rule.md + ├── test-rule.md + ├── run-analysis.md + ├── analyze-findings.md + ├── create-yaml-config.md + ├── create-approximation.md + ├── discover-entry-points.md + └── generate-poc.md +``` + +All three archive variants (`cli`, `default`, `full`) include agent files. + +**Release workflow change** (`.github/workflows/release-cli.yaml`): + +Add step after "Download bundled artifacts": + +```yaml +- name: Bundle agent skills and prompt + run: | + mkdir -p cli/lib/agent/skills + cp agent/meta-prompt.md cli/lib/agent/ + cp agent/skills/*.md cli/lib/agent/skills/ + +- name: Bundle test utility JAR + run: | + # Download from analyzer release (built alongside the analyzer) + cp opentaint-sast-test-util.jar cli/lib/ +``` + +No changes to installation scripts needed. The `install.sh`/`install.ps1` scripts download and extract the archive — agent files and test-util JAR come along automatically. + +### 4.3 Runtime Access (Direct File Read) + +The agent reads skill files directly from the filesystem. The meta-prompt instructs: + +```markdown +## Setup +1. Run `opentaint agent skills` to get the skills directory path +2. Run `opentaint agent prompt` to get the meta-prompt file path +3. Read the meta-prompt for the overall workflow +4. Read individual skill files as needed during each phase +``` + +This works with any agent framework (Cursor, Cline, Aider, custom). The agent resolves paths via CLI commands and reads files using its native file-read capabilities. + +--- + +## 5. Testing Without CLI on PATH + +### 5.1 Hidden `--analyzer-jar` / `--autobuilder-jar` Flags + +When `opentaint` IS on PATH but JARs haven't been downloaded (no `~/.opentaint`), the hidden flags allow pointing directly to locally-built JARs: + +```bash +opentaint scan /path/to/project.yaml \ + --analyzer-jar ./core/build/libs/opentaint-jvm-sast.jar \ + -o report.sarif +``` + +This skips the 3-tier resolution entirely. + +### 5.2 Environment Variables + +Via viper's env binding (prefix `OPENTAINT_`, `_` separator): + +```bash +export OPENTAINT_ANALYZER_JAR=/home/sobol/IdeaProjects/opentaint/core/build/libs/opentaint-jvm-sast.jar +export OPENTAINT_AUTOBUILDER_JAR=/home/sobol/IdeaProjects/opentaint/core/opentaint-jvm-autobuilder/build/libs/opentaint-project-auto-builder.jar + +# Now scan just works +opentaint scan /path/to/project.yaml -o report.sarif +``` + +### 5.3 Python Test Infrastructure (`conftest.py`) + +The test infrastructure already handles the "no CLI on PATH" case with a dual-mode strategy: + +1. **`_find_opentaint_cli()`** — calls `shutil.which("opentaint")`. Returns `None` if not found. +2. **`OpenTaintCLI.has_cli`** — `True` if Go CLI found, `False` otherwise. +3. **Each command method** (`.scan()`, `.compile()`, etc.) branches: + - CLI mode: `opentaint scan ...` + - JAR mode: `java -jar analyzer.jar --project ... --output-dir ...` + +**Flag translation** (Go CLI → Kotlin CLI): + +| Go CLI | Kotlin CLI (JAR mode) | +|--------|----------------------| +| `--ruleset ` | `--semgrep-rule-set ` | +| `--rule-id ` | `--semgrep-rule-id ` | +| `--approximations-config ` | `--approximations-config ` (same name after rename) | +| `--dataflow-approximations ` | `--dataflow-approximations ` (same name) | +| `--external-methods ` | `--external-methods-output ` | +| `--severity ` | `--semgrep-rule-severity=` | + +**Limitation in JAR mode**: Agent subcommands (`opentaint agent rules-path`, `opentaint agent init-test-project`, `opentaint agent skills`) have no JAR equivalent. They return hardcoded results or skip: + +```python +def agent_rules_path(self) -> CLIResult: + if self.has_cli: + return self._run(["agent", "rules-path"]) + # Fallback: return known path in dev environment + return CLIResult(0, str(BUILTIN_RULES_DIR), "", []) + +def agent_init_test_project(self, output_dir, dependencies=None) -> CLIResult: + if self.has_cli: + args = ["agent", "init-test-project", str(output_dir)] + for dep in (dependencies or []): + args += ["--dependency", dep] + return self._run(args) + # No JAR equivalent — skip + return CLIResult(1, "", "init-test-project not available in direct JAR mode", []) +``` + +**For `--dataflow-approximations` in JAR mode**: The auto-compilation step (which the Go CLI does) must be done manually. The test infrastructure should detect `.java` files and compile them before passing the compiled directory to the JAR. This is already handled in the test fixture setup. + +### 5.4 Local Dev Workflow + +#### Option 1: Build Go CLI locally + use hidden flags + +```bash +# Build CLI +cd cli && go build -o opentaint . + +# Build analyzer (if needed) +cd core && ./gradlew :projectAnalyzerJar + +# Run scan with direct JAR paths +./cli/opentaint scan /path/to/project.yaml \ + --analyzer-jar ./core/build/libs/opentaint-jvm-sast.jar \ + -o report.sarif + +# Agent commands work too +./cli/opentaint agent rules-path +./cli/opentaint agent test-rules ./test-project/project.yaml \ + --ruleset ./agent-rules -o ./test-output +``` + +#### Option 2: Direct JAR mode (no Go CLI) + +```bash +# Build analyzer +cd core && ./gradlew :projectAnalyzerJar + +# Run directly +java -Xmx8G \ + -Dorg.opentaint.ir.impl.storage.defaultBatchSize=2000 \ + -Djdk.util.jar.enableMultiRelease=false \ + -jar core/build/libs/opentaint-jvm-sast.jar \ + --project /path/to/project.yaml \ + --output-dir ./output \ + --semgrep-rule-set ./rules/ruleset \ + --semgrep-rule-id my-rule-id \ + --approximations-config ./my-config.yaml \ + --external-methods-output ./external-methods.yaml +``` + +#### Option 3: Python tests with auto-detection + +```bash +# Build analyzer +cd core && ./gradlew :projectAnalyzerJar + +# Run tests — conftest.py auto-detects JAR, falls back from Go CLI +cd agent-mode/test +pytest test_build.py -v -k "not slow" +``` + +The `conftest.py` tries `shutil.which("opentaint")` first. If not found, it searches for the JAR at: +- `core/build/libs/opentaint-jvm-sast.jar` +- `core/build/libs/opentaint-project-analyzer.jar` + +Both paths are relative to `OPENTAINT_ROOT` (3 levels up from `conftest.py`). + +--- + +## 6. Implementation Order + +Recommended sequence based on dependency analysis: + +### Phase A: Kotlin Analyzer Core (can be parallelized internally) + +| # | Task | Files | Depends On | +|---|------|-------|------------| +| A1 | Add `ExternalMethodTracker` class | `ExternalMethodTracker.kt` (new) | — | +| A2 | Wire tracker into analysis pipeline | `TaintAnalysisUnitStorage.kt`, `TaintAnalysisContext.kt`, `TaintAnalysisUnitRunnerManager.kt` | A1 | +| A3 | Report external methods from flow function | `JIRMethodCallFlowFunction.kt` | A2 | +| A4 | Add `--external-methods-output` flag + YAML serialization | `ProjectAnalyzerRunner.kt`, `ProjectAnalysisOptions.kt`, `ProjectAnalyzer.kt` | A3 | +| A5 | Add `--semgrep-rule-id` flag + filtering in loader | `SemgrepRuleLoader.kt`, `ProjectAnalyzerRunner.kt`, `ProjectAnalysisOptions.kt` | — | +| A6 | Rename `--config` → `--approximations-config`, remove mutual exclusion, add `SemgrepRulesWithCustomConfig` variant | `ProjectAnalyzerRunner.kt`, `ProjectAnalyzer.kt` | — | +| A7 | Add `customApproximationPaths` to `DataFlowApproximationLoader.Options`, add `--dataflow-approximations` flag | `DataFlowApproximationLoader.kt`, `ProjectAnalyzerRunner.kt`, `ProjectAnalysisOptions.kt` | — | + +A1-A4 are sequential (pipeline). A5, A6, A7 are independent of each other and of A1-A4. + +### Phase B: Go CLI (depends on Phase A for flag names) + +| # | Task | Files | Depends On | +|---|------|-------|------------| +| B1 | Add hidden `--analyzer-jar`/`--autobuilder-jar` flags | `root.go`, `global.go`, `artifacts.go` | — | +| B2 | Add `AnalyzerBuilder` extensions | `command_builder.go` | — | +| B3 | Add new flags to `scan` command | `scan.go` | B2 | +| B4 | Implement approximation auto-compilation | `compile_approximations.go` (new) | B2 | +| B5 | Implement `opentaint agent` parent command | `agent.go` (new) | — | +| B6 | Implement `opentaint agent rules-path` | `agent_rules_path.go` (new) | B5 | +| B7 | Implement `opentaint agent test-rules` | `agent_test_rules.go` (new) | B2, B5 | +| B8 | Implement `opentaint agent init-test-project` | `agent_init_test_project.go` (new) | B5 | +| B9 | Implement `opentaint agent skills` + `opentaint agent prompt` | `agent_skills.go`, `agent_prompt.go` (new) | B5 | + +B1, B5 are independent starting points. B2 must precede B3, B4, B7. B5 must precede B6-B9. + +### Phase C: Skills and Meta-Prompt + +| # | Task | Files | Depends On | +|---|------|-------|------------| +| C1 | Write 9 skill files | `agent/skills/*.md` | A, B (need final CLI flag names) | +| C2 | Write meta-prompt | `agent/meta-prompt.md` | C1 | +| C3 | Update release workflow | `.github/workflows/release-cli.yaml` | C1 | +| C4 | Publish test-util JAR as release asset | `.github/workflows/publish-analyzer.yaml` | — | + +### Phase D: Validation + +| # | Task | Depends On | +|---|------|------------| +| D1 | Run existing Python test suite (6 passing tests) | A, B | +| D2 | Run `new_feature` tests (20 tests) | A, B | +| D3 | Run full agent loop test | A, B, C | + +--- + +## 7. File Change Summary + +### New Files (17) + +| File | Purpose | +|------|---------| +| `core/.../taint/ExternalMethodTracker.kt` | External method collection during analysis | +| `cli/cmd/agent.go` | `opentaint agent` parent command | +| `cli/cmd/agent_skills.go` | `opentaint agent skills` subcommand | +| `cli/cmd/agent_prompt.go` | `opentaint agent prompt` subcommand | +| `cli/cmd/agent_rules_path.go` | `opentaint agent rules-path` subcommand | +| `cli/cmd/agent_test_rules.go` | `opentaint agent test-rules` subcommand | +| `cli/cmd/agent_init_test_project.go` | `opentaint agent init-test-project` subcommand | +| `cli/cmd/compile_approximations.go` | Auto-compile .java approximations to .class | +| `agent/meta-prompt.md` | Agent system prompt | +| `agent/skills/build-project.md` | Skill: build project | +| `agent/skills/discover-entry-points.md` | Skill: discover entry points | +| `agent/skills/create-rule.md` | Skill: create pattern rules | +| `agent/skills/test-rule.md` | Skill: test rules | +| `agent/skills/run-analysis.md` | Skill: run analysis | +| `agent/skills/analyze-findings.md` | Skill: analyze SARIF findings | +| `agent/skills/create-yaml-config.md` | Skill: create YAML passThrough config | +| `agent/skills/create-approximation.md` | Skill: create code-based approximations | +| `agent/skills/generate-poc.md` | Skill: generate proof-of-concept | + +### Modified Files (15) + +| File | Change Summary | +|------|----------------| +| `core/.../taint/TaintAnalysisUnitStorage.kt` | Add `externalMethodTracker` field | +| `core/.../taint/TaintAnalysisContext.kt` | Expose tracker from storage | +| `core/.../TaintAnalysisUnitRunnerManager.kt` | Wire tracker into unit storage creation | +| `core/.../JIRMethodCallFlowFunction.kt` | Report to tracker in `applyPassRulesOrCallSkip()` | +| `core/.../project/ProjectAnalyzer.kt` | New `PreloadedRules` variant, YAML output, combined config+rules | +| `core/.../project/ProjectAnalysisOptions.kt` | New fields: `externalMethodsOutput`, `semgrepRuleId` | +| `core/.../runner/ProjectAnalyzerRunner.kt` | 4 new Clikt flags | +| `core/.../dataflow/DataFlowApproximationLoader.kt` | `customApproximationPaths` in `Options` | +| `core/.../semgrep/pattern/SemgrepRuleLoader.kt` | Rule ID filter in `loadRules()` | +| `cli/cmd/root.go` | Hidden `--analyzer-jar`, `--autobuilder-jar` flags | +| `cli/cmd/scan.go` | 4 new flags: `--rule-id`, `--approximations-config`, `--dataflow-approximations`, `--external-methods` | +| `cli/cmd/command_builder.go` | 5 new `AnalyzerBuilder` methods + fields | +| `cli/internal/globals/global.go` | `JarPath` fields in `Analyzer`/`Autobuilder` config structs | +| `cli/internal/utils/opentaint_home.go` | `GetAgentPath()` function | +| `.github/workflows/release-cli.yaml` | Bundle agent files + test-util JAR | diff --git a/agent-mode/info/agent-pipeline.md b/agent-mode/info/agent-pipeline.md new file mode 100644 index 000000000..0b876db31 --- /dev/null +++ b/agent-mode/info/agent-pipeline.md @@ -0,0 +1,672 @@ +# Agent Pipeline Design Document + +## Overview + +This document describes the end-to-end pipeline for an LLM agent to perform security analysis of a JVM project using OpenTaint. The agent builds the project, creates rules, tests them, runs analysis, interprets results (SARIF + external methods list), and iterates to fix FP/FN until coverage is satisfactory. + +## Agent Capabilities Summary + +| Capability | Artifact Type | Reference | +|-----------|---------------|-----------| +| Generate vulnerability detection rules | Pattern rules (YAML) | `pattern-rules.md` | +| Debug/fix rules (FP/FN) | Pattern rules (YAML) | `pattern-rules.md` | +| Generate taint propagation rules | YAML config rules | `approximations-config.md` | +| Generate complex propagators | Code-based approximations (Java) | `approximations-config.md` | +| Override existing propagation | Either YAML or Java stubs | `approximations-config.md` | +| Framework support | Not configurable | Provided as-is | + +## CLI Interfaces + +OpenTaint provides two CLI interfaces. The agent uses them at different pipeline stages. + +### Go CLI (`opentaint`) + +High-level wrapper. Manages Java runtime, downloads artifacts, invokes the analyzer JAR. + +| Command | Purpose | +|---------|---------| +| `opentaint compile -o ` | Build project and create `project.yaml` | +| `opentaint project --output --source-root --classpath --package ` | Create `project.yaml` from precompiled JARs/classes | +| `opentaint scan -o [--ruleset builtin] [--ruleset ]` | Run full analysis (optionally compile first) | +| `opentaint summary [--show-findings] [--show-code-snippets] [--verbose-flow]` | Print SARIF results summary | +| `opentaint pull` | Download all artifacts + JRE | + +**Key flags for `opentaint scan`:** +- `--output ` — SARIF output file (required) +- `--ruleset builtin` — use built-in rules (default) +- `--ruleset ` — custom Semgrep rule file/directory (can specify multiple times; combinable with `builtin`) +- `--timeout ` — analysis timeout (default: 900) +- `--max-memory ` — JVM memory limit (default: `8G`) +- `--severity ` — severity filter (default: `warning,error`) +- `--code-flow-limit ` — max code flows per finding + +### Kotlin CLI (`opentaint-project-analyzer.jar`) + +Low-level analyzer JAR. Invoked by the Go CLI, but can be used directly for advanced features. + +| Flag | Purpose | +|------|---------| +| `--project ` | Project model (required) | +| `--output-dir ` | Output directory (required) | +| `--semgrep-rule-set ` | Semgrep rule files/directories (multiple) | +| `--config ` | Custom passThrough/approximation YAML (**mutually exclusive** with `--semgrep-rule-set`) | +| `--debug-run-rule-tests` | Run rule tests instead of project analysis | +| `--debug-run-analysis-on-selected-entry-points ` | `*` for all methods or `com.example.Class#method` | +| `--semgrep-rule-load-trace ` | Output rule loader diagnostics | +| `--sarif-file-name ` | SARIF filename (default: `report-ifds.sarif`) | +| `--ifds-analysis-timeout ` | IFDS timeout (default: 10000) | +| `--project-kind ` | `unknown` or `spring-web` | + +**Important**: `--config` and `--semgrep-rule-set` are **mutually exclusive**. The `--config` flag is the only way to pass custom passThrough/cleaner YAML rules directly. The Go CLI does not expose `--config` — it only passes `--semgrep-rule-set` via `--ruleset`. + +### Autobuilder (`opentaint-project-auto-builder.jar`) + +| Flag | Purpose | +|------|---------| +| `--project-root-dir ` | Project root (required) | +| `--build portable` | Build + create self-contained project directory | +| `--result-dir ` | Output directory for portable build | +| `--build simple` | Just dump `project.yaml` | +| `--result ` | Output path for simple build | + +## Full Agent Workflow + +### Step 1: Project Setup + +Agent takes the path to the target project and prepares it for analysis. + +**Option A: Use Go CLI (recommended)** +```bash +# Build and create project model +opentaint compile /path/to/project -o ./opentaint-project + +# Result: ./opentaint-project/project.yaml +``` + +**Option B: Use Autobuilder directly** +```bash +java -jar opentaint-project-auto-builder.jar \ + --project-root-dir /path/to/project \ + --build portable \ + --result-dir ./opentaint-project \ + --logs-file autobuild.log \ + --verbosity debug +``` + +**Option C: Create project.yaml manually** + +For projects that don't use standard Gradle/Maven builds, or for pre-compiled artifacts: + +```bash +opentaint project \ + --output ./opentaint-project \ + --source-root /path/to/sources \ + --classpath /path/to/classes.jar \ + --classpath /path/to/dependency.jar \ + --package com.example.app +``` + +The generated `project.yaml` follows this schema: +```yaml +sourceRoot: sources +javaToolchain: toolchain/jdk-17 +modules: + - moduleSourceRoot: sources/src/main/java + packages: [com.example.app] + moduleClasses: + - classes/c0_main +dependencies: + - dependencies/spring-web-5.3.39.jar + - dependencies/javax.servlet-api-4.0.1.jar +``` + +### Step 2: Entry Point Discovery + +Agent searches for entry points and potentially vulnerable places. This is a code-level analysis step. + +The agent should examine: +- **Spring controllers**: `@RestController`/`@Controller` classes with `@RequestMapping`/`@GetMapping` etc. +- **Servlet handlers**: Classes extending `HttpServlet` with `doGet`/`doPost`/etc. +- **JAX-RS endpoints**: Classes with `@Path` and `@GET`/`@POST` annotations +- **Message handlers**: JMS/Kafka/RabbitMQ listeners +- **CLI entry points**: `main()` methods that process external input + +The engine automatically discovers Spring entry points (via `SpringWebProject.kt`) and for unknown projects selects all public/protected methods from public project classes. The agent can also use `--debug-run-analysis-on-selected-entry-points "com.example.Class#method"` to target specific methods. + +### Step 3: Analysis Planning + +Agent creates `opentaint-analysis-plan.md` to track progress. This document records: +- Target project description +- Identified entry points and attack surface +- Rules to create/apply +- Analysis iterations with findings +- FP/FN tracking and resolution status +- Final vulnerability inventory + +### Step 4: Rule Creation + +Agent creates pattern rules for the vulnerability classes relevant to the target project. See `pattern-rules.md` for the full rule language. + +**Typical rule structure:** + +``` +rules/ +├── agent-rules/ # Agent-created rules +│ ├── java/ +│ │ ├── security/ +│ │ │ └── custom-sqli.yaml # Security rule (join mode) +│ │ └── lib/ +│ │ └── custom-sinks.yaml # Sink library rule +│ └── test/ +│ └── CustomSqliTest.java # Test samples +``` + +The agent composes rules using the three modes: +1. **Simple patterns** — for structural issues (no dataflow) +2. **Taint mode** — for defining sinks with `focus-metavariable` +3. **Join mode** — for composing source + sink library rules via `refs` and `on` clauses + +### Step 5: Rule Testing + +Agent creates test samples and validates rules work before running on the real project. + +**5a. Create a simple test project:** + +A minimal Gradle project with Java source files containing annotated test samples: + +```java +import org.opentaint.sast.test.util.PositiveRuleSample; +import org.opentaint.sast.test.util.NegativeRuleSample; + +public class CustomSqliTest { + + @PositiveRuleSample(value = "java/security/custom-sqli.yaml", id = "custom-sql-injection") + public void vulnerable(HttpServletRequest req) { + String input = req.getParameter("id"); + db.execute("SELECT * FROM users WHERE id = " + input); + } + + @NegativeRuleSample(value = "java/security/custom-sqli.yaml", id = "custom-sql-injection") + public void safe(HttpServletRequest req) { + String input = req.getParameter("id"); + db.execute("SELECT * FROM users WHERE id = ?", input); + } +} +``` + +**5b. Build the test project:** +```bash +opentaint compile ./test-project -o ./test-opentaint-project +``` + +**5c. Run rule tests (via Kotlin CLI directly):** +```bash +java -Xmx8G -jar opentaint-project-analyzer.jar \ + --project ./test-opentaint-project/project.yaml \ + --output-dir ./test-result \ + --semgrep-rule-set ./agent-rules \ + --debug-run-rule-tests \ + --verbosity debug +``` + +This produces `test-result/test-result.json` with per-sample verdicts: +```json +{ + "success": [...], + "falsePositive": [...], + "falseNegative": [...], + "skipped": [...], + "disabled": [...] +} +``` + +**5d. Fix and repeat** until all tests pass (no falsePositive/falseNegative entries). + +### Step 6: Run Analysis on Target Project + +```bash +# Option A: Go CLI +opentaint scan ./opentaint-project/project.yaml \ + -o ./results/report.sarif \ + --ruleset builtin \ + --ruleset ./agent-rules + +# Option B: Kotlin CLI (if custom --config needed) +java -Xmx8G -jar opentaint-project-analyzer.jar \ + --project ./opentaint-project/project.yaml \ + --output-dir ./results \ + --semgrep-rule-set ./agent-rules \ + --ifds-analysis-timeout 900 \ + --verbosity info +``` + +### Step 7: Interpret Results + +The analyzer produces two output files: + +**7a. SARIF report** (`results/report-ifds.sarif`) + +Standard SARIF 2.1.0 format containing: +- `runs[0].results[]` — each result is a vulnerability finding with: + - `ruleId` — which rule triggered + - `message.text` — human-readable description + - `level` — severity (error/warning/note) + - `locations[]` — sink location (file, line, column) + - `codeFlows[]` — taint traces from source to sink + - `relatedLocations[]` — HTTP endpoints, parameter info + +View results: +```bash +opentaint summary ./results/report.sarif --show-findings --show-code-snippets --verbose-flow +``` + +**7b. External methods list** (`results/external-methods.json`) + +JSON list of external methods where a dataflow fact was killed during analysis. Each entry contains: +- Method signature (class, name, parameter types) +- Fact position information (the taint flow position from the passThrough rule perspective) + +This is the primary signal for fixing FN caused by missing taint propagation models. + +### Step 8: Decision Loop + +For each analysis result, the agent decides between the following actions: + +``` +For each finding in SARIF: +│ +├── Analyze the trace (codeFlow) +│ │ +│ ├── Trace is a TRUE POSITIVE (TP) +│ │ → Generate POC exploit +│ │ → Save to vulnerabilities.md +│ │ +│ ├── Trace contains FALSE POSITIVE (FP) — fixable via Rule +│ │ → Add pattern-not / pattern-not-inside to exclude the safe pattern +│ │ → Update tests (add @NegativeRuleSample) +│ │ → Re-run analysis (go to Step 6) +│ │ +│ └── Trace contains FALSE POSITIVE (FP) — fixable via Approximation (non-preferred) +│ → Override approximation to remove impossible dataflow path +│ → Re-run analysis (go to Step 6) +│ +For each entry in external methods list: +│ +├── Method is a taint PROPAGATOR +│ → Generate passThrough YAML rule (preferred) +│ → Re-run analysis (go to Step 6) +│ +├── Method is a complex TRANSFORMER (lambdas/callbacks) +│ → Generate code-based approximation (Java stub) +│ → Re-run analysis (go to Step 6) +│ +├── Method is a SANITIZER +│ → Generate cleaner YAML rule +│ → Re-run analysis (go to Step 6) +│ +└── Method is NEUTRAL (logging, metrics) + → Skip (default call-to-return passthrough is correct) +``` + +**FN fix via Rule** (non-preferred): If the FN is due to a missing source/sink pattern (not a missing approximation), the agent can add more patterns and tests to the rule. This is less common since most FN stem from taint being lost at external method calls. + +### Step 9: Iteration + +Steps 6-8 repeat until the agent determines: +- All traces have been reviewed +- All identified FP have been fixed +- All relevant external methods have been addressed +- Remaining external methods are classified as NEUTRAL +- All TPs have been documented with POCs in `vulnerabilities.md` + +## Detailed Sub-Scenarios + +### Fixing FN via External Methods List + +This is the most common and impactful iteration. Each external methods list entry provides: + +```json +{ + "method": "com.example.lib.DataWrapper#getValue", + "signature": "() java.lang.String", + "factPosition": "this" +} +``` + +The `factPosition` tells the agent **from where** taint should propagate. The agent uses this to generate the correct `copy.from` in the passThrough rule: + +```yaml +# factPosition: "this" means taint is on the receiver → should flow to result +passThrough: + - function: com.example.lib.DataWrapper#getValue + copy: + - from: this + to: result +``` + +```yaml +# factPosition: "arg(0)" means taint is on first argument → should flow to result/this +passThrough: + - function: com.example.lib.DataWrapper#process + copy: + - from: arg(0) + to: result +``` + +#### Decision Tree: YAML Config vs Code-Based Approximation + +``` +Does the method involve lambdas/callbacks/functional interfaces? +├── YES → Code-based approximation required +│ (YAML cannot model lambda invocation) +└── NO + Does the method involve complex internal state? + ├── YES → YAML with pattern + │ (model internal state with synthetic fields) + └── NO + Is it a simple from→to propagation? + ├── YES → YAML passThrough rule (simplest, preferred) + └── NO + Does it require non-deterministic branching? + ├── YES → Code-based approximation + │ (use OpentaintNdUtil.nextBool()) + └── NO → YAML passThrough rule with conditions +``` + +#### Batch Processing Strategy + +When the external methods list is large, process library-by-library: + +``` +1. Group by package/library + com.fasterxml.jackson.* → 47 methods + org.springframework.* → 23 methods + org.apache.commons.* → 15 methods + +2. For each library: + ├── Check if built-in config already covers it (jar-split/ configs) + ├── Look up library documentation / source + ├── Generate comprehensive passThrough rules + └── Save as agent-config/.yaml + +3. Re-run analysis after each library batch +``` + +### Fixing FP via Rule + +When a SARIF trace shows a false positive: + +**Common causes and fixes:** + +| Cause | Fix | Example | +|-------|-----|---------| +| Sanitization not recognized | Add `pattern-not-inside` or `pattern-sanitizers` | `Encoder.htmlEncode()` not recognized | +| Safe type not excluded | Add `metavariable-regex` with negative lookahead | Primitive types flowing to sink | +| Context makes it safe | Add `pattern-not-inside` for the safe context | Inside validation block | +| Wrong method matched | Narrow `metavariable-regex` or pattern | Too broad `$OBJ.$METHOD(...)` | + +**Example: Adding a sanitizer exclusion** +```yaml +# Before: sink matches all calls +pattern-sinks: + - pattern: response.getWriter().write($UNTRUSTED) + +# After: exclude sanitized paths +pattern-sinks: + - patterns: + - pattern: response.getWriter().write($UNTRUSTED) + - pattern-not-inside: | + $X = Encoder.htmlEncode(...); + ... + response.getWriter().write($X); +``` + +### Fixing FP via Approximation (Non-Preferred) + +Sometimes a false dataflow path exists because an approximation is too permissive (e.g., models a method as propagating taint when it actually transforms data in a way that neutralizes it). + +**Fix**: Override with a more precise approximation or add a cleaner rule: + +```yaml +# Add a cleaner rule to kill taint at the sanitizing method +cleaner: + - function: com.example.security.Sanitizer#clean + clean: + - position: result + mark: tainted +``` + +**Note**: This is non-preferred because approximation changes affect all rules globally, not just the specific FP case. + +### Overriding Existing Approximations + +The agent can override built-in approximations at two levels: + +**Override YAML config rules**: Provide a custom config via `--config` flag (Kotlin CLI only). PassThrough and cleaner rules are **extended** (merged with built-in), while source/sink/entryPoint rules are **overridden** (replace built-in). + +**Override with code-based approximations**: Create a Java stub class with `@Approximate`. Code-based approximations always take priority over YAML config for the same method. However, custom code-based approximations are **not currently passable via CLI flags** — they require building a custom approximations JAR and setting environment variables (`opentaint.jvm.api.jar.path`, `opentaint.jvm.approximations.jar.path`). + +### Priority chain: + +``` +Code-based approximation ← Highest (analyzed as actual code) + overrides +YAML config rules ← Agent CAN provide via --config + merged with +Auto-generated defaults ← Engine auto-generates for get* on non-project classes + fall back to +Intra-procedural analysis ← Engine analyzes callee body if available + fall back to +Call-to-return passthrough ← Taint preserved, method treated as no-op +``` + +## Passing Custom Approximations to the Analyzer + +### Via `--semgrep-rule-set` (Go CLI `--ruleset`) + +Pattern rules (source/sink/sanitizer patterns). This is the primary path for agent-generated rules. + +```bash +opentaint scan project.yaml -o report.sarif \ + --ruleset builtin \ + --ruleset ./agent-rules/ +``` + +### Via `--config` (Kotlin CLI only, not exposed in Go CLI) + +PassThrough/cleaner/source/sink YAML in `SerializedTaintConfig` format. Use this when the agent needs to add custom taint propagation models. + +```bash +java -jar opentaint-project-analyzer.jar \ + --project project.yaml \ + --output-dir ./results \ + --config ./agent-config/custom-propagators.yaml +``` + +**Limitation**: `--config` and `--semgrep-rule-set` are **mutually exclusive**. If the agent needs both custom pattern rules and custom propagation config, it must either: +1. Use `--semgrep-rule-set` — pattern rules include only passThrough from the default built-in config (agent cannot add extra passThrough rules this way) +2. Use `--config` — loses the ability to provide Semgrep-format pattern rules + +This is a current limitation that may need to be addressed (see "Required Engine Enhancements" below). + +### Via Environment Variables (Code-Based Approximations) + +Custom compiled Java stub JARs. Requires `useOpentaintApproximations=true` which is not exposed via CLI. + +```bash +export opentaint.jvm.api.jar.path=/path/to/api.jar +export opentaint.jvm.approximations.jar.path=/path/to/approximations.jar +``` + +**Current status**: Not practically usable via CLI. Requires programmatic API access. + +## Common Templates + +### Template: PassThrough for Simple Getter + +```yaml +passThrough: + - function: com.example.Type#getField + copy: + - from: this + to: result +``` + +### Template: PassThrough for Builder Pattern + +```yaml +passThrough: + - function: com.example.Builder#withField + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result +``` + +### Template: PassThrough for Collection Wrapper + +```yaml +passThrough: + - function: com.example.Collection#add + copy: + - from: arg(0) + to: + - this + - .com.example.Collection##java.lang.Object + - function: com.example.Collection#get + copy: + - from: + - this + - .com.example.Collection##java.lang.Object + to: result +``` + +### Template: PassThrough for Generic Pattern (All Getters in a Package) + +```yaml +passThrough: + - function: + package: com.example.dto + class: + pattern: .* + name: + pattern: get.* + copy: + - from: this + to: result +``` + +### Template: Code-Based Approximation for Functional API + +```java +@Approximate(com.example.FunctionalApi.class) +public class FunctionalApi { + public Object transform(@ArgumentTypeContext Function fn) throws Throwable { + FunctionalApi self = (FunctionalApi) (Object) this; + if (OpentaintNdUtil.nextBool()) return null; + Object input = self.getValue(); + return fn.apply(input); + } + + public void consume(@ArgumentTypeContext Consumer consumer) { + FunctionalApi self = (FunctionalApi) (Object) this; + if (OpentaintNdUtil.nextBool()) { + consumer.accept(self.getValue()); + } + } +} +``` + +### Template: Cleaner Rule (Sanitizer) + +```yaml +cleaner: + - function: com.example.security.HtmlEncoder#encode + clean: + - position: result + mark: tainted +``` + +## Required Engine Enhancements + +Based on the requirements in the task specification, the following features need to be implemented: + +### 1. External Methods List Output (JSON) + +**Requirement**: "Engine will return a list of external methods, where dataflow fact was killed" (task.md line 13). + +**Current state**: The engine does not produce this output. When a method is unresolvable (external), the fact is preserved via call-to-return passthrough, not killed. The engine needs a new mechanism to: +- Track which external methods were encountered during analysis +- Record the fact position (from which taint was propagating) at each external call +- Output this as a JSON file alongside the SARIF report + +**Proposed format**: +```json +[ + { + "method": "com.example.lib.Wrapper#getValue", + "signature": "() java.lang.String", + "factPosition": "this", + "callCount": 5 + } +] +``` + +A CLI flag (e.g., `--external-methods-output `) should be added to both CLIs. + +### 2. Combined `--config` + `--semgrep-rule-set` + +**Current state**: These flags are mutually exclusive. + +**Requirement**: The agent needs to provide both custom pattern rules (`--semgrep-rule-set`) and custom passThrough/approximation YAML (`--config`) simultaneously. + +**Proposed fix**: Allow both flags. When both are provided, load Semgrep rules as the pattern-matching layer and merge the custom config's passThrough/cleaner rules with the default config. + +### 3. Custom Code-Based Approximations via CLI + +**Current state**: No CLI flag to pass custom approximation JARs. The `useOpentaintApproximations` flag is hardcoded to `false`. + +**Requirement**: Agent must be able to provide code-based approximations for complex methods. + +**Proposed fix**: Add a CLI flag (e.g., `--approximations-jar `) that enables `useOpentaintApproximations` and sets the JAR paths. Expose this in both CLIs. + +## Integration Constraints + +### What the Agent CAN Do + +1. Create/modify pattern rules (YAML) in custom rule directories +2. Create/modify YAML config rules (passThrough, source, sink, cleaner) via `--config` +3. Create code-based approximation Java stubs (pending CLI support) +4. Generate test cases for rules +5. Override YAML config rules with more specific rules +6. Override YAML config rules with code-based approximations (pending CLI support) +7. Use `--ruleset` with multiple custom rule directories alongside `builtin` + +### What the Agent CANNOT Do + +1. Modify framework support (Spring, etc.) — provided as-is +2. Change the IFDS analysis algorithm +3. Change the access path abstraction mode +4. Change how the call graph is constructed +5. Modify the pattern matching engine semantics +6. Currently: combine `--config` and `--semgrep-rule-set` in a single run +7. Currently: pass custom code-based approximations via CLI + +### Validation Checklist + +Before submitting any artifact, the agent should verify: + +- [ ] YAML is valid and parseable +- [ ] Rule IDs are globally unique +- [ ] Library rules have `options.lib: true` +- [ ] Security rules have `metadata.cwe` and `metadata.short-description` +- [ ] Source/sink rules use consistent metavariable names (`$UNTRUSTED`) +- [ ] Join-mode `on` clauses reference valid aliases defined in `refs` +- [ ] Test cases exist for all enabled non-library rules +- [ ] passThrough `from`/`to` positions are valid (`this`, `arg(N)`, `result`, etc.) +- [ ] Code-based approximations compile and use `@Approximate` annotation +- [ ] No regressions: existing test cases still pass +- [ ] `opentaint-analysis-plan.md` is updated with current iteration status diff --git a/agent-mode/info/approximations-config.md b/agent-mode/info/approximations-config.md new file mode 100644 index 000000000..974d50f36 --- /dev/null +++ b/agent-mode/info/approximations-config.md @@ -0,0 +1,487 @@ +# Approximations Configuration Design Document + +## Overview + +Approximations tell the dataflow engine **how taint propagates** through library and framework methods that the engine cannot analyze directly (because their source code is not part of the project). There are two layers: + +| Layer | Format | Location | Scope | Override Priority | +|-------|--------|----------|-------|-------------------| +| **YAML config rules** | Declarative passThrough YAML | `core/opentaint-config/config/` | Tens of thousands of library methods | Base layer (lowest priority) | +| **Code-based approximations** | Java stub classes | `core/opentaint-jvm-sast-dataflow/dataflow-approximations/` | Complex functional/async APIs | Overrides YAML config (highest priority) | + +When both exist for the same method, **code-based approximations always win** — the engine analyzes the stub body directly instead of applying config rules. + +## Layer 1: YAML Config Rules (passThrough) + +### File Layout + +``` +core/opentaint-config/config/config/ +├── stdlib.yaml # ~21,000 lines — java.io, java.lang, java.util, java.net, java.nio +├── config.yaml # ~18,000 lines — javax.*, org.json +├── jmod.yaml # ~3,400 lines — javax.naming, javax.script, javax.sql +├── unverified.yaml # ~1,100 lines — Jackson, Spring utils, Reactor, XML parsers +└── jar-split/ # Per-library configs (29 files) + ├── spring-web-7.0.2.yaml + ├── spring-webmvc-7.0.2.yaml + ├── reactor-core-3.8.2.yaml + ├── guava-33.5.0-jre.yaml + ├── jackson-databind-2.20.1.yaml + ├── netty-buffer-4.2.0.Final.yaml + └── ... +``` + +### YAML Schema + +All config files share the same top-level `passThrough:` schema: + +```yaml +passThrough: + - function: + signature: + overrides: + condition: + copy: + - from: + to: +``` + +### Function Name Matching + +#### Simple Form (String) + +```yaml +function: java.lang.String#concat +``` + +Parsed as: package = `java.lang`, class = `String`, method = `concat`. + +#### Complex Form (Map with Patterns) + +```yaml +function: + package: org.apache.axis.types + class: + pattern: .* + name: + pattern: get.* +``` + +Each of `package`, `class`, `name` can be either a plain string (exact match) or `pattern: ` (substring regex match). + +#### Pattern Matching Semantics + +- **Exact name** (`"get"`) — matches only methods named exactly `get` +- **Pattern** (`pattern: "get.*"`) — matches any method whose name contains a substring matching `get.*` (uses `containsMatchIn`, not `fullMatch`) +- **Wildcard** (`pattern: ".*"`) — matches any name + +### Signature Matching + +```yaml +signature: (java.lang.String) java.lang.String +``` + +Format: `() `. Used to disambiguate overloaded methods. + +Alternative structured form: +```yaml +signature: + params: + - index: 0 + type: java.lang.String +``` + +### Taint Flow Positions + +Positions describe where taint lives on a method's interface: + +| Position | Meaning | +|-----------------------------------------------|----------------------------------------------------| +| `this` | The receiver object (`this` reference) | +| `arg(0)`, `arg(1)`, ... | Method arguments (0-indexed) | +| `arg(*)` | All arguments (expanded to individual arg rules) | +| `result` | The method's return value | +| `[*]` | Array element access (appended to a base position) | +| `.#$` | Field access (appended to a base position) | + +#### Internal State Tracking (Rule Storage) + +For modeling taint that persists inside an object across method calls: + +```yaml +from: + - this + - .java.io.ByteArrayOutputStream##java.lang.Object +to: result +``` + +The `` is a synthetic field — it doesn't exist in the real class. The engine uses it as a virtual container to track taint flow through an object's internal state. When a method stores taint into the object, the `to` side points to the rule-storage field. When another method retrieves it, the `from` side reads from the same field. + +#### Named Field Access + +```yaml +from: + - this + - .java.lang.Throwable#message#java.lang.Object +to: result +``` + +This models taint flowing from a specific named field (`message`) of the `this` object to the result. + +### Actions + +#### `copy` — Propagate Taint + +```yaml +copy: + - from: arg(0) + to: result + - from: this + to: result +``` + +Copies all taint marks from `from` position to `to` position. The most common action. + +### Conditions + +Conditions restrict when a rule applies: + +```yaml +condition: + typeIs: + position: arg(0) + type: java.lang.String + +condition: + anyOf: + - typeIs: + position: arg(0) + type: java.lang.String + - typeIs: + position: arg(0) + type: java.lang.CharSequence + +condition: + not: + isConstant: + position: arg(0) + +condition: + allOf: + - annotatedWith: + position: arg(0) + type: javax.annotation.Nonnull + - numberOfArgs: 2 +``` + +#### Condition Types + +| Condition | YAML Key | Description | +|-----------|----------|-------------| +| Type check | `typeIs` | Position's type matches a name/pattern | +| Annotation | `annotatedWith` | Position has an annotation | +| Constant | `isConstant` | Position is a compile-time constant | +| Null | `isNull` | Position is null | +| Constant regex | `constantMatches` | Constant value matches regex | +| Constant comparison | `constantEq`, `constantGt`, `constantLt` | Compare constant value | +| Taint check | `tainted` | Position already carries a taint mark | +| Arg count | `numberOfArgs` | Method has N parameters | +| Method annotation | `methodAnnotated` | Method has annotation | +| Class annotation | `classAnnotated` | Enclosing class has annotation | +| Method name | `methodNameMatches` | Method name matches pattern | +| Class name | `classNameMatches` | Class name matches pattern | +| Static field | `isStaticField` | Position is a specific static field | +| Combinators | `anyOf`, `allOf`, `not` | Boolean logic | + +### The `overrides` Field and Hierarchy + +The `overrides` field (default: `true`) controls **class hierarchy inheritance**: + +- `overrides: true` — Rule applies to the specified class **and all subclasses**. When looking up rules for a method, the engine walks the class hierarchy upward and includes matching rules from superclasses. +- `overrides: false` — Rule applies **only** to the exact specified class. + +#### Hierarchical Matching (Method Name Level) + +The `MethodTaintRulesStorage` indexes rules in three tiers: + +1. **Concrete name rules** — exact method name match (e.g., `getEntry`) +2. **Pattern method rules** — regex match on method name (e.g., `get.*`) +3. **Any method rules** — wildcard `.*` match + +When resolving rules for a specific method: +1. Check concrete name match first +2. Evaluate all pattern matches +3. Include any-method wildcard matches +4. All matching rules are **merged** (not prioritized) — they all apply + +**Important**: There is no priority between concrete and pattern rules at this level. If both a `get*` pattern rule and a `getEntry` concrete rule match `getEntry`, **both apply**. To make a specific rule override a general pattern, use conditions to restrict the general rule, or ensure the specific rule's actions make the general rule's actions redundant. + +#### Hierarchical Matching (Class Name Level) + +The `MethodClassTaintRulesStorage` resolves rules by: + +1. **Exact class match** — highest specificity +2. **Pattern class match** — four strategies: + - Concrete class name, any package + - Concrete class name, package pattern + - Concrete package, class pattern + - Both class and package patterns +3. **Any-class wildcard** — lowest specificity +4. **Hierarchy walk** — for superclasses, only `overrides: true` rules propagate +5. **Subclass push** — rules are pushed to supertypes with added `typeIs: This` conditions + +### Complete Example + +```yaml +passThrough: + # String.concat: taint on this or arg flows to result + - function: java.lang.String#concat + signature: (java.lang.String) java.lang.String + copy: + - from: arg(0) + to: result + - from: this + to: result + + # StringBuilder.append: taint on arg flows to this and result + - function: java.lang.StringBuilder#append + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # ByteArrayOutputStream: write stores taint, toString retrieves it + - function: java.io.ByteArrayOutputStream#write + copy: + - from: arg(0) + to: + - this + - .java.io.ByteArrayOutputStream##java.lang.Object + - function: java.io.ByteArrayOutputStream#toString + copy: + - from: + - this + - .java.io.ByteArrayOutputStream##java.lang.Object + to: result + + # Generic getter pattern for Axis types: any get* method propagates this to result + - function: + package: org.apache.axis.types + class: + pattern: .* + name: + pattern: get.* + copy: + - from: this + to: result +``` + +## Layer 2: Code-Based Approximations + +### Purpose + +Code-based approximations replace complex library method bodies with simplified Java implementations that the IFDS taint analyzer can reason about. They are essential for: + +- **Functional APIs** (Stream, Optional) — make lambda data flow explicit +- **Async APIs** (CompletableFuture, CompletionStage) — linearize async composition +- **Threading** (Thread, Executor) — make cross-thread data flow visible +- **Coroutines** (Kotlin builders) — linearize coroutine control flow + +### File Layout + +``` +core/opentaint-jvm-sast-dataflow/dataflow-approximations/ +└── src/main/java/org/opentaint/jvm/dataflow/approximations/ + ├── OpentaintNdUtil.java # Non-deterministic boolean utility + ├── ArgumentTypeContext.java # Annotation for type context parameters + └── stdlib/ + ├── Stream.java # java.util.stream.Stream + ├── Optional.java # java.util.Optional + ├── CompletableFuture.java # java.util.concurrent.CompletableFuture + ├── CompletionStage.java # java.util.concurrent.CompletionStage + ├── Executor.java # java.util.concurrent.Executor + ├── ExecutorService.java # java.util.concurrent.ExecutorService + └── Thread.java # java.lang.Thread + └── kotlin/ + ├── Builders.java # kotlinx.coroutines builders + ├── BuildersBuilders.java + └── BuildersBuildersCommon.java +``` + +### How Approximations Work + +#### Annotation-Based Registration + +```java +@Approximate(java.util.stream.Stream.class) +public class Stream { + // Methods here replace the real Stream methods during analysis +} +``` + +The `@Approximate` annotation binds this stub class to the real `java.util.stream.Stream`. The analyzer loads approximation bytecode from a JAR resource and installs them as `JIRClasspathFeature`. When the analyzer encounters a call to a method that has an approximation, it analyzes the stub body instead of: +- Looking up YAML config rules +- Treating the method as opaque external + +#### Key Infrastructure + +**`OpentaintNdUtil.nextBool()`** — Non-deterministic choice. The analyzer considers **both** branches, enabling modeling of success + failure paths: + +```java +if (OpentaintNdUtil.nextBool()) { + // success path +} else { + // failure path (or return null/empty) +} +``` + +**`@ArgumentTypeContext`** — Marks parameters that carry generic type context (e.g., lambda types). The analyzer uses this to resolve lambda parameter/return types for dataflow through higher-order functions. + +#### Common Patterns + +**Functional transformation** (explicit lambda data flow): + +```java +// Approximation for Stream.map(Function) +public java.util.stream.Stream map(@ArgumentTypeContext Function mapper) { + java.util.stream.Stream t = (java.util.stream.Stream) (Object) this; + Iterator it = t.iterator(); + if (it.hasNext()) { + Object result = mapper.apply(it.next()); + return java.util.stream.Stream.of(result); + } + return java.util.stream.Stream.empty(); +} +``` + +This makes explicit: element extracted from stream → passed to lambda → result wrapped in new stream. + +**Async linearization** (flatten future composition): + +```java +// Approximation for CompletableFuture.thenApply(Function) +public CompletableFuture thenApply(@ArgumentTypeContext Function fn) throws Throwable { + CompletableFuture t = (CompletableFuture) (Object) this; + if (OpentaintNdUtil.nextBool()) return null; + Object result = fn.apply(t.get()); + return CompletableFuture.completedFuture(result); +} +``` + +This linearizes: future value extracted via `.get()` → passed to function → wrapped in completed future. + +**Threading** (potential direct invocation): + +```java +// Approximation for Thread.start() +public void start() { + Thread t = (Thread) (Object) this; + if (OpentaintNdUtil.nextBool()) { + t.run(); + } +} +``` + +Models `Thread.start()` as a potential direct call to `run()` so the analyzer can trace data through threads. + +### Covered API Surface + +| API | Methods Approximated | +|-----|---------------------| +| `Stream` | filter, map, flatMap, mapToInt/Long/Double, peek, sorted, forEach, reduce, collect, min, max, anyMatch/allMatch/noneMatch, takeWhile/dropWhile, toArray, mapMulti | +| `Optional` | ifPresent, ifPresentOrElse, filter, map, flatMap, or, orElseGet, orElseThrow | +| `CompletableFuture` | supplyAsync, thenApply/Accept/Run, thenCombine/AcceptBoth, thenCompose, handle, whenComplete, exceptionally, all `*Async` variants | +| `CompletionStage` | All corresponding CompletionStage methods | +| `Executor` | execute | +| `ExecutorService` | submit, invokeAll, invokeAny | +| `Thread` | start, constructors with Runnable | +| Kotlin Coroutines | runBlocking, launch, async, withContext | + +## Override Hierarchy + +The engine resolves taint propagation rules in this priority order: + +``` +1. Code-based approximations (HIGHEST — analyzed as actual code) + ↓ if no approximation exists +2. YAML passThrough config (applied at call sites as summary edges) + ↓ if no config rule exists +3. Auto-generated defaults (JIRMethodGetDefaultProvider: get* methods → copy this to result) + ↓ if none of the above +4. Intra-procedural analysis (analyze the actual callee body if available) + ↓ if callee is external/unknown +5. Call-to-return passthrough (taint preserved, method treated as no-op) +``` + +### Provider Chain (Runtime) + +``` +JIRTaintRulesProvider ← loads from TaintConfiguration (YAML) + └── StringConcatRuleProvider ← adds synthetic rules for string concat + └── JIRMethodGetDefaultProvider ← auto-generates get* passthrough + └── JIRCombinedTaintRulesProvider ← merges base + custom config + └── JIRFilteredTaintRulesProvider ← applies TaintRuleFilter +``` + +### JIRCombinedTaintRulesProvider + +When a custom config is provided alongside the default config, `JIRCombinedTaintRulesProvider` merges them with configurable per-category modes: + +| Category | Default Mode | Behavior | +|----------|-------------|----------| +| Entry points | OVERRIDE | Custom replaces base | +| Sources | OVERRIDE | Custom replaces base | +| Sinks | OVERRIDE | Custom replaces base | +| PassThrough | EXTEND | Custom + base merged | +| Cleaners | EXTEND | Custom + base merged | + +Modes: `EXTEND` (union), `OVERRIDE` (only custom), `IGNORE` (only base). + +## Agent Interaction Points + +### Generating YAML Config Rules + +An agent can create new passThrough rules to fix false negatives where taint is lost through library method calls: + +```yaml +# Agent-generated rule for a missed library method +passThrough: + - function: com.example.lib.DataProcessor#transform + copy: + - from: arg(0) + to: result +``` + +### Generating Code-Based Approximations + +For complex APIs with lambdas/callbacks, the agent can write Java stub classes: + +```java +@Approximate(com.example.lib.AsyncProcessor.class) +public class AsyncProcessor { + public CompletableFuture processAsync(@ArgumentTypeContext Function fn) { + AsyncProcessor self = (AsyncProcessor) (Object) this; + if (OpentaintNdUtil.nextBool()) return null; + Object result = fn.apply(self.getData()); + return CompletableFuture.completedFuture(result); + } +} +``` + +### Overriding Existing Rules + +An agent can override existing rules by: + +1. **For YAML config**: Add rules with more specific function/class matchers. Since all matching rules are merged, add a `cleaner` rule to cancel out an incorrect passthrough, or provide a corrected passthrough with more specific conditions. +2. **For code-based approximations**: Create a new approximation class for the same target class. Code-based approximations always override YAML config for the same methods. + +### Important Constraints + +1. YAML config rules follow a **merge** (not replace) model — all matching rules contribute +2. Code-based approximations require compilation to bytecode and inclusion in the approximations JAR +3. The `` pattern must be used consistently for object state tracking +4. Conditions are resolved at rule load time for structural checks (annotations, class names) and at runtime for value checks (constants, types, taint marks) +5. Framework support (Spring) is provided as-is and is **not** configurable through config rules diff --git a/agent-mode/info/pattern-rules.md b/agent-mode/info/pattern-rules.md new file mode 100644 index 000000000..1c1583da6 --- /dev/null +++ b/agent-mode/info/pattern-rules.md @@ -0,0 +1,313 @@ +# Pattern Rules Design Document + +## Overview + +Pattern rules are Semgrep-compatible YAML files that define **what** the analyzer should look for in the target codebase. They describe vulnerable dataflow patterns by composing sources, sinks, sanitizers, and structural code patterns. All rules live under `rules/ruleset/`. + +## Directory Layout + +``` +rules/ruleset/ +├── java/ +│ ├── security/ # Executable rules (one per vulnerability class) +│ │ ├── sqli.yaml +│ │ ├── xss.yaml +│ │ ├── command-injection.yaml +│ │ ├── path-traversal.yaml +│ │ └── ... (22 files) +│ └── lib/ # Reusable library rules (non-executable) +│ ├── generic/ # Framework-agnostic sources/sinks +│ │ ├── servlet-untrusted-data-source.yaml +│ │ ├── command-injection-sinks.yaml +│ │ ├── path-traversal-sinks.yaml +│ │ └── ... (17 files) +│ └── spring/ # Spring-specific sources/sinks +│ ├── untrusted-data-source.yaml +│ ├── jdbc-sqli-sinks.yaml +│ └── ... (6 files) +└── test/ # Test samples and coverage enforcement +``` + +## Rule File Structure + +Every rule file is a YAML document with a top-level `rules:` list. Each entry is a single rule. + +### Common Fields (All Modes) + +```yaml +rules: + - id: # Required. Globally unique identifier. + severity: ERROR | WARNING | NOTE # Required. ERROR = critical, WARNING = medium, NOTE = library/informational. + message: >- # Required. Human-readable finding message. + Description of the vulnerability + metadata: # Required for security rules. Structured metadata. + cwe: CWE-xxx # CWE identifier(s) + short-description: ... # One-line summary + full-description: |- # Multiline markdown with code examples (vulnerable + safe) + ... + references: # External links (OWASP, CWE, etc.) + - https://... + provenance: ... # Upstream rule source + license: ... # License info + languages: + - java # Target language + options: # Optional flags + lib: true # Marks as non-executable library rule + disabled: "reason" # Disables rule with explanation +``` + +## Three Pattern Modes + +### Mode 1: Simple Pattern Matching (Default) + +No `mode:` key needed. Uses structural code patterns to find matches. + +#### Pattern Operators + +| Operator | Semantics | +|----------|-----------| +| `pattern` | Match a single code pattern | +| `patterns` | Conjunction (AND) — all sub-patterns must match | +| `pattern-either` | Disjunction (OR) — any sub-pattern matches | +| `pattern-inside` | Match must occur inside another pattern | +| `pattern-not` | Negation — exclude matches fitting this pattern | +| `pattern-not-inside` | Exclude matches inside another pattern | +| `metavariable-regex` | Constrain a captured metavariable by regex | +| `metavariable-pattern` | Constrain a captured metavariable by sub-pattern | +| `focus-metavariable` | Narrow the match region to a specific metavariable | + +#### Metavariables + +Metavariables (prefixed with `$`) capture parts of the matched code: + +- `$VAR` — single expression or identifier +- `$...ARGS` — zero or more expressions (variadic) +- `$_` — anonymous wildcard (don't-care) + +#### Example: Structural Pattern + +```yaml +rules: + - id: wicket-xss + severity: WARNING + message: XSS via Wicket setEscapeModelStrings + languages: [java] + patterns: + - pattern: | + (org.apache.wicket.$A $OBJ).setEscapeModelStrings(false); +``` + +#### Example: Pattern with Metavariable Constraints + +```yaml +patterns: + - pattern-either: + - pattern: | + $RETURNTYPE $METHOD(HttpServletRequest $UNTRUSTED, ...) { ... } + - metavariable-pattern: + metavariable: $METHOD + pattern-either: + - pattern: doGet + - pattern: doPost + - pattern: doPut +``` + +### Mode 2: Taint Mode + +Explicitly declares `mode: taint`. Used to define source/sink/sanitizer triples within a single rule file. + +```yaml +rules: + - id: rule-id + mode: taint + pattern-sources: # Where tainted data originates + - patterns: + - pattern: ... + pattern-sinks: # Where tainted data is dangerous + - patterns: + - pattern-either: + - pattern: $DB.execute($UNTRUSTED, ...) + - focus-metavariable: $UNTRUSTED # Narrow to the tainted arg + pattern-sanitizers: # What makes data safe + - pattern: Encoder.encode(...) + pattern-propagators: # Custom propagation through methods + - pattern: ... + from: $FROM + to: $TO +``` + +**Key feature**: `focus-metavariable` in sinks narrows the match to the specific tainted expression, not the entire call. + +**Used primarily for library sink rules** that define only `pattern-sinks` (no sources), relying on join-mode composition to supply sources. + +### Mode 3: Join Mode (Primary Composition Mechanism) + +Explicitly declares `mode: join`. Composes library rules to form complete vulnerability detectors. + +```yaml +rules: + - id: sql-injection + mode: join + join: + refs: + - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + as: servlet-source + - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source + as: spring-source + - rule: java/lib/spring/jdbc-sqli-sinks.yaml#spring-sqli-sink + as: sink + on: + - 'servlet-source.$UNTRUSTED -> sink.$UNTRUSTED' + - 'spring-source.$UNTRUSTED -> sink.$UNTRUSTED' +``` + +#### Reference Syntax + +``` +rule: # +as: +``` + +The path is relative to `rules/ruleset/`. The `#rule-id` fragment selects which rule from a multi-rule file. + +#### `on` Clause Syntax + +``` +'.$METAVAR -> .$METAVAR' +``` + +- `->` denotes a **dataflow relationship**: the value captured by `$METAVAR` in the source must flow (through taint propagation) to the same `$METAVAR` in the sink. +- Multiple `on` clauses act as **alternatives (OR)** — any match triggers a finding. +- The metavariable `$UNTRUSTED` is the conventional name for the tainted data binding point across source and sink rules. + +## Library Rules + +Library rules are reusable building blocks marked with `options.lib: true`. They are **never executed standalone** — they exist only to be referenced by join-mode rules. + +### Source Library Rules + +Define where untrusted data enters the application: + +```yaml +rules: + - id: java-servlet-untrusted-data-source + options: + lib: true + severity: NOTE + patterns: + - pattern-either: + - patterns: + - pattern: | + $RETURNTYPE $ENTRYPOINT(HttpServletRequest $UNTRUSTED, ...) { ... } + - metavariable-pattern: + metavariable: $ENTRYPOINT + pattern-either: + - pattern: doGet + - pattern: doPost +``` + +Captures `$UNTRUSTED` at the point where user-controlled data enters. + +### Sink Library Rules + +Define where tainted data becomes dangerous. Can use either pattern mode or taint mode: + +**Pattern-based sink** (simple structure): +```yaml +rules: + - id: command-injection-sinks + options: + lib: true + patterns: + - pattern-either: + - pattern: new ProcessBuilder($UNTRUSTED, ...) + - pattern: Runtime.$EXEC($UNTRUSTED, ...) + - metavariable-regex: + metavariable: $EXEC + regex: (exec|loadLibrary|load) +``` + +**Taint-mode sink** (for complex matching with focus): +```yaml +rules: + - id: spring-sqli-sink + mode: taint + options: + lib: true + pattern-sinks: + - patterns: + - pattern-either: + - pattern: (Statement $S).execute($UNTRUSTED) + - pattern: (JdbcTemplate $T).$METHOD($UNTRUSTED, ...) + - metavariable-regex: + metavariable: $METHOD + regex: (query|update|execute|batchUpdate) + - focus-metavariable: $UNTRUSTED +``` + +## How the Engine Processes Pattern Rules + +1. **Rule loading**: YAML files are parsed and rules are categorized by mode +2. **Join resolution**: Join-mode rules resolve their `refs` to load referenced library rules +3. **Pattern compilation**: Code patterns are compiled into Semgrep-compatible matchers +4. **Dataflow binding**: In join mode, `$UNTRUSTED` from sources and sinks are linked via the `->` operator. The engine performs taint analysis to determine if data flows from source to sink. +5. **Result generation**: Matches produce SARIF findings with vulnerability traces + +## Agent Interaction Points + +### Generating New Rules + +An agent can generate new security rules by: + +1. **Creating source library rules** — define new entry points for untrusted data +2. **Creating sink library rules** — define new dangerous operations +3. **Creating join-mode rules** — compose sources and sinks into vulnerability detectors + +### Fixing False Positives (FP) + +An agent can fix FP by: + +1. **Adding `pattern-not` / `pattern-not-inside`** to exclude safe patterns +2. **Adding `pattern-sanitizers`** (in taint mode) to recognize sanitization +3. **Adding `metavariable-regex`** with negative lookaheads to exclude safe types/methods +4. **Setting `options.disabled`** with a reason to disable overly broad rules + +### Fixing False Negatives (FN) + +An agent can fix FN by: + +1. **Adding new patterns to `pattern-either`** in source/sink library rules +2. **Creating new library rules** for uncovered frameworks/APIs +3. **Adding new `on` clauses** in join-mode rules to link new source/sink combinations +4. **Widening `metavariable-regex`** to accept more matching patterns + +### Constraints for Agent-Generated Rules + +1. All rules **must** follow the YAML schema above +2. Library rules **must** have `options.lib: true` and `severity: NOTE` +3. Security rules **must** have `metadata.cwe` and `metadata.short-description` +4. Source rules **must** capture `$UNTRUSTED` (or equivalent metavariable) +5. Sink rules **must** use the same metavariable name for the tainted position +6. Join-mode `on` clauses **must** reference aliases defined in `refs` +7. Rule ids **must** be globally unique +8. Each enabled non-lib rule **must** have corresponding test coverage (`@PositiveRuleSample` / `@NegativeRuleSample`) + +## Testing Rules + +Rules are tested via annotated Java code samples in `rules/test/`: + +```java +@PositiveRuleSample(ruleId = "sql-injection") +public void vulnerable(HttpServletRequest req) { + String input = req.getParameter("id"); + db.execute("SELECT * FROM users WHERE id = " + input); +} + +@NegativeRuleSample(ruleId = "sql-injection") +public void safe(HttpServletRequest req) { + String input = req.getParameter("id"); + db.execute("SELECT * FROM users WHERE id = ?", input); +} +``` + +The `checkRulesCoverage` Gradle task enforces that all enabled, non-library rules have test samples. diff --git a/agent-mode/mismatch.md b/agent-mode/mismatch.md new file mode 100644 index 000000000..572318a92 --- /dev/null +++ b/agent-mode/mismatch.md @@ -0,0 +1,358 @@ +# Agent Mode — Design vs Implementation Mismatches + +Scope of review: +- Design: `agent-mode/design/agent-mode-design.md` +- Agent prompt: `agent/meta-prompt.md` +- Agent skills: `agent/skills/*.md` +- Go CLI: `cli/cmd/*.go` +- Core analyzer CLI: `core/src/main/kotlin/org/opentaint/jvm/sast/runner/{AbstractAnalyzerRunner,ProjectAnalyzerRunner}.kt` + +The classification below splits findings into: +- **CLI ↔ Core mismatches**: Go CLI passes a wrong flag name, wrong semantics, or wrong value to the Kotlin analyzer. +- **Skill ↔ CLI mismatches**: The skill / meta-prompt tells the agent to use a CLI surface that does not exist or works differently. +- **Skill ↔ Design mismatches**: The design promises a behavior that the skill contradicts (even if the skill happens to match the implementation). + +Severities: +- **BLOCKER** — user command fails (non-zero exit) or produces no output. +- **MAJOR** — produces incorrect behavior or wrong output path. +- **MINOR** — misleading documentation; commands still work. + +--- + +## 1. CLI ↔ Core analyzer mismatches + +### 1.1 BLOCKER — `--external-methods-output` flag does not exist on the analyzer + +**Go CLI** (`cli/cmd/command_builder.go:255`): +```go +if a.externalMethodsOutput != "" { + flags = append(flags, "--external-methods-output", a.externalMethodsOutput) +} +``` + +**Kotlin analyzer** (`core/src/main/kotlin/org/opentaint/jvm/sast/runner/ProjectAnalyzerRunner.kt:51`): +```kotlin +private val trackExternalMethods: Boolean by option(help = "Track external methods, produce external methods YAML lists") + .flag() +``` + +The analyzer exposes a **boolean flag** (`--track-external-methods`), not a path option. The +Go CLI sends `--external-methods-output `, which Clikt will reject because no such +option is declared. + +Additionally, the output **location is not configurable** in the analyzer — it always writes +into the analyzer output directory (`ProjectAnalyzer.writeExternalMethodsYaml`, lines 222–237): + +```kotlin +val withoutRulesPath = resultDir / "external-methods-without-rules.yaml" +val withRulesPath = resultDir / "external-methods-with-rules.yaml" +``` + +**Consequence**: the Go CLI flag `--external-methods` is broken end-to-end. Passing it fails +the scan (unknown option). Even if the flag name were fixed, the user-supplied base path +would be ignored. + +**Fix options**: +- Change the Go CLI to pass `--track-external-methods` when `ExternalMethodsOutput != ""`, and + surface the files from the analyzer output dir (`/external-methods-{without,with}-rules.yaml`); +- Or extend the analyzer to accept `--external-methods-output ` (matching the design). + +> Historical note: `core/bin/main/.../ProjectAnalyzerRunner.kt:50–51` did declare +> `--external-methods-output` as `Path? by option(...).newFile()`. The current +> source has replaced it with a boolean `trackExternalMethods`. The Go CLI still +> targets the old contract. + +### 1.2 MAJOR — Design / CLI disagreement on `--approximations-config` cardinality + +**Design (1.2)**: +> **Kotlin CLI**: Rename `--config` to `--approximations-config`. + +Design implies a **single** config path (consistent with pre-existing `customConfig: Path?`). + +**Kotlin analyzer** (current source, `ProjectAnalyzerRunner.kt:37`): +```kotlin +private val approximationsConfig: List by option(help = "...") + .file() + .multiple() +``` + +So the real core API is now **repeatable** (`List`), but: + +**Go CLI** (`cli/cmd/scan.go:35`): +```go +ApproximationsConfig string +... +scanCmd.Flags().StringVar(&ApproximationsConfig, "approximations-config", "", "...") +``` + +and builder (`command_builder.go:60, 246–248`): +```go +approximationsConfig string +... +if a.approximationsConfig != "" { + flags = append(flags, "--approximations-config", a.approximationsConfig) +} +``` + +The Go CLI exposes a **single-valued** flag and passes at most one occurrence. + +**Consequence**: agents relying on "OVERRIDE mode" semantics documented in the design can +only ever supply a single file. If the analyzer expects multiple (it accepts `.multiple()`) +there is no way to supply them through the Go CLI. + +**Fix options**: make the Go CLI flag repeatable (`StringArrayVar`) and proxy every value, +or revert the Kotlin option to `Path?` and update the design to make the single-path +contract explicit. + +### 1.3 MINOR — `--dataflow-approximations` accepts different path kinds + +**Design (1.3/1.4)** and all skills: `--dataflow-approximations ` — "Directory of compiled +approximation class files" (or sources which the Go CLI auto-compiles). + +**Kotlin analyzer** current source (`ProjectAnalyzerRunner.kt:54`): +```kotlin +private val dataflowApproximations: List by option(help = "Directory of compiled approximation class files") + .directory() + .multiple() +``` + +The `core/bin/...` copy uses `.path()` instead of `.directory()`. Not a behavioural +mismatch between the Go CLI and the analyzer (the CLI does compile sources to a directory +and passes the directory path), but there is an inconsistency between the compiled artifact +and the source, which can silently bite integrators using the `bin` classpath. + +--- + +## 2. Skill / meta-prompt ↔ Go CLI mismatches + +### 2.1 BLOCKER — `opentaint rules-path` does not exist + +**Design (1.8, 2.1)** and `agent/skills/create-rule.md:15` prescribe: +```bash +RULES_DIR=$(opentaint rules-path) +``` + +**Meta prompt** (`agent/meta-prompt.md:24`): +``` +1. **Check built-in rules** -- read rules in `$(opentaint agent rules-path)` +``` + +**Actual CLI** (`cli/cmd/agent_rules_path.go`): the command is registered under +the `agent` command group, i.e. `opentaint agent rules-path`, not `opentaint rules-path`. + +- The meta-prompt uses the correct form (`opentaint agent rules-path`). +- The `create-rule.md` skill uses the **wrong** form (`opentaint rules-path`), + matching the design document verbatim. + +**Fix**: change `create-rule.md:15` to `opentaint agent rules-path`. + +### 2.2 BLOCKER — `opentaint test-rules` does not exist as a top-level command + +**Design (1.5, 2.1)** and all design examples: +```bash +opentaint test-rules --ruleset ... -o ... +``` + +**Actual CLI** (`cli/cmd/agent_test_rules.go:24`): the command is registered under the +`agent` group: +```go +agentCmd.AddCommand(agentTestRulesCmd) +``` +Real invocation is `opentaint agent test-rules ...`. + +- The `agent/skills/test-rule.md:64` uses the correct form (`opentaint agent test-rules`). +- The design document and every `run-analysis`/phased example in the design file use the + incorrect top-level form. + +### 2.3 BLOCKER — `opentaint init-test-project` does not exist as a top-level command + +Same pattern as 2.2. Design says `opentaint init-test-project `; the implementation +registers it as `opentaint agent init-test-project` (`cli/cmd/agent_init_test_project.go:67`). + +- `agent/skills/test-rule.md:16` uses the correct `opentaint agent init-test-project`. +- Design document uses `opentaint init-test-project` in Appendix A and §2.1 / §3.4. + +### 2.4 MAJOR — `opentaint agent test-rules` argument is a directory, not `project.yaml` + +**Design (1.5, 2.1)** and **skill `run-analysis.md`** repeatedly say: +```bash +opentaint test-rules ./agent-test-compiled/project.yaml --ruleset ... -o ... +``` + +**Actual CLI** (`cli/cmd/agent_test_rules.go:37-42`): +```go +projectPath := log.AbsPathOrExit(args[0], "project-model") +nativeProjectPath := filepath.Join(projectPath, "project.yaml") + +if _, err := os.Stat(nativeProjectPath); os.IsNotExist(err) { + out.Fatalf("Project model not found: %s", nativeProjectPath) +} +``` + +The CLI joins the argument with `project.yaml` and then stats it. If the user passes +`./agent-test-compiled/project.yaml`, the CLI stats `./agent-test-compiled/project.yaml/project.yaml` +and aborts. + +Skill `test-rule.md:64-66` has the **correct** form (passes a directory). Design file +and the `analyze-findings` narrative in the design have the wrong form. + +### 2.5 MAJOR — `opentaint scan` argument is **not** the directory containing `project.yaml` only + +**Meta prompt** (`agent/meta-prompt.md:36`) and skills `run-analysis.md`, +`create-yaml-config.md`, `debug-rule-reachability.md`, `create-rule.md`, +`create-approximation.md` pass `./opentaint-project` (a directory) to `scan`. +`agent/skills/run-analysis.md:78` even states this as a "Note": +> The scan path is the **directory** containing `project.yaml`, not the path to `project.yaml` itself + +**Actual CLI** (`cli/cmd/scan.go:158-167`): +- Checks `validation.ValidateSourceProject(absUserProjectRoot)` against source-project markers + (`pom.xml`, `build.gradle*`, `mvnw`, `gradlew`, `.mvn`). A directory that contains only + `project.yaml` and compiled classes has **none** of these markers. +- When validation fails it then tests `validation.IsProjectModel(absUserProjectRoot)` and, if + true, **aborts with a suggestion** to use `--project-model`, exit code 1. + +So `opentaint scan ./opentaint-project` (directory with `project.yaml`) **does not scan**; +it prints a suggestion and exits. The correct invocation is either `opentaint scan` on the +source directory (for compile+scan) or `opentaint scan --project-model ./opentaint-project`. + +The design file is particularly bad about this — e.g. §3.5: +```bash +opentaint scan ./opentaint-project/project.yaml -o ./results/report.sarif ... +``` +This passes a file to a command expecting a directory and fails validation in a different +way. + +**Fix**: update every skill and design snippet to use +```bash +opentaint scan --project-model ./opentaint-project ... +``` +(or pass the source directory if a fresh compile is desired). + +### 2.6 MAJOR — `agent/skills/run-analysis.md` claims `--external-methods` produces two files whose base path is user-configurable + +`run-analysis.md:59-60`: +> The `--external-methods` flag specifies the **base path**. The analyzer derives two +> filenames by appending `-without-rules` and `-with-rules` before the `.yaml` extension. + +Both statements are wrong against the current analyzer: +1. Per 1.1 above, the analyzer does not accept a path at all — only a boolean + `--track-external-methods`. The Go CLI itself currently passes an unsupported + `--external-methods-output ` flag, so `--external-methods` in the Go CLI never + actually drives the output path. +2. Output file names are hard-coded (`external-methods-{without,with}-rules.yaml`), + written into the analyzer `resultDir`, not to a user-supplied base path. + +The meta-prompt (`agent/meta-prompt.md:40,81`) and `analyze-findings.md:40-42` repeat the +"two files" expectation; the files do exist but at the fixed location above. + +### 2.7 MINOR — `--rule-id` argument format + +**Implementation** (`ruleIdAllow` in `SemgrepRuleLoader.kt:493-494`) compares the +`--semgrep-rule-id` value against `rule.info.ruleId`, which is built by +`SemgrepRuleUtils.getRuleId(ruleSetName, id)` as `"$ruleSetName:$id"` where `ruleSetName` +is the rule file path relative to the ruleset root (e.g. `java/security/my-vuln.yaml`). + +Skills `run-analysis.md`, `create-rule.md`, `debug-rule-reachability.md`, and `test-rule.md` +all correctly document `--rule-id java/security/my-vuln.yaml:my-vulnerability`. + +**Design file is wrong** — §1.6 and §3.3 examples give: +```bash +--rule-id my-vulnerability +``` +and state "No need to list [refs] in `--rule-id`". With the implementation, the plain short +ID does not match any rule (`ruleIdAllow` will drop every rule), yielding zero findings. + +The design's claim that referenced library rules are auto-included when a join rule is in +the filter is **not** visible in the current `SemgrepRuleLoader.loadRules`: the single filter +check is `ruleIdAllow(this, ruleIdFilter)` applied to every rule independently. If a library +rule's full ID is not in the filter, it is skipped (library rules are also skipped by +`info.isLibraryRule` anyway, regardless of refs). + +The meta-prompt (`agent/meta-prompt.md:39`) writes `--rule-id ` without +specifying the format, which is less wrong but still misleading for an agent; the explicit +full-ID examples in the skills are correct. + +### 2.8 MINOR — `create-rule.md` duplicates the design's wrong `RULES_DIR=$(opentaint rules-path)` + +See 2.1 — `create-rule.md:15` needs the `agent` prefix. + +--- + +## 3. Skill ↔ Design mismatches + +### 3.1 Rule filter semantics re-refs + +**Design (1.6)**: "Library rules (`options.lib: true`) referenced by active rules via `refs` +are automatically included — they don't need to be listed explicitly." and §3.3. + +**Implementation** (`SemgrepRuleLoader.loadRules` and `ruleIdAllow`, lines 105-107, 493-494): +A rule is kept iff it is not disabled, not a library rule, passes severity, **and** passes +the `ruleIdFilter`. Library rules are always skipped (`info.isLibraryRule` skip), so the +"auto-include" only works to the extent that join rules physically carry their refs' +patterns internally. There is no code path that adds library rule IDs to the filter or +treats them as implicitly active via a join rule's `refs`. + +This is a **design ↔ implementation** mismatch; it also explains why skills that repeat the +design's claim (`create-rule.md:127`, `meta-prompt.md:99`) are misleading. + +### 3.2 `--approximations-config` OVERRIDE mode and scope + +**Design (1.2, §3.7)** says the custom config **overrides** the default config and is used +**exclusively for passThrough**, because the analyzer "currently cannot use sanitizers from +the config". + +**Implementation** (`ProjectAnalyzer.approximationConfigCombinationOptions`, lines 246-252): +```kotlin +private val approximationConfigCombinationOptions = CombinationOptions( + entryPoint = CombinationMode.IGNORE, + source = CombinationMode.IGNORE, + sink = CombinationMode.IGNORE, + cleaner = CombinationMode.IGNORE, + passThrough = CombinationMode.OVERRIDE, +) +``` + +Skills match the design (OVERRIDE, passThrough only), so no mismatch between skills and +implementation **for this category**. The design note in §3.7 says *"cleaner ignored"* but +describes it as `conditions` in §3.7 enumeration which includes `cleaner`-ish constructs +implicitly via custom configurations — that part is consistent. + +### 3.3 `agent-approximations/` directory layout + +- **Design § Working Directory Layout** says `agent-approximations/src/` contains Java + sources that are auto-compiled by the CLI. +- **Meta prompt §Working Directory Layout** says `agent-approximations/classes/` (compiled + classes). +- **Skill `create-approximation.md`** uses `agent-approximations/src/` for sources and + `agent-approximations/classes/` for compiled output (compile manually with `javac`). +- **Go CLI** (`cli/cmd/compile_approximations.go`) auto-compiles `.java` files found in the + given `--dataflow-approximations` directory (aligning with design). + +The documentation is internally inconsistent: meta-prompt's layout omits the `src/` +directory and assumes the agent compiles manually, while design/skill direct the agent to +let the CLI auto-compile. Pick one convention; currently the agent may do both, depending +on which file it reads. + +### 3.4 `agent-approximations/classes` vs `src` (skill text) + +`agent/skills/create-approximation.md:52-56` tells the agent to compile manually and pass +`./agent-approximations/classes`, even though the CLI auto-compiles `.java` files. This is +not wrong (CLI accepts `.class` directories unchanged) but contradicts the design's +"one command" story and makes it awkward for agents that wrote only `.java` sources in +`src/`. + +--- + +## 4. Summary of concrete fixes required + +| # | Severity | File(s) | Change | +|---|---|---|---| +| 1 | BLOCKER | `cli/cmd/command_builder.go` | Replace `--external-methods-output ` with boolean `--track-external-methods` **or** add the corresponding option to `ProjectAnalyzerRunner.kt`. Align `ExternalMethodsOutput` semantics with whichever direction is chosen. | +| 2 | BLOCKER | `agent-mode/design/agent-mode-design.md`, `agent/skills/create-rule.md:15` | Use `opentaint agent rules-path`, `opentaint agent test-rules`, `opentaint agent init-test-project` consistently. | +| 3 | MAJOR | design doc, `agent/skills/run-analysis.md`, `agent/skills/create-rule.md`, `agent/skills/create-approximation.md`, `agent/skills/debug-rule-reachability.md`, `agent/skills/create-yaml-config.md`, `agent/meta-prompt.md` | When passing a pre-compiled model, use `opentaint scan --project-model ./opentaint-project ...`. Never pass `./opentaint-project` or `./opentaint-project/project.yaml` as the positional argument. | +| 4 | MAJOR | design doc | For `opentaint agent test-rules`, pass the **directory** (e.g. `./agent-test-compiled`), not `project.yaml`. | +| 5 | MAJOR | `cli/cmd/scan.go`, `cli/cmd/command_builder.go` | Decide single vs multiple `--approximations-config`; align Go CLI and Kotlin analyzer to one cardinality. | +| 6 | MAJOR | `agent/skills/run-analysis.md`, `agent/meta-prompt.md` | Document that the external-methods YAML files are emitted in the analyzer output directory with fixed names `external-methods-{without,with}-rules.yaml`, and that `--external-methods ` has no effect on the output location. | +| 7 | MINOR | `agent-mode/design/agent-mode-design.md` | Replace `--rule-id my-vulnerability` with `--rule-id java/security/my-vuln.yaml:my-vulnerability` everywhere, and remove the "refs are auto-included" claim unless `SemgrepRuleLoader` is updated to implement it. | +| 8 | MINOR | `agent/meta-prompt.md`, `agent/skills/create-approximation.md` | Standardise on `agent-approximations/src/` (auto-compile) and remove `classes/` references or vice versa. | +| 9 | MINOR | `core/bin/main/org/opentaint/jvm/sast/runner/ProjectAnalyzerRunner.kt` | Rebuild — the binary copy is out of sync with `core/src` on several option declarations (see diff in §1.1, §1.3). | diff --git a/agent-mode/mitigation-plan.md b/agent-mode/mitigation-plan.md new file mode 100644 index 000000000..9a2464ce2 --- /dev/null +++ b/agent-mode/mitigation-plan.md @@ -0,0 +1,407 @@ +# Agent Mode — Mismatch Mitigation Plan + +## Priority Rules + +1. **Core analyzer API is frozen.** Its current surface (`AbstractAnalyzerRunner` + + `ProjectAnalyzerRunner`) is the source of truth. We do **not** add, rename, or change + the semantics of any Kotlin option. +2. **CLI follows Core.** Any Go CLI flag that does not correctly map onto a Core option is + changed until it does. +3. **Skills and design docs follow CLI.** Anything still documented incorrectly is rewritten + in skills / meta-prompt / design. + +Every fix below is locked to this hierarchy. + +--- + +## 1. Frozen Core API (reference) + +From `core/src/main/kotlin/org/opentaint/jvm/sast/runner/ProjectAnalyzerRunner.kt` +(+ `AbstractAnalyzerRunner.kt`). These are the options the CLI is allowed to use: + +| Kotlin name | CLI flag (Clikt-derived) | Kind | Notes | +|---|---|---|---| +| `approximationsConfig` | `--approximations-config` | `List` (repeatable) | Custom YAML passThrough; OVERRIDE mode. | +| `semgrepRuleSet` | `--semgrep-rule-set` | `List` | Ruleset roots. | +| `semgrepRuleSeverity` | `--semgrep-rule-severity` | `List` | | +| `semgrepRuleId` | `--semgrep-rule-id` | `List` | Full ID `.yaml:`. | +| `trackExternalMethods` | `--track-external-methods` | Boolean flag | Writes fixed-name YAMLs into `outputDir`. | +| `dataflowApproximations` | `--dataflow-approximations` | `List` (directories) | Compiled class dirs. | +| `semgrepRuleLoadTrace` | `--semgrep-rule-load-trace` | `Path?` | | +| `sarifFileName`, `sarifCodeFlowLimit`, `sarifSemgrepStyleId`, `sarifToolVersion`, `sarifToolSemanticVersion`, `sarifGenerateFingerprint`, `sarifUriBase` | corresponding `--sarif-*` flags | … | | +| `debugFactReachabilitySarif` | `--debug-fact-reachability-sarif` | Flag | Output: `outputDir/debug-ifds-fact-reachability.sarif`. | +| `debugRunRuleTests` | `--debug-run-rule-tests` | Flag | Output: `outputDir/test-result.json`. | +| `--project`, `--output-dir`, `--project-kind`, `--ifds-analysis-timeout`, `--ifds-ap-mode`, `--verbosity`, `--logs-file` | inherited | | | + +**Hard consequences** locked in by this surface: +- External-methods output: fixed filenames (`external-methods-{without,with}-rules.yaml`) in + `outputDir`. Users cannot choose a path. +- `--approximations-config` is repeatable. +- No "refs auto-include" for `--semgrep-rule-id`: filtering is purely + `rule.info.ruleId in filter` (`SemgrepRuleLoader.kt:493`). + +Anything in CLI / skills / design that contradicts the table above must yield. + +--- + +## 2. CLI changes (to match Core) + +### 2.1 External methods — decision: boolean `--track-external-methods` + +**File**: `cli/cmd/scan.go`, `cli/cmd/command_builder.go`. + +We considered two CLI shapes: + +| Option | UX | Code | Failure modes | +|---|---|---|---| +| **A. Boolean** `--track-external-methods` | Files always in `/external-methods-{without,with}-rules.yaml` (next to SARIF). | 1:1 with Core; emitter is three lines. | None introduced. | +| B. String `--external-methods ` with post-scan rename | User picks base path; CLI renames/copies after the analyzer exits. | Extra I/O code; partial-failure semantics if analyzer crashes mid-write or user path is on a different volume; must handle two files atomically. | CLI must also decide whether to leave the originals; rename logic has to run even when analyzer returns non-zero (for partial output). | + +**Decision: Option A — boolean `--track-external-methods`.** It matches the frozen Core API 1:1 (Priority 1 rule), removes a class of failure modes, and the agent workflow doesn't need a custom base path — it already knows the output directory because it controls `-o`. + +Concrete changes: + +- Remove from `cli/cmd/command_builder.go`: the `externalMethodsOutput string` field, the `SetExternalMethodsOutput(...)` method, and the `--external-methods-output` emitter (`command_builder.go:254-256`). Add `trackExternalMethods bool` + `SetTrackExternalMethods(bool)`; emit only `"--track-external-methods"` (no value). +- Remove from `cli/cmd/scan.go`: the `ExternalMethodsOutput string` var, the `--external-methods` flag registration, and the absolute-path resolution block that calls `SetExternalMethodsOutput(...)`. Add: + ```go + var TrackExternalMethods bool + scanCmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, + "Write external-methods-{without,with}-rules.yaml next to the SARIF report") + ... + if TrackExternalMethods { + nativeBuilder.SetTrackExternalMethods(true) + } + ``` +- Document the fixed output location in the flag help text and in the updated skill (`run-analysis.md`): `/external-methods-{without,with}-rules.yaml`. + +### 2.2 `--approximations-config` — make repeatable + +**File**: `cli/cmd/scan.go`, `cli/cmd/command_builder.go`. + +Core takes `List`; CLI currently takes `string`. Promote to `[]string`: + +```go +var ApproximationsConfig []string +scanCmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, + "YAML passThrough approximations config (OVERRIDE mode, repeatable)") +``` + +Builder gains `AddApproximationsConfig(path string)`; the emitter loops and appends +`--approximations-config

` per entry. Same treatment for the absolute-path resolution +loop in `scan.go`. + +### 2.3 Verify `--dataflow-approximations` stays compatible + +The current `directory()` typing in Core rejects files. The CLI already produces a +directory (auto-compile branch in `compile_approximations.go`). Keep as-is; add a +defensive check in the CLI that `info.IsDir()` before proxying (we do already). +No changes needed. + +### 2.4 Other flags reviewed — keep as-is + +- `--rule-id` → `--semgrep-rule-id` proxy: correct. +- `--ruleset` → `--semgrep-rule-set`: correct. +- `--severity` → `--semgrep-rule-severity`: correct. +- `--timeout` → `--ifds-analysis-timeout`: correct. +- `--debug-fact-reachability-sarif`: correct. +- `--debug-run-rule-tests` (used via `EnableRunRuleTests`): correct. + +### 2.5 `opentaint agent test-rules` — clarify argument + +The CLI already requires a project-model **directory** (it joins `project.yaml`). No code +change; just ensure the `Short`/`Long` help strings are unambiguous: + +``` +Usage: opentaint agent test-rules +``` + +(`project-model` is already used but is easy to misread as a file.) + +### 2.6 `opentaint scan` with compiled model — keep `--project-model` + +No CLI change required. Validation already tells users the right command via `suggest(...)`. +All misuse is documentation-side; fixed in §3. + +### 2.7 Sync `core/bin/main/...` to `core/src/main/...` + +Clean-rebuild the `core/bin` tree (or delete it if it's a stale IDE artefact). It diverges +from `core/src` (still declares the old `--external-methods-output`, the `--config` alias +on `approximationsConfig`, etc.), which will confuse anyone who runs the binary copy. + +Action: add a pre-commit / CI check or just `./gradlew clean build` + commit the result; if +`core/bin` is in `.gitignore`, drop the stale directory from the workspace. + +--- + +## 3. Skill & design changes (to match CLI) + +For each file, exhaustive edits: + +### 3.1 `agent/meta-prompt.md` + +- Line 36 example `opentaint scan ./opentaint-project \...` → replace with + `opentaint scan --project-model ./opentaint-project \...`. +- Line 40 `--external-methods ./results/external-methods.yaml` → replace with + `--track-external-methods` and add note that files are emitted next to the SARIF in + the analyzer output directory. +- Line 42 "Collect `report.sarif`, `external-methods-without-rules.yaml`..." → update paths + to `/external-methods-{without,with}-rules.yaml`. +- Line 39 `--rule-id ` → add a note that IDs are `.yaml:`. +- Layout block (§Working Directory Layout): drop `agent-approximations/classes/`; use + `agent-approximations/src/` only (the CLI auto-compiles). +- Line 99 remove the "library rules auto-included via join-mode refs" claim; replace with: + "every rule ID you want active (including library rules referenced by join rules) must be + listed explicitly in `--rule-id`". + +### 3.2 `agent/skills/run-analysis.md` + +- Every `opentaint scan ./opentaint-project \` → `opentaint scan --project-model ./opentaint-project \`. +- Replace all `--external-methods ./results/external-methods.yaml` with + `--track-external-methods`. +- Rewrite the "base path" note: the analyzer writes + `/external-methods-{without,with}-rules.yaml`, where `` is the + directory that contains the SARIF file. The user does **not** choose filenames. +- Update the "Outputs" section accordingly. +- "Key Flags" table: replace `--external-methods` row with `--track-external-methods`; + mark `--approximations-config` as repeatable. + +### 3.3 `agent/skills/create-rule.md` + +- Line 15 `RULES_DIR=$(opentaint rules-path)` → `RULES_DIR=$(opentaint agent rules-path)`. +- §6 example: `opentaint scan ./opentaint-project \...` → `opentaint scan --project-model ./opentaint-project \...`. +- Keep the `--rule-id .yaml:` guidance (already correct). + +### 3.4 `agent/skills/create-approximation.md` + +- Remove the manual `javac` block (§2). The Go CLI auto-compiles `.java` from the + `--dataflow-approximations` directory. +- Replace §3 example `opentaint scan ./opentaint-project` with + `opentaint scan --project-model ./opentaint-project`. +- Change `--dataflow-approximations ./agent-approximations/classes` to + `--dataflow-approximations ./agent-approximations/src` (CLI handles compilation). + +### 3.5 `agent/skills/create-yaml-config.md` + +- Replace `opentaint scan ./opentaint-project` with + `opentaint scan --project-model ./opentaint-project`. + +### 3.6 `agent/skills/debug-rule-reachability.md` + +- Same `--project-model` replacement. +- Keep the single `--rule-id` warning. + +### 3.7 `agent/skills/test-rule.md` + +- Already uses `opentaint agent test-rules` and `opentaint agent init-test-project`. No + change needed beyond one pass to verify the project-model argument is a directory + (already is). + +### 3.8 `agent/skills/analyze-findings.md` + +- Under §3 ("Process external methods"), update to fixed filenames in the SARIF output + directory; remove the "base path" wording. + +### 3.9 `agent/skills/build-project.md`, `discover-entry-points.md`, `generate-poc.md` + +- No CLI usages to fix (these are about source reading and documentation). +- Spot-check: `discover-entry-points.md` mentions + `--debug-run-analysis-on-selected-entry-points`; verify it is still in + `AbstractAnalyzerRunner.kt` (it is, line 45). Keep. + +### 3.10 `agent-mode/design/agent-mode-design.md` + +This is the biggest offender; edit inline, do **not** re-open the design question: + +- §1.1 "Kotlin CLI flag: `--external-methods-output `" → replace with + "Kotlin CLI flag: `--track-external-methods` (boolean). Output filenames are fixed + (`external-methods-{without,with}-rules.yaml` in `--output-dir`)." +- §1.1 "Go CLI flag: `--external-methods `" → "Go CLI flag: + `--track-external-methods`." +- §1.2 Kotlin CLI rename note: keep but mark `approximationsConfig` as `List`. +- §1.6 rewrite: remove "referenced library rules are automatically included"; state + explicitly: "Each rule whose ID is not in the filter is dropped, including library rules + referenced via `refs`. Callers must list every rule they want active." +- §2.1 Complete Command Reference: + - Every `opentaint scan ./opentaint-project/project.yaml` → either `opentaint scan ` + (for compile+scan) or `opentaint scan --project-model ./opentaint-project`. + - `opentaint test-rules` → `opentaint agent test-rules`, and argument is a directory. + - `opentaint rules-path` → `opentaint agent rules-path`. + - `opentaint init-test-project` → `opentaint agent init-test-project`. + - `--external-methods ` row → `--track-external-methods`. + - `--rule-id my-vulnerability` → `--rule-id java/security/my-vuln.yaml:my-vulnerability`. +- §2.2 Command Builder mapping: + - `--external-methods ` → `--external-methods-output ` row → delete; replace + with `--track-external-methods` → `--track-external-methods`. +- §3.3 / §3.5 / §3.7 / §3.8 / §3.9 examples: propagate the same three changes + (`--project-model`, `--track-external-methods`, full `--rule-id`). +- Appendix A: `opentaint init-test-project` → `opentaint agent init-test-project`; + `opentaint test-rules ./agent-test-compiled/project.yaml` → + `opentaint agent test-rules ./agent-test-compiled`. +- Appendix C: keep (output format itself is correct, only the path control prose in body is + wrong). + +--- + +## 4. Execution order + +Pick the order to minimise churn re-testing: + +1. **CLI code changes** (§2.1, §2.2, §2.5 help text, §2.7 bin cleanup). + After this step, `opentaint scan --track-external-methods` and repeatable + `--approximations-config` work end-to-end against the unchanged Core. +2. **Test suite sync** (§4.5). Update `conftest.py` + the five `test_*.py` files + so the CLI changes can be validated. Running the suite after step 2 is the + primary regression gate for the whole mitigation. +3. **Skill updates** (§3.1–§3.9). These only touch `.md`; do in one pass with a single + `edit_file` per skill, guided by this plan. +4. **Design doc rewrite** (§3.10). Largest text change; do last so the design reflects + settled CLI + skill wording. +5. **Verification pass** — for each updated file, grep for the banned tokens and fail the + build if any survive: + ``` + opentaint rules-path -> must be absent (except after "agent ") + opentaint test-rules -> must be absent + opentaint init-test-project -> must be absent + --external-methods -> must be absent (replaced by --track-external-methods) + --external-methods-output -> must be absent (Core doesn't have it) + opentaint scan ./opentaint-project -> must be absent + scan .*project\.yaml -> must be absent + --rule-id my-vulnerability -> must be absent (must be full ID) + ``` + Add this grep as a `scripts/check-docs.sh` and run it in CI. + +## 4.5 `agent-mode/test/` — verification suite fixes + +The pytest suite under `agent-mode/test/` is currently wired to the old/wrong +CLI surface. After Core freezes and CLI is corrected, every test file needs targeted +edits; otherwise **no test can pass** against the new CLI (scan rejects the model path, +`--external-methods` no longer exists, etc.). Changes below are keyed to the decisions +made in §1–§3. + +### 4.5.1 `agent-mode/test/conftest.py` + +- `OpenTaintCLI.scan(...)` (lines ~190–230): + - Signature: replace `external_methods: Optional[str] = None` with + `track_external_methods: bool = False`. + - Body: when the incoming `project_path` points at a pre-compiled model (directory + containing `project.yaml` or the file itself), pass `--project-model

` instead of + a positional argument. Pseudocode: + ```python + p = Path(project_path) + if p.name == "project.yaml" and p.is_file(): + p = p.parent + if (p / "project.yaml").is_file(): + cmd = self._base_cmd() + ["scan", "-o", output, "--project-model", str(p)] + else: + cmd = self._base_cmd() + ["scan", str(p), "-o", output] + ``` + - Flag emission: drop `--external-methods`; when `track_external_methods=True`, append + `--track-external-methods` (no value). +- `OpenTaintCLI.test_rules(...)` (lines ~230–245): the current code already passes a + directory — just verify the swap from `project.yaml` file to parent directory stays + (it does). No semantic change required; keep as-is. +- Helper `_derive_external_methods_paths(base_path)` and `load_external_methods(base_path)` + (lines ~335–380): switch them to take the **SARIF path** (or its parent directory) and + return the two fixed filenames in that directory: + ```python + def _derive_external_methods_paths(sarif_path: Path) -> tuple[Path, Path]: + parent = sarif_path.parent + return ( + parent / "external-methods-without-rules.yaml", + parent / "external-methods-with-rules.yaml", + ) + + def load_external_methods(sarif_path: Path) -> dict: ... + def external_methods_exist(sarif_path: Path) -> bool: ... + ``` +- No other helper changes needed; `count_external_methods`, `sarif_*`, and timing helpers + are agnostic to CLI wiring. + +### 4.5.2 `agent-mode/test/test_external_methods.py` + +Every test in this file threads the `ext_methods_path = tmp_output / "external-methods.yaml"` +variable through `external_methods=str(ext_methods_path)` and then loads the pair via +`load_external_methods(ext_methods_path)`. After the CLI change: + +- Drop the `ext_methods_path` computation entirely. +- Call `cli.scan(..., track_external_methods=True)`. +- Pass the SARIF path to `load_external_methods` / `external_methods_exist` (they are the + fixed files next to SARIF). + +All three test classes (`TestExternalMethodsBasic`, `TestExternalMethodsContent`, +`TestExternalMethodsWithApproximations`, `TestExternalMethodsAlongsideSarif`) are updated +the same way; the `run1`/`run2` subdirectory pattern in +`test_approximations_reduce_without_rules` is kept so the two runs don't collide. + +### 4.5.3 `agent-mode/test/test_full_loop.py` + +- `test_full_agent_loop` sets `ext_methods_path = ws["results"] / "external-methods-1.yaml"` + and `-2.yaml`, passes them to `cli.scan(external_methods=...)`, and inspects them. +- The two scan runs share `ws["results"]`, which means the fixed filenames would + collide. Fix: give each scan its own subdir (`ws["results"] / "run-1"` and `run-2`), + and write SARIF into that subdir. Then `load_external_methods(sarif_path)` picks up + the two files next to it. +- Replace `external_methods=str(...)` with `track_external_methods=True`. +- Replace every `cli.scan(project_path=str(stirling_project), ...)` call's argument + handling via conftest (no per-test change needed once conftest uses `--project-model`). + +### 4.5.4 `agent-mode/test/test_approximations.py` + +- No `external_methods=` usage — only scan-with-approx. The fix is entirely indirect via + conftest (project-model routing). The test `test_approximations_change_results` keeps its + separate `run1` / `run2` output dirs; no collision. +- `test_approximations_config_with_custom_ruleset` currently passes a single + `approximations_config` value; this continues to work since CLI accepts the flag once. + If we also promote `cli.scan`'s signature to accept a list (optional), add a follow-up. +- Verify `test_approximation_compilation_failure` against the current + `compile_approximations.go` — keep assertions. + +### 4.5.5 `agent-mode/test/test_rules.py` + +- No `external_methods=` usage. Indirect fix via conftest (`--project-model`). +- `test_rules_path_command`: keep — already uses `cli.rules_path()` which is + `opentaint agent rules-path`. +- `test_init_test_project`, `test_rule_test_all_pass`, + `test_rule_test_detects_false_negative`: keep — already use + `cli.init_test_project` / `cli.test_rules` (both under `agent` subcommand). +- `test_rule_test_all_pass` still passes `project_path=str(compiled_dir / "project.yaml")` + to `cli.test_rules`. Conftest already strips `project.yaml` for that method; keep. + +### 4.5.6 `agent-mode/test/test_build.py` + +- No external-methods or rule-id references that need changing. +- `test_scan_nonexistent_project`, `test_scan_missing_output_flag`: keep as-is (exit-code + checks). +- All other tests pass `stirling_project` which resolves via conftest — will work once + conftest routes pre-compiled models through `--project-model`. + +### 4.5.7 Run/verify procedure + +After CLI and test edits: + +1. `cd cli && go build -o ./bin/opentaint .` +2. Build local JARs once (`cd core && ./gradlew build`) so the hidden `--analyzer-jar` / + `--autobuilder-jar` resolution finds them. +3. `cd agent-mode/test && pytest -m "not slow" -q` for the fast smoke set, then + `pytest -q` for the full (slow) set. +4. Expected outcomes: + - `test_external_methods.py::TestExternalMethodsBasic::test_scan_produces_external_methods_file` passes because the CLI now enables tracking and the helper looks at the fixed file names. + - `test_full_loop.py::test_full_agent_loop` passes after the per-run subdir split. + - `test_rules.py::test_rules_path_command` passes (already correct). + - `test_build.py::test_scan_with_builtin_rules` passes because conftest now uses + `--project-model`. +5. If a test still fails, inspect the CLI's stderr (captured in `CLIResult.stderr`) for + the real Clikt / analyzer error; do not reintroduce the old flag names. + +--- + +## 5. Out of scope / explicitly NOT changed + +- **Core option rename** (e.g. bringing `--external-methods-output` back). Design's §1.1 + lost out to the frozen-Core rule; the external-methods output path is not configurable. +- **"Auto-include library refs" in `SemgrepRuleLoader`**. Implementation requires + explicit IDs; design doc gets rewritten, not the loader. +- **`agent-approximations/classes/` as the canonical compiled directory**. The CLI + auto-compiles; we commit to `src/`. +- **Changing `opentaint agent ...` subcommands to top-level**. Current grouping stays; + docs follow the grouping. diff --git a/agent-mode/plan.md b/agent-mode/plan.md new file mode 100644 index 000000000..7f0075b62 --- /dev/null +++ b/agent-mode/plan.md @@ -0,0 +1,303 @@ +# Agent Mode — Implementation Progress + +Tracking document for the implementation of agent mode features. +Refer to `agent-mode/impl/agent-mode-impl.md` for the full design. + +--- + +## Phase A: Kotlin Analyzer Changes + +### A1: ExternalMethodTracker class — [x] +- New file: `ExternalMethodTracker.kt` +- Data classes: `ExternalMethodRecord`, `SkippedExternalMethods`, `ExternalMethodAggregation` +- Thread-safe via ConcurrentHashMap (same pattern as TaintSinkTracker) + +### A2: Wire tracker into analysis pipeline — [x] +- `TaintAnalysisContext.kt` — added `externalMethodTracker: ExternalMethodTracker?` +- `TaintAnalysisUnitRunnerManager.kt` — constructor param, pass through `TaintAnalysisManagerWithContext` +- `JIRTaintAnalyzer.kt` — constructor param, pass to engine, expose `getSkippedExternalMethods()` + +### A3: Report external methods from flow function — [x] +- `JIRMethodCallFlowFunction.kt` — report to tracker in `applyPassRulesOrCallSkip()` +- Uses `passThroughFacts.isSome` to determine `passRulesApplied` + +### A4: External methods output flag + YAML serialization — [x] +- `ProjectAnalysisOptions.kt` — `externalMethodsOutput: Path?` +- `ProjectAnalyzerRunner.kt` — `--external-methods-output` Clikt flag +- `ProjectAnalyzer.kt` — `@Serializable` data classes + kaml `encodeToStream` + +### A5: Rule ID filter — [x] +- `SemgrepRuleLoader.kt` — `ruleIdFilter` parameter, `ruleIdAllow()` in `skip()` +- `ProjectAnalysisOptions.kt` — `semgrepRuleId: List` +- `ProjectAnalyzerRunner.kt` — `--semgrep-rule-id` Clikt flag +- `LoadSemgrepRules.kt` — pass filter through + +### A6: Combined config+rules — [x] +- `ProjectAnalyzer.kt` — removed `check()`, added `SemgrepRulesWithCustomConfig` variant +- `ProjectAnalyzerRunner.kt` — renamed `--config` → `--approximations-config` (with `--config` alias) + +### A7: Custom dataflow approximations path — [x] +- `DataFlowApproximationLoader.kt` — `customApproximationPaths: List` in `Options` +- `ProjectAnalyzerRunner.kt` — `--dataflow-approximations` Clikt flag + +--- + +## Phase B: Go CLI Changes + +### B1: Hidden dev flags — [x] +- `root.go` — `--analyzer-jar`, `--autobuilder-jar` persistent hidden flags +- `global.go` — `JarPath` fields on `Analyzer`/`Autobuilder` structs + +### B2: AnalyzerBuilder extensions — [x] +- `command_builder.go` — new fields, setters, `BuildNativeCommand` entries + +### B3: New scan flags — [x] +- `scan.go` — `--rule-id`, `--approximations-config`, `--dataflow-approximations`, `--external-methods` + +### B4: Agent command group — [x] +- `agent.go`, `agent_skills.go`, `agent_prompt.go`, `agent_rules_path.go`, `agent_test_rules.go` +- `opentaint_home.go` — `GetBundledAgentPath()` +- `compile.go` — autobuilder jar override support + +--- + +## Phase C: Skills and Meta-Prompt + +### C1: Write skill files — [x] +- 9 skill files in `agent/skills/` + +### C2: Write meta-prompt — [x] +- `agent/meta-prompt.md` + +### C3-C4: Release pipeline changes — [ ] +- Bundle agent files + test-util JAR in release (deferred to release work) + +--- + +## Phase D: Validation + +### D1: Run existing tests — [x] +- 6 passed, 1 skipped (quick tests) +- Fixed conftest JAR resolution order + +### D2: Run slow tests — [x] +- 6 passed, 1 failed (autobuilder JAR not built locally — expected) +- Scan tests against Stirling-PDF all pass + +### D3: Run new_feature tests — [x] +- 1 passed (rules-path command) + +--- + +## Phase E: CLI Testing and Fixes + +### E1: Revert ruleIdAllow to match full rule ID only — [x] +- `SemgrepRuleLoader.kt` — removed `shortRuleId` fallback, keep only `info.ruleId` match +- Full rule ID format is `:`, e.g. `java/security/path-traversal.yaml:path-traversal` + +### E2: Skip external method tracking for static fact base — [x] +- `JIRMethodCallFlowFunction.kt` — added `startFactBase !is AccessPathBase.ClassStatic` guard +- External methods YAML reduced from ~10,643 to ~2,246 lines (no `` entries) + +### E3: Update skills with full rule ID format — [x] +- `agent/skills/create-rule.md` — documented full ID format `:`, how to discover IDs +- `agent/skills/run-analysis.md` — updated `--rule-id` examples with full IDs +- `agent/skills/test-rule.md` — clarified annotation `id` field vs full rule ID + +### E4: Rebuild analyzer and CLI, retest — [x] +- Rebuilt `projectAnalyzerJar` + Go CLI binary +- CLI scan with `--rule-id java/security/path-traversal.yaml:path-traversal` → 20 findings +- External methods output confirmed clean (0 `` entries) +- Updated test expectations: full rule IDs, fact position format (``, `arg(N)`, `ret`) +- All pytest tests pass (29 passed, 1 skipped, 5 pre-existing failures excluded) + +--- + +## Phase F: Test Infrastructure and Missing Features + +### F1: Refactor tests to use Go CLI only — [x] +- Removed dual-mode (Go CLI + direct JAR) from `conftest.py` +- All tests now require the Go CLI binary at `cli/bin/opentaint` (dev mode) +- Hidden `--analyzer-jar` / `--autobuilder-jar` flags auto-detected for local builds +- Removed `_find_java()`, `has_cli` branching, direct JAR invocation code paths +- Fixed CLI scan path: auto-strip `project.yaml` from file paths (CLI expects directory) + +### F2: `opentaint agent init-test-project` command — [x] +- New file: `cli/cmd/agent_init_test_project.go` +- Creates directory structure, copies test-util JAR, generates `build.gradle.kts` and `settings.gradle.kts` +- Supports `--dependency` flag for Maven coordinates +- Resolves test-util JAR from bundled, install, or dev build tiers +- `test_init_test_project` now passes (was previously skipping) + +### F3: Add timing instrumentation to all tests — [x] +- Added pytest hooks (`pytest_runtest_setup`/`pytest_runtest_teardown`) for per-test timing +- Added per-phase `time.time()` checkpoints to `test_full_agent_loop` +- All test output now includes `[timing]` lines with elapsed seconds + +### F4: Run all tests via CLI, write test report — [x] +- Full suite: 31 passed, 3 failed (all pre-existing), 0 skipped +- Report written to `agent-mode/test-status.md` +- Pre-existing failures: analyzer exit code 0 on approximation errors (2 tests), autobuilder JAR not built (1 test) + +--- + +## Phase G: Known Issues + +### G1: Fix sink rule ID mismatch in fixture rule and tests — [x] +- Fixed `#java-path-traversal-sink` → `#java-path-traversal-sinks` in fixture rule and inline test YAML +- Tests now produce 4 path-traversal findings on Stirling-PDF + +### G2: Fix `agent test-rules` Go command — missing flags and output — [x] +- Rewrote `agent_test_rules.go`: local flag vars for `--ruleset`, `-o`, `--timeout`, `--max-memory`, `--rule-id` +- Output dir uses `-o` flag (temp dir only as fallback); user rulesets passed to builder + +### G3: Strengthen test assertions — remove vacuous passes — [x] +- `test_rule_test_detects_false_negative`: added `assert result_json.exists()` + `test_result.assert_ok()` +- `test_scan_stirling_with_path_traversal_rule`: added `assert len(findings) > 0` +- `test_approximations_change_results`: added `assert count1 != count2` +- `test_full_agent_loop`: added `assert len(findings) > 0` +- Updated `sarif_findings_for_rule()` to match both exact and semgrep-style dot-separated IDs + +### G4: Analyzer exits non-zero on errors + auto-compile approximations — [x] +- `AbstractAnalyzerRunner.runProjectAnalysisRecursively()`: re-throw exceptions after logging +- `AbstractAnalyzerRunner.main()`: removed `return` on project load failure (let exception propagate) +- NEW: `cli/cmd/compile_approximations.go` — auto-compile `.java` files in `--dataflow-approximations` + - Resolves `javac` from managed JDK + - Extracts approximation utility classes from analyzer JAR (`opentaint-dataflow-approximations/` prefix) + - Resolves project dependencies from `project.yaml` for the compilation classpath + - Compiles with `javac -source 8 -target 8` and returns compiled output directory + - On compilation failure, reports `javac` output and aborts scan +- Wired into `scan.go`: `compileApproximationsIfNeeded()` called for each `--dataflow-approximations` path + +### G5: Build autobuilder JAR or skip test gracefully — [x] +- `test_rule_test_all_pass`: skip with clear message when compilation fails (autobuilder not available) +- `test_rule_test_detects_false_negative`: same skip logic + +### G6: Verify error message in test_invalid_approximations_config_errors — [x] +- Added assertion checking combined stdout+stderr for config/yaml/parse/fail keywords + +### G7: CLI errors go to stdout — update tests to check both — [x] +- `test_approximation_compilation_failure`: check `combined_output` (stdout + stderr) +- `test_invalid_approximations_config_errors`: same approach + +### G8: Better timing breakdown — [x] +- Added `parse_analyzer_timing()` helper to `conftest.py` — parses IFDS elapsed time, phase markers, vulnerability count from analyzer output +- Added `print_timing_breakdown()` helper for formatted output +- Wired into `test_full_agent_loop` for initial scan and rescan phases + +--- + +## Phase H: Discovered Issues (from design-vs-implementation comparison) + +### H1: ~~Release pipeline — bundle agent files~~ → Embed agent files in binary — [x] +- Agent files (~28KB) embedded in Go binary via `go:generate` + `go:embed` +- New package `cli/internal/agent/` with `GetPath()`: + - Tier 1: bundled `/lib/agent/` (release archives) + - Tier 2: extract from embedded FS to `~/.opentaint/agent/` (go install, dev builds) + - SHA-256 content hash marker for staleness detection +- Removed `GetBundledAgentPath()` from `opentaint_home.go` +- Updated `agent_prompt.go` and `agent_skills.go` to use `agent.GetPath()` +- Works with: `go install`, released builds, dev builds + +### H2: Release pipeline — bundle test-util JAR — [ ] +- `.github/workflows/release-cli.yaml` — add step to build/download `opentaint-sast-test-util.jar` to `cli/lib/` +- Without this, `opentaint agent init-test-project` fails in released builds +- `resolveTestUtilJar()` tier 1/2 won't find the JAR in release archives +- **Priority: HIGH** + +### H3: Fix short rule IDs in skill docs — [x] +- `agent/skills/create-yaml-config.md:101` — uses `--rule-id my-vulnerability` instead of full format `java/security/my-vuln.yaml:my-vulnerability` +- `agent/skills/create-approximation.md:66` — same issue +- Inconsistent with the documented full rule ID format in `create-rule.md` and `run-analysis.md` +- **Priority: MEDIUM** + +### H4: ~~Agent path resolution — single-tier only~~ — [x] +- Superseded by H1: agent files are now embedded in binary and extracted on demand +- Two-tier resolution: bundled (release) → embedded extraction (`~/.opentaint/agent/`) +- No longer depends on external file distribution + +### H5: Env var naming mismatch in docs — [ ] +- Design docs say `OPENTAINT_ANALYZER_JAR` / `OPENTAINT_AUTOBUILDER_JAR` +- Actual viper binding uses `OPENTAINT_ANALYZER_JAR_PATH` / `OPENTAINT_AUTOBUILDER_JAR_PATH` +- Update `agent-mode/impl/agent-mode-impl.md` section 5.2 to match actual env var names +- **Priority: LOW** + +### H6: Pre-existing analyzer exit code issues — [ ] +- `test_approximation_compilation_failure` — analyzer still exits 0 on some approximation loading errors +- `test_duplicate_approximation_errors` — same root cause (bijection violation swallowed) +- G4 fix addressed `runProjectAnalysisRecursively` but approximation loading errors in `installApproximations()` may not propagate +- **Priority: LOW** + +--- + +## Phase I: Skill Fixes and Clarifications + +### I1: Test-util JAR not bundled — `init-test-project` broken after `go install` — [x] +- JAR is only 1.8KB (2 annotation classes) — small enough to embed in binary +- New package `cli/internal/testutil/` with `go:generate` + `go:embed`: + - `go:generate` copies JAR from `core/opentaint-sast-test-util/build/libs/` to `jar/` + - `go:embed jar/opentaint-sast-test-util.jar` embeds the JAR data + - `ExtractJar()` extracts to `~/.opentaint/test-util/` with SHA-256 content hash staleness detection +- Added Tier 4 (embedded extraction) to `resolveTestUtilJar()` as fallback after bundled/install/dev-build +- Also fixed `defer os.RemoveAll(tmpDir)` bug in `agent_test_rules.go` — temp dir no longer deleted +- Added output path printing: `Results directory:` and `Test results:` lines + +### I2: `test-rule.md` — unclear where to find test results — [x] +- Updated skill to always specify `-o ./agent-test-results` in the `opentaint agent test-rules` example +- Changed result reading instruction from generic "in the output directory" to explicit `./agent-test-results/test-result.json` + +### I3: `scan` command expects directory, not `project.yaml` path — [x] +- Changed all scan examples from `./opentaint-project/project.yaml` to `./opentaint-project` +- Files fixed: `run-analysis.md` (3 examples), `create-yaml-config.md`, `create-approximation.md`, `create-rule.md` +- Added note to `run-analysis.md`: scan path is the directory containing `project.yaml`, not the file itself + +### I4: `analyze-findings.md` — clarify external methods represent missed *fact propagations* — [x] +- Rewrote section 3 to explain that external methods show where the analyzer killed dataflow facts +- Added priority levels: HIGH (generic propagators like collections/strings), MEDIUM (lambda/callback), LOW (vulnerability-specific) +- Added concrete examples: `List.add/get`, `Map.put/get`, `StringBuilder.append`, `Iterator.next` +- Updated batch processing guidance to start with generic propagators + +### I5: `build-project.md` — add manual build fallback with `opentaint project` and `--package` warning — [x] +- Added section 2b: manual build with `./gradlew build` or `mvn package` followed by `opentaint project` +- Added CRITICAL warning about `--package` being mandatory +- Added multi-module project example with multiple `--classpath` and `--package` flags +- Updated troubleshooting: added "Analysis hangs" entry pointing to missing `--package` + +### I6: Update meta-prompt scan example to use directory path — [x] +- Fixed `meta-prompt.md` line 37: `./opentaint-project/project.yaml` → `./opentaint-project` +- Part of I3 fix + +### I7: Split external methods output into two files — [x] +- **Problem**: Agent sees both `withoutRules` and `withRules` in a single file and doesn't understand only `withoutRules` contains taint-killing methods +- **Kotlin**: Rewrote `writeExternalMethodsYaml()` in `ProjectAnalyzer.kt` to derive two filenames from the base path: + - `-without-rules.yaml` — methods with NO approximation rules (taint killed here) + - `-with-rules.yaml` — methods with existing approximation rules + - Each file has a `methods:` top-level key with the list of records + - Removed `SerializedSkippedExternalMethods` (combined wrapper), added `SerializedExternalMethodRecordList` +- **Go CLI**: Updated `--external-methods` flag help text to document two-file output +- **Skills**: Updated `run-analysis.md` (Outputs section), `analyze-findings.md` (Section 3), `meta-prompt.md` (Phase 3, Phase 4, directory layout) +- **Tests**: Updated `conftest.py`: + - `load_external_methods()` now derives two paths from base, reads both files, recombines into legacy dict + - Added `external_methods_exist()` helper + - Updated `test_external_methods.py` and `test_full_loop.py` to use `external_methods_exist()` instead of `.exists()` +- All 6 external methods tests pass, full loop test passes + +--- + +## Git Commits + +| Commit | Tasks | Description | +|--------|-------|-------------| +| e204e455 | A1-A7 | Phase A: Kotlin analyzer agent-mode features | +| e53f8c16 | Fix | Rename ExternalMethodResults -> SkippedExternalMethods, use kaml | +| 6d445b36 | B1-B4 | Phase B: Go CLI agent-mode features | +| 8734ae31 | C1-C2 | Phase C: Skills and meta-prompt | +| 7d094862 | D1 | Fix conftest JAR resolution order | +| 4e06427b | E-plan | Add Phase E tasks to plan | +| 67b9276f | E1-E4 | Phase E: Filter static facts, update rule ID format in skills/tests | +| 7c3f94ed | F-plan | Add Phase F to plan | +| 195d23a9 | F1 | Refactor tests to CLI-only mode | +| 592f2667 | F2 | Implement opentaint agent init-test-project command | +| 63c84b96 | F3 | Add timing instrumentation to all tests | +| 235af7e3 | F4 | Fix CLI scan path, run full suite, write test report | +| (pending) | G1-G8 | Phase G: Fix known issues, auto-compile approximations, strengthen tests | diff --git a/agent-mode/test-status.md b/agent-mode/test-status.md new file mode 100644 index 000000000..fc12d0210 --- /dev/null +++ b/agent-mode/test-status.md @@ -0,0 +1,95 @@ +# Agent Mode — Test Status Report + +**Date**: 2026-03-31 +**CLI binary**: `cli/bin/opentaint` (dev build with `--analyzer-jar` override) +**Analyzer JAR**: `core/build/libs/opentaint-project-analyzer.jar` +**Test target**: Stirling-PDF at `/home/sobol/data/Stirling-PDF/seqra-project/project.yaml` +**Total**: 34 tests (8 quick, 26 slow) +**Results**: 31 passed, 3 failed (all pre-existing) + +--- + +## Quick Tests (non-slow) + +| Suite | Test | Scenario | Status | Time | +|-------|------|----------|--------|------| +| build | test_scan_nonexistent_project | Error: scan with bad path | PASS | <0.1s | +| build | test_scan_missing_output_flag | Error: scan without -o | PASS | <0.1s | +| rules | test_builtin_rules_directory_exists | Verify rule directory structure | PASS | <0.1s | +| rules | test_builtin_lib_rules_exist | Verify library rule files | PASS | <0.1s | +| rules | test_rules_path_command | `opentaint agent rules-path` | PASS | <0.1s | +| rules | test_custom_rules_are_valid_yaml | Validate fixture rule YAML | PASS | <0.1s | +| rules | test_library_rule_has_lib_option | Library rule options.lib:true | PASS | <0.1s | +| rules | test_security_rule_has_metadata | Security rule CWE metadata | PASS | <0.1s | + +## Slow Tests — Build (test_build.py) + +| Test | Scenario | Status | Time | +|------|----------|--------|------| +| test_scan_with_builtin_rules | Scan Stirling-PDF, 69 findings across 9 rules | PASS | 44.4s | +| test_scan_with_custom_ruleset_directory | Scan with explicit rules path | PASS | 44.1s | +| test_scan_severity_filter_note | Include note-severity findings | PASS | 50.4s | +| test_scan_from_source_directory | Auto-compile + scan | PASS | 78.6s | +| test_compile_source_project | Compile-only (autobuilder) | PASS | 28.7s | + +## Slow Tests — Rules (test_rules.py) + +| Test | Scenario | Status | Time | Notes | +|------|----------|--------|------|-------| +| test_scan_with_rule_id_filter | `--rule-id` filters SARIF output | PASS | 25.0s | | +| test_scan_without_rule_id_filter_includes_all | No filter → multiple rule IDs | PASS | 42.6s | | +| test_init_test_project | `opentaint agent init-test-project` | PASS | <0.1s | Previously skipped | +| test_rule_test_all_pass | Compile test project + test-rules | **FAIL** | 4.2s | Pre-existing: autobuilder JAR not built locally | +| test_rule_test_detects_false_negative | FN detection in test framework | PASS | 5.1s | | +| test_scan_stirling_with_path_traversal_rule | Custom rule on Stirling-PDF | PASS | 26.1s | | + +## Slow Tests — Approximations (test_approximations.py) + +| Test | Scenario | Status | Time | Notes | +|------|----------|--------|------|-------| +| test_scan_with_approximations_config | YAML passThrough config | PASS | 22.0s | | +| test_approximations_config_with_custom_ruleset | Config + custom ruleset together | PASS | 25.2s | | +| test_invalid_approximations_config_errors | Bad YAML → error | PASS | 0.3s | | +| test_scan_with_java_source_approximations | Code-based .java approximations | PASS | 46.0s | | +| test_approximation_compilation_failure | Bad Java source → error | **FAIL** | 44.7s | Pre-existing: analyzer exits 0 despite error | +| test_duplicate_approximation_errors | Duplicate builtin class → error | **FAIL** | 43.6s | Pre-existing: analyzer exits 0 despite error | +| test_scan_with_both_approximation_types | Combined YAML + Java approx | PASS | 24.4s | | +| test_approximations_change_results | Compare with/without approx | PASS | 49.3s | | + +## Slow Tests — External Methods (test_external_methods.py) + +| Test | Scenario | Status | Time | +|------|----------|--------|------| +| test_scan_produces_external_methods_file | `--external-methods` flag | PASS | 45.5s | +| test_external_methods_structure | YAML structure validation | PASS | 52.8s | +| test_without_rules_nonempty_for_real_project | withoutRules non-empty (324) | PASS | 50.0s | +| test_with_rules_contains_standard_library_methods | withRules has stdlib (167) | PASS | 49.8s | +| test_approximations_reduce_without_rules | Approx reduces withoutRules | PASS | 75.8s | +| test_both_outputs_produced | SARIF + external methods together | PASS | 45.9s | + +## Slow Tests — Full Loop (test_full_loop.py) + +| Test | Scenario | Status | Time | Phase Timing | +|------|----------|--------|------|-------------| +| test_full_agent_loop | End-to-end agent workflow | PASS | 25.6s | P1: 0.0s, P2: 0.0s, P3 (scan): 25.6s, P3b: 25.6s, P4: 25.6s | + +--- + +## Pre-Existing Failures (not caused by agent-mode) + +1. **`test_approximation_compilation_failure`** — The Kotlin analyzer catches the compilation error internally but still exits with code 0. The Go CLI propagates this as success. Fix requires analyzer to exit non-zero on approximation compilation failures. + +2. **`test_duplicate_approximation_errors`** — Same root cause: analyzer detects the bijection violation but exits with code 0. Fix requires analyzer exit code propagation. + +3. **`test_rule_test_all_pass`** — The `compile` step fails because the autobuilder JAR is not built locally. This test requires `./gradlew :autobuilder:jar` to be run first. The test itself is correct; the environment is incomplete. + +--- + +## Phase F Summary + +| Task | Description | Status | +|------|-------------|--------| +| F1 | Refactor tests to CLI-only (remove direct JAR mode) | Done | +| F2 | Implement `opentaint agent init-test-project` command | Done | +| F3 | Add timing instrumentation to all tests | Done | +| F4 | Run all tests via CLI, write test report | Done | diff --git a/agent-mode/test/agent-mode-test.md b/agent-mode/test/agent-mode-test.md new file mode 100644 index 000000000..8550a50e4 --- /dev/null +++ b/agent-mode/test/agent-mode-test.md @@ -0,0 +1,2085 @@ +# Agent Mode Test Pipeline + +## Table of Contents + +1. [Overview](#1-overview) +2. [Test Environment Setup](#2-test-environment-setup) +3. [Test Infrastructure (`conftest.py`)](#3-test-infrastructure-conftestpy) +4. [Test Suite 1: Project Build Scenarios](#4-test-suite-1-project-build-scenarios) +5. [Test Suite 2: Rule Generation Pipeline](#5-test-suite-2-rule-generation-pipeline) +6. [Test Suite 3: Approximations Generation/Override](#6-test-suite-3-approximations-generationoverride) +7. [Test Suite 4: External Methods Extraction](#7-test-suite-4-external-methods-extraction) +8. [Test Suite 5: Full Agent Loop (Integration)](#8-test-suite-5-full-agent-loop-integration) +9. [Running Tests](#9-running-tests) + +--- + +## 1. Overview + +This document defines a test pipeline for validating the agent-mode features designed in `agent-mode/design/agent-mode-design.md`. Tests use **Python (pytest)** scripts that invoke the `opentaint` Go CLI and the analyzer JAR directly, validating outputs against expected results. + +### Test target project + +All tests use the Stirling-PDF project at `/home/sobol/data/Stirling-PDF/seqra-project/project.yaml` — a real-world Spring Boot application with 538 Java source files, 3 modules (proprietary, core, common), and 400 dependencies. This project is already compiled (classes + dependencies + sources are in place), so tests can skip the build step for faster iteration, or exercise the build pipeline explicitly. + +### What we are testing + +The test pipeline validates that the **new CLI features** from the design doc work correctly: + +| Feature | Design Section | Test Suite | +|---|---|---| +| `opentaint scan` with pre-compiled project | §2.1 | Suite 1 | +| `opentaint compile` (autobuilder) | §2.1 | Suite 1 | +| `--ruleset` with custom rules | §2.1 | Suite 2 | +| `--rule-id` filter | §1.6, §2.1 | Suite 2 | +| `opentaint test-rules` | §1.5, §2.1 | Suite 2 | +| `opentaint init-test-project` | §1.8 | Suite 2 | +| `--approximations-config` (YAML passThrough) | §1.2, §2.1 | Suite 3 | +| `--dataflow-approximations` (code-based, auto-compile) | §1.3, §1.4, §2.1 | Suite 3 | +| `--external-methods` output | §1.1, §2.1 | Suite 4 | +| `opentaint rules-path` | §1.8, §2.1 | Suite 4 | +| Full loop: rule → test → scan → external methods → approx → rescan | §4 (Meta Prompt) | Suite 5 | + +### Constraints + +Since the new CLI features are **not yet implemented**, the tests serve two purposes: +1. **Specification** — define the expected behavior precisely so implementation can be verified +2. **Incremental validation** — tests that exercise current (existing) functionality can run today; tests for new features are marked `@pytest.mark.new_feature` and will pass once implemented + +Where a new CLI command doesn't exist yet, we fall back to invoking the analyzer JAR directly with the equivalent Kotlin CLI flags. This ensures we can test the **engine behavior** even before the Go CLI wrapper is ready. + +--- + +## 2. Test Environment Setup + +### Directory layout + +``` +agent-mode/test/ +├── agent-mode-test.md # This document +├── conftest.py # Shared fixtures and helpers +├── test_build.py # Suite 1: Project build scenarios +├── test_rules.py # Suite 2: Rule generation pipeline +├── test_approximations.py # Suite 3: Approximations +├── test_external_methods.py # Suite 4: External methods extraction +├── test_full_loop.py # Suite 5: Full agent loop +├── fixtures/ +│ ├── rules/ # Test rule YAML files +│ │ ├── java/ +│ │ │ ├── lib/ +│ │ │ │ └── stirling-source.yaml +│ │ │ └── security/ +│ │ │ ├── stirling-path-traversal.yaml +│ │ │ └── stirling-sqli.yaml +│ │ └── README.md +│ ├── approximations/ +│ │ ├── yaml/ +│ │ │ └── custom-propagators.yaml +│ │ └── java/ +│ │ └── StirlingPDFUtils.java +│ └── test-samples/ +│ └── src/main/java/test/ +│ ├── PathTraversalTest.java +│ └── SqlInjectionTest.java +└── pytest.ini +``` + +### Prerequisites + +```bash +# Python dependencies +pip install pytest pyyaml + +# OpenTaint CLI on PATH (or use --analyzer-jar / --autobuilder-jar flags for local dev) +which opentaint || echo "opentaint not on PATH — will use direct JAR invocation" + +# Stirling-PDF project available +test -f /home/sobol/data/Stirling-PDF/seqra-project/project.yaml +``` + +### `pytest.ini` + +```ini +[pytest] +testpaths = . +markers = + new_feature: Tests for features not yet implemented (deselect with -m "not new_feature") + slow: Tests that run full analysis (>60s) +``` + +--- + +## 3. Test Infrastructure (`conftest.py`) + +```python +""" +Shared fixtures and helpers for agent-mode tests. + +Handles two execution modes: +1. Go CLI mode: when `opentaint` is on PATH (production) +2. Direct JAR mode: when running against locally-built JARs (development) +""" + +import json +import os +import shutil +import subprocess +import tempfile +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + +import pytest +import yaml + + +# ─── Paths ─────────────────────────────────────────────────────────────────── + +STIRLING_PROJECT = Path("/home/sobol/data/Stirling-PDF/seqra-project/project.yaml") +STIRLING_PROJECT_DIR = STIRLING_PROJECT.parent +OPENTAINT_ROOT = Path(__file__).resolve().parent.parent.parent # -> opentaint/ +FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures" +BUILTIN_RULES_DIR = OPENTAINT_ROOT / "rules" / "ruleset" + + +# ─── CLI Abstraction ───────────────────────────────────────────────────────── + +def _find_opentaint_cli() -> Optional[str]: + """Check if opentaint is on PATH.""" + return shutil.which("opentaint") + + +def _find_analyzer_jar() -> Optional[Path]: + """Find locally-built analyzer JAR.""" + candidates = [ + OPENTAINT_ROOT / "core" / "build" / "libs" / "opentaint-jvm-sast.jar", + OPENTAINT_ROOT / "core" / "build" / "libs" / "opentaint-project-analyzer.jar", + ] + for c in candidates: + if c.exists(): + return c + return None + + +def _find_autobuilder_jar() -> Optional[Path]: + """Find locally-built autobuilder JAR.""" + candidates = [ + OPENTAINT_ROOT / "autobuilder" / "build" / "libs" / "opentaint-project-auto-builder.jar", + ] + for c in candidates: + if c.exists(): + return c + return None + + +def _find_java() -> str: + """Find Java 21 (analyzer requires it).""" + # Check JAVA_HOME first + java_home = os.environ.get("JAVA_HOME") + if java_home: + java = Path(java_home) / "bin" / "java" + if java.exists(): + return str(java) + # Fall back to PATH + java = shutil.which("java") + if java: + return java + raise RuntimeError("Java not found. Set JAVA_HOME or add java to PATH.") + + +@dataclass +class CLIResult: + """Result of a CLI command execution.""" + returncode: int + stdout: str + stderr: str + command: list[str] + + @property + def ok(self) -> bool: + return self.returncode == 0 + + def assert_ok(self, msg: str = ""): + assert self.ok, ( + f"Command failed (rc={self.returncode}){': ' + msg if msg else ''}\n" + f" cmd: {' '.join(self.command)}\n" + f" stderr: {self.stderr[:2000]}" + ) + + def assert_failed(self, msg: str = ""): + assert not self.ok, ( + f"Command unexpectedly succeeded{': ' + msg if msg else ''}\n" + f" cmd: {' '.join(self.command)}\n" + f" stdout: {self.stdout[:2000]}" + ) + + +@dataclass +class OpenTaintCLI: + """ + Abstraction over the opentaint CLI. + + Supports two modes: + - Go CLI: uses `opentaint` binary from PATH + - Direct JAR: uses `java -jar analyzer.jar` for scan, `java -jar autobuilder.jar` for compile + """ + cli_path: Optional[str] = None + analyzer_jar: Optional[Path] = None + autobuilder_jar: Optional[Path] = None + java_path: str = "java" + timeout: int = 600 # seconds + + @property + def has_cli(self) -> bool: + return self.cli_path is not None + + def run(self, args: list[str], timeout: Optional[int] = None, env: Optional[dict] = None) -> CLIResult: + """Run an arbitrary command and return the result.""" + run_env = {**os.environ, **(env or {})} + t = timeout or self.timeout + try: + proc = subprocess.run( + args, + capture_output=True, + text=True, + timeout=t, + env=run_env, + ) + return CLIResult(proc.returncode, proc.stdout, proc.stderr, args) + except subprocess.TimeoutExpired: + return CLIResult(-1, "", f"Timeout after {t}s", args) + + def scan( + self, + project_path: str, + output: str, + rulesets: list[str] = None, + rule_ids: list[str] = None, + approximations_config: Optional[str] = None, + dataflow_approximations: Optional[str] = None, + external_methods: Optional[str] = None, + severity: list[str] = None, + timeout: int = 900, + max_memory: str = "8G", + extra_flags: list[str] = None, + ) -> CLIResult: + """Run opentaint scan (or direct analyzer JAR invocation).""" + + if self.has_cli: + cmd = [self.cli_path, "scan", project_path, "-o", output] + for rs in (rulesets or ["builtin"]): + cmd.extend(["--ruleset", rs]) + for rid in (rule_ids or []): + cmd.extend(["--rule-id", rid]) + if approximations_config: + cmd.extend(["--approximations-config", approximations_config]) + if dataflow_approximations: + cmd.extend(["--dataflow-approximations", dataflow_approximations]) + if external_methods: + cmd.extend(["--external-methods", external_methods]) + for sev in (severity or ["warning", "error"]): + cmd.extend(["--severity", sev]) + cmd.extend(["--timeout", f"{timeout}s", "--max-memory", max_memory]) + cmd.extend(extra_flags or []) + return self.run(cmd, timeout=timeout + 60) + + # Direct JAR invocation + assert self.analyzer_jar, "No analyzer JAR found" + output_dir = str(Path(output).parent) + sarif_name = Path(output).name + cmd = [ + self.java_path, f"-Xmx{max_memory}", + "-Dorg.opentaint.ir.impl.storage.defaultBatchSize=2000", + "-Djdk.util.jar.enableMultiRelease=false", + "-jar", str(self.analyzer_jar), + "--project", project_path, + "--output-dir", output_dir, + "--sarif-file-name", sarif_name, + f"--ifds-analysis-timeout={timeout}", + "--verbosity=info", + ] + for rs in (rulesets or []): + if rs == "builtin": + cmd.extend(["--semgrep-rule-set", str(BUILTIN_RULES_DIR)]) + else: + cmd.extend(["--semgrep-rule-set", rs]) + for rid in (rule_ids or []): + cmd.extend(["--semgrep-rule-id", rid]) + if approximations_config: + cmd.extend(["--config", approximations_config]) + if external_methods: + cmd.extend(["--external-methods-output", external_methods]) + for sev in (severity or ["warning", "error"]): + cmd.extend([f"--semgrep-rule-severity={sev}"]) + # Note: --dataflow-approximations needs auto-compile in Go CLI; + # for direct JAR, pass pre-compiled classes directory + if dataflow_approximations: + cmd.extend(["--dataflow-approximations", dataflow_approximations]) + cmd.extend(extra_flags or []) + return self.run(cmd, timeout=timeout + 60) + + def test_rules( + self, + project_path: str, + rulesets: list[str], + output_dir: str, + timeout: int = 300, + max_memory: str = "8G", + ) -> CLIResult: + """Run opentaint test-rules (or direct JAR with --debug-run-rule-tests).""" + + if self.has_cli: + cmd = [self.cli_path, "test-rules", project_path] + for rs in rulesets: + cmd.extend(["--ruleset", rs]) + cmd.extend(["-o", output_dir]) + cmd.extend(["--timeout", f"{timeout}s", "--max-memory", max_memory]) + return self.run(cmd, timeout=timeout + 60) + + # Direct JAR invocation + assert self.analyzer_jar, "No analyzer JAR found" + cmd = [ + self.java_path, f"-Xmx{max_memory}", + "-Dorg.opentaint.ir.impl.storage.defaultBatchSize=2000", + "-Djdk.util.jar.enableMultiRelease=false", + "-jar", str(self.analyzer_jar), + "--project", project_path, + "--output-dir", output_dir, + "--debug-run-rule-tests", + f"--ifds-analysis-timeout={timeout}", + "--verbosity=info", + ] + for rs in rulesets: + cmd.extend(["--semgrep-rule-set", rs]) + return self.run(cmd, timeout=timeout + 60) + + def compile( + self, + project_path: str, + output_dir: str, + timeout: int = 300, + ) -> CLIResult: + """Run opentaint compile (or direct autobuilder JAR invocation).""" + + if self.has_cli: + cmd = [self.cli_path, "compile", project_path, "-o", output_dir] + return self.run(cmd, timeout=timeout + 60) + + # Direct JAR invocation + assert self.autobuilder_jar, "No autobuilder JAR found" + cmd = [ + self.java_path, "-Xmx1G", + "-jar", str(self.autobuilder_jar), + "--project-root-dir", project_path, + "--result-dir", output_dir, + "--build", "portable", + "--verbosity=info", + ] + return self.run(cmd, timeout=timeout + 60) + + def rules_path(self) -> CLIResult: + """Run opentaint rules-path.""" + if self.has_cli: + return self.run([self.cli_path, "rules-path"]) + # Fall back to known builtin path + return CLIResult(0, str(BUILTIN_RULES_DIR), "", ["echo", str(BUILTIN_RULES_DIR)]) + + def init_test_project( + self, + output_dir: str, + dependencies: list[str] = None, + ) -> CLIResult: + """Run opentaint init-test-project.""" + if self.has_cli: + cmd = [self.cli_path, "init-test-project", output_dir] + for dep in (dependencies or []): + cmd.extend(["--dependency", dep]) + return self.run(cmd) + # Fallback: not available without Go CLI + return CLIResult(1, "", "init-test-project not available in direct JAR mode", []) + + +# ─── Fixtures ───────────────────────────────────────────────────────────────── + +@pytest.fixture(scope="session") +def cli() -> OpenTaintCLI: + """Provide an OpenTaintCLI instance configured for the current environment.""" + return OpenTaintCLI( + cli_path=_find_opentaint_cli(), + analyzer_jar=_find_analyzer_jar(), + autobuilder_jar=_find_autobuilder_jar(), + java_path=_find_java(), + ) + + +@pytest.fixture(scope="session") +def stirling_project() -> Path: + """Path to the Stirling-PDF project.yaml.""" + assert STIRLING_PROJECT.exists(), f"Stirling-PDF project not found at {STIRLING_PROJECT}" + return STIRLING_PROJECT + + +@pytest.fixture +def tmp_output(tmp_path) -> Path: + """Provide a temporary output directory for test results.""" + return tmp_path + + +@pytest.fixture(scope="session") +def builtin_rules() -> Path: + """Path to the built-in rules directory.""" + assert BUILTIN_RULES_DIR.exists(), f"Builtin rules not found at {BUILTIN_RULES_DIR}" + return BUILTIN_RULES_DIR + + +# ─── Helpers ────────────────────────────────────────────────────────────────── + +def load_sarif(path: Path) -> dict: + """Load and validate a SARIF file.""" + assert path.exists(), f"SARIF file not found: {path}" + with open(path) as f: + data = json.load(f) + assert data.get("version") == "2.1.0", "Not a valid SARIF 2.1.0 file" + assert "runs" in data and len(data["runs"]) > 0, "SARIF has no runs" + return data + + +def sarif_results(data: dict) -> list[dict]: + """Extract results from a SARIF report.""" + return data["runs"][0].get("results", []) + + +def sarif_rule_ids(data: dict) -> set[str]: + """Extract unique rule IDs from SARIF results.""" + return {r["ruleId"] for r in sarif_results(data)} + + +def sarif_findings_for_rule(data: dict, rule_id: str) -> list[dict]: + """Get findings for a specific rule ID.""" + return [r for r in sarif_results(data) if r["ruleId"] == rule_id] + + +def load_external_methods(path: Path) -> dict: + """Load and validate an external methods YAML file.""" + assert path.exists(), f"External methods file not found: {path}" + with open(path) as f: + data = yaml.safe_load(f) + assert isinstance(data, dict), "External methods file must be a YAML mapping" + assert "withoutRules" in data or "withRules" in data, "Missing withoutRules/withRules sections" + return data + + +def count_external_methods(data: dict) -> tuple[int, int]: + """Return (without_rules_count, with_rules_count).""" + without = len(data.get("withoutRules", [])) + with_rules = len(data.get("withRules", [])) + return without, with_rules + + +def write_yaml(path: Path, content: dict): + """Write a YAML file.""" + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + yaml.dump(content, f, default_flow_style=False, sort_keys=False) + + +def write_text(path: Path, content: str): + """Write a text file.""" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content) +``` + +--- + +## 4. Test Suite 1: Project Build Scenarios + +**File: `test_build.py`** + +Tests that `opentaint scan` and `opentaint compile` work with different project input modes. + +```python +""" +Suite 1: Project Build Scenarios + +Tests: +1.1 Scan with pre-compiled project model (project.yaml) +1.2 Scan with source project (triggers auto-compile) +1.3 Compile-only (autobuilder) +1.4 Scan with invalid project path (error handling) +1.5 Scan with pre-compiled project, custom output directory +""" + +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, load_sarif, sarif_results, sarif_rule_ids, + STIRLING_PROJECT_DIR, BUILTIN_RULES_DIR, +) + + +class TestScanPreCompiledProject: + """1.1: Scan using the pre-compiled Stirling-PDF project model.""" + + @pytest.mark.slow + def test_scan_with_builtin_rules(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): + """Basic scan with builtin rules produces a valid SARIF with findings.""" + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with builtin rules failed") + + # Validate SARIF output + data = load_sarif(sarif_path) + results = sarif_results(data) + assert len(results) > 0, "Scan produced no findings — expected some on Stirling-PDF" + + # Should contain known vulnerability types + rule_ids = sarif_rule_ids(data) + # Stirling-PDF is known to have path-traversal and XSS issues + print(f"Found {len(results)} findings across rules: {rule_ids}") + + @pytest.mark.slow + def test_scan_with_custom_ruleset_directory(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): + """Scan with a custom ruleset directory works alongside builtin.""" + sarif_path = tmp_output / "report.sarif" + + # Use the builtin rules directory directly as a "custom" ruleset + # This is equivalent to --ruleset builtin but tests the custom path logic + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=[str(BUILTIN_RULES_DIR)], + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with custom ruleset directory failed") + data = load_sarif(sarif_path) + assert len(sarif_results(data)) > 0 + + @pytest.mark.slow + def test_scan_severity_filter_note(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): + """Scan with severity=note should include more findings.""" + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with note severity failed") + + +class TestScanFromSourceProject: + """1.2: Scan from source (auto-compiles via autobuilder first).""" + + @pytest.mark.slow + def test_scan_from_source_directory(self, cli: OpenTaintCLI, tmp_output: Path): + """ + Scan the Stirling-PDF source directory (not pre-compiled). + This triggers auto-compilation via autobuilder. + + Uses the mirrored source tree inside seqra-project/sources/ which + is a full copy of the Stirling-PDF repo. + """ + sarif_path = tmp_output / "report.sarif" + source_dir = STIRLING_PROJECT_DIR / "sources" + + if not source_dir.exists(): + pytest.skip("Stirling-PDF source directory not available") + + result = cli.scan( + project_path=str(source_dir), + output=str(sarif_path), + rulesets=["builtin"], + timeout=900, + ) + # This may fail if the autobuilder can't build Stirling-PDF + # (requires Java 17+, Gradle wrapper). That's acceptable — the test + # validates the auto-compile → scan pipeline. + if result.ok: + data = load_sarif(sarif_path) + assert len(sarif_results(data)) > 0 + + +class TestCompileOnly: + """1.3: Test the compile command separately.""" + + @pytest.mark.slow + def test_compile_source_project(self, cli: OpenTaintCLI, tmp_output: Path): + """Compile a source project into a project model.""" + source_dir = STIRLING_PROJECT_DIR / "sources" + model_dir = tmp_output / "project-model" + + if not source_dir.exists(): + pytest.skip("Stirling-PDF source directory not available") + + result = cli.compile( + project_path=str(source_dir), + output_dir=str(model_dir), + timeout=300, + ) + if result.ok: + project_yaml = model_dir / "project.yaml" + assert project_yaml.exists(), "compile did not produce project.yaml" + + +class TestErrorHandling: + """1.4: Error handling for invalid inputs.""" + + def test_scan_nonexistent_project(self, cli: OpenTaintCLI, tmp_output: Path): + """Scan with nonexistent project path should fail gracefully.""" + sarif_path = tmp_output / "report.sarif" + result = cli.scan( + project_path="/nonexistent/project/path", + output=str(sarif_path), + ) + result.assert_failed("Scan should fail for nonexistent project") + + def test_scan_missing_output_flag(self, cli: OpenTaintCLI, stirling_project: Path): + """Scan without -o flag should fail (it's required).""" + if not cli.has_cli: + pytest.skip("Requires Go CLI for flag validation") + # Invoke without -o + result = cli.run([cli.cli_path, "scan", str(stirling_project)]) + result.assert_failed("Scan should require -o flag") +``` + +--- + +## 5. Test Suite 2: Rule Generation Pipeline + +**File: `test_rules.py`** + +Tests the full rule lifecycle: create rule → create test samples → build test project → run rule tests → run scan with rule. + +### Fixture rules used by tests + +**`fixtures/rules/java/lib/stirling-source.yaml`** — a library rule defining a source for Spring `@PostMapping` multipart file parameters: + +```yaml +rules: + - id: stirling-multipart-file-source + options: + lib: true + severity: NOTE + message: Untrusted multipart file data from Spring controller + languages: [java] + patterns: + - pattern: | + $RETURNTYPE $METHOD(..., @RequestParam MultipartFile $UNTRUSTED, ...) { ... } +``` + +**`fixtures/rules/java/security/stirling-path-traversal.yaml`** — a security rule joining the source with a built-in path traversal sink: + +```yaml +rules: + - id: stirling-path-traversal + severity: ERROR + message: >- + User-uploaded file name flows to file system operation without sanitization + metadata: + cwe: CWE-22 + short-description: Path Traversal via uploaded file name + languages: [java] + mode: join + join: + refs: + - rule: java/lib/stirling-source.yaml#stirling-multipart-file-source + as: source + - rule: java/lib/generic/path-traversal-sinks.yaml#java-path-traversal-sink + as: sink + on: + - 'source.$UNTRUSTED -> sink.$UNTRUSTED' +``` + +### Test samples + +**`fixtures/test-samples/src/main/java/test/PathTraversalTest.java`**: + +```java +package test; + +import org.opentaint.sast.test.util.PositiveRuleSample; +import org.opentaint.sast.test.util.NegativeRuleSample; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +@RestController +public class PathTraversalTest { + + @PositiveRuleSample(value = "java/security/stirling-path-traversal.yaml", id = "stirling-path-traversal") + @PostMapping("/upload-vulnerable") + public String vulnerable(@RequestParam MultipartFile file) throws IOException { + // Directly use original filename — path traversal possible + String filename = file.getOriginalFilename(); + Path dest = Paths.get("/uploads/" + filename); + Files.copy(file.getInputStream(), dest); + return "uploaded"; + } + + @NegativeRuleSample(value = "java/security/stirling-path-traversal.yaml", id = "stirling-path-traversal") + @PostMapping("/upload-safe") + public String safe(@RequestParam MultipartFile file) throws IOException { + // Use sanitized filename — only the base name, no path components + String filename = new File(file.getOriginalFilename()).getName(); + Path dest = Paths.get("/uploads/").resolve(filename); + Files.copy(file.getInputStream(), dest); + return "uploaded"; + } +} +``` + +### Test script + +```python +""" +Suite 2: Rule Generation Pipeline + +Tests: +2.1 Read builtin rules via `opentaint rules-path` (or known path) +2.2 Create custom library + security rules, verify YAML validity +2.3 Run scan with custom ruleset + --rule-id filter +2.4 Run scan with custom ruleset without --rule-id filter (all rules active) +2.5 Bootstrap test project, build, and run rule tests +2.6 Rule test: false negative detected (positive sample with wrong pattern) +2.7 Rule test: false positive detected (negative sample with too-broad pattern) +2.8 Run scan on Stirling-PDF with custom path-traversal rule +""" + +import json +import shutil +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, load_sarif, sarif_results, sarif_rule_ids, + sarif_findings_for_rule, write_yaml, write_text, + BUILTIN_RULES_DIR, FIXTURES_DIR, +) + + +class TestReadBuiltinRules: + """2.1: Agent can discover and read builtin rules.""" + + def test_builtin_rules_directory_exists(self, builtin_rules: Path): + """Builtin rules directory exists and contains rule files.""" + security_dir = builtin_rules / "java" / "security" + assert security_dir.exists(), f"No security rules at {security_dir}" + rule_files = list(security_dir.glob("*.yaml")) + assert len(rule_files) > 10, f"Expected >10 security rules, found {len(rule_files)}" + + def test_builtin_lib_rules_exist(self, builtin_rules: Path): + """Library rules (sources/sinks) exist.""" + lib_generic = builtin_rules / "java" / "lib" / "generic" + assert lib_generic.exists() + assert (lib_generic / "servlet-untrusted-data-source.yaml").exists() + assert (lib_generic / "path-traversal-sinks.yaml").exists() + + @pytest.mark.new_feature + def test_rules_path_command(self, cli: OpenTaintCLI): + """opentaint rules-path prints the rules directory.""" + result = cli.rules_path() + result.assert_ok("rules-path command failed") + rules_dir = Path(result.stdout.strip()) + assert rules_dir.exists(), f"rules-path returned non-existent dir: {rules_dir}" + assert (rules_dir / "java" / "security").is_dir() + + +class TestCustomRuleCreation: + """2.2: Create and validate custom rules.""" + + def test_custom_rules_are_valid_yaml(self): + """Fixture rule files are syntactically valid YAML with expected structure.""" + import yaml + for rule_file in FIXTURES_DIR.rglob("*.yaml"): + if rule_file.parent.name == "yaml": + continue # skip approximation configs + with open(rule_file) as f: + data = yaml.safe_load(f) + assert "rules" in data, f"Rule file {rule_file} missing 'rules' key" + for rule in data["rules"]: + assert "id" in rule, f"Rule in {rule_file} missing 'id'" + assert "severity" in rule, f"Rule {rule['id']} missing 'severity'" + assert "languages" in rule, f"Rule {rule['id']} missing 'languages'" + + def test_library_rule_has_lib_option(self): + """Library rules must have options.lib: true.""" + import yaml + lib_rule = FIXTURES_DIR / "rules" / "java" / "lib" / "stirling-source.yaml" + if not lib_rule.exists(): + pytest.skip("Library rule fixture not created yet") + with open(lib_rule) as f: + data = yaml.safe_load(f) + for rule in data["rules"]: + assert rule.get("options", {}).get("lib") is True, \ + f"Library rule {rule['id']} missing options.lib: true" + + def test_security_rule_has_metadata(self): + """Security rules must have metadata.cwe and metadata.short-description.""" + import yaml + sec_rule = FIXTURES_DIR / "rules" / "java" / "security" / "stirling-path-traversal.yaml" + if not sec_rule.exists(): + pytest.skip("Security rule fixture not created yet") + with open(sec_rule) as f: + data = yaml.safe_load(f) + for rule in data["rules"]: + if rule.get("options", {}).get("lib"): + continue + meta = rule.get("metadata", {}) + assert "cwe" in meta, f"Security rule {rule['id']} missing metadata.cwe" + assert "short-description" in meta, f"Security rule {rule['id']} missing metadata.short-description" + + +class TestScanWithRuleIdFilter: + """2.3-2.4: Scan with --rule-id filter.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_with_rule_id_filter(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): + """ + Scan with --rule-id should only produce findings for the specified rule. + Library rules referenced via refs should be auto-included. + """ + sarif_path = tmp_output / "report.sarif" + custom_rules = FIXTURES_DIR / "rules" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(custom_rules)], + rule_ids=["stirling-path-traversal"], + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with --rule-id filter failed") + + data = load_sarif(sarif_path) + rule_ids = sarif_rule_ids(data) + # Only our rule should appear (lib rules don't produce top-level findings) + for rid in rule_ids: + assert rid == "stirling-path-traversal", \ + f"Unexpected rule '{rid}' in output — --rule-id filter not working" + + @pytest.mark.slow + def test_scan_without_rule_id_filter_includes_all(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): + """ + Scan without --rule-id should include findings from all active rules. + """ + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan without rule-id filter failed") + + data = load_sarif(sarif_path) + rule_ids = sarif_rule_ids(data) + # Should have multiple rule IDs + assert len(rule_ids) > 1, f"Expected multiple rule IDs, got: {rule_ids}" + + +class TestRuleTests: + """2.5-2.7: Rule test workflow.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_init_test_project(self, cli: OpenTaintCLI, tmp_output: Path): + """ + opentaint init-test-project bootstraps a valid Gradle test project. + """ + test_project_dir = tmp_output / "test-project" + + result = cli.init_test_project( + output_dir=str(test_project_dir), + dependencies=["org.springframework:spring-web:6.2.12", "jakarta.servlet:jakarta.servlet-api:6.0.0"], + ) + if not result.ok: + pytest.skip("init-test-project not available (new feature)") + + # Verify structure + assert (test_project_dir / "build.gradle.kts").exists() + assert (test_project_dir / "settings.gradle.kts").exists() + assert (test_project_dir / "libs" / "opentaint-sast-test-util.jar").exists() + assert (test_project_dir / "src" / "main" / "java" / "test").is_dir() + + @pytest.mark.slow + @pytest.mark.new_feature + def test_rule_test_all_pass(self, cli: OpenTaintCLI, tmp_output: Path): + """ + Create a test project with correct positive/negative samples. + Rule tests should all pass. + """ + # Setup: copy fixture test samples and rules + test_project_dir = tmp_output / "test-project" + compiled_dir = tmp_output / "test-compiled" + test_output = tmp_output / "test-output" + rules_dir = FIXTURES_DIR / "rules" + + # Bootstrap (or manually create if CLI not available) + result = cli.init_test_project( + output_dir=str(test_project_dir), + dependencies=[ + "org.springframework:spring-web:6.2.12", + "jakarta.servlet:jakarta.servlet-api:6.0.0", + ], + ) + if not result.ok: + pytest.skip("init-test-project not available") + + # Copy test samples + samples_src = FIXTURES_DIR / "test-samples" / "src" + samples_dst = test_project_dir / "src" + if samples_src.exists(): + shutil.copytree(samples_src, samples_dst, dirs_exist_ok=True) + + # Compile test project + compile_result = cli.compile(str(test_project_dir), str(compiled_dir)) + compile_result.assert_ok("Failed to compile test project") + + # Run rule tests + test_result = cli.test_rules( + project_path=str(compiled_dir / "project.yaml"), + rulesets=[str(rules_dir)], + output_dir=str(test_output), + ) + test_result.assert_ok("Rule tests failed") + + # Check test-result.json + result_json = test_output / "test-result.json" + assert result_json.exists(), "test-result.json not produced" + with open(result_json) as f: + results = json.load(f) + + assert len(results.get("falsePositive", [])) == 0, \ + f"Unexpected false positives: {results['falsePositive']}" + assert len(results.get("falseNegative", [])) == 0, \ + f"Unexpected false negatives: {results['falseNegative']}" + assert len(results.get("success", [])) > 0, \ + "No successful tests — something is wrong" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_rule_test_detects_false_negative(self, cli: OpenTaintCLI, tmp_output: Path): + """ + A @PositiveRuleSample that doesn't match the rule → false negative. + This tests that the test framework correctly detects missing findings. + """ + test_project_dir = tmp_output / "test-project-fn" + compiled_dir = tmp_output / "test-compiled-fn" + test_output = tmp_output / "test-output-fn" + + # Create a rule that intentionally won't match the test sample + rules_dir = tmp_output / "broken-rules" / "java" / "security" + rules_dir.mkdir(parents=True) + write_text(rules_dir / "broken-rule.yaml", """\ +rules: + - id: broken-path-traversal + severity: ERROR + message: This rule intentionally won't match + metadata: + cwe: CWE-22 + short-description: Broken rule for testing FN detection + languages: [java] + patterns: + - pattern: ThisClassDoesNotExist.neverCalled($X) +""") + + # Create test sample that references the rule + result = cli.init_test_project( + output_dir=str(test_project_dir), + dependencies=["jakarta.servlet:jakarta.servlet-api:6.0.0"], + ) + if not result.ok: + pytest.skip("init-test-project not available") + + test_file = test_project_dir / "src" / "main" / "java" / "test" / "FalseNegativeTest.java" + write_text(test_file, """\ +package test; + +import org.opentaint.sast.test.util.PositiveRuleSample; + +public class FalseNegativeTest { + + @PositiveRuleSample(value = "java/security/broken-rule.yaml", id = "broken-path-traversal") + public void shouldTriggerButWont() { + String x = System.getenv("USER_INPUT"); + System.out.println(x); // not a real sink for the broken rule + } +} +""") + + compile_result = cli.compile(str(test_project_dir), str(compiled_dir)) + if not compile_result.ok: + pytest.skip("Cannot compile test project") + + test_result = cli.test_rules( + project_path=str(compiled_dir / "project.yaml"), + rulesets=[str(tmp_output / "broken-rules")], + output_dir=str(test_output), + ) + + # The test framework should detect this as a false negative + result_json = test_output / "test-result.json" + if result_json.exists(): + with open(result_json) as f: + results = json.load(f) + assert len(results.get("falseNegative", [])) > 0, \ + "Expected false negative not detected" + + +class TestScanStirlingWithCustomRule: + """2.8: Run custom path-traversal rule on Stirling-PDF.""" + + @pytest.mark.slow + def test_scan_stirling_with_path_traversal_rule( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Scan Stirling-PDF with our custom path-traversal rule. + Stirling-PDF handles file uploads in several controllers — + we expect the rule to find some findings. + """ + sarif_path = tmp_output / "report.sarif" + custom_rules = FIXTURES_DIR / "rules" + + if not custom_rules.exists(): + pytest.skip("Fixture rules not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(custom_rules)], + rule_ids=["stirling-path-traversal"], + severity=["note", "warning", "error"], + timeout=600, + ) + + if result.ok: + data = load_sarif(sarif_path) + findings = sarif_findings_for_rule(data, "stirling-path-traversal") + print(f"Found {len(findings)} path-traversal findings in Stirling-PDF") + for f in findings[:5]: + locs = f.get("locations", [{}]) + if locs: + uri = locs[0].get("physicalLocation", {}).get("artifactLocation", {}).get("uri", "?") + line = locs[0].get("physicalLocation", {}).get("region", {}).get("startLine", "?") + print(f" - {uri}:{line}") + else: + # Rule might not match if patterns are wrong — that's part of testing + print(f"Scan failed or produced no output: {result.stderr[:500]}") +``` + +--- + +## 6. Test Suite 3: Approximations Generation/Override + +**File: `test_approximations.py`** + +Tests YAML passThrough config (`--approximations-config`) and code-based approximations (`--dataflow-approximations`). Both types of approximations are **only applicable to external methods** — library classes without source code in the project. The agent discovers which methods need approximations via the `--external-methods` output. + +### Key constraint: External methods only + +**Approximations (both YAML passThrough and code-based) are ONLY applicable to external methods** — library classes whose source code is NOT part of the analyzed project. Project classes with source code are analyzed directly by the engine; approximations for them would be ignored or cause errors. + +The agent's workflow is: run scan → get external methods list → create approximations for methods in `withoutRules` → rescan. The external methods list drives which methods need approximations. + +### Fixture: YAML approximation config + +**`fixtures/approximations/yaml/custom-propagators.yaml`** — models external library methods from Stirling-PDF's dependencies: + +```yaml +# Custom passThrough rules for external library methods encountered by the engine. +# +# IMPORTANT: Approximations are ONLY applicable to external methods — library +# classes whose source code is NOT part of the project. These methods would +# appear in the external-methods.yaml output under withoutRules. + +passThrough: + # org.apache.pdfbox.pdmodel.PDDocument#getPage — taint on this flows to result + # PDFBox is an external dependency of Stirling-PDF + - function: org.apache.pdfbox.pdmodel.PDDocument#getPage + copy: + - from: this + to: result + + # org.apache.pdfbox.text.PDFTextStripper#getText — taint on arg(0) flows to result + - function: org.apache.pdfbox.text.PDFTextStripper#getText + copy: + - from: arg(0) + to: result + + # com.fasterxml.jackson.databind.ObjectMapper#readValue — taint flows through deserialization + - function: com.fasterxml.jackson.databind.ObjectMapper#readValue + copy: + - from: arg(0) + to: result + + # org.jsoup.Jsoup#parse — taint on arg(0) flows to result + - function: org.jsoup.Jsoup#parse + copy: + - from: arg(0) + to: result +``` + +### Fixture: Code-based approximation + +**`fixtures/approximations/java/PdfBoxDocumentApprox.java`** — approximation for PDFBox's `PDDocument` (an external library class): + +```java +package agent.approximations; + +import org.opentaint.ir.approximation.annotation.ApproximateByName; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +/** + * Code-based approximation for PDFBox's PDDocument class. + * + * IMPORTANT: Approximations are ONLY applicable to external methods — + * library classes whose source code is NOT part of the project being analyzed. + * PDFBox is an external dependency of Stirling-PDF (pdfbox-3.0.6.jar). + */ +@ApproximateByName("org.apache.pdfbox.pdmodel.PDDocument") +public class PdfBoxDocumentApprox { + + /** + * Model save(OutputStream) — taint on this flows to arg(0). + * A tainted document writes tainted bytes to the output stream. + */ + public void save(java.io.OutputStream output) throws java.io.IOException { + org.apache.pdfbox.pdmodel.PDDocument self = + (org.apache.pdfbox.pdmodel.PDDocument) (Object) this; + if (OpentaintNdUtil.nextBool()) { + throw new java.io.IOException("approximation: failure path"); + } + byte[] data = new byte[1]; + output.write(data); + } + + /** + * Model getPage(int) — taint on this flows to result. + * A tainted document produces tainted pages. + */ + public Object getPage(int pageIndex) { + org.apache.pdfbox.pdmodel.PDDocument self = + (org.apache.pdfbox.pdmodel.PDDocument) (Object) this; + if (OpentaintNdUtil.nextBool()) { + return null; + } + return self.getPages().get(pageIndex); + } +} +``` + +### Test script + +```python +""" +Suite 3: Approximations Generation/Override + +Tests: +3.1 Scan with --approximations-config (YAML passThrough) +3.2 Scan with --approximations-config + --ruleset together (§1.2) +3.3 Scan with --dataflow-approximations (pre-compiled .class files) +3.4 Scan with --dataflow-approximations from .java sources (auto-compile, §1.4) +3.5 Approximation compilation failure handling (bad Java source) +3.6 Duplicate approximation targeting built-in class (error) +3.7 Scan with both --approximations-config and --dataflow-approximations +3.8 Verify approximation changes analysis results +""" + +import pytest +import shutil +from pathlib import Path +from conftest import ( + OpenTaintCLI, load_sarif, sarif_results, sarif_rule_ids, + sarif_findings_for_rule, write_text, write_yaml, + FIXTURES_DIR, BUILTIN_RULES_DIR, +) + + +class TestYAMLApproximationsConfig: + """3.1-3.2: YAML passThrough config.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_with_approximations_config( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Scan with --approximations-config applies custom passThrough rules. + We verify the scan completes successfully (the config is accepted). + """ + sarif_path = tmp_output / "report.sarif" + config_path = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + + if not config_path.exists(): + pytest.skip("Fixture approximation config not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + approximations_config=str(config_path), + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with --approximations-config failed") + data = load_sarif(sarif_path) + assert len(sarif_results(data)) >= 0 # May have results, may not + + @pytest.mark.slow + @pytest.mark.new_feature + def test_approximations_config_with_custom_ruleset( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + --approximations-config and --ruleset can be used together (§1.2). + Previously these were mutually exclusive. + """ + sarif_path = tmp_output / "report.sarif" + config_path = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + custom_rules = FIXTURES_DIR / "rules" + + if not config_path.exists() or not custom_rules.exists(): + pytest.skip("Fixture files not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(custom_rules)], + rule_ids=["stirling-path-traversal"], + approximations_config=str(config_path), + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok( + "Scan with both --approximations-config and --ruleset failed. " + "These should work together per design §1.2" + ) + + @pytest.mark.slow + @pytest.mark.new_feature + def test_invalid_approximations_config_errors( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """Invalid YAML config should produce a clear error.""" + sarif_path = tmp_output / "report.sarif" + bad_config = tmp_output / "bad-config.yaml" + write_text(bad_config, "this is not: [valid: yaml: config") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + approximations_config=str(bad_config), + timeout=120, + ) + result.assert_failed("Scan should fail with invalid approximations config") + + +class TestCodeBasedApproximations: + """3.3-3.6: Code-based approximations via --dataflow-approximations.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_with_java_source_approximations( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + --dataflow-approximations with .java source files auto-compiles them. + The scan should complete successfully. + """ + sarif_path = tmp_output / "report.sarif" + approx_dir = FIXTURES_DIR / "approximations" / "java" + + if not approx_dir.exists(): + pytest.skip("Fixture approximation source not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + dataflow_approximations=str(approx_dir), + severity=["warning", "error"], + timeout=600, + ) + # If the auto-compile works, scan should succeed + if result.ok: + data = load_sarif(sarif_path) + print(f"Scan with code-based approximations: {len(sarif_results(data))} findings") + + @pytest.mark.slow + @pytest.mark.new_feature + def test_approximation_compilation_failure( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + --dataflow-approximations with invalid Java source should fail + with compilation errors before analysis starts. + """ + sarif_path = tmp_output / "report.sarif" + bad_approx_dir = tmp_output / "bad-approximations" + bad_approx_dir.mkdir() + write_text(bad_approx_dir / "BrokenApprox.java", """\ +package agent.approximations; + +import org.opentaint.ir.approximation.annotation.Approximate; + +// This won't compile — referencing nonexistent class +@Approximate(com.nonexistent.library.DoesNotExist.class) +public class BrokenApprox { + public void broken() { + com.nonexistent.library.DoesNotExist x = null; // compile error + } +} +""") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + dataflow_approximations=str(bad_approx_dir), + timeout=120, + ) + result.assert_failed("Scan should fail when approximation compilation fails") + # Error message should mention compilation + assert "compil" in result.stderr.lower() or "error" in result.stderr.lower(), \ + f"Error message should mention compilation failure: {result.stderr[:500]}" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_duplicate_approximation_errors( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + A custom approximation targeting a class that already has a built-in + approximation should produce an error (bijection violation). + """ + sarif_path = tmp_output / "report.sarif" + dup_approx_dir = tmp_output / "dup-approximations" + dup_approx_dir.mkdir() + + # java.util.stream.Stream already has a built-in approximation + write_text(dup_approx_dir / "StreamDuplicate.java", """\ +package agent.approximations; + +import org.opentaint.ir.approximation.annotation.Approximate; + +@Approximate(java.util.stream.Stream.class) +public class StreamDuplicate { + public Object map(java.util.function.Function fn) throws Throwable { + return fn.apply(null); + } +} +""") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + dataflow_approximations=str(dup_approx_dir), + timeout=300, + ) + # Should fail due to ApproximationIndexer bijection assertion + result.assert_failed("Duplicate approximation should produce an error") + + +class TestCombinedApproximations: + """3.7-3.8: Combining YAML config + code-based approximations.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_with_both_approximation_types( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Both --approximations-config and --dataflow-approximations can be + used in the same scan. YAML handles simple passThrough, code-based + handles complex methods. + """ + sarif_path = tmp_output / "report.sarif" + yaml_config = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + java_approx = FIXTURES_DIR / "approximations" / "java" + custom_rules = FIXTURES_DIR / "rules" + + if not yaml_config.exists() or not java_approx.exists(): + pytest.skip("Fixture files not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(custom_rules)], + rule_ids=["stirling-path-traversal"], + approximations_config=str(yaml_config), + dataflow_approximations=str(java_approx), + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with combined approximation types failed") + + @pytest.mark.slow + @pytest.mark.new_feature + def test_approximations_change_results( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Running the same scan with and without custom approximations should + produce different results (the approximations add propagation paths + that weren't there before). + + This is a differential test — we compare finding counts. + """ + custom_rules = FIXTURES_DIR / "rules" + yaml_config = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + + if not custom_rules.exists() or not yaml_config.exists(): + pytest.skip("Fixture files not created yet") + + # Run 1: without approximations + sarif_no_approx = tmp_output / "no-approx" / "report.sarif" + (tmp_output / "no-approx").mkdir() + r1 = cli.scan( + project_path=str(stirling_project), + output=str(sarif_no_approx), + rulesets=["builtin", str(custom_rules)], + rule_ids=["stirling-path-traversal"], + severity=["note", "warning", "error"], + timeout=600, + ) + + # Run 2: with approximations + sarif_with_approx = tmp_output / "with-approx" / "report.sarif" + (tmp_output / "with-approx").mkdir() + r2 = cli.scan( + project_path=str(stirling_project), + output=str(sarif_with_approx), + rulesets=["builtin", str(custom_rules)], + rule_ids=["stirling-path-traversal"], + approximations_config=str(yaml_config), + severity=["note", "warning", "error"], + timeout=600, + ) + + if r1.ok and r2.ok: + data1 = load_sarif(sarif_no_approx) + data2 = load_sarif(sarif_with_approx) + count1 = len(sarif_results(data1)) + count2 = len(sarif_results(data2)) + print(f"Without approximations: {count1} findings") + print(f"With approximations: {count2} findings") + # We don't assert which is larger — just that they're potentially different + # The agent would analyze the difference to validate the approximations +``` + +--- + +## 7. Test Suite 4: External Methods Extraction + +**File: `test_external_methods.py`** + +Tests the `--external-methods` output functionality. + +```python +""" +Suite 4: External Methods Extraction + +Tests: +4.1 Scan with --external-methods produces a YAML file +4.2 External methods file has correct structure (withoutRules/withRules) +4.3 External methods contain expected fields (method, signature, factPositions, callSites) +4.4 withoutRules list is non-empty for a real project (Stirling-PDF has many unmodeled methods) +4.5 withRules list contains known standard library methods +4.6 Scan with custom approximations reduces withoutRules count +4.7 External methods extraction alongside SARIF output +""" + +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, load_sarif, sarif_results, + load_external_methods, count_external_methods, + FIXTURES_DIR, BUILTIN_RULES_DIR, +) + + +class TestExternalMethodsBasic: + """4.1-4.3: Basic external methods output.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_produces_external_methods_file( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + --external-methods flag produces a YAML file alongside SARIF output. + """ + sarif_path = tmp_output / "report.sarif" + ext_methods_path = tmp_output / "external-methods.yaml" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + external_methods=str(ext_methods_path), + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with --external-methods failed") + assert ext_methods_path.exists(), "External methods file not produced" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_external_methods_structure( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + External methods file has two sections: withoutRules and withRules. + Each entry has: method, signature, factPositions, callSites. + """ + sarif_path = tmp_output / "report.sarif" + ext_methods_path = tmp_output / "external-methods.yaml" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + external_methods=str(ext_methods_path), + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok() + + data = load_external_methods(ext_methods_path) + + # Validate structure + for section_name in ["withoutRules", "withRules"]: + section = data.get(section_name, []) + for entry in section[:5]: # Check first 5 entries + assert "method" in entry, f"Entry in {section_name} missing 'method'" + assert "signature" in entry, f"Entry in {section_name} missing 'signature'" + assert "factPositions" in entry, f"Entry in {section_name} missing 'factPositions'" + assert "callSites" in entry, f"Entry in {section_name} missing 'callSites'" + + # Validate method format: Class#method + assert "#" in entry["method"], \ + f"Method should be in Class#method format: {entry['method']}" + + # Validate factPositions is a list + assert isinstance(entry["factPositions"], list), \ + f"factPositions should be a list: {entry['factPositions']}" + + # Validate callSites is a positive integer + assert isinstance(entry["callSites"], int) and entry["callSites"] > 0, \ + f"callSites should be a positive integer: {entry['callSites']}" + + # Validate factPositions values + valid_positions = {"this", "result"} + for pos in entry["factPositions"]: + assert pos == "this" or pos == "result" or pos.startswith("arg("), \ + f"Invalid fact position: {pos}" + + +class TestExternalMethodsContent: + """4.4-4.5: External methods content validation.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_without_rules_nonempty_for_real_project( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Stirling-PDF uses many libraries without built-in approximations. + The withoutRules list should be non-empty. + """ + sarif_path = tmp_output / "report.sarif" + ext_methods_path = tmp_output / "external-methods.yaml" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + external_methods=str(ext_methods_path), + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok() + + data = load_external_methods(ext_methods_path) + without_count, with_count = count_external_methods(data) + print(f"External methods: {without_count} without rules, {with_count} with rules") + + assert without_count > 0, \ + "Expected non-empty withoutRules for Stirling-PDF (it uses many unmodeled libraries)" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_with_rules_contains_standard_library_methods( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + The withRules section should contain standard library methods that + have built-in approximations (e.g., StringBuilder, String methods). + """ + sarif_path = tmp_output / "report.sarif" + ext_methods_path = tmp_output / "external-methods.yaml" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + external_methods=str(ext_methods_path), + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok() + + data = load_external_methods(ext_methods_path) + with_rules = data.get("withRules", []) + with_rules_methods = {e["method"] for e in with_rules} + + # Known methods that should have rules in the default config + # (these are common and Stirling-PDF definitely calls them) + print(f"Methods with rules ({len(with_rules_methods)}):") + for m in sorted(list(with_rules_methods))[:20]: + print(f" - {m}") + + +class TestExternalMethodsWithApproximations: + """4.6: Custom approximations reduce withoutRules count.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_approximations_reduce_without_rules( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Adding custom passThrough rules for methods that were in withoutRules + should move them to withRules (or remove them from withoutRules entirely). + """ + yaml_config = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + custom_rules = FIXTURES_DIR / "rules" + + if not yaml_config.exists(): + pytest.skip("Fixture approximation config not created yet") + + # Run 1: without custom approximations + sarif1 = tmp_output / "run1" / "report.sarif" + ext1 = tmp_output / "run1" / "external-methods.yaml" + (tmp_output / "run1").mkdir() + r1 = cli.scan( + project_path=str(stirling_project), + output=str(sarif1), + rulesets=["builtin"], + external_methods=str(ext1), + severity=["warning", "error"], + timeout=600, + ) + + # Run 2: with custom approximations + sarif2 = tmp_output / "run2" / "report.sarif" + ext2 = tmp_output / "run2" / "external-methods.yaml" + (tmp_output / "run2").mkdir() + r2 = cli.scan( + project_path=str(stirling_project), + output=str(sarif2), + rulesets=["builtin"], + approximations_config=str(yaml_config), + external_methods=str(ext2), + severity=["warning", "error"], + timeout=600, + ) + + if r1.ok and r2.ok: + data1 = load_external_methods(ext1) + data2 = load_external_methods(ext2) + wo1, _ = count_external_methods(data1) + wo2, _ = count_external_methods(data2) + print(f"Without custom approx: {wo1} methods without rules") + print(f"With custom approx: {wo2} methods without rules") + + # Methods we added rules for should no longer be in withoutRules + methods_without_1 = {e["method"] for e in data1.get("withoutRules", [])} + methods_without_2 = {e["method"] for e in data2.get("withoutRules", [])} + newly_covered = methods_without_1 - methods_without_2 + if newly_covered: + print(f"Newly covered methods ({len(newly_covered)}):") + for m in sorted(newly_covered): + print(f" + {m}") + + +class TestExternalMethodsAlongsideSarif: + """4.7: External methods and SARIF are produced together.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_both_outputs_produced( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + A single scan produces both SARIF report and external methods file. + """ + sarif_path = tmp_output / "report.sarif" + ext_methods_path = tmp_output / "external-methods.yaml" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + external_methods=str(ext_methods_path), + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok() + + # Both files should exist + assert sarif_path.exists(), "SARIF report not produced" + assert ext_methods_path.exists(), "External methods file not produced" + + # Both should be non-trivial + sarif_data = load_sarif(sarif_path) + ext_data = load_external_methods(ext_methods_path) + assert len(sarif_results(sarif_data)) > 0, "SARIF has no results" + wo, wr = count_external_methods(ext_data) + assert wo + wr > 0, "External methods file is empty" +``` + +--- + +## 8. Test Suite 5: Full Agent Loop (Integration) + +**File: `test_full_loop.py`** + +End-to-end test simulating the agent's workflow from the meta prompt: create rule → test → scan → analyze external methods → create approximation → rescan. + +```python +""" +Suite 5: Full Agent Loop (Integration) + +This test simulates the complete agent workflow on Stirling-PDF: +1. Discover entry points (by reading source) +2. Create a custom path-traversal rule +3. Test the rule with samples +4. Run initial scan on Stirling-PDF +5. Analyze external methods +6. Create YAML approximation for an unmodeled method +7. Re-scan and verify the approximation has effect + +This is a single large integration test, not meant for fast CI. +""" + +import json +import shutil +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, load_sarif, sarif_results, sarif_rule_ids, + sarif_findings_for_rule, load_external_methods, count_external_methods, + write_text, write_yaml, + FIXTURES_DIR, BUILTIN_RULES_DIR, STIRLING_PROJECT_DIR, +) + + +@pytest.mark.slow +@pytest.mark.new_feature +class TestFullAgentLoop: + """ + Simulates the agent's analysis workflow on Stirling-PDF. + + This test class follows the meta prompt phases: + Phase 1 → discover entry points (manual) + Phase 2 → create rule + test + Phase 3 → scan + analyze + create approx + rescan + """ + + def _setup_workspace(self, tmp_output: Path) -> dict: + """Create the agent workspace directory layout.""" + workspace = { + "root": tmp_output, + "rules": tmp_output / "agent-rules", + "config": tmp_output / "agent-config", + "approximations": tmp_output / "agent-approximations" / "src", + "results": tmp_output / "results", + "test_project": tmp_output / "agent-test-project", + "test_compiled": tmp_output / "agent-test-compiled", + "test_output": tmp_output / "agent-test-output", + } + for d in workspace.values(): + if isinstance(d, Path): + d.mkdir(parents=True, exist_ok=True) + return workspace + + def test_full_agent_loop( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """Full end-to-end agent loop on Stirling-PDF.""" + ws = self._setup_workspace(tmp_output) + + # ── Phase 1: Source Discovery (simulated) ───────────────────── + # The agent would read source files to identify controllers and + # attack surface. Here we simulate the discovery result. + controllers = [ + "stirling.software.SPDF.controller.api.misc.PrintFileController", + "stirling.software.SPDF.controller.api.MergeController", + "stirling.software.SPDF.controller.api.SplitPDFController", + "stirling.software.SPDF.controller.api.security.*", + ] + print(f"Phase 1: Discovered {len(controllers)} controller groups") + + # ── Phase 2: Create Rule ────────────────────────────────────── + + # 2a: Read builtin rules to check coverage + builtin_path_traversal = BUILTIN_RULES_DIR / "java" / "security" / "path-traversal.yaml" + assert builtin_path_traversal.exists(), "Builtin path-traversal rule not found" + print("Phase 2a: Read builtin path-traversal rule") + + # 2b: Create custom source library rule for Stirling's multipart upload + lib_dir = ws["rules"] / "java" / "lib" + lib_dir.mkdir(parents=True, exist_ok=True) + write_text(lib_dir / "stirling-source.yaml", """\ +rules: + - id: stirling-multipart-file-source + options: + lib: true + severity: NOTE + message: Untrusted multipart file data from Spring controller + languages: [java] + patterns: + - pattern: | + $RETURNTYPE $METHOD(..., @RequestParam MultipartFile $UNTRUSTED, ...) { ... } +""") + + # 2c: Create join-mode security rule + sec_dir = ws["rules"] / "java" / "security" + sec_dir.mkdir(parents=True, exist_ok=True) + write_text(sec_dir / "stirling-path-traversal.yaml", """\ +rules: + - id: stirling-path-traversal + severity: ERROR + message: >- + User-uploaded file name flows to file system operation without sanitization + metadata: + cwe: CWE-22 + short-description: Path Traversal via uploaded file name + languages: [java] + mode: join + join: + refs: + - rule: java/lib/stirling-source.yaml#stirling-multipart-file-source + as: source + - rule: java/lib/generic/path-traversal-sinks.yaml#java-path-traversal-sink + as: sink + on: + - 'source.$UNTRUSTED -> sink.$UNTRUSTED' +""") + print("Phase 2b-c: Created custom rules") + + # ── Phase 3: Initial Scan ───────────────────────────────────── + + sarif_path = ws["results"] / "report-1.sarif" + ext_methods_path = ws["results"] / "external-methods-1.yaml" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(ws["rules"])], + rule_ids=["stirling-path-traversal"], + external_methods=str(ext_methods_path), + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok("Initial scan failed") + + # Analyze results + sarif_data = load_sarif(sarif_path) + findings = sarif_findings_for_rule(sarif_data, "stirling-path-traversal") + print(f"Phase 3: Initial scan found {len(findings)} path-traversal findings") + + for f in findings[:5]: + locs = f.get("locations", [{}]) + if locs: + uri = locs[0].get("physicalLocation", {}).get("artifactLocation", {}).get("uri", "?") + line = locs[0].get("physicalLocation", {}).get("region", {}).get("startLine", "?") + print(f" Finding: {uri}:{line}") + + # ── Phase 3b: Analyze External Methods ──────────────────────── + + if ext_methods_path.exists(): + ext_data = load_external_methods(ext_methods_path) + wo_count, wr_count = count_external_methods(ext_data) + print(f"Phase 3b: External methods — {wo_count} without rules, {wr_count} with rules") + + # Identify methods the agent would want to model + without_rules = ext_data.get("withoutRules", []) + priority_methods = [ + m for m in without_rules + if m.get("callSites", 0) > 5 + ] + priority_methods.sort(key=lambda m: m.get("callSites", 0), reverse=True) + print(f" Priority unmodeled methods (>5 call sites): {len(priority_methods)}") + for m in priority_methods[:10]: + print(f" {m['method']} ({m['callSites']} call sites, positions: {m['factPositions']})") + + # ── Phase 4: Create Approximation and Rescan ────────────────── + # Approximations are ONLY for external methods (from withoutRules). + # These are library methods without source code in the project. + + # Create YAML approximation for top unmodeled external methods + if ext_methods_path.exists() and priority_methods: + pass_through_rules = [] + for m in priority_methods[:5]: + method_name = m["method"] + positions = m["factPositions"] + + # Simple heuristic: if taint is on arg(0), propagate to result + copies = [] + for pos in positions: + if pos.startswith("arg("): + copies.append({"from": pos, "to": "result"}) + elif pos == "this": + copies.append({"from": "this", "to": "result"}) + + if copies: + pass_through_rules.append({ + "function": method_name, + "copy": copies, + }) + + if pass_through_rules: + config_file = ws["config"] / "custom-propagators.yaml" + write_yaml(config_file, {"passThrough": pass_through_rules}) + print(f"Phase 4: Created {len(pass_through_rules)} custom passThrough rules") + + # Rescan with approximations + sarif_path_2 = ws["results"] / "report-2.sarif" + ext_methods_path_2 = ws["results"] / "external-methods-2.yaml" + + result2 = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path_2), + rulesets=["builtin", str(ws["rules"])], + rule_ids=["stirling-path-traversal"], + approximations_config=str(config_file), + external_methods=str(ext_methods_path_2), + severity=["note", "warning", "error"], + timeout=600, + ) + + if result2.ok: + sarif_data_2 = load_sarif(sarif_path_2) + findings_2 = sarif_findings_for_rule(sarif_data_2, "stirling-path-traversal") + print(f"Phase 4: Rescan found {len(findings_2)} findings (was {len(findings)})") + + if ext_methods_path_2.exists(): + ext_data_2 = load_external_methods(ext_methods_path_2) + wo2, wr2 = count_external_methods(ext_data_2) + print(f" External methods after approx: {wo2} without (was {wo_count}), {wr2} with (was {wr_count})") + + # Verify the approximations had some effect + delta_findings = len(findings_2) - len(findings) + delta_methods = wo_count - wo2 + print(f" Delta: {delta_findings:+d} findings, {delta_methods:+d} newly modeled methods") + + print("\n=== Full agent loop completed ===") +``` + +--- + +## 9. Running Tests + +### Quick validation (existing features only) + +```bash +cd agent-mode/test + +# Run only tests that work with current implementation +pytest test_build.py -m "not new_feature and not slow" -v + +# Run build tests including slow ones (actual scans) +pytest test_build.py -m "not new_feature" -v --timeout=900 +``` + +### Full test suite (after new features are implemented) + +```bash +cd agent-mode/test + +# Run all tests +pytest -v --timeout=900 + +# Run specific suite +pytest test_rules.py -v --timeout=900 +pytest test_approximations.py -v --timeout=900 +pytest test_external_methods.py -v --timeout=900 + +# Run the full integration loop +pytest test_full_loop.py -v --timeout=1800 + +# Exclude slow tests for quick checks +pytest -m "not slow" -v +``` + +### Development mode (direct JAR invocation) + +When `opentaint` is not on PATH, tests automatically fall back to invoking the analyzer/autobuilder JARs directly. Set environment variables if the JARs are in non-default locations: + +```bash +# Point to locally-built JARs +export JAVA_HOME=/path/to/java-21 +export OPENTAINT_ANALYZER_JAR=/path/to/opentaint-project-analyzer.jar +export OPENTAINT_AUTOBUILDER_JAR=/path/to/opentaint-project-auto-builder.jar + +pytest -v --timeout=900 +``` + +### CI Integration + +For CI, use a matrix of test suites to parallelize: + +```yaml +# .github/workflows/ci-agent-mode-tests.yaml +jobs: + test: + strategy: + matrix: + suite: [test_build, test_rules, test_approximations, test_external_methods] + steps: + - name: Run agent-mode tests + run: | + cd agent-mode/test + pytest ${{ matrix.suite }}.py -v --timeout=900 -m "not new_feature" +``` + +--- + +## Summary + +| Suite | Tests | Markers | Purpose | +|---|---|---|---| +| `test_build.py` | 5 | `slow` | Project build: pre-compiled, auto-compile, error handling | +| `test_rules.py` | 8 | `slow`, `new_feature` | Rule creation, rule-id filter, rule tests, custom rules on Stirling | +| `test_approximations.py` | 8 | `slow`, `new_feature` | YAML config, code-based approximations, compilation errors, combined | +| `test_external_methods.py` | 7 | `slow`, `new_feature` | External methods output, structure validation, coverage changes | +| `test_full_loop.py` | 1 | `slow`, `new_feature` | Full agent workflow: rule → test → scan → approx → rescan | +| **Total** | **29** | | | + +Tests marked `new_feature` will pass once the corresponding engine/CLI changes from `agent-mode-design.md` are implemented. Tests without that marker can run today against the existing codebase (using direct JAR invocation). diff --git a/agent-mode/test/conftest.py b/agent-mode/test/conftest.py new file mode 100644 index 000000000..1a61de157 --- /dev/null +++ b/agent-mode/test/conftest.py @@ -0,0 +1,497 @@ +""" +Shared fixtures and helpers for agent-mode tests. + +All tests use the Go CLI binary (`opentaint`). In development mode, the binary +is located at `cli/bin/opentaint` relative to the repo root, and hidden +`--analyzer-jar` / `--autobuilder-jar` flags are passed automatically to point +at locally-built JARs. +""" + +import json +import os +import shutil +import subprocess +import tempfile +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + +import pytest +import yaml + + +# ─── Timing ────────────────────────────────────────────────────────────────── + + +@pytest.hookimpl(tryfirst=True) +def pytest_runtest_setup(item): + """Record start time before each test.""" + item._start_time = time.time() + + +@pytest.hookimpl(trylast=True) +def pytest_runtest_teardown(item, nextitem): + """Print elapsed time after each test.""" + start = getattr(item, "_start_time", None) + if start is not None: + elapsed = time.time() - start + print(f"\n [timing] {item.nodeid}: {elapsed:.1f}s") + + +# ─── Paths ─────────────────────────────────────────────────────────────────── + +STIRLING_PROJECT = Path("/home/sobol/data/Stirling-PDF/seqra-project/project.yaml") +STIRLING_PROJECT_DIR = STIRLING_PROJECT.parent +OPENTAINT_ROOT = Path(__file__).resolve().parent.parent.parent # -> opentaint/ +FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures" +BUILTIN_RULES_DIR = OPENTAINT_ROOT / "rules" / "ruleset" + + +# ─── CLI Resolution ────────────────────────────────────────────────────────── + + +def _find_cli_binary() -> str: + """ + Find the opentaint CLI binary. Resolution order: + 1. OPENTAINT_CLI env var + 2. Local dev build at cli/bin/opentaint + 3. opentaint on PATH + """ + env_cli = os.environ.get("OPENTAINT_CLI") + if env_cli: + p = Path(env_cli) + if p.exists(): + return str(p) + + dev_binary = OPENTAINT_ROOT / "cli" / "bin" / "opentaint" + if dev_binary.exists(): + return str(dev_binary) + + on_path = shutil.which("opentaint") + if on_path: + return on_path + + pytest.exit( + "opentaint CLI binary not found. Build it with: cd cli && go build -o ./bin/opentaint .", + returncode=1, + ) + + +def _find_local_jar(env_var: str, candidates: list) -> Optional[str]: + """Find a locally-built JAR by env var or candidate paths.""" + env_jar = os.environ.get(env_var) + if env_jar: + p = Path(env_jar) + if p.exists(): + return str(p) + + for c in candidates: + if c.exists(): + return str(c) + return None + + +def _find_analyzer_jar() -> Optional[str]: + """Find locally-built analyzer JAR for --analyzer-jar hidden flag.""" + return _find_local_jar( + "OPENTAINT_ANALYZER_JAR", + [ + OPENTAINT_ROOT + / "core" + / "build" + / "libs" + / "opentaint-project-analyzer.jar", + ], + ) + + +def _find_autobuilder_jar() -> Optional[str]: + """Find locally-built autobuilder JAR for --autobuilder-jar hidden flag.""" + return _find_local_jar( + "OPENTAINT_AUTOBUILDER_JAR", + [ + OPENTAINT_ROOT + / "autobuilder" + / "build" + / "libs" + / "opentaint-project-auto-builder.jar", + ], + ) + + +# ─── CLI Abstraction ───────────────────────────────────────────────────────── + + +@dataclass +class CLIResult: + """Result of a CLI command execution.""" + + returncode: int + stdout: str + stderr: str + command: list + + @property + def ok(self) -> bool: + return self.returncode == 0 + + def assert_ok(self, msg: str = ""): + assert self.ok, ( + f"Command failed (rc={self.returncode}){': ' + msg if msg else ''}\n" + f" cmd: {' '.join(str(c) for c in self.command)}\n" + f" stderr: {self.stderr[:2000]}" + ) + + def assert_failed(self, msg: str = ""): + assert not self.ok, ( + f"Command unexpectedly succeeded{': ' + msg if msg else ''}\n" + f" cmd: {' '.join(str(c) for c in self.command)}\n" + f" stdout: {self.stdout[:2000]}" + ) + + +@dataclass +class OpenTaintCLI: + """ + Abstraction over the opentaint Go CLI binary. + + All commands go through the CLI. In dev mode, hidden --analyzer-jar and + --autobuilder-jar flags are passed to point at locally-built JARs. + """ + + cli_path: str = "" + analyzer_jar: Optional[str] = None + autobuilder_jar: Optional[str] = None + timeout: int = 600 # seconds + + def _base_cmd(self) -> list: + """Return the base command with hidden JAR flags if set.""" + cmd = [self.cli_path] + if self.analyzer_jar: + cmd.extend(["--analyzer-jar", self.analyzer_jar]) + if self.autobuilder_jar: + cmd.extend(["--autobuilder-jar", self.autobuilder_jar]) + return cmd + + def run( + self, args: list, timeout: Optional[int] = None, env: Optional[dict] = None + ) -> CLIResult: + """Run an arbitrary command and return the result.""" + str_args = [str(a) for a in args] + run_env = {**os.environ, **(env or {})} + t = timeout or self.timeout + try: + proc = subprocess.run( + str_args, + capture_output=True, + text=True, + timeout=t, + env=run_env, + ) + return CLIResult(proc.returncode, proc.stdout, proc.stderr, str_args) + except subprocess.TimeoutExpired: + return CLIResult(-1, "", f"Timeout after {t}s", str_args) + + def scan( + self, + project_path: str, + output: str, + rulesets: list = None, + rule_ids: list = None, + approximations_config: Optional[str] = None, + dataflow_approximations: Optional[str] = None, + track_external_methods: bool = False, + severity: list = None, + timeout: int = 900, + max_memory: str = "8G", + extra_flags: list = None, + ) -> CLIResult: + """Run opentaint scan. + + If ``project_path`` points at a pre-compiled project model (either the + directory containing ``project.yaml`` or the ``project.yaml`` file + itself), the scan is invoked with ``--project-model ``. Otherwise + the path is forwarded as a source project. + """ + p = Path(project_path) + if p.name == "project.yaml" and p.is_file(): + p = p.parent + if p.is_dir() and (p / "project.yaml").is_file(): + cmd = self._base_cmd() + ["scan", "-o", output, "--project-model", str(p)] + else: + cmd = self._base_cmd() + ["scan", str(p), "-o", output] + for rs in rulesets or ["builtin"]: + cmd.extend(["--ruleset", rs]) + for rid in rule_ids or []: + cmd.extend(["--rule-id", rid]) + if approximations_config: + cmd.extend(["--approximations-config", approximations_config]) + if dataflow_approximations: + cmd.extend(["--dataflow-approximations", dataflow_approximations]) + if track_external_methods: + cmd.append("--track-external-methods") + for sev in severity or ["warning", "error"]: + cmd.extend(["--severity", sev]) + cmd.extend(["--timeout", f"{timeout}s", "--max-memory", max_memory]) + cmd.extend(extra_flags or []) + return self.run(cmd, timeout=timeout + 60) + + def test_rules( + self, + project_path: str, + rulesets: list, + output_dir: str, + timeout: int = 300, + max_memory: str = "8G", + ) -> CLIResult: + """Run opentaint agent test-rules. + + The CLI expects a directory path (it looks for project.yaml inside). + If project_path points to a project.yaml file, the parent directory is used. + """ + p = Path(project_path) + if p.name == "project.yaml" and p.is_file(): + project_path = str(p.parent) + cmd = self._base_cmd() + ["agent", "test-rules", project_path] + for rs in rulesets: + cmd.extend(["--ruleset", rs]) + cmd.extend(["-o", output_dir]) + cmd.extend(["--timeout", f"{timeout}s", "--max-memory", max_memory]) + return self.run(cmd, timeout=timeout + 60) + + def compile( + self, + project_path: str, + output_dir: str, + timeout: int = 300, + ) -> CLIResult: + """Run opentaint compile.""" + cmd = self._base_cmd() + ["compile", project_path, "-o", output_dir] + return self.run(cmd, timeout=timeout + 60) + + def rules_path(self) -> CLIResult: + """Run opentaint agent rules-path.""" + return self.run(self._base_cmd() + ["agent", "rules-path"]) + + def init_test_project( + self, + output_dir: str, + dependencies: list = None, + ) -> CLIResult: + """Run opentaint agent init-test-project.""" + cmd = self._base_cmd() + ["agent", "init-test-project", output_dir] + for dep in dependencies or []: + cmd.extend(["--dependency", dep]) + return self.run(cmd) + + +# ─── Fixtures ───────────────────────────────────────────────────────────────── + + +@pytest.fixture(scope="session") +def cli() -> OpenTaintCLI: + """Provide an OpenTaintCLI instance configured for the current environment.""" + return OpenTaintCLI( + cli_path=_find_cli_binary(), + analyzer_jar=_find_analyzer_jar(), + autobuilder_jar=_find_autobuilder_jar(), + ) + + +@pytest.fixture(scope="session") +def stirling_project() -> Path: + """Path to the Stirling-PDF project.yaml.""" + assert STIRLING_PROJECT.exists(), ( + f"Stirling-PDF project not found at {STIRLING_PROJECT}" + ) + return STIRLING_PROJECT + + +@pytest.fixture +def tmp_output(tmp_path) -> Path: + """Provide a temporary output directory for test results.""" + return tmp_path + + +@pytest.fixture(scope="session") +def builtin_rules() -> Path: + """Path to the built-in rules directory.""" + assert BUILTIN_RULES_DIR.exists(), f"Builtin rules not found at {BUILTIN_RULES_DIR}" + return BUILTIN_RULES_DIR + + +# ─── Helpers ────────────────────────────────────────────────────────────────── + + +def load_sarif(path: Path) -> dict: + """Load and validate a SARIF file.""" + assert path.exists(), f"SARIF file not found: {path}" + with open(path) as f: + data = json.load(f) + assert data.get("version") == "2.1.0", "Not a valid SARIF 2.1.0 file" + assert "runs" in data and len(data["runs"]) > 0, "SARIF has no runs" + return data + + +def sarif_results(data: dict) -> list: + """Extract results from a SARIF report.""" + return data["runs"][0].get("results", []) + + +def sarif_rule_ids(data: dict) -> set: + """Extract unique rule IDs from SARIF results.""" + return {r["ruleId"] for r in sarif_results(data)} + + +def sarif_findings_for_rule(data: dict, rule_id: str) -> list: + """Get findings for a specific rule ID. + + Matches both exact ID and semgrep-style dot-separated ID (e.g. + 'stirling-path-traversal' matches 'java.security.stirling-path-traversal'). + """ + return [ + r + for r in sarif_results(data) + if r["ruleId"] == rule_id or r["ruleId"].endswith("." + rule_id) + ] + + +def _derive_external_methods_paths(sarif_path: Path) -> tuple: + """Return the two fixed external-methods file paths next to the SARIF. + + The analyzer always writes ``external-methods-without-rules.yaml`` and + ``external-methods-with-rules.yaml`` into its output directory. Here we + key off the SARIF path (or its parent directory), matching how the + Go CLI routes ``-o`` to ``--output-dir``. + """ + parent = sarif_path if sarif_path.is_dir() else sarif_path.parent + return ( + parent / "external-methods-without-rules.yaml", + parent / "external-methods-with-rules.yaml", + ) + + +def load_external_methods(sarif_path: Path) -> dict: + """Load external methods from the two fixed files next to the SARIF. + + Returns ``{"withoutRules": [...], "withRules": [...]}``. + """ + wo_path, wr_path = _derive_external_methods_paths(sarif_path) + + without_rules = [] + with_rules = [] + + def _read(path: Path) -> list: + if not path.exists(): + return [] + with open(path) as f: + data = yaml.safe_load(f) + if data is None: + return [] + # Analyzer writes a top-level YAML list; tolerate {methods: [...]} too. + if isinstance(data, list): + return data + if isinstance(data, dict): + return data.get("methods", []) + return [] + + without_rules = _read(wo_path) + with_rules = _read(wr_path) + + assert wo_path.exists() or wr_path.exists(), ( + f"Neither external methods file found: {wo_path}, {wr_path}" + ) + return {"withoutRules": without_rules, "withRules": with_rules} + + +def external_methods_exist(sarif_path: Path) -> bool: + """Check if at least one external-methods file exists next to the SARIF.""" + wo_path, wr_path = _derive_external_methods_paths(sarif_path) + return wo_path.exists() or wr_path.exists() + + +def count_external_methods(data: dict) -> tuple: + """Return (without_rules_count, with_rules_count).""" + without = len(data.get("withoutRules", [])) + with_rules = len(data.get("withRules", [])) + return without, with_rules + + +def write_yaml(path: Path, content: dict): + """Write a YAML file.""" + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + yaml.dump(content, f, default_flow_style=False, sort_keys=False) + + +def write_text(path: Path, content: str): + """Write a text file.""" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content) + + +import re + + +def parse_analyzer_timing(output: str) -> dict: + """ + Parse timing information from CLI/analyzer output. + + Looks for patterns like: + - "Compiling project model" (Go CLI spinner phase) + - "Analyzing project" (Go CLI spinner phase) + - "Start IFDS analysis" / "Finish IFDS analysis" (analyzer log) + - "Analysis done in " (IFDS elapsed) + - "Start SARIF report generation" / "Finish SARIF report" + - "Start vulnerability confirmation" + - "Start trace generation" / "Finish trace generation" + + Returns a dict with discovered timing info (best-effort, may be empty + if the analyzer doesn't log at info level to stdout). + """ + timing = {} + + # Look for IFDS analysis elapsed time: "Analysis done in 12.345s" or "Analysis done in 1m 23s" etc. + m = re.search(r"Analysis done in (.+?)$", output, re.MULTILINE) + if m: + timing["ifds_elapsed"] = m.group(1).strip() + + # Count phase markers + phases = [ + ("ifds_start", r"Start IFDS analysis"), + ("ifds_finish", r"Finish IFDS analysis"), + ("sarif_start", r"Start SARIF report generation"), + ("sarif_finish", r"Finish SARIF report"), + ("vuln_confirm", r"Start vulnerability confirmation"), + ("trace_gen_start", r"Start trace generation"), + ("trace_gen_finish", r"Finish trace generation"), + ("se_start", r"Start SE for project"), + ("se_finish", r"Finish SE for project"), + ] + for key, pattern in phases: + if re.search(pattern, output): + timing[key] = True + + # Total vulnerabilities count + m = re.search(r"Total vulnerabilities:\s*(\d+)", output) + if m: + timing["total_vulnerabilities"] = int(m.group(1)) + + return timing + + +def print_timing_breakdown(label: str, result: "CLIResult"): + """Print a timing breakdown from CLI output if available.""" + timing = parse_analyzer_timing(result.stdout + result.stderr) + if timing: + parts = [] + if "ifds_elapsed" in timing: + parts.append(f"IFDS: {timing['ifds_elapsed']}") + if "total_vulnerabilities" in timing: + parts.append(f"vulns: {timing['total_vulnerabilities']}") + detected_phases = [k for k, v in timing.items() if v is True] + if detected_phases: + parts.append(f"phases: {', '.join(detected_phases)}") + if parts: + print(f" [timing:{label}] {' | '.join(parts)}") diff --git a/agent-mode/test/fixtures/approximations/java/PdfBoxDocumentApprox.java b/agent-mode/test/fixtures/approximations/java/PdfBoxDocumentApprox.java new file mode 100644 index 000000000..fdd16d141 --- /dev/null +++ b/agent-mode/test/fixtures/approximations/java/PdfBoxDocumentApprox.java @@ -0,0 +1,50 @@ +package agent.approximations; + +import org.opentaint.ir.approximation.annotation.ApproximateByName; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +/** + * Code-based approximation for PDFBox's PDDocument class. + * + * IMPORTANT: Approximations are ONLY applicable to external methods — + * library classes whose source code is NOT part of the project being analyzed. + * PDFBox is an external dependency of Stirling-PDF (pdfbox-3.0.6.jar). + * + * This models complex taint propagation through PDDocument methods that + * involve internal state and cannot be expressed with simple YAML passThrough. + * + * PDDocument.save(OutputStream) — taint on the document (this) flows to + * the output stream, modeling the case where a tainted PDF is serialized. + */ +@ApproximateByName("org.apache.pdfbox.pdmodel.PDDocument") +public class PdfBoxDocumentApprox { + + /** + * Model save(OutputStream) — taint on this flows to arg(0). + * A tainted document writes tainted bytes to the output stream. + */ + public void save(java.io.OutputStream output) throws java.io.IOException { + org.apache.pdfbox.pdmodel.PDDocument self = + (org.apache.pdfbox.pdmodel.PDDocument) (Object) this; + if (OpentaintNdUtil.nextBool()) { + throw new java.io.IOException("approximation: failure path"); + } + // Model: taint from document flows to output stream + byte[] data = new byte[1]; + output.write(data); + } + + /** + * Model getPage(int) — taint on this flows to result. + * A tainted document produces tainted pages. + */ + public Object getPage(int pageIndex) { + org.apache.pdfbox.pdmodel.PDDocument self = + (org.apache.pdfbox.pdmodel.PDDocument) (Object) this; + if (OpentaintNdUtil.nextBool()) { + return null; + } + return self.getPages().get(pageIndex); + } +} diff --git a/agent-mode/test/fixtures/approximations/yaml/custom-propagators.yaml b/agent-mode/test/fixtures/approximations/yaml/custom-propagators.yaml new file mode 100644 index 000000000..6b475284a --- /dev/null +++ b/agent-mode/test/fixtures/approximations/yaml/custom-propagators.yaml @@ -0,0 +1,37 @@ +# Custom passThrough rules for external library methods encountered by the engine. +# +# IMPORTANT: Approximations (both YAML and code-based) are ONLY applicable to +# external methods — library classes whose source code is NOT part of the project. +# Project classes with source code are analyzed directly by the engine. +# +# These methods would appear in the external-methods.yaml output under +# withoutRules, indicating the engine has no propagation model for them. + +passThrough: + # org.apache.pdfbox.pdmodel.PDDocument#getPage — taint on this flows to result + # PDFBox is an external dependency of Stirling-PDF + - function: org.apache.pdfbox.pdmodel.PDDocument#getPage + copy: + - from: this + to: result + + # org.apache.pdfbox.text.PDFTextStripper#getText — taint on arg(0) flows to result + # Extracts text from a tainted document + - function: org.apache.pdfbox.text.PDFTextStripper#getText + copy: + - from: arg(0) + to: result + + # com.fasterxml.jackson.databind.ObjectMapper#readValue — taint flows through deserialization + # Jackson is an external dependency + - function: com.fasterxml.jackson.databind.ObjectMapper#readValue + copy: + - from: arg(0) + to: result + + # org.jsoup.Jsoup#parse — taint on arg(0) flows to result + # Jsoup is an external dependency used for HTML parsing + - function: org.jsoup.Jsoup#parse + copy: + - from: arg(0) + to: result diff --git a/agent-mode/test/fixtures/rules/java/lib/stirling-source.yaml b/agent-mode/test/fixtures/rules/java/lib/stirling-source.yaml new file mode 100644 index 000000000..3b3d04763 --- /dev/null +++ b/agent-mode/test/fixtures/rules/java/lib/stirling-source.yaml @@ -0,0 +1,10 @@ +rules: + - id: stirling-multipart-file-source + options: + lib: true + severity: NOTE + message: Untrusted multipart file data from Spring controller + languages: [java] + patterns: + - pattern: | + $RETURNTYPE $METHOD(..., @RequestParam MultipartFile $UNTRUSTED, ...) { ... } diff --git a/agent-mode/test/fixtures/rules/java/security/stirling-path-traversal.yaml b/agent-mode/test/fixtures/rules/java/security/stirling-path-traversal.yaml new file mode 100644 index 000000000..da645e1a5 --- /dev/null +++ b/agent-mode/test/fixtures/rules/java/security/stirling-path-traversal.yaml @@ -0,0 +1,18 @@ +rules: + - id: stirling-path-traversal + severity: ERROR + message: >- + User-uploaded file name flows to file system operation without sanitization + metadata: + cwe: CWE-22 + short-description: Path Traversal via uploaded file name + languages: [java] + mode: join + join: + refs: + - rule: java/lib/stirling-source.yaml#stirling-multipart-file-source + as: source + - rule: java/lib/generic/path-traversal-sinks.yaml#java-path-traversal-sinks + as: sink + on: + - 'source.$UNTRUSTED -> sink.$UNTRUSTED' diff --git a/agent-mode/test/fixtures/test-samples/src/main/java/test/PathTraversalTest.java b/agent-mode/test/fixtures/test-samples/src/main/java/test/PathTraversalTest.java new file mode 100644 index 000000000..94c96ff51 --- /dev/null +++ b/agent-mode/test/fixtures/test-samples/src/main/java/test/PathTraversalTest.java @@ -0,0 +1,38 @@ +package test; + +import org.opentaint.sast.test.util.PositiveRuleSample; +import org.opentaint.sast.test.util.NegativeRuleSample; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +@RestController +public class PathTraversalTest { + + @PositiveRuleSample(value = "java/security/stirling-path-traversal.yaml", id = "stirling-path-traversal") + @PostMapping("/upload-vulnerable") + public String vulnerable(@RequestParam MultipartFile file) throws IOException { + // Directly use original filename — path traversal possible + String filename = file.getOriginalFilename(); + Path dest = Paths.get("/uploads/" + filename); + Files.copy(file.getInputStream(), dest); + return "uploaded"; + } + + @NegativeRuleSample(value = "java/security/stirling-path-traversal.yaml", id = "stirling-path-traversal") + @PostMapping("/upload-safe") + public String safe(@RequestParam MultipartFile file) throws IOException { + // Use sanitized filename — only the base name, no path components + String filename = new File(file.getOriginalFilename()).getName(); + Path dest = Paths.get("/uploads/").resolve(filename); + Files.copy(file.getInputStream(), dest); + return "uploaded"; + } +} diff --git a/agent-mode/test/pytest.ini b/agent-mode/test/pytest.ini new file mode 100644 index 000000000..cc24b35fb --- /dev/null +++ b/agent-mode/test/pytest.ini @@ -0,0 +1,5 @@ +[pytest] +testpaths = . +markers = + new_feature: Tests for features not yet implemented (deselect with -m "not new_feature") + slow: Tests that run full analysis (>60s) diff --git a/agent-mode/test/test_approximations.py b/agent-mode/test/test_approximations.py new file mode 100644 index 000000000..6f911c151 --- /dev/null +++ b/agent-mode/test/test_approximations.py @@ -0,0 +1,358 @@ +""" +Suite 3: Approximations Generation/Override + +Approximations (both YAML passThrough and code-based) are ONLY applicable to +external methods — library classes whose source code is NOT part of the analyzed +project. The agent discovers which methods need approximations via the +--external-methods output (withoutRules section). + +Tests: +3.1 Scan with --approximations-config (YAML passThrough for external library methods) +3.2 Scan with --approximations-config + --ruleset together (§1.2) +3.3 Scan with --dataflow-approximations from .java sources (auto-compile, §1.4) +3.4 Approximation compilation failure handling (bad Java source) +3.5 Duplicate approximation targeting built-in class (error) +3.6 Scan with both --approximations-config and --dataflow-approximations +3.7 Verify approximation changes analysis results +3.8 Invalid YAML config error handling +""" + +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, + load_sarif, + sarif_results, + sarif_rule_ids, + sarif_findings_for_rule, + write_text, + write_yaml, + FIXTURES_DIR, + BUILTIN_RULES_DIR, +) + + +class TestYAMLApproximationsConfig: + """3.1-3.2: YAML passThrough config for external library methods.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_with_approximations_config( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Scan with --approximations-config applies custom passThrough rules + for external library methods (PDFBox, Jackson, etc.). + """ + sarif_path = tmp_output / "report.sarif" + config_path = ( + FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + ) + + if not config_path.exists(): + pytest.skip("Fixture approximation config not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + approximations_config=str(config_path), + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with --approximations-config failed") + data = load_sarif(sarif_path) + assert len(sarif_results(data)) >= 0 + + @pytest.mark.slow + @pytest.mark.new_feature + def test_approximations_config_with_custom_ruleset( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + --approximations-config and --ruleset can be used together (§1.2). + Previously these were mutually exclusive. + """ + sarif_path = tmp_output / "report.sarif" + config_path = ( + FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + ) + custom_rules = FIXTURES_DIR / "rules" + + if not config_path.exists() or not custom_rules.exists(): + pytest.skip("Fixture files not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(custom_rules)], + rule_ids=[ + "java/security/stirling-path-traversal.yaml:stirling-path-traversal" + ], + approximations_config=str(config_path), + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok( + "Scan with both --approximations-config and --ruleset failed. " + "These should work together per design §1.2" + ) + + @pytest.mark.slow + @pytest.mark.new_feature + def test_invalid_approximations_config_errors( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """Invalid YAML config should produce a clear error.""" + sarif_path = tmp_output / "report.sarif" + bad_config = tmp_output / "bad-config.yaml" + write_text(bad_config, "this is not: [valid: yaml: config") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + approximations_config=str(bad_config), + timeout=120, + ) + result.assert_failed("Scan should fail with invalid approximations config") + # Verify the error mentions config/yaml/parse/fail (may be in stdout or stderr) + combined_output = (result.stdout + result.stderr).lower() + assert any( + kw in combined_output + for kw in ["config", "yaml", "parse", "error", "failed", "fail"] + ), ( + f"Error output should mention config/yaml/parse/fail.\n" + f" stdout: {result.stdout[:500]}\n" + f" stderr: {result.stderr[:500]}" + ) + + +class TestCodeBasedApproximations: + """3.3-3.5: Code-based approximations via --dataflow-approximations.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_with_java_source_approximations( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + --dataflow-approximations with .java source files auto-compiles them. + The approximation targets PDFBox's PDDocument (an external library class). + """ + sarif_path = tmp_output / "report.sarif" + approx_dir = FIXTURES_DIR / "approximations" / "java" + + if not approx_dir.exists(): + pytest.skip("Fixture approximation source not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + dataflow_approximations=str(approx_dir), + severity=["warning", "error"], + timeout=600, + ) + if result.ok: + data = load_sarif(sarif_path) + print( + f"Scan with code-based approximations: {len(sarif_results(data))} findings" + ) + + @pytest.mark.slow + @pytest.mark.new_feature + def test_approximation_compilation_failure( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + --dataflow-approximations with invalid Java source should fail + with compilation errors. The Go CLI auto-compiles .java files + using javac before passing them to the analyzer. + """ + sarif_path = tmp_output / "report.sarif" + bad_approx_dir = tmp_output / "bad-approximations" + bad_approx_dir.mkdir() + write_text( + bad_approx_dir / "BrokenApprox.java", + """\ +package agent.approximations; + +import org.opentaint.ir.approximation.annotation.Approximate; + +@Approximate(com.nonexistent.library.DoesNotExist.class) +public class BrokenApprox { + public void broken() { + com.nonexistent.library.DoesNotExist x = null; + } +} +""", + ) + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + dataflow_approximations=str(bad_approx_dir), + timeout=120, + ) + result.assert_failed("Scan should fail when approximation compilation fails") + combined_output = (result.stdout + result.stderr).lower() + assert ( + "compil" in combined_output + or "javac" in combined_output + or "error" in combined_output + ), ( + f"Error should mention compilation failure.\n" + f" stdout: {result.stdout[:500]}\n" + f" stderr: {result.stderr[:500]}" + ) + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + dataflow_approximations=str(bad_approx_dir), + timeout=120, + ) + result.assert_failed( + "Scan should fail when approximation directory has no .class files" + ) + combined_output = (result.stdout + result.stderr).lower() + assert ( + "compil" in combined_output + or ".class" in combined_output + or ".java" in combined_output + ), ( + f"Error should mention compilation or .class/.java files.\n" + f" stdout: {result.stdout[:500]}\n" + f" stderr: {result.stderr[:500]}" + ) + + @pytest.mark.slow + @pytest.mark.new_feature + def test_duplicate_approximation_errors( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + A custom approximation targeting a class that already has a built-in + approximation should produce an error (bijection violation). + + This test requires a pre-compiled fixture class file. If the fixture + directory doesn't contain .class files, the test is skipped. + """ + dup_approx_fixture = FIXTURES_DIR / "approximations" / "duplicate" + if not dup_approx_fixture.exists() or not list( + dup_approx_fixture.rglob("*.class") + ): + pytest.skip( + "Duplicate approximation fixture not available — " + "requires pre-compiled .class file with @Approximate targeting a builtin class" + ) + + sarif_path = tmp_output / "report.sarif" + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + dataflow_approximations=str(dup_approx_fixture), + timeout=300, + ) + result.assert_failed("Duplicate approximation should produce an error") + + +class TestCombinedApproximations: + """3.6-3.7: Combining YAML config + code-based approximations.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_with_both_approximation_types( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Both --approximations-config and --dataflow-approximations can be + used in the same scan. + """ + sarif_path = tmp_output / "report.sarif" + yaml_config = ( + FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + ) + java_approx = FIXTURES_DIR / "approximations" / "java" + custom_rules = FIXTURES_DIR / "rules" + + if not yaml_config.exists() or not java_approx.exists(): + pytest.skip("Fixture files not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(custom_rules)], + rule_ids=[ + "java/security/stirling-path-traversal.yaml:stirling-path-traversal" + ], + approximations_config=str(yaml_config), + dataflow_approximations=str(java_approx), + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with combined approximation types failed") + + @pytest.mark.slow + @pytest.mark.new_feature + def test_approximations_change_results( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Running the same scan with and without custom approximations should + produce different results — the approximations add propagation paths. + """ + custom_rules = FIXTURES_DIR / "rules" + yaml_config = ( + FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + ) + + if not custom_rules.exists() or not yaml_config.exists(): + pytest.skip("Fixture files not created yet") + + # Run 1: without approximations + sarif_no_approx = tmp_output / "no-approx" / "report.sarif" + (tmp_output / "no-approx").mkdir() + r1 = cli.scan( + project_path=str(stirling_project), + output=str(sarif_no_approx), + rulesets=["builtin", str(custom_rules)], + rule_ids=[ + "java/security/stirling-path-traversal.yaml:stirling-path-traversal" + ], + severity=["note", "warning", "error"], + timeout=600, + ) + + # Run 2: with approximations + sarif_with_approx = tmp_output / "with-approx" / "report.sarif" + (tmp_output / "with-approx").mkdir() + r2 = cli.scan( + project_path=str(stirling_project), + output=str(sarif_with_approx), + rulesets=["builtin", str(custom_rules)], + rule_ids=[ + "java/security/stirling-path-traversal.yaml:stirling-path-traversal" + ], + approximations_config=str(yaml_config), + severity=["note", "warning", "error"], + timeout=600, + ) + + r1.assert_ok("Scan without approximations failed") + r2.assert_ok("Scan with approximations failed") + + data1 = load_sarif(sarif_no_approx) + data2 = load_sarif(sarif_with_approx) + count1 = len(sarif_results(data1)) + count2 = len(sarif_results(data2)) + print(f"Without approximations: {count1} findings") + print(f"With approximations: {count2} findings") + assert count1 != count2, ( + f"Approximations had no effect on results — both runs produced {count1} findings. " + "Custom passThrough rules should change dataflow propagation." + ) diff --git a/agent-mode/test/test_build.py b/agent-mode/test/test_build.py new file mode 100644 index 000000000..60219dd8d --- /dev/null +++ b/agent-mode/test/test_build.py @@ -0,0 +1,152 @@ +""" +Suite 1: Project Build Scenarios + +Tests: +1.1 Scan with pre-compiled project model (project.yaml) +1.2 Scan with source project (triggers auto-compile) +1.3 Compile-only (autobuilder) +1.4 Scan with invalid project path (error handling) +1.5 Scan with custom output directory +""" + +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, + load_sarif, + sarif_results, + sarif_rule_ids, + STIRLING_PROJECT_DIR, + BUILTIN_RULES_DIR, +) + + +class TestScanPreCompiledProject: + """1.1: Scan using the pre-compiled Stirling-PDF project model.""" + + @pytest.mark.slow + def test_scan_with_builtin_rules( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """Basic scan with builtin rules produces a valid SARIF with findings.""" + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with builtin rules failed") + + # Validate SARIF output + data = load_sarif(sarif_path) + results = sarif_results(data) + assert len(results) > 0, ( + "Scan produced no findings — expected some on Stirling-PDF" + ) + + # Should contain known vulnerability types + rule_ids = sarif_rule_ids(data) + print(f"Found {len(results)} findings across rules: {rule_ids}") + + @pytest.mark.slow + def test_scan_with_custom_ruleset_directory( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """Scan with a custom ruleset directory works alongside builtin.""" + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=[str(BUILTIN_RULES_DIR)], + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with custom ruleset directory failed") + data = load_sarif(sarif_path) + assert len(sarif_results(data)) > 0 + + @pytest.mark.slow + def test_scan_severity_filter_note( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """Scan with severity=note should include more findings.""" + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with note severity failed") + + +class TestScanFromSourceProject: + """1.2: Scan from source (auto-compiles via autobuilder first).""" + + @pytest.mark.slow + def test_scan_from_source_directory(self, cli: OpenTaintCLI, tmp_output: Path): + """ + Scan the Stirling-PDF source directory (not pre-compiled). + This triggers auto-compilation via autobuilder. + """ + sarif_path = tmp_output / "report.sarif" + source_dir = STIRLING_PROJECT_DIR / "sources" + + if not source_dir.exists(): + pytest.skip("Stirling-PDF source directory not available") + + result = cli.scan( + project_path=str(source_dir), + output=str(sarif_path), + rulesets=["builtin"], + timeout=900, + ) + if result.ok: + data = load_sarif(sarif_path) + assert len(sarif_results(data)) > 0 + + +class TestCompileOnly: + """1.3: Test the compile command separately.""" + + @pytest.mark.slow + def test_compile_source_project(self, cli: OpenTaintCLI, tmp_output: Path): + """Compile a source project into a project model.""" + source_dir = STIRLING_PROJECT_DIR / "sources" + model_dir = tmp_output / "project-model" + + if not source_dir.exists(): + pytest.skip("Stirling-PDF source directory not available") + + result = cli.compile( + project_path=str(source_dir), + output_dir=str(model_dir), + timeout=300, + ) + if result.ok: + project_yaml = model_dir / "project.yaml" + assert project_yaml.exists(), "compile did not produce project.yaml" + + +class TestErrorHandling: + """1.4: Error handling for invalid inputs.""" + + def test_scan_nonexistent_project(self, cli: OpenTaintCLI, tmp_output: Path): + """Scan with nonexistent project path should fail gracefully.""" + sarif_path = tmp_output / "report.sarif" + result = cli.scan( + project_path="/nonexistent/project/path", + output=str(sarif_path), + ) + result.assert_failed("Scan should fail for nonexistent project") + + def test_scan_missing_output_flag(self, cli: OpenTaintCLI, stirling_project: Path): + """Scan without -o flag should fail (it's required).""" + result = cli.run([cli.cli_path, "scan", str(stirling_project)]) + result.assert_failed("Scan should require -o flag") diff --git a/agent-mode/test/test_external_methods.py b/agent-mode/test/test_external_methods.py new file mode 100644 index 000000000..0d46530e8 --- /dev/null +++ b/agent-mode/test/test_external_methods.py @@ -0,0 +1,260 @@ +""" +Suite 4: External Methods Extraction + +Tests: +4.1 Scan with --external-methods produces two YAML files (without-rules / with-rules) +4.2 External methods files have correct structure (methods list with method, signature, factPositions, callSites) +4.3 External methods contain expected fields +4.4 without-rules list is non-empty for a real project +4.5 with-rules list contains known standard library methods +4.6 Scan with custom approximations reduces without-rules count +4.7 External methods extraction alongside SARIF output +""" + +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, + load_sarif, + sarif_results, + load_external_methods, + count_external_methods, + external_methods_exist, + FIXTURES_DIR, + BUILTIN_RULES_DIR, +) + + +class TestExternalMethodsBasic: + """4.1-4.3: Basic external methods output.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_produces_external_methods_file( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """--track-external-methods produces YAML files alongside SARIF output.""" + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + track_external_methods=True, + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with --track-external-methods failed") + assert external_methods_exist(sarif_path), ( + "External methods files not produced" + ) + + @pytest.mark.slow + @pytest.mark.new_feature + def test_external_methods_structure( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + External methods are split into two files (-without-rules.yaml and -with-rules.yaml). + Each entry has: method, signature, factPositions, callSites. + """ + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + track_external_methods=True, + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok() + + data = load_external_methods(sarif_path) + + for section_name in ["withoutRules", "withRules"]: + section = data.get(section_name, []) + for entry in section[:5]: + assert "method" in entry, f"Entry in {section_name} missing 'method'" + assert "signature" in entry, ( + f"Entry in {section_name} missing 'signature'" + ) + assert "factPositions" in entry, ( + f"Entry in {section_name} missing 'factPositions'" + ) + assert "callSites" in entry, ( + f"Entry in {section_name} missing 'callSites'" + ) + + assert "#" in entry["method"], ( + f"Method should be in Class#method format: {entry['method']}" + ) + + assert isinstance(entry["factPositions"], list), ( + f"factPositions should be a list: {entry['factPositions']}" + ) + + assert isinstance(entry["callSites"], int) and entry["callSites"] > 0, ( + f"callSites should be a positive integer: {entry['callSites']}" + ) + + for pos in entry["factPositions"]: + assert pos == "" or pos == "ret" or pos.startswith("arg("), ( + f"Invalid fact position: {pos} — expected '', 'ret', or 'arg(N)'" + ) + + +class TestExternalMethodsContent: + """4.4-4.5: External methods content validation.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_without_rules_nonempty_for_real_project( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Stirling-PDF uses many libraries without built-in approximations. + The withoutRules list should be non-empty. + """ + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + track_external_methods=True, + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok() + + data = load_external_methods(sarif_path) + without_count, with_count = count_external_methods(data) + print( + f"External methods: {without_count} without rules, {with_count} with rules" + ) + + assert without_count > 0, "Expected non-empty withoutRules for Stirling-PDF" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_with_rules_contains_standard_library_methods( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + The withRules section should contain standard library methods that + have built-in approximations. + """ + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + track_external_methods=True, + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok() + + data = load_external_methods(sarif_path) + with_rules = data.get("withRules", []) + with_rules_methods = {e["method"] for e in with_rules} + + print(f"Methods with rules ({len(with_rules_methods)}):") + for m in sorted(list(with_rules_methods))[:20]: + print(f" - {m}") + + +class TestExternalMethodsWithApproximations: + """4.6: Custom approximations reduce withoutRules count.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_approximations_reduce_without_rules( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Adding custom passThrough rules for methods that were in withoutRules + should move them to withRules. + """ + yaml_config = ( + FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" + ) + + if not yaml_config.exists(): + pytest.skip("Fixture approximation config not created yet") + + # Run 1: without custom approximations + sarif1 = tmp_output / "run1" / "report.sarif" + (tmp_output / "run1").mkdir() + r1 = cli.scan( + project_path=str(stirling_project), + output=str(sarif1), + rulesets=["builtin"], + track_external_methods=True, + severity=["warning", "error"], + timeout=600, + ) + + # Run 2: with custom approximations + sarif2 = tmp_output / "run2" / "report.sarif" + (tmp_output / "run2").mkdir() + r2 = cli.scan( + project_path=str(stirling_project), + output=str(sarif2), + rulesets=["builtin"], + approximations_config=str(yaml_config), + track_external_methods=True, + severity=["warning", "error"], + timeout=600, + ) + + if r1.ok and r2.ok: + data1 = load_external_methods(sarif1) + data2 = load_external_methods(sarif2) + wo1, _ = count_external_methods(data1) + wo2, _ = count_external_methods(data2) + print(f"Without custom approx: {wo1} methods without rules") + print(f"With custom approx: {wo2} methods without rules") + + methods_without_1 = {e["method"] for e in data1.get("withoutRules", [])} + methods_without_2 = {e["method"] for e in data2.get("withoutRules", [])} + newly_covered = methods_without_1 - methods_without_2 + if newly_covered: + print(f"Newly covered methods ({len(newly_covered)}):") + for m in sorted(newly_covered): + print(f" + {m}") + + +class TestExternalMethodsAlongsideSarif: + """4.7: External methods and SARIF are produced together.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_both_outputs_produced( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """A single scan produces both SARIF report and external methods file.""" + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + track_external_methods=True, + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok() + + assert sarif_path.exists(), "SARIF report not produced" + assert external_methods_exist(sarif_path), ( + "External methods files not produced" + ) + + sarif_data = load_sarif(sarif_path) + ext_data = load_external_methods(sarif_path) + assert len(sarif_results(sarif_data)) > 0, "SARIF has no results" + wo, wr = count_external_methods(ext_data) + assert wo + wr > 0, "External methods file is empty" diff --git a/agent-mode/test/test_full_loop.py b/agent-mode/test/test_full_loop.py new file mode 100644 index 000000000..ecadcdbcc --- /dev/null +++ b/agent-mode/test/test_full_loop.py @@ -0,0 +1,290 @@ +""" +Suite 5: Full Agent Loop (Integration) + +Simulates the complete agent workflow on Stirling-PDF: +1. Discover entry points (by reading source) +2. Create a custom path-traversal rule +3. Test the rule with samples +4. Run initial scan on Stirling-PDF +5. Analyze external methods +6. Create YAML approximation for an unmodeled method +7. Re-scan and verify the approximation has effect +""" + +import json +import shutil +import time +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, + load_sarif, + sarif_results, + sarif_rule_ids, + sarif_findings_for_rule, + load_external_methods, + count_external_methods, + external_methods_exist, + write_text, + write_yaml, + print_timing_breakdown, + FIXTURES_DIR, + BUILTIN_RULES_DIR, + STIRLING_PROJECT_DIR, +) + + +@pytest.mark.slow +@pytest.mark.new_feature +class TestFullAgentLoop: + """ + Simulates the agent's analysis workflow on Stirling-PDF. + + Phase 1 → discover entry points (manual) + Phase 2 → create rule + test + Phase 3 → scan + analyze + create approx + rescan + """ + + def _setup_workspace(self, tmp_output: Path) -> dict: + """Create the agent workspace directory layout.""" + workspace = { + "root": tmp_output, + "rules": tmp_output / "agent-rules", + "config": tmp_output / "agent-config", + "approximations": tmp_output / "agent-approximations" / "src", + "results": tmp_output / "results", + "test_project": tmp_output / "agent-test-project", + "test_compiled": tmp_output / "agent-test-compiled", + "test_output": tmp_output / "agent-test-output", + } + for d in workspace.values(): + if isinstance(d, Path): + d.mkdir(parents=True, exist_ok=True) + return workspace + + def test_full_agent_loop( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """Full end-to-end agent loop on Stirling-PDF.""" + ws = self._setup_workspace(tmp_output) + t0 = time.time() + + def _phase_time(label): + elapsed = time.time() - t0 + print(f" [{elapsed:6.1f}s] {label}") + + # ── Phase 1: Source Discovery (simulated) ───────────────────── + controllers = [ + "stirling.software.SPDF.controller.api.misc.PrintFileController", + "stirling.software.SPDF.controller.api.MergeController", + "stirling.software.SPDF.controller.api.SplitPDFController", + "stirling.software.SPDF.controller.api.security.*", + ] + print(f"Phase 1: Discovered {len(controllers)} controller groups") + _phase_time("Phase 1 complete (source discovery)") + + # ── Phase 2: Create Rule ────────────────────────────────────── + + builtin_path_traversal = ( + BUILTIN_RULES_DIR / "java" / "security" / "path-traversal.yaml" + ) + assert builtin_path_traversal.exists(), "Builtin path-traversal rule not found" + print("Phase 2a: Read builtin path-traversal rule") + + # Create custom source library rule + lib_dir = ws["rules"] / "java" / "lib" + lib_dir.mkdir(parents=True, exist_ok=True) + write_text( + lib_dir / "stirling-source.yaml", + """\ +rules: + - id: stirling-multipart-file-source + options: + lib: true + severity: NOTE + message: Untrusted multipart file data from Spring controller + languages: [java] + patterns: + - pattern: | + $RETURNTYPE $METHOD(..., @RequestParam MultipartFile $UNTRUSTED, ...) { ... } +""", + ) + + # Create join-mode security rule + sec_dir = ws["rules"] / "java" / "security" + sec_dir.mkdir(parents=True, exist_ok=True) + write_text( + sec_dir / "stirling-path-traversal.yaml", + """\ +rules: + - id: stirling-path-traversal + severity: ERROR + message: >- + User-uploaded file name flows to file system operation without sanitization + metadata: + cwe: CWE-22 + short-description: Path Traversal via uploaded file name + languages: [java] + mode: join + join: + refs: + - rule: java/lib/stirling-source.yaml#stirling-multipart-file-source + as: source + - rule: java/lib/generic/path-traversal-sinks.yaml#java-path-traversal-sinks + as: sink + on: + - 'source.$UNTRUSTED -> sink.$UNTRUSTED' +""", + ) + print("Phase 2b-c: Created custom rules") + _phase_time("Phase 2 complete (rule creation)") + + # ── Phase 3: Initial Scan ───────────────────────────────────── + + # Per-run subdirectory so the fixed external-methods filenames do not + # collide between the initial scan and the rescan further down. + run1_dir = ws["results"] / "run-1" + run1_dir.mkdir(parents=True, exist_ok=True) + sarif_path = run1_dir / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(ws["rules"])], + rule_ids=[ + "java/security/stirling-path-traversal.yaml:stirling-path-traversal" + ], + track_external_methods=True, + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok("Initial scan failed") + print_timing_breakdown("initial-scan", result) + _phase_time("Phase 3 complete (initial scan)") + + sarif_data = load_sarif(sarif_path) + findings = sarif_findings_for_rule(sarif_data, "stirling-path-traversal") + print(f"Phase 3: Initial scan found {len(findings)} path-traversal findings") + assert len(findings) > 0, ( + "Expected path-traversal findings from initial scan but got 0. " + "Check that the join rule's sink ref matches the builtin sink rule ID." + ) + + for f in findings[:5]: + locs = f.get("locations", [{}]) + if locs: + uri = ( + locs[0] + .get("physicalLocation", {}) + .get("artifactLocation", {}) + .get("uri", "?") + ) + line = ( + locs[0] + .get("physicalLocation", {}) + .get("region", {}) + .get("startLine", "?") + ) + print(f" Finding: {uri}:{line}") + + # ── Phase 3b: Analyze External Methods ──────────────────────── + + priority_methods = [] + wo_count, wr_count = 0, 0 + if external_methods_exist(sarif_path): + ext_data = load_external_methods(sarif_path) + wo_count, wr_count = count_external_methods(ext_data) + print( + f"Phase 3b: External methods — {wo_count} without rules, {wr_count} with rules" + ) + + without_rules = ext_data.get("withoutRules", []) + priority_methods = [m for m in without_rules if m.get("callSites", 0) > 5] + priority_methods.sort(key=lambda m: m.get("callSites", 0), reverse=True) + print( + f" Priority unmodeled methods (>5 call sites): {len(priority_methods)}" + ) + for m in priority_methods[:10]: + print( + f" {m['method']} ({m['callSites']} call sites, positions: {m['factPositions']})" + ) + _phase_time("Phase 3b complete (external methods analysis)") + + # ── Phase 4: Create Approximation and Rescan ────────────────── + # Approximations are ONLY for external methods (from withoutRules). + # These are library methods without source code in the project. + + if priority_methods: + pass_through_rules = [] + for m in priority_methods[:5]: + method_name = m["method"] + positions = m["factPositions"] + + copies = [] + for pos in positions: + if pos.startswith("arg("): + copies.append({"from": pos, "to": "result"}) + elif pos == "": + copies.append({"from": "", "to": "result"}) + + if copies: + pass_through_rules.append( + { + "function": method_name, + "copy": copies, + } + ) + + if pass_through_rules: + config_file = ws["config"] / "custom-propagators.yaml" + write_yaml(config_file, {"passThrough": pass_through_rules}) + print( + f"Phase 4: Created {len(pass_through_rules)} custom passThrough rules" + ) + + # Rescan with approximations — own subdir keeps the fixed + # external-methods filenames from overwriting run-1 outputs. + run2_dir = ws["results"] / "run-2" + run2_dir.mkdir(parents=True, exist_ok=True) + sarif_path_2 = run2_dir / "report.sarif" + + result2 = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path_2), + rulesets=["builtin", str(ws["rules"])], + rule_ids=[ + "java/security/stirling-path-traversal.yaml:stirling-path-traversal" + ], + approximations_config=str(config_file), + track_external_methods=True, + severity=["note", "warning", "error"], + timeout=600, + ) + + print_timing_breakdown("rescan-with-approx", result2) + + if result2.ok: + sarif_data_2 = load_sarif(sarif_path_2) + findings_2 = sarif_findings_for_rule( + sarif_data_2, "stirling-path-traversal" + ) + print( + f"Phase 4: Rescan found {len(findings_2)} findings (was {len(findings)})" + ) + + if external_methods_exist(sarif_path_2): + ext_data_2 = load_external_methods(sarif_path_2) + wo2, wr2 = count_external_methods(ext_data_2) + print( + f" External methods after approx: {wo2} without (was {wo_count}), {wr2} with (was {wr_count})" + ) + + delta_findings = len(findings_2) - len(findings) + delta_methods = wo_count - wo2 + print( + f" Delta: {delta_findings:+d} findings, {delta_methods:+d} newly modeled methods" + ) + + _phase_time("Phase 4 complete (approximation + rescan)") + total = time.time() - t0 + print(f"\n=== Full agent loop completed in {total:.1f}s ===") diff --git a/agent-mode/test/test_rules.py b/agent-mode/test/test_rules.py new file mode 100644 index 000000000..9dbcab5bc --- /dev/null +++ b/agent-mode/test/test_rules.py @@ -0,0 +1,409 @@ +""" +Suite 2: Rule Generation Pipeline + +Tests: +2.1 Read builtin rules via `opentaint rules-path` (or known path) +2.2 Create custom library + security rules, verify YAML validity +2.3 Run scan with custom ruleset + --rule-id filter +2.4 Run scan with custom ruleset without --rule-id filter (all rules active) +2.5 Bootstrap test project, build, and run rule tests +2.6 Rule test: false negative detected (positive sample with wrong pattern) +2.7 Rule test: false positive detected (negative sample with too-broad pattern) +2.8 Run scan on Stirling-PDF with custom path-traversal rule +""" + +import json +import shutil +import pytest +from pathlib import Path +from conftest import ( + OpenTaintCLI, + load_sarif, + sarif_results, + sarif_rule_ids, + sarif_findings_for_rule, + write_text, + BUILTIN_RULES_DIR, + FIXTURES_DIR, +) + + +class TestReadBuiltinRules: + """2.1: Agent can discover and read builtin rules.""" + + def test_builtin_rules_directory_exists(self, builtin_rules: Path): + """Builtin rules directory exists and contains rule files.""" + security_dir = builtin_rules / "java" / "security" + assert security_dir.exists(), f"No security rules at {security_dir}" + rule_files = list(security_dir.glob("*.yaml")) + assert len(rule_files) > 10, ( + f"Expected >10 security rules, found {len(rule_files)}" + ) + + def test_builtin_lib_rules_exist(self, builtin_rules: Path): + """Library rules (sources/sinks) exist.""" + lib_generic = builtin_rules / "java" / "lib" / "generic" + assert lib_generic.exists() + assert (lib_generic / "servlet-untrusted-data-source.yaml").exists() + assert (lib_generic / "path-traversal-sinks.yaml").exists() + + @pytest.mark.new_feature + def test_rules_path_command(self, cli: OpenTaintCLI): + """opentaint rules-path prints the rules directory.""" + result = cli.rules_path() + result.assert_ok("rules-path command failed") + rules_dir = Path(result.stdout.strip()) + assert rules_dir.exists(), f"rules-path returned non-existent dir: {rules_dir}" + assert (rules_dir / "java" / "security").is_dir() + + +class TestCustomRuleCreation: + """2.2: Create and validate custom rules.""" + + def test_custom_rules_are_valid_yaml(self): + """Fixture rule files are syntactically valid YAML with expected structure.""" + import yaml + + rules_dir = FIXTURES_DIR / "rules" + for rule_file in rules_dir.rglob("*.yaml"): + with open(rule_file) as f: + data = yaml.safe_load(f) + assert "rules" in data, f"Rule file {rule_file} missing 'rules' key" + for rule in data["rules"]: + assert "id" in rule, f"Rule in {rule_file} missing 'id'" + assert "severity" in rule, f"Rule {rule['id']} missing 'severity'" + assert "languages" in rule, f"Rule {rule['id']} missing 'languages'" + + def test_library_rule_has_lib_option(self): + """Library rules must have options.lib: true.""" + import yaml + + lib_rule = FIXTURES_DIR / "rules" / "java" / "lib" / "stirling-source.yaml" + if not lib_rule.exists(): + pytest.skip("Library rule fixture not created yet") + with open(lib_rule) as f: + data = yaml.safe_load(f) + for rule in data["rules"]: + assert rule.get("options", {}).get("lib") is True, ( + f"Library rule {rule['id']} missing options.lib: true" + ) + + def test_security_rule_has_metadata(self): + """Security rules must have metadata.cwe and metadata.short-description.""" + import yaml + + sec_rule = ( + FIXTURES_DIR + / "rules" + / "java" + / "security" + / "stirling-path-traversal.yaml" + ) + if not sec_rule.exists(): + pytest.skip("Security rule fixture not created yet") + with open(sec_rule) as f: + data = yaml.safe_load(f) + for rule in data["rules"]: + if rule.get("options", {}).get("lib"): + continue + meta = rule.get("metadata", {}) + assert "cwe" in meta, f"Security rule {rule['id']} missing metadata.cwe" + assert "short-description" in meta, ( + f"Security rule {rule['id']} missing metadata.short-description" + ) + + +class TestScanWithRuleIdFilter: + """2.3-2.4: Scan with --rule-id filter.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_scan_with_rule_id_filter( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Scan with --rule-id should only produce findings for the specified rule. + Library rules referenced via refs should be auto-included. + """ + sarif_path = tmp_output / "report.sarif" + custom_rules = FIXTURES_DIR / "rules" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(custom_rules)], + rule_ids=[ + "java/security/stirling-path-traversal.yaml:stirling-path-traversal" + ], + severity=["note", "warning", "error"], + timeout=600, + ) + result.assert_ok("Scan with --rule-id filter failed") + + data = load_sarif(sarif_path) + rule_ids = sarif_rule_ids(data) + for rid in rule_ids: + # With --semgrep-compatibility-sarif (default), rule IDs use dot-separated paths + assert rid in ( + "stirling-path-traversal", + "java.security.stirling-path-traversal", + ), f"Unexpected rule '{rid}' in output — --rule-id filter not working" + + @pytest.mark.slow + def test_scan_without_rule_id_filter_includes_all( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Scan without --rule-id should include findings from all active rules. + """ + sarif_path = tmp_output / "report.sarif" + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin"], + severity=["warning", "error"], + timeout=600, + ) + result.assert_ok("Scan without rule-id filter failed") + + data = load_sarif(sarif_path) + rule_ids = sarif_rule_ids(data) + assert len(rule_ids) > 1, f"Expected multiple rule IDs, got: {rule_ids}" + + +class TestRuleTests: + """2.5-2.7: Rule test workflow.""" + + @pytest.mark.slow + @pytest.mark.new_feature + def test_init_test_project(self, cli: OpenTaintCLI, tmp_output: Path): + """opentaint init-test-project bootstraps a valid Gradle test project.""" + test_project_dir = tmp_output / "test-project" + + result = cli.init_test_project( + output_dir=str(test_project_dir), + dependencies=[ + "org.springframework:spring-web:6.2.12", + "jakarta.servlet:jakarta.servlet-api:6.0.0", + ], + ) + if not result.ok: + pytest.skip("init-test-project not available (new feature)") + + assert (test_project_dir / "build.gradle.kts").exists() + assert (test_project_dir / "settings.gradle.kts").exists() + assert (test_project_dir / "libs" / "opentaint-sast-test-util.jar").exists() + assert (test_project_dir / "src" / "main" / "java" / "test").is_dir() + + @pytest.mark.slow + @pytest.mark.new_feature + def test_rule_test_all_pass(self, cli: OpenTaintCLI, tmp_output: Path): + """ + Create a test project with correct positive/negative samples. + Rule tests should all pass. + """ + test_project_dir = tmp_output / "test-project" + compiled_dir = tmp_output / "test-compiled" + test_output = tmp_output / "test-output" + rules_dir = FIXTURES_DIR / "rules" + + result = cli.init_test_project( + output_dir=str(test_project_dir), + dependencies=[ + "org.springframework:spring-web:6.2.12", + "jakarta.servlet:jakarta.servlet-api:6.0.0", + ], + ) + if not result.ok: + pytest.skip("init-test-project not available") + + # Copy test samples + samples_src = FIXTURES_DIR / "test-samples" / "src" + samples_dst = test_project_dir / "src" + if samples_src.exists(): + shutil.copytree(samples_src, samples_dst, dirs_exist_ok=True) + + compile_result = cli.compile(str(test_project_dir), str(compiled_dir)) + if not compile_result.ok: + combined = (compile_result.stdout + compile_result.stderr).lower() + # Autobuilder JAR may not be built locally — skip gracefully + if ( + "autobuilder" in combined + or "compile" in combined + or "compilation" in combined + ): + pytest.skip( + "Compilation failed (autobuilder JAR may not be available). " + "Build it with: cd core && ./gradlew :autobuilder:jar" + ) + compile_result.assert_ok("Failed to compile test project") + + test_result = cli.test_rules( + project_path=str(compiled_dir / "project.yaml"), + rulesets=[str(rules_dir)], + output_dir=str(test_output), + ) + test_result.assert_ok("Rule tests failed") + + result_json = test_output / "test-result.json" + assert result_json.exists(), "test-result.json not produced" + with open(result_json) as f: + results = json.load(f) + + assert len(results.get("falsePositive", [])) == 0, ( + f"Unexpected false positives: {results['falsePositive']}" + ) + assert len(results.get("falseNegative", [])) == 0, ( + f"Unexpected false negatives: {results['falseNegative']}" + ) + assert len(results.get("success", [])) > 0, ( + "No successful tests — something is wrong" + ) + + @pytest.mark.slow + @pytest.mark.new_feature + def test_rule_test_detects_false_negative( + self, cli: OpenTaintCLI, tmp_output: Path + ): + """ + A @PositiveRuleSample that doesn't match the rule → false negative. + This tests that the test framework correctly detects missing findings. + """ + test_project_dir = tmp_output / "test-project-fn" + compiled_dir = tmp_output / "test-compiled-fn" + test_output = tmp_output / "test-output-fn" + + rules_dir = tmp_output / "broken-rules" / "java" / "security" + rules_dir.mkdir(parents=True) + write_text( + rules_dir / "broken-rule.yaml", + """\ +rules: + - id: broken-path-traversal + severity: ERROR + message: This rule intentionally won't match + metadata: + cwe: CWE-22 + short-description: Broken rule for testing FN detection + languages: [java] + patterns: + - pattern: ThisClassDoesNotExist.neverCalled($X) +""", + ) + + result = cli.init_test_project( + output_dir=str(test_project_dir), + dependencies=["jakarta.servlet:jakarta.servlet-api:6.0.0"], + ) + if not result.ok: + pytest.skip("init-test-project not available") + + test_file = ( + test_project_dir + / "src" + / "main" + / "java" + / "test" + / "FalseNegativeTest.java" + ) + write_text( + test_file, + """\ +package test; + +import org.opentaint.sast.test.util.PositiveRuleSample; + +public class FalseNegativeTest { + + @PositiveRuleSample(value = "java/security/broken-rule.yaml", id = "broken-path-traversal") + public void shouldTriggerButWont() { + String x = System.getenv("USER_INPUT"); + System.out.println(x); + } +} +""", + ) + + compile_result = cli.compile(str(test_project_dir), str(compiled_dir)) + if not compile_result.ok: + if "autobuilder" in (compile_result.stdout + compile_result.stderr).lower(): + pytest.skip( + "Autobuilder JAR not available. " + "Build it with: cd core && ./gradlew :autobuilder:jar" + ) + pytest.skip("Cannot compile test project") + + test_result = cli.test_rules( + project_path=str(compiled_dir / "project.yaml"), + rulesets=[str(tmp_output / "broken-rules")], + output_dir=str(test_output), + ) + + result_json = test_output / "test-result.json" + assert result_json.exists(), ( + "test-result.json not produced — test-rules command may have failed.\n" + f" stdout: {test_result.stdout[:1000]}\n" + f" stderr: {test_result.stderr[:1000]}" + ) + test_result.assert_ok("test-rules command failed") + with open(result_json) as f: + results = json.load(f) + assert len(results.get("falseNegative", [])) > 0, ( + "Expected false negative not detected" + ) + + +class TestScanStirlingWithCustomRule: + """2.8: Run custom path-traversal rule on Stirling-PDF.""" + + @pytest.mark.slow + def test_scan_stirling_with_path_traversal_rule( + self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path + ): + """ + Scan Stirling-PDF with our custom path-traversal rule. + Stirling-PDF handles file uploads in several controllers — + we expect the rule to find some findings. + """ + sarif_path = tmp_output / "report.sarif" + custom_rules = FIXTURES_DIR / "rules" + + if not custom_rules.exists(): + pytest.skip("Fixture rules not created yet") + + result = cli.scan( + project_path=str(stirling_project), + output=str(sarif_path), + rulesets=["builtin", str(custom_rules)], + rule_ids=[ + "java/security/stirling-path-traversal.yaml:stirling-path-traversal" + ], + severity=["note", "warning", "error"], + timeout=600, + ) + + result.assert_ok("Scan with custom path-traversal rule failed") + data = load_sarif(sarif_path) + findings = sarif_findings_for_rule(data, "stirling-path-traversal") + print(f"Found {len(findings)} path-traversal findings in Stirling-PDF") + assert len(findings) > 0, ( + "Expected path-traversal findings in Stirling-PDF but got 0. " + "Check that the join rule's sink ref matches the builtin sink rule ID." + ) + for f in findings[:5]: + locs = f.get("locations", [{}]) + if locs: + uri = ( + locs[0] + .get("physicalLocation", {}) + .get("artifactLocation", {}) + .get("uri", "?") + ) + line = ( + locs[0] + .get("physicalLocation", {}) + .get("region", {}) + .get("startLine", "?") + ) + print(f" - {uri}:{line}") diff --git a/agent/meta-prompt.md b/agent/meta-prompt.md new file mode 100644 index 000000000..5627650f1 --- /dev/null +++ b/agent/meta-prompt.md @@ -0,0 +1,119 @@ +# OpenTaint Agent -- Meta Prompt + +You are an AI security analyst using OpenTaint, a dataflow-based SAST analyzer for JVM projects. Your goal is to find real vulnerabilities by iteratively creating rules, running analysis, and refining results. + +## Setup + +1. Run `opentaint agent skills` to get the skills directory path +2. Run `opentaint agent prompt` to get this file's path +3. Run `opentaint agent rules-path` to get the built-in rules directory +4. Read individual skill files as needed during each phase + +## Workflow + +Execute these four phases in order. Iterate phases 2-4 until the external methods list stabilizes and all findings are classified. + +### Phase 1: Project Setup + +1. **Build the project** (read `build-project.md`) + - Produce `./opentaint-project/project.yaml` +2. **Discover entry points** (read `discover-entry-points.md`) + - Identify attack surface, data sources, vulnerability classes + - Write `opentaint-analysis-plan.md` + +### Phase 2: Rule Creation + +1. **Check built-in rules** -- read rules in `$(opentaint agent rules-path)` +2. **Create rules** for uncovered vulnerability classes (read `create-rule.md`) + - Library rules in `agent-rules/java/lib/` + - Security rules in `agent-rules/java/security/` +3. **Test rules** (read `test-rule.md`) + - Create annotated test samples with `@PositiveRuleSample` / `@NegativeRuleSample` + - Fix until all tests pass + +### Phase 3: Analysis + +1. **Run analysis** (read `run-analysis.md`). Always pass a pre-compiled model via + `--project-model`, and use full rule IDs of the form `.yaml:`: + ```bash + opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/.yaml: \ + --track-external-methods + ``` +2. Collect `results/report.sarif`, and next to it the fixed-name files + `results/external-methods-without-rules.yaml` (taint-killing methods) and + `results/external-methods-with-rules.yaml` (already modeled). The + `--track-external-methods` flag is a boolean; the filenames and location are + fixed by the analyzer. + +### Phase 4: Results Interpretation and Iteration + +1. **Analyze findings** (read `analyze-findings.md`) + - Classify each SARIF finding as TP, FP (rule fix), or FP (approximation fix) + - Read `external-methods-without-rules.yaml` for FN discovery (these are the methods that kill taint) + +2. **For true positives**: Generate PoC (read `generate-poc.md`), document in `vulnerabilities.md` + +3. **For false positives**: Fix rules with `pattern-not`/`pattern-sanitizers`, update tests, re-run + +4. **For false negatives** (from external methods): + - Simple propagation -> YAML config (read `create-yaml-config.md`) + - Lambda/callback methods -> Code approximation (read `create-approximation.md`) + +5. **Re-run analysis** with updated rules/config/approximations + +6. **Stop when**: + - External methods list stabilizes + - All findings classified + - High-priority vulnerabilities have PoCs + +## Working Directory Layout + +``` +/ + opentaint-analysis-plan.md + vulnerabilities.md + opentaint-project/ # Built project model + agent-rules/ # Custom rules + java/lib/ + java/security/ + agent-config/ # YAML passThrough config + custom-propagators.yaml + agent-approximations/ + src/ # Java sources (auto-compiled by the CLI) + agent-test-project/ # Rule test project + results/ + report.sarif + external-methods-without-rules.yaml # written next to report.sarif + external-methods-with-rules.yaml +``` + +## Decision Guide + +| Situation | Action | Skill | +|-----------|--------|-------| +| Need new vulnerability detection | Create join-mode rule | create-rule | +| FP: over-broad pattern | Add pattern-not/sanitizers | create-rule | +| FN: library method kills taint | Add YAML passThrough | create-yaml-config | +| FN: lambda/callback method | Code-based approximation | create-approximation | +| Confirmed vulnerability | Generate PoC | generate-poc | + +## Note: Suspected Engine Issues + +If a rule that should fire keeps missing (or firing spuriously) even though the rule +tests pass and `external-methods-without-rules.yaml` has no methods on the relevant +path, read `opentaint-issue-investigation.md`. It walks through building a minimal +rule-test reproducer, ruling out library-model gaps, pinpointing the instruction where +IFDS drops the fact via `--debug-fact-reachability-sarif`, and writing a short report. + +## Key Constraints + +- Approximations (YAML and code-based) apply ONLY to external methods -- library classes without source code +- `--approximations-config` uses OVERRIDE mode, not extend; it is repeatable -- every occurrence is OVERRIDE-merged +- `--rule-id` takes the FULL rule ID: `.yaml:` (e.g. `java/security/my-vuln.yaml:my-vulnerability`) +- `--rule-id` drops every rule whose ID is not in the filter, including library rules referenced via `refs`. List every rule you need explicitly. +- `--track-external-methods` is a boolean; files are always written as `/external-methods-{without,with}-rules.yaml` +- Duplicate approximation targeting the same class as a built-in = error +- Each rule must have test coverage before running on the real project diff --git a/agent/skills/analyze-findings.md b/agent/skills/analyze-findings.md new file mode 100644 index 000000000..6e7e40fcd --- /dev/null +++ b/agent/skills/analyze-findings.md @@ -0,0 +1,95 @@ +# Skill: Analyze Findings + +Interpret SARIF findings and the external methods list to classify results and plan next actions. + +## Prerequisites + +- Analysis run complete (run-analysis skill) +- SARIF report and external methods YAML available + +## Procedure + +### 1. Read SARIF findings + +For each finding in `runs[0].results[]`: +- `ruleId`: Which rule triggered +- `locations[]`: Sink location (file, line) +- `codeFlows[]`: Taint trace from source to sink + +Read the trace: +- First location = **source** (where tainted data enters) +- Last location = **sink** (where tainted data is used dangerously) +- Intermediate locations = dataflow path + +### 2. Classify each finding + +**TRUE POSITIVE (TP)**: Real vulnerability. +- Source genuinely provides attacker-controlled data +- Sink genuinely performs a dangerous operation +- No sanitization between source and sink +- **Action**: Generate PoC (generate-poc skill), document in `vulnerabilities.md` + +**FALSE POSITIVE -- fixable via Rule**: Over-broad pattern matching. +- Sink pattern too broad, sanitizer not recognized, source matches non-attacker data +- **Action**: Add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or narrow `metavariable-regex`. Update tests. Re-run. + +**FALSE POSITIVE -- fixable via Approximation** (non-preferred): Imprecise taint propagation through a library method. +- Library method modeled as propagating taint when it actually neutralizes the threat +- **Action**: Override passThrough approximation. Re-run. + +### 3. Process external methods (FN discovery) + +The `--track-external-methods` flag produces two files next to the SARIF report: +- **`/external-methods-without-rules.yaml`** — Methods where the analyzer **killed dataflow facts** (no approximation model). **This is the only list worth approximating.** Every false negative caused by a missing library model is rooted here. +- **`/external-methods-with-rules.yaml`** — Methods that already have an approximation model. Do NOT target these with custom approximations or YAML `passThrough` rules — you would OVERRIDE an existing model, which is usually a regression. + +Filenames and directory are fixed; the flag is a boolean. + +**Approximation scope — hard rules**: +- Only methods listed in `external-methods-without-rules.yaml` are candidates for a new YAML `passThrough` rule or a code-based approximation. +- Methods not listed in either file were never reached on a tainted path during the scan; approximating them is a no-op until that changes (different sources/rules/entry points). +- Application-internal methods are never in these lists — approximations don't apply to them. Fix those via rule patterns, not approximations. + +Read `external-methods-without-rules.yaml`. **Prioritize generic data-flow propagators** over +vulnerability-specific methods. The most common cause of killed facts is mundane collection/utility +methods, not the vulnerability-relevant operations themselves. + +**HIGH PRIORITY — Generic propagators** (affect ALL vulnerability types): +- Collection operations: `List.add`/`List.get`, `Map.put`/`Map.get`, `Set.add`/`Set.iterator` +- String operations: `StringBuilder.append`/`toString`, `StringBuffer.append` +- Wrapper/DTO getters/setters: `Container.getValue`, `Pair.getFirst` +- Stream/iterator methods: `Iterator.next`, `Stream.collect` +- **Action**: Create `passThrough` YAML rules (create-yaml-config skill) + +**MEDIUM PRIORITY — Lambda/callback methods**: +- Example: `ReactiveStream#map(Function)` — taint flows through the function +- Example: `CompletableFuture#thenApply(Function)` — async propagation +- **Action**: Create code-based approximation (create-approximation skill) + +**LOW PRIORITY — Vulnerability-specific methods**: +- These are usually already modeled in built-in rules. Only add if missing. +- **Action**: Check `external-methods-with-rules.yaml` first; if present, skip. + +**NEUTRAL**: Irrelevant to taint flow (logging, metrics, sanitizers). +- **Action**: Skip — default call-to-return passthrough is correct + +### 4. Batch processing + +- Filter `external-methods-without-rules.yaml` to methods on a plausible source→sink path for the current vulnerability class; approximating methods that sit outside that path wastes iteration time. +- Group the filtered methods by package/library +- **Start with generic propagators** (collections, strings, wrappers) — they affect all rules +- Check built-in coverage first (many common libraries already have approximations — cross-check against `external-methods-with-rules.yaml`) +- Generate comprehensive rules per library +- Re-run with `--track-external-methods` after each batch; verify the approximated methods actually moved from `without-rules` to `with-rules`, and check for finding regressions + +## Decision Priorities + +- **FN fixes**: (1) YAML passThrough rule, (2) Code-based approximation (lambdas only), (3) Rule pattern fix +- **FP fixes**: (1) Rule fix via `pattern-not`/`pattern-sanitizers` (preferred), (2) PassThrough override (non-preferred) + +## Stop Condition + +Stop iterating when: +- External methods list stabilizes (no new methods appear) +- All SARIF findings are classified as TP or resolved FP +- High-priority vulnerabilities have PoCs diff --git a/agent/skills/build-project.md b/agent/skills/build-project.md new file mode 100644 index 000000000..7eb1ad248 --- /dev/null +++ b/agent/skills/build-project.md @@ -0,0 +1,77 @@ +# Skill: Build Project + +Build a target project and produce a `project.yaml` model for analysis. + +## Prerequisites + +- `opentaint` CLI available +- Java 21+ installed +- For Gradle/Maven: build tool installed, project builds independently + +## Procedure + +### 1. Determine project type + +Examine directory contents: +- `build.gradle` or `build.gradle.kts` -> Gradle +- `pom.xml` -> Maven +- Pre-compiled JARs/WARs -> classpath mode +- Existing `project.yaml` in a subdirectory -> already compiled + +### 2a. Gradle/Maven projects (autobuilder) + +```bash +opentaint compile /path/to/project -o ./opentaint-project +``` + +### 2b. If `opentaint compile` fails — manual build + `opentaint project` + +If the autobuilder cannot build the project, build it manually first, then create the project model: + +1. **Build the project manually**: +```bash +# Gradle +./gradlew build -x test + +# Maven +mvn package -DskipTests +``` + +2. **Create the project model with `opentaint project`**: + +> **CRITICAL**: Always specify `--package` to restrict analysis to project code only. +> Without `--package`, the analyzer will attempt to analyze ALL classes including third-party +> libraries, and will hang or run for hours. + +```bash +opentaint project \ + --output ./opentaint-project \ + --source-root /path/to/src \ + --classpath /path/to/app.jar \ + --package com.example.app +``` + +For multi-module projects, use multiple `--classpath` and `--package` flags: + +```bash +opentaint project \ + --output ./opentaint-project \ + --source-root /path/to/project \ + --classpath /path/to/module1/build/libs/module1.jar \ + --classpath /path/to/module2/build/libs/module2.jar \ + --package com.example.module1 \ + --package com.example.module2 +``` + +### 3. Verify + +Check that `./opentaint-project/project.yaml` exists and is non-empty. + +## Troubleshooting + +- **Build tool not found**: Install Gradle/Maven or use a wrapper (`./gradlew`, `./mvnw`) +- **Java version mismatch**: Set `JAVA_HOME` to the version required by the project +- **Compilation errors**: Check the autobuilder log, fix build issues, retry +- **Missing dependencies**: Ensure all submodules are initialized (`git submodule update --init`) +- **Autobuilder fails**: Build the project manually (see 2b above), then use `opentaint project` with the compiled artifacts +- **Analysis hangs**: You likely forgot `--package` — the analyzer is processing third-party libraries. Re-run `opentaint project` with `--package` to restrict to project code diff --git a/agent/skills/create-approximation.md b/agent/skills/create-approximation.md new file mode 100644 index 000000000..8fc4f265c --- /dev/null +++ b/agent/skills/create-approximation.md @@ -0,0 +1,120 @@ +# Skill: Create Approximation + +Create code-based approximations for complex library methods involving lambdas, async, or callbacks. + +## When approximations are actually useful + +Approximations (both code-based and YAML) only change the analysis of **external methods +with no existing model**. Concretely, this means the method the approximation targets must +appear in `/external-methods-without-rules.yaml` produced by the previous scan +(see `analyze-findings` skill). An entry there means the analyzer walked through that method +and **killed the dataflow facts** because it had no rule — that's the exact gap you can fill. + +If the method is in `external-methods-with-rules.yaml`, it is already modeled. Writing +another approximation for it is a no-op at best and conflicts with a built-in rule at worst +(duplicate-target error). Skip it. + +If the method is in neither list, the analyzer never reached it on a tainted path during +the scan. Adding an approximation will not change the result until the analyzer actually +observes a tainted argument flowing in. + +**Rule of thumb**: approximate only methods that are in the `without-rules` list **and** lie +on a code path relevant to your vulnerability (reachable between a source and a sink). + +## Prerequisites + +- A baseline scan has been run with `--track-external-methods` (see `run-analysis` skill) +- `external-methods-without-rules.yaml` has been read and the target method is in it (see `analyze-findings` skill) +- The method involves lambdas/callbacks/functional interfaces (YAML cannot model these — otherwise prefer `create-yaml-config`) +- The target class must NOT already have a built-in approximation (would be listed under `external-methods-with-rules.yaml` if so) + +## Procedure + +### 1. Create approximation source + +Create Java files in `agent-approximations/src/`: + +```java +package agent.approximations; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Function; + +@Approximate(com.example.lib.ReactiveProcessor.class) +public class ReactiveProcessor { + + // Model: taint on this flows through the function to the result + public Object transform(@ArgumentTypeContext Function fn) throws Throwable { + com.example.lib.ReactiveProcessor self = + (com.example.lib.ReactiveProcessor) (Object) this; + if (OpentaintNdUtil.nextBool()) return null; + Object input = self.getValue(); + return fn.apply(input); + } + + // Model: taint on this flows to the consumer argument + public void subscribe(@ArgumentTypeContext java.util.function.Consumer consumer) { + com.example.lib.ReactiveProcessor self = + (com.example.lib.ReactiveProcessor) (Object) this; + if (OpentaintNdUtil.nextBool()) { + consumer.accept(self.getValue()); + } + } +} +``` + +### 2. Run with approximations + +Point `--dataflow-approximations` at the source directory. The CLI auto-compiles `.java` +files using the analyzer JAR (for `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) +and the target project's dependencies, then forwards the compiled directory to the analyzer. +Manual `javac` invocation is not required. + +```bash +opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --dataflow-approximations ./agent-approximations/src +``` + +If `.java` compilation fails, the CLI reports the errors and aborts before the scan starts. +If the directory contains already-compiled `.class` files (no `.java` siblings), the CLI +passes it through unchanged. + +## Key Patterns + +| Pattern | Usage | +|---------|-------| +| `@Approximate(TargetClass.class)` | Link approximation to target class | +| `@ApproximateByName("fqn")` | Link by fully qualified name (when class not on compile classpath) | +| `(TargetClass) (Object) this` | Cast to access real object's methods | +| `@ArgumentTypeContext` | On lambda/functional interface parameters | +| `OpentaintNdUtil.nextBool()` | Non-deterministic branching (analyzer considers both paths) | + +## Constraints + +- Java 8 source compatibility +- One approximation class per target class (strict bijection) +- Must NOT target a class that already has a built-in approximation (will error at runtime). Verify by checking `external-methods-with-rules.yaml` — if the class appears there, it is already covered. +- Method signatures must match the target class methods exactly + +## Validating the approximation had an effect + +After re-running the scan with `--dataflow-approximations`, diff the before/after +`external-methods-without-rules.yaml`: + +- The approximated method should disappear from `without-rules` (moves to `with-rules`) +- If it does not move, your `@Approximate(...)` target class or the method signature does not match what the analyzer sees +- If new findings appear in the SARIF after the approximation, they are likely true positives the kill-facts was hiding + +## When to use code-based vs YAML + +- Lambda/callback invocation -> **Code-based** (this skill) +- Non-deterministic branching (async paths) -> **Code-based** +- Complex internal state with multiple method interactions -> **Code-based** +- Simple from-to propagation -> **YAML** (create-yaml-config skill) +- Method is **not** in `external-methods-without-rules.yaml` -> **do nothing** (approximation will have no observable effect) diff --git a/agent/skills/create-rule.md b/agent/skills/create-rule.md new file mode 100644 index 000000000..3cfd3d411 --- /dev/null +++ b/agent/skills/create-rule.md @@ -0,0 +1,150 @@ +# Skill: Create Rule + +Create pattern rules for detecting specific vulnerability classes. + +## Prerequisites + +- `opentaint` CLI available +- Understanding of the target vulnerability (source, sink, sanitizers) + +## Procedure + +### 1. Check existing coverage + +`opentaint agent rules-path` prints the absolute path to the built-in rules directory +(downloading them on first call). Use it to browse built-in patterns. + +```bash +RULES_DIR=$(opentaint agent rules-path) +ls $RULES_DIR/java/lib/generic/ +ls $RULES_DIR/java/lib/spring/ +ls $RULES_DIR/java/security/ +``` + +Read existing rules to understand patterns already covered. + +### 2. Create rule directory structure + +``` +agent-rules/ + java/ + lib/ + my-source.yaml + my-sink.yaml + security/ + my-vuln.yaml +``` + +### 3. Create library rules + +**Source rule** (`agent-rules/java/lib/my-source.yaml`): + +```yaml +rules: + - id: my-custom-source + options: + lib: true + severity: NOTE + message: Custom untrusted data source + languages: [java] + patterns: + - pattern-either: + - patterns: + - pattern: | + $RETURNTYPE $METHOD(HttpServletRequest $UNTRUSTED, ...) { ... } + - metavariable-pattern: + metavariable: $METHOD + pattern-either: + - pattern: doGet + - pattern: doPost +``` + +**Sink rule** (`agent-rules/java/lib/my-sink.yaml`): + +```yaml +rules: + - id: my-custom-sink + options: + lib: true + severity: NOTE + message: Custom dangerous operation + languages: [java] + mode: taint + pattern-sinks: + - patterns: + - pattern-either: + - pattern: (java.sql.Statement $S).executeQuery($UNTRUSTED) + - pattern: (java.sql.Statement $S).execute($UNTRUSTED) + - focus-metavariable: $UNTRUSTED +``` + +### 4. Create security rule (join mode) + +```yaml +rules: + - id: my-vulnerability + severity: ERROR + message: >- + Untrusted data flows to dangerous operation + metadata: + cwe: CWE-89 + short-description: SQL Injection via untrusted input + languages: [java] + mode: join + join: + refs: + - rule: java/lib/my-source.yaml#my-custom-source + as: source + - rule: java/lib/my-sink.yaml#my-custom-sink + as: sink + on: + - 'source.$UNTRUSTED -> sink.$UNTRUSTED' +``` + +### 5. Reference built-in library rules + +```yaml +refs: + - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + as: servlet-source + - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source + as: spring-source +``` + +### 6. Run analysis with specific rules + +The `--rule-id` flag requires the **full rule ID** in the format `:`. +The `ruleSetRelativePath` is the path to the YAML file relative to its ruleset root, **including** the `.yaml` extension. + +Library rules referenced via join-mode `refs` are NOT auto-included by `--rule-id` — the +filter drops every rule whose full ID is not listed. Either list every library rule +explicitly, or omit `--rule-id` entirely to keep all loaded rules active. + +```bash +# Full rule ID = "java/security/my-vuln.yaml" (relative path with .yaml) + ":" + "my-vulnerability" (id from YAML) +opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability +``` + +To discover full rule IDs, read the rule YAML file: +- The `id` field in the YAML gives the short ID +- The file path relative to the ruleset root (with `.yaml` extension) gives the prefix +- Combine as `:`, e.g. `java/security/path-traversal.yaml:path-traversal` + +## Constraints + +- Library rules MUST have `options.lib: true` and `severity: NOTE` +- Security rules MUST have `metadata.cwe` and `metadata.short-description` +- Source/sink metavariable names must match across `refs` and `on` clauses +- The `rule:` path in `refs` is relative to the ruleset root +- Rule IDs must be globally unique +- `--rule-id` requires the **full** rule ID (`:`), not just the short ID +- `--rule-id` drops every rule whose full ID is not listed, including library rules referenced via `refs`. List all rules you need explicitly, or omit `--rule-id`. +- For simple structural patterns (no dataflow), omit `mode:` (uses default mode) + +## FP/FN Fixes + +- **FP**: Add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or `metavariable-regex` +- **FN**: Add patterns to `pattern-either`, create new library rules, add new `on` clauses diff --git a/agent/skills/create-yaml-config.md b/agent/skills/create-yaml-config.md new file mode 100644 index 000000000..a176b055f --- /dev/null +++ b/agent/skills/create-yaml-config.md @@ -0,0 +1,157 @@ +# Skill: Create YAML Config + +Create YAML passThrough propagation rules for library methods. + +## When a passThrough rule actually changes the scan + +A custom `passThrough` entry only affects the analyzer's behavior if the target method is +an **external method with no existing model**. In practice: the method must appear in +`/external-methods-without-rules.yaml` produced by the previous scan +(see `analyze-findings` skill). That file is exactly the list of methods where the analyzer +killed dataflow facts for lack of a rule — those are the FN sources you can fix. + +Do not write passThrough rules for: +- Methods in `external-methods-with-rules.yaml` — already modeled; your rule will OVERRIDE the existing one, which is usually a regression. +- Methods that appear in neither list — the analyzer never reached them on a tainted path during the scan; the rule will be a no-op until that changes. +- Application-internal methods — approximations apply only to external library methods. + +**Rule of thumb**: open `external-methods-without-rules.yaml`, pick methods on a code path +from a source to a sink relevant to the target vulnerability, and write passThrough rules +for those. + +## Prerequisites + +- A baseline scan has been run with `--track-external-methods` (see `run-analysis` skill) +- `external-methods-without-rules.yaml` has been read; the methods you plan to model are in it (see `analyze-findings` skill) +- The method's propagation can be described by simple from/to copies (otherwise use `create-approximation`) + +## Procedure + +### 1. Create config file + +Create `agent-config/custom-propagators.yaml` with `passThrough:` rules. + +### 2. Common patterns + +**Simple getter** (taint on `this` to `result`): +```yaml +passThrough: + - function: com.example.lib.DataWrapper#getValue + copy: + - from: this + to: result +``` + +**Argument-to-result**: +```yaml +passThrough: + - function: com.example.lib.Converter#convert + copy: + - from: arg(0) + to: result +``` + +**Builder pattern**: +```yaml +passThrough: + - function: com.example.lib.Builder#withName + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result +``` + +**Object with internal state** (using ``): +```yaml +passThrough: + # Store taint + - function: com.example.lib.Container#put + copy: + - from: arg(0) + to: + - this + - .com.example.lib.Container##java.lang.Object + # Retrieve taint + - function: com.example.lib.Container#get + copy: + - from: + - this + - .com.example.lib.Container##java.lang.Object + to: result +``` + +**Package-wide getter pattern**: +```yaml +passThrough: + - function: + package: com.example.dto + class: + pattern: .* + name: + pattern: get.* + copy: + - from: this + to: result +``` + +**Conditional propagation**: +```yaml +passThrough: + - function: com.example.lib.Parser#parse + condition: + typeIs: + position: arg(0) + type: java.lang.String + copy: + - from: arg(0) + to: result +``` + +### 3. Run with config + +`--approximations-config` is repeatable. Each occurrence is OVERRIDE-merged with the default. + +```bash +opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --approximations-config ./agent-config/custom-propagators.yaml \ + --track-external-methods +``` + +### 4. Confirm the rule actually fired + +Keep `--track-external-methods` enabled and diff the fresh `external-methods-without-rules.yaml` +with the baseline one: + +- Every method you added a `passThrough` for should disappear from `without-rules` (it now moves to `with-rules`) +- If a method does not move, the `function` matcher did not match — check package, class, name, and `overrides:` +- If no new findings appear even though facts now propagate, the method was not on a source→sink path and the rule had no effect on results (harmless but noise; consider removing) + +## Reference + +### Position values +- `this`, `result`, `arg(0)`, `arg(1)`, ..., `arg(*)` +- Position modifiers (YAML list): `.[*]` (array element), `.ClassName#fieldName#fieldType` (field), `.` (synthetic state) + +### Function matching +- Simple: `package.Class#method` +- Complex: `{package, class, name}` with optional `pattern:` regex + +### Overrides +- `overrides: true` (default): applies to class and all subclasses +- `overrides: false`: exact class only + +### Conditions +`typeIs`, `annotatedWith`, `isConstant`, `isNull`, `constantMatches`, `tainted`, `numberOfArgs`, `methodAnnotated`, `classAnnotated`, `methodNameMatches`, `classNameMatches`, `isStaticField`, `anyOf`, `allOf`, `not` + +## When to use YAML vs code-based approximation + +- Simple from-to propagation -> **YAML** (this skill) +- Lambda/callback invocation -> **Code-based** (create-approximation skill) +- Non-deterministic branching -> **Code-based** +- Method is not in `external-methods-without-rules.yaml` -> **do nothing**; the rule will be a no-op (or, worse, an unintended OVERRIDE of an existing model) diff --git a/agent/skills/debug-rule-reachability.md b/agent/skills/debug-rule-reachability.md new file mode 100644 index 000000000..695b3f4b9 --- /dev/null +++ b/agent/skills/debug-rule-reachability.md @@ -0,0 +1,60 @@ +# Skill: Debug Rule Reachability + +Generate a fact reachability SARIF report to debug why a specific rule does (or doesn't) reach certain taint sinks. + +## Prerequisites + +- Project built (build-project skill) +- Rule created and tested (create-rule, test-rule skills) + +## ⚠️ CRITICAL: Single Rule Only + +**You MUST run the analyzer with exactly ONE rule** via a single `--rule-id` flag. Running fact reachability across multiple rules will produce an enormously huge SARIF report that is effectively unusable. + +## Procedure + +### Run analysis with fact reachability debugging + +```bash +opentaint scan --project-model ./opentaint-project \ + -o ./results/fact-reachability.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --debug-fact-reachability-sarif +``` + +The `--rule-id` flag requires the **full rule ID** in the format `:`. +Example: for a rule file at `agent-rules/java/security/my-vuln.yaml` with `id: my-vulnerability`, +the full ID is `java/security/my-vuln.yaml:my-vulnerability`. + +### View results + +```bash +opentaint summary ./results/fact-reachability.sarif --show-findings +``` + +## Key Flags + +| Flag | Purpose | +|------|---------| +| `--debug-fact-reachability-sarif` | Enable fact reachability SARIF output | +| `--rule-id` | **Exactly one** rule ID (format: `:`) | +| `--ruleset` | Rule directory (repeatable). Use `builtin` for built-in rules | +| `--timeout` | Analysis timeout (default 900s) | + +## Outputs + +The debug fact reachability report is **not** the main SARIF file specified by `-o`. The analyzer writes it as a **separate file** named `debug-ifds-fact-reachability.sarif` in the same output directory as the main report. + +For example, with `-o ./results/report.sarif`: + +- **`./results/report.sarif`** — Main vulnerability findings +- **`./results/debug-ifds-fact-reachability.sarif`** — Debug fact reachability report + +Always check the output directory (`-o` parent) for this file. + +## Notes + +- This is a debug-only option intended for troubleshooting rule coverage +- Pre-compiled project models are passed via `--project-model `, not as a positional argument +- `--rule-id` drops every rule whose full ID is not listed, **including** library rules referenced via join-mode `refs`; list each library rule explicitly if you need refs resolved diff --git a/agent/skills/discover-entry-points.md b/agent/skills/discover-entry-points.md new file mode 100644 index 000000000..3a5653468 --- /dev/null +++ b/agent/skills/discover-entry-points.md @@ -0,0 +1,45 @@ +# Skill: Discover Entry Points + +Identify the attack surface of the target project by reading source code and project structure. + +## Prerequisites + +- Target project source code accessible +- Project has been built (build-project skill complete) + +## Procedure + +### 1. Search for entry points by type + +Look for these patterns in the source code: + +- **Spring controllers**: `@RestController`, `@Controller`, `@RequestMapping`, `@GetMapping`, `@PostMapping`, `@PutMapping`, `@DeleteMapping` +- **Servlet handlers**: Classes extending `HttpServlet` with `doGet`, `doPost`, etc. +- **JAX-RS endpoints**: `@Path`, `@GET`, `@POST`, `@PUT`, `@DELETE` +- **Message handlers**: `@JmsListener`, `@KafkaListener`, `@RabbitListener` +- **CLI entry points**: `main(String[])` methods that process external input +- **Scheduled tasks**: `@Scheduled` methods that read external state + +### 2. For each entry point, determine + +- What external data it receives (HTTP params, headers, body, message payload) +- What operations it performs (DB queries, file I/O, command exec, HTTP calls) +- Which vulnerability classes are relevant (SQLi, XSS, command injection, path traversal, SSRF, XXE) + +### 3. Examine dependencies + +Read `build.gradle`, `pom.xml`, or `project.yaml` for: +- Web frameworks (Spring Boot, Micronaut, Quarkus) +- Database libraries (JDBC, JPA/Hibernate, MyBatis) +- Template engines (Thymeleaf, FreeMarker, Velocity) +- HTTP clients (OkHttp, Apache HttpClient, RestTemplate, WebClient) + +### 4. Record findings + +Document entry points, data sources, and relevant vulnerability classes in `opentaint-analysis-plan.md`. + +## Engine Notes + +- Spring projects: The analyzer auto-discovers Spring endpoints when `--project-kind spring-web` is set +- Generic projects: The analyzer uses all public/protected methods from public project classes +- Targeted analysis: Use `--debug-run-analysis-on-selected-entry-points "com.example.Class#method"` for focused testing diff --git a/agent/skills/generate-poc.md b/agent/skills/generate-poc.md new file mode 100644 index 000000000..63118960e --- /dev/null +++ b/agent/skills/generate-poc.md @@ -0,0 +1,83 @@ +# Skill: Generate PoC + +Generate a proof-of-concept exploit for a confirmed true positive vulnerability. + +## Prerequisites + +- A finding classified as TRUE POSITIVE (analyze-findings skill) +- SARIF trace read and understood + +## Procedure + +### 1. Extract vulnerability trace from SARIF + +- **Source**: Entry point + parameter (`codeFlows[0].threadFlows[0].locations[0]`) +- **Path**: Intermediate method calls +- **Sink**: Dangerous operation (`codeFlows[0].threadFlows[0].locations[-1]`) + +### 2. Construct PoC by vulnerability type + +**SQL Injection**: Input that extracts data or bypasses auth +```bash +curl "http://target:8080/api/users?id=1' OR '1'='1" +``` + +**Command Injection**: Input that executes arbitrary commands +```bash +curl "http://target:8080/api/process?cmd=;cat /etc/passwd" +``` + +**Path Traversal**: Input that accesses unauthorized files +```bash +curl "http://target:8080/api/files?path=../../../etc/passwd" +``` + +**XSS**: Input that executes JavaScript +```bash +curl "http://target:8080/api/search?q=" +``` + +**SSRF**: Input that makes the server request internal resources +```bash +curl "http://target:8080/api/fetch?url=http://169.254.169.254/latest/meta-data/" +``` + +**XXE**: XML input that reads files +```bash +curl -X POST "http://target:8080/api/parse" \ + -H "Content-Type: application/xml" \ + -d ']>&xxe;' +``` + +### 3. Document the finding + +```markdown +## VULN-001: SQL Injection in UserController.getUser + +**Severity**: Critical (CWE-89) +**Location**: `src/main/java/com/example/controller/UserController.java:45` +**Rule**: `my-vulnerability` + +### Description +User-controlled input from HTTP parameter `id` flows unsanitized into +a SQL query via `Statement.executeQuery()`. + +### Trace +1. **Source**: `UserController.getUser()` -- `request.getParameter("id")` (line 42) +2. **Flow**: String concatenation `"SELECT * FROM users WHERE id = " + input` (line 44) +3. **Sink**: `Statement.executeQuery(query)` (line 45) + +### Proof of Concept +\``` +curl "http://target:8080/api/users/1' OR '1'='1" +\``` + +### Remediation +Use parameterized queries: +\```java +PreparedStatement pstmt = conn.prepareStatement("SELECT * FROM users WHERE id = ?"); +pstmt.setString(1, input); +\``` +``` + +Write to `vulnerabilities.md` in the working directory. diff --git a/agent/skills/opentaint-issue-investigation.md b/agent/skills/opentaint-issue-investigation.md new file mode 100644 index 000000000..6e820e2e2 --- /dev/null +++ b/agent/skills/opentaint-issue-investigation.md @@ -0,0 +1,169 @@ +# Skill: OpenTaint Issue Investigation + +Investigate and confirm an issue in the OpenTaint analysis engine — a case where a rule +that should fire does not (or fires where it should not), and the cause is **not** the +rule's syntax or the library modeling, but the engine itself (e.g. an intra/inter-procedural +dataflow path that is cut unexpectedly). + +The deliverable is a small, self-contained reproducer plus a short write-up that points at +the exact instruction where the dataflow dies. + +## When to use this skill + +Use it after `analyze-findings` / `create-yaml-config` / `create-approximation` have been +exhausted and a finding is still missing (or spurious), even though: + +- The rule passes its own tests on isolated samples. +- `external-methods-without-rules.yaml` is empty (or irrelevant) for the relevant code path. +- Nothing about the library model is obviously wrong. + +If any of those is not true, stop and go fix the rule / add the approximation first. An +"engine issue" report is only credible once the trivial causes have been ruled out. + +## Prerequisites + +- Working rule with passing tests (`create-rule`, `test-rule`). +- Baseline scan has been run (`run-analysis`). +- `analyze-findings` has been consulted; the remaining failure is not explained by + `external-methods-without-rules.yaml`. + +## Procedure + +### 1. Build a minimal rule-test reproducer + +Shrink the original code to the smallest sample that still reproduces the problem, and put +it in a rule-test project (read `test-rule.md`). + +Choose the project shape based on what the real code needs: + +- **Plain method-level sample** — works for rules where the tainted flow stays inside one + method or crosses only ordinary Java calls. One class under `src/main/java/test/` with a + single `@PositiveRuleSample` (expected trigger) or `@NegativeRuleSample` (expected no + trigger) is enough. +- **Spring-app sub-project** — required whenever the real flow enters through a Spring + `@Controller`, uses Spring beans, or depends on dispatcher wiring. Create a dedicated + `spring-app-tests/` module with exactly one sample annotation, as described in + `test-rule.md` under *Testing Spring-app rules*. Positive and negative cases go in + separate sub-projects (e.g. `xss-spring-test-positive`, `xss-spring-test-negative`). + +Keep the sample as small as possible: remove every statement that is not needed to carry +taint from source to sink. A small reproducer is what makes the rest of the investigation +tractable — and it is what ships in the bug report. + +### 2. Confirm the issue reproduces on the test project + +Compile the test project and run the rule tests: + +```bash +opentaint compile ./agent-test-project -o ./agent-test-compiled +opentaint agent test-rules ./agent-test-compiled \ + -o ./agent-test-results \ + --ruleset builtin --ruleset ./agent-rules +``` + +Inspect `./agent-test-results/test-result.json`: + +- A `@PositiveRuleSample` that ends up in `falseNegative` reproduces a missed-detection + engine issue. +- A `@NegativeRuleSample` that ends up in `falsePositive` reproduces a spurious-detection + engine issue. +- `skipped` / `disabled` mean the rule was not actually exercised — fix the annotation + `value`/`id` or enable the rule before going further. +- `success` means the issue does **not** reproduce. Either the sample is too reduced, or + something in the original project (not in the sample) is what triggers the problem. Go + back to step 1 and add back the minimum context. + +Do not proceed until the test result matches the bug you are trying to document. + +### 3. Rule out missed external-method models + +Re-run the test with external-method tracking and read the two lists next to the SARIF +(read `analyze-findings.md`, §3): + +```bash +opentaint scan --project-model ./agent-test-compiled \ + -o ./agent-test-results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id .yaml: \ + --track-external-methods +``` + +Open `agent-test-results/external-methods-without-rules.yaml`. For every method that sits +on the source→sink path in your sample: + +- Simple propagator (getter/collection/builder) → add a YAML `passThrough` + (read `create-yaml-config.md`). +- Lambda/callback/async → add a code-based approximation + (read `create-approximation.md`). + +Re-run until that file contains **no methods on the relevant path**. Only then is it +legitimate to call the remaining failure an engine issue — otherwise you are just looking +at a missing library model. + +### 4. Locate where the dataflow dies + +Use the fact reachability debug SARIF to see exactly how far the taint travels (read +`debug-rule-reachability.md`). Run with a single `--rule-id`: + +```bash +opentaint scan --project-model ./agent-test-compiled \ + -o ./agent-test-results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id .yaml: \ + --debug-fact-reachability-sarif +``` + +Inspect `agent-test-results/debug-ifds-fact-reachability.sarif`. For a missed detection: + +1. Confirm the **source is matched** — at least one fact is reported at the source + location. If it is not, the problem is in the rule's `pattern-sources`, not the engine. +2. Walk the reachable facts along the expected path. Note the **last instruction that + still carries the fact** and the **first instruction where it is gone**. That gap is + where the engine drops the dataflow. +3. Check that the drop happens at an instruction that is **not relevant to the rule** — + e.g. a plain local assignment, a trivial method call with a modelled pass-through, a + cast, a field read. If the drop is at something the rule should handle (a recognised + sanitizer, a sink variant the rule was not written to match, etc.), the issue is still + in the rule, not in the engine. + +For a spurious detection, do the symmetric check: find the instruction where the fact +appears even though no tainted input reaches it. + +### 5. Write the investigation report + +Produce a short Markdown note (e.g. `issues/.md`) with: + +- **Reproducer** — path to the rule-test sub-project, the exact + `opentaint agent test-rules` command, and the relevant snippet from `test-result.json`. +- **Rule** — full rule ID (`.yaml:`) and the ruleset it came + from (`builtin` or `./agent-rules`). +- **Observed vs expected verdict** — e.g. *Expected: finding at `Sink.java:42`. Observed: + no finding; sample listed under `falseNegative`.* +- **Where the dataflow dies** — file, line, and the specific instruction from the fact + reachability SARIF. Quote the trace up to the last reachable fact and state which + instruction drops it. +- **Ruled-out causes** — + 1. Rule tests pass on an isolated method sample (rule syntax is fine). + 2. `external-methods-without-rules.yaml` has no methods on the relevant path (library + modeling is not the gap), or list the approximations that were added in step 3. + 3. The dropping instruction is unrelated to what the rule was meant to match (not a + sanitizer, not an unsupported sink variant, etc.). +- **Minimal hypothesis** — 1–3 sentences on what the engine is likely doing wrong at that + instruction (e.g. *"IFDS loses the fact across this `StringBuilder.append` because the + call is devirtualized to an `AbstractStringBuilder` overload that has no default + pass-through"*). Keep it short; this is a hypothesis, not a fix. + +Include only what is needed to reproduce and locate the problem. A good report is roughly +one screen of Markdown plus the rule-test sub-project. + +## Stop Condition + +The investigation is done when all of the following hold: + +- The rule-test sub-project reproduces the issue deterministically via + `opentaint agent test-rules`. +- No method on the expected source→sink path remains in + `external-methods-without-rules.yaml`. +- The fact reachability SARIF pinpoints a specific instruction where the taint is + dropped (or spuriously introduced) and that instruction is unrelated to the rule logic. +- The report in step 5 exists and is self-contained. diff --git a/agent/skills/run-analysis.md b/agent/skills/run-analysis.md new file mode 100644 index 000000000..7f138f38f --- /dev/null +++ b/agent/skills/run-analysis.md @@ -0,0 +1,94 @@ +# Skill: Run Analysis + +Run OpenTaint analysis on the target project and collect results. + +## Prerequisites + +- Project built (build-project skill) +- Rules created and tested (create-rule, test-rule skills) +- Optionally: YAML config (create-yaml-config skill) and/or approximations (create-approximation skill) + +## Procedure + +### Basic analysis + +The `--rule-id` flag requires the **full rule ID** in the format `:`. +Example: for a rule file at `agent-rules/java/security/my-vuln.yaml` with `id: my-vulnerability`, +the full ID is `java/security/my-vuln.yaml:my-vulnerability`. + +Pass the pre-compiled project model via `--project-model`. The positional `scan ` +argument is reserved for source projects that the CLI will compile itself. + +```bash +opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin \ + --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --track-external-methods +``` + +### With custom passThrough config + +`--approximations-config` is repeatable; every occurrence is OVERRIDE-merged. + +```bash +opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --approximations-config ./agent-config/custom-propagators.yaml \ + --track-external-methods +``` + +### With code-based approximations + +Point `--dataflow-approximations` at a directory of Java sources. The CLI auto-compiles +`.java` files into a temp directory and forwards that to the analyzer. + +```bash +opentaint scan --project-model ./opentaint-project \ + -o ./results/report.sarif \ + --ruleset builtin --ruleset ./agent-rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --dataflow-approximations ./agent-approximations/src \ + --track-external-methods +``` + +### View results + +```bash +opentaint summary ./results/report.sarif --show-findings +``` + +## Outputs + +Three files to collect — all next to the SARIF report: + +1. **`./results/report.sarif`** — Vulnerability findings with code flow traces +2. **`./results/external-methods-without-rules.yaml`** — Methods where no pass-through rules fired (**dataflow facts killed here — these cause false negatives**) +3. **`./results/external-methods-with-rules.yaml`** — Methods where pass-through rules were applied (already modeled, typically no action needed) + +The `--track-external-methods` flag is a boolean. Filenames and location are fixed: the +two YAMLs are written into the same directory as the SARIF file, using the names above. + +## Key Flags + +| Flag | Purpose | +|------|---------| +| `--project-model` | Pre-compiled project model directory (contains `project.yaml`) | +| `--ruleset` | Rule directory (repeatable). Use `builtin` for built-in rules | +| `--rule-id` | Enable only specific rules by full ID `.yaml:` (repeatable) | +| `--approximations-config` | YAML passThrough config (OVERRIDE mode, repeatable) | +| `--dataflow-approximations` | Directory of Java sources or compiled class files (repeatable) | +| `--track-external-methods` | Emit `external-methods-{without,with}-rules.yaml` next to the SARIF | +| `--severity` | Filter by severity (note, warning, error) | +| `--timeout` | Analysis timeout (default 900s) | + +## Notes + +- For a pre-compiled model, always use `--project-model `. The positional argument is only for source projects that will be compiled by the CLI. +- `--rule-id` drops every rule whose full ID is not in the filter, **including library rules referenced via join-mode `refs`**. List every rule you want active explicitly. +- `--approximations-config` uses OVERRIDE mode: custom rules replace (not extend) default config for matching methods. +- `--dataflow-approximations` accepts a directory. `.java` files are auto-compiled by the CLI; already-compiled `.class` directories are passed through as-is. +- Duplicate approximation targeting the same class as a built-in will cause an error. diff --git a/agent/skills/test-rule.md b/agent/skills/test-rule.md new file mode 100644 index 000000000..0b28da220 --- /dev/null +++ b/agent/skills/test-rule.md @@ -0,0 +1,160 @@ +# Skill: Test Rule + +Create test samples for a rule and verify it works correctly. + +## Prerequisites + +- `opentaint` CLI available +- Rules created (create-rule skill) +- Target project dependencies known + +## Procedure + +### 1. Bootstrap test project + +```bash +opentaint agent init-test-project ./agent-test-project \ + --dependency "javax.servlet:javax.servlet-api:4.0.1" +``` + +Or manually create a Gradle project with the test utility JAR and required dependencies. + +### 2. Create test samples + +Create Java files in `src/main/java/test/` with `@PositiveRuleSample` and `@NegativeRuleSample` annotations: + +```java +package test; + +import org.opentaint.sast.test.util.PositiveRuleSample; +import org.opentaint.sast.test.util.NegativeRuleSample; +import javax.servlet.http.HttpServletRequest; +import java.sql.Connection; +import java.sql.Statement; + +public class MyVulnTest { + private Connection db; + + @PositiveRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") + public void vulnerable(HttpServletRequest req) throws Exception { + String input = req.getParameter("id"); + Statement stmt = db.createStatement(); + stmt.executeQuery("SELECT * FROM users WHERE id = " + input); + } + + @NegativeRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") + public void safe(HttpServletRequest req) throws Exception { + String input = req.getParameter("id"); + var pstmt = db.prepareStatement("SELECT * FROM users WHERE id = ?"); + pstmt.setString(1, input); + pstmt.executeQuery(); + } +} +``` + +### 3. Build test project + +```bash +opentaint compile ./agent-test-project -o ./agent-test-compiled +``` + +### 4. Run rule tests + +**Always specify `-o`** so results are written to a known location: + +```bash +opentaint agent test-rules ./agent-test-compiled \ + -o ./agent-test-results \ + --ruleset builtin --ruleset ./agent-rules +``` + +### 5. Interpret results + +Read `./agent-test-results/test-result.json`: + +- **success**: Test passed (positive triggered, negative didn't) +- **falseNegative**: Positive sample did NOT trigger -> rule patterns too narrow +- **falsePositive**: Negative sample DID trigger -> rule patterns too broad +- **skipped**: Rule not found -> check `value` path and `id` match the rule file +- **disabled**: Rule is disabled + +## Testing Spring-app rules + +Some rules only fire inside a full Spring MVC entry-point graph (controllers, beans, dispatcher). A plain unit-like sample with `@PositiveRuleSample` on a bare method will not trigger them, because the tainted data must flow from a discovered `@Controller` entry point. + +For these rules, create **one dedicated Gradle sub-project per sample**. Each sub-project represents a complete, minimal Spring application containing **exactly one** `@PositiveRuleSample` or `@NegativeRuleSample` annotation. Split positive and negative cases into separate sub-projects, e.g. `xss-spring-test-positive` and `xss-spring-test-negative`. + +### How detection works + +`TestProjectAnalyzer` computes a `testSetName` per module as `module.moduleSourceRoot.relativeTo(project.sourceRoot)`, with `/` replaced by `-` (see `core/src/main/kotlin/org/opentaint/jvm/sast/project/TestProjectAnalyzer.kt`). If the name starts with `spring-app-tests`, the module is treated as a Spring test set: + +- All sample annotations in the module are collected as usual. +- Each sample is wrapped in a `SpringTestSample` that uses the Spring dispatcher method as the analysis entry point instead of the annotated method itself. +- Taint therefore originates from real `@Controller` request parameters and must reach the annotated sink method through normal Spring wiring. + +Consequence: the annotated method is only a marker for **which rule to run and the expected verdict**. The actual vulnerable/safe flow must be reachable from a controller in the same module. Keep each module to a single annotation so the verdict is unambiguous. + +### Project layout + +Use a multi-module Gradle build where every `spring-app-tests/` directory is its own sub-project: + +``` +agent-test-project/ +├── settings.gradle.kts +├── build.gradle.kts +└── spring-app-tests/ + ├── xss-spring-test-positive/ + │ ├── build.gradle.kts + │ └── src/main/java/test/ + │ ├── VulnerableController.java // @Controller with the tainted flow + │ └── VulnerableSink.java // carries the single @PositiveRuleSample + └── xss-spring-test-negative/ + ├── build.gradle.kts + └── src/main/java/test/ + ├── SafeController.java + └── SafeSink.java // carries the single @NegativeRuleSample +``` + +`settings.gradle.kts` should auto-discover every `spring-app-tests/*/build.gradle.kts` so adding a new case only requires a new directory. See `rules/test/settings.gradle.kts` for a reference implementation. + +### Required dependencies + +Each Spring sub-project must pull in at least: + +- `compileOnly` on `opentaint-sast-test-util` (for the sample annotations) +- `org.springframework:spring-webmvc` and `spring-context` (so `@Controller` is recognized) +- Any libraries used by the sample itself (servlet-api, JDBC, etc.) + +### Compile and run + +Compile and test the multi-module project the same way as a regular test project: + +```bash +opentaint compile ./agent-test-project -o ./agent-test-compiled +opentaint agent test-rules ./agent-test-compiled \ + -o ./agent-test-results \ + --ruleset builtin --ruleset ./agent-rules +``` + +Each `spring-app-tests/` sub-project becomes an independent test set and appears as its own entry in `test-result.json`. + +### Common pitfalls + +- **No `@Controller` in the module** -> `TestProjectAnalyzer` logs `No spring entry point found` and the sample is analyzed without Spring context, usually producing a false negative. Always include a controller that reaches the sink. +- **More than one annotation per module** -> the module still runs, but results become ambiguous; keep it to one sample per sub-project. +- **Module path does not start with `spring-app-tests`** -> `isSpringAppTestSet()` returns `false` and the sample is analyzed as a regular method-level test, so Spring-specific flows will not be triggered. + +## Annotation Fields + +- `value`: Path to rule YAML file, relative to ruleset root (e.g. `java/security/my-vuln.yaml`) +- `id`: Short rule ID within that file (the `id` field from the YAML, e.g. `my-vulnerability`) + +**Note**: The annotation `id` field uses the **short** rule ID (as written in the YAML file). +This is different from `--rule-id` in `opentaint scan`, which requires the **full** rule ID +in the format `:` (e.g. `java/security/my-vuln.yaml:my-vulnerability`). + +## Troubleshooting + +- **falseNegative**: Broaden source/sink patterns, check metavariable names match +- **falsePositive**: Add `pattern-not`, `pattern-sanitizers`, or narrow `metavariable-regex` +- **skipped**: Verify rule file path and ID, check rule is not disabled diff --git a/cli/Makefile b/cli/Makefile new file mode 100644 index 000000000..64c9562b7 --- /dev/null +++ b/cli/Makefile @@ -0,0 +1,27 @@ +GO ?= go + +BINARY_NAME ?= opentaint +BUILD_DIR ?= bin +BINARY_PATH := $(BUILD_DIR)/$(BINARY_NAME) + +PREFIX ?= /usr/local +BINDIR ?= $(PREFIX)/bin +INSTALL_GOBIN := $(abspath $(BINDIR)) + +.PHONY: all generate build install clean + +all: build + +generate: + $(GO) generate ./... + +build: generate + mkdir -p $(BUILD_DIR) + $(GO) build -o $(BINARY_PATH) . + +install: generate + mkdir -p $(BINDIR) + GOBIN=$(INSTALL_GOBIN) $(GO) install . + +clean: + rm -f $(BINARY_PATH) diff --git a/cli/cmd/agent.go b/cli/cmd/agent.go new file mode 100644 index 000000000..f2be1e8ff --- /dev/null +++ b/cli/cmd/agent.go @@ -0,0 +1,16 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +// agentCmd represents the agent command group +var agentCmd = &cobra.Command{ + Use: "agent", + Short: "Agent mode utilities", + Long: `Commands for AI agent integration: locate skills, meta-prompt, rules, and run rule tests.`, +} + +func init() { + rootCmd.AddCommand(agentCmd) +} diff --git a/cli/cmd/agent_init_test_project.go b/cli/cmd/agent_init_test_project.go new file mode 100644 index 000000000..ae0b675c2 --- /dev/null +++ b/cli/cmd/agent_init_test_project.go @@ -0,0 +1,178 @@ +package cmd + +import ( + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/seqra/opentaint/internal/testutil" + "github.com/seqra/opentaint/internal/utils" + "github.com/spf13/cobra" +) + +var initTestProjectDeps []string + +var agentInitTestProjectCmd = &cobra.Command{ + Use: "init-test-project ", + Short: "Bootstrap a rule test project with build.gradle.kts and test utility JAR", + Long: `Creates a minimal Gradle project structure for testing OpenTaint rules. + +The project includes: + - build.gradle.kts with compile-only dependencies + - settings.gradle.kts + - libs/opentaint-sast-test-util.jar (provides @PositiveRuleSample and @NegativeRuleSample annotations) + - src/main/java/test/ directory for test sample sources + +Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + outputDir := args[0] + + // 1. Create directory structure + dirs := []string{ + filepath.Join(outputDir, "libs"), + filepath.Join(outputDir, "src", "main", "java", "test"), + } + for _, d := range dirs { + if err := os.MkdirAll(d, 0o755); err != nil { + out.Fatalf("Failed to create directory %s: %s", d, err) + } + } + + // 2. Resolve and copy opentaint-sast-test-util.jar + testUtilJarSrc, err := resolveTestUtilJar() + if err != nil { + out.Fatalf("Failed to resolve test-util JAR: %s", err) + } + testUtilJarDst := filepath.Join(outputDir, "libs", "opentaint-sast-test-util.jar") + if err := copyFile(testUtilJarSrc, testUtilJarDst); err != nil { + out.Fatalf("Failed to copy test-util JAR: %s", err) + } + + // 3. Generate build.gradle.kts + if err := generateBuildGradle(outputDir, initTestProjectDeps); err != nil { + out.Fatalf("Failed to generate build.gradle.kts: %s", err) + } + + // 4. Generate settings.gradle.kts + if err := generateSettingsGradle(outputDir); err != nil { + out.Fatalf("Failed to generate settings.gradle.kts: %s", err) + } + + fmt.Printf("Test project initialized at %s\n", outputDir) + }, +} + +func init() { + agentCmd.AddCommand(agentInitTestProjectCmd) + agentInitTestProjectCmd.Flags().StringArrayVar(&initTestProjectDeps, "dependency", nil, + "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") +} + +// resolveTestUtilJar finds the opentaint-sast-test-util.jar. +// Resolution order: +// 1. Bundled path next to binary: /lib/opentaint-sast-test-util.jar +// 2. Install path: ~/.opentaint/install/lib/opentaint-sast-test-util.jar +// 3. Dev build: /core/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar +func resolveTestUtilJar() (string, error) { + const jarName = "opentaint-sast-test-util.jar" + + // Tier 1: Bundled next to binary + if libPath := utils.GetBundledLibPath(); libPath != "" { + candidate := filepath.Join(libPath, jarName) + if _, err := os.Stat(candidate); err == nil { + return candidate, nil + } + } + + // Tier 2: Install path + if libPath := utils.GetInstallLibPath(); libPath != "" { + candidate := filepath.Join(libPath, jarName) + if _, err := os.Stat(candidate); err == nil { + return candidate, nil + } + } + + // Tier 3: Dev build — walk up from exe dir to find core/opentaint-sast-test-util/build/libs/ + if exe, err := os.Executable(); err == nil { + exe, _ = filepath.EvalSymlinks(exe) + // exe is typically at cli/bin/opentaint, so repo root is ../../ + dir := filepath.Dir(exe) + for i := 0; i < 4; i++ { + candidate := filepath.Join(dir, "core", "opentaint-sast-test-util", "build", "libs", jarName) + if _, err := os.Stat(candidate); err == nil { + return candidate, nil + } + dir = filepath.Dir(dir) + } + } + + // Tier 4: Extract from embedded binary + if extracted, err := testutil.ExtractJar(); err == nil { + return extracted, nil + } + + return "", fmt.Errorf( + "%s not found; build it with 'cd core && ./gradlew :opentaint-sast-test-util:jar' or reinstall opentaint", + jarName, + ) +} + +func copyFile(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return fmt.Errorf("open source: %w", err) + } + defer in.Close() + + if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil { + return fmt.Errorf("create parent dir: %w", err) + } + + outFile, err := os.Create(dst) + if err != nil { + return fmt.Errorf("create destination: %w", err) + } + defer outFile.Close() + + if _, err := io.Copy(outFile, in); err != nil { + return fmt.Errorf("copy: %w", err) + } + return nil +} + +func generateBuildGradle(outputDir string, dependencies []string) error { + var sb strings.Builder + sb.WriteString(`plugins { + java +} + +java { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 +} + +repositories { + mavenCentral() +} + +dependencies { + compileOnly(files("libs/opentaint-sast-test-util.jar")) +`) + for _, dep := range dependencies { + sb.WriteString(fmt.Sprintf(" compileOnly(\"%s\")\n", dep)) + } + sb.WriteString("}\n") + + path := filepath.Join(outputDir, "build.gradle.kts") + return os.WriteFile(path, []byte(sb.String()), 0o644) +} + +func generateSettingsGradle(outputDir string) error { + content := `rootProject.name = "opentaint-rule-test" +` + path := filepath.Join(outputDir, "settings.gradle.kts") + return os.WriteFile(path, []byte(content), 0o644) +} diff --git a/cli/cmd/agent_prompt.go b/cli/cmd/agent_prompt.go new file mode 100644 index 000000000..60ac47893 --- /dev/null +++ b/cli/cmd/agent_prompt.go @@ -0,0 +1,26 @@ +package cmd + +import ( + "fmt" + "path/filepath" + + "github.com/seqra/opentaint/internal/agent" + "github.com/spf13/cobra" +) + +var agentPromptCmd = &cobra.Command{ + Use: "prompt", + Short: "Print the path to the meta-prompt file", + Run: func(cmd *cobra.Command, args []string) { + agentPath, err := agent.GetPath() + if err != nil { + out.Fatalf("Error: %s", err) + } + promptPath := filepath.Join(agentPath, "meta-prompt.md") + fmt.Println(promptPath) + }, +} + +func init() { + agentCmd.AddCommand(agentPromptCmd) +} diff --git a/cli/cmd/agent_rules_path.go b/cli/cmd/agent_rules_path.go new file mode 100644 index 000000000..ba30ae055 --- /dev/null +++ b/cli/cmd/agent_rules_path.go @@ -0,0 +1,41 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/utils" + "github.com/spf13/cobra" +) + +var agentRulesPathCmd = &cobra.Command{ + Use: "rules-path", + Short: "Print the path to the builtin rules directory (downloads on demand)", + Run: func(cmd *cobra.Command, args []string) { + rulesPath, err := utils.GetRulesPath(globals.Config.Rules.Version) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %s\n", err) + os.Exit(1) + } + + // Download if not present + if _, err := os.Stat(rulesPath); os.IsNotExist(err) { + if dlErr := utils.DownloadAndUnpackGithubReleaseAsset( + globals.Config.Owner, globals.Config.Repo, + globals.Config.Rules.Version, globals.RulesAssetName, + rulesPath, globals.Config.Github.Token, + globals.Config.SkipVerify, out, + ); dlErr != nil { + fmt.Fprintf(os.Stderr, "Error downloading rules: %s\n", dlErr) + os.Exit(1) + } + } + + fmt.Println(rulesPath) + }, +} + +func init() { + agentCmd.AddCommand(agentRulesPathCmd) +} diff --git a/cli/cmd/agent_skills.go b/cli/cmd/agent_skills.go new file mode 100644 index 000000000..74689b847 --- /dev/null +++ b/cli/cmd/agent_skills.go @@ -0,0 +1,26 @@ +package cmd + +import ( + "fmt" + "path/filepath" + + "github.com/seqra/opentaint/internal/agent" + "github.com/spf13/cobra" +) + +var agentSkillsCmd = &cobra.Command{ + Use: "skills", + Short: "Print the path to the skills directory", + Run: func(cmd *cobra.Command, args []string) { + agentPath, err := agent.GetPath() + if err != nil { + out.Fatalf("Error: %s", err) + } + skillsPath := filepath.Join(agentPath, "skills") + fmt.Println(skillsPath) + }, +} + +func init() { + agentCmd.AddCommand(agentSkillsCmd) +} diff --git a/cli/cmd/agent_test_rules.go b/cli/cmd/agent_test_rules.go new file mode 100644 index 000000000..0beca9fd2 --- /dev/null +++ b/cli/cmd/agent_test_rules.go @@ -0,0 +1,153 @@ +package cmd + +import ( + "fmt" + "os" + "path/filepath" + "time" + + "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/utils" + "github.com/seqra/opentaint/internal/utils/java" + "github.com/seqra/opentaint/internal/utils/log" + "github.com/spf13/cobra" +) + +var ( + testRulesRuleset []string + testRulesOutputDir string + testRulesTimeout time.Duration + testRulesMaxMemory string + testRulesRuleID []string +) + +var agentTestRulesCmd = &cobra.Command{ + Use: "test-rules ", + Short: "Run rule tests against annotated test samples", + Long: `Run rule tests against annotated test samples in the given project model. + +Exit codes: + 0 All rule tests passed + 1 General failure (configuration or infrastructure error) + 252 Unhandled analyzer exception + 253 Out of memory (try increasing --max-memory) + 254 Analysis timed out (try increasing --timeout) + 255 Project configuration error`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + projectPath := log.AbsPathOrExit(args[0], "project-model") + nativeProjectPath := filepath.Join(projectPath, "project.yaml") + + if _, err := os.Stat(nativeProjectPath); os.IsNotExist(err) { + out.Fatalf("Project model not found: %s", nativeProjectPath) + } + + // Validate max-memory + maxMemory, err := utils.ParseMemoryValue(testRulesMaxMemory) + if err != nil { + out.Fatalf("Invalid --max-memory value: %s", err) + } + + // Resolve output directory + outputDir := testRulesOutputDir + if outputDir == "" { + tmpDir, err := os.MkdirTemp("", "opentaint-test-rules-*") + if err != nil { + out.Fatalf("Failed to create temp dir: %s", err) + } + outputDir = tmpDir + // Note: temp dir is NOT cleaned up so results remain accessible to the agent. + // The agent should always specify -o to control the output location. + } else { + outputDir = log.AbsPathOrExit(outputDir, "output") + if err := os.MkdirAll(outputDir, 0755); err != nil { + out.Fatalf("Failed to create output directory: %s", err) + } + } + + // Ensure builtin rules are available + rulesPath, err := utils.GetRulesPath(globals.Config.Rules.Version) + if err != nil { + out.Fatalf("Failed to resolve rules path: %s", err) + } + if _, err := os.Stat(rulesPath); os.IsNotExist(err) { + if dlErr := utils.DownloadAndUnpackGithubReleaseAsset( + globals.Config.Owner, globals.Config.Repo, + globals.Config.Rules.Version, globals.RulesAssetName, + rulesPath, globals.Config.Github.Token, + globals.Config.SkipVerify, out, + ); dlErr != nil { + out.Fatalf("Failed to download rules: %s", dlErr) + } + } + + timeoutSeconds := int64(testRulesTimeout / time.Second) + if timeoutSeconds <= 0 { + timeoutSeconds = 600 + } + + builder := NewAnalyzerBuilder(). + SetProject(nativeProjectPath). + SetOutputDir(outputDir). + SetSarifFileName("test-results.sarif"). + SetIfdsAnalysisTimeout(timeoutSeconds). + AddRuleSet(rulesPath). + EnableRunRuleTests() + + if maxMemory != "" { + builder.SetMaxMemory(maxMemory) + } + + // Add user rulesets + for _, rs := range testRulesRuleset { + absPath := log.AbsPathOrExit(rs, "ruleset") + builder.AddRuleSet(absPath) + } + + // Add rule ID filters + for _, ruleID := range testRulesRuleID { + builder.AddRuleID(ruleID) + } + + analyzerJarPath, err := ensureAnalyzerAvailable() + if err != nil { + out.Fatalf("Failed to resolve analyzer: %s", err) + } + builder.SetJarPath(analyzerJarPath) + + javaRunner := java.NewJavaRunner(). + WithSkipVerify(globals.Config.SkipVerify). + WithDebugOutput(out.DebugStream("Analyzer")). + WithImageType(java.AdoptiumImageJRE). + TrySpecificVersion(globals.DefaultJavaVersion) + if _, err := javaRunner.EnsureJava(); err != nil { + out.Fatalf("Failed to resolve Java: %s", err) + } + + cmdErr, err := scanProject(builder, javaRunner) + if err != nil { + out.Fatalf("Rule tests failed: %s", err) + } + analyzerFail := classifyAnalyzerError(cmdErr) + + // Always print output paths so the agent can inspect partial results + fmt.Printf("Results directory: %s\n", outputDir) + fmt.Printf("Test results: %s\n", filepath.Join(outputDir, "test-result.json")) + + if analyzerFail != nil { + os.Exit(analyzerFail.exitCode) + } + + fmt.Printf("Rule tests completed successfully\n") + }, +} + +func init() { + agentCmd.AddCommand(agentTestRulesCmd) + + agentTestRulesCmd.Flags().StringArrayVar(&testRulesRuleset, "ruleset", nil, "Additional ruleset path (repeatable)") + agentTestRulesCmd.Flags().StringVarP(&testRulesOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") + agentTestRulesCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Timeout for analysis") + agentTestRulesCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") + agentTestRulesCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") +} diff --git a/cli/cmd/analyzer_exit.go b/cli/cmd/analyzer_exit.go new file mode 100644 index 000000000..8dd5a3cf4 --- /dev/null +++ b/cli/cmd/analyzer_exit.go @@ -0,0 +1,68 @@ +package cmd + +import ( + "fmt" + + "github.com/seqra/opentaint/internal/utils/java" +) + +// Analyzer exit codes as seen by the OS (unsigned byte values). +// These correspond to the Kotlin exitProcess() calls in AbstractAnalyzerRunner: +// +// exitProcess(-1) → 255 (project configuration error) +// exitProcess(-2) → 254 (analysis timeout) +// exitProcess(-3) → 253 (out of memory) +// exitProcess(-4) → 252 (unhandled exception) +const ( + analyzerExitConfigError = 255 + analyzerExitTimeout = 254 + analyzerExitOOM = 253 + analyzerExitException = 252 +) + +// analyzerError holds information about an analyzer failure. +// exitCode is the process exit code to forward to os.Exit. +type analyzerError struct { + exitCode int + message string +} + +// analyzerExitMessage returns a human-readable description for a known +// analyzer exit code, or empty string if the code is not recognized. +func analyzerExitMessage(code int) string { + switch code { + case analyzerExitConfigError: + return "project configuration error" + case analyzerExitTimeout: + return "analysis timed out — try increasing --timeout or --max-memory" + case analyzerExitOOM: + return "out of memory — try increasing --max-memory (e.g. --max-memory 16G)" + case analyzerExitException: + return "unhandled analyzer exception" + default: + return "" + } +} + +// classifyAnalyzerError converts a *JavaCommandError into an *analyzerError +// with a human-readable message. Returns nil when cmdErr is nil. +// +// The error message is printed immediately. The caller is responsible for +// eventually calling os.Exit with the returned exit code after performing +// any post-failure work (e.g. printing summaries). +func classifyAnalyzerError(cmdErr *java.JavaCommandError) *analyzerError { + if cmdErr == nil { + return nil + } + + code := cmdErr.ExitCode + if msg := analyzerExitMessage(code); msg != "" { + formatted := fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg) + out.Error(formatted) + return &analyzerError{exitCode: code, message: formatted} + } + + formatted := fmt.Sprintf("Analysis failed with exit code %d", code) + out.Error(formatted) + return &analyzerError{exitCode: code, message: formatted} +} diff --git a/cli/cmd/command_builder.go b/cli/cmd/command_builder.go index aa353a07b..0e53353dd 100644 --- a/cli/cmd/command_builder.go +++ b/cli/cmd/command_builder.go @@ -42,26 +42,28 @@ func NewAutobuilderBuilder() *AutobuilderBuilder { type AnalyzerBuilder struct { *BaseCommandBuilder - projectPath string - outputDir string - sarifFileName string - sarifCodeFlowLimit int64 - sarifToolVersion string - sarifToolSemanticVersion string - sarifUriBase string - semgrepCompatibility bool - partialFingerprints bool - ifdsAnalysisTimeout int64 - severities []string - ruleSetPaths []string - ruleLoadTracePath string - jarPath string - maxMemory string - ruleIDs []string - approximationsConfig []string - dataflowApproximations []string - trackExternalMethods bool - debugFactReachabilitySarif bool + projectPath string + outputDir string + sarifFileName string + sarifCodeFlowLimit int64 + sarifToolVersion string + sarifToolSemanticVersion string + sarifUriBase string + semgrepCompatibility bool + partialFingerprints bool + ifdsAnalysisTimeout int64 + severities []string + ruleSetPaths []string + ruleLoadTracePath string + jarPath string + maxMemory string + ruleIDs []string + approximationsConfig []string + dataflowApproximations []string + trackExternalMethods bool + debugFactReachabilitySarif bool + runRuleTests bool + debugRunAnalysisOnSelectedEntryPoints string } func (a *AnalyzerBuilder) SetProject(projectPath string) *AnalyzerBuilder { @@ -164,6 +166,16 @@ func (a *AnalyzerBuilder) EnableDebugFactReachabilitySarif() *AnalyzerBuilder { return a } +func (a *AnalyzerBuilder) SetDebugRunAnalysisOnSelectedEntryPoints(entryPoints string) *AnalyzerBuilder { + a.debugRunAnalysisOnSelectedEntryPoints = entryPoints + return a +} + +func (a *AnalyzerBuilder) EnableRunRuleTests() *AnalyzerBuilder { + a.runRuleTests = true + return a +} + func (a *AnalyzerBuilder) BuildNativeCommand() []string { // For native execution, create a temporary logs directory tempLogsDir, err := os.MkdirTemp("", "opentaint-*") @@ -253,6 +265,14 @@ func (a *AnalyzerBuilder) BuildNativeCommand() []string { flags = append(flags, "--debug-fact-reachability-sarif") } + if a.debugRunAnalysisOnSelectedEntryPoints != "" { + flags = append(flags, "--debug-run-analysis-on-selected-entry-points", a.debugRunAnalysisOnSelectedEntryPoints) + } + + if a.runRuleTests { + flags = append(flags, "--debug-run-rule-tests") + } + return append(command, flags...) } diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index f5fb4df4a..9ed9a3372 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -24,20 +24,21 @@ import ( ) var ( - UserProjectPath string - ProjectModelPath string - SarifReportPath string - SemgrepCompatibilitySarif bool - Severity []string - Ruleset []string - DryRunScan bool - Recompile bool - ScanLogFile string - RuleID []string - ApproximationsConfig []string - DataflowApproximations []string - TrackExternalMethods bool - DebugFactReachabilitySarif bool + UserProjectPath string + ProjectModelPath string + SarifReportPath string + SemgrepCompatibilitySarif bool + Severity []string + Ruleset []string + DryRunScan bool + Recompile bool + ScanLogFile string + RuleID []string + ApproximationsConfig []string + DataflowApproximations []string + TrackExternalMethods bool + DebugFactReachabilitySarif bool + DebugRunAnalysisOnSelectedEntryPoints string ) type RulesetType struct { @@ -143,6 +144,9 @@ func init() { scanCmd.Flags().BoolVar(&DebugFactReachabilitySarif, "debug-fact-reachability-sarif", false, "Generate SARIF with fact reachability info (debug; use with a single rule only)") _ = scanCmd.Flags().MarkHidden("debug-fact-reachability-sarif") + + scanCmd.Flags().StringVar(&DebugRunAnalysisOnSelectedEntryPoints, "debug-run-analysis-on-selected-entry-points", "", "Run analysis on selected entry points: '*' for all methods or method FQN like com.example.Class#method") + _ = scanCmd.Flags().MarkHidden("debug-run-analysis-on-selected-entry-points") } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. @@ -371,6 +375,9 @@ func scan(cmd *cobra.Command) { if DebugFactReachabilitySarif { nativeBuilder.EnableDebugFactReachabilitySarif() } + if DebugRunAnalysisOnSelectedEntryPoints != "" { + nativeBuilder.SetDebugRunAnalysisOnSelectedEntryPoints(DebugRunAnalysisOnSelectedEntryPoints) + } analyzerJarPath, err := ensureAnalyzerAvailable() if err != nil { diff --git a/cli/internal/agent/.gitignore b/cli/internal/agent/.gitignore new file mode 100644 index 000000000..2a5b166fd --- /dev/null +++ b/cli/internal/agent/.gitignore @@ -0,0 +1,2 @@ +# Generated by go:generate from ../../agent/ +files/ diff --git a/cli/internal/agent/agent.go b/cli/internal/agent/agent.go new file mode 100644 index 000000000..002bef68b --- /dev/null +++ b/cli/internal/agent/agent.go @@ -0,0 +1,140 @@ +// Package agent embeds the agent skill files and meta-prompt, and extracts +// them on demand to ~/.opentaint/agent/ when no bundled copy is available. +package agent + +import ( + "crypto/sha256" + "embed" + "encoding/hex" + "fmt" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" +) + +//go:generate sh -c "rm -rf files && cp -r ../../../agent files" + +//go:embed files +var agentFS embed.FS + +// contentHash returns a deterministic SHA-256 hash of all embedded files. +// Used as a staleness marker for the extracted copy. +func contentHash() string { + h := sha256.New() + // Walk in sorted order for determinism. + var paths []string + fs.WalkDir(agentFS, ".", func(path string, d fs.DirEntry, err error) error { + if err != nil || d.IsDir() { + return err + } + paths = append(paths, path) + return nil + }) + sort.Strings(paths) + for _, p := range paths { + data, _ := agentFS.ReadFile(p) + h.Write([]byte(p)) + h.Write(data) + } + return hex.EncodeToString(h.Sum(nil)) +} + +// GetPath returns a filesystem path to the agent directory containing +// meta-prompt.md and skills/. +// +// Resolution order: +// 1. Bundled: /lib/agent/ (release archives place files here) +// 2. Extracted: ~/.opentaint/agent/ (populated from embedded FS on demand) +// +// The extracted copy is refreshed when its hash marker diverges from the +// embedded content, ensuring go-install and dev-build users always get the +// version of the agent files that matches their binary. +func GetPath() (string, error) { + // Tier 1: bundled next to binary (release builds). + if dir := exeDir(); dir != "" { + bundled := filepath.Join(dir, "lib", "agent") + if isDir(bundled) { + return bundled, nil + } + } + + // Tier 2: extract from embedded FS to ~/.opentaint/agent/. + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("cannot determine home directory: %w", err) + } + extractDir := filepath.Join(home, ".opentaint", "agent") + markerPath := filepath.Join(extractDir, ".content-hash") + wantHash := contentHash() + + if !needsExtract(markerPath, wantHash) { + return extractDir, nil + } + + if err := extractEmbedded(extractDir, markerPath, wantHash); err != nil { + return "", fmt.Errorf("failed to extract agent files: %w", err) + } + return extractDir, nil +} + +// needsExtract reports whether the extracted copy is missing or stale. +func needsExtract(markerPath, wantHash string) bool { + data, err := os.ReadFile(markerPath) + if err != nil { + return true + } + return strings.TrimSpace(string(data)) != wantHash +} + +// extractEmbedded writes the embedded agent FS to destDir and writes the +// content-hash marker. +func extractEmbedded(destDir, markerPath, hash string) error { + // Remove stale tree (if any) and recreate. + if err := os.RemoveAll(destDir); err != nil { + return err + } + + err := fs.WalkDir(agentFS, "files", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + // Strip the "files" prefix so "files/skills/foo.md" → "skills/foo.md". + rel, _ := filepath.Rel("files", path) + target := filepath.Join(destDir, rel) + + if d.IsDir() { + return os.MkdirAll(target, 0o755) + } + data, err := agentFS.ReadFile(path) + if err != nil { + return err + } + return os.WriteFile(target, data, 0o644) + }) + if err != nil { + return err + } + + return os.WriteFile(markerPath, []byte(hash+"\n"), 0o644) +} + +// exeDir returns the directory of the current executable, resolved through symlinks. +func exeDir() string { + exe, err := os.Executable() + if err != nil { + return "" + } + exe, err = filepath.EvalSymlinks(exe) + if err != nil { + return "" + } + return filepath.Dir(exe) +} + +// isDir reports whether path exists and is a directory. +func isDir(path string) bool { + fi, err := os.Stat(path) + return err == nil && fi.IsDir() +} diff --git a/cli/internal/testutil/.gitignore b/cli/internal/testutil/.gitignore new file mode 100644 index 000000000..cc77386b1 --- /dev/null +++ b/cli/internal/testutil/.gitignore @@ -0,0 +1,2 @@ +# Generated by go:generate from core/opentaint-sast-test-util/build/libs/ +jar/ diff --git a/cli/internal/testutil/testutil.go b/cli/internal/testutil/testutil.go new file mode 100644 index 000000000..b6b759b0d --- /dev/null +++ b/cli/internal/testutil/testutil.go @@ -0,0 +1,69 @@ +// Package testutil embeds the opentaint-sast-test-util.jar and extracts it +// on demand to ~/.opentaint/test-util/ when no bundled copy is available. +package testutil + +import ( + "crypto/sha256" + _ "embed" + "encoding/hex" + "fmt" + "os" + "path/filepath" + "strings" +) + +//go:generate sh -c "mkdir -p jar && cp ../../../core/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar jar/" + +//go:embed jar/opentaint-sast-test-util.jar +var jarData []byte + +// JarName is the filename of the test-util JAR. +const JarName = "opentaint-sast-test-util.jar" + +func contentHash() string { + h := sha256.Sum256(jarData) + return hex.EncodeToString(h[:]) +} + +// ExtractJar extracts the embedded test-util JAR to ~/.opentaint/test-util/ +// and returns the path to the extracted JAR. Uses a SHA-256 content hash +// marker for staleness detection so the extracted copy is refreshed when the +// binary is rebuilt with a newer JAR. +func ExtractJar() (string, error) { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("cannot determine home directory: %w", err) + } + extractDir := filepath.Join(home, ".opentaint", "test-util") + extractPath := filepath.Join(extractDir, JarName) + markerPath := filepath.Join(extractDir, ".content-hash") + wantHash := contentHash() + + if !needsExtract(markerPath, wantHash) && fileExists(extractPath) { + return extractPath, nil + } + + if err := os.MkdirAll(extractDir, 0o755); err != nil { + return "", fmt.Errorf("create dir: %w", err) + } + if err := os.WriteFile(extractPath, jarData, 0o644); err != nil { + return "", fmt.Errorf("write JAR: %w", err) + } + if err := os.WriteFile(markerPath, []byte(wantHash+"\n"), 0o644); err != nil { + return "", fmt.Errorf("write marker: %w", err) + } + return extractPath, nil +} + +func needsExtract(markerPath, wantHash string) bool { + data, err := os.ReadFile(markerPath) + if err != nil { + return true + } + return strings.TrimSpace(string(data)) != wantHash +} + +func fileExists(path string) bool { + _, err := os.Stat(path) + return err == nil +} diff --git a/task.md b/task.md new file mode 100644 index 000000000..af654ee90 --- /dev/null +++ b/task.md @@ -0,0 +1,86 @@ +We have dataflow based SAST analyzer. + +We have multiple configuration points: +1. Patterns in `rules/ruleset`. Vulnerable patterns to search in analyzed code. We treat all such patterns as a patterns on a dataflow trace and match them similar to the taint rules. +2. Approximations in `core/opentaint-config/config`. Default dataflow propagators for common libraries. +3. Approximations in `core/opentaint-jvm-sast-dataflow/dataflow-approximations`. Complex code-based propagators for complex methods. +4. Framework support like in `core/src/main/kotlin/org/opentaint/jvm/sast/project/spring/SpringWebProject.kt`. Special handling of frameworks. + +We are trying to make all our rules and approximations customizable (e.g. via llm agent). Here are our requirements: +1. Agent should be able to generate patterns (1). We need to specialize all requirement to the pattern language +2. Agent should be able to debug and fix patterns (1). For example, fix FP or FN. +3. To work with FN, engine will return a list of external methods, where dataflow fact was killed +4. Agent should be able to generate approximations (2) and (3), mainly to fix FN +5. Approximations (2) must be hierarchical. For example, if we have rule for method `get*` and a rule for `getEntry`, the rule for `getEntry` must override the rule for `get*` +6. Approximations (3) always override (2) +7. Agent must be able to override (2) and (3) +8. Agent must be able to generate required missed approximations based on the list of external methods, where dataflow fact was killed +9. Frameworks support (4) provided as-is and is not configurable by the agent + +Expected agent workflow: +1. Agent takes path to the project +2. Agent builds project via autobuilder, or create project.yaml himself (via dedicated CLI API). +3. Agent search for entry points and potentially vulnerable places +4. Agent start working on security analysis. Each step and progress is tracked in `opentaint-analysis-plan.md` +5. Agent creates Rule +6. Agent creates Tests for the rule and verify rule works as expected. + - Take simple test sample project (like in rules/test but with few samples) + - Write tests for designed rule + - Run opentaint analyzer on the test like in .github/workflows/ci-rules.yaml (via dedicated CLI API) + - Fix rule or test and repeat +7. Agent runs opentaint with Rule on the project +8. Opentaint produces 2 files: + - Sarif with discovered vulnerabilities wrt Rule. Each vulnerability may contain multiple traces. + - List (yaml format) with external methods, where dataflow fact was killed +9. Agent decide between the following options: + - Fix FN according to missed external methods list. Each list entry contains info about method and fact position (pass rule from). Agent can generate more Approximations and override current, then rerun analysis. + - Fix FN in rule (non preferred option). Add more patters and tests into Rule. Then rerun analysis. + - Analyze trace. + a. If trace contains FP, try fix it via Rule (e.g. pattern-not). Update rule and test, then rerun analysis + b. If trace contains FP, try fix it via Approximations (non preferred option). Override approximation to remove impossible dataflow + c. If trace is TP, try to generate POC. Then save it to `vulnerabilities.md` +10. Steps 7-10 are repeated until agent decides that all vulnerabilities discovered. + +Analyze current analyzer impl and requirements and propose the following documents: +1. agent-mode/info/pattern-rules.md: design of all thing related to pattern rules +2. agent-mode/info/approximations-config.md: all things related to approximations +3. agent-mode/info/agent-pipeline.md: whole pipeline for the agent to work with rules, common scenarios + +Consider we have opentaint installed on PATH and want to use it from the agent via skills. +We need to design the following things: +1. Changes in the opentaint that are required to match expected agent workflow +2. All opentaint operations must be available via Go CLI (implemented in Go or proxied to the Analyzer CLI) + - Consider code-based approximations. Opentaint CLI must have an API to take approximation source code and compile it to further use it in the analysis. +3. Skills that can be used via agent. + - Skills must include all the required examples + - For the rule-test skill we must provide simple sample test project +4. Meta prompt to run agent wrt expected workflow using skills. + +Write all your findings into `agent-mode/design/agent-mode-design.md`. + +No we need to design the test pipeline. +Let's start with project `/home/sobol/data/Stirling-PDF/seqra-project/project.yaml` +1. We need to test various project build scenarios +2. Check that rule generations pipeline works +3. Check approximations (including code based) generation/override +4. Check external methods extraction + +Write all your findings into `agent-mode/test/agent-mode-test.md`. + +No we need to understand implementation details: +1. Which modules/files will be modified and how +2. Projects structure: where skills and meta-prompt will be located +3. How skills and meta prompt will be accessible via agent? How we distribute them and provide to the end user +4. How we can test implementation without CLI installation in PATH + +Write all your findings into `agent-mode/impl/agent-mode-impl.md`. + +OK, WE ARE HERE. We have all required design done. + +Now we can implement things. Use `agent-mode/plan.md` to track implementation process. +Track all tasks status. If you have a new task (e.g. fix failing test) add it to the plan before start working on it. +Use git to fix project state after each task completed. + +1. Make all planned changes into analyzer and CLI +2. Write skills and meta-prompt +3. Verify everything works using tests From a4977b0f5c650ec845b6aec7bc8494b650f4dc3f Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Fri, 8 May 2026 17:08:25 +0300 Subject: [PATCH 02/54] refactor(cli): rename agent command to dev, move debug flags into dev subcommands --- cli/cmd/agent.go | 16 -- cli/cmd/agent_prompt.go | 26 ---- cli/cmd/agent_skills.go | 26 ---- cli/cmd/dev.go | 16 ++ cli/cmd/dev_debug_fact_reachability.go | 32 ++++ cli/cmd/dev_debug_run_on_entry_points.go | 33 +++++ ...st_project.go => dev_init_test_project.go} | 6 +- ...{agent_rules_path.go => dev_rules_path.go} | 4 +- ...{agent_test_rules.go => dev_test_rules.go} | 14 +- cli/cmd/scan.go | 58 ++++---- cli/internal/agent/.gitignore | 2 - cli/internal/agent/agent.go | 140 ------------------ 12 files changed, 124 insertions(+), 249 deletions(-) delete mode 100644 cli/cmd/agent.go delete mode 100644 cli/cmd/agent_prompt.go delete mode 100644 cli/cmd/agent_skills.go create mode 100644 cli/cmd/dev.go create mode 100644 cli/cmd/dev_debug_fact_reachability.go create mode 100644 cli/cmd/dev_debug_run_on_entry_points.go rename cli/cmd/{agent_init_test_project.go => dev_init_test_project.go} (96%) rename cli/cmd/{agent_rules_path.go => dev_rules_path.go} (92%) rename cli/cmd/{agent_test_rules.go => dev_test_rules.go} (86%) delete mode 100644 cli/internal/agent/.gitignore delete mode 100644 cli/internal/agent/agent.go diff --git a/cli/cmd/agent.go b/cli/cmd/agent.go deleted file mode 100644 index f2be1e8ff..000000000 --- a/cli/cmd/agent.go +++ /dev/null @@ -1,16 +0,0 @@ -package cmd - -import ( - "github.com/spf13/cobra" -) - -// agentCmd represents the agent command group -var agentCmd = &cobra.Command{ - Use: "agent", - Short: "Agent mode utilities", - Long: `Commands for AI agent integration: locate skills, meta-prompt, rules, and run rule tests.`, -} - -func init() { - rootCmd.AddCommand(agentCmd) -} diff --git a/cli/cmd/agent_prompt.go b/cli/cmd/agent_prompt.go deleted file mode 100644 index 60ac47893..000000000 --- a/cli/cmd/agent_prompt.go +++ /dev/null @@ -1,26 +0,0 @@ -package cmd - -import ( - "fmt" - "path/filepath" - - "github.com/seqra/opentaint/internal/agent" - "github.com/spf13/cobra" -) - -var agentPromptCmd = &cobra.Command{ - Use: "prompt", - Short: "Print the path to the meta-prompt file", - Run: func(cmd *cobra.Command, args []string) { - agentPath, err := agent.GetPath() - if err != nil { - out.Fatalf("Error: %s", err) - } - promptPath := filepath.Join(agentPath, "meta-prompt.md") - fmt.Println(promptPath) - }, -} - -func init() { - agentCmd.AddCommand(agentPromptCmd) -} diff --git a/cli/cmd/agent_skills.go b/cli/cmd/agent_skills.go deleted file mode 100644 index 74689b847..000000000 --- a/cli/cmd/agent_skills.go +++ /dev/null @@ -1,26 +0,0 @@ -package cmd - -import ( - "fmt" - "path/filepath" - - "github.com/seqra/opentaint/internal/agent" - "github.com/spf13/cobra" -) - -var agentSkillsCmd = &cobra.Command{ - Use: "skills", - Short: "Print the path to the skills directory", - Run: func(cmd *cobra.Command, args []string) { - agentPath, err := agent.GetPath() - if err != nil { - out.Fatalf("Error: %s", err) - } - skillsPath := filepath.Join(agentPath, "skills") - fmt.Println(skillsPath) - }, -} - -func init() { - agentCmd.AddCommand(agentSkillsCmd) -} diff --git a/cli/cmd/dev.go b/cli/cmd/dev.go new file mode 100644 index 000000000..24a165fb6 --- /dev/null +++ b/cli/cmd/dev.go @@ -0,0 +1,16 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +// devCmd represents the dev command group +var devCmd = &cobra.Command{ + Use: "dev", + Short: "Create and debug rules (experimental)", + Long: `This command provides utilities for rule authoring and debugging (experimental)`, +} + +func init() { + rootCmd.AddCommand(devCmd) +} diff --git a/cli/cmd/dev_debug_fact_reachability.go b/cli/cmd/dev_debug_fact_reachability.go new file mode 100644 index 000000000..40df4b2ae --- /dev/null +++ b/cli/cmd/dev_debug_fact_reachability.go @@ -0,0 +1,32 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +var devDebugFactReachabilityCmd = &cobra.Command{ + Use: "debug-fact-reachability [source-path]", + Short: "Generate SARIF with fact reachability info for a single rule", + Args: cobra.RangeArgs(1, 2), + Long: `This command scans the project for one rule and writes a sibling SARIF report with fact-reachability info to debug why the rule does or does not fire + +Arguments: + rule-id - Full rule ID, e.g. security/SqlInjection.yaml:tainted-sql-from-http (required) + source-path - Path to the project sources (default: current directory) + +The fact-reachability report is written next to the main SARIF as debug-ifds-fact-reachability.sarif. + +Use --project-model to scan a pre-compiled project model instead of compiling from sources. +`, + Annotations: map[string]string{"PrintConfig": "true"}, + Run: func(cmd *cobra.Command, args []string) { + RuleID = []string{args[0]} + DebugFactReachabilitySarif = true + scanCmd.Run(scanCmd, args[1:]) + }, +} + +func init() { + devCmd.AddCommand(devDebugFactReachabilityCmd) + addScanFlags(devDebugFactReachabilityCmd) +} diff --git a/cli/cmd/dev_debug_run_on_entry_points.go b/cli/cmd/dev_debug_run_on_entry_points.go new file mode 100644 index 000000000..7043cc827 --- /dev/null +++ b/cli/cmd/dev_debug_run_on_entry_points.go @@ -0,0 +1,33 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +var devDebugRunOnEntryPointsCmd = &cobra.Command{ + Use: "debug-run-on-entry-points [source-path]", + Short: "Run analysis on selected entry points", + Args: cobra.RangeArgs(1, 2), + Long: `This command scans the project starting only from the given entry point, useful for narrowing analysis while debugging a rule + +Arguments: + entry-point - '*' for all methods or method FQN like com.example.Class#method (required) + source-path - Path to the project sources (default: current directory) + +Note: this command is ignored on Spring projects + +Use --project-model to scan a pre-compiled project model instead of compiling from sources. +`, + Annotations: map[string]string{"PrintConfig": "true"}, + Run: func(cmd *cobra.Command, args []string) { + out.Warn("entry-point override has no effect on Spring projects") + DebugRunAnalysisOnSelectedEntryPoints = args[0] + scanCmd.Run(scanCmd, args[1:]) + }, +} + +func init() { + devCmd.AddCommand(devDebugRunOnEntryPointsCmd) + addScanFlags(devDebugRunOnEntryPointsCmd) + addRuleIDFlag(devDebugRunOnEntryPointsCmd) +} diff --git a/cli/cmd/agent_init_test_project.go b/cli/cmd/dev_init_test_project.go similarity index 96% rename from cli/cmd/agent_init_test_project.go rename to cli/cmd/dev_init_test_project.go index ae0b675c2..d833f3e7f 100644 --- a/cli/cmd/agent_init_test_project.go +++ b/cli/cmd/dev_init_test_project.go @@ -14,7 +14,7 @@ import ( var initTestProjectDeps []string -var agentInitTestProjectCmd = &cobra.Command{ +var devInitTestProjectCmd = &cobra.Command{ Use: "init-test-project ", Short: "Bootstrap a rule test project with build.gradle.kts and test utility JAR", Long: `Creates a minimal Gradle project structure for testing OpenTaint rules. @@ -66,8 +66,8 @@ Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, } func init() { - agentCmd.AddCommand(agentInitTestProjectCmd) - agentInitTestProjectCmd.Flags().StringArrayVar(&initTestProjectDeps, "dependency", nil, + devCmd.AddCommand(devInitTestProjectCmd) + devInitTestProjectCmd.Flags().StringArrayVar(&initTestProjectDeps, "dependency", nil, "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") } diff --git a/cli/cmd/agent_rules_path.go b/cli/cmd/dev_rules_path.go similarity index 92% rename from cli/cmd/agent_rules_path.go rename to cli/cmd/dev_rules_path.go index ba30ae055..5bf962066 100644 --- a/cli/cmd/agent_rules_path.go +++ b/cli/cmd/dev_rules_path.go @@ -9,7 +9,7 @@ import ( "github.com/spf13/cobra" ) -var agentRulesPathCmd = &cobra.Command{ +var devRulesPathCmd = &cobra.Command{ Use: "rules-path", Short: "Print the path to the builtin rules directory (downloads on demand)", Run: func(cmd *cobra.Command, args []string) { @@ -37,5 +37,5 @@ var agentRulesPathCmd = &cobra.Command{ } func init() { - agentCmd.AddCommand(agentRulesPathCmd) + devCmd.AddCommand(devRulesPathCmd) } diff --git a/cli/cmd/agent_test_rules.go b/cli/cmd/dev_test_rules.go similarity index 86% rename from cli/cmd/agent_test_rules.go rename to cli/cmd/dev_test_rules.go index 0beca9fd2..c0e762b35 100644 --- a/cli/cmd/agent_test_rules.go +++ b/cli/cmd/dev_test_rules.go @@ -21,7 +21,7 @@ var ( testRulesRuleID []string ) -var agentTestRulesCmd = &cobra.Command{ +var devTestRulesCmd = &cobra.Command{ Use: "test-rules ", Short: "Run rule tests against annotated test samples", Long: `Run rule tests against annotated test samples in the given project model. @@ -143,11 +143,11 @@ Exit codes: } func init() { - agentCmd.AddCommand(agentTestRulesCmd) + devCmd.AddCommand(devTestRulesCmd) - agentTestRulesCmd.Flags().StringArrayVar(&testRulesRuleset, "ruleset", nil, "Additional ruleset path (repeatable)") - agentTestRulesCmd.Flags().StringVarP(&testRulesOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") - agentTestRulesCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Timeout for analysis") - agentTestRulesCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") - agentTestRulesCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") + devTestRulesCmd.Flags().StringArrayVar(&testRulesRuleset, "ruleset", nil, "Additional ruleset path (repeatable)") + devTestRulesCmd.Flags().StringVarP(&testRulesOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") + devTestRulesCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Timeout for analysis") + devTestRulesCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") + devTestRulesCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") } diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 9ed9a3372..b50a67db4 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -112,41 +112,45 @@ Use --project-model to scan a pre-compiled project model instead of compiling fr func init() { rootCmd.AddCommand(scanCmd) + addScanFlags(scanCmd) + addRuleIDFlag(scanCmd) +} - scanCmd.Flags().DurationVarP(&globals.Config.Scan.Timeout, "timeout", "t", 900*time.Second, "Timeout for analysis") - _ = viper.BindPFlag("scan.timeout", scanCmd.Flags().Lookup("timeout")) - - scanCmd.Flags().StringArrayVar(&Ruleset, "ruleset", []string{"builtin"}, "YAML rules file, directory of YAML rules files ending in .yml or .yaml, or `builtin` to scan with built-in rules") - _ = viper.BindPFlag("scan.ruleset", scanCmd.Flags().Lookup("ruleset")) +// addRuleIDFlag registers the --rule-id flag. Split out from addScanFlags so +// that `dev debug-fact-reachability` can omit it (it takes the rule ID +// positionally and supports only one rule at a time). +func addRuleIDFlag(cmd *cobra.Command) { + cmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") +} - scanCmd.Flags().BoolVar(&SemgrepCompatibilitySarif, "semgrep-compatibility-sarif", true, "Use Semgrep compatible ruleId") - scanCmd.Flags().StringVarP(&SarifReportPath, "output", "o", "", "Path to the SARIF-report output file") +func addScanFlags(cmd *cobra.Command) { + cmd.Flags().DurationVarP(&globals.Config.Scan.Timeout, "timeout", "t", 900*time.Second, "Timeout for analysis") + _ = viper.BindPFlag("scan.timeout", cmd.Flags().Lookup("timeout")) - scanCmd.Flags().StringArrayVar(&Severity, "severity", []string{"warning", "error"}, "Report findings only from rules matching the supplied severity level. By default only warning and error rules are run (note, warning, error)") - scanCmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)") - _ = viper.BindPFlag("scan.max_memory", scanCmd.Flags().Lookup("max-memory")) - scanCmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)") - _ = viper.BindPFlag("scan.code_flow_limit", scanCmd.Flags().Lookup("code-flow-limit")) - scanCmd.Flags().BoolVar(&DryRunScan, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") - scanCmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists") - scanCmd.Flags().StringVar(&ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") - scanCmd.Flags().StringVar(&ScanLogFile, "log-file", "", "Path to the log file (default: /logs/.log)") - scanCmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") + cmd.Flags().StringArrayVar(&Ruleset, "ruleset", []string{"builtin"}, "YAML rules file, directory of YAML rules files ending in .yml or .yaml, or `builtin` to scan with built-in rules") + _ = viper.BindPFlag("scan.ruleset", cmd.Flags().Lookup("ruleset")) - scanCmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, "YAML passThrough approximations config (OVERRIDE mode, repeatable)") - _ = scanCmd.Flags().MarkHidden("approximations-config") + cmd.Flags().BoolVar(&SemgrepCompatibilitySarif, "semgrep-compatibility-sarif", true, "Use Semgrep compatible ruleId") + cmd.Flags().StringVarP(&SarifReportPath, "output", "o", "", "Path to the SARIF-report output file") - scanCmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files (repeatable)") - _ = scanCmd.Flags().MarkHidden("dataflow-approximations") + cmd.Flags().StringArrayVar(&Severity, "severity", []string{"warning", "error"}, "Report findings only from rules matching the supplied severity level. By default only warning and error rules are run (note, warning, error)") + cmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)") + _ = viper.BindPFlag("scan.max_memory", cmd.Flags().Lookup("max-memory")) + cmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)") + _ = viper.BindPFlag("scan.code_flow_limit", cmd.Flags().Lookup("code-flow-limit")) + cmd.Flags().BoolVar(&DryRunScan, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") + cmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists") + cmd.Flags().StringVar(&ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") + cmd.Flags().StringVar(&ScanLogFile, "log-file", "", "Path to the log file (default: /logs/.log)") - scanCmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-methods-{without,with}-rules.yaml next to the SARIF report") - _ = scanCmd.Flags().MarkHidden("track-external-methods") + cmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, "YAML passThrough approximations config (OVERRIDE mode, repeatable)") + _ = cmd.Flags().MarkHidden("approximations-config") - scanCmd.Flags().BoolVar(&DebugFactReachabilitySarif, "debug-fact-reachability-sarif", false, "Generate SARIF with fact reachability info (debug; use with a single rule only)") - _ = scanCmd.Flags().MarkHidden("debug-fact-reachability-sarif") + cmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files (repeatable)") + _ = cmd.Flags().MarkHidden("dataflow-approximations") - scanCmd.Flags().StringVar(&DebugRunAnalysisOnSelectedEntryPoints, "debug-run-analysis-on-selected-entry-points", "", "Run analysis on selected entry points: '*' for all methods or method FQN like com.example.Class#method") - _ = scanCmd.Flags().MarkHidden("debug-run-analysis-on-selected-entry-points") + cmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-methods-{without,with}-rules.yaml next to the SARIF report") + _ = cmd.Flags().MarkHidden("track-external-methods") } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. diff --git a/cli/internal/agent/.gitignore b/cli/internal/agent/.gitignore deleted file mode 100644 index 2a5b166fd..000000000 --- a/cli/internal/agent/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by go:generate from ../../agent/ -files/ diff --git a/cli/internal/agent/agent.go b/cli/internal/agent/agent.go deleted file mode 100644 index 002bef68b..000000000 --- a/cli/internal/agent/agent.go +++ /dev/null @@ -1,140 +0,0 @@ -// Package agent embeds the agent skill files and meta-prompt, and extracts -// them on demand to ~/.opentaint/agent/ when no bundled copy is available. -package agent - -import ( - "crypto/sha256" - "embed" - "encoding/hex" - "fmt" - "io/fs" - "os" - "path/filepath" - "sort" - "strings" -) - -//go:generate sh -c "rm -rf files && cp -r ../../../agent files" - -//go:embed files -var agentFS embed.FS - -// contentHash returns a deterministic SHA-256 hash of all embedded files. -// Used as a staleness marker for the extracted copy. -func contentHash() string { - h := sha256.New() - // Walk in sorted order for determinism. - var paths []string - fs.WalkDir(agentFS, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil || d.IsDir() { - return err - } - paths = append(paths, path) - return nil - }) - sort.Strings(paths) - for _, p := range paths { - data, _ := agentFS.ReadFile(p) - h.Write([]byte(p)) - h.Write(data) - } - return hex.EncodeToString(h.Sum(nil)) -} - -// GetPath returns a filesystem path to the agent directory containing -// meta-prompt.md and skills/. -// -// Resolution order: -// 1. Bundled: /lib/agent/ (release archives place files here) -// 2. Extracted: ~/.opentaint/agent/ (populated from embedded FS on demand) -// -// The extracted copy is refreshed when its hash marker diverges from the -// embedded content, ensuring go-install and dev-build users always get the -// version of the agent files that matches their binary. -func GetPath() (string, error) { - // Tier 1: bundled next to binary (release builds). - if dir := exeDir(); dir != "" { - bundled := filepath.Join(dir, "lib", "agent") - if isDir(bundled) { - return bundled, nil - } - } - - // Tier 2: extract from embedded FS to ~/.opentaint/agent/. - home, err := os.UserHomeDir() - if err != nil { - return "", fmt.Errorf("cannot determine home directory: %w", err) - } - extractDir := filepath.Join(home, ".opentaint", "agent") - markerPath := filepath.Join(extractDir, ".content-hash") - wantHash := contentHash() - - if !needsExtract(markerPath, wantHash) { - return extractDir, nil - } - - if err := extractEmbedded(extractDir, markerPath, wantHash); err != nil { - return "", fmt.Errorf("failed to extract agent files: %w", err) - } - return extractDir, nil -} - -// needsExtract reports whether the extracted copy is missing or stale. -func needsExtract(markerPath, wantHash string) bool { - data, err := os.ReadFile(markerPath) - if err != nil { - return true - } - return strings.TrimSpace(string(data)) != wantHash -} - -// extractEmbedded writes the embedded agent FS to destDir and writes the -// content-hash marker. -func extractEmbedded(destDir, markerPath, hash string) error { - // Remove stale tree (if any) and recreate. - if err := os.RemoveAll(destDir); err != nil { - return err - } - - err := fs.WalkDir(agentFS, "files", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - // Strip the "files" prefix so "files/skills/foo.md" → "skills/foo.md". - rel, _ := filepath.Rel("files", path) - target := filepath.Join(destDir, rel) - - if d.IsDir() { - return os.MkdirAll(target, 0o755) - } - data, err := agentFS.ReadFile(path) - if err != nil { - return err - } - return os.WriteFile(target, data, 0o644) - }) - if err != nil { - return err - } - - return os.WriteFile(markerPath, []byte(hash+"\n"), 0o644) -} - -// exeDir returns the directory of the current executable, resolved through symlinks. -func exeDir() string { - exe, err := os.Executable() - if err != nil { - return "" - } - exe, err = filepath.EvalSymlinks(exe) - if err != nil { - return "" - } - return filepath.Dir(exe) -} - -// isDir reports whether path exists and is a directory. -func isDir(path string) bool { - fi, err := os.Stat(path) - return err == nil && fi.IsDir() -} From a4814a53d96e7d02bb3f6bb5b1c40be6fa20526e Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Fri, 8 May 2026 19:56:30 +0300 Subject: [PATCH 03/54] refactor(skills): fix skills formatting, typos, other small issues --- agent/meta-prompt.md | 119 ------------ agent/skills/debug-rule-reachability.md | 60 ------- agent/skills/generate-poc.md | 83 --------- agent/skills/opentaint-issue-investigation.md | 169 ------------------ agent/skills/run-analysis.md | 94 ---------- .../analyze-findings/SKILL.md | 26 ++- .../build-project/SKILL.md | 23 ++- .../create-approximation/SKILL.md | 52 +++--- .../create-rule/SKILL.md | 37 ++-- .../create-yaml-config/SKILL.md | 38 ++-- skills/debug-rule-reachability/SKILL.md | 70 ++++++++ .../discover-entry-points/SKILL.md | 17 +- skills/generate-poc/SKILL.md | 87 +++++++++ skills/opentaint-agent/SKILL.md | 166 +++++++++++++++++ skills/opentaint-issue-investigation/SKILL.md | 127 +++++++++++++ skills/run-analysis/SKILL.md | 98 ++++++++++ .../test-rule.md => skills/test-rule/SKILL.md | 45 +++-- 17 files changed, 684 insertions(+), 627 deletions(-) delete mode 100644 agent/meta-prompt.md delete mode 100644 agent/skills/debug-rule-reachability.md delete mode 100644 agent/skills/generate-poc.md delete mode 100644 agent/skills/opentaint-issue-investigation.md delete mode 100644 agent/skills/run-analysis.md rename agent/skills/analyze-findings.md => skills/analyze-findings/SKILL.md (72%) rename agent/skills/build-project.md => skills/build-project/SKILL.md (76%) rename agent/skills/create-approximation.md => skills/create-approximation/SKILL.md (65%) rename agent/skills/create-rule.md => skills/create-rule/SKILL.md (74%) rename agent/skills/create-yaml-config.md => skills/create-yaml-config/SKILL.md (68%) create mode 100644 skills/debug-rule-reachability/SKILL.md rename agent/skills/discover-entry-points.md => skills/discover-entry-points/SKILL.md (75%) create mode 100644 skills/generate-poc/SKILL.md create mode 100644 skills/opentaint-agent/SKILL.md create mode 100644 skills/opentaint-issue-investigation/SKILL.md create mode 100644 skills/run-analysis/SKILL.md rename agent/skills/test-rule.md => skills/test-rule/SKILL.md (79%) diff --git a/agent/meta-prompt.md b/agent/meta-prompt.md deleted file mode 100644 index 5627650f1..000000000 --- a/agent/meta-prompt.md +++ /dev/null @@ -1,119 +0,0 @@ -# OpenTaint Agent -- Meta Prompt - -You are an AI security analyst using OpenTaint, a dataflow-based SAST analyzer for JVM projects. Your goal is to find real vulnerabilities by iteratively creating rules, running analysis, and refining results. - -## Setup - -1. Run `opentaint agent skills` to get the skills directory path -2. Run `opentaint agent prompt` to get this file's path -3. Run `opentaint agent rules-path` to get the built-in rules directory -4. Read individual skill files as needed during each phase - -## Workflow - -Execute these four phases in order. Iterate phases 2-4 until the external methods list stabilizes and all findings are classified. - -### Phase 1: Project Setup - -1. **Build the project** (read `build-project.md`) - - Produce `./opentaint-project/project.yaml` -2. **Discover entry points** (read `discover-entry-points.md`) - - Identify attack surface, data sources, vulnerability classes - - Write `opentaint-analysis-plan.md` - -### Phase 2: Rule Creation - -1. **Check built-in rules** -- read rules in `$(opentaint agent rules-path)` -2. **Create rules** for uncovered vulnerability classes (read `create-rule.md`) - - Library rules in `agent-rules/java/lib/` - - Security rules in `agent-rules/java/security/` -3. **Test rules** (read `test-rule.md`) - - Create annotated test samples with `@PositiveRuleSample` / `@NegativeRuleSample` - - Fix until all tests pass - -### Phase 3: Analysis - -1. **Run analysis** (read `run-analysis.md`). Always pass a pre-compiled model via - `--project-model`, and use full rule IDs of the form `.yaml:`: - ```bash - opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/.yaml: \ - --track-external-methods - ``` -2. Collect `results/report.sarif`, and next to it the fixed-name files - `results/external-methods-without-rules.yaml` (taint-killing methods) and - `results/external-methods-with-rules.yaml` (already modeled). The - `--track-external-methods` flag is a boolean; the filenames and location are - fixed by the analyzer. - -### Phase 4: Results Interpretation and Iteration - -1. **Analyze findings** (read `analyze-findings.md`) - - Classify each SARIF finding as TP, FP (rule fix), or FP (approximation fix) - - Read `external-methods-without-rules.yaml` for FN discovery (these are the methods that kill taint) - -2. **For true positives**: Generate PoC (read `generate-poc.md`), document in `vulnerabilities.md` - -3. **For false positives**: Fix rules with `pattern-not`/`pattern-sanitizers`, update tests, re-run - -4. **For false negatives** (from external methods): - - Simple propagation -> YAML config (read `create-yaml-config.md`) - - Lambda/callback methods -> Code approximation (read `create-approximation.md`) - -5. **Re-run analysis** with updated rules/config/approximations - -6. **Stop when**: - - External methods list stabilizes - - All findings classified - - High-priority vulnerabilities have PoCs - -## Working Directory Layout - -``` -/ - opentaint-analysis-plan.md - vulnerabilities.md - opentaint-project/ # Built project model - agent-rules/ # Custom rules - java/lib/ - java/security/ - agent-config/ # YAML passThrough config - custom-propagators.yaml - agent-approximations/ - src/ # Java sources (auto-compiled by the CLI) - agent-test-project/ # Rule test project - results/ - report.sarif - external-methods-without-rules.yaml # written next to report.sarif - external-methods-with-rules.yaml -``` - -## Decision Guide - -| Situation | Action | Skill | -|-----------|--------|-------| -| Need new vulnerability detection | Create join-mode rule | create-rule | -| FP: over-broad pattern | Add pattern-not/sanitizers | create-rule | -| FN: library method kills taint | Add YAML passThrough | create-yaml-config | -| FN: lambda/callback method | Code-based approximation | create-approximation | -| Confirmed vulnerability | Generate PoC | generate-poc | - -## Note: Suspected Engine Issues - -If a rule that should fire keeps missing (or firing spuriously) even though the rule -tests pass and `external-methods-without-rules.yaml` has no methods on the relevant -path, read `opentaint-issue-investigation.md`. It walks through building a minimal -rule-test reproducer, ruling out library-model gaps, pinpointing the instruction where -IFDS drops the fact via `--debug-fact-reachability-sarif`, and writing a short report. - -## Key Constraints - -- Approximations (YAML and code-based) apply ONLY to external methods -- library classes without source code -- `--approximations-config` uses OVERRIDE mode, not extend; it is repeatable -- every occurrence is OVERRIDE-merged -- `--rule-id` takes the FULL rule ID: `.yaml:` (e.g. `java/security/my-vuln.yaml:my-vulnerability`) -- `--rule-id` drops every rule whose ID is not in the filter, including library rules referenced via `refs`. List every rule you need explicitly. -- `--track-external-methods` is a boolean; files are always written as `/external-methods-{without,with}-rules.yaml` -- Duplicate approximation targeting the same class as a built-in = error -- Each rule must have test coverage before running on the real project diff --git a/agent/skills/debug-rule-reachability.md b/agent/skills/debug-rule-reachability.md deleted file mode 100644 index 695b3f4b9..000000000 --- a/agent/skills/debug-rule-reachability.md +++ /dev/null @@ -1,60 +0,0 @@ -# Skill: Debug Rule Reachability - -Generate a fact reachability SARIF report to debug why a specific rule does (or doesn't) reach certain taint sinks. - -## Prerequisites - -- Project built (build-project skill) -- Rule created and tested (create-rule, test-rule skills) - -## ⚠️ CRITICAL: Single Rule Only - -**You MUST run the analyzer with exactly ONE rule** via a single `--rule-id` flag. Running fact reachability across multiple rules will produce an enormously huge SARIF report that is effectively unusable. - -## Procedure - -### Run analysis with fact reachability debugging - -```bash -opentaint scan --project-model ./opentaint-project \ - -o ./results/fact-reachability.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --debug-fact-reachability-sarif -``` - -The `--rule-id` flag requires the **full rule ID** in the format `:`. -Example: for a rule file at `agent-rules/java/security/my-vuln.yaml` with `id: my-vulnerability`, -the full ID is `java/security/my-vuln.yaml:my-vulnerability`. - -### View results - -```bash -opentaint summary ./results/fact-reachability.sarif --show-findings -``` - -## Key Flags - -| Flag | Purpose | -|------|---------| -| `--debug-fact-reachability-sarif` | Enable fact reachability SARIF output | -| `--rule-id` | **Exactly one** rule ID (format: `:`) | -| `--ruleset` | Rule directory (repeatable). Use `builtin` for built-in rules | -| `--timeout` | Analysis timeout (default 900s) | - -## Outputs - -The debug fact reachability report is **not** the main SARIF file specified by `-o`. The analyzer writes it as a **separate file** named `debug-ifds-fact-reachability.sarif` in the same output directory as the main report. - -For example, with `-o ./results/report.sarif`: - -- **`./results/report.sarif`** — Main vulnerability findings -- **`./results/debug-ifds-fact-reachability.sarif`** — Debug fact reachability report - -Always check the output directory (`-o` parent) for this file. - -## Notes - -- This is a debug-only option intended for troubleshooting rule coverage -- Pre-compiled project models are passed via `--project-model `, not as a positional argument -- `--rule-id` drops every rule whose full ID is not listed, **including** library rules referenced via join-mode `refs`; list each library rule explicitly if you need refs resolved diff --git a/agent/skills/generate-poc.md b/agent/skills/generate-poc.md deleted file mode 100644 index 63118960e..000000000 --- a/agent/skills/generate-poc.md +++ /dev/null @@ -1,83 +0,0 @@ -# Skill: Generate PoC - -Generate a proof-of-concept exploit for a confirmed true positive vulnerability. - -## Prerequisites - -- A finding classified as TRUE POSITIVE (analyze-findings skill) -- SARIF trace read and understood - -## Procedure - -### 1. Extract vulnerability trace from SARIF - -- **Source**: Entry point + parameter (`codeFlows[0].threadFlows[0].locations[0]`) -- **Path**: Intermediate method calls -- **Sink**: Dangerous operation (`codeFlows[0].threadFlows[0].locations[-1]`) - -### 2. Construct PoC by vulnerability type - -**SQL Injection**: Input that extracts data or bypasses auth -```bash -curl "http://target:8080/api/users?id=1' OR '1'='1" -``` - -**Command Injection**: Input that executes arbitrary commands -```bash -curl "http://target:8080/api/process?cmd=;cat /etc/passwd" -``` - -**Path Traversal**: Input that accesses unauthorized files -```bash -curl "http://target:8080/api/files?path=../../../etc/passwd" -``` - -**XSS**: Input that executes JavaScript -```bash -curl "http://target:8080/api/search?q=" -``` - -**SSRF**: Input that makes the server request internal resources -```bash -curl "http://target:8080/api/fetch?url=http://169.254.169.254/latest/meta-data/" -``` - -**XXE**: XML input that reads files -```bash -curl -X POST "http://target:8080/api/parse" \ - -H "Content-Type: application/xml" \ - -d ']>&xxe;' -``` - -### 3. Document the finding - -```markdown -## VULN-001: SQL Injection in UserController.getUser - -**Severity**: Critical (CWE-89) -**Location**: `src/main/java/com/example/controller/UserController.java:45` -**Rule**: `my-vulnerability` - -### Description -User-controlled input from HTTP parameter `id` flows unsanitized into -a SQL query via `Statement.executeQuery()`. - -### Trace -1. **Source**: `UserController.getUser()` -- `request.getParameter("id")` (line 42) -2. **Flow**: String concatenation `"SELECT * FROM users WHERE id = " + input` (line 44) -3. **Sink**: `Statement.executeQuery(query)` (line 45) - -### Proof of Concept -\``` -curl "http://target:8080/api/users/1' OR '1'='1" -\``` - -### Remediation -Use parameterized queries: -\```java -PreparedStatement pstmt = conn.prepareStatement("SELECT * FROM users WHERE id = ?"); -pstmt.setString(1, input); -\``` -``` - -Write to `vulnerabilities.md` in the working directory. diff --git a/agent/skills/opentaint-issue-investigation.md b/agent/skills/opentaint-issue-investigation.md deleted file mode 100644 index 6e820e2e2..000000000 --- a/agent/skills/opentaint-issue-investigation.md +++ /dev/null @@ -1,169 +0,0 @@ -# Skill: OpenTaint Issue Investigation - -Investigate and confirm an issue in the OpenTaint analysis engine — a case where a rule -that should fire does not (or fires where it should not), and the cause is **not** the -rule's syntax or the library modeling, but the engine itself (e.g. an intra/inter-procedural -dataflow path that is cut unexpectedly). - -The deliverable is a small, self-contained reproducer plus a short write-up that points at -the exact instruction where the dataflow dies. - -## When to use this skill - -Use it after `analyze-findings` / `create-yaml-config` / `create-approximation` have been -exhausted and a finding is still missing (or spurious), even though: - -- The rule passes its own tests on isolated samples. -- `external-methods-without-rules.yaml` is empty (or irrelevant) for the relevant code path. -- Nothing about the library model is obviously wrong. - -If any of those is not true, stop and go fix the rule / add the approximation first. An -"engine issue" report is only credible once the trivial causes have been ruled out. - -## Prerequisites - -- Working rule with passing tests (`create-rule`, `test-rule`). -- Baseline scan has been run (`run-analysis`). -- `analyze-findings` has been consulted; the remaining failure is not explained by - `external-methods-without-rules.yaml`. - -## Procedure - -### 1. Build a minimal rule-test reproducer - -Shrink the original code to the smallest sample that still reproduces the problem, and put -it in a rule-test project (read `test-rule.md`). - -Choose the project shape based on what the real code needs: - -- **Plain method-level sample** — works for rules where the tainted flow stays inside one - method or crosses only ordinary Java calls. One class under `src/main/java/test/` with a - single `@PositiveRuleSample` (expected trigger) or `@NegativeRuleSample` (expected no - trigger) is enough. -- **Spring-app sub-project** — required whenever the real flow enters through a Spring - `@Controller`, uses Spring beans, or depends on dispatcher wiring. Create a dedicated - `spring-app-tests/` module with exactly one sample annotation, as described in - `test-rule.md` under *Testing Spring-app rules*. Positive and negative cases go in - separate sub-projects (e.g. `xss-spring-test-positive`, `xss-spring-test-negative`). - -Keep the sample as small as possible: remove every statement that is not needed to carry -taint from source to sink. A small reproducer is what makes the rest of the investigation -tractable — and it is what ships in the bug report. - -### 2. Confirm the issue reproduces on the test project - -Compile the test project and run the rule tests: - -```bash -opentaint compile ./agent-test-project -o ./agent-test-compiled -opentaint agent test-rules ./agent-test-compiled \ - -o ./agent-test-results \ - --ruleset builtin --ruleset ./agent-rules -``` - -Inspect `./agent-test-results/test-result.json`: - -- A `@PositiveRuleSample` that ends up in `falseNegative` reproduces a missed-detection - engine issue. -- A `@NegativeRuleSample` that ends up in `falsePositive` reproduces a spurious-detection - engine issue. -- `skipped` / `disabled` mean the rule was not actually exercised — fix the annotation - `value`/`id` or enable the rule before going further. -- `success` means the issue does **not** reproduce. Either the sample is too reduced, or - something in the original project (not in the sample) is what triggers the problem. Go - back to step 1 and add back the minimum context. - -Do not proceed until the test result matches the bug you are trying to document. - -### 3. Rule out missed external-method models - -Re-run the test with external-method tracking and read the two lists next to the SARIF -(read `analyze-findings.md`, §3): - -```bash -opentaint scan --project-model ./agent-test-compiled \ - -o ./agent-test-results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id .yaml: \ - --track-external-methods -``` - -Open `agent-test-results/external-methods-without-rules.yaml`. For every method that sits -on the source→sink path in your sample: - -- Simple propagator (getter/collection/builder) → add a YAML `passThrough` - (read `create-yaml-config.md`). -- Lambda/callback/async → add a code-based approximation - (read `create-approximation.md`). - -Re-run until that file contains **no methods on the relevant path**. Only then is it -legitimate to call the remaining failure an engine issue — otherwise you are just looking -at a missing library model. - -### 4. Locate where the dataflow dies - -Use the fact reachability debug SARIF to see exactly how far the taint travels (read -`debug-rule-reachability.md`). Run with a single `--rule-id`: - -```bash -opentaint scan --project-model ./agent-test-compiled \ - -o ./agent-test-results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id .yaml: \ - --debug-fact-reachability-sarif -``` - -Inspect `agent-test-results/debug-ifds-fact-reachability.sarif`. For a missed detection: - -1. Confirm the **source is matched** — at least one fact is reported at the source - location. If it is not, the problem is in the rule's `pattern-sources`, not the engine. -2. Walk the reachable facts along the expected path. Note the **last instruction that - still carries the fact** and the **first instruction where it is gone**. That gap is - where the engine drops the dataflow. -3. Check that the drop happens at an instruction that is **not relevant to the rule** — - e.g. a plain local assignment, a trivial method call with a modelled pass-through, a - cast, a field read. If the drop is at something the rule should handle (a recognised - sanitizer, a sink variant the rule was not written to match, etc.), the issue is still - in the rule, not in the engine. - -For a spurious detection, do the symmetric check: find the instruction where the fact -appears even though no tainted input reaches it. - -### 5. Write the investigation report - -Produce a short Markdown note (e.g. `issues/.md`) with: - -- **Reproducer** — path to the rule-test sub-project, the exact - `opentaint agent test-rules` command, and the relevant snippet from `test-result.json`. -- **Rule** — full rule ID (`.yaml:`) and the ruleset it came - from (`builtin` or `./agent-rules`). -- **Observed vs expected verdict** — e.g. *Expected: finding at `Sink.java:42`. Observed: - no finding; sample listed under `falseNegative`.* -- **Where the dataflow dies** — file, line, and the specific instruction from the fact - reachability SARIF. Quote the trace up to the last reachable fact and state which - instruction drops it. -- **Ruled-out causes** — - 1. Rule tests pass on an isolated method sample (rule syntax is fine). - 2. `external-methods-without-rules.yaml` has no methods on the relevant path (library - modeling is not the gap), or list the approximations that were added in step 3. - 3. The dropping instruction is unrelated to what the rule was meant to match (not a - sanitizer, not an unsupported sink variant, etc.). -- **Minimal hypothesis** — 1–3 sentences on what the engine is likely doing wrong at that - instruction (e.g. *"IFDS loses the fact across this `StringBuilder.append` because the - call is devirtualized to an `AbstractStringBuilder` overload that has no default - pass-through"*). Keep it short; this is a hypothesis, not a fix. - -Include only what is needed to reproduce and locate the problem. A good report is roughly -one screen of Markdown plus the rule-test sub-project. - -## Stop Condition - -The investigation is done when all of the following hold: - -- The rule-test sub-project reproduces the issue deterministically via - `opentaint agent test-rules`. -- No method on the expected source→sink path remains in - `external-methods-without-rules.yaml`. -- The fact reachability SARIF pinpoints a specific instruction where the taint is - dropped (or spuriously introduced) and that instruction is unrelated to the rule logic. -- The report in step 5 exists and is self-contained. diff --git a/agent/skills/run-analysis.md b/agent/skills/run-analysis.md deleted file mode 100644 index 7f138f38f..000000000 --- a/agent/skills/run-analysis.md +++ /dev/null @@ -1,94 +0,0 @@ -# Skill: Run Analysis - -Run OpenTaint analysis on the target project and collect results. - -## Prerequisites - -- Project built (build-project skill) -- Rules created and tested (create-rule, test-rule skills) -- Optionally: YAML config (create-yaml-config skill) and/or approximations (create-approximation skill) - -## Procedure - -### Basic analysis - -The `--rule-id` flag requires the **full rule ID** in the format `:`. -Example: for a rule file at `agent-rules/java/security/my-vuln.yaml` with `id: my-vulnerability`, -the full ID is `java/security/my-vuln.yaml:my-vulnerability`. - -Pass the pre-compiled project model via `--project-model`. The positional `scan ` -argument is reserved for source projects that the CLI will compile itself. - -```bash -opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin \ - --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --track-external-methods -``` - -### With custom passThrough config - -`--approximations-config` is repeatable; every occurrence is OVERRIDE-merged. - -```bash -opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --approximations-config ./agent-config/custom-propagators.yaml \ - --track-external-methods -``` - -### With code-based approximations - -Point `--dataflow-approximations` at a directory of Java sources. The CLI auto-compiles -`.java` files into a temp directory and forwards that to the analyzer. - -```bash -opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --dataflow-approximations ./agent-approximations/src \ - --track-external-methods -``` - -### View results - -```bash -opentaint summary ./results/report.sarif --show-findings -``` - -## Outputs - -Three files to collect — all next to the SARIF report: - -1. **`./results/report.sarif`** — Vulnerability findings with code flow traces -2. **`./results/external-methods-without-rules.yaml`** — Methods where no pass-through rules fired (**dataflow facts killed here — these cause false negatives**) -3. **`./results/external-methods-with-rules.yaml`** — Methods where pass-through rules were applied (already modeled, typically no action needed) - -The `--track-external-methods` flag is a boolean. Filenames and location are fixed: the -two YAMLs are written into the same directory as the SARIF file, using the names above. - -## Key Flags - -| Flag | Purpose | -|------|---------| -| `--project-model` | Pre-compiled project model directory (contains `project.yaml`) | -| `--ruleset` | Rule directory (repeatable). Use `builtin` for built-in rules | -| `--rule-id` | Enable only specific rules by full ID `.yaml:` (repeatable) | -| `--approximations-config` | YAML passThrough config (OVERRIDE mode, repeatable) | -| `--dataflow-approximations` | Directory of Java sources or compiled class files (repeatable) | -| `--track-external-methods` | Emit `external-methods-{without,with}-rules.yaml` next to the SARIF | -| `--severity` | Filter by severity (note, warning, error) | -| `--timeout` | Analysis timeout (default 900s) | - -## Notes - -- For a pre-compiled model, always use `--project-model `. The positional argument is only for source projects that will be compiled by the CLI. -- `--rule-id` drops every rule whose full ID is not in the filter, **including library rules referenced via join-mode `refs`**. List every rule you want active explicitly. -- `--approximations-config` uses OVERRIDE mode: custom rules replace (not extend) default config for matching methods. -- `--dataflow-approximations` accepts a directory. `.java` files are auto-compiled by the CLI; already-compiled `.class` directories are passed through as-is. -- Duplicate approximation targeting the same class as a built-in will cause an error. diff --git a/agent/skills/analyze-findings.md b/skills/analyze-findings/SKILL.md similarity index 72% rename from agent/skills/analyze-findings.md rename to skills/analyze-findings/SKILL.md index 6e7e40fcd..e7ad5329e 100644 --- a/agent/skills/analyze-findings.md +++ b/skills/analyze-findings/SKILL.md @@ -1,3 +1,12 @@ +--- +name: analyze-findings +description: Triage OpenTaint scan results — classify each finding as true positive, fixable false positive, or false negative — and pick the next action. Use when a SARIF report and external-methods YAMLs are available. +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + # Skill: Analyze Findings Interpret SARIF findings and the external methods list to classify results and plan next actions. @@ -27,21 +36,24 @@ Read the trace: - Source genuinely provides attacker-controlled data - Sink genuinely performs a dangerous operation - No sanitization between source and sink -- **Action**: Generate PoC (generate-poc skill), document in `vulnerabilities.md` +- **Action**: Generate PoC (generate-poc skill), document in `.opentaint/vulnerabilities.md` +- **Report as**: rule ID, CWE (from `runs[0].tool.driver.rules[].properties.cwe`), severity, source/sink locations, brief trace -**FALSE POSITIVE -- fixable via Rule**: Over-broad pattern matching. +**FALSE POSITIVE — fixable via Rule**: Over-broad pattern matching. - Sink pattern too broad, sanitizer not recognized, source matches non-attacker data - **Action**: Add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or narrow `metavariable-regex`. Update tests. Re-run. +- **Report as**: `suggested fix kind: pattern-not` or `pattern-sanitizers` (pick the most applicable) -**FALSE POSITIVE -- fixable via Approximation** (non-preferred): Imprecise taint propagation through a library method. +**FALSE POSITIVE — fixable via Approximation** (non-preferred): Imprecise taint propagation through a library method. - Library method modeled as propagating taint when it actually neutralizes the threat - **Action**: Override passThrough approximation. Re-run. +- **Report as**: `suggested fix kind: passThrough override` ### 3. Process external methods (FN discovery) The `--track-external-methods` flag produces two files next to the SARIF report: -- **`/external-methods-without-rules.yaml`** — Methods where the analyzer **killed dataflow facts** (no approximation model). **This is the only list worth approximating.** Every false negative caused by a missing library model is rooted here. -- **`/external-methods-with-rules.yaml`** — Methods that already have an approximation model. Do NOT target these with custom approximations or YAML `passThrough` rules — you would OVERRIDE an existing model, which is usually a regression. +- **`.opentaint/results/external-methods-without-rules.yaml`** — Methods where the analyzer **killed dataflow facts** (no approximation model). **This is the only list worth approximating.** Every false negative caused by a missing library model is rooted here. +- **`.opentaint/results/external-methods-with-rules.yaml`** — Methods that already have an approximation model. Do NOT target these with custom approximations or YAML `passThrough` rules — you would OVERRIDE an existing model, which is usually a regression. Filenames and directory are fixed; the flag is a boolean. @@ -50,9 +62,7 @@ Filenames and directory are fixed; the flag is a boolean. - Methods not listed in either file were never reached on a tainted path during the scan; approximating them is a no-op until that changes (different sources/rules/entry points). - Application-internal methods are never in these lists — approximations don't apply to them. Fix those via rule patterns, not approximations. -Read `external-methods-without-rules.yaml`. **Prioritize generic data-flow propagators** over -vulnerability-specific methods. The most common cause of killed facts is mundane collection/utility -methods, not the vulnerability-relevant operations themselves. +Read `external-methods-without-rules.yaml`. **Prioritize generic data-flow propagators** over vulnerability-specific methods. The most common cause of killed facts is mundane collection/utility methods, not the vulnerability-relevant operations themselves. **HIGH PRIORITY — Generic propagators** (affect ALL vulnerability types): - Collection operations: `List.add`/`List.get`, `Map.put`/`Map.get`, `Set.add`/`Set.iterator` diff --git a/agent/skills/build-project.md b/skills/build-project/SKILL.md similarity index 76% rename from agent/skills/build-project.md rename to skills/build-project/SKILL.md index 7eb1ad248..88dcc3623 100644 --- a/agent/skills/build-project.md +++ b/skills/build-project/SKILL.md @@ -1,6 +1,15 @@ +--- +name: build-project +description: Build a Java/Kotlin project for opentaint analysis and produce a project.yaml model. Use whenever an opentaint scan needs a project model and `opentaint compile` may need help. +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + # Skill: Build Project -Build a target project and produce a `project.yaml` model for analysis. +Build a target project and produce a `project.yaml` model for analysis ## Prerequisites @@ -21,7 +30,7 @@ Examine directory contents: ### 2a. Gradle/Maven projects (autobuilder) ```bash -opentaint compile /path/to/project -o ./opentaint-project +opentaint compile /path/to/project -o .opentaint/project ``` ### 2b. If `opentaint compile` fails — manual build + `opentaint project` @@ -39,13 +48,11 @@ mvn package -DskipTests 2. **Create the project model with `opentaint project`**: -> **CRITICAL**: Always specify `--package` to restrict analysis to project code only. -> Without `--package`, the analyzer will attempt to analyze ALL classes including third-party -> libraries, and will hang or run for hours. +> **CRITICAL**: Always specify `--package` to restrict analysis to project code only. Without `--package`, the analyzer will attempt to analyze ALL classes including third-party libraries, and will hang or run for hours. ```bash opentaint project \ - --output ./opentaint-project \ + --output .opentaint/project \ --source-root /path/to/src \ --classpath /path/to/app.jar \ --package com.example.app @@ -55,7 +62,7 @@ For multi-module projects, use multiple `--classpath` and `--package` flags: ```bash opentaint project \ - --output ./opentaint-project \ + --output .opentaint/project \ --source-root /path/to/project \ --classpath /path/to/module1/build/libs/module1.jar \ --classpath /path/to/module2/build/libs/module2.jar \ @@ -65,7 +72,7 @@ opentaint project \ ### 3. Verify -Check that `./opentaint-project/project.yaml` exists and is non-empty. +Check that `.opentaint/project/project.yaml` exists and is non-empty. ## Troubleshooting diff --git a/agent/skills/create-approximation.md b/skills/create-approximation/SKILL.md similarity index 65% rename from agent/skills/create-approximation.md rename to skills/create-approximation/SKILL.md index 8fc4f265c..31983dfb7 100644 --- a/agent/skills/create-approximation.md +++ b/skills/create-approximation/SKILL.md @@ -1,25 +1,25 @@ +--- +name: create-approximation +description: Write a Java code-based dataflow model for a library method whose taint propagation depends on lambdas, callbacks, or async chains. Use to fix false negatives that a YAML passThrough rule cannot express (see `create-yaml-config`). +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + # Skill: Create Approximation -Create code-based approximations for complex library methods involving lambdas, async, or callbacks. +Create code-based approximations for complex library methods involving lambdas, async, or callbacks. Sources live under `.opentaint/approximations/src/`. ## When approximations are actually useful -Approximations (both code-based and YAML) only change the analysis of **external methods -with no existing model**. Concretely, this means the method the approximation targets must -appear in `/external-methods-without-rules.yaml` produced by the previous scan -(see `analyze-findings` skill). An entry there means the analyzer walked through that method -and **killed the dataflow facts** because it had no rule — that's the exact gap you can fill. +Approximations (both code-based and YAML) only change the analysis of **external methods with no existing model**. Concretely, this means the method the approximation targets must appear in `.opentaint/results/external-methods-without-rules.yaml` produced by the previous scan (see `analyze-findings` skill). An entry there means the analyzer walked through that method and **killed the dataflow facts** because it had no rule — that's the exact gap you can fill. -If the method is in `external-methods-with-rules.yaml`, it is already modeled. Writing -another approximation for it is a no-op at best and conflicts with a built-in rule at worst -(duplicate-target error). Skip it. +If the method is in `external-methods-with-rules.yaml`, it is already modeled by a built-in code-based approximation. Writing another `@Approximate` class targeting the same class is a **hard runtime error** — the loader enforces a strict bijection and will abort with `IllegalArgumentException`. Skip it. -If the method is in neither list, the analyzer never reached it on a tainted path during -the scan. Adding an approximation will not change the result until the analyzer actually -observes a tainted argument flowing in. +If the method is in neither list, the analyzer never reached it on a tainted path during the scan. Adding an approximation will not change the result until the analyzer actually observes a tainted argument flowing in. -**Rule of thumb**: approximate only methods that are in the `without-rules` list **and** lie -on a code path relevant to your vulnerability (reachable between a source and a sink). +**Rule of thumb**: approximate only methods that are in the `without-rules` list **and** lie on a code path relevant to your vulnerability (reachable between a source and a sink). ## Prerequisites @@ -32,10 +32,10 @@ on a code path relevant to your vulnerability (reachable between a source and a ### 1. Create approximation source -Create Java files in `agent-approximations/src/`: +Create Java files in `.opentaint/approximations/src/`: ```java -package agent.approximations; +package com.example.approximations; import org.opentaint.ir.approximation.annotation.Approximate; import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; @@ -68,22 +68,17 @@ public class ReactiveProcessor { ### 2. Run with approximations -Point `--dataflow-approximations` at the source directory. The CLI auto-compiles `.java` -files using the analyzer JAR (for `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) -and the target project's dependencies, then forwards the compiled directory to the analyzer. -Manual `javac` invocation is not required. +Point `--dataflow-approximations` at the source directory. The CLI auto-compiles `.java` files using the analyzer JAR (for `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) and the target project's dependencies, then forwards the compiled directory to the analyzer. Manual `javac` invocation is not required. ```bash -opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ +opentaint scan --project-model .opentaint/project \ + -o .opentaint/results/report.sarif \ + --ruleset builtin --ruleset .opentaint/rules \ --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --dataflow-approximations ./agent-approximations/src + --dataflow-approximations .opentaint/approximations/src ``` -If `.java` compilation fails, the CLI reports the errors and aborts before the scan starts. -If the directory contains already-compiled `.class` files (no `.java` siblings), the CLI -passes it through unchanged. +If `.java` compilation fails, the CLI reports the errors and aborts before the scan starts. If the directory contains already-compiled `.class` files (no `.java` siblings), the CLI passes it through unchanged. ## Key Patterns @@ -104,8 +99,7 @@ passes it through unchanged. ## Validating the approximation had an effect -After re-running the scan with `--dataflow-approximations`, diff the before/after -`external-methods-without-rules.yaml`: +After re-running the scan with `--dataflow-approximations`, diff the before/after `external-methods-without-rules.yaml`: - The approximated method should disappear from `without-rules` (moves to `with-rules`) - If it does not move, your `@Approximate(...)` target class or the method signature does not match what the analyzer sees diff --git a/agent/skills/create-rule.md b/skills/create-rule/SKILL.md similarity index 74% rename from agent/skills/create-rule.md rename to skills/create-rule/SKILL.md index 3cfd3d411..8396aed44 100644 --- a/agent/skills/create-rule.md +++ b/skills/create-rule/SKILL.md @@ -1,6 +1,15 @@ +--- +name: create-rule +description: Author OpenTaint YAML pattern rules for a vulnerability class on JVM code. Use when an uncovered vulnerability needs detection, or when an existing rule needs a false-positive or false-negative fix. +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + # Skill: Create Rule -Create pattern rules for detecting specific vulnerability classes. +Create pattern rules for detecting specific vulnerability classes ## Prerequisites @@ -11,11 +20,10 @@ Create pattern rules for detecting specific vulnerability classes. ### 1. Check existing coverage -`opentaint agent rules-path` prints the absolute path to the built-in rules directory -(downloading them on first call). Use it to browse built-in patterns. +`opentaint dev rules-path` prints the absolute path to the built-in rules directory (downloading them on first call). Use it to browse built-in patterns. ```bash -RULES_DIR=$(opentaint agent rules-path) +RULES_DIR=$(opentaint dev rules-path) ls $RULES_DIR/java/lib/generic/ ls $RULES_DIR/java/lib/spring/ ls $RULES_DIR/java/security/ @@ -26,7 +34,7 @@ Read existing rules to understand patterns already covered. ### 2. Create rule directory structure ``` -agent-rules/ +.opentaint/rules/ java/ lib/ my-source.yaml @@ -37,7 +45,7 @@ agent-rules/ ### 3. Create library rules -**Source rule** (`agent-rules/java/lib/my-source.yaml`): +**Source rule** (`.opentaint/rules/java/lib/my-source.yaml`): ```yaml rules: @@ -59,7 +67,7 @@ rules: - pattern: doPost ``` -**Sink rule** (`agent-rules/java/lib/my-sink.yaml`): +**Sink rule** (`.opentaint/rules/java/lib/my-sink.yaml`): ```yaml rules: @@ -113,25 +121,22 @@ refs: ### 6. Run analysis with specific rules -The `--rule-id` flag requires the **full rule ID** in the format `:`. -The `ruleSetRelativePath` is the path to the YAML file relative to its ruleset root, **including** the `.yaml` extension. +The `--rule-id` flag requires the **full rule ID** in the format `.yaml:`. The `ruleSetRelativePath` is the path to the YAML file relative to its ruleset root, without the `.yaml` extension (it is written explicitly in the format). -Library rules referenced via join-mode `refs` are NOT auto-included by `--rule-id` — the -filter drops every rule whose full ID is not listed. Either list every library rule -explicitly, or omit `--rule-id` entirely to keep all loaded rules active. +Library rules referenced via join-mode `refs` are NOT auto-included by `--rule-id` — the filter drops every rule whose full ID is not listed. Either list every library rule explicitly, or omit `--rule-id` entirely to keep all loaded rules active. ```bash # Full rule ID = "java/security/my-vuln.yaml" (relative path with .yaml) + ":" + "my-vulnerability" (id from YAML) -opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ +opentaint scan --project-model .opentaint/project \ + -o .opentaint/results/report.sarif \ + --ruleset builtin --ruleset .opentaint/rules \ --rule-id java/security/my-vuln.yaml:my-vulnerability ``` To discover full rule IDs, read the rule YAML file: - The `id` field in the YAML gives the short ID - The file path relative to the ruleset root (with `.yaml` extension) gives the prefix -- Combine as `:`, e.g. `java/security/path-traversal.yaml:path-traversal` +- Combine as `.yaml:`, e.g. `java/security/path-traversal.yaml:path-traversal` ## Constraints diff --git a/agent/skills/create-yaml-config.md b/skills/create-yaml-config/SKILL.md similarity index 68% rename from agent/skills/create-yaml-config.md rename to skills/create-yaml-config/SKILL.md index a176b055f..786c7c742 100644 --- a/agent/skills/create-yaml-config.md +++ b/skills/create-yaml-config/SKILL.md @@ -1,23 +1,26 @@ +--- +name: create-yaml-config +description: Add a YAML passThrough model for an external library method that kills taint via simple from→to copies. Use to fix false negatives caused by unmodelled library methods on a real source→sink path (no lambdas — see `create-approximation`). +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + # Skill: Create YAML Config -Create YAML passThrough propagation rules for library methods. +Create YAML passThrough propagation rules for library methods ## When a passThrough rule actually changes the scan -A custom `passThrough` entry only affects the analyzer's behavior if the target method is -an **external method with no existing model**. In practice: the method must appear in -`/external-methods-without-rules.yaml` produced by the previous scan -(see `analyze-findings` skill). That file is exactly the list of methods where the analyzer -killed dataflow facts for lack of a rule — those are the FN sources you can fix. +A custom `passThrough` entry only affects the analyzer's behavior if the target method is an **external method with no existing model**. In practice: the method must appear in `.opentaint/results/external-methods-without-rules.yaml` produced by the previous scan (see `analyze-findings` skill). That file is exactly the list of methods where the analyzer killed dataflow facts for lack of a rule — those are the FN sources you can fix. Do not write passThrough rules for: -- Methods in `external-methods-with-rules.yaml` — already modeled; your rule will OVERRIDE the existing one, which is usually a regression. +- Methods in `external-methods-with-rules.yaml` — already modeled by a built-in YAML passThrough. Since `--approximations-config` replaces the entire built-in list, writing a custom config means you implicitly own all passThrough coverage; adding a duplicate method entry is not a hard error, but you are now responsible for all methods previously covered by built-ins. - Methods that appear in neither list — the analyzer never reached them on a tainted path during the scan; the rule will be a no-op until that changes. - Application-internal methods — approximations apply only to external library methods. -**Rule of thumb**: open `external-methods-without-rules.yaml`, pick methods on a code path -from a source to a sink relevant to the target vulnerability, and write passThrough rules -for those. +**Rule of thumb**: open `external-methods-without-rules.yaml`, pick methods on a code path from a source to a sink relevant to the target vulnerability, and write passThrough rules for those. ## Prerequisites @@ -29,7 +32,7 @@ for those. ### 1. Create config file -Create `agent-config/custom-propagators.yaml` with `passThrough:` rules. +Create `.opentaint/config/custom-propagators.yaml` with `passThrough:` rules. ### 2. Common patterns @@ -112,21 +115,20 @@ passThrough: ### 3. Run with config -`--approximations-config` is repeatable. Each occurrence is OVERRIDE-merged with the default. +`--approximations-config` is repeatable; all supplied files are merged together into a single combined config. That combined config then **replaces the entire built-in passThrough list** — not per-method, but the whole list. If the combined config is non-empty, no built-in passThrough entry is active; you own the full set. ```bash -opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ +opentaint scan --project-model .opentaint/project \ + -o .opentaint/results/report.sarif \ + --ruleset builtin --ruleset .opentaint/rules \ --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --approximations-config ./agent-config/custom-propagators.yaml \ + --approximations-config .opentaint/config/custom-propagators.yaml \ --track-external-methods ``` ### 4. Confirm the rule actually fired -Keep `--track-external-methods` enabled and diff the fresh `external-methods-without-rules.yaml` -with the baseline one: +Keep `--track-external-methods` enabled and diff the fresh `external-methods-without-rules.yaml` with the baseline one: - Every method you added a `passThrough` for should disappear from `without-rules` (it now moves to `with-rules`) - If a method does not move, the `function` matcher did not match — check package, class, name, and `overrides:` diff --git a/skills/debug-rule-reachability/SKILL.md b/skills/debug-rule-reachability/SKILL.md new file mode 100644 index 000000000..b62c0aeb6 --- /dev/null +++ b/skills/debug-rule-reachability/SKILL.md @@ -0,0 +1,70 @@ +--- +name: debug-rule-reachability +description: Produce a fact-reachability SARIF for one OpenTaint rule to see exactly where its dataflow facts get killed. Use when a rule passes its tests but still misses (or spuriously fires) on the real project. +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + +# Skill: Debug Rule Reachability + +Generate a fact reachability SARIF report to debug why a specific rule does (or doesn't) reach certain taint sinks + +## Prerequisites + +- Project model available — path provided by caller (`.opentaint/project/` from main pipeline, `.opentaint/test-compiled/` when called from `opentaint-issue-investigation`) +- Rule created and tested (create-rule, test-rule skills) + +## ⚠️ CRITICAL: Single Rule Only + +This command targets exactly ONE rule. Running fact reachability across multiple rules would produce an enormously huge SARIF report that is effectively unusable; the dedicated `opentaint dev debug-fact-reachability` command takes a single rule ID as its required argument. + +## Procedure + +### Run the debug command + +`opentaint dev debug-fact-reachability` is a separate command (not a flag on `scan`). It takes the full rule ID as its first positional argument and the source path (or a pre-compiled model via `--project-model`) as the second. + +```bash +opentaint dev debug-fact-reachability \ + java/security/my-vuln.yaml:my-vulnerability \ + --project-model .opentaint/project \ + -o .opentaint/results/fact-reachability.sarif \ + --ruleset builtin --ruleset .opentaint/rules +``` + +The rule ID requires the **full rule ID** in the format `:`. Example: for a rule file at `.opentaint/rules/java/security/my-vuln.yaml` with `id: my-vulnerability`, the full ID is `java/security/my-vuln.yaml:my-vulnerability`. + +### View results + +```bash +opentaint summary .opentaint/results/fact-reachability.sarif --show-findings +``` + +## Key Flags + +| Flag/Arg | Purpose | +|------|---------| +| `` (positional) | **Exactly one** full rule ID (`.yaml:`) — required | +| `--project-model` | Pre-compiled project model directory (skip recompilation) | +| `-o` | Path to the main SARIF output file | +| `--ruleset` | Rule directory (repeatable). Use `builtin` for built-in rules | +| `--timeout` | Analysis timeout (default 15m) | + +## Outputs + +The fact reachability report is **not** the main SARIF file specified by `-o`. The analyzer writes it as a **separate file** named `debug-ifds-fact-reachability.sarif` in the same output directory as the main report. + +For example, with `-o .opentaint/results/fact-reachability.sarif`: + +- **`.opentaint/results/fact-reachability.sarif`** — Main vulnerability findings for the single rule +- **`.opentaint/results/debug-ifds-fact-reachability.sarif`** — Debug fact reachability report + +Always check the output directory (`-o` parent) for this file. + +## Notes + +- This is a debug-only command intended for troubleshooting rule coverage +- Pre-compiled project models are passed via `--project-model `; otherwise the second positional argument is a source-path that the CLI will compile +- The command implicitly restricts the run to the one rule given as the positional argument; library rules referenced via join-mode `refs` are still resolved as needed diff --git a/agent/skills/discover-entry-points.md b/skills/discover-entry-points/SKILL.md similarity index 75% rename from agent/skills/discover-entry-points.md rename to skills/discover-entry-points/SKILL.md index 3a5653468..5e2ec50b8 100644 --- a/agent/skills/discover-entry-points.md +++ b/skills/discover-entry-points/SKILL.md @@ -1,6 +1,15 @@ +--- +name: discover-entry-points +description: Map the attack surface of a Java/Kotlin project — HTTP endpoints, message handlers, schedulers, CLI mains — into an OpenTaint analysis plan. Use when the user asks for "discovering attack surface" in context of opentaint. +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + # Skill: Discover Entry Points -Identify the attack surface of the target project by reading source code and project structure. +Identify the attack surface of the target project by reading source code and project structure ## Prerequisites @@ -36,10 +45,10 @@ Read `build.gradle`, `pom.xml`, or `project.yaml` for: ### 4. Record findings -Document entry points, data sources, and relevant vulnerability classes in `opentaint-analysis-plan.md`. +Document entry points, data sources, and relevant vulnerability classes in `.opentaint/analysis-plan.md`. ## Engine Notes -- Spring projects: The analyzer auto-discovers Spring endpoints when `--project-kind spring-web` is set +- Spring projects: The analyzer auto-discovers Spring endpoints automatically - Generic projects: The analyzer uses all public/protected methods from public project classes -- Targeted analysis: Use `--debug-run-analysis-on-selected-entry-points "com.example.Class#method"` for focused testing +- Targeted analysis: Use `opentaint dev debug-run-on-entry-points "com.example.Class#method"` for focused testing diff --git a/skills/generate-poc/SKILL.md b/skills/generate-poc/SKILL.md new file mode 100644 index 000000000..717f932ed --- /dev/null +++ b/skills/generate-poc/SKILL.md @@ -0,0 +1,87 @@ +--- +name: generate-poc +description: Build a proof-of-concept for a confirmed true-positive OpenTaint finding (SQLi, command injection, path traversal, XSS, SSRF, XXE) and document it. Use when a SARIF finding has been confirmed as a TP +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + +# Skill: Generate PoC + +Generate a proof-of-concept for a confirmed true positive finding + +## Prerequisites + +- A finding classified as TRUE POSITIVE (analyze-findings skill) +- Triage input includes: VULN number, rule ID, CWE, severity, source/sink locations, trace steps + +## Procedure + +### 1. Construct PoC by vulnerability type + +Use the source/sink location and trace from the triage to determine the HTTP route, parameter name, and payload shape. If the actual host and port are not known, use `http://:` as a placeholder. + +**SQL Injection**: Input that extracts data or bypasses auth +```bash +curl "http://:/api/users?id=1' OR '1'='1" +``` + +**Command Injection**: Input that executes arbitrary commands +```bash +curl "http://:/api/process?cmd=;cat /etc/passwd" +``` + +**Path Traversal**: Input that accesses unauthorized files +```bash +curl "http://:/api/files?path=../../../etc/passwd" +``` + +**XSS**: Input that executes JavaScript +```bash +curl "http://:/api/search?q=" +``` + +**SSRF**: Input that makes the server request internal resources +```bash +curl "http://:/api/fetch?url=http://169.254.169.254/latest/meta-data/" +``` + +**XXE**: XML input that reads files +```bash +curl -X POST "http://:/api/parse" \ + -H "Content-Type: application/xml" \ + -d ']>&xxe;' +``` + +For other CWE classes, construct an HTTP request that delivers the tainted source value to the identified sink parameter. + +### 2. Document the finding + +Use the triage input to fill in the template: + +```markdown +## : in + +**Severity**: () +**Location**: `:` +**Rule**: `` + +### Description + + +### Trace +1. **Source**: `` -- `` (line ) +2. **Flow**: +3. **Sink**: `` (line ) + +### Proof of Concept +``` + +``` + +### Remediation + +``` + +Return this markdown block as output to the main agent. The main agent appends it to `.opentaint/vulnerabilities.md`. diff --git a/skills/opentaint-agent/SKILL.md b/skills/opentaint-agent/SKILL.md new file mode 100644 index 000000000..0c1ced1bf --- /dev/null +++ b/skills/opentaint-agent/SKILL.md @@ -0,0 +1,166 @@ +--- +name: opentaint-agent +description: Run an end-to-end opentaint security analysis on a Java/Kotlin project. Build, find entry points, write rules, scan, and triage findings. Use this skill when the user asks to "find vulnerabilities", "run SAST", or "scan Java app for security issues" +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + +# Opentaint Agent -- Meta Prompt + +You are an AI security analyst using opentaint, a dataflow-based SAST analyzer for JVM projects. Your goal is to find real vulnerabilities by iteratively creating rules, running analysis, and refining results. + +If the user does not explicitly name a target, scan the current project in current folder. + +All agent-generated artifacts (project model, rules, config, approximations, test project, results, plans, reports) live under a single `.opentaint/` directory at the project root. Do not scatter files outside it. + +## Setup + +Run `opentaint dev rules-path` to get the built-in rules directory. + +## Workflow + +Execute these four phases in order. Iterate phases 2-4 until the external methods list stabilizes and all findings are classified. + +**Subagent delegation** The Delegate blocks under each step are instructions, on how to dispatch that step to a subagent. Each block is a contract: which skill the subagent should load, what inputs to pass, what output to require back, and (where it loops) the stop condition. If you have a tool for spawning subagents, follow the Delegate blocks. If you have no subagent tool, ignore the Delegate blocks safely and execute the steps directly using the named skills. + +### Phase 1: Project Setup + +1. Build the project (use the `build-project` skill). Produce `.opentaint/project/project.yaml`. + + Delegate via the `build-project` skill + - Inputs: target project root path; any known build constraints (Java version, submodules, `--package` filters) + - Output: absolute path to the model directory containing `project.yaml`, OR a one-paragraph build-failure summary with the failing command + +2. Discover entry points (use the `discover-entry-points` skill). Identify attack surface, data sources, vulnerability classes. Write `.opentaint/analysis-plan.md`. + + Delegate via the `discover-entry-points` skill + - Inputs: project root; model directory from step 1 + - Output: one-paragraph short summary of found attack surfaces. Do not require the full plan content back — read the file yourself on demand + +### Phase 2: Rule Creation + +1. Check built-in rules — read rules in `$(opentaint dev rules-path)` + +2. Create rules for uncovered vulnerability classes (use the `create-rule` skill). Library rules in `.opentaint/rules/java/lib/`, security rules in `.opentaint/rules/java/security/` + +3. Test rules (use the `test-rule` skill). Create annotated test samples with `@PositiveRuleSample` / `@NegativeRuleSample`, fix until all tests pass + +Delegate (covers the whole phase — one subagent reads the built-in rules reference, authors the rule, and tests it) via the `create-rule` and `test-rule` skills, used together as a loop +- Inputs: vulnerability class; source/sink hints from `.opentaint/analysis-plan.md`; built-in rules path (`$(opentaint dev rules-path)`) +- Subagent loop: check built-in coverage and find library rules to reference; author or edit YAML per `create-rule`; add samples and run `opentaint dev test-rules` per `test-rule`; fix patterns on `falseNegative` / `falsePositive` +- Output: full rule ID (`.yaml:`); path to the rule file; one-line test result summary +- Stop when: every sample reports `success` in `test-result.json` + +### Phase 3: Analysis + +1. Run analysis (use the `run-analysis` skill). Always pass a pre-compiled model via `--project-model`, and use full rule IDs of the form `.yaml:`: + ```bash + opentaint scan --project-model .opentaint/project \ + -o .opentaint/results/report.sarif \ + --ruleset builtin --ruleset .opentaint/rules \ + --rule-id java/security/.yaml: \ + --track-external-methods + ``` +2. Collect `.opentaint/results/report.sarif`, and next to it the fixed-name files `.opentaint/results/external-methods-without-rules.yaml` (taint-killing methods) and `.opentaint/results/external-methods-with-rules.yaml` (already modeled). The `--track-external-methods` flag is a boolean; the filenames and location are fixed by the analyzer. + +Run in main: this phase is one CLI invocation. Output files persist on disk and are consumed by Phase 4. Delegating it would only add a subagent hop without saving context — run `opentaint scan` directly + +### Phase 4: Results Interpretation and Iteration + +1. Analyze findings (use the `analyze-findings` skill). Classify each SARIF finding as TP, FP (rule fix), or FP (approximation fix). Read `external-methods-without-rules.yaml` for FN discovery (these are the methods that kill taint). + + Delegate via the `analyze-findings` skill + - Inputs: paths to `.opentaint/results/report.sarif`, `.opentaint/results/external-methods-without-rules.yaml`, `.opentaint/results/external-methods-with-rules.yaml`; the active rule IDs + - Output: structured triage — + - TPs: rule ID, CWE, severity, source/sink locations, brief trace + - FPs: rule ID and suggested fix kind (`pattern-not` / `pattern-sanitizers` / passThrough override) + - PassThrough candidates: prioritized list of generic propagators on a real source→sink path + - Approximation candidates: lambda/async methods + - Stop when: every finding is classified + +2. For true positives: generate PoC (use the `generate-poc` skill), document in `.opentaint/vulnerabilities.md`. + + Before dispatching, assign a sequential `VULN-NNN` number to each TP (e.g. VULN-001, VULN-002). + + Delegate (parallel fan-out) via the `generate-poc` skill — one subagent per TP + - Inputs (per subagent): assigned VULN number; the single TP's trace from the triage (rule ID, CWE, severity, source/sink locations, trace steps) + - Output (per subagent): PoC command; `.opentaint/vulnerabilities.md` entry text for that finding + - You then append the returned entries to `.opentaint/vulnerabilities.md` + +3. For false positives: fix rules with `pattern-not` / `pattern-sanitizers`, update tests, re-run. + + Delegate via the `create-rule` and `test-rule` skills, used as a loop (same shape as Phase 2 step 3, starting from an existing rule) + - Inputs: rule ID and path; FP triage entries from step 1; the failing trace + - Subagent loop: edit rule; add a `@NegativeRuleSample` reproducing the FP; run tests + - Output: updated rule ID; test summary + - Stop when: the new negative sample passes and prior positives still pass + +4. For false negatives (from external methods): simple propagation -> YAML config (use the `create-yaml-config` skill); lambda/callback methods -> code approximation (use the `create-approximation` skill). + + Delegate (batched by package) via `create-yaml-config` and/or `create-approximation` (pick per method shape) + - Inputs: filtered method list from the triage (only methods on a real source→sink path), grouped by package/library; existing `.opentaint/config/` and `.opentaint/approximations/` paths + - Subagent action: write the models, then re-run `opentaint scan --track-external-methods` to verify the methods moved from `external-methods-without-rules.yaml` to `external-methods-with-rules.yaml` + - Output: methods successfully moved; methods that did not move, each with a one-line reason (signature mismatch, wrong `overrides:`, etc.) + - Stop when: every targeted method either moves to `with-rules` or is reported back as not-moved with a reason + +5. Re-run analysis with updated rules/config/approximations. + + Run in main: same as Phase 3 — single CLI invocation, no delegation + +6. Stop when the external methods list stabilizes, all findings are classified, and high-priority vulnerabilities have PoCs + +## Working Directory Layout + +``` +/ + .opentaint/ + analysis-plan.md + vulnerabilities.md + project/ # Built project model + rules/ # Custom rules + java/lib/ + java/security/ + config/ # YAML passThrough config + custom-propagators.yaml + approximations/ + src/ # Java sources (auto-compiled by the CLI) + test-project/ # Rule test project + test-compiled/ # Compiled test project model + test-results/ # Rule test outputs + results/ + report.sarif + external-methods-without-rules.yaml # written next to report.sarif + external-methods-with-rules.yaml + issues/ # Engine-issue reports (when applicable) +``` + +## Decision Guide + +| Situation | Action | Skill | +|-----------|--------|-------| +| Need new vulnerability detection | Create join-mode rule | create-rule | +| FP: over-broad pattern | Add pattern-not/sanitizers | create-rule | +| FN: library method kills taint | Add YAML passThrough | create-yaml-config | +| FN: lambda/callback method | Code-based approximation | create-approximation | +| Confirmed vulnerability | Generate PoC | generate-poc | + +## Note: Suspected Engine Issues + +If a rule that should fire keeps missing (or firing spuriously) even though the rule tests pass and `external-methods-without-rules.yaml` has no methods on the relevant path, use the `opentaint-issue-investigation` skill. It walks through building a minimal rule-test reproducer, ruling out library-model gaps, pinpointing the instruction where IFDS drops the fact via `opentaint dev debug-fact-reachability`, and writing a short report. + +Delegate via the `opentaint-issue-investigation` skill (it pulls in `debug-rule-reachability` and `test-rule` as needed) +- Inputs: failing rule ID; original project location; existing triage notes; proof that no relevant method remains in `external-methods-without-rules.yaml` +- Output: path to `.opentaint/issues/.md` +- Stop when: the report exists and self-contains the reproducer plus the dropping instruction location + +## Key Constraints + +- Approximations (YAML and code-based) apply ONLY to external methods -- library classes without source code +- `--approximations-config` is repeatable; all files are merged together, then the combined result **replaces the entire built-in passThrough list** — not per-method. Passing any custom config means no built-in passThrough entry is active. +- `--rule-id` takes the FULL rule ID: `.yaml:` (e.g. `java/security/my-vuln.yaml:my-vulnerability`) +- `--rule-id` drops every rule whose ID is not in the filter, including library rules referenced via `refs`. List every rule you need explicitly. +- `--track-external-methods` is a boolean; files are always written as `/external-methods-{without,with}-rules.yaml` +- Duplicate approximation targeting the same class as a built-in = error +- Each rule must have test coverage before running on the real project diff --git a/skills/opentaint-issue-investigation/SKILL.md b/skills/opentaint-issue-investigation/SKILL.md new file mode 100644 index 000000000..dc0595c95 --- /dev/null +++ b/skills/opentaint-issue-investigation/SKILL.md @@ -0,0 +1,127 @@ +--- +name: opentaint-issue-investigation +description: Build a minimal reproducer and pinpoint the instruction where OpenTaint's engine drops a dataflow fact, then write a short engine-issue report. Use as a last resort when a rule passes its tests, the library model is complete, and the finding is still wrong. +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + +# Skill: OpenTaint Issue Investigation + +Investigate and confirm an issue in the OpenTaint analysis engine — a case where a rule that should fire does not (or fires where it should not), and the cause is **not** the rule's syntax or the library modeling, but the engine itself (e.g. an intra/inter-procedural dataflow path that is cut unexpectedly). + +The deliverable is a small, self-contained reproducer plus a short write-up that points at the exact instruction where the dataflow dies. + +## When to use this skill + +Use it after `analyze-findings` / `create-yaml-config` / `create-approximation` have been exhausted and a finding is still missing (or spurious), even though: + +- The rule passes its own tests on isolated samples. +- `external-methods-without-rules.yaml` is empty (or irrelevant) for the relevant code path. +- Nothing about the library model is obviously wrong. + +If any of those is not true, stop and go fix the rule / add the approximation first. An "engine issue" report is only credible once the trivial causes have been ruled out. + +## Prerequisites + +- Working rule with passing tests (`create-rule`, `test-rule`). +- Baseline scan has been run (`run-analysis`). +- `analyze-findings` has been consulted; the remaining failure is not explained by `external-methods-without-rules.yaml`. + +## Procedure + +### 1. Build a minimal rule-test reproducer + +Shrink the original code to the smallest sample that still reproduces the problem, and put it in a rule-test project under `.opentaint/test-project/` at the analyzed project root (read the `test-rule` skill). + +Choose the project shape based on what the real code needs: + +- **Plain method-level sample** — works for rules where the tainted flow stays inside one method or crosses only ordinary Java calls. One class under `src/main/java/test/` with a single `@PositiveRuleSample` (expected trigger) or `@NegativeRuleSample` (expected no trigger) is enough. +- **Spring-app sub-project** — required whenever the real flow enters through a Spring `@Controller`, uses Spring beans, or depends on dispatcher wiring. Create a dedicated `spring-app-tests/` module with exactly one sample annotation, as described in the `test-rule` skill under *Testing Spring-app rules*. Positive and negative cases go in separate sub-projects (e.g. `xss-spring-test-positive`, `xss-spring-test-negative`). + +Keep the sample as small as possible: remove every statement that is not needed to carry taint from source to sink. A small reproducer is what makes the rest of the investigation tractable — and it is what ships in the bug report. + +### 2. Confirm the issue reproduces on the test project + +Compile the test project and run the rule tests: + +```bash +opentaint compile .opentaint/test-project -o .opentaint/test-compiled +opentaint dev test-rules .opentaint/test-compiled \ + -o .opentaint/test-results \ + --ruleset builtin --ruleset .opentaint/rules +``` + +Inspect `.opentaint/test-results/test-result.json`: + +- A `@PositiveRuleSample` that ends up in `falseNegative` reproduces a missed-detection engine issue. +- A `@NegativeRuleSample` that ends up in `falsePositive` reproduces a spurious-detection engine issue. +- `skipped` / `disabled` mean the rule was not actually exercised — fix the annotation `value`/`id` or enable the rule before going further. +- `success` means the issue does **not** reproduce. Either the sample is too reduced, or something in the original project (not in the sample) is what triggers the problem. Go back to step 1 and add back the minimum context. + +Do not proceed until the test result matches the bug you are trying to document. + +### 3. Rule out missed external-method models + +Re-run the test with external-method tracking and read the two lists next to the SARIF (read the `analyze-findings` skill, §3): + +```bash +opentaint scan --project-model .opentaint/test-compiled \ + -o .opentaint/test-results/report.sarif \ + --ruleset builtin --ruleset .opentaint/rules \ + --rule-id .yaml: \ + --track-external-methods +``` + +Open `.opentaint/test-results/external-methods-without-rules.yaml`. For every method that sits on the source→sink path in your sample: + +- Simple propagator (getter/collection/builder) → add a YAML `passThrough` (read the `create-yaml-config` skill). +- Lambda/callback/async → add a code-based approximation (read the `create-approximation` skill). + +Re-run until that file contains **no methods on the relevant path**. Only then is it legitimate to call the remaining failure an engine issue — otherwise you are just looking at a missing library model. + +### 4. Locate where the dataflow dies + +Use the fact reachability debug command to see exactly how far the taint travels (read the `debug-rule-reachability` skill). It is a separate command, `opentaint dev debug-fact-reachability`, that takes a single full rule ID: + +```bash +opentaint dev debug-fact-reachability \ + .yaml: \ + --project-model .opentaint/test-compiled \ + -o .opentaint/test-results/report.sarif \ + --ruleset builtin --ruleset .opentaint/rules +``` + +Inspect `.opentaint/test-results/debug-ifds-fact-reachability.sarif`. For a missed detection: + +1. Confirm the **source is matched** — at least one fact is reported at the source location. If it is not, the problem is in the rule's `pattern-sources`, not the engine. +2. Walk the reachable facts along the expected path. Note the **last instruction that still carries the fact** and the **first instruction where it is gone**. That gap is where the engine drops the dataflow. +3. Check that the drop happens at an instruction that is **not relevant to the rule** — e.g. a plain local assignment, a trivial method call with a modelled pass-through, a cast, a field read. If the drop is at something the rule should handle (a recognised sanitizer, a sink variant the rule was not written to match, etc.), the issue is still in the rule, not in the engine. + +For a spurious detection, do the symmetric check: find the instruction where the fact appears even though no tainted input reaches it. + +### 5. Write the investigation report + +Produce a short Markdown note at `.opentaint/issues/.md` with: + +- **Reproducer** — path to the rule-test sub-project, the exact `opentaint dev test-rules` command, and the relevant snippet from `test-result.json`. +- **Rule** — full rule ID (`.yaml:`) and the ruleset it came from (`builtin` or `.opentaint/rules`). +- **Observed vs expected verdict** — e.g. *Expected: finding at `Sink.java:42`. Observed: no finding; sample listed under `falseNegative`.* +- **Where the dataflow dies** — file, line, and the specific instruction from the fact reachability SARIF. Quote the trace up to the last reachable fact and state which instruction drops it. +- **Ruled-out causes** — + 1. Rule tests pass on an isolated method sample (rule syntax is fine). + 2. `external-methods-without-rules.yaml` has no methods on the relevant path (library modeling is not the gap), or list the approximations that were added in step 3. + 3. The dropping instruction is unrelated to what the rule was meant to match (not a sanitizer, not an unsupported sink variant, etc.). +- **Minimal hypothesis** — 1–3 sentences on what the engine is likely doing wrong at that instruction (e.g. *"IFDS loses the fact across this `StringBuilder.append` because the call is devirtualized to an `AbstractStringBuilder` overload that has no default pass-through"*). Keep it short; this is a hypothesis, not a fix. + +Include only what is needed to reproduce and locate the problem. A good report is roughly one screen of Markdown plus the rule-test sub-project. + +## Stop Condition + +The investigation is done when all of the following hold: + +- The rule-test sub-project reproduces the issue deterministically via `opentaint dev test-rules`. +- No method on the expected source→sink path remains in `external-methods-without-rules.yaml`. +- The fact reachability SARIF pinpoints a specific instruction where the taint is dropped (or spuriously introduced) and that instruction is unrelated to the rule logic. +- The report at `.opentaint/issues/.md` exists and is self-contained. diff --git a/skills/run-analysis/SKILL.md b/skills/run-analysis/SKILL.md new file mode 100644 index 000000000..c62948447 --- /dev/null +++ b/skills/run-analysis/SKILL.md @@ -0,0 +1,98 @@ +--- +name: run-analysis +description: Run an OpenTaint scan on a built project model and produce the SARIF report plus the taint-killing-method YAMLs used for iteration. Use whenever the user asks to scan or re-scan a project. +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + +# Skill: Run Analysis + +Run OpenTaint analysis on the target project and collect results + +## Prerequisites + +- Project built (build-project skill) — model at `.opentaint/project/` +- Rules created and tested (create-rule, test-rule skills) — at `.opentaint/rules/` +- Optionally: YAML config (create-yaml-config skill) at `.opentaint/config/` and/or approximations (create-approximation skill) at `.opentaint/approximations/` + +## Procedure + +### Basic analysis + +The `--rule-id` flag requires the **full rule ID** in the format `.yaml:`. Example: for a rule file at `.opentaint/rules/java/security/my-vuln.yaml` with `id: my-vulnerability`, the full ID is `java/security/my-vuln.yaml:my-vulnerability`. + +Pass the pre-compiled project model via `--project-model`. The positional `scan ` argument is reserved for source projects that the CLI will compile itself. + +```bash +opentaint scan --project-model .opentaint/project \ + -o .opentaint/results/report.sarif \ + --ruleset builtin \ + --ruleset .opentaint/rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --track-external-methods +``` + +### With custom passThrough config + +`--approximations-config` is repeatable; every occurrence is OVERRIDE-merged. + +```bash +opentaint scan --project-model .opentaint/project \ + -o .opentaint/results/report.sarif \ + --ruleset builtin --ruleset .opentaint/rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --approximations-config .opentaint/config/custom-propagators.yaml \ + --track-external-methods +``` + +### With code-based approximations + +Point `--dataflow-approximations` at a directory of Java sources. The CLI auto-compiles `.java` files into a temp directory and forwards that to the analyzer. + +```bash +opentaint scan --project-model .opentaint/project \ + -o .opentaint/results/report.sarif \ + --ruleset builtin --ruleset .opentaint/rules \ + --rule-id java/security/my-vuln.yaml:my-vulnerability \ + --dataflow-approximations .opentaint/approximations/src \ + --track-external-methods +``` + +### View results + +```bash +opentaint summary .opentaint/results/report.sarif --show-findings +``` + +## Outputs + +Three files to collect — all next to the SARIF report: + +1. **`.opentaint/results/report.sarif`** — Vulnerability findings with code flow traces +2. **`.opentaint/results/external-methods-without-rules.yaml`** — Methods where no pass-through rules fired (**dataflow facts killed here — these cause false negatives**) +3. **`.opentaint/results/external-methods-with-rules.yaml`** — Methods where pass-through rules were applied (already modeled, typically no action needed) + +The `--track-external-methods` flag is a boolean. Filenames and location are fixed: the two YAMLs are written into the same directory as the SARIF file, using the names above. + +## Key Flags + +| Flag | Purpose | +|------|---------| +| `--project-model` | Pre-compiled project model directory (contains `project.yaml`) | +| `--ruleset` | Rule directory (repeatable). Use `builtin` for built-in rules | +| `--rule-id` | Enable only specific rules by full ID `.yaml:` (repeatable) | +| `--approximations-config` | YAML passThrough config (repeatable; all files merged, combined result replaces the entire built-in passThrough list) | +| `--dataflow-approximations` | Directory of Java sources or compiled class files (repeatable) | +| `--track-external-methods` | Emit `external-methods-{without,with}-rules.yaml` next to the SARIF | +| `--severity` | Filter by severity (note, warning, error) | +| `--timeout` | Analysis timeout (default 900s) | + +## Notes + +- For a pre-compiled model, always use `--project-model `. The positional argument is only for source projects that will be compiled by the CLI. +- `--rule-id` drops every rule whose full ID is not in the filter, **including library rules referenced via join-mode `refs`**. List every rule you want active explicitly. +- `--approximations-config` is repeatable; all supplied files are merged into one combined config, which then replaces the **entire** built-in passThrough list. If you pass any `--approximations-config`, no built-in passThrough entry is active — your files must cover everything you need. +- `--dataflow-approximations` accepts a directory. `.java` files are auto-compiled by the CLI; already-compiled `.class` directories are passed through as-is. +- Duplicate approximation targeting the same class as a built-in will cause an error. diff --git a/agent/skills/test-rule.md b/skills/test-rule/SKILL.md similarity index 79% rename from agent/skills/test-rule.md rename to skills/test-rule/SKILL.md index 0b28da220..1f71e21ca 100644 --- a/agent/skills/test-rule.md +++ b/skills/test-rule/SKILL.md @@ -1,24 +1,33 @@ +--- +name: test-rule +description: Verify an OpenTaint rule on annotated test samples, including multi-module Spring reproducers. Use whenever a rule has been written or edited before scanning real projects. +license: Apache-2.0 +metadata: + author: opentaint + version: "0.1" +--- + # Skill: Test Rule -Create test samples for a rule and verify it works correctly. +Create test samples for a rule and verify it works correctly ## Prerequisites - `opentaint` CLI available - Rules created (create-rule skill) -- Target project dependencies known +- Target project dependencies — derive from `.opentaint/analysis-plan.md` (step 3 lists detected frameworks, DB libraries, and HTTP clients) or directly from the project's `build.gradle` / `pom.xml` ## Procedure ### 1. Bootstrap test project +Start with a plain method-level project. Only switch to the Spring multi-module layout (see below) if the plain test returns `falseNegative`. + ```bash -opentaint agent init-test-project ./agent-test-project \ +opentaint dev init-test-project .opentaint/test-project \ --dependency "javax.servlet:javax.servlet-api:4.0.1" ``` -Or manually create a Gradle project with the test utility JAR and required dependencies. - ### 2. Create test samples Create Java files in `src/main/java/test/` with `@PositiveRuleSample` and `@NegativeRuleSample` annotations: @@ -55,7 +64,7 @@ public class MyVulnTest { ### 3. Build test project ```bash -opentaint compile ./agent-test-project -o ./agent-test-compiled +opentaint compile .opentaint/test-project -o .opentaint/test-compiled ``` ### 4. Run rule tests @@ -63,14 +72,14 @@ opentaint compile ./agent-test-project -o ./agent-test-compiled **Always specify `-o`** so results are written to a known location: ```bash -opentaint agent test-rules ./agent-test-compiled \ - -o ./agent-test-results \ - --ruleset builtin --ruleset ./agent-rules +opentaint dev test-rules .opentaint/test-compiled \ + -o .opentaint/test-results \ + --ruleset builtin --ruleset .opentaint/rules ``` ### 5. Interpret results -Read `./agent-test-results/test-result.json`: +Read `.opentaint/test-results/test-result.json`: - **success**: Test passed (positive triggered, negative didn't) - **falseNegative**: Positive sample did NOT trigger -> rule patterns too narrow @@ -99,7 +108,7 @@ Consequence: the annotated method is only a marker for **which rule to run and t Use a multi-module Gradle build where every `spring-app-tests/` directory is its own sub-project: ``` -agent-test-project/ +.opentaint/test-project/ ├── settings.gradle.kts ├── build.gradle.kts └── spring-app-tests/ @@ -115,7 +124,7 @@ agent-test-project/ └── SafeSink.java // carries the single @NegativeRuleSample ``` -`settings.gradle.kts` should auto-discover every `spring-app-tests/*/build.gradle.kts` so adding a new case only requires a new directory. See `rules/test/settings.gradle.kts` for a reference implementation. +`settings.gradle.kts` should auto-discover every `spring-app-tests/*/build.gradle.kts` so adding a new case only requires a new directory. See `rules/test/settings.gradle.kts` in the OpenTaint repo for a reference implementation. ### Required dependencies @@ -130,10 +139,10 @@ Each Spring sub-project must pull in at least: Compile and test the multi-module project the same way as a regular test project: ```bash -opentaint compile ./agent-test-project -o ./agent-test-compiled -opentaint agent test-rules ./agent-test-compiled \ - -o ./agent-test-results \ - --ruleset builtin --ruleset ./agent-rules +opentaint compile .opentaint/test-project -o .opentaint/test-compiled +opentaint dev test-rules .opentaint/test-compiled \ + -o .opentaint/test-results \ + --ruleset builtin --ruleset .opentaint/rules ``` Each `spring-app-tests/` sub-project becomes an independent test set and appears as its own entry in `test-result.json`. @@ -149,9 +158,7 @@ Each `spring-app-tests/` sub-project becomes an independent test set and a - `value`: Path to rule YAML file, relative to ruleset root (e.g. `java/security/my-vuln.yaml`) - `id`: Short rule ID within that file (the `id` field from the YAML, e.g. `my-vulnerability`) -**Note**: The annotation `id` field uses the **short** rule ID (as written in the YAML file). -This is different from `--rule-id` in `opentaint scan`, which requires the **full** rule ID -in the format `:` (e.g. `java/security/my-vuln.yaml:my-vulnerability`). +**Note**: The annotation `id` field uses the **short** rule ID (as written in the YAML file). This is different from `--rule-id` in `opentaint scan`, which requires the **full** rule ID in the format `:` (e.g. `java/security/my-vuln.yaml:my-vulnerability`). ## Troubleshooting From f892f0f310242fb0bd278d8023b88645eabbafba Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 14 May 2026 16:48:58 +0200 Subject: [PATCH 04/54] Fix Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 87c7ad91e..ada6d30ea 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ install: core cli printf '%s\n' \ '#!/bin/sh' \ 'set -eu' \ - 'BIN_DIR=$$(CDPATH= cd -- "$$(dirname -- "$$0")" && pwd)' \ + 'BIN_DIR=$$(CDPATH= cd -- "$$(dirname -- "$$(realpath "$$0")")" && pwd)' \ 'PREFIX_DIR=$$(CDPATH= cd -- "$$BIN_DIR/.." && pwd)' \ 'LIB_DIR="$$PREFIX_DIR/lib"' \ 'exec "$$BIN_DIR/$(CLI_BINARY_NAME)" --experimental --analyzer-jar "$$LIB_DIR/$(notdir $(ANALYZER_JAR))" --autobuilder-jar "$$LIB_DIR/$(notdir $(AUTOBUILDER_JAR))" "$$@"' \ From 91ed63cfe4405dc5e2568b3a6f971f18a83c054c Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Wed, 27 May 2026 16:57:47 +0300 Subject: [PATCH 05/54] feat: update all skills, introduce new workflow, new cli commands --- agent-mode/test/conftest.py | 8 +- cli/cmd/command_builder.go | 10 +- cli/cmd/dev_init_test_project.go | 104 +++++--- cli/cmd/dev_test_approximations.go | 62 +++++ cli/cmd/dev_test_rules.go | 224 ++++++++++-------- cli/cmd/scan.go | 16 +- .../example/approximation-rule.yaml | 15 ++ .../example/src/main/java/test/Taint.java | 14 ++ cli/internal/testapprox/testapprox.go | 57 +++++ skills/analyze-external-methods/SKILL.md | 89 +++++++ skills/analyze-findings/SKILL.md | 115 +++------ skills/appsec_agent/SKILL.md | 213 +++++++++++++++++ .../appsec_agent/references/approximations.md | 15 ++ skills/appsec_agent/references/build.md | 5 + .../appsec_agent/references/discover-rules.md | 16 ++ skills/appsec_agent/references/escalation.md | 7 + skills/appsec_agent/references/poc.md | 10 + .../references/reproduce-vulnerability.md | 14 ++ skills/appsec_agent/references/scan.md | 5 + skills/appsec_agent/references/suppress-fp.md | 11 + skills/appsec_agent/references/triage.md | 11 + .../appsec_agent/scripts/sarif-to-findings.py | 166 +++++++++++++ skills/build-project/SKILL.md | 88 +++---- skills/create-approximation/SKILL.md | 114 --------- skills/create-dataflow-approximation/SKILL.md | 121 ++++++++++ .../SKILL.md | 140 +++++++++++ skills/create-rule/SKILL.md | 123 +++++----- skills/create-test-project/SKILL.md | 83 +++++++ .../references/approximation.md | 54 +++++ skills/create-test-project/references/rule.md | 43 ++++ .../references/spring-multimodule.md | 60 +++++ skills/create-yaml-config/SKILL.md | 159 ------------- skills/debug-rule-reachability/SKILL.md | 70 ------ skills/debug-rule/SKILL.md | 76 ++++++ skills/discover-attack-surface/SKILL.md | 103 ++++++++ skills/discover-entry-points/SKILL.md | 54 ----- skills/generate-poc/SKILL.md | 108 +++++---- skills/opentaint-agent/SKILL.md | 166 ------------- skills/opentaint-issue-investigation/SKILL.md | 127 ---------- skills/report-analyzer-issue/SKILL.md | 59 +++++ skills/run-analysis/SKILL.md | 98 -------- skills/run-scan/SKILL.md | 77 ++++++ skills/run-scan/scripts/sarif-to-findings.py | 166 +++++++++++++ skills/test-rule/SKILL.md | 167 ------------- 44 files changed, 2097 insertions(+), 1346 deletions(-) create mode 100644 cli/cmd/dev_test_approximations.go create mode 100644 cli/internal/testapprox/example/approximation-rule.yaml create mode 100644 cli/internal/testapprox/example/src/main/java/test/Taint.java create mode 100644 cli/internal/testapprox/testapprox.go create mode 100644 skills/analyze-external-methods/SKILL.md create mode 100644 skills/appsec_agent/SKILL.md create mode 100644 skills/appsec_agent/references/approximations.md create mode 100644 skills/appsec_agent/references/build.md create mode 100644 skills/appsec_agent/references/discover-rules.md create mode 100644 skills/appsec_agent/references/escalation.md create mode 100644 skills/appsec_agent/references/poc.md create mode 100644 skills/appsec_agent/references/reproduce-vulnerability.md create mode 100644 skills/appsec_agent/references/scan.md create mode 100644 skills/appsec_agent/references/suppress-fp.md create mode 100644 skills/appsec_agent/references/triage.md create mode 100644 skills/appsec_agent/scripts/sarif-to-findings.py delete mode 100644 skills/create-approximation/SKILL.md create mode 100644 skills/create-dataflow-approximation/SKILL.md create mode 100644 skills/create-pass-through-approximation/SKILL.md create mode 100644 skills/create-test-project/SKILL.md create mode 100644 skills/create-test-project/references/approximation.md create mode 100644 skills/create-test-project/references/rule.md create mode 100644 skills/create-test-project/references/spring-multimodule.md delete mode 100644 skills/create-yaml-config/SKILL.md delete mode 100644 skills/debug-rule-reachability/SKILL.md create mode 100644 skills/debug-rule/SKILL.md create mode 100644 skills/discover-attack-surface/SKILL.md delete mode 100644 skills/discover-entry-points/SKILL.md delete mode 100644 skills/opentaint-agent/SKILL.md delete mode 100644 skills/opentaint-issue-investigation/SKILL.md create mode 100644 skills/report-analyzer-issue/SKILL.md delete mode 100644 skills/run-analysis/SKILL.md create mode 100644 skills/run-scan/SKILL.md create mode 100644 skills/run-scan/scripts/sarif-to-findings.py delete mode 100644 skills/test-rule/SKILL.md diff --git a/agent-mode/test/conftest.py b/agent-mode/test/conftest.py index 1a61de157..4fcc07c9f 100644 --- a/agent-mode/test/conftest.py +++ b/agent-mode/test/conftest.py @@ -360,15 +360,15 @@ def sarif_findings_for_rule(data: dict, rule_id: str) -> list: def _derive_external_methods_paths(sarif_path: Path) -> tuple: """Return the two fixed external-methods file paths next to the SARIF. - The analyzer always writes ``external-methods-without-rules.yaml`` and - ``external-methods-with-rules.yaml`` into its output directory. Here we + The analyzer always writes ``dropped-external-methods.yaml`` and + ``approximated-external-methods.yaml`` into its output directory. Here we key off the SARIF path (or its parent directory), matching how the Go CLI routes ``-o`` to ``--output-dir``. """ parent = sarif_path if sarif_path.is_dir() else sarif_path.parent return ( - parent / "external-methods-without-rules.yaml", - parent / "external-methods-with-rules.yaml", + parent / "dropped-external-methods.yaml", + parent / "approximated-external-methods.yaml", ) diff --git a/cli/cmd/command_builder.go b/cli/cmd/command_builder.go index 0e53353dd..6b4fd245e 100644 --- a/cli/cmd/command_builder.go +++ b/cli/cmd/command_builder.go @@ -58,7 +58,7 @@ type AnalyzerBuilder struct { jarPath string maxMemory string ruleIDs []string - approximationsConfig []string + passthroughApproximations []string dataflowApproximations []string trackExternalMethods bool debugFactReachabilitySarif bool @@ -146,8 +146,8 @@ func (a *AnalyzerBuilder) AddRuleID(ruleID string) *AnalyzerBuilder { return a } -func (a *AnalyzerBuilder) AddApproximationsConfig(configPath string) *AnalyzerBuilder { - a.approximationsConfig = append(a.approximationsConfig, configPath) +func (a *AnalyzerBuilder) AddPassthroughApproximations(path string) *AnalyzerBuilder { + a.passthroughApproximations = append(a.passthroughApproximations, path) return a } @@ -249,8 +249,8 @@ func (a *AnalyzerBuilder) BuildNativeCommand() []string { flags = append(flags, "--semgrep-rule-id", ruleID) } - for _, configPath := range a.approximationsConfig { - flags = append(flags, "--approximations-config", configPath) + for _, passthrough := range a.passthroughApproximations { + flags = append(flags, "--passthrough-approximations", passthrough) } for _, approxPath := range a.dataflowApproximations { diff --git a/cli/cmd/dev_init_test_project.go b/cli/cmd/dev_init_test_project.go index d833f3e7f..0d5ac3ce0 100644 --- a/cli/cmd/dev_init_test_project.go +++ b/cli/cmd/dev_init_test_project.go @@ -7,15 +7,17 @@ import ( "path/filepath" "strings" + "github.com/seqra/opentaint/internal/testapprox" "github.com/seqra/opentaint/internal/testutil" "github.com/seqra/opentaint/internal/utils" "github.com/spf13/cobra" ) -var initTestProjectDeps []string +var initRuleProjectDeps []string +var initApproxProjectDeps []string -var devInitTestProjectCmd = &cobra.Command{ - Use: "init-test-project ", +var devInitRuleProjectCmd = &cobra.Command{ + Use: "init-rule-project ", Short: "Bootstrap a rule test project with build.gradle.kts and test utility JAR", Long: `Creates a minimal Gradle project structure for testing OpenTaint rules. @@ -28,49 +30,76 @@ The project includes: Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { - outputDir := args[0] - - // 1. Create directory structure - dirs := []string{ - filepath.Join(outputDir, "libs"), - filepath.Join(outputDir, "src", "main", "java", "test"), - } - for _, d := range dirs { - if err := os.MkdirAll(d, 0o755); err != nil { - out.Fatalf("Failed to create directory %s: %s", d, err) - } - } + bootstrapTestProject(args[0], "opentaint-rule-test", initRuleProjectDeps) + fmt.Printf("Rule test project initialized at %s\n", args[0]) + }, +} - // 2. Resolve and copy opentaint-sast-test-util.jar - testUtilJarSrc, err := resolveTestUtilJar() - if err != nil { - out.Fatalf("Failed to resolve test-util JAR: %s", err) - } - testUtilJarDst := filepath.Join(outputDir, "libs", "opentaint-sast-test-util.jar") - if err := copyFile(testUtilJarSrc, testUtilJarDst); err != nil { - out.Fatalf("Failed to copy test-util JAR: %s", err) - } +var devInitApproximationProjectCmd = &cobra.Command{ + Use: "init-approximation-project ", + Short: "Bootstrap a dataflow approximation test project with the fixed Taint source/sink and rule", + Long: `Creates a minimal Gradle project structure for testing OpenTaint dataflow approximations. - // 3. Generate build.gradle.kts - if err := generateBuildGradle(outputDir, initTestProjectDeps); err != nil { - out.Fatalf("Failed to generate build.gradle.kts: %s", err) - } +The project includes: + - build.gradle.kts with compile-only dependencies + - settings.gradle.kts + - libs/opentaint-sast-test-util.jar (provides @PositiveRuleSample and @NegativeRuleSample annotations) + - approximation-rule.yaml, the fixed source->sink rule the samples are checked against + - src/main/java/test/ with Taint.java (the fixed source() and sink()) for test sample sources + - approximations/src/ directory for the approximation under test - // 4. Generate settings.gradle.kts - if err := generateSettingsGradle(outputDir); err != nil { - out.Fatalf("Failed to generate settings.gradle.kts: %s", err) +Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + bootstrapTestProject(args[0], "approximation-test-project", initApproxProjectDeps) + if err := testapprox.Scaffold(args[0]); err != nil { + out.Fatalf("Failed to scaffold approximation project: %s", err) } - - fmt.Printf("Test project initialized at %s\n", outputDir) + fmt.Printf("Approximation test project initialized at %s\n", args[0]) }, } func init() { - devCmd.AddCommand(devInitTestProjectCmd) - devInitTestProjectCmd.Flags().StringArrayVar(&initTestProjectDeps, "dependency", nil, + devCmd.AddCommand(devInitRuleProjectCmd) + devInitRuleProjectCmd.Flags().StringArrayVar(&initRuleProjectDeps, "dependency", nil, + "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") + + devCmd.AddCommand(devInitApproximationProjectCmd) + devInitApproximationProjectCmd.Flags().StringArrayVar(&initApproxProjectDeps, "dependency", nil, "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") } +// bootstrapTestProject creates the shared Gradle layout (dirs, test-util JAR, build files) +// used by both init-rule-project and init-approximation-project. +func bootstrapTestProject(outputDir, projectName string, dependencies []string) { + dirs := []string{ + filepath.Join(outputDir, "libs"), + filepath.Join(outputDir, "src", "main", "java", "test"), + } + for _, d := range dirs { + if err := os.MkdirAll(d, 0o755); err != nil { + out.Fatalf("Failed to create directory %s: %s", d, err) + } + } + + testUtilJarSrc, err := resolveTestUtilJar() + if err != nil { + out.Fatalf("Failed to resolve test-util JAR: %s", err) + } + testUtilJarDst := filepath.Join(outputDir, "libs", "opentaint-sast-test-util.jar") + if err := copyFile(testUtilJarSrc, testUtilJarDst); err != nil { + out.Fatalf("Failed to copy test-util JAR: %s", err) + } + + if err := generateBuildGradle(outputDir, dependencies); err != nil { + out.Fatalf("Failed to generate build.gradle.kts: %s", err) + } + + if err := generateSettingsGradle(outputDir, projectName); err != nil { + out.Fatalf("Failed to generate settings.gradle.kts: %s", err) + } +} + // resolveTestUtilJar finds the opentaint-sast-test-util.jar. // Resolution order: // 1. Bundled path next to binary: /lib/opentaint-sast-test-util.jar @@ -170,9 +199,8 @@ dependencies { return os.WriteFile(path, []byte(sb.String()), 0o644) } -func generateSettingsGradle(outputDir string) error { - content := `rootProject.name = "opentaint-rule-test" -` +func generateSettingsGradle(outputDir, projectName string) error { + content := fmt.Sprintf("rootProject.name = %q\n", projectName) path := filepath.Join(outputDir, "settings.gradle.kts") return os.WriteFile(path, []byte(content), 0o644) } diff --git a/cli/cmd/dev_test_approximations.go b/cli/cmd/dev_test_approximations.go new file mode 100644 index 000000000..8b54b15c2 --- /dev/null +++ b/cli/cmd/dev_test_approximations.go @@ -0,0 +1,62 @@ +package cmd + +import ( + "os" + "time" + + "github.com/seqra/opentaint/internal/testapprox" + "github.com/spf13/cobra" +) + +var ( + testApproxOutputDir string + testApproxTimeout time.Duration + testApproxMaxMemory string + testApproxDataflow []string +) + +var devTestApproximationsCmd = &cobra.Command{ + Use: "test-approximations ", + Short: "Run rule tests against annotated test samples with approximations applied", + Long: `Run rule tests against annotated test samples with the given approximations applied. + +The fixed source->sink harness rule is applied automatically; samples reference it as +` + "`@PositiveRuleSample(value = \"approximation-rule.yaml\", id = \"approximation-rule\")`" + `. + +Exit codes: + 0 All rule tests passed + 1 General failure (configuration or infrastructure error) + 252 Unhandled analyzer exception + 253 Out of memory (try increasing --max-memory) + 254 Analysis timed out (try increasing --timeout) + 255 Project configuration error`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + ruleDir, err := os.MkdirTemp("", "opentaint-approx-rule-*") + if err != nil { + out.Fatalf("Failed to create temp dir for harness rule: %s", err) + } + if _, err := testapprox.WriteFixedRule(ruleDir); err != nil { + out.Fatalf("Failed to materialize harness rule: %s", err) + } + + runTestProject(args[0], testProjectOptions{ + label: "Approximation tests", + tempDir: "opentaint-test-approximations-*", + rulesets: []string{ruleDir}, + outputDir: testApproxOutputDir, + timeout: testApproxTimeout, + maxMemory: testApproxMaxMemory, + dataflowApprox: testApproxDataflow, + }) + }, +} + +func init() { + devCmd.AddCommand(devTestApproximationsCmd) + + devTestApproximationsCmd.Flags().StringVarP(&testApproxOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") + devTestApproximationsCmd.Flags().DurationVar(&testApproxTimeout, "timeout", 600*time.Second, "Timeout for analysis") + devTestApproximationsCmd.Flags().StringVar(&testApproxMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") + devTestApproximationsCmd.Flags().StringArrayVar(&testApproxDataflow, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") +} diff --git a/cli/cmd/dev_test_rules.go b/cli/cmd/dev_test_rules.go index c0e762b35..046d2b30b 100644 --- a/cli/cmd/dev_test_rules.go +++ b/cli/cmd/dev_test_rules.go @@ -35,111 +35,145 @@ Exit codes: 255 Project configuration error`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { - projectPath := log.AbsPathOrExit(args[0], "project-model") - nativeProjectPath := filepath.Join(projectPath, "project.yaml") - - if _, err := os.Stat(nativeProjectPath); os.IsNotExist(err) { - out.Fatalf("Project model not found: %s", nativeProjectPath) - } - - // Validate max-memory - maxMemory, err := utils.ParseMemoryValue(testRulesMaxMemory) - if err != nil { - out.Fatalf("Invalid --max-memory value: %s", err) - } - - // Resolve output directory - outputDir := testRulesOutputDir - if outputDir == "" { - tmpDir, err := os.MkdirTemp("", "opentaint-test-rules-*") - if err != nil { - out.Fatalf("Failed to create temp dir: %s", err) - } - outputDir = tmpDir - // Note: temp dir is NOT cleaned up so results remain accessible to the agent. - // The agent should always specify -o to control the output location. - } else { - outputDir = log.AbsPathOrExit(outputDir, "output") - if err := os.MkdirAll(outputDir, 0755); err != nil { - out.Fatalf("Failed to create output directory: %s", err) - } - } - - // Ensure builtin rules are available - rulesPath, err := utils.GetRulesPath(globals.Config.Rules.Version) - if err != nil { - out.Fatalf("Failed to resolve rules path: %s", err) - } - if _, err := os.Stat(rulesPath); os.IsNotExist(err) { - if dlErr := utils.DownloadAndUnpackGithubReleaseAsset( - globals.Config.Owner, globals.Config.Repo, - globals.Config.Rules.Version, globals.RulesAssetName, - rulesPath, globals.Config.Github.Token, - globals.Config.SkipVerify, out, - ); dlErr != nil { - out.Fatalf("Failed to download rules: %s", dlErr) - } - } - - timeoutSeconds := int64(testRulesTimeout / time.Second) - if timeoutSeconds <= 0 { - timeoutSeconds = 600 - } + runTestProject(args[0], testProjectOptions{ + label: "Rule tests", + tempDir: "opentaint-test-rules-*", + rulesets: testRulesRuleset, + outputDir: testRulesOutputDir, + timeout: testRulesTimeout, + maxMemory: testRulesMaxMemory, + ruleIDs: testRulesRuleID, + }) + }, +} - builder := NewAnalyzerBuilder(). - SetProject(nativeProjectPath). - SetOutputDir(outputDir). - SetSarifFileName("test-results.sarif"). - SetIfdsAnalysisTimeout(timeoutSeconds). - AddRuleSet(rulesPath). - EnableRunRuleTests() +// testProjectOptions holds the inputs shared by `dev test-rules` and `dev test-approximations`. +type testProjectOptions struct { + label string + tempDir string + rulesets []string + outputDir string + timeout time.Duration + maxMemory string + ruleIDs []string + dataflowApprox []string +} - if maxMemory != "" { - builder.SetMaxMemory(maxMemory) - } +func runTestProject(projectModelArg string, opts testProjectOptions) { + projectPath := log.AbsPathOrExit(projectModelArg, "project-model") + nativeProjectPath := filepath.Join(projectPath, "project.yaml") - // Add user rulesets - for _, rs := range testRulesRuleset { - absPath := log.AbsPathOrExit(rs, "ruleset") - builder.AddRuleSet(absPath) - } + if _, err := os.Stat(nativeProjectPath); os.IsNotExist(err) { + out.Fatalf("Project model not found: %s", nativeProjectPath) + } - // Add rule ID filters - for _, ruleID := range testRulesRuleID { - builder.AddRuleID(ruleID) - } + // Validate max-memory + maxMemory, err := utils.ParseMemoryValue(opts.maxMemory) + if err != nil { + out.Fatalf("Invalid --max-memory value: %s", err) + } - analyzerJarPath, err := ensureAnalyzerAvailable() + // Resolve output directory + outputDir := opts.outputDir + if outputDir == "" { + tmpDir, err := os.MkdirTemp("", opts.tempDir) if err != nil { - out.Fatalf("Failed to resolve analyzer: %s", err) + out.Fatalf("Failed to create temp dir: %s", err) } - builder.SetJarPath(analyzerJarPath) - - javaRunner := java.NewJavaRunner(). - WithSkipVerify(globals.Config.SkipVerify). - WithDebugOutput(out.DebugStream("Analyzer")). - WithImageType(java.AdoptiumImageJRE). - TrySpecificVersion(globals.DefaultJavaVersion) - if _, err := javaRunner.EnsureJava(); err != nil { - out.Fatalf("Failed to resolve Java: %s", err) + outputDir = tmpDir + // Note: temp dir is NOT cleaned up so results remain accessible to the agent. + // The agent should always specify -o to control the output location. + } else { + outputDir = log.AbsPathOrExit(outputDir, "output") + if err := os.MkdirAll(outputDir, 0755); err != nil { + out.Fatalf("Failed to create output directory: %s", err) } - - cmdErr, err := scanProject(builder, javaRunner) - if err != nil { - out.Fatalf("Rule tests failed: %s", err) + } + + // Ensure builtin rules are available + rulesPath, err := utils.GetRulesPath(globals.Config.Rules.Version) + if err != nil { + out.Fatalf("Failed to resolve rules path: %s", err) + } + if _, err := os.Stat(rulesPath); os.IsNotExist(err) { + if dlErr := utils.DownloadAndUnpackGithubReleaseAsset( + globals.Config.Owner, globals.Config.Repo, + globals.Config.Rules.Version, globals.RulesAssetName, + rulesPath, globals.Config.Github.Token, + globals.Config.SkipVerify, out, + ); dlErr != nil { + out.Fatalf("Failed to download rules: %s", dlErr) } - analyzerFail := classifyAnalyzerError(cmdErr) - - // Always print output paths so the agent can inspect partial results - fmt.Printf("Results directory: %s\n", outputDir) - fmt.Printf("Test results: %s\n", filepath.Join(outputDir, "test-result.json")) - - if analyzerFail != nil { - os.Exit(analyzerFail.exitCode) + } + + timeoutSeconds := int64(opts.timeout / time.Second) + if timeoutSeconds <= 0 { + timeoutSeconds = 600 + } + + builder := NewAnalyzerBuilder(). + SetProject(nativeProjectPath). + SetOutputDir(outputDir). + SetSarifFileName("test-results.sarif"). + SetIfdsAnalysisTimeout(timeoutSeconds). + AddRuleSet(rulesPath). + EnableRunRuleTests() + + if maxMemory != "" { + builder.SetMaxMemory(maxMemory) + } + + // Add user rulesets + for _, rs := range opts.rulesets { + absPath := log.AbsPathOrExit(rs, "ruleset") + builder.AddRuleSet(absPath) + } + + // Add rule ID filters + for _, ruleID := range opts.ruleIDs { + builder.AddRuleID(ruleID) + } + + analyzerJarPath, err := ensureAnalyzerAvailable() + if err != nil { + out.Fatalf("Failed to resolve analyzer: %s", err) + } + builder.SetJarPath(analyzerJarPath) + + // Auto-compile .java sources in a --dataflow-approximations dir, as `scan` does. + for _, approxPath := range opts.dataflowApprox { + absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations") + compiledPath, compileErr := compileApproximationsIfNeeded(absApproxPath, analyzerJarPath, projectPath) + if compileErr != nil { + out.Fatalf("Approximation compilation failed: %s", compileErr) } - - fmt.Printf("Rule tests completed successfully\n") - }, + builder.AddDataflowApproximations(compiledPath) + } + + javaRunner := java.NewJavaRunner(). + WithSkipVerify(globals.Config.SkipVerify). + WithDebugOutput(out.DebugStream("Analyzer")). + WithImageType(java.AdoptiumImageJRE). + TrySpecificVersion(globals.DefaultJavaVersion) + if _, err := javaRunner.EnsureJava(); err != nil { + out.Fatalf("Failed to resolve Java: %s", err) + } + + cmdErr, err := scanProject(builder, javaRunner) + if err != nil { + out.Fatalf("%s failed: %s", opts.label, err) + } + analyzerFail := classifyAnalyzerError(cmdErr) + + // Always print output paths so the agent can inspect partial results + fmt.Printf("Results directory: %s\n", outputDir) + fmt.Printf("Test results: %s\n", filepath.Join(outputDir, "test-result.json")) + + if analyzerFail != nil { + os.Exit(analyzerFail.exitCode) + } + + fmt.Printf("%s completed successfully\n", opts.label) } func init() { diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index b50a67db4..c3643e643 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -34,7 +34,7 @@ var ( Recompile bool ScanLogFile string RuleID []string - ApproximationsConfig []string + PassthroughApproximations []string DataflowApproximations []string TrackExternalMethods bool DebugFactReachabilitySarif bool @@ -143,13 +143,13 @@ func addScanFlags(cmd *cobra.Command) { cmd.Flags().StringVar(&ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") cmd.Flags().StringVar(&ScanLogFile, "log-file", "", "Path to the log file (default: /logs/.log)") - cmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, "YAML passThrough approximations config (OVERRIDE mode, repeatable)") - _ = cmd.Flags().MarkHidden("approximations-config") + cmd.Flags().StringArrayVar(&PassthroughApproximations, "passthrough-approximations", nil, "passThrough approximation YAML file or directory of them (OVERRIDE mode, repeatable)") + _ = cmd.Flags().MarkHidden("passthrough-approximations") - cmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files (repeatable)") + cmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") _ = cmd.Flags().MarkHidden("dataflow-approximations") - cmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-methods-{without,with}-rules.yaml next to the SARIF report") + cmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write dropped-external-methods.yaml and approximated-external-methods.yaml next to the SARIF report") _ = cmd.Flags().MarkHidden("track-external-methods") } @@ -369,9 +369,9 @@ func scan(cmd *cobra.Command) { for _, ruleID := range RuleID { nativeBuilder.AddRuleID(ruleID) } - for _, approxConfig := range ApproximationsConfig { - absApproxConfig := log.AbsPathOrExit(approxConfig, "approximations-config") - nativeBuilder.AddApproximationsConfig(absApproxConfig) + for _, passthrough := range PassthroughApproximations { + absPassthrough := log.AbsPathOrExit(passthrough, "passthrough-approximations") + nativeBuilder.AddPassthroughApproximations(absPassthrough) } if TrackExternalMethods { nativeBuilder.SetTrackExternalMethods(true) diff --git a/cli/internal/testapprox/example/approximation-rule.yaml b/cli/internal/testapprox/example/approximation-rule.yaml new file mode 100644 index 000000000..0f0ce705a --- /dev/null +++ b/cli/internal/testapprox/example/approximation-rule.yaml @@ -0,0 +1,15 @@ +rules: + - id: approximation-rule + severity: ERROR + message: Tainted value from Taint.source() reached Taint.sink() through an approximated method + metadata: + short-description: Approximation test source-to-sink flow + languages: + - java + mode: taint + pattern-sources: + - pattern: test.Taint.source() + pattern-sinks: + - patterns: + - pattern: test.Taint.sink($VALUE) + - focus-metavariable: $VALUE diff --git a/cli/internal/testapprox/example/src/main/java/test/Taint.java b/cli/internal/testapprox/example/src/main/java/test/Taint.java new file mode 100644 index 000000000..3dede3dc8 --- /dev/null +++ b/cli/internal/testapprox/example/src/main/java/test/Taint.java @@ -0,0 +1,14 @@ +package test; + +public final class Taint { + + private Taint() { + } + + public static String source() { + return ""; + } + + public static void sink(String value) { + } +} diff --git a/cli/internal/testapprox/testapprox.go b/cli/internal/testapprox/testapprox.go new file mode 100644 index 000000000..4dc8f8189 --- /dev/null +++ b/cli/internal/testapprox/testapprox.go @@ -0,0 +1,57 @@ +// Package testapprox bundles the fixed source->sink rule the `opentaint dev test-approximations` +// harness applies, and the Taint source/sink helper scaffolded into an approximation test project. +package testapprox + +import ( + _ "embed" + "fmt" + "os" + "path/filepath" +) + +// FixedRuleFileName is the rule's path relative to the ruleset root, and the value +// samples reference in @PositiveRuleSample/@NegativeRuleSample. +const FixedRuleFileName = "approximation-rule.yaml" + +// ApproximationsSrcDir is the source root, relative to the project, where the agent writes the +// approximation under test and which is passed to test-approximations as --dataflow-approximations. +const ApproximationsSrcDir = "approximations/src" + +//go:embed example/approximation-rule.yaml +var fixedRule []byte + +//go:embed example/src/main/java/test/Taint.java +var taintJava []byte + +// WriteFixedRule writes the fixed harness rule into dir and returns its path. Used by +// test-approximations to apply the rule automatically from a throwaway ruleset directory. +func WriteFixedRule(dir string) (string, error) { + path := filepath.Join(dir, FixedRuleFileName) + if err := os.WriteFile(path, fixedRule, 0o644); err != nil { + return "", fmt.Errorf("write fixed approximation rule: %w", err) + } + return path, nil +} + +// Scaffold writes the fixed rule (for reference — test-approximations applies its own bundled copy) +// and the Taint source/sink helper, and creates the approximations source dir for the agent to write +// the approximation under test into. Samples and the approximation itself are the agent's to write. +func Scaffold(projectDir string) error { + files := map[string][]byte{ + filepath.Join(projectDir, FixedRuleFileName): fixedRule, + filepath.Join(projectDir, "src", "main", "java", "test", "Taint.java"): taintJava, + } + for path, content := range files { + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return fmt.Errorf("create %s: %w", filepath.Dir(path), err) + } + if err := os.WriteFile(path, content, 0o644); err != nil { + return fmt.Errorf("write %s: %w", filepath.Base(path), err) + } + } + approxDir := filepath.Join(projectDir, filepath.FromSlash(ApproximationsSrcDir), "approx") + if err := os.MkdirAll(approxDir, 0o755); err != nil { + return fmt.Errorf("create %s: %w", approxDir, err) + } + return nil +} diff --git a/skills/analyze-external-methods/SKILL.md b/skills/analyze-external-methods/SKILL.md new file mode 100644 index 000000000..aa1db364c --- /dev/null +++ b/skills/analyze-external-methods/SKILL.md @@ -0,0 +1,89 @@ +--- +name: analyze-external-methods +description: Analyze and group an OpenTaint scan's dropped external methods and decide what to approximate or skip. Use when a dropped-external-methods.yaml needs turning into approximation targets +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Analyze External Methods + +Read the methods where the analyzer killed taint, group them by library and kind, and record per group what to model and how — so the right skill can build each approximation + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Dropped methods `` — methods where the analyzer killed dataflow facts for lack of a model. Default: `.opentaint/results/dropped-external-methods.yaml` +- Tracking directory `` — where approximation tracking files are written. Default: `.opentaint/tracking` +- Project root `` — sources and build files, to resolve which library owns each method. Default: current directory + +## Workflow + +Requires ``, without it there's nothing to group + +### 1. Group by package and kind + +Every method in `` is a taint-killing path — model all of them. First decide each method's kind: + +- passthrough — taint moves by a simple from→to copy: a getter, arg→result, builder, container field, collection `add`/`get`, `StringBuilder.append`, `Stream.collect` +- dataflow — taint flows through a lambda/callback/functional interface or an async chain + +Group by package AND kind — one tracking file per (package, kind): `-passthrough.yaml` for the simple copies, `-dataflow.yaml` for the lambda/callback/async ones. The two kinds are built by different skills with different stages, so they're separate units; kind is the only split (no finer sub-groups). Each unit is one agent's work + +### 2. Flag methods to skip + +The one exception: a few methods the engine asks about don't carry taint — logging, metrics, sanitizers (e.g. `org.slf4j.Logger#info`). List those in `skipped.yaml` instead of an approximation group; the default call-to-return behavior is already correct for them + +## Output + +- One `/approximations/-.yaml` per (package, kind), with `stages.description: done` and its `methods` (each `target` + `type`); a dataflow unit also carries `dependencies` (the library's exact Maven GAV its test project needs) +- `/approximations/skipped.yaml` listing the skip methods +- A brief summary to the caller: one line per unit (package, kind, method count) plus the skip count. Don't paste the method lists back — the tracking files hold them + +## Tracking + +Create one file per (package, kind); fill only the discovery-stage fields. The two kinds differ — passThrough is written and verified by the scan, dataflow is built and tested on a test project: + +```yaml +# -passthrough.yaml — simple copies, no test project +package: com.foo +stages: + description: done + written: pending +notes: > + DTO getters returning tainted fields +methods: + - target: "com.foo.Wrapper#getValue" + type: passthrough +``` + +```yaml +# -dataflow.yaml — lambda/callback/async, tested on a test project +package: com.foo +dependencies: # exact GAV the test project needs, from the build files + - com.foo:foo-core:1.2.3 +stages: + description: done + test_project: pending + tests_passing: pending +notes: > + Reactor operators carrying taint through the mapper +methods: + - target: "com.foo.Reactor#flatMap" + type: dataflow +``` + +```yaml +# skipped.yaml — engine asks to approximate these, but they don't carry taint +methods: + - "org.slf4j.Logger#info" + - "org.slf4j.Logger#debug" +``` + +## Gotchas + +- Model every method in `` — each is a real taint-killing path; don't second-guess the list. The only exceptions are the obvious non-taint methods you move to `skipped.yaml` +- Approximate only external library methods — never an application-internal class. An internal method that drops taint is a rule or engine matter, not an approximation target; if one shows up as a candidate, drop it +- One file = one (package, kind) = one agent: passThrough and dataflow go in separate files — different skills, different stages; never put a method in two, or two agents collide diff --git a/skills/analyze-findings/SKILL.md b/skills/analyze-findings/SKILL.md index e7ad5329e..30cce6589 100644 --- a/skills/analyze-findings/SKILL.md +++ b/skills/analyze-findings/SKILL.md @@ -1,105 +1,68 @@ --- name: analyze-findings -description: Triage OpenTaint scan results — classify each finding as true positive, fixable false positive, or false negative — and pick the next action. Use when a SARIF report and external-methods YAMLs are available. +description: Triage OpenTaint findings — split a rule's results into distinct vulnerabilities and classify each true positive or false positive. Use when scan findings need a TP/FP verdict license: Apache-2.0 metadata: author: opentaint - version: "0.1" + version: "0.2" --- # Skill: Analyze Findings -Interpret SARIF findings and the external methods list to classify results and plan next actions. +A finding file bundles all of one rule's results. Read each result's code flow, split the bundle into distinct vulnerabilities, and give each a TP/FP verdict on its own evidence -## Prerequisites +## Inputs -- Analysis run complete (run-analysis skill) -- SARIF report and external methods YAML available +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -## Procedure +- Findings to triage `` — the finding tracking file(s); each bundles all of one rule's SARIF results in `sarif_hashes` +- SARIF report `` — the raw scan output holding the code-flow traces. Default: `.opentaint/results/report.sarif` -### 1. Read SARIF findings +## Workflow -For each finding in `runs[0].results[]`: -- `ruleId`: Which rule triggered -- `locations[]`: Sink location (file, line) -- `codeFlows[]`: Taint trace from source to sink +### 1. One result at a time — STOP checklist -Read the trace: -- First location = **source** (where tainted data enters) -- Last location = **sink** (where tainted data is used dangerously) -- Intermediate locations = dataflow path +For each hash in the bundle, before any verdict: -### 2. Classify each finding +- found its SARIF result via `sarif_hashes` and read the raw `codeFlows[]` +- walk every step, source → hops → sink, confirming it's the same tainted value end to end +- judging each result on its own trace — no verdict shared across results just because they share the rule -**TRUE POSITIVE (TP)**: Real vulnerability. -- Source genuinely provides attacker-controlled data -- Sink genuinely performs a dangerous operation -- No sanitization between source and sink -- **Action**: Generate PoC (generate-poc skill), document in `.opentaint/vulnerabilities.md` -- **Report as**: rule ID, CWE (from `runs[0].tool.driver.rules[].properties.cwe`), severity, source/sink locations, brief trace +### 2. Split the bundle into logical findings -**FALSE POSITIVE — fixable via Rule**: Over-broad pattern matching. -- Sink pattern too broad, sanitizer not recognized, source matches non-attacker data -- **Action**: Add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or narrow `metavariable-regex`. Update tests. Re-run. -- **Report as**: `suggested fix kind: pattern-not` or `pattern-sanitizers` (pick the most applicable) +The results in the file all fired one rule, but may be several different vulnerabilities. Keep results that are the same vulnerability (same sink, same essential flow) together as one finding; move genuinely distinct ones (different sink, or a different flow) into their own finding file with a new `finding_name` and their `sarif_hashes` -**FALSE POSITIVE — fixable via Approximation** (non-preferred): Imprecise taint propagation through a library method. -- Library method modeled as propagating taint when it actually neutralizes the threat -- **Action**: Override passThrough approximation. Re-run. -- **Report as**: `suggested fix kind: passThrough override` +### 3. Classify and record -### 3. Process external methods (FN discovery) +Verdict each logical finding from its flow: -The `--track-external-methods` flag produces two files next to the SARIF report: -- **`.opentaint/results/external-methods-without-rules.yaml`** — Methods where the analyzer **killed dataflow facts** (no approximation model). **This is the only list worth approximating.** Every false negative caused by a missing library model is rooted here. -- **`.opentaint/results/external-methods-with-rules.yaml`** — Methods that already have an approximation model. Do NOT target these with custom approximations or YAML `passThrough` rules — you would OVERRIDE an existing model, which is usually a regression. +- TP — the source is attacker-controlled, the sink is genuinely dangerous with that input, and nothing sanitizes it in between +- FP — a sanitizer/validator neutralizes it, the source isn't actually attacker-controlled (config, constant, server-set), the sink is safe for this input (parameterized, escaped), or the path is infeasible. Record which one, so the suppress-FP stage knows what to narrow -Filenames and directory are fixed; the flag is a boolean. +Set `verdict` and append the reasoning to `notes`, below the analyzer report already seeded there. Leave `poc` for generate-poc -**Approximation scope — hard rules**: -- Only methods listed in `external-methods-without-rules.yaml` are candidates for a new YAML `passThrough` rule or a code-based approximation. -- Methods not listed in either file were never reached on a tainted path during the scan; approximating them is a no-op until that changes (different sources/rules/entry points). -- Application-internal methods are never in these lists — approximations don't apply to them. Fix those via rule patterns, not approximations. +## Output -Read `external-methods-without-rules.yaml`. **Prioritize generic data-flow propagators** over vulnerability-specific methods. The most common cause of killed facts is mundane collection/utility methods, not the vulnerability-relevant operations themselves. +- Each logical finding in its own file with `verdict` set and the rationale in `notes` +- A brief summary to the caller: one line per finding — name, verdict, one-clause reason -**HIGH PRIORITY — Generic propagators** (affect ALL vulnerability types): -- Collection operations: `List.add`/`List.get`, `Map.put`/`Map.get`, `Set.add`/`Set.iterator` -- String operations: `StringBuilder.append`/`toString`, `StringBuffer.append` -- Wrapper/DTO getters/setters: `Container.getValue`, `Pair.getFirst` -- Stream/iterator methods: `Iterator.next`, `Stream.collect` -- **Action**: Create `passThrough` YAML rules (create-yaml-config skill) +## Tracking -**MEDIUM PRIORITY — Lambda/callback methods**: -- Example: `ReactiveStream#map(Function)` — taint flows through the function -- Example: `CompletableFuture#thenApply(Function)` — async propagation -- **Action**: Create code-based approximation (create-approximation skill) +Editing an existing finding touches only `verdict` and `notes`. A split also creates a new finding file — give it the full shape, copying `rule_id` from the bundle and moving over the results' `sarif_hashes` and their analyzer report: -**LOW PRIORITY — Vulnerability-specific methods**: -- These are usually already modeled in built-in rules. Only add if missing. -- **Action**: Check `external-methods-with-rules.yaml` first; if present, skip. +```yaml +finding_name: # a fresh docker-like name for the split-off vuln +sarif_hashes: [, ...] # hashes matching this logical vulnerability +rule_id: java/security/sqli.yaml:sqli # same rule as the bundle it came from +verdict: TP # pending | TP | FP +notes: > + + triage: @RequestParam orderBy is attacker-controlled; reaches ${} in SelectProvider unsanitized → TP +poc: pending +poc_script: null +``` -**NEUTRAL**: Irrelevant to taint flow (logging, metrics, sanitizers). -- **Action**: Skip — default call-to-return passthrough is correct +## Gotchas -### 4. Batch processing - -- Filter `external-methods-without-rules.yaml` to methods on a plausible source→sink path for the current vulnerability class; approximating methods that sit outside that path wastes iteration time. -- Group the filtered methods by package/library -- **Start with generic propagators** (collections, strings, wrappers) — they affect all rules -- Check built-in coverage first (many common libraries already have approximations — cross-check against `external-methods-with-rules.yaml`) -- Generate comprehensive rules per library -- Re-run with `--track-external-methods` after each batch; verify the approximated methods actually moved from `without-rules` to `with-rules`, and check for finding regressions - -## Decision Priorities - -- **FN fixes**: (1) YAML passThrough rule, (2) Code-based approximation (lambdas only), (3) Rule pattern fix -- **FP fixes**: (1) Rule fix via `pattern-not`/`pattern-sanitizers` (preferred), (2) PassThrough override (non-preferred) - -## Stop Condition - -Stop iterating when: -- External methods list stabilizes (no new methods appear) -- All SARIF findings are classified as TP or resolved FP -- High-priority vulnerabilities have PoCs +- Bulk verdicts are the most common triage error — many results under one shared rationale with the traces unread. One trace, one judgment +- A rule's bundle is not one finding — split distinct vulnerabilities apart, but keep true duplicates (same sink and flow) together as one finding with multiple `sarif_hashes` diff --git a/skills/appsec_agent/SKILL.md b/skills/appsec_agent/SKILL.md new file mode 100644 index 000000000..6d30f05e7 --- /dev/null +++ b/skills/appsec_agent/SKILL.md @@ -0,0 +1,213 @@ +--- +name: appsec_agent +description: Run an end-to-end application-security analysis on a JVM project with OpenTaint — build, scan, model missing library methods, triage, and confirm vulnerabilities. Use when the user asks to find vulnerabilities, run SAST, or scan a Java/Kotlin app for security issues +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# AppSec Agent + +Orchestrate an end-to-end OpenTaint analysis of a JVM project: run the workflow the user picks by dispatching each step to a subagent that loads one leaf skill, verifying the artifact it returns, and tracking progress. The leaf work is never done here. OpenTaint is a dataflow (taint) SAST analyzer; the goal is real, confirmed vulnerabilities. + +The run is one pipeline of a few steps, each gated by the chosen workflow; a step's detail lives in a reference loaded when you reach it, while what every workflow shares stays in this file. Default to the current directory when no target is named. + +Keep every artifact under one `.opentaint/` directory at the project root — models, rules, configs, approximations, test projects, results, tracking, PoCs, reports. Don't scatter files outside it. + +## Setup + +Run `opentaint dev rules-path` once to learn the built-in rules directory; built-ins always load, custom rules go under `.opentaint/rules`. + +## Choose a workflow + +Begin by asking the user which workflow to run — a single AskUserQuestion offering the presets only, each option's description giving its composition: + +- fast — scan: lite, triage: static +- default — scan: normal, triage: static, suppress-FP: optional +- ultra — scan: deep, triage: dynamic, suppress-FP: on +- reproduce-vulnerability — anchored on a vulnerability the user asserts exists; deep scan + dynamic triage + +The tool adds an Other choice; if the user takes it, ask for any custom steps — a custom combination of scan level (lite/normal/deep), triage level (static/dynamic), and suppress-FP (on/off). Record the resolved levels in `state.yaml`. + +Levels, once chosen: + +- scan — lite (build + scan with existing rules) · normal (+ approximation iteration) · deep (+ discover-attack-surface + new rules, fixed first) +- triage — static (classify from the model) · dynamic (+ a PoC per confirmed TP) +- suppress-FP — a post-triage stage that fixes confirmed false positives on rules you own + +The run is one fixed pipeline; the levels decide which steps execute. Walk it top to bottom — when you reach a step your levels include, load its reference and do it; skip the bracketed steps your levels omit. Don't load a step's reference until you reach it. + +``` +build → references/build.md every level +[deep] discover + new rules → references/discover-rules.md deep +scan → references/scan.md every level +[normal/deep] approximation iteration → references/approximations.md normal, deep +triage (generate findings + classify) → references/triage.md every level +[suppress-FP] → references/suppress-fp.md when suppress-FP is on +[dynamic] PoC + assemble vulnerabilities → references/poc.md dynamic +``` + +Which steps each preset runs: + +- fast — build, scan, triage +- default — build, scan, approximations, triage, [suppress-FP] +- ultra — build, discover-rules, scan, approximations, triage, suppress-FP, poc +- reproduce-vulnerability — references/reproduce-vulnerability.md walks the same steps anchored on the asserted vuln + +From inside any step, when a rule or approximation won't behave, load references/escalation.md. Only the approximation iteration loops (it re-scans internally); new rules are fixed before it. + +## Delegation + +Every block's work runs in subagents. Dispatch each with this template: + +``` +Invoke the Skill tool with skill_id= first, then do the task. +Inputs: + : # one line per input the skill lists +Return: + , plus the exact command you ran to verify +Do not run `opentaint scan`. Do not write `.opentaint/vulnerabilities.md`. +``` + +Universal rules — every dispatch, every workflow: + +- open the prompt with the Skill-load line — the subagent has none of this context until it loads its skill +- pass resolved paths (the ``-keyed `.opentaint/...` paths from Working directory layout), never the placeholder tokens +- read the named output artifact yourself before continuing — a claim is not an artifact +- only the scan agent (run-scan) runs `opentaint scan`; no rule, approximation, or triage subagent scans +- only you write `.opentaint/vulnerabilities.md` and `.opentaint/tracking/state.yaml` +- never swap the project model mid-analysis; every run uses the same model +- never triage yourself — verdicts come only from analyze-findings subagents + +Orchestration practices: + +- one unit, one subagent — rules, approximation units, and finding files are independent (unique `` paths), so dispatch them as a parallel fan-out, no races +- the sole sequential exception is PoC (shared app state and ports); see references/poc.md +- write `state.yaml` at each fan-out join — a phase flips to `done` only once every unit's artifact exists on disk + +## State and resumption + +You are the only writer of `.opentaint/tracking/state.yaml` — it records the chosen levels and every phase's status, written after each fan-out join. + +On start, and after any compaction, reconstruct position from artifacts before doing anything — never replay a completed phase: + +- read `state.yaml` and the `tracking/` tree +- skip any phase whose artifact exists: `project.yaml` → build; `report.sarif` → scan; a rule's `artifact` + `tests_passing: done` → that rule; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd +- detect new work from artifacts, not memory: finding files with `verdict: pending` (a fresh or reset scan) → triage; methods in `dropped-external-methods.yaml` not yet in any approximation unit → approximations + +## Tracking layout + +The single source of truth for the tracking schema; each skill writes only its own slice (named in its block reference). + +``` +.opentaint/tracking/ + state.yaml # you only — levels + phase status + findings/.yaml # one per logical finding (from the SARIF→finding script; split by triage) + rules/.yaml # one per rule + approximations/-passthrough.yaml # simple from→to copies; write-only, scan-verified + approximations/-dataflow.yaml # lambda/callback/async; tested on a test project + approximations/skipped.yaml # methods the engine asks for but that carry no taint +``` + +state.yaml: + +```yaml +mode: ultra # fast | default | ultra | reproduce-vulnerability | custom +scan_level: deep # lite | normal | deep +triage_level: dynamic # static | dynamic +suppress_fp: true +phases: # pending | in_progress | done + build: done + discover: done # deep only + rules: done # deep only; fixed first + scan: done + approximations: in_progress # normal/deep; iterative, rescans within + triage: pending + suppress_fp: pending # after triage + poc: pending # dynamic triage +``` + +findings/.yaml — created by the SARIF→finding script; `verdict`/`notes` by analyze-findings; `poc`/`poc_script` by generate-poc: + +```yaml +finding_name: brave-hopper +sarif_hashes: [, ...] +rule_id: java/security/sqli.yaml:sqli +verdict: pending # pending | TP | FP +notes: > # analyzer report, then triage and PoC notes + +poc: pending # pending | confirmed | failed +poc_script: null # path under .opentaint/pocs/ once generate-poc writes one +``` + +rules/.yaml — created by discover-attack-surface (`description`); `test_project` by create-test-project; `tests_passing` + `rule_id` + `artifact` by create-rule: + +```yaml +name: mybatis-sqli +rule_id: null # filled on creation +artifact: null # added once the rule file exists +finding: null # finding_name; non-null only for suppress-FP +requirements: > + CWE-89 SQLi via MyBatis ${} ; source @RequestParam orderBy ; sink ${} in SelectProvider +dependencies: [org.mybatis:mybatis:3.5.13] +stages: # pending | in_progress | done + description: done + test_project: pending + tests_passing: pending +notes: > + free-form +``` + +approximations/-.yaml — created by analyze-external-methods (`description` + `methods`); the stages differ by kind: + +```yaml +package: com.foo +artifact: null # added once the file exists +stages: + description: done + written: pending # passthrough only (write-only, scan-verified) + # test_project / tests_passing # dataflow only (built and tested) +# dependencies: [...] # dataflow only — the GAVs its test project needs +methods: + - target: "com.foo.Wrapper#getValue" + type: passthrough # passthrough | dataflow (matches the file kind) +notes: > + free-form +``` + +approximations/skipped.yaml: + +```yaml +methods: # engine asks to approximate these, but they carry no taint + - "org.slf4j.Logger#info" +``` + +## Working directory layout + +``` +/.opentaint/ + project/ # built project model (project.yaml) + rules/java/{lib/generic,lib/spring,security}/ # custom rules + config/.yaml # passThrough approximation configs + approximations/src// # code-based (dataflow) approximation sources + test-projects// # per-unit test project sources + test-compiled// # per-unit compiled test model + test-results// # per-unit test outputs + results/ + report.sarif + dropped-external-methods.yaml # taint-killing methods → approximate + approximated-external-methods.yaml # already modeled + pocs/.py # PoC scripts + issues/.md # engine-issue reports + tracking/ # see Tracking layout + vulnerabilities.md # you assemble this from confirmed findings +``` + +## Key constraints + +- approximations apply only to external library methods — never an application-internal class +- `--passthrough-approximations` merges with built-ins at the rule level; a provided rule overrides a built-in only when it matches one already there — it does not replace the built-in set +- both approximation dir flags walk the tree recursively, so the final scan points at the parent dirs and applies every unit +- `--rule-id` drops every rule not named, including library `refs` — list them all when restricting +- a custom approximation targeting a class that already has a built-in one errors at load diff --git a/skills/appsec_agent/references/approximations.md b/skills/appsec_agent/references/approximations.md new file mode 100644 index 000000000..b7b5b53b7 --- /dev/null +++ b/skills/appsec_agent/references/approximations.md @@ -0,0 +1,15 @@ +# Approximation iteration + +The step that models the library methods killing taint, run on normal and deep after the first scan, looping to stabilization. The rescans are part of this block — load references/scan.md for each. Dispatch per the Delegate template in SKILL.md. + +Loop until stabilization: + +1. analyze-external-methods — Inputs: dropped-file `.opentaint/results/dropped-external-methods.yaml`, tracking-dir `.opentaint/tracking`, ``. Writes one `approximations/-passthrough.yaml` and/or `-dataflow.yaml` per package, plus `skipped.yaml`, only for methods not already in a unit. Returns one line per unit +2. Fan out per unit: + - passthrough → create-pass-through-approximation — Inputs: `` from the unit, ``, config-file `.opentaint/config/.yaml`. Write-only; sets `written` + `artifact`. No test project + - dataflow → create-test-project (dataflow shape) then create-dataflow-approximation — test-compiled `.opentaint/test-compiled/`, approx-src `.opentaint/approximations/src/`. Sets `test_project`, then `tests_passing` + `artifact` (test-approximations auto-applies its own fixed rule — nothing to pass) +3. Re-scan (references/scan.md) with both approximation dirs pointing at the parents (`.opentaint/config`, `.opentaint/approximations/src`) +4. Pass-through verify (no separate skill): the scan agent reports any method you modeled that is still in `dropped-external-methods.yaml`, or any config load error. Re-invoke that package's create-pass-through-approximation agent to fix (matcher / from→to / YAML), then rescan. A dataflow method that still drops despite passing its isolated test is an escalation case (references/escalation.md), not a re-write +5. Stabilization: stop when no method on a source→sink path remains unmodeled and a rescan surfaces no new such methods (equivalently, byte-equal SARIF across rescans). Otherwise feed the newly dropped methods back into step 1 + +Set `phases.approximations: in_progress` across the loop, `done` at stabilization. diff --git a/skills/appsec_agent/references/build.md b/skills/appsec_agent/references/build.md new file mode 100644 index 000000000..161a701ff --- /dev/null +++ b/skills/appsec_agent/references/build.md @@ -0,0 +1,5 @@ +# Build + +The build step, run in every workflow. Dispatch per the Delegate template in SKILL.md; write only the slice named here. + +Delegate build-project. Inputs: ``, model-out `.opentaint/project`, any build constraints (Java version, submodules, `--package` filters). Verify `.opentaint/project/project.yaml` exists, is non-empty, and — for a multi-module project — covers the expected module count, not just that the file is present. Set `phases.build: done`. diff --git a/skills/appsec_agent/references/discover-rules.md b/skills/appsec_agent/references/discover-rules.md new file mode 100644 index 000000000..de77ee2ec --- /dev/null +++ b/skills/appsec_agent/references/discover-rules.md @@ -0,0 +1,16 @@ +# Discover + new rules + +The deep-scan step that maps the attack surface and writes the rules to cover it, run after build and before the scan (deep, and the reproduce-vulnerability workflow). New rules are fixed here, before any approximation iteration. Dispatch per the Delegate template in SKILL.md. + +## Discover attack surface + +Delegate discover-attack-surface. Inputs: ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. It creates one `tracking/rules/.yaml` per proposed rule (`description` stage + requirements + dependencies) and returns one line per rule. Don't ask for the full analysis back. Set `phases.discover: done`. + +## Rules + +Fan out the rule units (one subagent each); per unit a two-step loop: + +1. create-test-project — Inputs: spec = the rule's `requirements`, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the tracking file. Sets `test_project: done` +2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``. Iterates `opentaint dev test-rules` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` + +If create-rule can't converge after repeated attempts, load references/escalation.md. Set `phases.rules: done` once every rule's `tests_passing` is done. diff --git a/skills/appsec_agent/references/escalation.md b/skills/appsec_agent/references/escalation.md new file mode 100644 index 000000000..06547770a --- /dev/null +++ b/skills/appsec_agent/references/escalation.md @@ -0,0 +1,7 @@ +# Escalation block + +Load this when a create-rule / create-pass-through / create-dataflow agent can't make its samples pass after repeated attempts, or a rule passes its tests yet is wrong on the real scan. Dispatch per the Delegate template in SKILL.md. These skills write no tracking files. + +1. debug-rule — Inputs: the `` to trace (for an approximation, the rule whose sample routes taint through the modeled method), the `` and `` of the run that showed the problem, ``, and the approximation dirs if the flow depends on them. Returns a diagnosis: rule fix, missing library model, or engine issue +2. Route by cause: a rule cause goes back to create-rule (references/discover-rules.md), a model cause back to the relevant create-*-approximation agent (references/approximations.md); an engine cause goes to step 3 +3. report-analyzer-issue — Inputs: the ``, the existing `` / ``, the `` (rule full id, or the approximation's target methods), and `` (you decide whether to also file at github.com/seqra/opentaint). It writes `.opentaint/issues/.md` diff --git a/skills/appsec_agent/references/poc.md b/skills/appsec_agent/references/poc.md new file mode 100644 index 000000000..a9825d604 --- /dev/null +++ b/skills/appsec_agent/references/poc.md @@ -0,0 +1,10 @@ +# PoC + +The dynamic-confirmation step, run on a dynamic run after triage. Confirm each TP on a running instance, then assemble the report. Dispatch per the Delegate template in SKILL.md. + +Run PoCs one subagent at a time, never in parallel — concurrent exploits race on shared app state and ports. For each TP finding: + +- first finding: generate-poc with no `` — it builds and starts the app and returns the `` it started +- every later finding: pass that `` so the agent reuses the running instance + +Inputs each time: `` = the TP finding file, ``, poc-dir `.opentaint/pocs`, and `` once known. Each sets `poc` (`confirmed`/`failed`) + `poc_script`; a `failed` repro does not flip the triage verdict. After all PoCs, assemble `.opentaint/vulnerabilities.md` from the confirmed findings yourself (subagents never write it; see SKILL.md). Set `phases.poc: done`. diff --git a/skills/appsec_agent/references/reproduce-vulnerability.md b/skills/appsec_agent/references/reproduce-vulnerability.md new file mode 100644 index 000000000..5ee497d9c --- /dev/null +++ b/skills/appsec_agent/references/reproduce-vulnerability.md @@ -0,0 +1,14 @@ +# Reproduce-vulnerability workflow + +The reproduce-vulnerability workflow: the user asserts a specific vulnerability exists and you must make the analysis surface it. Treat the asserted vuln as ground truth — it gives you confidence while debugging, so a missed detection points at the analysis, not at the vuln being absent. Run at deep scan + dynamic triage, but driven by the one vuln rather than sweeping the whole project. + +It walks the same steps as a deep + dynamic run, narrowed to the asserted flow; dispatch any step per the Delegate template in SKILL.md: + +1. Build (references/build.md) +2. Reproduce the asserted flow as a test project — create-test-project with a `@PositiveRuleSample` mirroring the real source → hops → sink; this is the ground-truth reproducer the rest is debugged against +3. Ensure a rule covers it — reuse a built-in, or discover + author one for that source→sink (references/discover-rules.md). Confirm the sample passes +4. Scan anchored on that rule (references/scan.md, `--rule-id` listing its `refs`), then model any method on the flow's path that the scan drops (references/approximations.md) +5. If the scan still won't flag the known-vulnerable flow once the rule passes its test and no method on the path remains dropped, escalate (references/escalation.md) — this is the case the reproduce mode exists to catch +6. Confirm dynamically (references/poc.md), then assemble `.opentaint/vulnerabilities.md` + +Record `mode: reproduce-vulnerability` in `state.yaml`; phase tracking is the same as a deep + dynamic run. diff --git a/skills/appsec_agent/references/scan.md b/skills/appsec_agent/references/scan.md new file mode 100644 index 000000000..14f39f290 --- /dev/null +++ b/skills/appsec_agent/references/scan.md @@ -0,0 +1,5 @@ +# Scan + +The scan step, run in every workflow (and once per rescan the approximation iteration triggers). Dispatch via the scan agent per the Delegate template in SKILL.md. + +Delegate run-scan. Inputs: model-dir `.opentaint/project`, ruleset `builtin` + `.opentaint/rules`, report `.opentaint/results/report.sarif`; on normal/deep also config-dir `.opentaint/config` and approx-dir `.opentaint/approximations/src` (both dir flags walk the tree recursively, so the parents apply every unit). Require a concise return — finding counts per rule, the methods still in `dropped-external-methods.yaml` that sit on a source→sink path, and any config load/parse errors — not the SARIF body. The files persist on disk for the next steps. Set `phases.scan: done`. diff --git a/skills/appsec_agent/references/suppress-fp.md b/skills/appsec_agent/references/suppress-fp.md new file mode 100644 index 000000000..d10ac170c --- /dev/null +++ b/skills/appsec_agent/references/suppress-fp.md @@ -0,0 +1,11 @@ +# Suppress-FP block + +Load this when the workflow has suppress-FP on, after triage. It fixes confirmed false positives on rules you own or can override, so a rule edit can't silently drop a real finding. Dispatch per the Delegate template in SKILL.md. + +For each confirmed FP on an own/overridable rule, one at a time: + +1. create-test-project — pin the confirmed TPs as `@PositiveRuleSample` and add the FP as `@NegativeRuleSample`, recompile. Inputs: the FP and TP traces as ``, the rule's ``, test-project / test-compiled `.opentaint/test-{projects,compiled}/` +2. create-rule — refine only the rule until the negative stops firing and every positive still passes. Inputs: the rule ``, test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules` +3. re-scan (references/scan.md), then regenerate finding files and retriage the affected findings (references/triage.md) + +Loop until the FP is gone and the TPs stay. An FP from a built-in rule you can't override is recorded in the finding's `notes`, not suppressed. Set `phases.suppress_fp: done`. diff --git a/skills/appsec_agent/references/triage.md b/skills/appsec_agent/references/triage.md new file mode 100644 index 000000000..c69289ff4 --- /dev/null +++ b/skills/appsec_agent/references/triage.md @@ -0,0 +1,11 @@ +# Triage + +The triage step, run in every workflow. It generates the finding files and classifies each TP/FP. On a dynamic run, continue to references/poc.md afterward; on static, triage is the last step. The scan must be stable first. Dispatch per the Delegate template in SKILL.md. + +## Generate finding files + +Run this skill's bundled `scripts/sarif-to-findings.py` over `.opentaint/results/report.sarif` (`python3 scripts/sarif-to-findings.py .opentaint/results/report.sarif -o .opentaint/tracking/findings`). It writes one `tracking/findings/.yaml` per rule and is idempotent — a rescan adds new result hashes and resets changed findings to `pending`. This is a deterministic script with no context cost, so run it yourself, not via a subagent. + +## Classify — never in main + +Fan out analyze-findings, one subagent per finding file (the rule bundle is the bucket). Inputs: `` = the finding file, report `.opentaint/results/report.sarif`. The agent reads each result's `codeFlows[]`, splits the bundle into distinct logical findings, and sets `verdict` + `notes` on each. Return: one line per logical finding (name, verdict, one-clause reason). Assign no verdicts yourself. Set `phases.triage: done`. diff --git a/skills/appsec_agent/scripts/sarif-to-findings.py b/skills/appsec_agent/scripts/sarif-to-findings.py new file mode 100644 index 000000000..c73abf57e --- /dev/null +++ b/skills/appsec_agent/scripts/sarif-to-findings.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +""" +sarif-to-findings.py — turn an OpenTaint SARIF report into per-rule finding +tracking files under .opentaint/tracking/findings/. + +One file per rule_id, bundling that rule's result hashes into sarif_hashes. +Grouping is trivial (by rule_id) — no clustering. The triage skill +(analyze-findings) later splits a rule's bundle into distinct logical findings. + +Idempotent: re-running after a re-scan adds only result hashes not already +present in any of that rule's finding files, resets the touched file's verdict +to `pending`, and leaves existing verdict/notes/poc and triage splits intact. + +SARIF assumptions — adjust the two helpers below if the real OpenTaint SARIF +differs: +- result.ruleId holds the full rule id (e.g. java/security/sqli.yaml:sqli) +- a stable per-result hash comes from result.fingerprints / partialFingerprints + when present, else is computed from ruleId + locations + code-flow locations +- result.message.text seeds the analyzer report in `notes` +""" +import argparse +import glob +import hashlib +import json +import re +from pathlib import Path + +ADJ = ["brave", "calm", "eager", "fuzzy", "gentle", "jolly", "keen", "lucid", + "merry", "noble", "proud", "quiet", "rapid", "sly", "tidy", "vivid", + "witty", "zesty", "amber", "bold"] +NOUN = ["hopper", "eagle", "otter", "falcon", "maple", "comet", "harbor", + "willow", "pixel", "river", "ember", "cobra", "lotus", "raven", + "quartz", "badger", "cedar", "drake", "finch", "gull"] + + +def docker_name(seed, taken): + """Stable adjective-noun slug from the rule id; suffixed on collision.""" + h = int(hashlib.sha1(seed.encode()).hexdigest(), 16) + base = f"{ADJ[h % len(ADJ)]}-{NOUN[(h // len(ADJ)) % len(NOUN)]}" + name, n = base, 2 + while name in taken: + name, n = f"{base}-{n}", n + 1 + return name + + +# Prefer a stable, named fingerprint kind. vulnerabilitySourceSinkHash is more stable +# than vulnerabilityWithTraceHash — it keys on the source+sink and survives changes to +# the intermediate trace path. Fall back to any fingerprint value, then a content hash. +_FP_PREFERENCE = ("vulnerabilitySourceSinkHash", "vulnerabilityWithTraceHash") + + +def result_hash(res): + fp = res.get("fingerprints") or res.get("partialFingerprints") + if isinstance(fp, dict) and fp: + for pref in _FP_PREFERENCE: + for k, v in fp.items(): + if k.startswith(pref): + return str(v)[:16] + return str(sorted(fp.values())[0])[:16] + parts = [res.get("ruleId", "")] + locs = list(res.get("locations", [])) + for cf in res.get("codeFlows", []): + for tf in cf.get("threadFlows", []): + locs += [st.get("location", {}) for st in tf.get("locations", [])] + for loc in locs: + pl = loc.get("physicalLocation", {}) + parts.append(pl.get("artifactLocation", {}).get("uri", "")) + parts.append(json.dumps(pl.get("region", {}), sort_keys=True)) + return hashlib.sha1("|".join(parts).encode()).hexdigest()[:16] + + +def scan_results(sarif): + """rule_id -> {hash: message}""" + out = {} + for run in sarif.get("runs", []): + for res in run.get("results", []): + rid = res.get("ruleId") or "unknown" + msg = (res.get("message", {}) or {}).get("text", "").strip() + out.setdefault(rid, {})[result_hash(res)] = msg + return out + + +NAME_RE = re.compile(r'^finding_name:\s*(.+?)\s*$', re.M) +RULE_RE = re.compile(r'^rule_id:\s*(.+?)\s*$', re.M) +HASHES_RE = re.compile(r'^sarif_hashes:\s*\[(.*)\]\s*$', re.M) + + +def parse_existing(text): + name = NAME_RE.search(text) + rid = RULE_RE.search(text) + hm = HASHES_RE.search(text) + hashes = [h.strip() for h in hm.group(1).split(",") if h.strip()] if hm else [] + return (name.group(1) if name else None, + rid.group(1) if rid else None, + hashes) + + +def fmt_list(hashes): + return "[" + ", ".join(hashes) + "]" + + +def new_file_text(name, rid, hashes, notes): + body = "\n".join(" " + ln for ln in (notes or "(no analyzer message)").splitlines()) + return (f"finding_name: {name}\n" + f"sarif_hashes: {fmt_list(hashes)}\n" + f"rule_id: {rid}\n" + f"verdict: pending\n" + f"notes: >\n{body}\n" + f"poc: pending\n" + f"poc_script: null\n") + + +def main(): + ap = argparse.ArgumentParser( + description="SARIF -> per-rule finding tracking files (idempotent)") + ap.add_argument("sarif", help="path to report.sarif") + ap.add_argument("-o", "--out", default=".opentaint/tracking/findings", + help="findings dir (default: .opentaint/tracking/findings)") + args = ap.parse_args() + + by_rule = scan_results(json.loads(Path(args.sarif).read_text())) + + out = Path(args.out) + out.mkdir(parents=True, exist_ok=True) + + existing = {} # rule_id -> [(path, hashes)] + taken = set() + for p in sorted(glob.glob(str(out / "*.yaml"))): + name, rid, hashes = parse_existing(Path(p).read_text()) + if name: + taken.add(name) + if rid: + existing.setdefault(rid, []).append((Path(p), hashes)) + + created = updated = unchanged = 0 + for rid, hashmap in sorted(by_rule.items()): + scanned = set(hashmap) + files = existing.get(rid) + if not files: + name = docker_name(rid, taken) + taken.add(name) + notes = "\n".join(sorted({m for m in hashmap.values() if m})) + (out / f"{name}.yaml").write_text( + new_file_text(name, rid, sorted(scanned), notes)) + created += 1 + continue + already = set().union(*(set(h) for _, h in files)) + new = sorted(scanned - already) + if not new: + unchanged += 1 + continue + # add new hashes to the first finding file for this rule; reset verdict + path, hashes = files[0] + merged = sorted(set(hashes) | set(new)) + text = path.read_text() + text = HASHES_RE.sub(lambda m: "sarif_hashes: " + fmt_list(merged), text, count=1) + text = re.sub(r'^verdict:\s*.+$', "verdict: pending", text, count=1, flags=re.M) + path.write_text(text) + updated += 1 + + print(f"findings: {created} created, {updated} updated, {unchanged} unchanged " + f"({len(by_rule)} rules in scan)") + + +if __name__ == "__main__": + main() diff --git a/skills/build-project/SKILL.md b/skills/build-project/SKILL.md index 88dcc3623..f19de1da7 100644 --- a/skills/build-project/SKILL.md +++ b/skills/build-project/SKILL.md @@ -1,84 +1,68 @@ --- name: build-project -description: Build a Java/Kotlin project for opentaint analysis and produce a project.yaml model. Use whenever an opentaint scan needs a project model and `opentaint compile` may need help. +description: Build a Java/Kotlin project for opentaint analysis and produce a project.yaml model. Use whenever an opentaint scan needs a project model and `opentaint compile` may need help license: Apache-2.0 metadata: author: opentaint - version: "0.1" + version: "0.2" --- # Skill: Build Project -Build a target project and produce a `project.yaml` model for analysis +Build a target project into an opentaint project model. The model is this skill's only output -## Prerequisites +## Inputs -- `opentaint` CLI available -- Java 21+ installed -- For Gradle/Maven: build tool installed, project builds independently +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -## Procedure +- Project root `` — the project to build. Default: current directory +- Model output directory `` — where to write the model. Default: `.opentaint/project` +- Build constraints (optional) — required Java version, submodules to initialize, `--package` filters for `opentaint project` + +## Workflow ### 1. Determine project type -Examine directory contents: -- `build.gradle` or `build.gradle.kts` -> Gradle -- `pom.xml` -> Maven -- Pre-compiled JARs/WARs -> classpath mode -- Existing `project.yaml` in a subdirectory -> already compiled +- `build.gradle` / `build.gradle.kts` → Gradle +- `pom.xml` → Maven +- pre-compiled JAR/WAR → classpath mode +- existing `project.yaml` → already built, reuse it -### 2a. Gradle/Maven projects (autobuilder) +### 2a. Gradle/Maven — autobuilder ```bash -opentaint compile /path/to/project -o .opentaint/project +opentaint compile -o ``` -### 2b. If `opentaint compile` fails — manual build + `opentaint project` +### 2b. Autobuilder fails — manual build + `opentaint project` -If the autobuilder cannot build the project, build it manually first, then create the project model: +Build manually, then create the model from the artifacts. Always pass `--package` to restrict analysis to project code — without it the analyzer walks third-party libraries and hangs -1. **Build the project manually**: ```bash -# Gradle -./gradlew build -x test - -# Maven -mvn package -DskipTests -``` +./gradlew build -x test # Gradle +mvn package -DskipTests # Maven -2. **Create the project model with `opentaint project`**: - -> **CRITICAL**: Always specify `--package` to restrict analysis to project code only. Without `--package`, the analyzer will attempt to analyze ALL classes including third-party libraries, and will hang or run for hours. - -```bash opentaint project \ - --output .opentaint/project \ - --source-root /path/to/src \ - --classpath /path/to/app.jar \ - --package com.example.app + --output \ + --source-root \ + --classpath \ + --package ``` -For multi-module projects, use multiple `--classpath` and `--package` flags: - -```bash -opentaint project \ - --output .opentaint/project \ - --source-root /path/to/project \ - --classpath /path/to/module1/build/libs/module1.jar \ - --classpath /path/to/module2/build/libs/module2.jar \ - --package com.example.module1 \ - --package com.example.module2 -``` +Multi-module: repeat `--classpath` and `--package` per module ### 3. Verify -Check that `.opentaint/project/project.yaml` exists and is non-empty. +`/project.yaml` exists and is non-empty + +## Output + +The project model directory containing `project.yaml` (default `.opentaint/project`, or the caller's path). Report that path back -## Troubleshooting +## Gotchas -- **Build tool not found**: Install Gradle/Maven or use a wrapper (`./gradlew`, `./mvnw`) -- **Java version mismatch**: Set `JAVA_HOME` to the version required by the project -- **Compilation errors**: Check the autobuilder log, fix build issues, retry -- **Missing dependencies**: Ensure all submodules are initialized (`git submodule update --init`) -- **Autobuilder fails**: Build the project manually (see 2b above), then use `opentaint project` with the compiled artifacts -- **Analysis hangs**: You likely forgot `--package` — the analyzer is processing third-party libraries. Re-run `opentaint project` with `--package` to restrict to project code +- Analysis hangs → `--package` was omitted in `opentaint project`; the analyzer is processing third-party libraries. Re-run with `--package` +- Build tool not found → use the wrapper (`./gradlew`, `./mvnw`) or install the tool +- Compilation errors → check the autobuilder log, fix the build, retry; if it can't be fixed, fall back to 2b +- Java version mismatch → set `JAVA_HOME` to the version the project needs (opentaint itself needs Java 21+) +- Missing dependencies → initialize submodules (`git submodule update --init`) diff --git a/skills/create-approximation/SKILL.md b/skills/create-approximation/SKILL.md deleted file mode 100644 index 31983dfb7..000000000 --- a/skills/create-approximation/SKILL.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -name: create-approximation -description: Write a Java code-based dataflow model for a library method whose taint propagation depends on lambdas, callbacks, or async chains. Use to fix false negatives that a YAML passThrough rule cannot express (see `create-yaml-config`). -license: Apache-2.0 -metadata: - author: opentaint - version: "0.1" ---- - -# Skill: Create Approximation - -Create code-based approximations for complex library methods involving lambdas, async, or callbacks. Sources live under `.opentaint/approximations/src/`. - -## When approximations are actually useful - -Approximations (both code-based and YAML) only change the analysis of **external methods with no existing model**. Concretely, this means the method the approximation targets must appear in `.opentaint/results/external-methods-without-rules.yaml` produced by the previous scan (see `analyze-findings` skill). An entry there means the analyzer walked through that method and **killed the dataflow facts** because it had no rule — that's the exact gap you can fill. - -If the method is in `external-methods-with-rules.yaml`, it is already modeled by a built-in code-based approximation. Writing another `@Approximate` class targeting the same class is a **hard runtime error** — the loader enforces a strict bijection and will abort with `IllegalArgumentException`. Skip it. - -If the method is in neither list, the analyzer never reached it on a tainted path during the scan. Adding an approximation will not change the result until the analyzer actually observes a tainted argument flowing in. - -**Rule of thumb**: approximate only methods that are in the `without-rules` list **and** lie on a code path relevant to your vulnerability (reachable between a source and a sink). - -## Prerequisites - -- A baseline scan has been run with `--track-external-methods` (see `run-analysis` skill) -- `external-methods-without-rules.yaml` has been read and the target method is in it (see `analyze-findings` skill) -- The method involves lambdas/callbacks/functional interfaces (YAML cannot model these — otherwise prefer `create-yaml-config`) -- The target class must NOT already have a built-in approximation (would be listed under `external-methods-with-rules.yaml` if so) - -## Procedure - -### 1. Create approximation source - -Create Java files in `.opentaint/approximations/src/`: - -```java -package com.example.approximations; - -import org.opentaint.ir.approximation.annotation.Approximate; -import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; -import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; - -import java.util.function.Function; - -@Approximate(com.example.lib.ReactiveProcessor.class) -public class ReactiveProcessor { - - // Model: taint on this flows through the function to the result - public Object transform(@ArgumentTypeContext Function fn) throws Throwable { - com.example.lib.ReactiveProcessor self = - (com.example.lib.ReactiveProcessor) (Object) this; - if (OpentaintNdUtil.nextBool()) return null; - Object input = self.getValue(); - return fn.apply(input); - } - - // Model: taint on this flows to the consumer argument - public void subscribe(@ArgumentTypeContext java.util.function.Consumer consumer) { - com.example.lib.ReactiveProcessor self = - (com.example.lib.ReactiveProcessor) (Object) this; - if (OpentaintNdUtil.nextBool()) { - consumer.accept(self.getValue()); - } - } -} -``` - -### 2. Run with approximations - -Point `--dataflow-approximations` at the source directory. The CLI auto-compiles `.java` files using the analyzer JAR (for `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) and the target project's dependencies, then forwards the compiled directory to the analyzer. Manual `javac` invocation is not required. - -```bash -opentaint scan --project-model .opentaint/project \ - -o .opentaint/results/report.sarif \ - --ruleset builtin --ruleset .opentaint/rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --dataflow-approximations .opentaint/approximations/src -``` - -If `.java` compilation fails, the CLI reports the errors and aborts before the scan starts. If the directory contains already-compiled `.class` files (no `.java` siblings), the CLI passes it through unchanged. - -## Key Patterns - -| Pattern | Usage | -|---------|-------| -| `@Approximate(TargetClass.class)` | Link approximation to target class | -| `@ApproximateByName("fqn")` | Link by fully qualified name (when class not on compile classpath) | -| `(TargetClass) (Object) this` | Cast to access real object's methods | -| `@ArgumentTypeContext` | On lambda/functional interface parameters | -| `OpentaintNdUtil.nextBool()` | Non-deterministic branching (analyzer considers both paths) | - -## Constraints - -- Java 8 source compatibility -- One approximation class per target class (strict bijection) -- Must NOT target a class that already has a built-in approximation (will error at runtime). Verify by checking `external-methods-with-rules.yaml` — if the class appears there, it is already covered. -- Method signatures must match the target class methods exactly - -## Validating the approximation had an effect - -After re-running the scan with `--dataflow-approximations`, diff the before/after `external-methods-without-rules.yaml`: - -- The approximated method should disappear from `without-rules` (moves to `with-rules`) -- If it does not move, your `@Approximate(...)` target class or the method signature does not match what the analyzer sees -- If new findings appear in the SARIF after the approximation, they are likely true positives the kill-facts was hiding - -## When to use code-based vs YAML - -- Lambda/callback invocation -> **Code-based** (this skill) -- Non-deterministic branching (async paths) -> **Code-based** -- Complex internal state with multiple method interactions -> **Code-based** -- Simple from-to propagation -> **YAML** (create-yaml-config skill) -- Method is **not** in `external-methods-without-rules.yaml` -> **do nothing** (approximation will have no observable effect) diff --git a/skills/create-dataflow-approximation/SKILL.md b/skills/create-dataflow-approximation/SKILL.md new file mode 100644 index 000000000..7727533d1 --- /dev/null +++ b/skills/create-dataflow-approximation/SKILL.md @@ -0,0 +1,121 @@ +--- +name: create-dataflow-approximation +description: Model a library method's taint propagation as code-based dataflow approximation and refine it against a test project until the sample passes. Use for a dropped external method whose propagation a passThrough copy cannot express +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Create Dataflow Approximation + +Write a code-based approximation for a library method whose taint propagation depends on lambdas, callbacks, or async chains, then test it against the prepared test project and fix until the approximation sample passes + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Methods to model `` — the target method(s) and how taint flows through them, from the tracking file's `methods` (all `type: dataflow`) +- Tracking file `` — the dataflow approximation unit (`-dataflow`). Default: `.opentaint/tracking/approximations/.yaml` +- Approximation sources `` — this package's own directory for the `.java` approximation files. Default: `.opentaint/approximations/src/` +- Compiled test project `` — the per-package compiled model to test against. Default: `.opentaint/test-compiled/` + +## Workflow + +### 1. Write the approximation source + +Create Java files in ``. Target the EXACT class named in `dropped-external-methods.yaml` (the unit's `methods[].target`), whatever it is. `@Approximate` matches only that one class — unlike passThrough's `overrides: true`, it is not propagated to other types in the hierarchy — and the dropped FQN already reflects how the analyzer resolved the call: an interface-typed receiver (`Map m = ...; m.computeIfAbsent(...)`) drops `java.util.Map#computeIfAbsent` → target `java.util.Map`; a concrete receiver (`new HashMap<>()`) drops `java.util.HashMap#computeIfAbsent` → target `java.util.HashMap`. Don't substitute a supertype or a subtype for what the dropped file names. Model the real propagation — never leave the body empty, an empty body silently drops the taint; in doubt about how taint flows through the method (which callback or argument carries it), read the library's source rather than guessing: + +```java +package com.example.approximations; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Function; + +@Approximate(com.example.lib.ReactiveProcessor.class) +public class ReactiveProcessor { + + // Model: taint on this flows through the function to the result + public Object transform(@ArgumentTypeContext Function fn) throws Throwable { + com.example.lib.ReactiveProcessor self = + (com.example.lib.ReactiveProcessor) (Object) this; + if (OpentaintNdUtil.nextBool()) return null; + Object input = self.getValue(); + return fn.apply(input); + } + + // Model: taint on this flows to the consumer argument + public void subscribe(@ArgumentTypeContext java.util.function.Consumer consumer) { + com.example.lib.ReactiveProcessor self = + (com.example.lib.ReactiveProcessor) (Object) this; + if (OpentaintNdUtil.nextBool()) { + consumer.accept(self.getValue()); + } + } +} +``` + +Wrapper-returning operators (a `Mono`/`Flux`, `Optional`, `Stream`, a builder — anything where the taint stays inside a container): three things matter beyond the plain case above. Declare the real concrete return type, not `Object` (the IFDS summary won't propagate otherwise); in the `nextBool()` branch `return self`, not `null` (returning `null` discards the container's taint on that path); and extract → apply → re-wrap so a downstream extractor (`block`, `get`, …) can pull the tainted value back out: + +```java +@Approximate(reactor.core.publisher.Mono.class) +public class Mono { + public reactor.core.publisher.Mono map(@ArgumentTypeContext Function fn) throws Throwable { + reactor.core.publisher.Mono self = (reactor.core.publisher.Mono) (Object) this; + if (OpentaintNdUtil.nextBool()) return self; + Object up = self.block(); // extract upstream element + return reactor.core.publisher.Mono.justOrEmpty(fn.apply(up)); // apply mapper, re-wrap + } +} +``` + +### 2. Test against the test project + +Run `test-approximations` over `` applying only this package's sources (``); iterate the source until the sample passes: + +```bash +opentaint dev test-approximations \ + -o .opentaint/test-results/ \ + --dataflow-approximations +``` + +test-approximations applies its own bundled fixed source→sink rule automatically — you don't author or pass one (there is no `--ruleset` flag); other packages' approximation sources are merged only at the final scan, not here. The CLI auto-compiles the `.java` sources against the analyzer JAR (for `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) and the project's dependencies; if compilation fails it reports the errors and aborts before the tests. The sample that routes taint through the method is a `falseNegative` until the model propagates it. Read `.opentaint/test-results//test-result.json`: + +- still `falseNegative` → the `@Approximate(...)` target class or a method signature doesn't match what the analyzer sees, or the body doesn't route taint from the real source to the modeled result/argument; diagnose the mismatch, don't rationalize a non-result. Most common: the target class doesn't equal the FQN in `dropped-external-methods.yaml` — you wrote a supertype/subtype (e.g. `java.util.Map` when the dropped file says `java.util.HashMap#computeIfAbsent`, or vice-versa). Re-target the exact dropped class and match the cast (`(java.util.HashMap) (Object) this`) +- `falsePositive` (a negative sample fired) → the model is over-broad: it taints a read it shouldn't, e.g. data fetched under a different key/field than it was stored under. Narrow the propagation until the negative stays non-firing while the positive passes (negatives exist only for shared-state methods — see create-test-project/references/approximation.md) + +## Key patterns + +| Pattern | Usage | +|---|---| +| `@Approximate(TargetClass.class)` | Link the approximation to its target class — the EXACT class `dropped-external-methods.yaml` names (interface or concrete, as the analyzer resolved it); matches only that class, not propagated to other types in the hierarchy. Must be on the compile classpath (a project dependency or a JDK type) | +| `(TargetClass) (Object) this` | Cast to reach the real object's methods | +| `@ArgumentTypeContext` | On lambda / functional-interface parameters | +| `OpentaintNdUtil.nextBool()` | Non-deterministic branch — the analyzer considers both paths | + +## Output + +- The approximation source(s) under `` +- Tracking updated: `artifact` and `stages.tests_passing` (per Tracking) +- Report the source path, a one-line test summary, and the exact `test-approximations` command used + +## Tracking + +In ``, once the source exists and its sample passes: + +```yaml +artifact: .opentaint/approximations/src//com/example/approximations/ReactiveProcessor.java +stages: + tests_passing: done +``` + +Do not touch other stages or fields + +## Constraints + +- Java 8 source compatibility +- One approximation class per target class (strict bijection); never target a class that already has a built-in approximation — it errors at load with `IllegalArgumentException` +- Method signatures must match the target class methods exactly diff --git a/skills/create-pass-through-approximation/SKILL.md b/skills/create-pass-through-approximation/SKILL.md new file mode 100644 index 000000000..4d8ca2600 --- /dev/null +++ b/skills/create-pass-through-approximation/SKILL.md @@ -0,0 +1,140 @@ +--- +name: create-pass-through-approximation +description: Model a library method's taint propagation as a passThrough approximation config. Use for a dropped external method whose propagation is simple copying +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Create PassThrough Approximation + +Write passThrough propagation rules for external library methods. There's no test project — the main scan applies the config and verifies it; if a modeled method is still dropped or the config errors, you're re-invoked to fix it + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Methods to model `` — the target method(s) and what each propagates, from the tracking file's `methods` (all `type: passthrough`) +- Tracking file `` — the passThrough approximation unit. Default: `.opentaint/tracking/approximations/.yaml` +- Config output `` — where to write the passThrough approximation. Default: `.opentaint/config/.yaml` + +## Workflow + +### 1. Write the passThrough config + +Write `passThrough:` rules into `` + +Simple getter (taint on `this` to `result`): +```yaml +passThrough: + - function: com.example.lib.DataWrapper#getValue + copy: + - from: this + to: result +``` + +Argument to result: +```yaml +passThrough: + - function: com.example.lib.Converter#convert + copy: + - from: arg(0) + to: result +``` + +Builder pattern: +```yaml +passThrough: + - function: com.example.lib.Builder#withName + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result +``` + +Container via a synthetic field — when a container takes the taint in one call and hands it back in another, write into a field that doesn't really exist, then read from it: +```yaml +passThrough: + - function: org.springframework.http.ResponseEntity$BodyBuilder#body + copy: + - from: arg(0) + to: + - result + - .org.springframework.http.HttpEntity#Body#java.lang.Object + - function: org.springframework.http.HttpEntity#getBody + copy: + - from: + - this + - .org.springframework.http.HttpEntity#Body#java.lang.Object + to: result +``` +The naive model — copy the data onto `this`, then on read copy `this` to `result` — fails on types: `this` is the container type, not the data type (e.g. `String`), so the engine can't hang the taint on it. Routing through a field typed `java.lang.Object` (here `#Body#java.lang.Object`) sidesteps the mismatch. A synthetic per-object slot `.` does the same job without naming a field — store on the taking call, read on the returning one (see Reference) + +Conditional propagation: +```yaml +passThrough: + - function: com.example.lib.Parser#parse + condition: + typeIs: + position: arg(0) + type: java.lang.String + copy: + - from: arg(0) + to: result +``` + +### 2. Verification is the scan + +There's no test project for passThrough. The main scan applies `` (run-scan's `--passthrough-approximations`, which takes a file or a directory) and the scan agent reports back. You're re-invoked to fix the config when that scan shows: + +- a method you modeled still in `dropped-external-methods.yaml` → the `function` matcher didn't match (check package, class, name, `overrides`), or the `from`/`to` doesn't land on the tainted position +- a config load / parse error → fix the YAML + +Never invoke the analyzer JAR directly — always go through the CLI + +## Output + +- The passThrough config at `` +- Tracking updated: `written` + `artifact` (per Tracking) +- Report the config path and the methods modeled + +## Tracking + +In ``, once the config is written: + +```yaml +artifact: .opentaint/config/.yaml +stages: + written: done +``` + +Do not touch other stages or fields + +## Reference + +Position values +- `this`, `result`, `arg(0)`, `arg(1)`, ..., `arg(*)` +- Position modifiers (YAML list): `.[*]` (array element), `.ClassName#fieldName#fieldType` (field), `.` (synthetic per-object state, an alternative to a named field) + +Function matching +- Simple: `package.Class#method` +- Complex: `{package, class, name}`, each with an optional `pattern:` regex — for one hard-to-name function, not for matching many at once (see Gotchas) + +Overrides +- `overrides: true` (default): applies to the class and all subclasses +- `overrides: false`: exact class only + +Conditions +- `typeIs`, `annotatedWith`, `isConstant`, `isNull`, `constantMatches`, `tainted`, `numberOfArgs`, `methodAnnotated`, `classAnnotated`, `methodNameMatches`, `classNameMatches`, `isStaticField`, `anyOf`, `allOf`, `not` + +## Gotchas + +- passThrough expresses only from→to copies — DB round-trips, lambdas, and async belong in create-dataflow-approximation +- The approximation merges with built-ins at the rule level — a provided rule overrides a built-in only if it matches one; don't redefine a method already in `approximated-external-methods.yaml` +- A wrong argument position copies the wrong value — point `from`/`to` at the tainted one +- In doubt about how a method moves taint — which argument or field reaches the result — read the library's source rather than guessing +- Model one function per rule — don't use a regex/wildcard `pattern:` matcher (e.g. `name: get.*`, `class: .*`) to cover many functions at once; it over-models, copying taint through methods you never vetted and manufacturing false positives. Write an explicit `function:` per method diff --git a/skills/create-rule/SKILL.md b/skills/create-rule/SKILL.md index 8396aed44..fc6f5f081 100644 --- a/skills/create-rule/SKILL.md +++ b/skills/create-rule/SKILL.md @@ -1,51 +1,48 @@ --- name: create-rule -description: Author OpenTaint YAML pattern rules for a vulnerability class on JVM code. Use when an uncovered vulnerability needs detection, or when an existing rule needs a false-positive or false-negative fix. +description: Author and verify an OpenTaint detection rule for a vulnerability class on JVM code. Use whenever a rule needs to be created for an uncovered vulnerability, or an existing rule needs a false-positive or false-negative fix license: Apache-2.0 metadata: author: opentaint - version: "0.1" + version: "0.2" --- # Skill: Create Rule -Create pattern rules for detecting specific vulnerability classes +Create a pattern rule for a vulnerability class, then test it against the prepared test project and fix it until every sample passes -## Prerequisites +## Inputs -- `opentaint` CLI available -- Understanding of the target vulnerability (source, sink, sanitizers) +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -## Procedure +- Requirements `` — what to detect (source, sink, vuln class); either a rule tracking file or an overall description +- Compiled test project `` — the compiled model to verify against. Default: `.opentaint/test-compiled/` (per rule/approximation ``) +- Rules directory `` — where rules are written. Default: `.opentaint/rules` +- Tracking file `` — the rule file. Default: `.opentaint/tracking/rules/.yaml` + +Built-in rules are available at `opentaint dev rules-path` + +## Workflow ### 1. Check existing coverage -`opentaint dev rules-path` prints the absolute path to the built-in rules directory (downloading them on first call). Use it to browse built-in patterns. +Browse builtin rules at `opentaint dev rules-path` for source/sink library rules to reference. A `refs` to a built-in source/sink is cheaper and more accurate than a new one -```bash -RULES_DIR=$(opentaint dev rules-path) -ls $RULES_DIR/java/lib/generic/ -ls $RULES_DIR/java/lib/spring/ -ls $RULES_DIR/java/security/ -``` +### 2. Wire sources and sinks -Read existing rules to understand patterns already covered. +Prefer referencing built-in source/sink library rules; write a custom one only when no built-in fits. Derive each pattern from the requirements' fully-qualified names and annotations -### 2. Create rule directory structure +Reference built-ins: +```yaml +refs: + - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + as: servlet-source + - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source + as: spring-source ``` -.opentaint/rules/ - java/ - lib/ - my-source.yaml - my-sink.yaml - security/ - my-vuln.yaml -``` - -### 3. Create library rules -**Source rule** (`.opentaint/rules/java/lib/my-source.yaml`): +Custom source library rule (`/java/lib/generic/my-source.yaml`), if no built-in fits: ```yaml rules: @@ -67,7 +64,7 @@ rules: - pattern: doPost ``` -**Sink rule** (`.opentaint/rules/java/lib/my-sink.yaml`): +Custom sink library rule (`/java/lib/generic/my-sink.yaml`): ```yaml rules: @@ -86,7 +83,9 @@ rules: - focus-metavariable: $UNTRUSTED ``` -### 4. Create security rule (join mode) +### 3. Create the security rule (join mode) + +Write it at `/java/security/.yaml` — name the file and `id` after the rule name from the tracking file. Wire the sources and sinks (built-in or custom) via `refs`: ```yaml rules: @@ -101,55 +100,63 @@ rules: mode: join join: refs: - - rule: java/lib/my-source.yaml#my-custom-source + - rule: java/lib/generic/my-source.yaml#my-custom-source as: source - - rule: java/lib/my-sink.yaml#my-custom-sink + - rule: java/lib/generic/my-sink.yaml#my-custom-sink as: sink on: - 'source.$UNTRUSTED -> sink.$UNTRUSTED' ``` -### 5. Reference built-in library rules +### 4. Test until success -```yaml -refs: - - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - as: servlet-source - - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source - as: spring-source +Run the rule tests against the compiled test project; iterate the rule and re-run `test-rules` until every sample passes: + +```bash +opentaint dev test-rules \ + -o .opentaint/test-results/ \ + --ruleset ``` -### 6. Run analysis with specific rules +`test-rules` auto-loads the built-in rules, so pass only your custom `` — a literal `builtin` here would be treated as a path. Read `.opentaint/test-results//test-result.json`: -The `--rule-id` flag requires the **full rule ID** in the format `.yaml:`. The `ruleSetRelativePath` is the path to the YAML file relative to its ruleset root, without the `.yaml` extension (it is written explicitly in the format). +- `falseNegative` (positive didn't trigger) → patterns too narrow; broaden `pattern-either`, check metavariable names match across branches and between `refs` and `on` +- `falsePositive` (negative triggered) → patterns too broad; add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or `metavariable-regex` +- `skipped` / `disabled` → the rule wasn't exercised; fix the annotation `value`/`id`, or enable the rule -Library rules referenced via join-mode `refs` are NOT auto-included by `--rule-id` — the filter drops every rule whose full ID is not listed. Either list every library rule explicitly, or omit `--rule-id` entirely to keep all loaded rules active. +### 5. Refining for a false positive (suppress-FP) -```bash -# Full rule ID = "java/security/my-vuln.yaml" (relative path with .yaml) + ":" + "my-vulnerability" (id from YAML) -opentaint scan --project-model .opentaint/project \ - -o .opentaint/results/report.sarif \ - --ruleset builtin --ruleset .opentaint/rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability -``` +The test project already pins the confirmed TPs as `@PositiveRuleSample` and reproduces the FP as a `@NegativeRuleSample` — refine only the rule. Narrow it (step 4's `falsePositive` handling) until the negative stops triggering while every positive still passes. Do not touch the samples; if one looks wrong, hand it back upstream -To discover full rule IDs, read the rule YAML file: -- The `id` field in the YAML gives the short ID -- The file path relative to the ruleset root (with `.yaml` extension) gives the prefix -- Combine as `.yaml:`, e.g. `java/security/path-traversal.yaml:path-traversal` +## Output + +- The rule file(s) under `` +- Tracking updated: `rule_id`, `artifact`, `stages.tests_passing` (per Tracking) +- Report the full rule id, a one-line test summary, and the exact `test-rules` command used + +## Tracking + +In ``, once the rule exists and its samples pass: + +```yaml +rule_id: java/security/my-vuln.yaml:my-vulnerability +artifact: .opentaint/rules/java/security/my-vuln.yaml +stages: + tests_passing: done +``` ## Constraints - Library rules MUST have `options.lib: true` and `severity: NOTE` - Security rules MUST have `metadata.cwe` and `metadata.short-description` -- Source/sink metavariable names must match across `refs` and `on` clauses +- Source/sink metavariable names must match across `refs` and `on` clauses, or the join won't connect - The `rule:` path in `refs` is relative to the ruleset root - Rule IDs must be globally unique -- `--rule-id` requires the **full** rule ID (`:`), not just the short ID -- `--rule-id` drops every rule whose full ID is not listed, including library rules referenced via `refs`. List all rules you need explicitly, or omit `--rule-id`. - For simple structural patterns (no dataflow), omit `mode:` (uses default mode) +- Custom library rules go under `/java/lib/generic/` or `/java/lib/spring/` (for Spring-specific), mirroring the built-in layout — never directly under `java/lib/` + -## FP/FN Fixes +## Gotchas -- **FP**: Add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or `metavariable-regex` -- **FN**: Add patterns to `pattern-either`, create new library rules, add new `on` clauses +- A wrong argument position in `(..., $UNTRUSTED, ...)` focuses the wrong parameter — point `focus-metavariable` at the tainted one +- Refine the rule, never the test project — don't edit or weaken samples here; if one is wrong, hand it back upstream diff --git a/skills/create-test-project/SKILL.md b/skills/create-test-project/SKILL.md new file mode 100644 index 000000000..6a2bd49dc --- /dev/null +++ b/skills/create-test-project/SKILL.md @@ -0,0 +1,83 @@ +--- +name: create-test-project +description: Create an OpenTaint test project with annotated positive/negative samples for verifying a rule or approximation. Use when a rule or approximation needs a test project to check against +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Create Test Project + +Build a minimal compiled test project whose annotated samples reproduce the flow a rule or approximation is checked against. The compiled model is the deliverable; its sources sit alongside it + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- What to test `` — a rule's requirements, or the package's methods to exercise, with enough context to build a realistic flow +- Tracking file `` — the rule or approximation file this test serves. Default: `.opentaint/tracking/rules/.yaml` or `.opentaint/tracking/approximations/.yaml` +- Test project `` — sources. Default: `.opentaint/test-projects/` +- Compiled output `` — the model. Default: `.opentaint/test-compiled/` +- Dependencies — exact Maven coordinates the samples need; default: the `dependencies` list in ``; with no tracking file, derive them from the project's `build.gradle`/`pom.xml` + +`` is the rule name for a rule, or the dataflow approximation unit (`-dataflow`) for an approximation; the two never share a folder + +## Workflow + +### 1. Init the project + +Pick the scaffold by shape, then pass each coordinate from the tracking file's `dependencies` as a `--dependency`: + +- a rule → `init-rule-project` (Gradle build + the test-util jar) +- a dataflow approximation → `init-approximation-project` (the same, plus `Taint.java` and the fixed `approximation-rule.yaml` the harness applies) + +```bash +# rule test project +opentaint dev init-rule-project \ + --dependency "org.mybatis:mybatis:3.5.13" \ + --dependency "javax.servlet:javax.servlet-api:4.0.1" + +# dataflow approximation test project +opentaint dev init-approximation-project \ + --dependency "io.projectreactor:reactor-core:3.8.5" +``` + +### 2. Write samples + +Write Java samples under `/src/main/java/test/`, each annotated with its expected verdict — `@PositiveRuleSample` (must flag) or `@NegativeRuleSample` (must not). `value` is the rule path relative to the ruleset root (with `.yaml`), `id` the short id from the YAML — not the full `--rule-id` used by `opentaint scan`. One expected verdict per sample. Split the samples across files however groups most logically — don't cram unrelated ones into a single class + +What the positive and negative samples must contain depends on the shape — load and follow the matching reference: + +- a rule → `references/rule.md` +- a dataflow approximation → `references/approximation.md` (passThrough approximations need no test project — they're written directly and verified by the scan) + +### 3. Compile + +```bash +opentaint compile -o +``` + +A clean compile is the deliverable. If it won't build, fix the samples or dependencies before handing off + +## Output + +- A compiled test project (``) plus its sources (``); report both paths and the exact `compile` command used +- The tracking file's `test_project` stage marked done (see Tracking) + +## Tracking + +In ``, set only the test-project stage (`in_progress` while building, `done` once it compiles): + +```yaml +stages: + test_project: done +``` + +Do not touch other stages or fields + +## Gotchas + +- One expected verdict per sample +- One unit per `` folder — never write into another unit's project, so concurrent agents don't race +- In doubt about how the real flow or a method behaves, read the source rather than guessing — the sample must mirror the actual code diff --git a/skills/create-test-project/references/approximation.md b/skills/create-test-project/references/approximation.md new file mode 100644 index 000000000..196f0c05f --- /dev/null +++ b/skills/create-test-project/references/approximation.md @@ -0,0 +1,54 @@ +# Dataflow approximation test project + +This shape is for code-based (dataflow) approximations only — passThrough approximations are written directly and verified by the scan, with no test project + +## How it tests + +`opentaint dev test-approximations` applies one fixed source → sink rule automatically — you do not author or pass a rule. That rule matches a fixed pair, `test.Taint.source()` and `test.Taint.sink(...)`, provided by the `Taint` helper scaffolded into the project. Your samples route taint from `Taint.source()` through the method being approximated into `Taint.sink(...)`. Granularity is per sample (`className#methodName`), so the one fixed rule covers every sample — a broken approximation only flips its own sample + +`opentaint dev init-approximation-project ` scaffolds the Gradle build, `Taint.java`, and the `approximation-rule.yaml` reference — you add only the samples (under `src/main/java/test/`). The approximation itself is NOT part of this project: it lives in its own unit folder `.opentaint/approximations/src/` and is applied to this compiled model at test time via `--dataflow-approximations` (see create-dataflow-approximation). Do not create an `approximations/` directory inside the test project + +## Positive sample + +Put samples under `src/main/java/test/`, each a public method annotated with the fixed rule. A positive sends `Taint.source()` through the approximated method into `Taint.sink(...)`; it stays a `falseNegative` until the approximation propagates the taint, then flips to `success`. One positive per method being approximated + +```java +package test; + +import org.opentaint.sast.test.util.PositiveRuleSample; + +import java.util.HashMap; +import java.util.Map; + +public class ApproximationSamples { + + @PositiveRuleSample(value = "approximation-rule.yaml", id = "approximation-rule") + public void taintReachesSink() { + String tainted = Taint.source(); + Map cache = new HashMap<>(); + String routed = cache.computeIfAbsent(tainted, k -> k); // the approximated method + Taint.sink(routed); + } +} +``` + +## Negative sample — only for shared state + +Add a `@NegativeRuleSample` only when the method holds state that taint must not cross — a container, cache, registry, or builder where you store under one key/field and read from another. Write a negative that stores tainted data under one variable and reads a different one; with a correct model the read stays clean, so the sample must not fire. For plain propagation (argument → result, or a value through a callback) the positive alone proves the model — skip the negative + +```java + @NegativeRuleSample(value = "approximation-rule.yaml", id = "approximation-rule") + public void taintDoesNotCrossKeys() { + Map cache = new HashMap<>(); + cache.put("k1", Taint.source()); // taint stored under one key + Taint.sink(cache.get("k2")); // a different key — must stay clean + } +``` + +A negative that fires (`falsePositive` in `test-result.json`) means the model is over-broad — it taints a read it shouldn't. Narrow the approximation until the negative stays non-firing while the positive still passes + +## Notes + +- `value`/`id` always reference the fixed rule: `approximation-rule.yaml` / `approximation-rule`. test-approximations applies its own bundled copy, so the project's `approximation-rule.yaml` is only a reference — what matters is that samples call `test.Taint.source()` / `test.Taint.sink(...)` +- the sample's receiver type fixes the dropped method's fully-qualified name, and the approximation must `@Approximate` that exact class — so mirror the real call's receiver type. An interface-typed receiver (`Map m`, e.g. a method parameter) drops `java.util.Map#computeIfAbsent`; a concrete `Map cache = new HashMap<>()` drops `java.util.HashMap#computeIfAbsent`. The `new HashMap<>()` form above is just one case — match whichever the real flow uses +- the approximation under test is NOT in this project — it lives in the separate unit folder `.opentaint/approximations/src/`, compiled by the CLI (not Gradle) and applied with `--dataflow-approximations ` — see create-dataflow-approximation diff --git a/skills/create-test-project/references/rule.md b/skills/create-test-project/references/rule.md new file mode 100644 index 000000000..d44953266 --- /dev/null +++ b/skills/create-test-project/references/rule.md @@ -0,0 +1,43 @@ +# Rule test project + +## Samples + +- `@PositiveRuleSample` — reproduce the vulnerability from the requirements: tainted input from the real source flowing through the real hops into the dangerous sink, mirroring the actual signatures and annotations +- `@NegativeRuleSample` — a flow the rule must not flag: the safe (sanitized or parameterized) version of the same operation, or a confirmed false positive you're narrowing the rule against. Keep it realistic, not stripped to constants + +```java +package test; + +import org.opentaint.sast.test.util.PositiveRuleSample; +import org.opentaint.sast.test.util.NegativeRuleSample; +import javax.servlet.http.HttpServletRequest; +import java.sql.Connection; +import java.sql.Statement; + +public class MyVulnTest { + private Connection db; + + @PositiveRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") + public void vulnerable(HttpServletRequest req) throws Exception { + String input = req.getParameter("id"); + Statement stmt = db.createStatement(); + stmt.executeQuery("SELECT * FROM users WHERE id = " + input); + } + + @NegativeRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") + public void safe(HttpServletRequest req) throws Exception { + String input = req.getParameter("id"); + var pstmt = db.prepareStatement("SELECT * FROM users WHERE id = ?"); + pstmt.setString(1, input); + pstmt.executeQuery(); + } +} +``` + +## Suppress-FP + +When narrowing a rule after triage confirms a false positive, add that FP as a `@NegativeRuleSample` and pin every confirmed true positive as a `@PositiveRuleSample`, so the rule edit can't silently drop a real finding. Then recompile + +## Spring-entry flows + +If the flow only fires through a Spring entry point (controller → bean → sink), a plain method sample will be a `falseNegative`. Use the multi-module Spring layout — read `spring-multimodule.md` and follow it diff --git a/skills/create-test-project/references/spring-multimodule.md b/skills/create-test-project/references/spring-multimodule.md new file mode 100644 index 000000000..3694a0c7e --- /dev/null +++ b/skills/create-test-project/references/spring-multimodule.md @@ -0,0 +1,60 @@ +# Spring multi-module test projects + +Load this when a plain method-level sample returns `falseNegative` because the flow only fires through a Spring entry point (controller → bean → sink). Some rules only trigger inside a full Spring MVC entry-point graph — a `@PositiveRuleSample` on a bare method won't trigger them, because the tainted data must flow from a discovered `@Controller`. + +For these rules, create one dedicated Gradle sub-project per sample. Each sub-project is a complete, minimal Spring application containing exactly one `@PositiveRuleSample` or `@NegativeRuleSample`. Split positive and negative cases into separate sub-projects, e.g. `xss-spring-test-positive` and `xss-spring-test-negative`. + +## How detection works + +`TestProjectAnalyzer` computes a `testSetName` per module as `module.moduleSourceRoot.relativeTo(project.sourceRoot)`, with `/` replaced by `-` (see `core/src/main/kotlin/org/opentaint/jvm/sast/project/TestProjectAnalyzer.kt`). If the name starts with `spring-app-tests`, the module is treated as a Spring test set: + +- All sample annotations in the module are collected as usual +- Each sample is wrapped in a `SpringTestSample` that uses the Spring dispatcher method as the analysis entry point instead of the annotated method itself +- Taint therefore originates from real `@Controller` request parameters and must reach the annotated sink method through normal Spring wiring + +Consequence: the annotated method is only a marker for which rule to run and the expected verdict. The actual vulnerable/safe flow must be reachable from a controller in the same module. Keep each module to a single annotation so the verdict is unambiguous. + +## Project layout + +Multi-module Gradle build where every `spring-app-tests/` directory is its own sub-project: + +``` +/ +├── settings.gradle.kts +├── build.gradle.kts +└── spring-app-tests/ + ├── xss-spring-test-positive/ + │ ├── build.gradle.kts + │ └── src/main/java/test/ + │ ├── VulnerableController.java // @Controller with the tainted flow + │ └── VulnerableSink.java // carries the single @PositiveRuleSample + └── xss-spring-test-negative/ + ├── build.gradle.kts + └── src/main/java/test/ + ├── SafeController.java + └── SafeSink.java // carries the single @NegativeRuleSample +``` + +`settings.gradle.kts` should auto-discover every `spring-app-tests/*/build.gradle.kts` so adding a case only needs a new directory. See `rules/test/settings.gradle.kts` in the OpenTaint repo for a reference implementation. + +## Required dependencies + +Each Spring sub-project needs at least: + +- `compileOnly` on `opentaint-sast-test-util` (the sample annotations) +- `org.springframework:spring-webmvc` and `spring-context` (so `@Controller` is recognized) +- Any libraries the sample itself uses (servlet-api, JDBC, etc.) + +## Compile + +```bash +opentaint compile -o +``` + +Each `spring-app-tests/` sub-project becomes an independent test set and appears as its own entry in `test-result.json`. + +## Common pitfalls + +- No `@Controller` in the module → `TestProjectAnalyzer` logs `No spring entry point found` and the sample is analyzed without Spring context, usually a false negative. Always include a controller that reaches the sink +- More than one annotation per module → results become ambiguous; keep it to one sample per sub-project +- Module path not starting with `spring-app-tests` → `isSpringAppTestSet()` returns false and the sample runs as a regular method-level test, so Spring flows won't trigger diff --git a/skills/create-yaml-config/SKILL.md b/skills/create-yaml-config/SKILL.md deleted file mode 100644 index 786c7c742..000000000 --- a/skills/create-yaml-config/SKILL.md +++ /dev/null @@ -1,159 +0,0 @@ ---- -name: create-yaml-config -description: Add a YAML passThrough model for an external library method that kills taint via simple from→to copies. Use to fix false negatives caused by unmodelled library methods on a real source→sink path (no lambdas — see `create-approximation`). -license: Apache-2.0 -metadata: - author: opentaint - version: "0.1" ---- - -# Skill: Create YAML Config - -Create YAML passThrough propagation rules for library methods - -## When a passThrough rule actually changes the scan - -A custom `passThrough` entry only affects the analyzer's behavior if the target method is an **external method with no existing model**. In practice: the method must appear in `.opentaint/results/external-methods-without-rules.yaml` produced by the previous scan (see `analyze-findings` skill). That file is exactly the list of methods where the analyzer killed dataflow facts for lack of a rule — those are the FN sources you can fix. - -Do not write passThrough rules for: -- Methods in `external-methods-with-rules.yaml` — already modeled by a built-in YAML passThrough. Since `--approximations-config` replaces the entire built-in list, writing a custom config means you implicitly own all passThrough coverage; adding a duplicate method entry is not a hard error, but you are now responsible for all methods previously covered by built-ins. -- Methods that appear in neither list — the analyzer never reached them on a tainted path during the scan; the rule will be a no-op until that changes. -- Application-internal methods — approximations apply only to external library methods. - -**Rule of thumb**: open `external-methods-without-rules.yaml`, pick methods on a code path from a source to a sink relevant to the target vulnerability, and write passThrough rules for those. - -## Prerequisites - -- A baseline scan has been run with `--track-external-methods` (see `run-analysis` skill) -- `external-methods-without-rules.yaml` has been read; the methods you plan to model are in it (see `analyze-findings` skill) -- The method's propagation can be described by simple from/to copies (otherwise use `create-approximation`) - -## Procedure - -### 1. Create config file - -Create `.opentaint/config/custom-propagators.yaml` with `passThrough:` rules. - -### 2. Common patterns - -**Simple getter** (taint on `this` to `result`): -```yaml -passThrough: - - function: com.example.lib.DataWrapper#getValue - copy: - - from: this - to: result -``` - -**Argument-to-result**: -```yaml -passThrough: - - function: com.example.lib.Converter#convert - copy: - - from: arg(0) - to: result -``` - -**Builder pattern**: -```yaml -passThrough: - - function: com.example.lib.Builder#withName - copy: - - from: arg(0) - to: this - - from: arg(0) - to: result - - from: this - to: result -``` - -**Object with internal state** (using ``): -```yaml -passThrough: - # Store taint - - function: com.example.lib.Container#put - copy: - - from: arg(0) - to: - - this - - .com.example.lib.Container##java.lang.Object - # Retrieve taint - - function: com.example.lib.Container#get - copy: - - from: - - this - - .com.example.lib.Container##java.lang.Object - to: result -``` - -**Package-wide getter pattern**: -```yaml -passThrough: - - function: - package: com.example.dto - class: - pattern: .* - name: - pattern: get.* - copy: - - from: this - to: result -``` - -**Conditional propagation**: -```yaml -passThrough: - - function: com.example.lib.Parser#parse - condition: - typeIs: - position: arg(0) - type: java.lang.String - copy: - - from: arg(0) - to: result -``` - -### 3. Run with config - -`--approximations-config` is repeatable; all supplied files are merged together into a single combined config. That combined config then **replaces the entire built-in passThrough list** — not per-method, but the whole list. If the combined config is non-empty, no built-in passThrough entry is active; you own the full set. - -```bash -opentaint scan --project-model .opentaint/project \ - -o .opentaint/results/report.sarif \ - --ruleset builtin --ruleset .opentaint/rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --approximations-config .opentaint/config/custom-propagators.yaml \ - --track-external-methods -``` - -### 4. Confirm the rule actually fired - -Keep `--track-external-methods` enabled and diff the fresh `external-methods-without-rules.yaml` with the baseline one: - -- Every method you added a `passThrough` for should disappear from `without-rules` (it now moves to `with-rules`) -- If a method does not move, the `function` matcher did not match — check package, class, name, and `overrides:` -- If no new findings appear even though facts now propagate, the method was not on a source→sink path and the rule had no effect on results (harmless but noise; consider removing) - -## Reference - -### Position values -- `this`, `result`, `arg(0)`, `arg(1)`, ..., `arg(*)` -- Position modifiers (YAML list): `.[*]` (array element), `.ClassName#fieldName#fieldType` (field), `.` (synthetic state) - -### Function matching -- Simple: `package.Class#method` -- Complex: `{package, class, name}` with optional `pattern:` regex - -### Overrides -- `overrides: true` (default): applies to class and all subclasses -- `overrides: false`: exact class only - -### Conditions -`typeIs`, `annotatedWith`, `isConstant`, `isNull`, `constantMatches`, `tainted`, `numberOfArgs`, `methodAnnotated`, `classAnnotated`, `methodNameMatches`, `classNameMatches`, `isStaticField`, `anyOf`, `allOf`, `not` - -## When to use YAML vs code-based approximation - -- Simple from-to propagation -> **YAML** (this skill) -- Lambda/callback invocation -> **Code-based** (create-approximation skill) -- Non-deterministic branching -> **Code-based** -- Method is not in `external-methods-without-rules.yaml` -> **do nothing**; the rule will be a no-op (or, worse, an unintended OVERRIDE of an existing model) diff --git a/skills/debug-rule-reachability/SKILL.md b/skills/debug-rule-reachability/SKILL.md deleted file mode 100644 index b62c0aeb6..000000000 --- a/skills/debug-rule-reachability/SKILL.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -name: debug-rule-reachability -description: Produce a fact-reachability SARIF for one OpenTaint rule to see exactly where its dataflow facts get killed. Use when a rule passes its tests but still misses (or spuriously fires) on the real project. -license: Apache-2.0 -metadata: - author: opentaint - version: "0.1" ---- - -# Skill: Debug Rule Reachability - -Generate a fact reachability SARIF report to debug why a specific rule does (or doesn't) reach certain taint sinks - -## Prerequisites - -- Project model available — path provided by caller (`.opentaint/project/` from main pipeline, `.opentaint/test-compiled/` when called from `opentaint-issue-investigation`) -- Rule created and tested (create-rule, test-rule skills) - -## ⚠️ CRITICAL: Single Rule Only - -This command targets exactly ONE rule. Running fact reachability across multiple rules would produce an enormously huge SARIF report that is effectively unusable; the dedicated `opentaint dev debug-fact-reachability` command takes a single rule ID as its required argument. - -## Procedure - -### Run the debug command - -`opentaint dev debug-fact-reachability` is a separate command (not a flag on `scan`). It takes the full rule ID as its first positional argument and the source path (or a pre-compiled model via `--project-model`) as the second. - -```bash -opentaint dev debug-fact-reachability \ - java/security/my-vuln.yaml:my-vulnerability \ - --project-model .opentaint/project \ - -o .opentaint/results/fact-reachability.sarif \ - --ruleset builtin --ruleset .opentaint/rules -``` - -The rule ID requires the **full rule ID** in the format `:`. Example: for a rule file at `.opentaint/rules/java/security/my-vuln.yaml` with `id: my-vulnerability`, the full ID is `java/security/my-vuln.yaml:my-vulnerability`. - -### View results - -```bash -opentaint summary .opentaint/results/fact-reachability.sarif --show-findings -``` - -## Key Flags - -| Flag/Arg | Purpose | -|------|---------| -| `` (positional) | **Exactly one** full rule ID (`.yaml:`) — required | -| `--project-model` | Pre-compiled project model directory (skip recompilation) | -| `-o` | Path to the main SARIF output file | -| `--ruleset` | Rule directory (repeatable). Use `builtin` for built-in rules | -| `--timeout` | Analysis timeout (default 15m) | - -## Outputs - -The fact reachability report is **not** the main SARIF file specified by `-o`. The analyzer writes it as a **separate file** named `debug-ifds-fact-reachability.sarif` in the same output directory as the main report. - -For example, with `-o .opentaint/results/fact-reachability.sarif`: - -- **`.opentaint/results/fact-reachability.sarif`** — Main vulnerability findings for the single rule -- **`.opentaint/results/debug-ifds-fact-reachability.sarif`** — Debug fact reachability report - -Always check the output directory (`-o` parent) for this file. - -## Notes - -- This is a debug-only command intended for troubleshooting rule coverage -- Pre-compiled project models are passed via `--project-model `; otherwise the second positional argument is a source-path that the CLI will compile -- The command implicitly restricts the run to the one rule given as the positional argument; library rules referenced via join-mode `refs` are still resolved as needed diff --git a/skills/debug-rule/SKILL.md b/skills/debug-rule/SKILL.md new file mode 100644 index 000000000..6f6f7efd9 --- /dev/null +++ b/skills/debug-rule/SKILL.md @@ -0,0 +1,76 @@ +--- +name: debug-rule +description: Debug a rule or approximation that behaves unexpectedly by tracing where taint is dropped. Use when its samples won't pass after repeated attempts, or it passes tests but is wrong on a real scan +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Debug Rule + +Diagnose why a rule or approximation behaves unexpectedly on a model — samples that won't pass after repeated attempts, a missed flow, or a spurious finding on a real scan — by tracing where taint is dropped, and decide who owns the fix: the rule, a missing library model, or the engine + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Rule `` — the single full rule ID to trace (`.yaml:`); fact-reachability is always per-rule, so to debug an approximation trace the rule whose sample routes taint through the approximated method +- Project model `` — the model where the behavior shows up. Default: `.opentaint/test-compiled/` for a test project, or `.opentaint/project` for a main scan +- Ruleset `` — Default: `builtin` plus `.opentaint/rules` +- Output directory `` — where the debug SARIF lands. Default: `.opentaint/test-results/` for a test model, or `.opentaint/results` for a main scan +- Dropped external methods `` — the list from the run that showed the problem. Default: `dropped-external-methods.yaml` next to that run's SARIF +- Approximation directories `` / `` (optional) — apply when the behavior depends on them, so the debug run matches the run that showed the problem. Default: `.opentaint/config`, `.opentaint/approximations/src` + +## Workflow + +### 1. Precondition — library model complete + +Open `` from the run that showed the problem. If any method on the source→sink path is listed, STOP and model it (passThrough or dataflow), re-run, then debug — that missing model is the cause, not the engine. A method you already approximated that is still listed means the approximation isn't matching the real signature; fix it there. Debug only once no method on the path remains; if no `` exists, produce one with a `--track-external-methods` run + +### 2. Localize the kill — fact-reachability SARIF + +```bash +opentaint dev debug-fact-reachability \ + --project-model \ + -o /report.sarif \ + --ruleset builtin --ruleset +``` + +When the thing under debug is an approximation (or the flow depends on one), append `--passthrough-approximations ` / `--dataflow-approximations ` so the trace runs with it applied — taint dying at the approximated call then means the approximation isn't propagating: wrong signature (still in ``), empty body, or wrong from→to. Read the separate `/debug-ifds-fact-reachability.sarif` (not the `-o` file). For a missed detection (a `@PositiveRuleSample` that won't pass, or a flow absent from a scan): confirm a fact exists at the source — if not, the gap is in `pattern-sources` — then walk the facts to the last instruction still carrying the fact and the first where it's gone; that gap is where taint dies. For a spurious detection, do the reverse: find where a fact appears with no tainted input reaching it + +### 3. Isolate an entry point (optional) + +When the run misses the flow and you suspect the entry method is never reached, force analysis onto it. The entry point is positional — `*` for all methods, or a method FQN: + +```bash +opentaint dev debug-run-on-entry-points "com.example.Controller#handle" \ + --project-model \ + -o /report.sarif \ + --ruleset builtin --ruleset +``` + +A finding that appears here but not in the full run points to entry-point discovery / reachability, not the dataflow; if it still doesn't appear, localize the kill with step 2. This command is ignored on Spring projects (the entry-point override has no effect there), so for a missed Spring-controller flow rely on step 2 instead + +### 4. Classify the cause + +The killing instruction decides who owns the fix: + +- external library method → missing model (step 1 should have caught it; fact-reachability names the exact method) +- something the rule should handle — a mistaken sanitizer, an unmatched sink or source variant → fix the rule +- a plain instruction the engine should propagate through (assignment, cast, field read, an already-modeled call), with the rule correct and model complete → engine issue; route to report-analyzer-issue with the trace + +## Output + +- The diagnosis: `file:line` and instruction where taint is killed (or spuriously introduced), and which of the three causes it is +- For an engine issue, the fact-reachability trace up to the last reachable fact — report-analyzer-issue's input +- The exact debug command(s) used and the model they ran against + +## Tracking + +None — diagnostic, writes no tracking file + +## Gotchas + +- One rule per fact-reachability run; across many rules the report is unusably huge +- Debug the exact run that showed the problem — same model, rulesets, approximation dirs — or you debug something else; never swap the model mid-analysis diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md new file mode 100644 index 000000000..6ca8e5d20 --- /dev/null +++ b/skills/discover-attack-surface/SKILL.md @@ -0,0 +1,103 @@ +--- +name: discover-attack-surface +description: Map a Java/Kotlin project's attack surface and turn gaps in rule coverage into concrete rule requirements. Use when a project needs its attack surface mapped into rule requirements (requires a built project model) +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Discover Attack Surface + +Identify the attack surface of the target project by reading source code and project structure. Convert each security gap into concrete rule requirements, which will be used for creating test project and rule later + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Project root `` — the project sources. Default: current directory +- Project model `` — the built model. Default: `.opentaint/project` +- Tracking directory `` — where rule files are written. Default: `.opentaint/tracking` + +## Workflow + +Requires a built project model — without it you can miss entry points the analyzer actually sees + +### 1. Find entry points and sinks + +Search the sources for attack surface: + +- Spring/JAX-RS endpoints: `@RestController`, `@Controller`, `@RequestMapping`, `@GetMapping`/`@PostMapping`/..., `@Path`, `@GET`/`@POST` +- Servlets: classes extending `HttpServlet` (`doGet`, `doPost`) +- Message handlers: `@JmsListener`, `@KafkaListener`, `@RabbitListener` +- Other external input: `main(String[])`, `@Scheduled` methods reading external state + +For each, note what external data enters (params, headers, body, payload) and what dangerous operations it can reach (DB query, file I/O, command exec, outbound HTTP, deserialization, templating) + +### 2. Map dependencies to vulnerability classes + +Read `build.gradle` / `pom.xml` (or the model) and match each library to the classes it enables: + +- Web framework (Spring Boot, Micronaut, Quarkus) → shapes the entry points and request-binding sources +- DB / ORM (JDBC, JPA/Hibernate, MyBatis) → SQLi, especially string-built queries or `${}` mapper interpolation +- Template engines (Thymeleaf, FreeMarker, Velocity) → SSTI and reflected XSS +- HTTP clients (OkHttp, Apache HttpClient, RestTemplate, WebClient) → SSRF +- XML parsers (JAXB, DocumentBuilder, SAXParser) → XXE +- Deserializers (Jackson polymorphic typing, native `ObjectInputStream`, XStream) → insecure deserialization +- File / process APIs (`java.nio.file`, `ProcessBuilder`, `Runtime.exec`) → path traversal, command injection + +### 3. Decide which rules to write + +Check coverage, then turn each real gap into a requirement: + +- Read the built-in rules (`opentaint dev rules-path`) and anything already in `.opentaint/rules`. A source→sink pair is a gap only when no existing rule detects it +- Verify the pair is semantically real before recording it: the source is genuinely attacker-controlled (a request param, header, or body is; an app-internal constant or server config is not), and the sink is genuinely dangerous with tainted input (string-concatenated SQL is; a parameterized query is not). A pair that fails this isn't a rule +- For every uncovered, semantically real pair worth detecting, write one rule tracking file at `/rules/.yaml` (per Tracking) + +Name the rule `-` in kebab-case — the sink technology or framework plus the class, e.g. `mybatis-sqli`, `thymeleaf-ssti`, `resttemplate-ssrf`. It must be unique and stable: the name is the tracking file name and follows the rule through every later stage + +## Output + +- One `/rules/.yaml` per proposed rule, with `stages.description: done`, `requirements` filled, and `dependencies` (exact Maven GAV from the build files) the test project needs. `requirements` must reproduce the real flow, not paraphrase it: + - vuln class / CWE + - source — fully-qualified entry method, tainted input, `file:line` + - sink — fully-qualified method, dangerous call, `file:line` + - flow — intermediate hops as fully-qualified method names + - the real signatures and annotations, so the test can mirror the actual code +- A brief summary to the caller: one line per rule (name, vuln class, source→sink). Don't paste the full analysis back — the tracking files hold the detail + +## Tracking + +Create one rule file per proposed rule; fill only the discovery-stage fields: + +```yaml +name: mybatis-sqli +rule_id: null # filled later +finding: null # filled later +requirements: | + CWE-89 SQL injection via MyBatis ${} interpolation + source: com.example.web.OrderController#listOrders(String) — @RequestParam("orderBy"), OrderController.java:42 + flow: orderBy -> com.example.service.OrderService#list(String), OrderService.java:31 + -> com.example.mapper.OrderMapper#selectByOrder(String) + sink: com.example.mapper.OrderSqlProvider#byOrder — ${orderBy} concatenated into ORDER BY, OrderSqlProvider.java:18 +dependencies: # exact GAV the test project needs, from the build files + - org.mybatis:mybatis:3.5.13 + - org.springframework:spring-webmvc:5.3.30 +stages: + description: done + test_project: pending + tests_passing: pending +notes: > + mirror the @RequestParam binding and the @SelectProvider signature in the test +``` + +## Engine notes + +- Spring projects: the analyzer auto-discovers Spring endpoints, so you don't have to enumerate every controller — focus on which flows are dangerous +- Generic projects: the analyzer treats all public/protected methods of public classes as entry points + +## Gotchas + +- Propose a rule only for a real gap; if a built-in already covers the source→sink, don't duplicate it +- Requirements drive a test project someone else builds; vague requirements produce a useless test +- A passing test won't catch a semantically wrong source or sink — verify both are real here, when writing requirements, because nothing downstream re-checks it diff --git a/skills/discover-entry-points/SKILL.md b/skills/discover-entry-points/SKILL.md deleted file mode 100644 index 5e2ec50b8..000000000 --- a/skills/discover-entry-points/SKILL.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -name: discover-entry-points -description: Map the attack surface of a Java/Kotlin project — HTTP endpoints, message handlers, schedulers, CLI mains — into an OpenTaint analysis plan. Use when the user asks for "discovering attack surface" in context of opentaint. -license: Apache-2.0 -metadata: - author: opentaint - version: "0.1" ---- - -# Skill: Discover Entry Points - -Identify the attack surface of the target project by reading source code and project structure - -## Prerequisites - -- Target project source code accessible -- Project has been built (build-project skill complete) - -## Procedure - -### 1. Search for entry points by type - -Look for these patterns in the source code: - -- **Spring controllers**: `@RestController`, `@Controller`, `@RequestMapping`, `@GetMapping`, `@PostMapping`, `@PutMapping`, `@DeleteMapping` -- **Servlet handlers**: Classes extending `HttpServlet` with `doGet`, `doPost`, etc. -- **JAX-RS endpoints**: `@Path`, `@GET`, `@POST`, `@PUT`, `@DELETE` -- **Message handlers**: `@JmsListener`, `@KafkaListener`, `@RabbitListener` -- **CLI entry points**: `main(String[])` methods that process external input -- **Scheduled tasks**: `@Scheduled` methods that read external state - -### 2. For each entry point, determine - -- What external data it receives (HTTP params, headers, body, message payload) -- What operations it performs (DB queries, file I/O, command exec, HTTP calls) -- Which vulnerability classes are relevant (SQLi, XSS, command injection, path traversal, SSRF, XXE) - -### 3. Examine dependencies - -Read `build.gradle`, `pom.xml`, or `project.yaml` for: -- Web frameworks (Spring Boot, Micronaut, Quarkus) -- Database libraries (JDBC, JPA/Hibernate, MyBatis) -- Template engines (Thymeleaf, FreeMarker, Velocity) -- HTTP clients (OkHttp, Apache HttpClient, RestTemplate, WebClient) - -### 4. Record findings - -Document entry points, data sources, and relevant vulnerability classes in `.opentaint/analysis-plan.md`. - -## Engine Notes - -- Spring projects: The analyzer auto-discovers Spring endpoints automatically -- Generic projects: The analyzer uses all public/protected methods from public project classes -- Targeted analysis: Use `opentaint dev debug-run-on-entry-points "com.example.Class#method"` for focused testing diff --git a/skills/generate-poc/SKILL.md b/skills/generate-poc/SKILL.md index 717f932ed..073c27315 100644 --- a/skills/generate-poc/SKILL.md +++ b/skills/generate-poc/SKILL.md @@ -1,87 +1,85 @@ --- name: generate-poc -description: Build a proof-of-concept for a confirmed true-positive OpenTaint finding (SQLi, command injection, path traversal, XSS, SSRF, XXE) and document it. Use when a SARIF finding has been confirmed as a TP +description: Reproduce a true-positive finding against the running application. Use when a finding needs dynamic confirmation license: Apache-2.0 metadata: author: opentaint - version: "0.1" + version: "0.2" --- # Skill: Generate PoC -Generate a proof-of-concept for a confirmed true positive finding +Try to make the vulnerability actually fire on a running instance via a Python script, and record the outcome — confirmed or failed -## Prerequisites +## Inputs -- A finding classified as TRUE POSITIVE (analyze-findings skill) -- Triage input includes: VULN number, rule ID, CWE, severity, source/sink locations, trace steps +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -## Procedure +- Finding `` — the TP finding file. Default: `.opentaint/tracking/findings/.yaml` (name is required) +- Project root `` — sources to build and run. Default: current directory +- App endpoint `` (optional) — base URL if the app is already running +- PoC directory `` — where the PoC script is saved. Default: `.opentaint/pocs` -### 1. Construct PoC by vulnerability type +## Workflow -Use the source/sink location and trace from the triage to determine the HTTP route, parameter name, and payload shape. If the actual host and port are not known, use `http://:` as a placeholder. +### 1. Start the app -**SQL Injection**: Input that extracts data or bypasses auth -```bash -curl "http://:/api/users?id=1' OR '1'='1" -``` +Reuse `` if given. Otherwise build and start the app the way the project expects (`spring-boot:run`, `java -jar`, `docker compose`, …), wait until it's listening, and note the base URL. The PoC must hit a live instance -**Command Injection**: Input that executes arbitrary commands -```bash -curl "http://:/api/process?cmd=;cat /etc/passwd" -``` +### 2. Map the finding to a live request -**Path Traversal**: Input that accesses unauthorized files -```bash -curl "http://:/api/files?path=../../../etc/passwd" -``` +From the finding's source location find the entry point — the route and method, and the param / header / body field that carries the tainted input — and a payload that drives it to the sink. Common shapes: -**XSS**: Input that executes JavaScript -```bash -curl "http://:/api/search?q=" -``` +- SQL injection — `?id=1' OR '1'='1` +- command injection — `?cmd=;cat /etc/passwd` +- path traversal — `?path=../../../etc/passwd` +- XSS — `?q=` +- SSRF — `?url=http://169.254.169.254/latest/meta-data/` +- XXE — an XML body with `` -**SSRF**: Input that makes the server request internal resources -```bash -curl "http://:/api/fetch?url=http://169.254.169.254/latest/meta-data/" -``` +### 3. Write and run the PoC script -**XXE**: XML input that reads files -```bash -curl -X POST "http://:/api/parse" \ - -H "Content-Type: application/xml" \ - -d ']>&xxe;' -``` +Write a self-contained Python script to `/.py` that does any setup (auth, seed state), sends the request, and asserts the observable evidence — so it's re-runnable and self-checking. -For other CWE classes, construct an HTTP request that delivers the tainted source value to the identified sink parameter. +Run it. Confirmation needs observable proof — rows returned, file contents, command output, a time delay, an out-of-band callback, an injection-revealing error and so on -### 2. Document the finding +### 4. Record the outcome -Use the triage input to fill in the template: +- confirmed — the script fired and proved the vuln. Set `poc: confirmed`, record `poc_script`, and in `notes` describe the working sequence (setup → request(s) → observed evidence), not just the final request +- failed — after several attempts you couldn't confirm the finding, or the app/route couldn't be reached. Set `poc: failed`, save the script, and in `notes` record the variants you tried and why each didn't fire -```markdown -## : in +## Output -**Severity**: () -**Location**: `:` -**Rule**: `` +- The PoC script at `/.py` +- The finding's `poc` set to `confirmed` or `failed`, `poc_script` recorded, evidence/reason in `notes` +- If you started the app, leave it running and report its `` so the next PoC can reuse it instead of starting another instance +- Report the outcome to the caller; if failed, call out that the finding is unconfirmed. Do not write `.opentaint/vulnerabilities.md` — main assembles that from the confirmed findings -### Description - +## Tracking -### Trace -1. **Source**: `` -- `` (line ) -2. **Flow**: -3. **Sink**: `` (line ) +In ``, set `poc` and `poc_script` and append the result to `notes`: -### Proof of Concept -``` - +```yaml +poc: confirmed # confirmed | failed +poc_script: .opentaint/pocs/brave-hopper.py +notes: > + + poc: logged in as a seeded user (POST /login), then GET /api/orders?orderBy=id);SELECT pg_sleep(5)-- + — the injected ORDER BY delayed the response ~5s while a benign orderBy=id returned instantly → time-based SQLi confirmed ``` -### Remediation - +Failed instead — narrate the attempts, not a single request: + +```yaml +poc: failed +poc_script: .opentaint/pocs/brave-hopper.py +notes: > + + poc: tried ' OR 1=1--, a UNION SELECT, and time-based pg_sleep on /api/orders and /api/orders/search; + every variant returned 400 — orderBy is whitelisted to column names server-side → could not reproduce ``` -Return this markdown block as output to the main agent. The main agent appends it to `.opentaint/vulnerabilities.md`. +## Gotchas + +- Reproduce, don't theorize — a script you didn't run, or a 200 with no observable effect, is not a confirmation +- failed ≠ false positive — couldn't-reproduce isn't proof the code is safe (auth, missing state, wrong payload). Record `failed` and DO NOT flip `verdict` here diff --git a/skills/opentaint-agent/SKILL.md b/skills/opentaint-agent/SKILL.md deleted file mode 100644 index 0c1ced1bf..000000000 --- a/skills/opentaint-agent/SKILL.md +++ /dev/null @@ -1,166 +0,0 @@ ---- -name: opentaint-agent -description: Run an end-to-end opentaint security analysis on a Java/Kotlin project. Build, find entry points, write rules, scan, and triage findings. Use this skill when the user asks to "find vulnerabilities", "run SAST", or "scan Java app for security issues" -license: Apache-2.0 -metadata: - author: opentaint - version: "0.1" ---- - -# Opentaint Agent -- Meta Prompt - -You are an AI security analyst using opentaint, a dataflow-based SAST analyzer for JVM projects. Your goal is to find real vulnerabilities by iteratively creating rules, running analysis, and refining results. - -If the user does not explicitly name a target, scan the current project in current folder. - -All agent-generated artifacts (project model, rules, config, approximations, test project, results, plans, reports) live under a single `.opentaint/` directory at the project root. Do not scatter files outside it. - -## Setup - -Run `opentaint dev rules-path` to get the built-in rules directory. - -## Workflow - -Execute these four phases in order. Iterate phases 2-4 until the external methods list stabilizes and all findings are classified. - -**Subagent delegation** The Delegate blocks under each step are instructions, on how to dispatch that step to a subagent. Each block is a contract: which skill the subagent should load, what inputs to pass, what output to require back, and (where it loops) the stop condition. If you have a tool for spawning subagents, follow the Delegate blocks. If you have no subagent tool, ignore the Delegate blocks safely and execute the steps directly using the named skills. - -### Phase 1: Project Setup - -1. Build the project (use the `build-project` skill). Produce `.opentaint/project/project.yaml`. - - Delegate via the `build-project` skill - - Inputs: target project root path; any known build constraints (Java version, submodules, `--package` filters) - - Output: absolute path to the model directory containing `project.yaml`, OR a one-paragraph build-failure summary with the failing command - -2. Discover entry points (use the `discover-entry-points` skill). Identify attack surface, data sources, vulnerability classes. Write `.opentaint/analysis-plan.md`. - - Delegate via the `discover-entry-points` skill - - Inputs: project root; model directory from step 1 - - Output: one-paragraph short summary of found attack surfaces. Do not require the full plan content back — read the file yourself on demand - -### Phase 2: Rule Creation - -1. Check built-in rules — read rules in `$(opentaint dev rules-path)` - -2. Create rules for uncovered vulnerability classes (use the `create-rule` skill). Library rules in `.opentaint/rules/java/lib/`, security rules in `.opentaint/rules/java/security/` - -3. Test rules (use the `test-rule` skill). Create annotated test samples with `@PositiveRuleSample` / `@NegativeRuleSample`, fix until all tests pass - -Delegate (covers the whole phase — one subagent reads the built-in rules reference, authors the rule, and tests it) via the `create-rule` and `test-rule` skills, used together as a loop -- Inputs: vulnerability class; source/sink hints from `.opentaint/analysis-plan.md`; built-in rules path (`$(opentaint dev rules-path)`) -- Subagent loop: check built-in coverage and find library rules to reference; author or edit YAML per `create-rule`; add samples and run `opentaint dev test-rules` per `test-rule`; fix patterns on `falseNegative` / `falsePositive` -- Output: full rule ID (`.yaml:`); path to the rule file; one-line test result summary -- Stop when: every sample reports `success` in `test-result.json` - -### Phase 3: Analysis - -1. Run analysis (use the `run-analysis` skill). Always pass a pre-compiled model via `--project-model`, and use full rule IDs of the form `.yaml:`: - ```bash - opentaint scan --project-model .opentaint/project \ - -o .opentaint/results/report.sarif \ - --ruleset builtin --ruleset .opentaint/rules \ - --rule-id java/security/.yaml: \ - --track-external-methods - ``` -2. Collect `.opentaint/results/report.sarif`, and next to it the fixed-name files `.opentaint/results/external-methods-without-rules.yaml` (taint-killing methods) and `.opentaint/results/external-methods-with-rules.yaml` (already modeled). The `--track-external-methods` flag is a boolean; the filenames and location are fixed by the analyzer. - -Run in main: this phase is one CLI invocation. Output files persist on disk and are consumed by Phase 4. Delegating it would only add a subagent hop without saving context — run `opentaint scan` directly - -### Phase 4: Results Interpretation and Iteration - -1. Analyze findings (use the `analyze-findings` skill). Classify each SARIF finding as TP, FP (rule fix), or FP (approximation fix). Read `external-methods-without-rules.yaml` for FN discovery (these are the methods that kill taint). - - Delegate via the `analyze-findings` skill - - Inputs: paths to `.opentaint/results/report.sarif`, `.opentaint/results/external-methods-without-rules.yaml`, `.opentaint/results/external-methods-with-rules.yaml`; the active rule IDs - - Output: structured triage — - - TPs: rule ID, CWE, severity, source/sink locations, brief trace - - FPs: rule ID and suggested fix kind (`pattern-not` / `pattern-sanitizers` / passThrough override) - - PassThrough candidates: prioritized list of generic propagators on a real source→sink path - - Approximation candidates: lambda/async methods - - Stop when: every finding is classified - -2. For true positives: generate PoC (use the `generate-poc` skill), document in `.opentaint/vulnerabilities.md`. - - Before dispatching, assign a sequential `VULN-NNN` number to each TP (e.g. VULN-001, VULN-002). - - Delegate (parallel fan-out) via the `generate-poc` skill — one subagent per TP - - Inputs (per subagent): assigned VULN number; the single TP's trace from the triage (rule ID, CWE, severity, source/sink locations, trace steps) - - Output (per subagent): PoC command; `.opentaint/vulnerabilities.md` entry text for that finding - - You then append the returned entries to `.opentaint/vulnerabilities.md` - -3. For false positives: fix rules with `pattern-not` / `pattern-sanitizers`, update tests, re-run. - - Delegate via the `create-rule` and `test-rule` skills, used as a loop (same shape as Phase 2 step 3, starting from an existing rule) - - Inputs: rule ID and path; FP triage entries from step 1; the failing trace - - Subagent loop: edit rule; add a `@NegativeRuleSample` reproducing the FP; run tests - - Output: updated rule ID; test summary - - Stop when: the new negative sample passes and prior positives still pass - -4. For false negatives (from external methods): simple propagation -> YAML config (use the `create-yaml-config` skill); lambda/callback methods -> code approximation (use the `create-approximation` skill). - - Delegate (batched by package) via `create-yaml-config` and/or `create-approximation` (pick per method shape) - - Inputs: filtered method list from the triage (only methods on a real source→sink path), grouped by package/library; existing `.opentaint/config/` and `.opentaint/approximations/` paths - - Subagent action: write the models, then re-run `opentaint scan --track-external-methods` to verify the methods moved from `external-methods-without-rules.yaml` to `external-methods-with-rules.yaml` - - Output: methods successfully moved; methods that did not move, each with a one-line reason (signature mismatch, wrong `overrides:`, etc.) - - Stop when: every targeted method either moves to `with-rules` or is reported back as not-moved with a reason - -5. Re-run analysis with updated rules/config/approximations. - - Run in main: same as Phase 3 — single CLI invocation, no delegation - -6. Stop when the external methods list stabilizes, all findings are classified, and high-priority vulnerabilities have PoCs - -## Working Directory Layout - -``` -/ - .opentaint/ - analysis-plan.md - vulnerabilities.md - project/ # Built project model - rules/ # Custom rules - java/lib/ - java/security/ - config/ # YAML passThrough config - custom-propagators.yaml - approximations/ - src/ # Java sources (auto-compiled by the CLI) - test-project/ # Rule test project - test-compiled/ # Compiled test project model - test-results/ # Rule test outputs - results/ - report.sarif - external-methods-without-rules.yaml # written next to report.sarif - external-methods-with-rules.yaml - issues/ # Engine-issue reports (when applicable) -``` - -## Decision Guide - -| Situation | Action | Skill | -|-----------|--------|-------| -| Need new vulnerability detection | Create join-mode rule | create-rule | -| FP: over-broad pattern | Add pattern-not/sanitizers | create-rule | -| FN: library method kills taint | Add YAML passThrough | create-yaml-config | -| FN: lambda/callback method | Code-based approximation | create-approximation | -| Confirmed vulnerability | Generate PoC | generate-poc | - -## Note: Suspected Engine Issues - -If a rule that should fire keeps missing (or firing spuriously) even though the rule tests pass and `external-methods-without-rules.yaml` has no methods on the relevant path, use the `opentaint-issue-investigation` skill. It walks through building a minimal rule-test reproducer, ruling out library-model gaps, pinpointing the instruction where IFDS drops the fact via `opentaint dev debug-fact-reachability`, and writing a short report. - -Delegate via the `opentaint-issue-investigation` skill (it pulls in `debug-rule-reachability` and `test-rule` as needed) -- Inputs: failing rule ID; original project location; existing triage notes; proof that no relevant method remains in `external-methods-without-rules.yaml` -- Output: path to `.opentaint/issues/.md` -- Stop when: the report exists and self-contains the reproducer plus the dropping instruction location - -## Key Constraints - -- Approximations (YAML and code-based) apply ONLY to external methods -- library classes without source code -- `--approximations-config` is repeatable; all files are merged together, then the combined result **replaces the entire built-in passThrough list** — not per-method. Passing any custom config means no built-in passThrough entry is active. -- `--rule-id` takes the FULL rule ID: `.yaml:` (e.g. `java/security/my-vuln.yaml:my-vulnerability`) -- `--rule-id` drops every rule whose ID is not in the filter, including library rules referenced via `refs`. List every rule you need explicitly. -- `--track-external-methods` is a boolean; files are always written as `/external-methods-{without,with}-rules.yaml` -- Duplicate approximation targeting the same class as a built-in = error -- Each rule must have test coverage before running on the real project diff --git a/skills/opentaint-issue-investigation/SKILL.md b/skills/opentaint-issue-investigation/SKILL.md deleted file mode 100644 index dc0595c95..000000000 --- a/skills/opentaint-issue-investigation/SKILL.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -name: opentaint-issue-investigation -description: Build a minimal reproducer and pinpoint the instruction where OpenTaint's engine drops a dataflow fact, then write a short engine-issue report. Use as a last resort when a rule passes its tests, the library model is complete, and the finding is still wrong. -license: Apache-2.0 -metadata: - author: opentaint - version: "0.1" ---- - -# Skill: OpenTaint Issue Investigation - -Investigate and confirm an issue in the OpenTaint analysis engine — a case where a rule that should fire does not (or fires where it should not), and the cause is **not** the rule's syntax or the library modeling, but the engine itself (e.g. an intra/inter-procedural dataflow path that is cut unexpectedly). - -The deliverable is a small, self-contained reproducer plus a short write-up that points at the exact instruction where the dataflow dies. - -## When to use this skill - -Use it after `analyze-findings` / `create-yaml-config` / `create-approximation` have been exhausted and a finding is still missing (or spurious), even though: - -- The rule passes its own tests on isolated samples. -- `external-methods-without-rules.yaml` is empty (or irrelevant) for the relevant code path. -- Nothing about the library model is obviously wrong. - -If any of those is not true, stop and go fix the rule / add the approximation first. An "engine issue" report is only credible once the trivial causes have been ruled out. - -## Prerequisites - -- Working rule with passing tests (`create-rule`, `test-rule`). -- Baseline scan has been run (`run-analysis`). -- `analyze-findings` has been consulted; the remaining failure is not explained by `external-methods-without-rules.yaml`. - -## Procedure - -### 1. Build a minimal rule-test reproducer - -Shrink the original code to the smallest sample that still reproduces the problem, and put it in a rule-test project under `.opentaint/test-project/` at the analyzed project root (read the `test-rule` skill). - -Choose the project shape based on what the real code needs: - -- **Plain method-level sample** — works for rules where the tainted flow stays inside one method or crosses only ordinary Java calls. One class under `src/main/java/test/` with a single `@PositiveRuleSample` (expected trigger) or `@NegativeRuleSample` (expected no trigger) is enough. -- **Spring-app sub-project** — required whenever the real flow enters through a Spring `@Controller`, uses Spring beans, or depends on dispatcher wiring. Create a dedicated `spring-app-tests/` module with exactly one sample annotation, as described in the `test-rule` skill under *Testing Spring-app rules*. Positive and negative cases go in separate sub-projects (e.g. `xss-spring-test-positive`, `xss-spring-test-negative`). - -Keep the sample as small as possible: remove every statement that is not needed to carry taint from source to sink. A small reproducer is what makes the rest of the investigation tractable — and it is what ships in the bug report. - -### 2. Confirm the issue reproduces on the test project - -Compile the test project and run the rule tests: - -```bash -opentaint compile .opentaint/test-project -o .opentaint/test-compiled -opentaint dev test-rules .opentaint/test-compiled \ - -o .opentaint/test-results \ - --ruleset builtin --ruleset .opentaint/rules -``` - -Inspect `.opentaint/test-results/test-result.json`: - -- A `@PositiveRuleSample` that ends up in `falseNegative` reproduces a missed-detection engine issue. -- A `@NegativeRuleSample` that ends up in `falsePositive` reproduces a spurious-detection engine issue. -- `skipped` / `disabled` mean the rule was not actually exercised — fix the annotation `value`/`id` or enable the rule before going further. -- `success` means the issue does **not** reproduce. Either the sample is too reduced, or something in the original project (not in the sample) is what triggers the problem. Go back to step 1 and add back the minimum context. - -Do not proceed until the test result matches the bug you are trying to document. - -### 3. Rule out missed external-method models - -Re-run the test with external-method tracking and read the two lists next to the SARIF (read the `analyze-findings` skill, §3): - -```bash -opentaint scan --project-model .opentaint/test-compiled \ - -o .opentaint/test-results/report.sarif \ - --ruleset builtin --ruleset .opentaint/rules \ - --rule-id .yaml: \ - --track-external-methods -``` - -Open `.opentaint/test-results/external-methods-without-rules.yaml`. For every method that sits on the source→sink path in your sample: - -- Simple propagator (getter/collection/builder) → add a YAML `passThrough` (read the `create-yaml-config` skill). -- Lambda/callback/async → add a code-based approximation (read the `create-approximation` skill). - -Re-run until that file contains **no methods on the relevant path**. Only then is it legitimate to call the remaining failure an engine issue — otherwise you are just looking at a missing library model. - -### 4. Locate where the dataflow dies - -Use the fact reachability debug command to see exactly how far the taint travels (read the `debug-rule-reachability` skill). It is a separate command, `opentaint dev debug-fact-reachability`, that takes a single full rule ID: - -```bash -opentaint dev debug-fact-reachability \ - .yaml: \ - --project-model .opentaint/test-compiled \ - -o .opentaint/test-results/report.sarif \ - --ruleset builtin --ruleset .opentaint/rules -``` - -Inspect `.opentaint/test-results/debug-ifds-fact-reachability.sarif`. For a missed detection: - -1. Confirm the **source is matched** — at least one fact is reported at the source location. If it is not, the problem is in the rule's `pattern-sources`, not the engine. -2. Walk the reachable facts along the expected path. Note the **last instruction that still carries the fact** and the **first instruction where it is gone**. That gap is where the engine drops the dataflow. -3. Check that the drop happens at an instruction that is **not relevant to the rule** — e.g. a plain local assignment, a trivial method call with a modelled pass-through, a cast, a field read. If the drop is at something the rule should handle (a recognised sanitizer, a sink variant the rule was not written to match, etc.), the issue is still in the rule, not in the engine. - -For a spurious detection, do the symmetric check: find the instruction where the fact appears even though no tainted input reaches it. - -### 5. Write the investigation report - -Produce a short Markdown note at `.opentaint/issues/.md` with: - -- **Reproducer** — path to the rule-test sub-project, the exact `opentaint dev test-rules` command, and the relevant snippet from `test-result.json`. -- **Rule** — full rule ID (`.yaml:`) and the ruleset it came from (`builtin` or `.opentaint/rules`). -- **Observed vs expected verdict** — e.g. *Expected: finding at `Sink.java:42`. Observed: no finding; sample listed under `falseNegative`.* -- **Where the dataflow dies** — file, line, and the specific instruction from the fact reachability SARIF. Quote the trace up to the last reachable fact and state which instruction drops it. -- **Ruled-out causes** — - 1. Rule tests pass on an isolated method sample (rule syntax is fine). - 2. `external-methods-without-rules.yaml` has no methods on the relevant path (library modeling is not the gap), or list the approximations that were added in step 3. - 3. The dropping instruction is unrelated to what the rule was meant to match (not a sanitizer, not an unsupported sink variant, etc.). -- **Minimal hypothesis** — 1–3 sentences on what the engine is likely doing wrong at that instruction (e.g. *"IFDS loses the fact across this `StringBuilder.append` because the call is devirtualized to an `AbstractStringBuilder` overload that has no default pass-through"*). Keep it short; this is a hypothesis, not a fix. - -Include only what is needed to reproduce and locate the problem. A good report is roughly one screen of Markdown plus the rule-test sub-project. - -## Stop Condition - -The investigation is done when all of the following hold: - -- The rule-test sub-project reproduces the issue deterministically via `opentaint dev test-rules`. -- No method on the expected source→sink path remains in `external-methods-without-rules.yaml`. -- The fact reachability SARIF pinpoints a specific instruction where the taint is dropped (or spuriously introduced) and that instruction is unrelated to the rule logic. -- The report at `.opentaint/issues/.md` exists and is self-contained. diff --git a/skills/report-analyzer-issue/SKILL.md b/skills/report-analyzer-issue/SKILL.md new file mode 100644 index 000000000..c85a6f604 --- /dev/null +++ b/skills/report-analyzer-issue/SKILL.md @@ -0,0 +1,59 @@ +--- +name: report-analyzer-issue +description: Write an OpenTaint engine-issue report from a confirmed diagnosis, optionally opening a GitHub issue. Use when engine-side issue got confirmed and requires report +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Report Analyzer Issue + +Turn a confirmed engine-level diagnosis into a self-contained `.opentaint/issues/.md` report, and optionally a GitHub issue. It only writes the report from the diagnosis, the test project, and the rule or approximation it concerns — it runs no analysis of its own + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Diagnosis `` — debug-rule's engine-level conclusion: where taint dies (`file:line` + instruction), the fact-reachability trace up to the last reachable fact, and observed vs expected verdict +- Test project `` / `` — the project the artifact was tested on and debug-rule traced, already built by create-test-project. Default: `.opentaint/test-projects/` / `.opentaint/test-compiled/` +- Artifact `` — the rule or approximation the issue concerns: a rule's full id and ruleset, or the approximation's target method(s) +- Issue file `` — where to write the report. Default: `.opentaint/issues/.md`; `` is a short kebab-case symptom name (a filename — no spaces or hashes) +- Open a GitHub issue `` (optional) — whether to also file at github.com/seqra/opentaint; the main agent decides and passes this. Default: no + +## Workflow + +### 1. Gate — require an engine diagnosis + +File a report only for an engine issue debug-rule already confirmed. The diagnosis must establish all three; if any is missing, return to the caller and ask for debugging first — don't verify or run anything yourself: + +- not a rule fix — the rule's patterns are correct; debug-rule ruled out tightening or broadening it +- not a missing model — no method on the source→sink path remains in `dropped-external-methods.yaml` +- it is the engine — taint is dropped at an instruction the engine should propagate through + +### 2. Write the report + +Write `` — this file is the deliverable; never return the diagnosis as chat text only. Assemble from the inputs: + +- Test project — `` path, the test command (`test-rules` / `test-approximations`), and the failing `test-result.json` snippet (e.g. a `@PositiveRuleSample` stuck at `falseNegative`) +- Rule / approximation — the ``: a rule's full id and ruleset, or the approximation's target method(s) +- Observed vs expected — e.g. expected a finding at `Sink.java:42`; observed none +- Where the dataflow dies — `file:line` and the instruction, quoted up to the last reachable fact +- Ruled-out causes — the three gate points +- Hypothesis — 1–3 sentences on what the engine is likely doing wrong there; a hypothesis, not a fix + +Keep it to about one screen plus the test project + +### 3. File on GitHub (only if asked) + +When `` is set, file the same content to the fixed repo: + +```bash +gh issue create --repo seqra/opentaint \ + --title ": " \ + --body-file +``` + +## Output + +- The written `` (always), and the issue URL if one was filed diff --git a/skills/run-analysis/SKILL.md b/skills/run-analysis/SKILL.md deleted file mode 100644 index c62948447..000000000 --- a/skills/run-analysis/SKILL.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -name: run-analysis -description: Run an OpenTaint scan on a built project model and produce the SARIF report plus the taint-killing-method YAMLs used for iteration. Use whenever the user asks to scan or re-scan a project. -license: Apache-2.0 -metadata: - author: opentaint - version: "0.1" ---- - -# Skill: Run Analysis - -Run OpenTaint analysis on the target project and collect results - -## Prerequisites - -- Project built (build-project skill) — model at `.opentaint/project/` -- Rules created and tested (create-rule, test-rule skills) — at `.opentaint/rules/` -- Optionally: YAML config (create-yaml-config skill) at `.opentaint/config/` and/or approximations (create-approximation skill) at `.opentaint/approximations/` - -## Procedure - -### Basic analysis - -The `--rule-id` flag requires the **full rule ID** in the format `.yaml:`. Example: for a rule file at `.opentaint/rules/java/security/my-vuln.yaml` with `id: my-vulnerability`, the full ID is `java/security/my-vuln.yaml:my-vulnerability`. - -Pass the pre-compiled project model via `--project-model`. The positional `scan ` argument is reserved for source projects that the CLI will compile itself. - -```bash -opentaint scan --project-model .opentaint/project \ - -o .opentaint/results/report.sarif \ - --ruleset builtin \ - --ruleset .opentaint/rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --track-external-methods -``` - -### With custom passThrough config - -`--approximations-config` is repeatable; every occurrence is OVERRIDE-merged. - -```bash -opentaint scan --project-model .opentaint/project \ - -o .opentaint/results/report.sarif \ - --ruleset builtin --ruleset .opentaint/rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --approximations-config .opentaint/config/custom-propagators.yaml \ - --track-external-methods -``` - -### With code-based approximations - -Point `--dataflow-approximations` at a directory of Java sources. The CLI auto-compiles `.java` files into a temp directory and forwards that to the analyzer. - -```bash -opentaint scan --project-model .opentaint/project \ - -o .opentaint/results/report.sarif \ - --ruleset builtin --ruleset .opentaint/rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --dataflow-approximations .opentaint/approximations/src \ - --track-external-methods -``` - -### View results - -```bash -opentaint summary .opentaint/results/report.sarif --show-findings -``` - -## Outputs - -Three files to collect — all next to the SARIF report: - -1. **`.opentaint/results/report.sarif`** — Vulnerability findings with code flow traces -2. **`.opentaint/results/external-methods-without-rules.yaml`** — Methods where no pass-through rules fired (**dataflow facts killed here — these cause false negatives**) -3. **`.opentaint/results/external-methods-with-rules.yaml`** — Methods where pass-through rules were applied (already modeled, typically no action needed) - -The `--track-external-methods` flag is a boolean. Filenames and location are fixed: the two YAMLs are written into the same directory as the SARIF file, using the names above. - -## Key Flags - -| Flag | Purpose | -|------|---------| -| `--project-model` | Pre-compiled project model directory (contains `project.yaml`) | -| `--ruleset` | Rule directory (repeatable). Use `builtin` for built-in rules | -| `--rule-id` | Enable only specific rules by full ID `.yaml:` (repeatable) | -| `--approximations-config` | YAML passThrough config (repeatable; all files merged, combined result replaces the entire built-in passThrough list) | -| `--dataflow-approximations` | Directory of Java sources or compiled class files (repeatable) | -| `--track-external-methods` | Emit `external-methods-{without,with}-rules.yaml` next to the SARIF | -| `--severity` | Filter by severity (note, warning, error) | -| `--timeout` | Analysis timeout (default 900s) | - -## Notes - -- For a pre-compiled model, always use `--project-model `. The positional argument is only for source projects that will be compiled by the CLI. -- `--rule-id` drops every rule whose full ID is not in the filter, **including library rules referenced via join-mode `refs`**. List every rule you want active explicitly. -- `--approximations-config` is repeatable; all supplied files are merged into one combined config, which then replaces the **entire** built-in passThrough list. If you pass any `--approximations-config`, no built-in passThrough entry is active — your files must cover everything you need. -- `--dataflow-approximations` accepts a directory. `.java` files are auto-compiled by the CLI; already-compiled `.class` directories are passed through as-is. -- Duplicate approximation targeting the same class as a built-in will cause an error. diff --git a/skills/run-scan/SKILL.md b/skills/run-scan/SKILL.md new file mode 100644 index 000000000..83939a650 --- /dev/null +++ b/skills/run-scan/SKILL.md @@ -0,0 +1,77 @@ +--- +name: run-scan +description: Run an OpenTaint scan on project and produces the SARIF report. Use whenever the user asks to scan or re-scan a project +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Run Scan + +Run an OpenTaint scan over a project and collect results + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Target `` / `` — pre-compiled model or source project directory. Default: model at `.opentaint/project` +- Ruleset `` — Default: `builtin` plus `.opentaint/rules` if present +- Rule IDs `` (optional) — full IDs to restrict the scan to, omit to run all loaded rules +- SARIF output `` — Default: `.opentaint/results/report.sarif` +- PassThrough config `` (optional) — a passThrough YAML file or a directory of them. Default: `.opentaint/config` +- Dataflow approximations directory `` (optional) — Default: `.opentaint/approximations/src` + +## Workflow + +Point at the code either way: a source project (CLI compiles it) as the positional `scan `, or a pre-built model via `--project-model `. If project model provided prefer using it instead of source project + +```bash +opentaint scan --project-model \ + -o \ + --ruleset builtin --ruleset \ + --track-external-methods +``` + +Append optional flags as needed: + +- `--rule-id ` — restrict to specific rules (repeatable); omit to run all loaded rules +- `--passthrough-approximations ` — apply passThrough configs from a YAML file or a directory of them (OVERRIDE: merged with built-ins at the rule level, a provided rule overrides a built-in only when it matches one in the built-in set; repeatable; replaces the old `--approximations-config`) +- `--dataflow-approximations ` — apply code-based approximations (Java sources, auto-compiled; or pre-compiled `.class` dirs, passed through as-is) + +## Output + +Three files, all next to the SARIF report: + +1. `` — findings with code-flow traces +2. `dropped-external-methods.yaml` — methods where dataflow facts were killed (no approximation model) → candidates to approximate; possible source of false negatives +3. `approximated-external-methods.yaml` — methods already modeled + +## Finding files + +`scripts/sarif-to-findings.py` turns a SARIF report into one finding tracking file per rule under `.opentaint/tracking/findings/`, bundling each rule's result hashes: + +```bash +python3 scripts/sarif-to-findings.py -o .opentaint/tracking/findings +``` + +Run it on the SARIF you intend to triage. It's idempotent — a re-run adds only result hashes not already present, resets a touched finding's `verdict` to `pending`, and preserves existing verdicts, notes, PoCs, and any triage splits. Grouping is by rule id only; analyze-findings splits a rule's bundle into distinct logical findings + +## Key Flags + +| Flag | Purpose | +|---|---| +| `--project-model` | Pre-compiled model directory (omit to scan a source project via the positional arg) | +| `--ruleset` | Rule directory (repeatable); `builtin` for built-ins | +| `--rule-id` | Restrict to specific full rule IDs (repeatable) | +| `--passthrough-approximations` | passThrough configs: a YAML file or directory of them (OVERRIDE, repeatable) | +| `--dataflow-approximations` | Directory of Java sources or compiled classes (repeatable) | +| `--track-external-methods` | Emit `dropped-external-methods.yaml` + `approximated-external-methods.yaml` next to the SARIF | +| `--timeout` | Analysis timeout (default 900s) | + +## Gotchas + +- `--rule-id` drops every rule whose full ID is not listed, including library rules referenced via join-mode `refs`. List every rule you need +- `--passthrough-approximations` merges with the built-in passThrough set at the rule level — a provided rule overrides a built-in only if it matches one in the built-in set, otherwise built-ins stay active +- Paths fall back to the `.opentaint/` layout when the caller omits them; the caller can override any of them +- Duplicate approximation targeting the same class as a built-in errors out diff --git a/skills/run-scan/scripts/sarif-to-findings.py b/skills/run-scan/scripts/sarif-to-findings.py new file mode 100644 index 000000000..c73abf57e --- /dev/null +++ b/skills/run-scan/scripts/sarif-to-findings.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +""" +sarif-to-findings.py — turn an OpenTaint SARIF report into per-rule finding +tracking files under .opentaint/tracking/findings/. + +One file per rule_id, bundling that rule's result hashes into sarif_hashes. +Grouping is trivial (by rule_id) — no clustering. The triage skill +(analyze-findings) later splits a rule's bundle into distinct logical findings. + +Idempotent: re-running after a re-scan adds only result hashes not already +present in any of that rule's finding files, resets the touched file's verdict +to `pending`, and leaves existing verdict/notes/poc and triage splits intact. + +SARIF assumptions — adjust the two helpers below if the real OpenTaint SARIF +differs: +- result.ruleId holds the full rule id (e.g. java/security/sqli.yaml:sqli) +- a stable per-result hash comes from result.fingerprints / partialFingerprints + when present, else is computed from ruleId + locations + code-flow locations +- result.message.text seeds the analyzer report in `notes` +""" +import argparse +import glob +import hashlib +import json +import re +from pathlib import Path + +ADJ = ["brave", "calm", "eager", "fuzzy", "gentle", "jolly", "keen", "lucid", + "merry", "noble", "proud", "quiet", "rapid", "sly", "tidy", "vivid", + "witty", "zesty", "amber", "bold"] +NOUN = ["hopper", "eagle", "otter", "falcon", "maple", "comet", "harbor", + "willow", "pixel", "river", "ember", "cobra", "lotus", "raven", + "quartz", "badger", "cedar", "drake", "finch", "gull"] + + +def docker_name(seed, taken): + """Stable adjective-noun slug from the rule id; suffixed on collision.""" + h = int(hashlib.sha1(seed.encode()).hexdigest(), 16) + base = f"{ADJ[h % len(ADJ)]}-{NOUN[(h // len(ADJ)) % len(NOUN)]}" + name, n = base, 2 + while name in taken: + name, n = f"{base}-{n}", n + 1 + return name + + +# Prefer a stable, named fingerprint kind. vulnerabilitySourceSinkHash is more stable +# than vulnerabilityWithTraceHash — it keys on the source+sink and survives changes to +# the intermediate trace path. Fall back to any fingerprint value, then a content hash. +_FP_PREFERENCE = ("vulnerabilitySourceSinkHash", "vulnerabilityWithTraceHash") + + +def result_hash(res): + fp = res.get("fingerprints") or res.get("partialFingerprints") + if isinstance(fp, dict) and fp: + for pref in _FP_PREFERENCE: + for k, v in fp.items(): + if k.startswith(pref): + return str(v)[:16] + return str(sorted(fp.values())[0])[:16] + parts = [res.get("ruleId", "")] + locs = list(res.get("locations", [])) + for cf in res.get("codeFlows", []): + for tf in cf.get("threadFlows", []): + locs += [st.get("location", {}) for st in tf.get("locations", [])] + for loc in locs: + pl = loc.get("physicalLocation", {}) + parts.append(pl.get("artifactLocation", {}).get("uri", "")) + parts.append(json.dumps(pl.get("region", {}), sort_keys=True)) + return hashlib.sha1("|".join(parts).encode()).hexdigest()[:16] + + +def scan_results(sarif): + """rule_id -> {hash: message}""" + out = {} + for run in sarif.get("runs", []): + for res in run.get("results", []): + rid = res.get("ruleId") or "unknown" + msg = (res.get("message", {}) or {}).get("text", "").strip() + out.setdefault(rid, {})[result_hash(res)] = msg + return out + + +NAME_RE = re.compile(r'^finding_name:\s*(.+?)\s*$', re.M) +RULE_RE = re.compile(r'^rule_id:\s*(.+?)\s*$', re.M) +HASHES_RE = re.compile(r'^sarif_hashes:\s*\[(.*)\]\s*$', re.M) + + +def parse_existing(text): + name = NAME_RE.search(text) + rid = RULE_RE.search(text) + hm = HASHES_RE.search(text) + hashes = [h.strip() for h in hm.group(1).split(",") if h.strip()] if hm else [] + return (name.group(1) if name else None, + rid.group(1) if rid else None, + hashes) + + +def fmt_list(hashes): + return "[" + ", ".join(hashes) + "]" + + +def new_file_text(name, rid, hashes, notes): + body = "\n".join(" " + ln for ln in (notes or "(no analyzer message)").splitlines()) + return (f"finding_name: {name}\n" + f"sarif_hashes: {fmt_list(hashes)}\n" + f"rule_id: {rid}\n" + f"verdict: pending\n" + f"notes: >\n{body}\n" + f"poc: pending\n" + f"poc_script: null\n") + + +def main(): + ap = argparse.ArgumentParser( + description="SARIF -> per-rule finding tracking files (idempotent)") + ap.add_argument("sarif", help="path to report.sarif") + ap.add_argument("-o", "--out", default=".opentaint/tracking/findings", + help="findings dir (default: .opentaint/tracking/findings)") + args = ap.parse_args() + + by_rule = scan_results(json.loads(Path(args.sarif).read_text())) + + out = Path(args.out) + out.mkdir(parents=True, exist_ok=True) + + existing = {} # rule_id -> [(path, hashes)] + taken = set() + for p in sorted(glob.glob(str(out / "*.yaml"))): + name, rid, hashes = parse_existing(Path(p).read_text()) + if name: + taken.add(name) + if rid: + existing.setdefault(rid, []).append((Path(p), hashes)) + + created = updated = unchanged = 0 + for rid, hashmap in sorted(by_rule.items()): + scanned = set(hashmap) + files = existing.get(rid) + if not files: + name = docker_name(rid, taken) + taken.add(name) + notes = "\n".join(sorted({m for m in hashmap.values() if m})) + (out / f"{name}.yaml").write_text( + new_file_text(name, rid, sorted(scanned), notes)) + created += 1 + continue + already = set().union(*(set(h) for _, h in files)) + new = sorted(scanned - already) + if not new: + unchanged += 1 + continue + # add new hashes to the first finding file for this rule; reset verdict + path, hashes = files[0] + merged = sorted(set(hashes) | set(new)) + text = path.read_text() + text = HASHES_RE.sub(lambda m: "sarif_hashes: " + fmt_list(merged), text, count=1) + text = re.sub(r'^verdict:\s*.+$', "verdict: pending", text, count=1, flags=re.M) + path.write_text(text) + updated += 1 + + print(f"findings: {created} created, {updated} updated, {unchanged} unchanged " + f"({len(by_rule)} rules in scan)") + + +if __name__ == "__main__": + main() diff --git a/skills/test-rule/SKILL.md b/skills/test-rule/SKILL.md deleted file mode 100644 index 1f71e21ca..000000000 --- a/skills/test-rule/SKILL.md +++ /dev/null @@ -1,167 +0,0 @@ ---- -name: test-rule -description: Verify an OpenTaint rule on annotated test samples, including multi-module Spring reproducers. Use whenever a rule has been written or edited before scanning real projects. -license: Apache-2.0 -metadata: - author: opentaint - version: "0.1" ---- - -# Skill: Test Rule - -Create test samples for a rule and verify it works correctly - -## Prerequisites - -- `opentaint` CLI available -- Rules created (create-rule skill) -- Target project dependencies — derive from `.opentaint/analysis-plan.md` (step 3 lists detected frameworks, DB libraries, and HTTP clients) or directly from the project's `build.gradle` / `pom.xml` - -## Procedure - -### 1. Bootstrap test project - -Start with a plain method-level project. Only switch to the Spring multi-module layout (see below) if the plain test returns `falseNegative`. - -```bash -opentaint dev init-test-project .opentaint/test-project \ - --dependency "javax.servlet:javax.servlet-api:4.0.1" -``` - -### 2. Create test samples - -Create Java files in `src/main/java/test/` with `@PositiveRuleSample` and `@NegativeRuleSample` annotations: - -```java -package test; - -import org.opentaint.sast.test.util.PositiveRuleSample; -import org.opentaint.sast.test.util.NegativeRuleSample; -import javax.servlet.http.HttpServletRequest; -import java.sql.Connection; -import java.sql.Statement; - -public class MyVulnTest { - private Connection db; - - @PositiveRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") - public void vulnerable(HttpServletRequest req) throws Exception { - String input = req.getParameter("id"); - Statement stmt = db.createStatement(); - stmt.executeQuery("SELECT * FROM users WHERE id = " + input); - } - - @NegativeRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") - public void safe(HttpServletRequest req) throws Exception { - String input = req.getParameter("id"); - var pstmt = db.prepareStatement("SELECT * FROM users WHERE id = ?"); - pstmt.setString(1, input); - pstmt.executeQuery(); - } -} -``` - -### 3. Build test project - -```bash -opentaint compile .opentaint/test-project -o .opentaint/test-compiled -``` - -### 4. Run rule tests - -**Always specify `-o`** so results are written to a known location: - -```bash -opentaint dev test-rules .opentaint/test-compiled \ - -o .opentaint/test-results \ - --ruleset builtin --ruleset .opentaint/rules -``` - -### 5. Interpret results - -Read `.opentaint/test-results/test-result.json`: - -- **success**: Test passed (positive triggered, negative didn't) -- **falseNegative**: Positive sample did NOT trigger -> rule patterns too narrow -- **falsePositive**: Negative sample DID trigger -> rule patterns too broad -- **skipped**: Rule not found -> check `value` path and `id` match the rule file -- **disabled**: Rule is disabled - -## Testing Spring-app rules - -Some rules only fire inside a full Spring MVC entry-point graph (controllers, beans, dispatcher). A plain unit-like sample with `@PositiveRuleSample` on a bare method will not trigger them, because the tainted data must flow from a discovered `@Controller` entry point. - -For these rules, create **one dedicated Gradle sub-project per sample**. Each sub-project represents a complete, minimal Spring application containing **exactly one** `@PositiveRuleSample` or `@NegativeRuleSample` annotation. Split positive and negative cases into separate sub-projects, e.g. `xss-spring-test-positive` and `xss-spring-test-negative`. - -### How detection works - -`TestProjectAnalyzer` computes a `testSetName` per module as `module.moduleSourceRoot.relativeTo(project.sourceRoot)`, with `/` replaced by `-` (see `core/src/main/kotlin/org/opentaint/jvm/sast/project/TestProjectAnalyzer.kt`). If the name starts with `spring-app-tests`, the module is treated as a Spring test set: - -- All sample annotations in the module are collected as usual. -- Each sample is wrapped in a `SpringTestSample` that uses the Spring dispatcher method as the analysis entry point instead of the annotated method itself. -- Taint therefore originates from real `@Controller` request parameters and must reach the annotated sink method through normal Spring wiring. - -Consequence: the annotated method is only a marker for **which rule to run and the expected verdict**. The actual vulnerable/safe flow must be reachable from a controller in the same module. Keep each module to a single annotation so the verdict is unambiguous. - -### Project layout - -Use a multi-module Gradle build where every `spring-app-tests/` directory is its own sub-project: - -``` -.opentaint/test-project/ -├── settings.gradle.kts -├── build.gradle.kts -└── spring-app-tests/ - ├── xss-spring-test-positive/ - │ ├── build.gradle.kts - │ └── src/main/java/test/ - │ ├── VulnerableController.java // @Controller with the tainted flow - │ └── VulnerableSink.java // carries the single @PositiveRuleSample - └── xss-spring-test-negative/ - ├── build.gradle.kts - └── src/main/java/test/ - ├── SafeController.java - └── SafeSink.java // carries the single @NegativeRuleSample -``` - -`settings.gradle.kts` should auto-discover every `spring-app-tests/*/build.gradle.kts` so adding a new case only requires a new directory. See `rules/test/settings.gradle.kts` in the OpenTaint repo for a reference implementation. - -### Required dependencies - -Each Spring sub-project must pull in at least: - -- `compileOnly` on `opentaint-sast-test-util` (for the sample annotations) -- `org.springframework:spring-webmvc` and `spring-context` (so `@Controller` is recognized) -- Any libraries used by the sample itself (servlet-api, JDBC, etc.) - -### Compile and run - -Compile and test the multi-module project the same way as a regular test project: - -```bash -opentaint compile .opentaint/test-project -o .opentaint/test-compiled -opentaint dev test-rules .opentaint/test-compiled \ - -o .opentaint/test-results \ - --ruleset builtin --ruleset .opentaint/rules -``` - -Each `spring-app-tests/` sub-project becomes an independent test set and appears as its own entry in `test-result.json`. - -### Common pitfalls - -- **No `@Controller` in the module** -> `TestProjectAnalyzer` logs `No spring entry point found` and the sample is analyzed without Spring context, usually producing a false negative. Always include a controller that reaches the sink. -- **More than one annotation per module** -> the module still runs, but results become ambiguous; keep it to one sample per sub-project. -- **Module path does not start with `spring-app-tests`** -> `isSpringAppTestSet()` returns `false` and the sample is analyzed as a regular method-level test, so Spring-specific flows will not be triggered. - -## Annotation Fields - -- `value`: Path to rule YAML file, relative to ruleset root (e.g. `java/security/my-vuln.yaml`) -- `id`: Short rule ID within that file (the `id` field from the YAML, e.g. `my-vulnerability`) - -**Note**: The annotation `id` field uses the **short** rule ID (as written in the YAML file). This is different from `--rule-id` in `opentaint scan`, which requires the **full** rule ID in the format `:` (e.g. `java/security/my-vuln.yaml:my-vulnerability`). - -## Troubleshooting - -- **falseNegative**: Broaden source/sink patterns, check metavariable names match -- **falsePositive**: Add `pattern-not`, `pattern-sanitizers`, or narrow `metavariable-regex` -- **skipped**: Verify rule file path and ID, check rule is not disabled From d158c609debc10c6e1d91e15ae64157eb03c8f56 Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Wed, 27 May 2026 19:40:17 +0300 Subject: [PATCH 06/54] feat: update discovering skill --- skills/appsec_agent/SKILL.md | 18 ++- .../appsec_agent/references/discover-rules.md | 4 +- skills/create-test-project/SKILL.md | 9 +- skills/discover-attack-surface/SKILL.md | 121 +++++++++++------- 4 files changed, 100 insertions(+), 52 deletions(-) diff --git a/skills/appsec_agent/SKILL.md b/skills/appsec_agent/SKILL.md index 6d30f05e7..0c56fa658 100644 --- a/skills/appsec_agent/SKILL.md +++ b/skills/appsec_agent/SKILL.md @@ -93,7 +93,7 @@ You are the only writer of `.opentaint/tracking/state.yaml` — it records the c On start, and after any compaction, reconstruct position from artifacts before doing anything — never replay a completed phase: - read `state.yaml` and the `tracking/` tree -- skip any phase whose artifact exists: `project.yaml` → build; `report.sarif` → scan; a rule's `artifact` + `tests_passing: done` → that rule; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd +- skip any phase whose artifact exists: `project.yaml` → build; `coverage.yaml` with every area `done` → discover; `report.sarif` → scan; a rule's `artifact` + `tests_passing: done` → that rule; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd - detect new work from artifacts, not memory: finding files with `verdict: pending` (a fresh or reset scan) → triage; methods in `dropped-external-methods.yaml` not yet in any approximation unit → approximations ## Tracking layout @@ -103,6 +103,7 @@ The single source of truth for the tracking schema; each skill writes only its o ``` .opentaint/tracking/ state.yaml # you only — levels + phase status + coverage.yaml # discover-attack-surface — one entry per attack area walked (deep) findings/.yaml # one per logical finding (from the SARIF→finding script; split by triage) rules/.yaml # one per rule approximations/-passthrough.yaml # simple from→to copies; write-only, scan-verified @@ -128,6 +129,17 @@ phases: # pending | in_progress | done poc: pending # dynamic triage ``` +coverage.yaml — created by discover-attack-surface (deep): the attack-area checklist it walks, one entry per area, so you can see nothing was skipped and which areas spawned rules: + +```yaml +areas: + - area: database # one per attack area + status: done # pending | done + rules: [mybatis-sqli] # proposed rule names; [] when built-ins cover it or the area is absent + notes: > + free-form — what was found and why +``` + findings/.yaml — created by the SARIF→finding script; `verdict`/`notes` by analyze-findings; `poc`/`poc_script` by generate-poc: ```yaml @@ -148,8 +160,8 @@ name: mybatis-sqli rule_id: null # filled on creation artifact: null # added once the rule file exists finding: null # finding_name; non-null only for suppress-FP -requirements: > - CWE-89 SQLi via MyBatis ${} ; source @RequestParam orderBy ; sink ${} in SelectProvider +requirements: > # short — what built-ins miss, not a full traced flow + CWE-89 SQLi via MyBatis ${} ; source: HTTP param (built-in spring source) ; sink: ${} in SelectProvider — no built-in, write one ; lives in OrderMapper dependencies: [org.mybatis:mybatis:3.5.13] stages: # pending | in_progress | done description: done diff --git a/skills/appsec_agent/references/discover-rules.md b/skills/appsec_agent/references/discover-rules.md index de77ee2ec..e9ce9911d 100644 --- a/skills/appsec_agent/references/discover-rules.md +++ b/skills/appsec_agent/references/discover-rules.md @@ -4,13 +4,13 @@ The deep-scan step that maps the attack surface and writes the rules to cover it ## Discover attack surface -Delegate discover-attack-surface. Inputs: ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. It creates one `tracking/rules/.yaml` per proposed rule (`description` stage + requirements + dependencies) and returns one line per rule. Don't ask for the full analysis back. Set `phases.discover: done`. +Delegate discover-attack-surface. Inputs: ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. It walks a fixed checklist of attack areas into `tracking/coverage.yaml` and creates one `tracking/rules/.yaml` per gap (`description` stage + a short requirements + dependencies), returning the areas covered and one line per rule. Don't ask for the full analysis back. Set `phases.discover: done` once every area in `coverage.yaml` is `done`. ## Rules Fan out the rule units (one subagent each); per unit a two-step loop: -1. create-test-project — Inputs: spec = the rule's `requirements`, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the tracking file. Sets `test_project: done` +1. create-test-project — Inputs: spec = the rule's `requirements`, ``, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the tracking file. Sets `test_project: done` 2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``. Iterates `opentaint dev test-rules` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` If create-rule can't converge after repeated attempts, load references/escalation.md. Set `phases.rules: done` once every rule's `tests_passing` is done. diff --git a/skills/create-test-project/SKILL.md b/skills/create-test-project/SKILL.md index 6a2bd49dc..981271bc4 100644 --- a/skills/create-test-project/SKILL.md +++ b/skills/create-test-project/SKILL.md @@ -15,7 +15,8 @@ Build a minimal compiled test project whose annotated samples reproduce the flow From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -- What to test `` — a rule's requirements, or the package's methods to exercise, with enough context to build a realistic flow +- What to test `` — a rule's requirements, or the package's methods to exercise +- Project root `` — the real sources the requirements point into. Default: current directory - Tracking file `` — the rule or approximation file this test serves. Default: `.opentaint/tracking/rules/.yaml` or `.opentaint/tracking/approximations/.yaml` - Test project `` — sources. Default: `.opentaint/test-projects/` - Compiled output `` — the model. Default: `.opentaint/test-compiled/` @@ -43,7 +44,9 @@ opentaint dev init-approximation-project \ --dependency "io.projectreactor:reactor-core:3.8.5" ``` -### 2. Write samples +### 2. Read the real flow, then write samples + +The requirements only name the source/sink and its framework. Before writing, find that source and sink in `` and read the actual method signatures, annotations, and how the tainted value is built. The samples must mirror that code, not a guess — a sample built on the wrong signature compiles but verifies nothing Write Java samples under `/src/main/java/test/`, each annotated with its expected verdict — `@PositiveRuleSample` (must flag) or `@NegativeRuleSample` (must not). `value` is the rule path relative to the ruleset root (with `.yaml`), `id` the short id from the YAML — not the full `--rule-id` used by `opentaint scan`. One expected verdict per sample. Split the samples across files however groups most logically — don't cram unrelated ones into a single class @@ -80,4 +83,4 @@ Do not touch other stages or fields - One expected verdict per sample - One unit per `` folder — never write into another unit's project, so concurrent agents don't race -- In doubt about how the real flow or a method behaves, read the source rather than guessing — the sample must mirror the actual code +- For library-method behavior the requirements don't pin down (does it sanitize? propagate taint?), read the dependency or its docs rather than guessing diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md index 6ca8e5d20..15bc5a7f3 100644 --- a/skills/discover-attack-surface/SKILL.md +++ b/skills/discover-attack-surface/SKILL.md @@ -1,15 +1,15 @@ --- name: discover-attack-surface -description: Map a Java/Kotlin project's attack surface and turn gaps in rule coverage into concrete rule requirements. Use when a project needs its attack surface mapped into rule requirements (requires a built project model) +description: Walk a JVM project's attack surface area by area and turn each coverage gap into a rule requirement. Use when a project needs rule coverage mapped across its attack-surface areas (requires a built project model) license: Apache-2.0 metadata: author: opentaint - version: "0.2" + version: "0.3" --- # Skill: Discover Attack Surface -Identify the attack surface of the target project by reading source code and project structure. Convert each security gap into concrete rule requirements, which will be used for creating test project and rule later +Cover the target's attack surface systematically. Walk a fixed checklist of attack areas, and for each one explore the project sources and its dependencies for untrusted flows the built-in rules miss. Every gap becomes one rule requirement; the checklist records what was explored so no area is silently skipped ## Inputs @@ -17,69 +17,102 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Project root `` — the project sources. Default: current directory - Project model `` — the built model. Default: `.opentaint/project` -- Tracking directory `` — where rule files are written. Default: `.opentaint/tracking` +- Tracking directory `` — where the coverage checklist and rule files are written. Default: `.opentaint/tracking` ## Workflow Requires a built project model — without it you can miss entry points the analyzer actually sees -### 1. Find entry points and sinks +### 1. Seed the checklist -Search the sources for attack surface: +Seed `/coverage.yaml`'s `areas` list with one entry per area below, each `status: pending` and `rules: null` (null until you walk it; `[]` or names once done). These source-side and sink-side classes of taint flow are a minimum — add a project-specific area when a dependency exposes one they don't cover (comments for you, don't write them): -- Spring/JAX-RS endpoints: `@RestController`, `@Controller`, `@RequestMapping`, `@GetMapping`/`@PostMapping`/..., `@Path`, `@GET`/`@POST` -- Servlets: classes extending `HttpServlet` (`doGet`, `doPost`) -- Message handlers: `@JmsListener`, `@KafkaListener`, `@RabbitListener` -- Other external input: `main(String[])`, `@Scheduled` methods reading external state +```yaml +- area: user-input # untrusted data entering: HTTP params/headers/body, RPC, payloads, CLI args, config + status: pending +- area: database # SQL/HQL/NoSQL query construction (SQLi) + status: pending +- area: filesystem # paths built for file read/write/delete (path traversal) + status: pending +- area: command-exec # process or shell execution (command injection) + status: pending +- area: outbound-request # HTTP/URL clients (SSRF) + status: pending +- area: deserialization # object/JSON/XML deserialization of untrusted bytes + status: pending +- area: templating # template or expression evaluation (SSTI, EL injection) + status: pending +- area: xml-parsing # XML/document parsing (XXE) + status: pending +- area: ldap # directory queries (LDAP injection) + status: pending +- area: response-output # untrusted data rendered into a response (XSS) + status: pending +- area: reflection # dynamic class/method loading (code injection) + status: pending +- area: redirect # untrusted URL driving a redirect (open redirect) + status: pending +- area: logging # untrusted data into log/format APIs (log injection) + status: pending +``` + +### 2. Walk every area + +Go through each `pending` area in turn — never skip one. For each, explore both the project and its dependencies: -For each, note what external data enters (params, headers, body, payload) and what dangerous operations it can reach (DB query, file I/O, command exec, outbound HTTP, deserialization, templating) +- read model for the libraries that expose this area +- search the sources for the matching sources or sinks +- note what untrusted data enters and which dangerous call it can reach -### 2. Map dependencies to vulnerability classes +Then check coverage against the built-in rules (`opentaint dev rules-path`) and anything in `.opentaint/rules`, and decide: -Read `build.gradle` / `pom.xml` (or the model) and match each library to the classes it enables: +- built-ins already detect every real flow here, or the area is absent from this project → no rule; leave `rules: []` +- a real, untrusted flow has no covering rule → propose a rule (step 3) -- Web framework (Spring Boot, Micronaut, Quarkus) → shapes the entry points and request-binding sources -- DB / ORM (JDBC, JPA/Hibernate, MyBatis) → SQLi, especially string-built queries or `${}` mapper interpolation -- Template engines (Thymeleaf, FreeMarker, Velocity) → SSTI and reflected XSS -- HTTP clients (OkHttp, Apache HttpClient, RestTemplate, WebClient) → SSRF -- XML parsers (JAXB, DocumentBuilder, SAXParser) → XXE -- Deserializers (Jackson polymorphic typing, native `ObjectInputStream`, XStream) → insecure deserialization -- File / process APIs (`java.nio.file`, `ProcessBuilder`, `Runtime.exec`) → path traversal, command injection +Verify the flow is real before recording it: the source is genuinely attacker-controlled (a request param, header, body, or message payload is; an app constant or server config is not), and the sink is genuinely dangerous with tainted input (string-built SQL is; a parameterized query is not). A pair that fails this isn't a rule -### 3. Decide which rules to write +Update the area's entry in `coverage.yaml` the moment you finish it — set `status: done`, fill `rules` (`[]` or the proposed names), add a one-line `notes` of what you found — then move on. Write per area, not batched at the end, so the walk resumes cleanly and every area carries a record proving it was checked, not skipped -Check coverage, then turn each real gap into a requirement: +### 3. Record each proposed rule -- Read the built-in rules (`opentaint dev rules-path`) and anything already in `.opentaint/rules`. A source→sink pair is a gap only when no existing rule detects it -- Verify the pair is semantically real before recording it: the source is genuinely attacker-controlled (a request param, header, or body is; an app-internal constant or server config is not), and the sink is genuinely dangerous with tainted input (string-concatenated SQL is; a parameterized query is not). A pair that fails this isn't a rule -- For every uncovered, semantically real pair worth detecting, write one rule tracking file at `/rules/.yaml` (per Tracking) +For each gap, add the rule name to its area's `rules:` list and write one `/rules/.yaml`. Name it `-` in kebab-case — the sink technology or framework plus the class, e.g. `mybatis-sqli`, `thymeleaf-ssti`, `resttemplate-ssrf`. It must be unique and stable: the name is the tracking file and follows the rule downstream -Name the rule `-` in kebab-case — the sink technology or framework plus the class, e.g. `mybatis-sqli`, `thymeleaf-ssti`, `resttemplate-ssrf`. It must be unique and stable: the name is the tracking file name and follows the rule through every later stage +State only what a rule author needs: the vuln class, which built-in source/sink rules already apply, and which source or sink is missing and must be written. Name the framework and the class where the flow lives — not a full traced flow with line numbers. The test project built later reads the real code to reproduce it ## Output -- One `/rules/.yaml` per proposed rule, with `stages.description: done`, `requirements` filled, and `dependencies` (exact Maven GAV from the build files) the test project needs. `requirements` must reproduce the real flow, not paraphrase it: - - vuln class / CWE - - source — fully-qualified entry method, tainted input, `file:line` - - sink — fully-qualified method, dangerous call, `file:line` - - flow — intermediate hops as fully-qualified method names - - the real signatures and annotations, so the test can mirror the actual code -- A brief summary to the caller: one line per rule (name, vuln class, source→sink). Don't paste the full analysis back — the tracking files hold the detail +- `/coverage.yaml` — every area `done`, each with proposed rules (or `[]`) +- One `/rules/.yaml` per proposed rule, with `stages.description: done`, a short `requirements`, and `dependencies` (exact Maven GAV from the build files) the test project needs +- A brief summary to the caller: the areas covered, then one line per proposed rule (name, vuln class, source→sink). The tracking files hold the detail — don't paste it back ## Tracking -Create one rule file per proposed rule; fill only the discovery-stage fields: +`/coverage.yaml` — one entry per area, filled as you walk: + +```yaml +- area: database + status: done # pending | done + rules: [mybatis-sqli] # proposed rule names; [] when built-ins cover it or the area is absent + notes: > + MyBatis 3.5 mappers use ${} interpolation; built-in covers JDBC sinks but not MyBatis ${} +- area: filesystem + status: done + rules: [] + notes: only constant paths; no untrusted data reaches a file API +# ... +``` + +`/rules/.yaml` — discovery-stage fields only: ```yaml name: mybatis-sqli rule_id: null # filled later finding: null # filled later -requirements: | - CWE-89 SQL injection via MyBatis ${} interpolation - source: com.example.web.OrderController#listOrders(String) — @RequestParam("orderBy"), OrderController.java:42 - flow: orderBy -> com.example.service.OrderService#list(String), OrderService.java:31 - -> com.example.mapper.OrderMapper#selectByOrder(String) - sink: com.example.mapper.OrderSqlProvider#byOrder — ${orderBy} concatenated into ORDER BY, OrderSqlProvider.java:18 +requirements: > + CWE-89 SQLi via MyBatis ${} interpolation. + source: untrusted HTTP request param — built-in spring source covers it + sink: ${} string interpolation in a @SelectProvider / mapper XML — no built-in; needs a new sink rule + lives in: com.example.mapper.OrderMapper / OrderSqlProvider dependencies: # exact GAV the test project needs, from the build files - org.mybatis:mybatis:3.5.13 - org.springframework:spring-webmvc:5.3.30 @@ -88,16 +121,16 @@ stages: test_project: pending tests_passing: pending notes: > - mirror the @RequestParam binding and the @SelectProvider signature in the test + free-form ``` ## Engine notes -- Spring projects: the analyzer auto-discovers Spring endpoints, so you don't have to enumerate every controller — focus on which flows are dangerous +- Spring projects: the analyzer auto-discovers Spring endpoints, so `user-input` is largely sources the built-ins already see — focus on which sinks those flows reach - Generic projects: the analyzer treats all public/protected methods of public classes as entry points ## Gotchas - Propose a rule only for a real gap; if a built-in already covers the source→sink, don't duplicate it -- Requirements drive a test project someone else builds; vague requirements produce a useless test -- A passing test won't catch a semantically wrong source or sink — verify both are real here, when writing requirements, because nothing downstream re-checks it +- Requirements name the missing source/sink and where it lives, not a full traced flow — keep them short; the test project reads the real code +- A passing test won't catch a semantically wrong source or sink — verify both are real here, because nothing downstream re-checks it From a3a655813bc75c54abb694768df765b86acf56e5 Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Wed, 27 May 2026 19:48:11 +0300 Subject: [PATCH 07/54] feat: add fallback for rule-creation process --- cli/cmd/dev_test_rules.go | 51 +++++++++++-------- skills/appsec_agent/SKILL.md | 2 +- .../appsec_agent/references/discover-rules.md | 4 +- skills/appsec_agent/references/escalation.md | 2 +- skills/create-rule/SKILL.md | 23 ++++++++- 5 files changed, 56 insertions(+), 26 deletions(-) diff --git a/cli/cmd/dev_test_rules.go b/cli/cmd/dev_test_rules.go index 046d2b30b..22f10dcba 100644 --- a/cli/cmd/dev_test_rules.go +++ b/cli/cmd/dev_test_rules.go @@ -14,11 +14,13 @@ import ( ) var ( - testRulesRuleset []string - testRulesOutputDir string - testRulesTimeout time.Duration - testRulesMaxMemory string - testRulesRuleID []string + testRulesRuleset []string + testRulesOutputDir string + testRulesTimeout time.Duration + testRulesMaxMemory string + testRulesRuleID []string + testRulesDataflow []string + testRulesPassthrough []string ) var devTestRulesCmd = &cobra.Command{ @@ -36,27 +38,30 @@ Exit codes: Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { runTestProject(args[0], testProjectOptions{ - label: "Rule tests", - tempDir: "opentaint-test-rules-*", - rulesets: testRulesRuleset, - outputDir: testRulesOutputDir, - timeout: testRulesTimeout, - maxMemory: testRulesMaxMemory, - ruleIDs: testRulesRuleID, + label: "Rule tests", + tempDir: "opentaint-test-rules-*", + rulesets: testRulesRuleset, + outputDir: testRulesOutputDir, + timeout: testRulesTimeout, + maxMemory: testRulesMaxMemory, + ruleIDs: testRulesRuleID, + dataflowApprox: testRulesDataflow, + passthroughApprox: testRulesPassthrough, }) }, } // testProjectOptions holds the inputs shared by `dev test-rules` and `dev test-approximations`. type testProjectOptions struct { - label string - tempDir string - rulesets []string - outputDir string - timeout time.Duration - maxMemory string - ruleIDs []string - dataflowApprox []string + label string + tempDir string + rulesets []string + outputDir string + timeout time.Duration + maxMemory string + ruleIDs []string + dataflowApprox []string + passthroughApprox []string } func runTestProject(projectModelArg string, opts testProjectOptions) { @@ -149,6 +154,10 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { } builder.AddDataflowApproximations(compiledPath) } + for _, passthrough := range opts.passthroughApprox { + absPassthrough := log.AbsPathOrExit(passthrough, "passthrough-approximations") + builder.AddPassthroughApproximations(absPassthrough) + } javaRunner := java.NewJavaRunner(). WithSkipVerify(globals.Config.SkipVerify). @@ -184,4 +193,6 @@ func init() { devTestRulesCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Timeout for analysis") devTestRulesCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") devTestRulesCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") + devTestRulesCmd.Flags().StringArrayVar(&testRulesDataflow, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") + devTestRulesCmd.Flags().StringArrayVar(&testRulesPassthrough, "passthrough-approximations", nil, "passThrough approximation YAML file or directory of them (repeatable)") } diff --git a/skills/appsec_agent/SKILL.md b/skills/appsec_agent/SKILL.md index 0c56fa658..a17a993c8 100644 --- a/skills/appsec_agent/SKILL.md +++ b/skills/appsec_agent/SKILL.md @@ -75,7 +75,7 @@ Universal rules — every dispatch, every workflow: - open the prompt with the Skill-load line — the subagent has none of this context until it loads its skill - pass resolved paths (the ``-keyed `.opentaint/...` paths from Working directory layout), never the placeholder tokens - read the named output artifact yourself before continuing — a claim is not an artifact -- only the scan agent (run-scan) runs `opentaint scan`; no rule, approximation, or triage subagent scans +- only run-scan scans the main project model; rule/approximation/triage subagents don't — the one exception is a create-rule agent running a diagnostic `--track-external-methods` scan of its own test project (never the main model) - only you write `.opentaint/vulnerabilities.md` and `.opentaint/tracking/state.yaml` - never swap the project model mid-analysis; every run uses the same model - never triage yourself — verdicts come only from analyze-findings subagents diff --git a/skills/appsec_agent/references/discover-rules.md b/skills/appsec_agent/references/discover-rules.md index e9ce9911d..bcfd6cd97 100644 --- a/skills/appsec_agent/references/discover-rules.md +++ b/skills/appsec_agent/references/discover-rules.md @@ -11,6 +11,6 @@ Delegate discover-attack-surface. Inputs: ``, model-dir `.opentain Fan out the rule units (one subagent each); per unit a two-step loop: 1. create-test-project — Inputs: spec = the rule's `requirements`, ``, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the tracking file. Sets `test_project: done` -2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``. Iterates `opentaint dev test-rules` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` +2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/config` / `.opentaint/approximations/src`. Iterates `opentaint dev test-rules` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` -If create-rule can't converge after repeated attempts, load references/escalation.md. Set `phases.rules: done` once every rule's `tests_passing` is done. +If create-rule reports the test project drops a library method on the rule's flow, the rule can't be verified until that method is modeled — route the dropped methods through the approximation loop (references/approximations.md; they're real library methods the main scan needs too), then re-dispatch create-rule with the approximation dirs. If it reports non-convergence with nothing dropped, load references/escalation.md. A rule's `tests_passing` stays `pending` until its samples pass; set `phases.rules: done` once every rule's is done. diff --git a/skills/appsec_agent/references/escalation.md b/skills/appsec_agent/references/escalation.md index 06547770a..437dec77a 100644 --- a/skills/appsec_agent/references/escalation.md +++ b/skills/appsec_agent/references/escalation.md @@ -1,6 +1,6 @@ # Escalation block -Load this when a create-rule / create-pass-through / create-dataflow agent can't make its samples pass after repeated attempts, or a rule passes its tests yet is wrong on the real scan. Dispatch per the Delegate template in SKILL.md. These skills write no tracking files. +Load this when a create-rule / create-pass-through / create-dataflow agent can't make its samples pass after repeated attempts, or a rule passes its tests yet is wrong on the real scan. The common cause — a library method on the flow killing taint in the test project — is already caught by create-rule's own `--track-external-methods` check and routed to the approximation loop (references/discover-rules.md); reach here for what survives that: a subtle rule bug, an approximation present but mis-modeled, or an engine issue. Dispatch per the Delegate template in SKILL.md. These skills write no tracking files. 1. debug-rule — Inputs: the `` to trace (for an approximation, the rule whose sample routes taint through the modeled method), the `` and `` of the run that showed the problem, ``, and the approximation dirs if the flow depends on them. Returns a diagnosis: rule fix, missing library model, or engine issue 2. Route by cause: a rule cause goes back to create-rule (references/discover-rules.md), a model cause back to the relevant create-*-approximation agent (references/approximations.md); an engine cause goes to step 3 diff --git a/skills/create-rule/SKILL.md b/skills/create-rule/SKILL.md index fc6f5f081..871d6ecd1 100644 --- a/skills/create-rule/SKILL.md +++ b/skills/create-rule/SKILL.md @@ -19,6 +19,7 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Compiled test project `` — the compiled model to verify against. Default: `.opentaint/test-compiled/` (per rule/approximation ``) - Rules directory `` — where rules are written. Default: `.opentaint/rules` - Tracking file `` — the rule file. Default: `.opentaint/tracking/rules/.yaml` +- Approximation directories `` / `` (optional) — apply on a re-dispatch when the test project needs a library model that's now built. Default: none Built-in rules are available at `opentaint dev rules-path` @@ -118,13 +119,29 @@ opentaint dev test-rules \ --ruleset ``` -`test-rules` auto-loads the built-in rules, so pass only your custom `` — a literal `builtin` here would be treated as a path. Read `.opentaint/test-results//test-result.json`: +`test-rules` auto-loads the built-in rules, so pass only your custom `` — a literal `builtin` here would be treated as a path. When the caller passed `` / ``, append `--passthrough-approximations ` / `--dataflow-approximations ` — without them a library method the test flow relies on drops taint and the positive can't pass. Read `.opentaint/test-results//test-result.json`: - `falseNegative` (positive didn't trigger) → patterns too narrow; broaden `pattern-either`, check metavariable names match across branches and between `refs` and `on` - `falsePositive` (negative triggered) → patterns too broad; add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or `metavariable-regex` - `skipped` / `disabled` → the rule wasn't exercised; fix the annotation `value`/`id`, or enable the rule -### 5. Refining for a false positive (suppress-FP) +### 5. When a positive won't pass after a couple of fixes + +A `@PositiveRuleSample` that won't trigger after ~2 fix attempts may have a cause no rule edit can fix — a library method on its flow killing taint. Before escalating, scan your own test model with `--track-external-methods`: + +```bash +opentaint scan --project-model \ + -o .opentaint/test-results//diag.sarif \ + --ruleset builtin --ruleset \ + --track-external-methods +``` + +Read `dropped-external-methods.yaml` next to it; either way leave `tests_passing: pending`: + +- a dropped method on the failing sample's source→sink path → that's the cause, not the rule: report which methods need a model, to be approximated before you're re-dispatched +- nothing dropped and no clear rule cause → report non-convergence for escalation, rather than editing blindly + +### 6. Refining for a false positive (suppress-FP) The test project already pins the confirmed TPs as `@PositiveRuleSample` and reproduces the FP as a `@NegativeRuleSample` — refine only the rule. Narrow it (step 4's `falsePositive` handling) until the negative stops triggering while every positive still passes. Do not touch the samples; if one looks wrong, hand it back upstream @@ -133,6 +150,7 @@ The test project already pins the confirmed TPs as `@PositiveRuleSample` and rep - The rule file(s) under `` - Tracking updated: `rule_id`, `artifact`, `stages.tests_passing` (per Tracking) - Report the full rule id, a one-line test summary, and the exact `test-rules` command used +- If blocked (step 5): leave `tests_passing: pending` and report the cause instead ## Tracking @@ -160,3 +178,4 @@ stages: - A wrong argument position in `(..., $UNTRUSTED, ...)` focuses the wrong parameter — point `focus-metavariable` at the tainted one - Refine the rule, never the test project — don't edit or weaken samples here; if one is wrong, hand it back upstream +- A positive that won't pass because a library method drops taint is not a rule bug — don't broaden the rule to force it; surface it for approximation (step 5) From e2c777549c4b8c4b2358640a2d75a3e497927426 Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Tue, 2 Jun 2026 10:41:45 +0300 Subject: [PATCH 08/54] feat: improve discovery workflow, refactor cli --- cli/cmd/dev.go | 16 -- cli/cmd/dev_debug_fact_reachability.go | 32 --- cli/cmd/dev_debug_run_on_entry_points.go | 33 ---- cli/cmd/dev_rules_path.go | 41 ---- cli/cmd/health.go | 183 ++++++++++++++++++ cli/cmd/scan.go | 11 +- cli/cmd/test.go | 30 +++ ...ximations.go => test_approximation_run.go} | 14 +- ...{dev_init_test_project.go => test_init.go} | 22 ++- cli/cmd/test_rule_reachability.go | 48 +++++ .../{dev_test_rules.go => test_rule_run.go} | 24 +-- cli/internal/rules/refs.go | 111 +++++++++++ cli/internal/rules/refs_test.go | 92 +++++++++ cli/internal/testapprox/testapprox.go | 12 +- skills/analyze-external-methods/SKILL.md | 10 +- skills/analyze-findings/SKILL.md | 2 +- .../{appsec_agent => appsec-agent}/SKILL.md | 52 +++-- .../references/approximations.md | 8 +- .../references/build.md | 2 - .../appsec-agent/references/discover-rules.md | 24 +++ skills/appsec-agent/references/escalation.md | 7 + skills/appsec-agent/references/poc.md | 12 ++ .../references/reproduce-vulnerability.md | 0 skills/appsec-agent/references/scan.md | 3 + .../references/suppress-fp.md | 2 - .../references/triage.md | 2 +- .../scripts/sarif-to-findings.py | 0 .../appsec_agent/references/discover-rules.md | 16 -- skills/appsec_agent/references/escalation.md | 7 - skills/appsec_agent/references/poc.md | 10 - skills/appsec_agent/references/scan.md | 5 - skills/assemble-lib-rules/SKILL.md | 68 +++++++ skills/create-dataflow-approximation/SKILL.md | 41 ++-- .../SKILL.md | 144 ++++++++------ skills/create-rule/SKILL.md | 15 +- skills/create-test-project/SKILL.md | 12 +- .../references/approximation.md | 9 +- skills/debug-rule/SKILL.md | 23 ++- skills/discover-attack-surface/SKILL.md | 125 +++++------- skills/generate-poc/SKILL.md | 17 +- skills/report-analyzer-issue/SKILL.md | 2 +- skills/run-scan/SKILL.md | 18 +- skills/run-scan/scripts/sarif-to-findings.py | 166 ---------------- skills/triage-dependencies/SKILL.md | 70 +++++++ 44 files changed, 949 insertions(+), 592 deletions(-) delete mode 100644 cli/cmd/dev.go delete mode 100644 cli/cmd/dev_debug_fact_reachability.go delete mode 100644 cli/cmd/dev_debug_run_on_entry_points.go delete mode 100644 cli/cmd/dev_rules_path.go create mode 100644 cli/cmd/health.go create mode 100644 cli/cmd/test.go rename cli/cmd/{dev_test_approximations.go => test_approximation_run.go} (69%) rename cli/cmd/{dev_init_test_project.go => test_init.go} (90%) create mode 100644 cli/cmd/test_rule_reachability.go rename cli/cmd/{dev_test_rules.go => test_rule_run.go} (83%) create mode 100644 cli/internal/rules/refs.go create mode 100644 cli/internal/rules/refs_test.go rename skills/{appsec_agent => appsec-agent}/SKILL.md (74%) rename skills/{appsec_agent => appsec-agent}/references/approximations.md (64%) rename skills/{appsec_agent => appsec-agent}/references/build.md (74%) create mode 100644 skills/appsec-agent/references/discover-rules.md create mode 100644 skills/appsec-agent/references/escalation.md create mode 100644 skills/appsec-agent/references/poc.md rename skills/{appsec_agent => appsec-agent}/references/reproduce-vulnerability.md (100%) create mode 100644 skills/appsec-agent/references/scan.md rename skills/{appsec_agent => appsec-agent}/references/suppress-fp.md (79%) rename skills/{appsec_agent => appsec-agent}/references/triage.md (77%) rename skills/{appsec_agent => appsec-agent}/scripts/sarif-to-findings.py (100%) delete mode 100644 skills/appsec_agent/references/discover-rules.md delete mode 100644 skills/appsec_agent/references/escalation.md delete mode 100644 skills/appsec_agent/references/poc.md delete mode 100644 skills/appsec_agent/references/scan.md create mode 100644 skills/assemble-lib-rules/SKILL.md delete mode 100644 skills/run-scan/scripts/sarif-to-findings.py create mode 100644 skills/triage-dependencies/SKILL.md diff --git a/cli/cmd/dev.go b/cli/cmd/dev.go deleted file mode 100644 index 24a165fb6..000000000 --- a/cli/cmd/dev.go +++ /dev/null @@ -1,16 +0,0 @@ -package cmd - -import ( - "github.com/spf13/cobra" -) - -// devCmd represents the dev command group -var devCmd = &cobra.Command{ - Use: "dev", - Short: "Create and debug rules (experimental)", - Long: `This command provides utilities for rule authoring and debugging (experimental)`, -} - -func init() { - rootCmd.AddCommand(devCmd) -} diff --git a/cli/cmd/dev_debug_fact_reachability.go b/cli/cmd/dev_debug_fact_reachability.go deleted file mode 100644 index 40df4b2ae..000000000 --- a/cli/cmd/dev_debug_fact_reachability.go +++ /dev/null @@ -1,32 +0,0 @@ -package cmd - -import ( - "github.com/spf13/cobra" -) - -var devDebugFactReachabilityCmd = &cobra.Command{ - Use: "debug-fact-reachability [source-path]", - Short: "Generate SARIF with fact reachability info for a single rule", - Args: cobra.RangeArgs(1, 2), - Long: `This command scans the project for one rule and writes a sibling SARIF report with fact-reachability info to debug why the rule does or does not fire - -Arguments: - rule-id - Full rule ID, e.g. security/SqlInjection.yaml:tainted-sql-from-http (required) - source-path - Path to the project sources (default: current directory) - -The fact-reachability report is written next to the main SARIF as debug-ifds-fact-reachability.sarif. - -Use --project-model to scan a pre-compiled project model instead of compiling from sources. -`, - Annotations: map[string]string{"PrintConfig": "true"}, - Run: func(cmd *cobra.Command, args []string) { - RuleID = []string{args[0]} - DebugFactReachabilitySarif = true - scanCmd.Run(scanCmd, args[1:]) - }, -} - -func init() { - devCmd.AddCommand(devDebugFactReachabilityCmd) - addScanFlags(devDebugFactReachabilityCmd) -} diff --git a/cli/cmd/dev_debug_run_on_entry_points.go b/cli/cmd/dev_debug_run_on_entry_points.go deleted file mode 100644 index 7043cc827..000000000 --- a/cli/cmd/dev_debug_run_on_entry_points.go +++ /dev/null @@ -1,33 +0,0 @@ -package cmd - -import ( - "github.com/spf13/cobra" -) - -var devDebugRunOnEntryPointsCmd = &cobra.Command{ - Use: "debug-run-on-entry-points [source-path]", - Short: "Run analysis on selected entry points", - Args: cobra.RangeArgs(1, 2), - Long: `This command scans the project starting only from the given entry point, useful for narrowing analysis while debugging a rule - -Arguments: - entry-point - '*' for all methods or method FQN like com.example.Class#method (required) - source-path - Path to the project sources (default: current directory) - -Note: this command is ignored on Spring projects - -Use --project-model to scan a pre-compiled project model instead of compiling from sources. -`, - Annotations: map[string]string{"PrintConfig": "true"}, - Run: func(cmd *cobra.Command, args []string) { - out.Warn("entry-point override has no effect on Spring projects") - DebugRunAnalysisOnSelectedEntryPoints = args[0] - scanCmd.Run(scanCmd, args[1:]) - }, -} - -func init() { - devCmd.AddCommand(devDebugRunOnEntryPointsCmd) - addScanFlags(devDebugRunOnEntryPointsCmd) - addRuleIDFlag(devDebugRunOnEntryPointsCmd) -} diff --git a/cli/cmd/dev_rules_path.go b/cli/cmd/dev_rules_path.go deleted file mode 100644 index 5bf962066..000000000 --- a/cli/cmd/dev_rules_path.go +++ /dev/null @@ -1,41 +0,0 @@ -package cmd - -import ( - "fmt" - "os" - - "github.com/seqra/opentaint/internal/globals" - "github.com/seqra/opentaint/internal/utils" - "github.com/spf13/cobra" -) - -var devRulesPathCmd = &cobra.Command{ - Use: "rules-path", - Short: "Print the path to the builtin rules directory (downloads on demand)", - Run: func(cmd *cobra.Command, args []string) { - rulesPath, err := utils.GetRulesPath(globals.Config.Rules.Version) - if err != nil { - fmt.Fprintf(os.Stderr, "Error: %s\n", err) - os.Exit(1) - } - - // Download if not present - if _, err := os.Stat(rulesPath); os.IsNotExist(err) { - if dlErr := utils.DownloadAndUnpackGithubReleaseAsset( - globals.Config.Owner, globals.Config.Repo, - globals.Config.Rules.Version, globals.RulesAssetName, - rulesPath, globals.Config.Github.Token, - globals.Config.SkipVerify, out, - ); dlErr != nil { - fmt.Fprintf(os.Stderr, "Error downloading rules: %s\n", dlErr) - os.Exit(1) - } - } - - fmt.Println(rulesPath) - }, -} - -func init() { - devCmd.AddCommand(devRulesPathCmd) -} diff --git a/cli/cmd/health.go b/cli/cmd/health.go new file mode 100644 index 000000000..1faf0eb39 --- /dev/null +++ b/cli/cmd/health.go @@ -0,0 +1,183 @@ +package cmd + +import ( + "fmt" + "os" + "strconv" + "strings" + + "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/utils" + "github.com/seqra/opentaint/internal/utils/java" + "github.com/spf13/cobra" +) + +var ( + healthAutobuilder bool + healthAnalyzer bool + healthRules bool + healthRuntime bool +) + +// healthComponent is one resolved dependency in the health report. +type healthComponent struct { + name string + version string + path string + present bool +} + +// healthCmd represents the health command +var healthCmd = &cobra.Command{ + Use: "health", + Short: "Print the resolved dependency paths", + Long: `Print the on-disk paths OpenTaint resolves for its dependencies: autobuilder, +analyzer, rules, and the Java runtime. + +Pass --autobuilder, --analyzer, --rules or --runtime to show only those; with a +single flag just the bare path is printed. Nothing is downloaded except the +rules, which are fetched on demand.`, + Args: cobra.NoArgs, + Run: func(cmd *cobra.Command, args []string) { + runHealth() + }, +} + +func init() { + rootCmd.AddCommand(healthCmd) + healthCmd.Flags().BoolVar(&healthAutobuilder, "autobuilder", false, "Show only the autobuilder JAR path") + healthCmd.Flags().BoolVar(&healthAnalyzer, "analyzer", false, "Show only the analyzer JAR path") + healthCmd.Flags().BoolVar(&healthRules, "rules", false, "Show only the built-in rules path (downloads on demand)") + healthCmd.Flags().BoolVar(&healthRuntime, "runtime", false, "Show only the Java runtime path") +} + +func runHealth() { + // No flags shows every component, in fixed order. + var requested []string + if healthAutobuilder { + requested = append(requested, "autobuilder") + } + if healthAnalyzer { + requested = append(requested, "analyzer") + } + if healthRules { + requested = append(requested, "rules") + } + if healthRuntime { + requested = append(requested, "runtime") + } + if len(requested) == 0 { + requested = []string{"autobuilder", "analyzer", "rules", "runtime"} + } + + components := make([]healthComponent, 0, len(requested)) + for _, key := range requested { + components = append(components, resolveHealthComponent(key)) + } + + // A single flag prints just the bare path, for scripting. + if len(requested) == 1 { + c := components[0] + fmt.Println(c.path) + if !c.present { + fmt.Fprintf(os.Stderr, "%s missing at %s\n", c.name, c.path) + } + return + } + + sb := out.Section("OpenTaint Health") + for _, c := range components { + value := c.path + if c.version != "" { + value = displayVersion(c.version) + " " + c.path + } + if !c.present { + value += " " + out.Theme().Error.Render("missing") + } + sb.Field(c.name, value) + } + sb.Render() +} + +// resolveHealthComponent resolves a component's path and presence. Only the +// rules are fetched on demand; the rest are reported as-is. +func resolveHealthComponent(key string) healthComponent { + switch key { + case "autobuilder": + path, err := utils.GetAutobuilderJarPath(globals.Config.Autobuilder.Version) + return healthComponent{"Autobuilder", globals.Config.Autobuilder.Version, path, err == nil && pathExistsCmd(path)} + case "analyzer": + path, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version) + return healthComponent{"Analyzer", globals.Config.Analyzer.Version, path, err == nil && pathExistsCmd(path)} + case "rules": + return resolveRulesComponent() + case "runtime": + return resolveRuntimeComponent() + default: + return healthComponent{name: key} + } +} + +// resolveRulesComponent resolves the built-in rules directory, downloading it +// on demand so `health --rules` replaces `dev rules-path`. +func resolveRulesComponent() healthComponent { + c := healthComponent{name: "Rules", version: globals.Config.Rules.Version} + path, err := utils.GetRulesPath(globals.Config.Rules.Version) + if err != nil { + return c + } + c.path = path + if pathExistsCmd(path) { + c.present = true + return c + } + if dlErr := utils.DownloadAndUnpackGithubReleaseAsset( + globals.Config.Owner, globals.Config.Repo, + globals.Config.Rules.Version, globals.RulesAssetName, + path, globals.Config.Github.Token, globals.Config.SkipVerify, out, + ); dlErr != nil { + fmt.Fprintf(os.Stderr, "Error downloading rules: %s\n", dlErr) + return c + } + c.present = pathExistsCmd(path) + return c +} + +// resolveRuntimeComponent reports the Java the analyzer uses: a managed JRE if +// present, otherwise system Java. +func resolveRuntimeComponent() healthComponent { + c := healthComponent{name: "Runtime"} + if jre := utils.FindExistingJRE(utils.ManagedJRETiers()); jre != nil { + c.path = utils.JavaBinaryPath(jre.Path) + c.version = "Java " + strconv.Itoa(globals.DefaultJavaVersion) + " · managed" + c.present = true + return c + } + if sys := java.DetectSystemJava(); sys != nil { + c.path = sys.Path + c.version = "Java " + sys.FullVersion + " · " + sys.Vendor + c.present = true + return c + } + c.version = "Java " + strconv.Itoa(globals.DefaultJavaVersion) + if jre := utils.GetInstallJREPath(); jre != "" { + c.path = utils.JavaBinaryPath(jre) + } + return c +} + +// displayVersion strips the artifact-kind prefix (e.g. "rules/v0.1.1" → "v0.1.1"). +func displayVersion(v string) string { + if idx := strings.LastIndex(v, "/"); idx >= 0 { + return v[idx+1:] + } + return v +} + +func pathExistsCmd(p string) bool { + if p == "" { + return false + } + _, err := os.Stat(p) + return err == nil +} diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index c3643e643..77bd6eb82 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -8,6 +8,7 @@ import ( "github.com/seqra/opentaint/internal/analyzer" "github.com/seqra/opentaint/internal/load_trace" + "github.com/seqra/opentaint/internal/rules" "github.com/seqra/opentaint/internal/sarif" "github.com/seqra/opentaint/internal/validation" "github.com/seqra/opentaint/internal/version" @@ -39,6 +40,7 @@ var ( TrackExternalMethods bool DebugFactReachabilitySarif bool DebugRunAnalysisOnSelectedEntryPoints string + expandRuleRefs bool ) type RulesetType struct { @@ -117,7 +119,7 @@ func init() { } // addRuleIDFlag registers the --rule-id flag. Split out from addScanFlags so -// that `dev debug-fact-reachability` can omit it (it takes the rule ID +// that `test rule reachability` can omit it (it takes the rule ID // positionally and supports only one rule at a time). func addRuleIDFlag(cmd *cobra.Command) { cmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") @@ -366,6 +368,13 @@ func scan(cmd *cobra.Command) { if maxMemory != "" { nativeBuilder.SetMaxMemory(maxMemory) } + if expandRuleRefs && len(RuleID) > 0 { + var roots []string + for _, r := range absRuleSetPaths { + roots = append(roots, r.Path) + } + RuleID = rules.ExpandRuleIDs(RuleID, roots) + } for _, ruleID := range RuleID { nativeBuilder.AddRuleID(ruleID) } diff --git a/cli/cmd/test.go b/cli/cmd/test.go new file mode 100644 index 000000000..9e47bf593 --- /dev/null +++ b/cli/cmd/test.go @@ -0,0 +1,30 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +// testCmd groups the rule/approximation authoring, testing, and debugging tools (experimental). +var testCmd = &cobra.Command{ + Use: "test", + Short: "Author, test, and debug rules and approximations (experimental)", + Long: `Utilities for the rule and approximation test-driven loop: scaffold a test project, run tests against annotated samples, and trace fact reachability when a rule misbehaves (experimental)`, +} + +// testRuleCmd groups the rule-authoring subcommands (init/run/reachability). +var testRuleCmd = &cobra.Command{ + Use: "rule", + Short: "Scaffold, test, and debug detection rules", +} + +// testApproximationCmd groups the approximation-authoring subcommands (init/run). +var testApproximationCmd = &cobra.Command{ + Use: "approximation", + Short: "Scaffold and test dataflow approximations", +} + +func init() { + rootCmd.AddCommand(testCmd) + testCmd.AddCommand(testRuleCmd) + testCmd.AddCommand(testApproximationCmd) +} diff --git a/cli/cmd/dev_test_approximations.go b/cli/cmd/test_approximation_run.go similarity index 69% rename from cli/cmd/dev_test_approximations.go rename to cli/cmd/test_approximation_run.go index 8b54b15c2..7c266cccc 100644 --- a/cli/cmd/dev_test_approximations.go +++ b/cli/cmd/test_approximation_run.go @@ -15,8 +15,8 @@ var ( testApproxDataflow []string ) -var devTestApproximationsCmd = &cobra.Command{ - Use: "test-approximations ", +var testApproximationRunCmd = &cobra.Command{ + Use: "run ", Short: "Run rule tests against annotated test samples with approximations applied", Long: `Run rule tests against annotated test samples with the given approximations applied. @@ -53,10 +53,10 @@ Exit codes: } func init() { - devCmd.AddCommand(devTestApproximationsCmd) + testApproximationCmd.AddCommand(testApproximationRunCmd) - devTestApproximationsCmd.Flags().StringVarP(&testApproxOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") - devTestApproximationsCmd.Flags().DurationVar(&testApproxTimeout, "timeout", 600*time.Second, "Timeout for analysis") - devTestApproximationsCmd.Flags().StringVar(&testApproxMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") - devTestApproximationsCmd.Flags().StringArrayVar(&testApproxDataflow, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") + testApproximationRunCmd.Flags().StringVarP(&testApproxOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") + testApproximationRunCmd.Flags().DurationVar(&testApproxTimeout, "timeout", 600*time.Second, "Timeout for analysis") + testApproximationRunCmd.Flags().StringVar(&testApproxMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") + testApproximationRunCmd.Flags().StringArrayVar(&testApproxDataflow, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") } diff --git a/cli/cmd/dev_init_test_project.go b/cli/cmd/test_init.go similarity index 90% rename from cli/cmd/dev_init_test_project.go rename to cli/cmd/test_init.go index 0d5ac3ce0..8db775ac5 100644 --- a/cli/cmd/dev_init_test_project.go +++ b/cli/cmd/test_init.go @@ -16,8 +16,8 @@ import ( var initRuleProjectDeps []string var initApproxProjectDeps []string -var devInitRuleProjectCmd = &cobra.Command{ - Use: "init-rule-project ", +var testRuleInitCmd = &cobra.Command{ + Use: "init ", Short: "Bootstrap a rule test project with build.gradle.kts and test utility JAR", Long: `Creates a minimal Gradle project structure for testing OpenTaint rules. @@ -35,8 +35,8 @@ Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, }, } -var devInitApproximationProjectCmd = &cobra.Command{ - Use: "init-approximation-project ", +var testApproximationInitCmd = &cobra.Command{ + Use: "init ", Short: "Bootstrap a dataflow approximation test project with the fixed Taint source/sink and rule", Long: `Creates a minimal Gradle project structure for testing OpenTaint dataflow approximations. @@ -46,7 +46,9 @@ The project includes: - libs/opentaint-sast-test-util.jar (provides @PositiveRuleSample and @NegativeRuleSample annotations) - approximation-rule.yaml, the fixed source->sink rule the samples are checked against - src/main/java/test/ with Taint.java (the fixed source() and sink()) for test sample sources - - approximations/src/ directory for the approximation under test + +The approximation under test is NOT part of this project: it lives in its own unit folder +(.opentaint/approximations/) and is applied at test time via --dataflow-approximations. Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, Args: cobra.ExactArgs(1), @@ -60,17 +62,17 @@ Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, } func init() { - devCmd.AddCommand(devInitRuleProjectCmd) - devInitRuleProjectCmd.Flags().StringArrayVar(&initRuleProjectDeps, "dependency", nil, + testRuleCmd.AddCommand(testRuleInitCmd) + testRuleInitCmd.Flags().StringArrayVar(&initRuleProjectDeps, "dependency", nil, "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") - devCmd.AddCommand(devInitApproximationProjectCmd) - devInitApproximationProjectCmd.Flags().StringArrayVar(&initApproxProjectDeps, "dependency", nil, + testApproximationCmd.AddCommand(testApproximationInitCmd) + testApproximationInitCmd.Flags().StringArrayVar(&initApproxProjectDeps, "dependency", nil, "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") } // bootstrapTestProject creates the shared Gradle layout (dirs, test-util JAR, build files) -// used by both init-rule-project and init-approximation-project. +// used by both `test rule init` and `test approximation init`. func bootstrapTestProject(outputDir, projectName string, dependencies []string) { dirs := []string{ filepath.Join(outputDir, "libs"), diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go new file mode 100644 index 000000000..17eb90038 --- /dev/null +++ b/cli/cmd/test_rule_reachability.go @@ -0,0 +1,48 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +var reachabilityEntryPoint string + +var testRuleReachabilityCmd = &cobra.Command{ + Use: "reachability [source-path]", + Short: "Trace fact reachability for a single rule (why it does or does not fire)", + Long: `This command scans the project for one rule and writes a sibling SARIF report with fact-reachability info to debug why the rule does or does not fire + +Arguments: + rule-id - Full rule ID, e.g. security/SqlInjection.yaml:tainted-sql-from-http (required) + source-path - Path to the project sources (default: current directory) + +The rule's library source/sink dependencies (its join refs) are collected and analyzed automatically. + +The fact-reachability report is written next to the main SARIF as debug-ifds-fact-reachability.sarif. + +Use --entry-points to seed the analysis at a specific method while tracing reachability: + Non-Spring: RESTRICTS the entry-point set to this method only. + Spring: ADDS this method to Spring's auto-discovered entry-point set (the set can't be narrowed on Spring). +The value is '*' for all methods or a method FQN like com.example.Class#method. + +Use --project-model to scan a pre-compiled project model instead of compiling from sources. +`, + Annotations: map[string]string{"PrintConfig": "true"}, + Args: cobra.RangeArgs(1, 2), + Run: func(cmd *cobra.Command, args []string) { + RuleID = []string{args[0]} + DebugFactReachabilitySarif = true + expandRuleRefs = true + if reachabilityEntryPoint != "" { + out.Warn("on Spring projects this method is added to the auto-discovered entry points, not used to restrict them") + DebugRunAnalysisOnSelectedEntryPoints = reachabilityEntryPoint + } + scanCmd.Run(scanCmd, args[1:]) + }, +} + +func init() { + testRuleCmd.AddCommand(testRuleReachabilityCmd) + addScanFlags(testRuleReachabilityCmd) + testRuleReachabilityCmd.Flags().StringVar(&reachabilityEntryPoint, "entry-points", "", + "Seed analysis at this method ('*' or FQN like com.example.Class#method); restricts on non-Spring, adds on Spring") +} diff --git a/cli/cmd/dev_test_rules.go b/cli/cmd/test_rule_run.go similarity index 83% rename from cli/cmd/dev_test_rules.go rename to cli/cmd/test_rule_run.go index 22f10dcba..6f477eff2 100644 --- a/cli/cmd/dev_test_rules.go +++ b/cli/cmd/test_rule_run.go @@ -23,8 +23,8 @@ var ( testRulesPassthrough []string ) -var devTestRulesCmd = &cobra.Command{ - Use: "test-rules ", +var testRuleRunCmd = &cobra.Command{ + Use: "run ", Short: "Run rule tests against annotated test samples", Long: `Run rule tests against annotated test samples in the given project model. @@ -51,7 +51,7 @@ Exit codes: }, } -// testProjectOptions holds the inputs shared by `dev test-rules` and `dev test-approximations`. +// testProjectOptions holds the inputs shared by `test rule run` and `test approximation run`. type testProjectOptions struct { label string tempDir string @@ -186,13 +186,13 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { } func init() { - devCmd.AddCommand(devTestRulesCmd) - - devTestRulesCmd.Flags().StringArrayVar(&testRulesRuleset, "ruleset", nil, "Additional ruleset path (repeatable)") - devTestRulesCmd.Flags().StringVarP(&testRulesOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") - devTestRulesCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Timeout for analysis") - devTestRulesCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") - devTestRulesCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") - devTestRulesCmd.Flags().StringArrayVar(&testRulesDataflow, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") - devTestRulesCmd.Flags().StringArrayVar(&testRulesPassthrough, "passthrough-approximations", nil, "passThrough approximation YAML file or directory of them (repeatable)") + testRuleCmd.AddCommand(testRuleRunCmd) + + testRuleRunCmd.Flags().StringArrayVar(&testRulesRuleset, "ruleset", nil, "Additional ruleset path (repeatable)") + testRuleRunCmd.Flags().StringVarP(&testRulesOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") + testRuleRunCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Timeout for analysis") + testRuleRunCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") + testRuleRunCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") + testRuleRunCmd.Flags().StringArrayVar(&testRulesDataflow, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") + testRuleRunCmd.Flags().StringArrayVar(&testRulesPassthrough, "passthrough-approximations", nil, "passThrough approximation YAML file or directory of them (repeatable)") } diff --git a/cli/internal/rules/refs.go b/cli/internal/rules/refs.go new file mode 100644 index 000000000..a676d10aa --- /dev/null +++ b/cli/internal/rules/refs.go @@ -0,0 +1,111 @@ +package rules + +import ( + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v2" +) + +// ruleFile is the minimal shape parsed from a ruleset YAML: each rule's id and +// the rules it pulls in via join.refs. +type ruleFile struct { + Rules []struct { + ID string `yaml:"id"` + Join struct { + Refs []struct { + Rule string `yaml:"rule"` + } `yaml:"refs"` + } `yaml:"join"` + } `yaml:"rules"` +} + +// ExpandRuleIDs returns ruleIDs together with every rule transitively +// referenced through join.refs, resolved against the given ruleset roots. A +// full rule id is ":"; a ref is the same path +// with '#' instead of ':'. Originals come first, the rest in BFS order; +// duplicates are removed and ids that can't be resolved on disk pass through +// unchanged. +func ExpandRuleIDs(ruleIDs []string, rulesetRoots []string) []string { + seen := make(map[string]bool, len(ruleIDs)) + var result []string + queue := append([]string(nil), ruleIDs...) + + for len(queue) > 0 { + id := queue[0] + queue = queue[1:] + if seen[id] { + continue + } + seen[id] = true + result = append(result, id) + + for _, ref := range refsOf(id, rulesetRoots) { + if !seen[ref] { + queue = append(queue, ref) + } + } + } + return result +} + +// refsOf returns the full ids referenced by the rule named id via join.refs, +// or nil when the rule's file or entry can't be found. +func refsOf(id string, rulesetRoots []string) []string { + relPath, shortID, ok := splitRuleID(id) + if !ok { + return nil + } + rf, ok := loadRuleFile(relPath, rulesetRoots) + if !ok { + return nil + } + for _, r := range rf.Rules { + if r.ID != shortID { + continue + } + var refs []string + for _, ref := range r.Join.Refs { + if full := refToRuleID(ref.Rule); full != "" { + refs = append(refs, full) + } + } + return refs + } + return nil +} + +// splitRuleID splits "java/security/x.yaml:short" into "java/security/x.yaml" and "short". +func splitRuleID(id string) (relPath, shortID string, ok bool) { + idx := strings.LastIndex(id, ":") + if idx < 0 { + return "", "", false + } + return id[:idx], id[idx+1:], true +} + +// refToRuleID converts a join ref ("path.yaml#short") to a full id ("path.yaml:short"). +func refToRuleID(ref string) string { + idx := strings.LastIndex(ref, "#") + if idx < 0 { + return ref + } + return ref[:idx] + ":" + ref[idx+1:] +} + +// loadRuleFile finds relPath under one of the roots and parses it. +func loadRuleFile(relPath string, rulesetRoots []string) (ruleFile, bool) { + for _, root := range rulesetRoots { + data, err := os.ReadFile(filepath.Join(root, filepath.FromSlash(relPath))) + if err != nil { + continue + } + var rf ruleFile + if err := yaml.Unmarshal(data, &rf); err != nil { + continue + } + return rf, true + } + return ruleFile{}, false +} diff --git a/cli/internal/rules/refs_test.go b/cli/internal/rules/refs_test.go new file mode 100644 index 000000000..b1c65ab6c --- /dev/null +++ b/cli/internal/rules/refs_test.go @@ -0,0 +1,92 @@ +package rules + +import ( + "os" + "path/filepath" + "testing" +) + +// writeRule writes a ruleset YAML at root/relPath, creating parent dirs. +func writeRule(t *testing.T, root, relPath, content string) { + t.Helper() + full := filepath.Join(root, filepath.FromSlash(relPath)) + if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(full, []byte(content), 0o644); err != nil { + t.Fatal(err) + } +} + +func TestExpandRuleIDs_CollectsJoinRefs(t *testing.T) { + root := t.TempDir() + writeRule(t, root, "java/security/xss.yaml", ` +rules: + - id: xss + mode: join + join: + refs: + - rule: java/lib/generic/src.yaml#src + as: untrusted + - rule: java/lib/generic/sink.yaml#sink + as: sink +`) + writeRule(t, root, "java/lib/generic/src.yaml", "rules:\n - id: src\n options: {lib: true}\n") + writeRule(t, root, "java/lib/generic/sink.yaml", "rules:\n - id: sink\n options: {lib: true}\n") + + got := ExpandRuleIDs([]string{"java/security/xss.yaml:xss"}, []string{root}) + want := []string{ + "java/security/xss.yaml:xss", + "java/lib/generic/src.yaml:src", + "java/lib/generic/sink.yaml:sink", + } + assertEqual(t, got, want) +} + +func TestExpandRuleIDs_Transitive(t *testing.T) { + root := t.TempDir() + writeRule(t, root, "a.yaml", "rules:\n - id: a\n join:\n refs:\n - rule: b.yaml#b\n") + writeRule(t, root, "b.yaml", "rules:\n - id: b\n join:\n refs:\n - rule: c.yaml#c\n") + writeRule(t, root, "c.yaml", "rules:\n - id: c\n") + + got := ExpandRuleIDs([]string{"a.yaml:a"}, []string{root}) + assertEqual(t, got, []string{"a.yaml:a", "b.yaml:b", "c.yaml:c"}) +} + +func TestExpandRuleIDs_CycleAndDuplicates(t *testing.T) { + root := t.TempDir() + writeRule(t, root, "a.yaml", "rules:\n - id: a\n join:\n refs:\n - rule: b.yaml#b\n") + writeRule(t, root, "b.yaml", "rules:\n - id: b\n join:\n refs:\n - rule: a.yaml#a\n") + + got := ExpandRuleIDs([]string{"a.yaml:a", "a.yaml:a"}, []string{root}) + assertEqual(t, got, []string{"a.yaml:a", "b.yaml:b"}) +} + +func TestExpandRuleIDs_UnresolvedPassesThrough(t *testing.T) { + root := t.TempDir() + got := ExpandRuleIDs([]string{"does/not/exist.yaml:x"}, []string{root}) + assertEqual(t, got, []string{"does/not/exist.yaml:x"}) +} + +func TestExpandRuleIDs_MultipleRoots(t *testing.T) { + builtin := t.TempDir() + custom := t.TempDir() + // Custom security rule refs a builtin lib rule — the agent's main case. + writeRule(t, custom, "java/security/my.yaml", "rules:\n - id: my\n join:\n refs:\n - rule: java/lib/generic/src.yaml#src\n") + writeRule(t, builtin, "java/lib/generic/src.yaml", "rules:\n - id: src\n") + + got := ExpandRuleIDs([]string{"java/security/my.yaml:my"}, []string{builtin, custom}) + assertEqual(t, got, []string{"java/security/my.yaml:my", "java/lib/generic/src.yaml:src"}) +} + +func assertEqual(t *testing.T, got, want []string) { + t.Helper() + if len(got) != len(want) { + t.Fatalf("got %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("got %v, want %v", got, want) + } + } +} diff --git a/cli/internal/testapprox/testapprox.go b/cli/internal/testapprox/testapprox.go index 4dc8f8189..d208ca1ee 100644 --- a/cli/internal/testapprox/testapprox.go +++ b/cli/internal/testapprox/testapprox.go @@ -13,10 +13,6 @@ import ( // samples reference in @PositiveRuleSample/@NegativeRuleSample. const FixedRuleFileName = "approximation-rule.yaml" -// ApproximationsSrcDir is the source root, relative to the project, where the agent writes the -// approximation under test and which is passed to test-approximations as --dataflow-approximations. -const ApproximationsSrcDir = "approximations/src" - //go:embed example/approximation-rule.yaml var fixedRule []byte @@ -34,8 +30,8 @@ func WriteFixedRule(dir string) (string, error) { } // Scaffold writes the fixed rule (for reference — test-approximations applies its own bundled copy) -// and the Taint source/sink helper, and creates the approximations source dir for the agent to write -// the approximation under test into. Samples and the approximation itself are the agent's to write. +// and the Taint source/sink helper. Samples are the agent's to write; the approximation under test +// lives in its own unit folder (.opentaint/approximations/), never inside this test project. func Scaffold(projectDir string) error { files := map[string][]byte{ filepath.Join(projectDir, FixedRuleFileName): fixedRule, @@ -49,9 +45,5 @@ func Scaffold(projectDir string) error { return fmt.Errorf("write %s: %w", filepath.Base(path), err) } } - approxDir := filepath.Join(projectDir, filepath.FromSlash(ApproximationsSrcDir), "approx") - if err := os.MkdirAll(approxDir, 0o755); err != nil { - return fmt.Errorf("create %s: %w", approxDir, err) - } return nil } diff --git a/skills/analyze-external-methods/SKILL.md b/skills/analyze-external-methods/SKILL.md index aa1db364c..e223729f3 100644 --- a/skills/analyze-external-methods/SKILL.md +++ b/skills/analyze-external-methods/SKILL.md @@ -30,7 +30,7 @@ Every method in `` is a taint-killing path — model all of them. - passthrough — taint moves by a simple from→to copy: a getter, arg→result, builder, container field, collection `add`/`get`, `StringBuilder.append`, `Stream.collect` - dataflow — taint flows through a lambda/callback/functional interface or an async chain -Group by package AND kind — one tracking file per (package, kind): `-passthrough.yaml` for the simple copies, `-dataflow.yaml` for the lambda/callback/async ones. The two kinds are built by different skills with different stages, so they're separate units; kind is the only split (no finer sub-groups). Each unit is one agent's work +Group by package AND kind — one tracking file per (package, kind): `-passthrough.yaml` for the simple copies, `-dataflow.yaml` for the lambda/callback/async ones. `` is the dotted Java package with `.` replaced by `-` (e.g. `reactor.core.publisher` → `reactor-core-publisher`) so it's filesystem-friendly; the YAML `package:` field keeps the real dotted name. Kind is the only split (no finer sub-groups). Each unit is one agent's work ### 2. Flag methods to skip @@ -47,7 +47,7 @@ The one exception: a few methods the engine asks about don't carry taint — log Create one file per (package, kind); fill only the discovery-stage fields. The two kinds differ — passThrough is written and verified by the scan, dataflow is built and tested on a test project: ```yaml -# -passthrough.yaml — simple copies, no test project +# -passthrough.yaml — simple copies, no test project package: com.foo stages: description: done @@ -60,7 +60,7 @@ methods: ``` ```yaml -# -dataflow.yaml — lambda/callback/async, tested on a test project +# -dataflow.yaml — lambda/callback/async, tested on a test project package: com.foo dependencies: # exact GAV the test project needs, from the build files - com.foo:foo-core:1.2.3 @@ -85,5 +85,5 @@ methods: ## Gotchas - Model every method in `` — each is a real taint-killing path; don't second-guess the list. The only exceptions are the obvious non-taint methods you move to `skipped.yaml` -- Approximate only external library methods — never an application-internal class. An internal method that drops taint is a rule or engine matter, not an approximation target; if one shows up as a candidate, drop it -- One file = one (package, kind) = one agent: passThrough and dataflow go in separate files — different skills, different stages; never put a method in two, or two agents collide +- Approximate only external library methods — never an application-internal class. If one shows up as a candidate, drop it +- One file = one (package, kind) = one agent: passThrough and dataflow go in separate files; never put a method in two, or two agents collide diff --git a/skills/analyze-findings/SKILL.md b/skills/analyze-findings/SKILL.md index 30cce6589..1b91787ad 100644 --- a/skills/analyze-findings/SKILL.md +++ b/skills/analyze-findings/SKILL.md @@ -39,7 +39,7 @@ Verdict each logical finding from its flow: - TP — the source is attacker-controlled, the sink is genuinely dangerous with that input, and nothing sanitizes it in between - FP — a sanitizer/validator neutralizes it, the source isn't actually attacker-controlled (config, constant, server-set), the sink is safe for this input (parameterized, escaped), or the path is infeasible. Record which one, so the suppress-FP stage knows what to narrow -Set `verdict` and append the reasoning to `notes`, below the analyzer report already seeded there. Leave `poc` for generate-poc +Set `verdict` and append the reasoning to `notes`, below the analyzer report already seeded there ## Output diff --git a/skills/appsec_agent/SKILL.md b/skills/appsec-agent/SKILL.md similarity index 74% rename from skills/appsec_agent/SKILL.md rename to skills/appsec-agent/SKILL.md index a17a993c8..58c74ffa3 100644 --- a/skills/appsec_agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -1,5 +1,5 @@ --- -name: appsec_agent +name: appsec-agent description: Run an end-to-end application-security analysis on a JVM project with OpenTaint — build, scan, model missing library methods, triage, and confirm vulnerabilities. Use when the user asks to find vulnerabilities, run SAST, or scan a Java/Kotlin app for security issues license: Apache-2.0 metadata: @@ -17,7 +17,13 @@ Keep every artifact under one `.opentaint/` directory at the project root — mo ## Setup -Run `opentaint dev rules-path` once to learn the built-in rules directory; built-ins always load, custom rules go under `.opentaint/rules`. +Before anything else, confirm `opentaint` is on PATH (`command -v opentaint` / `opentaint --version`). If it's missing, don't proceed silently — tell the user and ask to install it, offering the command for their platform; run an install only on explicit confirmation: + +- macOS, or any platform with Homebrew — `brew install --cask seqra/tap/opentaint` +- Linux without Homebrew — `curl -fsSL https://opentaint.org/install.sh | bash` +- Windows (PowerShell) — `irm https://opentaint.org/install.ps1 | iex` + +After installing, run `opentaint health` to confirm the autobuilder/analyzer/rules/runtime resolve. ## Choose a workflow @@ -82,8 +88,9 @@ Universal rules — every dispatch, every workflow: Orchestration practices: -- one unit, one subagent — rules, approximation units, and finding files are independent (unique `` paths), so dispatch them as a parallel fan-out, no races +- Units fan out in parallel — independent `` paths, no races - the sole sequential exception is PoC (shared app state and ports); see references/poc.md +- Steps within a unit are sequential via the artifact on disk — dispatch step N only after step N−1's named artifact exists; never bundle steps into one dispatch - write `state.yaml` at each fan-out join — a phase flips to `done` only once every unit's artifact exists on disk ## State and resumption @@ -93,7 +100,7 @@ You are the only writer of `.opentaint/tracking/state.yaml` — it records the c On start, and after any compaction, reconstruct position from artifacts before doing anything — never replay a completed phase: - read `state.yaml` and the `tracking/` tree -- skip any phase whose artifact exists: `project.yaml` → build; `coverage.yaml` with every area `done` → discover; `report.sarif` → scan; a rule's `artifact` + `tests_passing: done` → that rule; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd +- skip any phase whose artifact exists: `project.yaml` → build; `coverage.yaml` with every entry `done` and `lib-pieces.yaml` with every entry resolved → discover; `report.sarif` → scan; a rule's `artifact` + `tests_passing: done` → that rule; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd - detect new work from artifacts, not memory: finding files with `verdict: pending` (a fresh or reset scan) → triage; methods in `dropped-external-methods.yaml` not yet in any approximation unit → approximations ## Tracking layout @@ -103,12 +110,14 @@ The single source of truth for the tracking schema; each skill writes only its o ``` .opentaint/tracking/ state.yaml # you only — levels + phase status - coverage.yaml # discover-attack-surface — one entry per attack area walked (deep) + coverage.yaml # triage-dependencies seeds, discover-attack-surface fills — one entry per dependency package weighed (deep) + lib-pieces.yaml # discover-attack-surface parks unpaired sources/sinks; assemble-lib-rules resolves them (deep) findings/.yaml # one per logical finding (from the SARIF→finding script; split by triage) - rules/.yaml # one per rule - approximations/-passthrough.yaml # simple from→to copies; write-only, scan-verified - approximations/-dataflow.yaml # lambda/callback/async; tested on a test project + rules/.yaml # one per rule (join requirement — from discover-attack-surface or assemble-lib-rules) + approximations/-passthrough.yaml # simple from→to copies; write-only, scan-verified + approximations/-dataflow.yaml # lambda/callback/async; tested on a test project approximations/skipped.yaml # methods the engine asks for but that carry no taint + poc-servers.yaml # generate-poc — instances it started; you reap them at end of PoC phase ``` state.yaml: @@ -129,17 +138,26 @@ phases: # pending | in_progress | done poc: pending # dynamic triage ``` -coverage.yaml — created by discover-attack-surface (deep): the attack-area checklist it walks, one entry per area, so you can see nothing was skipped and which areas spawned rules: +coverage.yaml — seeded by triage-dependencies and filled by discover-attack-surface (deep): one entry per dependency package weighed, so you can see which libraries were drilled and which were dismissed. A `pending` entry is a flagged library awaiting its depth pass; the rule names live in `rules/.yaml`, not here: ```yaml -areas: - - area: database # one per attack area - status: done # pending | done - rules: [mybatis-sqli] # proposed rule names; [] when built-ins cover it or the area is absent +packages: + - package: org.springframework.web.reactive.function + status: done # pending (flagged, awaiting depth) | done (drilled or dismissed) notes: > free-form — what was found and why ``` +lib-pieces.yaml — discover-attack-surface appends a source or sink it couldn't pair; assemble-lib-rules pairs each into a join and resolves its `disposition` (deep): + +```yaml +sources: # likewise a `sinks:` list + - role: Apache HttpClient response body — server-controlled data + package: org.apache.hc.client5.http + dependency: org.apache.httpcomponents.client5:httpclient5:5.3 + disposition: pending # pending | | dropped: +``` + findings/.yaml — created by the SARIF→finding script; `verdict`/`notes` by analyze-findings; `poc`/`poc_script` by generate-poc: ```yaml @@ -153,7 +171,7 @@ poc: pending # pending | confirmed | failed poc_script: null # path under .opentaint/pocs/ once generate-poc writes one ``` -rules/.yaml — created by discover-attack-surface (`description`); `test_project` by create-test-project; `tests_passing` + `rule_id` + `artifact` by create-rule: +rules/.yaml — created by discover-attack-surface or assemble-lib-rules (`description`); `test_project` by create-test-project; `tests_passing` + `rule_id` + `artifact` by create-rule: ```yaml name: mybatis-sqli @@ -171,7 +189,7 @@ notes: > free-form ``` -approximations/-.yaml — created by analyze-external-methods (`description` + `methods`); the stages differ by kind: +approximations/-.yaml — created by analyze-external-methods (`description` + `methods`); `` = the dotted package with `.` -> `-` (the YAML `package:` field keeps the real dotted name). The stages differ by kind: ```yaml package: com.foo @@ -201,8 +219,8 @@ methods: # engine asks to approximate these, but they carry no ta /.opentaint/ project/ # built project model (project.yaml) rules/java/{lib/generic,lib/spring,security}/ # custom rules - config/.yaml # passThrough approximation configs - approximations/src// # code-based (dataflow) approximation sources + pass-through/.yaml # passThrough approximation configs + approximations// # code-based (dataflow) approximation sources, per unit test-projects// # per-unit test project sources test-compiled// # per-unit compiled test model test-results// # per-unit test outputs diff --git a/skills/appsec_agent/references/approximations.md b/skills/appsec-agent/references/approximations.md similarity index 64% rename from skills/appsec_agent/references/approximations.md rename to skills/appsec-agent/references/approximations.md index b7b5b53b7..db0efcca0 100644 --- a/skills/appsec_agent/references/approximations.md +++ b/skills/appsec-agent/references/approximations.md @@ -1,14 +1,12 @@ # Approximation iteration -The step that models the library methods killing taint, run on normal and deep after the first scan, looping to stabilization. The rescans are part of this block — load references/scan.md for each. Dispatch per the Delegate template in SKILL.md. - Loop until stabilization: 1. analyze-external-methods — Inputs: dropped-file `.opentaint/results/dropped-external-methods.yaml`, tracking-dir `.opentaint/tracking`, ``. Writes one `approximations/-passthrough.yaml` and/or `-dataflow.yaml` per package, plus `skipped.yaml`, only for methods not already in a unit. Returns one line per unit 2. Fan out per unit: - - passthrough → create-pass-through-approximation — Inputs: `` from the unit, ``, config-file `.opentaint/config/.yaml`. Write-only; sets `written` + `artifact`. No test project - - dataflow → create-test-project (dataflow shape) then create-dataflow-approximation — test-compiled `.opentaint/test-compiled/`, approx-src `.opentaint/approximations/src/`. Sets `test_project`, then `tests_passing` + `artifact` (test-approximations auto-applies its own fixed rule — nothing to pass) -3. Re-scan (references/scan.md) with both approximation dirs pointing at the parents (`.opentaint/config`, `.opentaint/approximations/src`) + - passthrough → create-pass-through-approximation — Inputs: `` from the unit, ``, config-file `.opentaint/pass-through/.yaml`. Write-only; sets `written` + `artifact`. No test project + - dataflow → two sequential dispatches per unit: first create-test-project (dataflow shape) produces `.opentaint/test-compiled/` and sets `test_project: done`; on its return, dispatch create-dataflow-approximation against that model (approx-src `.opentaint/approximations/`) — sets `tests_passing` + `artifact` (`test approximation run` auto-applies its own fixed rule — nothing to pass) +3. Re-scan (references/scan.md) with both approximation dirs pointing at the parents (`.opentaint/pass-through`, `.opentaint/approximations`) 4. Pass-through verify (no separate skill): the scan agent reports any method you modeled that is still in `dropped-external-methods.yaml`, or any config load error. Re-invoke that package's create-pass-through-approximation agent to fix (matcher / from→to / YAML), then rescan. A dataflow method that still drops despite passing its isolated test is an escalation case (references/escalation.md), not a re-write 5. Stabilization: stop when no method on a source→sink path remains unmodeled and a rescan surfaces no new such methods (equivalently, byte-equal SARIF across rescans). Otherwise feed the newly dropped methods back into step 1 diff --git a/skills/appsec_agent/references/build.md b/skills/appsec-agent/references/build.md similarity index 74% rename from skills/appsec_agent/references/build.md rename to skills/appsec-agent/references/build.md index 161a701ff..0159222ab 100644 --- a/skills/appsec_agent/references/build.md +++ b/skills/appsec-agent/references/build.md @@ -1,5 +1,3 @@ # Build -The build step, run in every workflow. Dispatch per the Delegate template in SKILL.md; write only the slice named here. - Delegate build-project. Inputs: ``, model-out `.opentaint/project`, any build constraints (Java version, submodules, `--package` filters). Verify `.opentaint/project/project.yaml` exists, is non-empty, and — for a multi-module project — covers the expected module count, not just that the file is present. Set `phases.build: done`. diff --git a/skills/appsec-agent/references/discover-rules.md b/skills/appsec-agent/references/discover-rules.md new file mode 100644 index 000000000..5d226f0fc --- /dev/null +++ b/skills/appsec-agent/references/discover-rules.md @@ -0,0 +1,24 @@ +# Discover + new rules + +## Triage dependencies + +Delegate triage-dependencies. Inputs: ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. It reads `project.yaml`'s dependency list and writes `tracking/coverage.yaml` (`package` / `status` / `notes`) — one `status: pending` entry per library that could introduce a source or sink, dismissals summarised — returning one line per flagged library. Don't ask for the full list back. + +## Discover attack surface + +Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml`. Inputs each: ``, ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`, lib-pieces `.opentaint/tracking/lib-pieces.yaml`. Each works source-first — finds the package's attacker-controlled sources the app uses, pairs each to a dangerous sink as one `tracking/rules/.yaml` join requirement (`description` stage + short requirements + every GAV the flow crosses), and parks any source or sink it couldn't pair in `lib-pieces.yaml`, then flips its `coverage.yaml` entry to `done`. Returns one line per proposed rule plus a loose-piece count. + +## Assemble lib rules + +Once the discover fan-out is done and `lib-pieces.yaml` has `pending` entries, delegate assemble-lib-rules. Inputs: lib-pieces `.opentaint/tracking/lib-pieces.yaml`, ``, tracking-dir `.opentaint/tracking`. With every package's loose pieces in one view it pairs each into a join `tracking/rules/.yaml` (source whose sink lives in another package, sink reached by a built-in source), and resolves every piece's `disposition` to a join name or `dropped: `. One agent — it needs the global view to dedup; fan out by vuln class only if the piece set is large. + +Then a quick area cross-check: across network, persistence, environment, serialization, rendering, naming, execution, messaging — is every boundary a dependency exposes either covered by built-ins or now carrying a rule? If a boundary has a relevant dependency but produced no rule and no clear reason, dispatch a depth pass for it. Set `phases.discover: done` once every `coverage.yaml` entry is `done` and every `lib-pieces.yaml` entry is resolved. + +## Rules + +Fan out across rule units in parallel. Each unit is a two-step pipeline — dispatch the steps one at a time, waiting for the prior step's artifact before the next: + +1. create-test-project — Inputs: spec = the rule's `requirements`, ``, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the tracking file. Sets `test_project: done` +2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/pass-through` / `.opentaint/approximations`. Iterates `opentaint test rule run` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` + +If create-rule reports the test project drops a library method on the rule's flow, the rule can't be verified until that method is modeled — route the dropped methods through the approximation loop (references/approximations.md; they're real library methods the main scan needs too), then re-dispatch create-rule with the approximation dirs. If it reports non-convergence with nothing dropped, load references/escalation.md. A rule's `tests_passing` stays `pending` until its samples pass; set `phases.rules: done` once every rule's is done. diff --git a/skills/appsec-agent/references/escalation.md b/skills/appsec-agent/references/escalation.md new file mode 100644 index 000000000..9babf54db --- /dev/null +++ b/skills/appsec-agent/references/escalation.md @@ -0,0 +1,7 @@ +# Escalation block + +These skills write no tracking files. + +1. debug-rule — Inputs: the `` to trace (for an approximation, the rule whose sample routes taint through the modeled method), the `` and `` of the run that showed the problem, ``, and the approximation dirs if the flow depends on them. Returns a diagnosis: rule fix, missing library model, or engine issue +2. Route by cause: a rule cause goes back to create-rule (references/discover-rules.md); a model cause back to the relevant create-*-approximation agent (references/approximations.md) — either to add a missing unit, or to override a built-in that debug-rule shows isn't propagating (you write the override tracking unit for the specific method, since analyze-external-methods didn't produce one); an engine cause goes to step 3 +3. report-analyzer-issue — Inputs: the ``, the existing `` / ``, the `` (rule full id, or the approximation's target methods), and `` (you decide whether to also file at github.com/seqra/opentaint). It writes `.opentaint/issues/.md` diff --git a/skills/appsec-agent/references/poc.md b/skills/appsec-agent/references/poc.md new file mode 100644 index 000000000..1d02d15a1 --- /dev/null +++ b/skills/appsec-agent/references/poc.md @@ -0,0 +1,12 @@ +# PoC + +Run PoCs one subagent at a time, never in parallel — concurrent exploits race on shared app state and ports. For each TP finding: + +- first finding: generate-poc with no `` — it builds and starts the app and returns the `` it started +- every later finding: pass that `` so the agent reuses the running instance + +Inputs each time: `` = the TP finding file, ``, poc-dir `.opentaint/pocs`, and `` once known. Each sets `poc` (`confirmed`/`failed`) + `poc_script`; a `failed` repro does not flip the triage verdict. Each PoC subagent registers any instance it starts in `.opentaint/tracking/poc-servers.yaml` — that registry, not memory, is what's running (so a reuse-or-start decision and teardown both survive compaction). + +After all PoCs, assemble `.opentaint/vulnerabilities.md` from the confirmed findings yourself (subagents never write it; see SKILL.md). + +Then tear down — you own this, run it directly (don't dispatch a subagent). Read `poc-servers.yaml`; if it lists any instance, ask the user keep-vs-shutdown (default: shut down). Unless they say keep, stop each entry from its `kind` + `ref` (`process` → `kill `, `container` → `docker stop `, `compose` → `docker compose -f down`), confirm its `port` is free, and empty the registry. Only after teardown set `phases.poc: done`. diff --git a/skills/appsec_agent/references/reproduce-vulnerability.md b/skills/appsec-agent/references/reproduce-vulnerability.md similarity index 100% rename from skills/appsec_agent/references/reproduce-vulnerability.md rename to skills/appsec-agent/references/reproduce-vulnerability.md diff --git a/skills/appsec-agent/references/scan.md b/skills/appsec-agent/references/scan.md new file mode 100644 index 000000000..f7c597a51 --- /dev/null +++ b/skills/appsec-agent/references/scan.md @@ -0,0 +1,3 @@ +# Scan + +Delegate run-scan. Inputs: model-dir `.opentaint/project`, ruleset `builtin` + `.opentaint/rules`, report `.opentaint/results/report.sarif`; on normal/deep also config-dir `.opentaint/pass-through` and approx-dir `.opentaint/approximations` (both dir flags walk the tree recursively, so the parents apply every unit). Require a concise return — finding counts per rule, the methods still in `dropped-external-methods.yaml` that sit on a source→sink path, and any config load/parse errors — not the SARIF body. The files persist on disk for the next steps. Set `phases.scan: done`. diff --git a/skills/appsec_agent/references/suppress-fp.md b/skills/appsec-agent/references/suppress-fp.md similarity index 79% rename from skills/appsec_agent/references/suppress-fp.md rename to skills/appsec-agent/references/suppress-fp.md index d10ac170c..bf7193d1a 100644 --- a/skills/appsec_agent/references/suppress-fp.md +++ b/skills/appsec-agent/references/suppress-fp.md @@ -1,7 +1,5 @@ # Suppress-FP block -Load this when the workflow has suppress-FP on, after triage. It fixes confirmed false positives on rules you own or can override, so a rule edit can't silently drop a real finding. Dispatch per the Delegate template in SKILL.md. - For each confirmed FP on an own/overridable rule, one at a time: 1. create-test-project — pin the confirmed TPs as `@PositiveRuleSample` and add the FP as `@NegativeRuleSample`, recompile. Inputs: the FP and TP traces as ``, the rule's ``, test-project / test-compiled `.opentaint/test-{projects,compiled}/` diff --git a/skills/appsec_agent/references/triage.md b/skills/appsec-agent/references/triage.md similarity index 77% rename from skills/appsec_agent/references/triage.md rename to skills/appsec-agent/references/triage.md index c69289ff4..c1bab0c03 100644 --- a/skills/appsec_agent/references/triage.md +++ b/skills/appsec-agent/references/triage.md @@ -1,6 +1,6 @@ # Triage -The triage step, run in every workflow. It generates the finding files and classifies each TP/FP. On a dynamic run, continue to references/poc.md afterward; on static, triage is the last step. The scan must be stable first. Dispatch per the Delegate template in SKILL.md. +The scan must be stable first. ## Generate finding files diff --git a/skills/appsec_agent/scripts/sarif-to-findings.py b/skills/appsec-agent/scripts/sarif-to-findings.py similarity index 100% rename from skills/appsec_agent/scripts/sarif-to-findings.py rename to skills/appsec-agent/scripts/sarif-to-findings.py diff --git a/skills/appsec_agent/references/discover-rules.md b/skills/appsec_agent/references/discover-rules.md deleted file mode 100644 index bcfd6cd97..000000000 --- a/skills/appsec_agent/references/discover-rules.md +++ /dev/null @@ -1,16 +0,0 @@ -# Discover + new rules - -The deep-scan step that maps the attack surface and writes the rules to cover it, run after build and before the scan (deep, and the reproduce-vulnerability workflow). New rules are fixed here, before any approximation iteration. Dispatch per the Delegate template in SKILL.md. - -## Discover attack surface - -Delegate discover-attack-surface. Inputs: ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. It walks a fixed checklist of attack areas into `tracking/coverage.yaml` and creates one `tracking/rules/.yaml` per gap (`description` stage + a short requirements + dependencies), returning the areas covered and one line per rule. Don't ask for the full analysis back. Set `phases.discover: done` once every area in `coverage.yaml` is `done`. - -## Rules - -Fan out the rule units (one subagent each); per unit a two-step loop: - -1. create-test-project — Inputs: spec = the rule's `requirements`, ``, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the tracking file. Sets `test_project: done` -2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/config` / `.opentaint/approximations/src`. Iterates `opentaint dev test-rules` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` - -If create-rule reports the test project drops a library method on the rule's flow, the rule can't be verified until that method is modeled — route the dropped methods through the approximation loop (references/approximations.md; they're real library methods the main scan needs too), then re-dispatch create-rule with the approximation dirs. If it reports non-convergence with nothing dropped, load references/escalation.md. A rule's `tests_passing` stays `pending` until its samples pass; set `phases.rules: done` once every rule's is done. diff --git a/skills/appsec_agent/references/escalation.md b/skills/appsec_agent/references/escalation.md deleted file mode 100644 index 437dec77a..000000000 --- a/skills/appsec_agent/references/escalation.md +++ /dev/null @@ -1,7 +0,0 @@ -# Escalation block - -Load this when a create-rule / create-pass-through / create-dataflow agent can't make its samples pass after repeated attempts, or a rule passes its tests yet is wrong on the real scan. The common cause — a library method on the flow killing taint in the test project — is already caught by create-rule's own `--track-external-methods` check and routed to the approximation loop (references/discover-rules.md); reach here for what survives that: a subtle rule bug, an approximation present but mis-modeled, or an engine issue. Dispatch per the Delegate template in SKILL.md. These skills write no tracking files. - -1. debug-rule — Inputs: the `` to trace (for an approximation, the rule whose sample routes taint through the modeled method), the `` and `` of the run that showed the problem, ``, and the approximation dirs if the flow depends on them. Returns a diagnosis: rule fix, missing library model, or engine issue -2. Route by cause: a rule cause goes back to create-rule (references/discover-rules.md), a model cause back to the relevant create-*-approximation agent (references/approximations.md); an engine cause goes to step 3 -3. report-analyzer-issue — Inputs: the ``, the existing `` / ``, the `` (rule full id, or the approximation's target methods), and `` (you decide whether to also file at github.com/seqra/opentaint). It writes `.opentaint/issues/.md` diff --git a/skills/appsec_agent/references/poc.md b/skills/appsec_agent/references/poc.md deleted file mode 100644 index a9825d604..000000000 --- a/skills/appsec_agent/references/poc.md +++ /dev/null @@ -1,10 +0,0 @@ -# PoC - -The dynamic-confirmation step, run on a dynamic run after triage. Confirm each TP on a running instance, then assemble the report. Dispatch per the Delegate template in SKILL.md. - -Run PoCs one subagent at a time, never in parallel — concurrent exploits race on shared app state and ports. For each TP finding: - -- first finding: generate-poc with no `` — it builds and starts the app and returns the `` it started -- every later finding: pass that `` so the agent reuses the running instance - -Inputs each time: `` = the TP finding file, ``, poc-dir `.opentaint/pocs`, and `` once known. Each sets `poc` (`confirmed`/`failed`) + `poc_script`; a `failed` repro does not flip the triage verdict. After all PoCs, assemble `.opentaint/vulnerabilities.md` from the confirmed findings yourself (subagents never write it; see SKILL.md). Set `phases.poc: done`. diff --git a/skills/appsec_agent/references/scan.md b/skills/appsec_agent/references/scan.md deleted file mode 100644 index 14f39f290..000000000 --- a/skills/appsec_agent/references/scan.md +++ /dev/null @@ -1,5 +0,0 @@ -# Scan - -The scan step, run in every workflow (and once per rescan the approximation iteration triggers). Dispatch via the scan agent per the Delegate template in SKILL.md. - -Delegate run-scan. Inputs: model-dir `.opentaint/project`, ruleset `builtin` + `.opentaint/rules`, report `.opentaint/results/report.sarif`; on normal/deep also config-dir `.opentaint/config` and approx-dir `.opentaint/approximations/src` (both dir flags walk the tree recursively, so the parents apply every unit). Require a concise return — finding counts per rule, the methods still in `dropped-external-methods.yaml` that sit on a source→sink path, and any config load/parse errors — not the SARIF body. The files persist on disk for the next steps. Set `phases.scan: done`. diff --git a/skills/assemble-lib-rules/SKILL.md b/skills/assemble-lib-rules/SKILL.md new file mode 100644 index 000000000..28e5608e6 --- /dev/null +++ b/skills/assemble-lib-rules/SKILL.md @@ -0,0 +1,68 @@ +--- +name: assemble-lib-rules +description: Pair the unpaired sources and sinks left by discovery into join rules. Use for assembling lib rules +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Assemble Lib Rules + +The per-package discovery passes each see only their own library, so a source whose sink sits elsewhere is parked unpaired. With the loose pieces from every package in front of you, pair each into a join — the place a source and a sink finally become a detectable vulnerability + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Loose pieces `` — the unpaired sources/sinks from discovery. Default: `.opentaint/tracking/lib-pieces.yaml` +- Project root `` — the project sources, to confirm a source can actually reach a sink. Default: current directory +- Tracking directory `` — where rule requirements are written. Default: `.opentaint/tracking` + +Built-in rules are available at `opentaint health --rules` + +## Workflow + +### 1. Read the pieces and what's already covered + +Read ``, the built-in rules (`opentaint health --rules`), `.opentaint/rules`, and the join requirements already in `/rules`. A piece a built-in or an existing join already covers needs no new rule + +### 2. Pair source-first + +For each `pending` source, find the dangerous sinks it can reach — among the loose sinks, the built-in sink rules, and the app's own dangerous operations. Reach is a code-level question (does the source's data flow toward that sink anywhere in ``), not a taint trace — the scan does the tracing; you decide the pairing is plausible. Then per real pairing write one join requirement `/rules/.yaml`, named `-` in kebab-case, naming the source end and sink end (which a built-in covers, which must be written) and every library the flow crosses under `dependencies`. Set that source piece's `disposition` to the join name + +### 3. Mop up sinks, then resolve every piece + +A `pending` sink a source you just placed feeds is already in a join; a loose sink reached only by a built-in source gets its own join. Then resolve what's left: a source with no dangerous sink anywhere, a sink with no source in reach, or a piece a built-in already covers → set `disposition: dropped: `. Leave no `pending` entry — an unresolved piece is an un-modeled source or sink + +## Output + +- One `/rules/.yaml` per join assembled, schema as discover-attack-surface writes it (`stages.description: done`, short `requirements`, `dependencies`) +- Every `` entry resolved — `disposition` is a join name or `dropped: ` +- A brief summary to the caller: one line per join (name, source→sink) and the paired/dropped counts. The tracking files hold the detail — don't paste it back + +## Tracking + +`` — resolve each entry's `disposition`: + +```yaml +sources: + - role: Apache HttpClient response body — data from a server the app calls + package: org.apache.hc.client5.http + dependency: org.apache.httpcomponents.client5:httpclient5:5.3 + disposition: httpclient-ssrf # the join it went into +sinks: + - role: SnakeYAML load — untrusted YAML deserialization + package: org.yaml.snakeyaml + dependency: org.yaml:snakeyaml:2.2 + disposition: "dropped: no untrusted source reaches a SnakeYAML load in this app" +``` + +The join requirements themselves use the `rules/.yaml` schema discover-attack-surface writes + +## Gotchas + +- Write requirements, not rule files — create-rule authors the lib source/sink YAMLs and the join from the requirement downstream +- Pair only a flow that exists in the app — a join whose source can't reach its sink wastes a test project and never converges; confirm reachability in `` +- Reference a built-in source or sink where one fits rather than requiring a new one +- Resolve every piece — drop with a reason, never silently leave one `pending` diff --git a/skills/create-dataflow-approximation/SKILL.md b/skills/create-dataflow-approximation/SKILL.md index 7727533d1..d27a5dcab 100644 --- a/skills/create-dataflow-approximation/SKILL.md +++ b/skills/create-dataflow-approximation/SKILL.md @@ -16,15 +16,15 @@ Write a code-based approximation for a library method whose taint propagation de From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default - Methods to model `` — the target method(s) and how taint flows through them, from the tracking file's `methods` (all `type: dataflow`) -- Tracking file `` — the dataflow approximation unit (`-dataflow`). Default: `.opentaint/tracking/approximations/.yaml` -- Approximation sources `` — this package's own directory for the `.java` approximation files. Default: `.opentaint/approximations/src/` +- Tracking file `` — the dataflow approximation unit (`-dataflow`, e.g. `reactor-core-publisher-dataflow`). Default: `.opentaint/tracking/approximations/.yaml` +- Approximation sources `` — this package's own directory for the `.java` approximation files. Default: `.opentaint/approximations/` - Compiled test project `` — the per-package compiled model to test against. Default: `.opentaint/test-compiled/` ## Workflow ### 1. Write the approximation source -Create Java files in ``. Target the EXACT class named in `dropped-external-methods.yaml` (the unit's `methods[].target`), whatever it is. `@Approximate` matches only that one class — unlike passThrough's `overrides: true`, it is not propagated to other types in the hierarchy — and the dropped FQN already reflects how the analyzer resolved the call: an interface-typed receiver (`Map m = ...; m.computeIfAbsent(...)`) drops `java.util.Map#computeIfAbsent` → target `java.util.Map`; a concrete receiver (`new HashMap<>()`) drops `java.util.HashMap#computeIfAbsent` → target `java.util.HashMap`. Don't substitute a supertype or a subtype for what the dropped file names. Model the real propagation — never leave the body empty, an empty body silently drops the taint; in doubt about how taint flows through the method (which callback or argument carries it), read the library's source rather than guessing: +Create Java files in ``. Target the EXACT class named in `dropped-external-methods.yaml` — `@Approximate` matches only that class (unlike passThrough's `overrides: true`), and the dropped FQN reflects how the analyzer resolved the call: an interface-typed receiver (`Map m = ...; m.computeIfAbsent(...)`) drops `java.util.Map#computeIfAbsent`; a concrete one (`new HashMap<>()`) drops `java.util.HashMap#computeIfAbsent`. Don't substitute a supertype or subtype. Model the real propagation — never leave the body empty (it silently drops taint); when unsure how taint flows through the method, read the library source rather than guessing: ```java package com.example.approximations; @@ -58,7 +58,7 @@ public class ReactiveProcessor { } ``` -Wrapper-returning operators (a `Mono`/`Flux`, `Optional`, `Stream`, a builder — anything where the taint stays inside a container): three things matter beyond the plain case above. Declare the real concrete return type, not `Object` (the IFDS summary won't propagate otherwise); in the `nextBool()` branch `return self`, not `null` (returning `null` discards the container's taint on that path); and extract → apply → re-wrap so a downstream extractor (`block`, `get`, …) can pull the tainted value back out: +Wrapper-returning operators (a `Mono`/`Flux`, `Optional`, `Stream`, a builder — anything where the taint stays inside a container): declare the real concrete return type, not `Object`; in the `nextBool()` branch `return self`, not `null`; and extract → apply → re-wrap so a downstream extractor (`block`, `get`, …) can pull the tainted value back out: ```java @Approximate(reactor.core.publisher.Mono.class) @@ -74,24 +74,28 @@ public class Mono { ### 2. Test against the test project -Run `test-approximations` over `` applying only this package's sources (``); iterate the source until the sample passes: +Run `test approximation run` over `` applying only this package's sources (``); iterate the source until the sample passes: ```bash -opentaint dev test-approximations \ +opentaint test approximation run \ -o .opentaint/test-results/ \ --dataflow-approximations ``` -test-approximations applies its own bundled fixed source→sink rule automatically — you don't author or pass one (there is no `--ruleset` flag); other packages' approximation sources are merged only at the final scan, not here. The CLI auto-compiles the `.java` sources against the analyzer JAR (for `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) and the project's dependencies; if compilation fails it reports the errors and aborts before the tests. The sample that routes taint through the method is a `falseNegative` until the model propagates it. Read `.opentaint/test-results//test-result.json`: +`test approximation run` applies its own bundled fixed source→sink rule automatically — you don't author or pass one. The CLI auto-compiles the `.java` sources against the analyzer JAR (for `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) and the project's dependencies; if compilation fails it reports the errors and aborts before the tests. The sample that routes taint through the method is a `falseNegative` until the model propagates it. Read `.opentaint/test-results//test-result.json`: -- still `falseNegative` → the `@Approximate(...)` target class or a method signature doesn't match what the analyzer sees, or the body doesn't route taint from the real source to the modeled result/argument; diagnose the mismatch, don't rationalize a non-result. Most common: the target class doesn't equal the FQN in `dropped-external-methods.yaml` — you wrote a supertype/subtype (e.g. `java.util.Map` when the dropped file says `java.util.HashMap#computeIfAbsent`, or vice-versa). Re-target the exact dropped class and match the cast (`(java.util.HashMap) (Object) this`) -- `falsePositive` (a negative sample fired) → the model is over-broad: it taints a read it shouldn't, e.g. data fetched under a different key/field than it was stored under. Narrow the propagation until the negative stays non-firing while the positive passes (negatives exist only for shared-state methods — see create-test-project/references/approximation.md) +- still `falseNegative` → the `@Approximate(...)` target class or a method signature doesn't match what the analyzer sees, or the body doesn't route taint from the real source to the modeled result/argument; diagnose the mismatch, don't rationalize a non-result. Most common: target-class mismatch with the dropped FQN — re-target the exact dropped class and match the cast (`(java.util.HashMap) (Object) this`) +- `falsePositive` (a negative sample fired) → the model is over-broad: it taints a read it shouldn't, e.g. data fetched under a different key/field than it was stored under. Narrow the propagation until the negative stays non-firing while the positive passes + +### 3. When the sample won't pass after a couple of fixes + +After ~2 fix attempts without a clearer cause — `@Approximate` target matches the dropped FQN, the body propagates from the modeled source slot to the result/argument, but the sample is still `falseNegative` — don't keep guessing. Leave `tests_passing: pending` and report non-convergence to the caller; the orchestrator escalates to debug-rule for a fact-reachability trace through the approximation point ## Key patterns | Pattern | Usage | |---|---| -| `@Approximate(TargetClass.class)` | Link the approximation to its target class — the EXACT class `dropped-external-methods.yaml` names (interface or concrete, as the analyzer resolved it); matches only that class, not propagated to other types in the hierarchy. Must be on the compile classpath (a project dependency or a JDK type) | +| `@Approximate(TargetClass.class)` | Link the approximation to its target class. Must be on the compile classpath (a project dependency or a JDK type) | | `(TargetClass) (Object) this` | Cast to reach the real object's methods | | `@ArgumentTypeContext` | On lambda / functional-interface parameters | | `OpentaintNdUtil.nextBool()` | Non-deterministic branch — the analyzer considers both paths | @@ -100,14 +104,14 @@ test-approximations applies its own bundled fixed source→sink rule automatical - The approximation source(s) under `` - Tracking updated: `artifact` and `stages.tests_passing` (per Tracking) -- Report the source path, a one-line test summary, and the exact `test-approximations` command used +- Report the source path, a one-line test summary, and the exact `test approximation run` command used ## Tracking In ``, once the source exists and its sample passes: ```yaml -artifact: .opentaint/approximations/src//com/example/approximations/ReactiveProcessor.java +artifact: .opentaint/approximations//com/example/approximations/ReactiveProcessor.java stages: tests_passing: done ``` @@ -117,5 +121,16 @@ Do not touch other stages or fields ## Constraints - Java 8 source compatibility -- One approximation class per target class (strict bijection); never target a class that already has a built-in approximation — it errors at load with `IllegalArgumentException` +- One approximation class per target class — a strict bijection enforced at load (duplicates throw `IllegalArgumentException`). Built-in dataflow approximations are first-priority and presumed correct; you cannot override them — see Troubleshooting if debug-rule traces a kill to one - Method signatures must match the target class methods exactly +- Don't unpack or grep the analyzer JAR for built-in models or signatures — its internals aren't a stable API; go through the CLI + +## Troubleshooting + +When debug-rule traces a taint kill to an external method, walk this in order: + +1. Confirm the method has a built-in — `approximated-external-methods.yaml` lists it (if you didn't pass an approximation to the scan, the listing is the bundled set) +2. Confirm from the debug-rule trace that taint dies at exactly that method +3. Classify the gap: + - fits a from→to copy → write a passthrough override (built-in passthroughs are overrideable by design) + - truly needs dataflow shape (lambdas/callbacks/async) → engine issue; built-in dataflows aren't locally overrideable — report it upstream diff --git a/skills/create-pass-through-approximation/SKILL.md b/skills/create-pass-through-approximation/SKILL.md index 4d8ca2600..fc2a4fdbc 100644 --- a/skills/create-pass-through-approximation/SKILL.md +++ b/skills/create-pass-through-approximation/SKILL.md @@ -9,7 +9,7 @@ metadata: # Skill: Create PassThrough Approximation -Write passThrough propagation rules for external library methods. There's no test project — the main scan applies the config and verifies it; if a modeled method is still dropped or the config errors, you're re-invoked to fix it +Write passThrough propagation rules for external library methods ## Inputs @@ -17,84 +17,101 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Methods to model `` — the target method(s) and what each propagates, from the tracking file's `methods` (all `type: passthrough`) - Tracking file `` — the passThrough approximation unit. Default: `.opentaint/tracking/approximations/.yaml` -- Config output `` — where to write the passThrough approximation. Default: `.opentaint/config/.yaml` +- Config output `` — where to write the passThrough approximation. Default: `.opentaint/pass-through/.yaml` ## Workflow ### 1. Write the passThrough config -Write `passThrough:` rules into `` +Write `passThrough:` copies into ``. When an object carries taint between calls — a setter stores it and a getter returns it later, or a builder holds it — route through a virtual slot, an access path `[, .##java.lang.Object]`: +- the slot name is nominal — the engine never resolves it, so it need not be a real field +- type it `java.lang.Object` — a concrete type can fail the read-out type-check and drop the taint +- the writer and reader must name the identical `Class#slot#java.lang.Object` triple, or the taint drops -Simple getter (taint on `this` to `result`): +Getter / setter pair — the writer stores into the slot, the getter reads the same slot back to `result`: ```yaml passThrough: - - function: com.example.lib.DataWrapper#getValue - copy: - - from: this - to: result +- function: org.springframework.http.HttpEntity#setBody + copy: + - from: arg(0) + to: + - this + - .org.springframework.http.HttpEntity#Body#java.lang.Object +- function: org.springframework.http.HttpEntity#getBody + copy: + - from: + - this + - .org.springframework.http.HttpEntity#Body#java.lang.Object + to: result ``` -Argument to result: +Several writers sharing one slot — any of them taints the object, the reader pulls it back: ```yaml passThrough: - - function: com.example.lib.Converter#convert - copy: - - from: arg(0) - to: result +- function: org.apache.tools.ant.types.FileSet#setDir + copy: + - from: arg(0) + to: + - this + - .org.apache.tools.ant.types.FileSet#path#java.lang.Object +- function: org.apache.tools.ant.types.FileSet#setFile + copy: + - from: arg(0) + to: + - this + - .org.apache.tools.ant.types.FileSet#path#java.lang.Object ``` -Builder pattern: +Cross-type builder — when a builder method consumes an argument and returns a *different* type, carry the taint along both the chained receiver (for further calls on `this`) and the returned object, slot included. Four copies: arg → returned-value slot, arg → builder slot, whole builder → returned value, builder slot → returned-value slot: ```yaml passThrough: - - function: com.example.lib.Builder#withName - copy: - - from: arg(0) - to: this - - from: arg(0) - to: result - - from: this - to: result +- function: org.springframework.ldap.query.LdapQueryBuilder#filter + copy: + - from: arg(0) + to: + - result + - .org.springframework.ldap.query.LdapQuery#filter#java.lang.Object + - from: arg(0) + to: + - this + - .org.springframework.ldap.query.LdapQueryBuilder#filter#java.lang.Object + - from: this + to: result + - from: + - this + - .org.springframework.ldap.query.LdapQueryBuilder#filter#java.lang.Object + to: + - result + - .org.springframework.ldap.query.LdapQuery#filter#java.lang.Object ``` -Container via a synthetic field — when a container takes the taint in one call and hands it back in another, write into a field that doesn't really exist, then read from it: +Conditional propagation — gate a rule with a `condition` (the copy still routes through a slot): ```yaml passThrough: - - function: org.springframework.http.ResponseEntity$BodyBuilder#body - copy: - - from: arg(0) - to: - - result - - .org.springframework.http.HttpEntity#Body#java.lang.Object - - function: org.springframework.http.HttpEntity#getBody - copy: - - from: - - this - - .org.springframework.http.HttpEntity#Body#java.lang.Object - to: result -``` -The naive model — copy the data onto `this`, then on read copy `this` to `result` — fails on types: `this` is the container type, not the data type (e.g. `String`), so the engine can't hang the taint on it. Routing through a field typed `java.lang.Object` (here `#Body#java.lang.Object`) sidesteps the mismatch. A synthetic per-object slot `.` does the same job without naming a field — store on the taking call, read on the returning one (see Reference) - -Conditional propagation: -```yaml -passThrough: - - function: com.example.lib.Parser#parse - condition: - typeIs: - position: arg(0) - type: java.lang.String - copy: - - from: arg(0) - to: result +- function: com.example.lib.Parser#parse + condition: + typeIs: java.lang.String + pos: arg(0) + copy: + - from: arg(0) + to: + - this + - .com.example.lib.Parser#parsed#java.lang.Object ``` ### 2. Verification is the scan -There's no test project for passThrough. The main scan applies `` (run-scan's `--passthrough-approximations`, which takes a file or a directory) and the scan agent reports back. You're re-invoked to fix the config when that scan shows: +There's no test project for passThrough. The main scan applies `` and the scan agent reports back. You're re-invoked to fix the config when that scan shows: - a method you modeled still in `dropped-external-methods.yaml` → the `function` matcher didn't match (check package, class, name, `overrides`), or the `from`/`to` doesn't land on the tainted position -- a config load / parse error → fix the YAML +- the flow still doesn't surface though the method is no longer dropped → most often a broken channel: the writer and reader name different `Class#slot#java.lang.Object` triples, or the slot isn't typed `java.lang.Object` +- a config load / parse error → fix the YAML (an unknown `condition` key, a bad position, or a 2-part field modifier all fail to load) -Never invoke the analyzer JAR directly — always go through the CLI +Never invoke or grep the analyzer JAR — its internals aren't a stable API; for built-in rules use `opentaint health --rules`, for everything else the CLI + +### 3. When the config won't converge + +After ~2 fix re-invocations without a clearer cause — matcher fields and `from`/`to` checked, writer/reader slots confirmed identical, the modeled method no longer in `dropped-external-methods.yaml`, but the scan still doesn't surface the flow — don't keep guessing. Report non-convergence to the caller; the orchestrator escalates to debug-rule for a fact-reachability trace of where taint dies ## Output @@ -107,7 +124,7 @@ Never invoke the analyzer JAR directly — always go through the CLI In ``, once the config is written: ```yaml -artifact: .opentaint/config/.yaml +artifact: .opentaint/pass-through/.yaml stages: written: done ``` @@ -116,25 +133,28 @@ Do not touch other stages or fields ## Reference -Position values -- `this`, `result`, `arg(0)`, `arg(1)`, ..., `arg(*)` -- Position modifiers (YAML list): `.[*]` (array element), `.ClassName#fieldName#fieldType` (field), `.` (synthetic per-object state, an alternative to a named field) +Position bases +- `this`, `result`, `arg(0)`, `arg(1)`, … +- `any()` — a single argument bound consistently across every position in the rule; `class()` — a static field. Rare — prefer an explicit `arg(N)` + +Access-path modifiers (list form `[, ]`) +- `.##` — a field or virtual slot; type it `java.lang.Object`. The slot name is arbitrary (a descriptive name, or the conventional `` for a generic carrier) +- `.[*]` — array / collection element Function matching - Simple: `package.Class#method` -- Complex: `{package, class, name}`, each with an optional `pattern:` regex — for one hard-to-name function, not for matching many at once (see Gotchas) +- Complex: `{package, class, name}` — for one hard-to-name function, not for matching many at once (see Gotchas) Overrides - `overrides: true` (default): applies to the class and all subclasses - `overrides: false`: exact class only -Conditions -- `typeIs`, `annotatedWith`, `isConstant`, `isNull`, `constantMatches`, `tainted`, `numberOfArgs`, `methodAnnotated`, `classAnnotated`, `methodNameMatches`, `classNameMatches`, `isStaticField`, `anyOf`, `allOf`, `not` +Conditions (the only keys that load from YAML) +- `typeIs`, `annotatedWith`, `isConstant`, `isNull`, `constantMatches`, `constantEq`, `constantGt`, `constantLt`, `tainted`, `anyOf`, `allOf`, `not` ## Gotchas -- passThrough expresses only from→to copies — DB round-trips, lambdas, and async belong in create-dataflow-approximation -- The approximation merges with built-ins at the rule level — a provided rule overrides a built-in only if it matches one; don't redefine a method already in `approximated-external-methods.yaml` +- The approximation merges with built-ins at the rule level — a provided rule overrides a built-in only if it matches one. Don't redefine a method already in `approximated-external-methods.yaml` unless debug-rule shows the built-in isn't propagating taint here, then override deliberately - A wrong argument position copies the wrong value — point `from`/`to` at the tainted one - In doubt about how a method moves taint — which argument or field reaches the result — read the library's source rather than guessing -- Model one function per rule — don't use a regex/wildcard `pattern:` matcher (e.g. `name: get.*`, `class: .*`) to cover many functions at once; it over-models, copying taint through methods you never vetted and manufacturing false positives. Write an explicit `function:` per method +- Model one function per rule — don't use a regex/wildcard `pattern:` matcher (e.g. `name: get.*`, `class: .*`) or `arg(*)` to cover many functions at once; it over-models, copying taint through methods you never vetted and manufacturing false positives. Write an explicit `function:` per method diff --git a/skills/create-rule/SKILL.md b/skills/create-rule/SKILL.md index 871d6ecd1..fb96c5200 100644 --- a/skills/create-rule/SKILL.md +++ b/skills/create-rule/SKILL.md @@ -21,13 +21,13 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Tracking file `` — the rule file. Default: `.opentaint/tracking/rules/.yaml` - Approximation directories `` / `` (optional) — apply on a re-dispatch when the test project needs a library model that's now built. Default: none -Built-in rules are available at `opentaint dev rules-path` +Built-in rules are available at `opentaint health --rules` ## Workflow ### 1. Check existing coverage -Browse builtin rules at `opentaint dev rules-path` for source/sink library rules to reference. A `refs` to a built-in source/sink is cheaper and more accurate than a new one +Browse builtin rules at `opentaint health --rules` for source/sink library rules to reference. A `refs` to a built-in source/sink is cheaper and more accurate than a new one ### 2. Wire sources and sinks @@ -111,15 +111,15 @@ rules: ### 4. Test until success -Run the rule tests against the compiled test project; iterate the rule and re-run `test-rules` until every sample passes: +Run the rule tests against the compiled test project; iterate the rule and re-run `test rule run` until every sample passes: ```bash -opentaint dev test-rules \ +opentaint test rule run \ -o .opentaint/test-results/ \ --ruleset ``` -`test-rules` auto-loads the built-in rules, so pass only your custom `` — a literal `builtin` here would be treated as a path. When the caller passed `` / ``, append `--passthrough-approximations ` / `--dataflow-approximations ` — without them a library method the test flow relies on drops taint and the positive can't pass. Read `.opentaint/test-results//test-result.json`: +`test rule run` auto-loads the built-in rules, so pass only your custom `` — a literal `builtin` here would be treated as a path. When the caller passed `` / ``, append `--passthrough-approximations ` / `--dataflow-approximations ` — without them a library method the test flow relies on drops taint and the positive can't pass. Read `.opentaint/test-results//test-result.json`: - `falseNegative` (positive didn't trigger) → patterns too narrow; broaden `pattern-either`, check metavariable names match across branches and between `refs` and `on` - `falsePositive` (negative triggered) → patterns too broad; add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or `metavariable-regex` @@ -143,13 +143,13 @@ Read `dropped-external-methods.yaml` next to it; either way leave `tests_passing ### 6. Refining for a false positive (suppress-FP) -The test project already pins the confirmed TPs as `@PositiveRuleSample` and reproduces the FP as a `@NegativeRuleSample` — refine only the rule. Narrow it (step 4's `falsePositive` handling) until the negative stops triggering while every positive still passes. Do not touch the samples; if one looks wrong, hand it back upstream +The test project already pins the confirmed TPs as `@PositiveRuleSample` and reproduces the FP as a `@NegativeRuleSample` — refine only the rule. Narrow it (step 4's `falsePositive` handling) until the negative stops triggering while every positive still passes ## Output - The rule file(s) under `` - Tracking updated: `rule_id`, `artifact`, `stages.tests_passing` (per Tracking) -- Report the full rule id, a one-line test summary, and the exact `test-rules` command used +- Report the full rule id, a one-line test summary, and the exact `test rule run` command used - If blocked (step 5): leave `tests_passing: pending` and report the cause instead ## Tracking @@ -179,3 +179,4 @@ stages: - A wrong argument position in `(..., $UNTRUSTED, ...)` focuses the wrong parameter — point `focus-metavariable` at the tainted one - Refine the rule, never the test project — don't edit or weaken samples here; if one is wrong, hand it back upstream - A positive that won't pass because a library method drops taint is not a rule bug — don't broaden the rule to force it; surface it for approximation (step 5) +- Don't unpack or grep the analyzer JAR for built-in rules — its internals aren't a stable API; read the YAMLs from `opentaint health --rules` diff --git a/skills/create-test-project/SKILL.md b/skills/create-test-project/SKILL.md index 981271bc4..0e6a41be5 100644 --- a/skills/create-test-project/SKILL.md +++ b/skills/create-test-project/SKILL.md @@ -22,7 +22,7 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Compiled output `` — the model. Default: `.opentaint/test-compiled/` - Dependencies — exact Maven coordinates the samples need; default: the `dependencies` list in ``; with no tracking file, derive them from the project's `build.gradle`/`pom.xml` -`` is the rule name for a rule, or the dataflow approximation unit (`-dataflow`) for an approximation; the two never share a folder +`` is the rule name for a rule, or the dataflow approximation unit (`-dataflow`, e.g. `reactor-core-publisher-dataflow`) for an approximation; the two never share a folder ## Workflow @@ -30,17 +30,17 @@ From the caller; if omitted, fall back to the default. Ask only when a required Pick the scaffold by shape, then pass each coordinate from the tracking file's `dependencies` as a `--dependency`: -- a rule → `init-rule-project` (Gradle build + the test-util jar) -- a dataflow approximation → `init-approximation-project` (the same, plus `Taint.java` and the fixed `approximation-rule.yaml` the harness applies) +- a rule → `test rule init` (Gradle build + the test-util jar) +- a dataflow approximation → `test approximation init` (the same, plus `Taint.java` and the fixed `approximation-rule.yaml` the harness applies) ```bash # rule test project -opentaint dev init-rule-project \ +opentaint test rule init \ --dependency "org.mybatis:mybatis:3.5.13" \ --dependency "javax.servlet:javax.servlet-api:4.0.1" # dataflow approximation test project -opentaint dev init-approximation-project \ +opentaint test approximation init \ --dependency "io.projectreactor:reactor-core:3.8.5" ``` @@ -53,7 +53,7 @@ Write Java samples under `/src/main/java/test/`, each annotated wi What the positive and negative samples must contain depends on the shape — load and follow the matching reference: - a rule → `references/rule.md` -- a dataflow approximation → `references/approximation.md` (passThrough approximations need no test project — they're written directly and verified by the scan) +- a dataflow approximation → `references/approximation.md` ### 3. Compile diff --git a/skills/create-test-project/references/approximation.md b/skills/create-test-project/references/approximation.md index 196f0c05f..65fcab1c9 100644 --- a/skills/create-test-project/references/approximation.md +++ b/skills/create-test-project/references/approximation.md @@ -1,12 +1,10 @@ # Dataflow approximation test project -This shape is for code-based (dataflow) approximations only — passThrough approximations are written directly and verified by the scan, with no test project - ## How it tests -`opentaint dev test-approximations` applies one fixed source → sink rule automatically — you do not author or pass a rule. That rule matches a fixed pair, `test.Taint.source()` and `test.Taint.sink(...)`, provided by the `Taint` helper scaffolded into the project. Your samples route taint from `Taint.source()` through the method being approximated into `Taint.sink(...)`. Granularity is per sample (`className#methodName`), so the one fixed rule covers every sample — a broken approximation only flips its own sample +`opentaint test approximation run` applies one fixed source → sink rule automatically — you do not author or pass a rule. That rule matches a fixed pair, `test.Taint.source()` and `test.Taint.sink(...)`, provided by the `Taint` helper scaffolded into the project. Your samples route taint from `Taint.source()` through the method being approximated into `Taint.sink(...)`. Granularity is per sample (`className#methodName`), so the one fixed rule covers every sample — a broken approximation only flips its own sample -`opentaint dev init-approximation-project ` scaffolds the Gradle build, `Taint.java`, and the `approximation-rule.yaml` reference — you add only the samples (under `src/main/java/test/`). The approximation itself is NOT part of this project: it lives in its own unit folder `.opentaint/approximations/src/` and is applied to this compiled model at test time via `--dataflow-approximations` (see create-dataflow-approximation). Do not create an `approximations/` directory inside the test project +`opentaint test approximation init ` scaffolds the Gradle build, `Taint.java`, and the `approximation-rule.yaml` reference — you add only the samples (under `src/main/java/test/`) ## Positive sample @@ -49,6 +47,5 @@ A negative that fires (`falsePositive` in `test-result.json`) means the model is ## Notes -- `value`/`id` always reference the fixed rule: `approximation-rule.yaml` / `approximation-rule`. test-approximations applies its own bundled copy, so the project's `approximation-rule.yaml` is only a reference — what matters is that samples call `test.Taint.source()` / `test.Taint.sink(...)` +- `value`/`id` always reference the fixed rule: `approximation-rule.yaml` / `approximation-rule`. `test approximation run` applies its own bundled copy, so the project's `approximation-rule.yaml` is only a reference — what matters is that samples call `test.Taint.source()` / `test.Taint.sink(...)` - the sample's receiver type fixes the dropped method's fully-qualified name, and the approximation must `@Approximate` that exact class — so mirror the real call's receiver type. An interface-typed receiver (`Map m`, e.g. a method parameter) drops `java.util.Map#computeIfAbsent`; a concrete `Map cache = new HashMap<>()` drops `java.util.HashMap#computeIfAbsent`. The `new HashMap<>()` form above is just one case — match whichever the real flow uses -- the approximation under test is NOT in this project — it lives in the separate unit folder `.opentaint/approximations/src/`, compiled by the CLI (not Gradle) and applied with `--dataflow-approximations ` — see create-dataflow-approximation diff --git a/skills/debug-rule/SKILL.md b/skills/debug-rule/SKILL.md index 6f6f7efd9..1dca302b5 100644 --- a/skills/debug-rule/SKILL.md +++ b/skills/debug-rule/SKILL.md @@ -15,12 +15,12 @@ Diagnose why a rule or approximation behaves unexpectedly on a model — samples From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -- Rule `` — the single full rule ID to trace (`.yaml:`); fact-reachability is always per-rule, so to debug an approximation trace the rule whose sample routes taint through the approximated method +- Rules `` — the security rule to trace AND every library rule it `refs` (source/sink), each as `.yaml:`; fact-reachability runs only the rules you list and silently disconnects the join if a ref is missing. For an approximation, trace the rule whose sample routes taint through the approximated method - Project model `` — the model where the behavior shows up. Default: `.opentaint/test-compiled/` for a test project, or `.opentaint/project` for a main scan - Ruleset `` — Default: `builtin` plus `.opentaint/rules` - Output directory `` — where the debug SARIF lands. Default: `.opentaint/test-results/` for a test model, or `.opentaint/results` for a main scan - Dropped external methods `` — the list from the run that showed the problem. Default: `dropped-external-methods.yaml` next to that run's SARIF -- Approximation directories `` / `` (optional) — apply when the behavior depends on them, so the debug run matches the run that showed the problem. Default: `.opentaint/config`, `.opentaint/approximations/src` +- Approximation directories `` / `` (optional) — apply when the behavior depends on them, so the debug run matches the run that showed the problem. Default: `.opentaint/pass-through`, `.opentaint/approximations` ## Workflow @@ -30,40 +30,45 @@ Open `` from the run that showed the problem. If any method on the ### 2. Localize the kill — fact-reachability SARIF +Pass the single rule to debug as the positional `` — its library `refs` (source/sink) are collected and analyzed automatically, so you don't list them: + ```bash -opentaint dev debug-fact-reachability \ +opentaint test rule reachability \ --project-model \ -o /report.sarif \ --ruleset builtin --ruleset ``` -When the thing under debug is an approximation (or the flow depends on one), append `--passthrough-approximations ` / `--dataflow-approximations ` so the trace runs with it applied — taint dying at the approximated call then means the approximation isn't propagating: wrong signature (still in ``), empty body, or wrong from→to. Read the separate `/debug-ifds-fact-reachability.sarif` (not the `-o` file). For a missed detection (a `@PositiveRuleSample` that won't pass, or a flow absent from a scan): confirm a fact exists at the source — if not, the gap is in `pattern-sources` — then walk the facts to the last instruction still carrying the fact and the first where it's gone; that gap is where taint dies. For a spurious detection, do the reverse: find where a fact appears with no tainted input reaching it +The debug output is the sibling file `/debug-ifds-fact-reachability.sarif`, NOT the `-o` SARIF. The `-o` file is the regular rule run (findings only); the per-instruction fact-reachability data — what shows where taint dies — lives only in the sibling. Read the sibling; the `-o` SARIF only tells you whether the rule fired, not why + +When the thing under debug is an approximation (or the flow depends on one), append `--passthrough-approximations ` / `--dataflow-approximations ` so the trace runs with it applied — taint dying at the approximated call then means the approximation isn't propagating: wrong signature (still in ``), empty body, or wrong from→to. For a missed detection (a `@PositiveRuleSample` that won't pass, or a flow absent from a scan): confirm a fact exists at the source — if not, the gap is in `pattern-sources` — then walk the facts to the last instruction still carrying the fact and the first where it's gone; that gap is where taint dies. For a spurious detection, do the reverse: find where a fact appears with no tainted input reaching it ### 3. Isolate an entry point (optional) -When the run misses the flow and you suspect the entry method is never reached, force analysis onto it. The entry point is positional — `*` for all methods, or a method FQN: +When the run misses the flow and you suspect the entry method is never reached, force analysis onto it with the same `reachability` command plus `--entry-points` — `*` for all methods, or a method FQN: ```bash -opentaint dev debug-run-on-entry-points "com.example.Controller#handle" \ +opentaint test rule reachability \ + --entry-points "com.example.Controller#handle" \ --project-model \ -o /report.sarif \ --ruleset builtin --ruleset ``` -A finding that appears here but not in the full run points to entry-point discovery / reachability, not the dataflow; if it still doesn't appear, localize the kill with step 2. This command is ignored on Spring projects (the entry-point override has no effect there), so for a missed Spring-controller flow rely on step 2 instead +A finding that appears here but not in the full run points to entry-point discovery / reachability, not the dataflow; if it still doesn't appear, localize the kill with step 2. On Spring projects the flag is **additive, not restrictive**: auto-discovered endpoints stay and your method is added if absent — use it only to force-include a method the analyzer never starts from (an endpoint Spring didn't recognize); you can't narrow to a single method ### 4. Classify the cause The killing instruction decides who owns the fix: -- external library method → missing model (step 1 should have caught it; fact-reachability names the exact method) +- external library method → missing or broken model. If the method is NOT in `approximated-external-methods.yaml`, step 1 should have caught it (route to analyze-external-methods + create-*-approximation). If it IS listed (a built-in claims to model it) yet taint dies here, the built-in is wrong for this case — write your own override: passthrough overrides at the rule level, so prefer a passthrough config for the specific method; a dataflow override conflicts with built-ins at load, so fall back to passthrough on that method, or if only a dataflow shape can express the propagation, treat it as an engine issue - something the rule should handle — a mistaken sanitizer, an unmatched sink or source variant → fix the rule - a plain instruction the engine should propagate through (assignment, cast, field read, an already-modeled call), with the rule correct and model complete → engine issue; route to report-analyzer-issue with the trace ## Output - The diagnosis: `file:line` and instruction where taint is killed (or spuriously introduced), and which of the three causes it is -- For an engine issue, the fact-reachability trace up to the last reachable fact — report-analyzer-issue's input +- For an engine issue, the fact-reachability trace from `debug-ifds-fact-reachability.sarif` up to the last reachable fact — report-analyzer-issue's input - The exact debug command(s) used and the model they ran against ## Tracking diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md index 15bc5a7f3..9636643bf 100644 --- a/skills/discover-attack-surface/SKILL.md +++ b/skills/discover-attack-surface/SKILL.md @@ -1,121 +1,84 @@ --- name: discover-attack-surface -description: Walk a JVM project's attack surface area by area and turn each coverage gap into a rule requirement. Use when a project needs rule coverage mapped across its attack-surface areas (requires a built project model) +description: Analyze a dependency package for potential sources and sinks not covered by the built-in rules. Use for the depth pass of attack-surface discovery license: Apache-2.0 metadata: author: opentaint - version: "0.3" + version: "0.2" --- # Skill: Discover Attack Surface -Cover the target's attack surface systematically. Walk a fixed checklist of attack areas, and for each one explore the project sources and its dependencies for untrusted flows the built-in rules miss. Every gap becomes one rule requirement; the checklist records what was explored so no area is silently skipped +Take one library the triage flagged, find how the project actually uses it, and follow untrusted data from the sources it introduces to the dangerous sinks they reach — recording every flow(s) the built-in rules miss as one rule requirement ## Inputs From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default +- Package `` — the flagged library to drill (a `pending` entry in `coverage.yaml`) - Project root `` — the project sources. Default: current directory - Project model `` — the built model. Default: `.opentaint/project` -- Tracking directory `` — where the coverage checklist and rule files are written. Default: `.opentaint/tracking` +- Tracking directory `` — where the coverage record and rule files live. Default: `.opentaint/tracking` +- Loose pieces `` — the running list of sources/sinks no package pass could pair yet. Default: `.opentaint/tracking/lib-pieces.yaml` ## Workflow Requires a built project model — without it you can miss entry points the analyzer actually sees -### 1. Seed the checklist +### 1. Find how the project uses the package -Seed `/coverage.yaml`'s `areas` list with one entry per area below, each `status: pending` and `rules: null` (null until you walk it; `[]` or names once done). These source-side and sink-side classes of taint flow are a minimum — add a project-specific area when a dependency exposes one they don't cover (comments for you, don't write them): +Search threw `` sources for ``'s imports and call sites. List the distinct methods of it the app calls — these, not the library's whole API, are the surface that matters -```yaml -- area: user-input # untrusted data entering: HTTP params/headers/body, RPC, payloads, CLI args, config - status: pending -- area: database # SQL/HQL/NoSQL query construction (SQLi) - status: pending -- area: filesystem # paths built for file read/write/delete (path traversal) - status: pending -- area: command-exec # process or shell execution (command injection) - status: pending -- area: outbound-request # HTTP/URL clients (SSRF) - status: pending -- area: deserialization # object/JSON/XML deserialization of untrusted bytes - status: pending -- area: templating # template or expression evaluation (SSTI, EL injection) - status: pending -- area: xml-parsing # XML/document parsing (XXE) - status: pending -- area: ldap # directory queries (LDAP injection) - status: pending -- area: response-output # untrusted data rendered into a response (XSS) - status: pending -- area: reflection # dynamic class/method loading (code injection) - status: pending -- area: redirect # untrusted URL driving a redirect (open redirect) - status: pending -- area: logging # untrusted data into log/format APIs (log injection) - status: pending -``` - -### 2. Walk every area - -Go through each `pending` area in turn — never skip one. For each, explore both the project and its dependencies: +### 2. Source-first: find the sources and the sinks they feed -- read model for the libraries that expose this area -- search the sources for the matching sources or sinks -- note what untrusted data enters and which dangerous call it can reach +Among the used methods, find the ones that return attacker-controlled data — e.g. HTTP/RPC request data, message-broker payloads, or second-order rows read back from storage — and the ones that are dangerous operations — e.g. query construction, command/file/path ops, deserialization, template/EL evaluation, LDAP/JNDI, reflection. You don't trace every usage by hand: the analyzer does taint propagation at scan time. Identify what the library introduces, and for a source which dangerous sink classes the app exposes that it could feed — at the category level, not call by call. The sink may live in a different library than the source -Then check coverage against the built-in rules (`opentaint dev rules-path`) and anything in `.opentaint/rules`, and decide: +Don't drop a library that only introduces sources — a request or HTTP-client library is the common case: its tainted data is consumed by sinks elsewhere you can't all see, so record the source as a loose piece in `` (step 3) for assemble-lib-rules to pair. Drop only a candidate that isn't genuinely untrusted -- built-ins already detect every real flow here, or the area is absent from this project → no rule; leave `rules: []` -- a real, untrusted flow has no covering rule → propose a rule (step 3) +Verify each source is genuinely attacker-controlled (a request param, header, body, or message payload is; an app constant or server config is not) and each sink genuinely dangerous with tainted input (string-built SQL is; a parameterized query is not) -Verify the flow is real before recording it: the source is genuinely attacker-controlled (a request param, header, body, or message payload is; an app constant or server config is not), and the sink is genuinely dangerous with tainted input (string-built SQL is; a parameterized query is not). A pair that fails this isn't a rule +### 3. Check coverage, record each gap -Update the area's entry in `coverage.yaml` the moment you finish it — set `status: done`, fill `rules` (`[]` or the proposed names), add a one-line `notes` of what you found — then move on. Write per area, not batched at the end, so the walk resumes cleanly and every area carries a record proving it was checked, not skipped +Check the sources, sinks, and their pairings against the built-in rules (`opentaint health --rules`) and anything in `.opentaint/rules`: -### 3. Record each proposed rule +- a built-in already covers the source→sink end to end → no rule +- a source you can pair to a dangerous sink for a real vuln class, not covered → a **join** rule requirement: write one `/rules/.yaml`, named `-` in kebab-case (e.g. `mybatis-sqli`, `webclient-ssrf`), unique and stable — the name is the tracking file and follows the rule downstream. create-rule writes any missing source/sink lib rule and wires the join, referencing a built-in where one fits +- a genuine untrusted source you can't pair to a sink here, or a dangerous sink with no source in reach → append it to ``, not a rule. assemble-lib-rules pairs the loose pieces across packages into joins once every package is drilled — a source whose sink sits in a part of the app this pass doesn't see is the common case -For each gap, add the rule name to its area's `rules:` list and write one `/rules/.yaml`. Name it `-` in kebab-case — the sink technology or framework plus the class, e.g. `mybatis-sqli`, `thymeleaf-ssti`, `resttemplate-ssrf`. It must be unique and stable: the name is the tracking file and follows the rule downstream +State only what a rule author needs: the vuln class, which end a built-in covers and which must be written, and where it lives. Name the framework and the class, not a full traced flow with line numbers — the test project built later reads the real code. List every library the flow crosses under `dependencies` -State only what a rule author needs: the vuln class, which built-in source/sink rules already apply, and which source or sink is missing and must be written. Name the framework and the class where the flow lives — not a full traced flow with line numbers. The test project built later reads the real code to reproduce it +Flip the package's `coverage.yaml` entry to `status: done` and add a one-line `notes` of what you found — write it the moment you finish so the walk resumes cleanly ## Output -- `/coverage.yaml` — every area `done`, each with proposed rules (or `[]`) -- One `/rules/.yaml` per proposed rule, with `stages.description: done`, a short `requirements`, and `dependencies` (exact Maven GAV from the build files) the test project needs -- A brief summary to the caller: the areas covered, then one line per proposed rule (name, vuln class, source→sink). The tracking files hold the detail — don't paste it back +- One `/rules/.yaml` per paired flow, with `stages.description: done`, a short `requirements`, and `dependencies` (exact Maven GAVs from the build files — every library the flow crosses) +- Any unpaired source or sink appended to `` with `disposition: pending` +- The package's `coverage.yaml` entry set `status: done` with a one-line `notes` +- A brief summary to the caller: one line per proposed rule (name, source→sink) and a count of loose pieces left for assembly. The tracking files hold the detail — don't paste it back ## Tracking -`/coverage.yaml` — one entry per area, filled as you walk: +`/coverage.yaml` — flip this package's entry when done: ```yaml -- area: database - status: done # pending | done - rules: [mybatis-sqli] # proposed rule names; [] when built-ins cover it or the area is absent - notes: > - MyBatis 3.5 mappers use ${} interpolation; built-in covers JDBC sinks but not MyBatis ${} -- area: filesystem - status: done - rules: [] - notes: only constant paths; no untrusted data reaches a file API -# ... + - package: org.springframework.web.reactive.function + status: done + notes: ServerRequest source not covered by built-ins; reaches WebClient (SSRF) — webclient-ssrf ``` `/rules/.yaml` — discovery-stage fields only: ```yaml -name: mybatis-sqli +name: webclient-ssrf rule_id: null # filled later finding: null # filled later requirements: > - CWE-89 SQLi via MyBatis ${} interpolation. - source: untrusted HTTP request param — built-in spring source covers it - sink: ${} string interpolation in a @SelectProvider / mapper XML — no built-in; needs a new sink rule - lives in: com.example.mapper.OrderMapper / OrderSqlProvider -dependencies: # exact GAV the test project needs, from the build files - - org.mybatis:mybatis:3.5.13 - - org.springframework:spring-webmvc:5.3.30 + CWE-918 SSRF via Spring WebClient. + source: user-supplied URL from request body — built-in spring source covers it + sink: WebClient.get().uri($UNTRUSTED) — no built-in; needs a new sink rule + lives in: run.halo.app.core.attachment.DefaultAttachmentService / ProxyFilter +dependencies: # every library the flow crosses, exact GAV from the build files + - org.springframework:spring-webflux:6.1.0 stages: description: done test_project: pending @@ -124,13 +87,29 @@ notes: > free-form ``` +`/lib-pieces.yaml` — append a source or sink you couldn't pair; assemble-lib-rules joins it: + +```yaml +sources: + - role: Apache HttpClient response body — data from a server the app calls + package: org.apache.hc.client5.http + dependency: org.apache.httpcomponents.client5:httpclient5:5.3 + disposition: pending # pending | once assembled | dropped: +sinks: + - role: SnakeYAML load — untrusted YAML deserialization + package: org.yaml.snakeyaml + dependency: org.yaml:snakeyaml:2.2 + disposition: pending +``` + ## Engine notes -- Spring projects: the analyzer auto-discovers Spring endpoints, so `user-input` is largely sources the built-ins already see — focus on which sinks those flows reach +- Spring projects: the analyzer auto-discovers Spring endpoints, so `network` inbound sources are largely ones the built-ins already see — focus on which sinks those flows reach - Generic projects: the analyzer treats all public/protected methods of public classes as entry points ## Gotchas - Propose a rule only for a real gap; if a built-in already covers the source→sink, don't duplicate it +- Don't drop a source-only library because you can't trace its sinks — append the source to `` for assemble-lib-rules to pair; drop only a candidate that isn't genuinely untrusted - Requirements name the missing source/sink and where it lives, not a full traced flow — keep them short; the test project reads the real code -- A passing test won't catch a semantically wrong source or sink — verify both are real here, because nothing downstream re-checks it +- Don't grep dependency jars to find usage — read the app's own sources in `` diff --git a/skills/generate-poc/SKILL.md b/skills/generate-poc/SKILL.md index 073c27315..f6a2802bd 100644 --- a/skills/generate-poc/SKILL.md +++ b/skills/generate-poc/SKILL.md @@ -26,6 +26,10 @@ From the caller; if omitted, fall back to the default. Ask only when a required Reuse `` if given. Otherwise build and start the app the way the project expects (`spring-boot:run`, `java -jar`, `docker compose`, …), wait until it's listening, and note the base URL. The PoC must hit a live instance +Bind to `127.0.0.1` (`--server.address=127.0.0.1`, `docker run -p 127.0.0.1:8080:8080`, a compose override on the port mapping) — never `0.0.0.0` or a public interface: a live exploit must not be reachable off-host. A specific non-local IP is fine when the test genuinely needs one, but never the public wildcard + +Once it's listening, record it in the registry (see § Tracking) so the orchestrator can reap it later + ### 2. Map the finding to a live request From the finding's source location find the entry point — the route and method, and the param / header / body field that carries the tainted input — and a payload that drives it to the sink. Common shapes: @@ -52,11 +56,20 @@ Run it. Confirmation needs observable proof — rows returned, file contents, co - The PoC script at `/.py` - The finding's `poc` set to `confirmed` or `failed`, `poc_script` recorded, evidence/reason in `notes` -- If you started the app, leave it running and report its `` so the next PoC can reuse it instead of starting another instance +- If you started the app, register it in `.opentaint/tracking/poc-servers.yaml` and leave it running so the next PoC can reuse it; report the ``. You do not stop it — the orchestrator tears down every registered instance at the end of the PoC phase - Report the outcome to the caller; if failed, call out that the finding is unconfirmed. Do not write `.opentaint/vulnerabilities.md` — main assembles that from the confirmed findings ## Tracking +If you started an instance, append it to `.opentaint/tracking/poc-servers.yaml` (PoCs run one at a time, so the append never races) — the orchestrator reads this to tear instances down (`kind` + `ref` give it the stop command): + +```yaml +servers: + - kind: process # process | container | compose + port: 8080 + ref: "12345" # pid | container id/name | compose file path +``` + In ``, set `poc` and `poc_script` and append the result to `notes`: ```yaml @@ -83,3 +96,5 @@ notes: > - Reproduce, don't theorize — a script you didn't run, or a 200 with no observable effect, is not a confirmation - failed ≠ false positive — couldn't-reproduce isn't proof the code is safe (auth, missing state, wrong payload). Record `failed` and DO NOT flip `verdict` here +- Don't bind a started instance to `0.0.0.0` or a public interface — a running exploit must stay off-host (localhost, or a specific IP the test needs) +- Don't stop instances you started or skip registering them — the orchestrator owns teardown and can only reap what's in `poc-servers.yaml` diff --git a/skills/report-analyzer-issue/SKILL.md b/skills/report-analyzer-issue/SKILL.md index c85a6f604..3b6ee68cf 100644 --- a/skills/report-analyzer-issue/SKILL.md +++ b/skills/report-analyzer-issue/SKILL.md @@ -35,7 +35,7 @@ File a report only for an engine issue debug-rule already confirmed. The diagnos Write `` — this file is the deliverable; never return the diagnosis as chat text only. Assemble from the inputs: -- Test project — `` path, the test command (`test-rules` / `test-approximations`), and the failing `test-result.json` snippet (e.g. a `@PositiveRuleSample` stuck at `falseNegative`) +- Test project — `` path, the test command (`test rule run` / `test approximation run`), and the failing `test-result.json` snippet (e.g. a `@PositiveRuleSample` stuck at `falseNegative`) - Rule / approximation — the ``: a rule's full id and ruleset, or the approximation's target method(s) - Observed vs expected — e.g. expected a finding at `Sink.java:42`; observed none - Where the dataflow dies — `file:line` and the instruction, quoted up to the last reachable fact diff --git a/skills/run-scan/SKILL.md b/skills/run-scan/SKILL.md index 83939a650..4aafe7073 100644 --- a/skills/run-scan/SKILL.md +++ b/skills/run-scan/SKILL.md @@ -19,8 +19,8 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Ruleset `` — Default: `builtin` plus `.opentaint/rules` if present - Rule IDs `` (optional) — full IDs to restrict the scan to, omit to run all loaded rules - SARIF output `` — Default: `.opentaint/results/report.sarif` -- PassThrough config `` (optional) — a passThrough YAML file or a directory of them. Default: `.opentaint/config` -- Dataflow approximations directory `` (optional) — Default: `.opentaint/approximations/src` +- PassThrough config `` (optional) — a passThrough YAML file or a directory of them. Default: `.opentaint/pass-through` +- Dataflow approximations directory `` (optional) — Default: `.opentaint/approximations` ## Workflow @@ -36,7 +36,7 @@ opentaint scan --project-model \ Append optional flags as needed: - `--rule-id ` — restrict to specific rules (repeatable); omit to run all loaded rules -- `--passthrough-approximations ` — apply passThrough configs from a YAML file or a directory of them (OVERRIDE: merged with built-ins at the rule level, a provided rule overrides a built-in only when it matches one in the built-in set; repeatable; replaces the old `--approximations-config`) +- `--passthrough-approximations ` — apply passThrough configs from a YAML file or a directory of them (OVERRIDE: merged with built-ins at the rule level, a provided rule overrides a built-in only when it matches one; repeatable) - `--dataflow-approximations ` — apply code-based approximations (Java sources, auto-compiled; or pre-compiled `.class` dirs, passed through as-is) ## Output @@ -47,16 +47,6 @@ Three files, all next to the SARIF report: 2. `dropped-external-methods.yaml` — methods where dataflow facts were killed (no approximation model) → candidates to approximate; possible source of false negatives 3. `approximated-external-methods.yaml` — methods already modeled -## Finding files - -`scripts/sarif-to-findings.py` turns a SARIF report into one finding tracking file per rule under `.opentaint/tracking/findings/`, bundling each rule's result hashes: - -```bash -python3 scripts/sarif-to-findings.py -o .opentaint/tracking/findings -``` - -Run it on the SARIF you intend to triage. It's idempotent — a re-run adds only result hashes not already present, resets a touched finding's `verdict` to `pending`, and preserves existing verdicts, notes, PoCs, and any triage splits. Grouping is by rule id only; analyze-findings splits a rule's bundle into distinct logical findings - ## Key Flags | Flag | Purpose | @@ -71,7 +61,5 @@ Run it on the SARIF you intend to triage. It's idempotent — a re-run adds only ## Gotchas -- `--rule-id` drops every rule whose full ID is not listed, including library rules referenced via join-mode `refs`. List every rule you need -- `--passthrough-approximations` merges with the built-in passThrough set at the rule level — a provided rule overrides a built-in only if it matches one in the built-in set, otherwise built-ins stay active - Paths fall back to the `.opentaint/` layout when the caller omits them; the caller can override any of them - Duplicate approximation targeting the same class as a built-in errors out diff --git a/skills/run-scan/scripts/sarif-to-findings.py b/skills/run-scan/scripts/sarif-to-findings.py deleted file mode 100644 index c73abf57e..000000000 --- a/skills/run-scan/scripts/sarif-to-findings.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -""" -sarif-to-findings.py — turn an OpenTaint SARIF report into per-rule finding -tracking files under .opentaint/tracking/findings/. - -One file per rule_id, bundling that rule's result hashes into sarif_hashes. -Grouping is trivial (by rule_id) — no clustering. The triage skill -(analyze-findings) later splits a rule's bundle into distinct logical findings. - -Idempotent: re-running after a re-scan adds only result hashes not already -present in any of that rule's finding files, resets the touched file's verdict -to `pending`, and leaves existing verdict/notes/poc and triage splits intact. - -SARIF assumptions — adjust the two helpers below if the real OpenTaint SARIF -differs: -- result.ruleId holds the full rule id (e.g. java/security/sqli.yaml:sqli) -- a stable per-result hash comes from result.fingerprints / partialFingerprints - when present, else is computed from ruleId + locations + code-flow locations -- result.message.text seeds the analyzer report in `notes` -""" -import argparse -import glob -import hashlib -import json -import re -from pathlib import Path - -ADJ = ["brave", "calm", "eager", "fuzzy", "gentle", "jolly", "keen", "lucid", - "merry", "noble", "proud", "quiet", "rapid", "sly", "tidy", "vivid", - "witty", "zesty", "amber", "bold"] -NOUN = ["hopper", "eagle", "otter", "falcon", "maple", "comet", "harbor", - "willow", "pixel", "river", "ember", "cobra", "lotus", "raven", - "quartz", "badger", "cedar", "drake", "finch", "gull"] - - -def docker_name(seed, taken): - """Stable adjective-noun slug from the rule id; suffixed on collision.""" - h = int(hashlib.sha1(seed.encode()).hexdigest(), 16) - base = f"{ADJ[h % len(ADJ)]}-{NOUN[(h // len(ADJ)) % len(NOUN)]}" - name, n = base, 2 - while name in taken: - name, n = f"{base}-{n}", n + 1 - return name - - -# Prefer a stable, named fingerprint kind. vulnerabilitySourceSinkHash is more stable -# than vulnerabilityWithTraceHash — it keys on the source+sink and survives changes to -# the intermediate trace path. Fall back to any fingerprint value, then a content hash. -_FP_PREFERENCE = ("vulnerabilitySourceSinkHash", "vulnerabilityWithTraceHash") - - -def result_hash(res): - fp = res.get("fingerprints") or res.get("partialFingerprints") - if isinstance(fp, dict) and fp: - for pref in _FP_PREFERENCE: - for k, v in fp.items(): - if k.startswith(pref): - return str(v)[:16] - return str(sorted(fp.values())[0])[:16] - parts = [res.get("ruleId", "")] - locs = list(res.get("locations", [])) - for cf in res.get("codeFlows", []): - for tf in cf.get("threadFlows", []): - locs += [st.get("location", {}) for st in tf.get("locations", [])] - for loc in locs: - pl = loc.get("physicalLocation", {}) - parts.append(pl.get("artifactLocation", {}).get("uri", "")) - parts.append(json.dumps(pl.get("region", {}), sort_keys=True)) - return hashlib.sha1("|".join(parts).encode()).hexdigest()[:16] - - -def scan_results(sarif): - """rule_id -> {hash: message}""" - out = {} - for run in sarif.get("runs", []): - for res in run.get("results", []): - rid = res.get("ruleId") or "unknown" - msg = (res.get("message", {}) or {}).get("text", "").strip() - out.setdefault(rid, {})[result_hash(res)] = msg - return out - - -NAME_RE = re.compile(r'^finding_name:\s*(.+?)\s*$', re.M) -RULE_RE = re.compile(r'^rule_id:\s*(.+?)\s*$', re.M) -HASHES_RE = re.compile(r'^sarif_hashes:\s*\[(.*)\]\s*$', re.M) - - -def parse_existing(text): - name = NAME_RE.search(text) - rid = RULE_RE.search(text) - hm = HASHES_RE.search(text) - hashes = [h.strip() for h in hm.group(1).split(",") if h.strip()] if hm else [] - return (name.group(1) if name else None, - rid.group(1) if rid else None, - hashes) - - -def fmt_list(hashes): - return "[" + ", ".join(hashes) + "]" - - -def new_file_text(name, rid, hashes, notes): - body = "\n".join(" " + ln for ln in (notes or "(no analyzer message)").splitlines()) - return (f"finding_name: {name}\n" - f"sarif_hashes: {fmt_list(hashes)}\n" - f"rule_id: {rid}\n" - f"verdict: pending\n" - f"notes: >\n{body}\n" - f"poc: pending\n" - f"poc_script: null\n") - - -def main(): - ap = argparse.ArgumentParser( - description="SARIF -> per-rule finding tracking files (idempotent)") - ap.add_argument("sarif", help="path to report.sarif") - ap.add_argument("-o", "--out", default=".opentaint/tracking/findings", - help="findings dir (default: .opentaint/tracking/findings)") - args = ap.parse_args() - - by_rule = scan_results(json.loads(Path(args.sarif).read_text())) - - out = Path(args.out) - out.mkdir(parents=True, exist_ok=True) - - existing = {} # rule_id -> [(path, hashes)] - taken = set() - for p in sorted(glob.glob(str(out / "*.yaml"))): - name, rid, hashes = parse_existing(Path(p).read_text()) - if name: - taken.add(name) - if rid: - existing.setdefault(rid, []).append((Path(p), hashes)) - - created = updated = unchanged = 0 - for rid, hashmap in sorted(by_rule.items()): - scanned = set(hashmap) - files = existing.get(rid) - if not files: - name = docker_name(rid, taken) - taken.add(name) - notes = "\n".join(sorted({m for m in hashmap.values() if m})) - (out / f"{name}.yaml").write_text( - new_file_text(name, rid, sorted(scanned), notes)) - created += 1 - continue - already = set().union(*(set(h) for _, h in files)) - new = sorted(scanned - already) - if not new: - unchanged += 1 - continue - # add new hashes to the first finding file for this rule; reset verdict - path, hashes = files[0] - merged = sorted(set(hashes) | set(new)) - text = path.read_text() - text = HASHES_RE.sub(lambda m: "sarif_hashes: " + fmt_list(merged), text, count=1) - text = re.sub(r'^verdict:\s*.+$', "verdict: pending", text, count=1, flags=re.M) - path.write_text(text) - updated += 1 - - print(f"findings: {created} created, {updated} updated, {unchanged} unchanged " - f"({len(by_rule)} rules in scan)") - - -if __name__ == "__main__": - main() diff --git a/skills/triage-dependencies/SKILL.md b/skills/triage-dependencies/SKILL.md new file mode 100644 index 000000000..4a2cd5a95 --- /dev/null +++ b/skills/triage-dependencies/SKILL.md @@ -0,0 +1,70 @@ +--- +name: triage-dependencies +description: Mark which of a project's dependency libraries could introduce taint sources or sinks. Use to start attack-surface discovery +license: Apache-2.0 +metadata: + author: opentaint + version: "0.2" +--- + +# Skill: Triage Dependencies + +Read the project's dependency libraries and mark which ones touch a trust boundary — a place untrusted data can enter (source) or a dangerous operation it can reach (sink) — so depth analysis runs only on the libraries that can matter + +## Inputs + +From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default + +- Project root `` — the project sources and build files. Default: current directory +- Project model `` — the built model; its `project.yaml` lists every dependency. Default: `.opentaint/project` +- Tracking directory `` — where the coverage record is written. Default: `.opentaint/tracking` + +## Workflow + +### 1. List the dependencies + +Read `/project.yaml` — its `dependencies:` is every jar on the classpath. Resolve each to the library it is. Most of a large project's jars are transitive infrastructure + +### 2. Mark each library + +For each library decide: could it introduce an attacker-controlled source (e.g. HTTP/RPC request data, message-broker payloads, second-order rows read back and so on) or a dangerous sink (e.g. query construction, command/file/path ops, deserialization, template/EL, LDAP/JNDI, reflection and so on)? + +- clearly irrelevant — build/Gradle plugins, logging, annotations, bytecode tooling (ASM, byte-buddy), test libraries, pure data structures: dismiss +- clearly relevant — web frameworks, query/ORM libraries, HTTP clients, deserializers, template engines, LDAP/JNDI, scripting: flag +- unsure — do a brief peek: grep `` sources for the library's package imports or call sites. If the app never references it and nothing transitive exposes it to untrusted data, dismiss; otherwise flag + +A library the app references only for safe, constant, or framework-internal use is not a flag — flag where untrusted data plausibly enters or a dangerous call is plausibly reachable + +### 3. Record coverage + +Write `/coverage.yaml` (schema below). One `pending` entry per flagged library — these are the depth work-list. Record dismissals as a single bulk entry summarising the categories ruled out, not one row per jar; add an individual `done` row only for a library a reader might expect to be flagged but isn't, with a one-line reason + +## Output + +- `/coverage.yaml` — flagged libraries `status: pending`, dismissals summarised +- A brief summary to the caller: one line per flagged library (package, why) and the dismissed count. The file holds the detail — don't paste it back + +## Tracking + +`/coverage.yaml` — one entry per weighed library: + +```yaml +packages: + - package: org.springframework.web.reactive.function # flagged → depth work-list + status: pending # pending | done + notes: WebFlux functional routing — ServerRequest request data (source); WebClient (SSRF sink) + - package: org.springframework.data.r2dbc + status: pending + notes: reactive DB access — check for string-built query sinks + - package: + status: done # bulk dismissal + notes: > + logging (logback/slf4j), build plugins, annotations, ASM/byte-buddy, test libs, + data structures — no source/sink surface +``` + +## Gotchas + +- Don't grep dependency jars to decide — judge from the library's identity and the app's own usage in `` sources +- Flag on plausibility, not certainty — depth analysis confirms or drops it; a missed library is a missed vulnerability on all other stages, an over-flag only costs one depth pass + From 4515a84c43a359bb5f79d27e74f557be3eeac42f Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Tue, 2 Jun 2026 13:21:28 +0300 Subject: [PATCH 09/54] feat: modify rule creation process --- skills/appsec-agent/SKILL.md | 39 +++++--- .../appsec-agent/references/discover-rules.md | 18 ++-- skills/assemble-lib-rules/SKILL.md | 70 ++++++++------- skills/create-rule/SKILL.md | 21 +++-- skills/create-test-project/SKILL.md | 4 +- skills/create-test-project/references/rule.md | 4 +- skills/discover-attack-surface/SKILL.md | 90 +++++++------------ 7 files changed, 124 insertions(+), 122 deletions(-) diff --git a/skills/appsec-agent/SKILL.md b/skills/appsec-agent/SKILL.md index 58c74ffa3..cc9975423 100644 --- a/skills/appsec-agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -100,7 +100,7 @@ You are the only writer of `.opentaint/tracking/state.yaml` — it records the c On start, and after any compaction, reconstruct position from artifacts before doing anything — never replay a completed phase: - read `state.yaml` and the `tracking/` tree -- skip any phase whose artifact exists: `project.yaml` → build; `coverage.yaml` with every entry `done` and `lib-pieces.yaml` with every entry resolved → discover; `report.sarif` → scan; a rule's `artifact` + `tests_passing: done` → that rule; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd +- skip any phase whose artifact exists: `project.yaml` → build; `coverage.yaml` with every entry `done` plus the `tracking/rules` join requirements → discover; `report.sarif` → scan; a rule's `artifact` + `tests_passing: done` → that rule; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd - detect new work from artifacts, not memory: finding files with `verdict: pending` (a fresh or reset scan) → triage; methods in `dropped-external-methods.yaml` not yet in any approximation unit → approximations ## Tracking layout @@ -111,9 +111,9 @@ The single source of truth for the tracking schema; each skill writes only its o .opentaint/tracking/ state.yaml # you only — levels + phase status coverage.yaml # triage-dependencies seeds, discover-attack-surface fills — one entry per dependency package weighed (deep) - lib-pieces.yaml # discover-attack-surface parks unpaired sources/sinks; assemble-lib-rules resolves them (deep) + surface.yaml # discover-attack-surface — the sources/sinks each package introduces (deep) findings/.yaml # one per logical finding (from the SARIF→finding script; split by triage) - rules/.yaml # one per rule (join requirement — from discover-attack-surface or assemble-lib-rules) + rules/.yaml # one per vuln-class join (requirement by assemble-lib-rules; written + tested next phase) approximations/-passthrough.yaml # simple from→to copies; write-only, scan-verified approximations/-dataflow.yaml # lambda/callback/async; tested on a test project approximations/skipped.yaml # methods the engine asks for but that carry no taint @@ -148,14 +148,20 @@ packages: free-form — what was found and why ``` -lib-pieces.yaml — discover-attack-surface appends a source or sink it couldn't pair; assemble-lib-rules pairs each into a join and resolves its `disposition` (deep): +surface.yaml — discover-attack-surface appends the sources/sinks each package introduces; assemble-lib-rules groups them into join requirements (deep). `builtin: null` ⇒ a new pattern to write next phase; sources are general, sinks carry a `vuln_class`: ```yaml -sources: # likewise a `sinks:` list - - role: Apache HttpClient response body — server-controlled data - package: org.apache.hc.client5.http - dependency: org.apache.httpcomponents.client5:httpclient5:5.3 - disposition: pending # pending | | dropped: +sources: + - package: org.springframework.web.reactive.function.server + idea: ServerRequest body/params — untrusted request data + builtin: null + dependency: org.springframework:spring-webflux:6.1.0 +sinks: + - package: org.springframework.web.reactive.function.client + vuln_class: ssrf + idea: WebClient.get().uri($UNTRUSTED) + builtin: null + dependency: org.springframework:spring-webflux:6.1.0 ``` findings/.yaml — created by the SARIF→finding script; `verdict`/`notes` by analyze-findings; `poc`/`poc_script` by generate-poc: @@ -171,16 +177,21 @@ poc: pending # pending | confirmed | failed poc_script: null # path under .opentaint/pocs/ once generate-poc writes one ``` -rules/.yaml — created by discover-attack-surface or assemble-lib-rules (`description`); `test_project` by create-test-project; `tests_passing` + `rule_id` + `artifact` by create-rule: +rules/.yaml — one per vuln-class join (`` = the class); `description` + `sources`/`sinks` by assemble-lib-rules; `test_project` by create-test-project; `tests_passing` + `rule_id` + `artifact` by create-rule: ```yaml -name: mybatis-sqli +name: ssrf # the vuln class; becomes the join rule's file and id rule_id: null # filled on creation artifact: null # added once the rule file exists finding: null # finding_name; non-null only for suppress-FP -requirements: > # short — what built-ins miss, not a full traced flow - CWE-89 SQLi via MyBatis ${} ; source: HTTP param (built-in spring source) ; sink: ${} in SelectProvider — no built-in, write one ; lives in OrderMapper -dependencies: [org.mybatis:mybatis:3.5.13] +requirements: > # short — the class and what the join wires + CWE-918 SSRF — join every untrusted-data source to the SSRF sink group +sources: # ref a built-in, or a new one to write + - ref: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + - new: ServerRequest body/params — org.springframework.web.reactive.function.server +sinks: + - new: WebClient.get().uri($UNTRUSTED) — org.springframework.web.reactive.function.client; in DefaultAttachmentService +dependencies: [org.springframework:spring-webflux:6.1.0] stages: # pending | in_progress | done description: done test_project: pending diff --git a/skills/appsec-agent/references/discover-rules.md b/skills/appsec-agent/references/discover-rules.md index 5d226f0fc..934c0927a 100644 --- a/skills/appsec-agent/references/discover-rules.md +++ b/skills/appsec-agent/references/discover-rules.md @@ -6,19 +6,23 @@ Delegate triage-dependencies. Inputs: ``, model-dir `.opentaint/pr ## Discover attack surface -Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml`. Inputs each: ``, ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`, lib-pieces `.opentaint/tracking/lib-pieces.yaml`. Each works source-first — finds the package's attacker-controlled sources the app uses, pairs each to a dangerous sink as one `tracking/rules/.yaml` join requirement (`description` stage + short requirements + every GAV the flow crosses), and parks any source or sink it couldn't pair in `lib-pieces.yaml`, then flips its `coverage.yaml` entry to `done`. Returns one line per proposed rule plus a loose-piece count. +Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml`. Inputs each: ``, ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`, surface `.opentaint/tracking/surface.yaml`. This phase only describes ideas — each agent catalogues the package's untrusted-data sources and dangerous sinks into `surface.yaml` (new patterns vs a built-in ref; sinks tagged by vuln class), writes no rule and runs no test, then flips its `coverage.yaml` entry to `done`. Returns the sources/sinks found. ## Assemble lib rules -Once the discover fan-out is done and `lib-pieces.yaml` has `pending` entries, delegate assemble-lib-rules. Inputs: lib-pieces `.opentaint/tracking/lib-pieces.yaml`, ``, tracking-dir `.opentaint/tracking`. With every package's loose pieces in one view it pairs each into a join `tracking/rules/.yaml` (source whose sink lives in another package, sink reached by a built-in source), and resolves every piece's `disposition` to a join name or `dropped: `. One agent — it needs the global view to dedup; fan out by vuln class only if the piece set is large. +Once the discover fan-out is done, delegate assemble-lib-rules. Inputs: surface `.opentaint/tracking/surface.yaml`, tracking-dir `.opentaint/tracking`. With the whole surface in one view it groups sinks by vuln class and writes one `tracking/rules/.yaml` join requirement per class (every source + that class's sink group, new combinations only) — still description, not rule files. One agent for the global view; fan out by vuln class only if the surface is large. -Then a quick area cross-check: across network, persistence, environment, serialization, rendering, naming, execution, messaging — is every boundary a dependency exposes either covered by built-ins or now carrying a rule? If a boundary has a relevant dependency but produced no rule and no clear reason, dispatch a depth pass for it. Set `phases.discover: done` once every `coverage.yaml` entry is `done` and every `lib-pieces.yaml` entry is resolved. +Then a quick area cross-check: across network, persistence, environment, serialization, rendering, naming, execution, messaging — is every boundary a dependency exposes either covered by built-ins or now carrying a join requirement? If a boundary has a relevant dependency but produced no requirement and no clear reason, dispatch a depth pass for it. Set `phases.discover: done` once every `coverage.yaml` entry is `done` and the join requirements are written. ## Rules -Fan out across rule units in parallel. Each unit is a two-step pipeline — dispatch the steps one at a time, waiting for the prior step's artifact before the next: +Write and test the rules from the join requirements in two passes — sources first (one agent, since a new source is shared across every class's join), then sinks + joins in parallel. -1. create-test-project — Inputs: spec = the rule's `requirements`, ``, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the tracking file. Sets `test_project: done` -2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/pass-through` / `.opentaint/approximations`. Iterates `opentaint test rule run` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` +**Pass 1 — source lib rules (one agent).** Dispatch one create-rule to author every new source lib rule the requirements name (the `new:` sources across `tracking/rules/*.yaml`). Inputs: requirements = the `new:` sources, rules-dir `.opentaint/rules`. A source lib rule isn't testable alone — it's verified by the joins that ref it in pass 2. Wait for it to finish before pass 2, so the joins can ref the source rules it wrote. -If create-rule reports the test project drops a library method on the rule's flow, the rule can't be verified until that method is modeled — route the dropped methods through the approximation loop (references/approximations.md; they're real library methods the main scan needs too), then re-dispatch create-rule with the approximation dirs. If it reports non-convergence with nothing dropped, load references/escalation.md. A rule's `tests_passing` stays `pending` until its samples pass; set `phases.rules: done` once every rule's is done. +**Pass 2 — sinks + joins (parallel, per vuln class).** Fan out across the `tracking/rules/.yaml` join requirements. Each unit is a two-step pipeline — dispatch the steps one at a time, waiting for the prior step's artifact: + +1. create-test-project — Inputs: spec = the requirement's `sources`/`sinks`, ``, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the requirement. Writes synthetic samples — each new sink fed by a known built-in source, each source (incl. the pass-1 ones) into the class's sink. Sets `test_project: done` +2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/pass-through` / `.opentaint/approximations`. Writes the class's new sink lib rules + the join (ref every source), iterates `opentaint test rule run` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` + +If create-rule reports the test project drops a library method on the rule's flow, the rule can't be verified until that method is modeled — route the dropped methods through the approximation loop (references/approximations.md; they're real library methods the main scan needs too), then re-dispatch create-rule with the approximation dirs. If it reports non-convergence with nothing dropped, load references/escalation.md. A join's `tests_passing` stays `pending` until its samples pass; set `phases.rules: done` once every join's is done. diff --git a/skills/assemble-lib-rules/SKILL.md b/skills/assemble-lib-rules/SKILL.md index 28e5608e6..6be8ede61 100644 --- a/skills/assemble-lib-rules/SKILL.md +++ b/skills/assemble-lib-rules/SKILL.md @@ -1,6 +1,6 @@ --- name: assemble-lib-rules -description: Pair the unpaired sources and sinks left by discovery into join rules. Use for assembling lib rules +description: Group the discovered sources and sinks into per-vuln-class join rule requirements. Use after the discover-attack-surface fan-out, to describe the rules the next phase will write license: Apache-2.0 metadata: author: opentaint @@ -9,60 +9,70 @@ metadata: # Skill: Assemble Lib Rules -The per-package discovery passes each see only their own library, so a source whose sink sits elsewhere is parked unpaired. With the loose pieces from every package in front of you, pair each into a join — the place a source and a sink finally become a detectable vulnerability +The per-package passes catalogue sources and sinks but never pair them. With the whole surface inventory in front of you, describe the join rules the next phase will build — one per vuln class, each wiring every source to that class's sinks, mirroring the built-in security rules ## Inputs From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -- Loose pieces `` — the unpaired sources/sinks from discovery. Default: `.opentaint/tracking/lib-pieces.yaml` -- Project root `` — the project sources, to confirm a source can actually reach a sink. Default: current directory -- Tracking directory `` — where rule requirements are written. Default: `.opentaint/tracking` +- Surface inventory `` — the discovered sources/sinks. Default: `.opentaint/tracking/surface.yaml` +- Tracking directory `` — where the join requirements are written. Default: `.opentaint/tracking` Built-in rules are available at `opentaint health --rules` ## Workflow -### 1. Read the pieces and what's already covered +### 1. Read the surface and the built-ins -Read ``, the built-in rules (`opentaint health --rules`), `.opentaint/rules`, and the join requirements already in `/rules`. A piece a built-in or an existing join already covers needs no new rule +Read `` and the built-in rules (`opentaint health --rules`). Note which built-in source/sink lib rules already exist, to ref -### 2. Pair source-first +### 2. Group sinks by vuln class -For each `pending` source, find the dangerous sinks it can reach — among the loose sinks, the built-in sink rules, and the app's own dangerous operations. Reach is a code-level question (does the source's data flow toward that sink anywhere in ``), not a taint trace — the scan does the tracing; you decide the pairing is plausible. Then per real pairing write one join requirement `/rules/.yaml`, named `-` in kebab-case, naming the source end and sink end (which a built-in covers, which must be written) and every library the flow crosses under `dependencies`. Set that source piece's `disposition` to the join name +The sinks in `` carry a `vuln_class`; group them. A class needs a join requirement if it has a **new** sink, or if there's any **new** source (a new source must be wired to every class's sink group). Skip a class only when it has no new sink and there's no new source — the built-in join already covers it -### 3. Mop up sinks, then resolve every piece +### 3. Describe one join requirement per class -A `pending` sink a source you just placed feeds is already in a join; a loose sink reached only by a built-in source gets its own join. Then resolve what's left: a source with no dangerous sink anywhere, a sink with no source in reach, or a piece a built-in already covers → set `disposition: dropped: `. Leave no `pending` entry — an unresolved piece is an un-modeled source or sink +For each class, write one `/rules/.yaml` (`` = the vuln class), naming: + +- every source (built-in refs + the new ones from ``) — a join aggregates them all, like the built-ins +- that class's sink group (built-in refs + new) +- every library the rule crosses under `dependencies` + +A join wires only combinations with a **new** end (new source → any sink, any source → new sink); a built-in source → built-in sink pair is already covered by the built-in join, so leaving it out keeps the join from double-reporting ## Output -- One `/rules/.yaml` per join assembled, schema as discover-attack-surface writes it (`stages.description: done`, short `requirements`, `dependencies`) -- Every `` entry resolved — `disposition` is a join name or `dropped: ` -- A brief summary to the caller: one line per join (name, source→sink) and the paired/dropped counts. The tracking files hold the detail — don't paste it back +- One `/rules/.yaml` per vuln-class join, with `stages.description: done`, its `sources`/`sinks`, and `dependencies` +- A brief summary to the caller: one line per join (class, source/sink count, which ends are new). The files hold the detail — don't paste it back ## Tracking -`` — resolve each entry's `disposition`: +`/rules/.yaml` — the join requirement the next phase builds and tests: ```yaml -sources: - - role: Apache HttpClient response body — data from a server the app calls - package: org.apache.hc.client5.http - dependency: org.apache.httpcomponents.client5:httpclient5:5.3 - disposition: httpclient-ssrf # the join it went into +name: ssrf # = the vuln class; becomes the join rule's file and id +rule_id: null # filled later +artifact: null +finding: null +requirements: > + CWE-918 SSRF — join every untrusted-data source to the SSRF sink group +sources: # ref a built-in, or a new one to write + - ref: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + - new: ServerRequest body/params — org.springframework.web.reactive.function.server sinks: - - role: SnakeYAML load — untrusted YAML deserialization - package: org.yaml.snakeyaml - dependency: org.yaml:snakeyaml:2.2 - disposition: "dropped: no untrusted source reaches a SnakeYAML load in this app" + - new: WebClient.get().uri($UNTRUSTED) — org.springframework.web.reactive.function.client; in DefaultAttachmentService / ProxyFilter +dependencies: + - org.springframework:spring-webflux:6.1.0 +stages: + description: done + test_project: pending + tests_passing: pending +notes: > + free-form ``` -The join requirements themselves use the `rules/.yaml` schema discover-attack-surface writes - ## Gotchas -- Write requirements, not rule files — create-rule authors the lib source/sink YAMLs and the join from the requirement downstream -- Pair only a flow that exists in the app — a join whose source can't reach its sink wastes a test project and never converges; confirm reachability in `` -- Reference a built-in source or sink where one fits rather than requiring a new one -- Resolve every piece — drop with a reason, never silently leave one `pending` +- Describe, don't write — emit requirements only; rules are written and tested in the next phase +- One join per vuln class, aggregating every source — don't write a separate join per source or per package +- Ref a built-in source or sink rather than re-declaring it diff --git a/skills/create-rule/SKILL.md b/skills/create-rule/SKILL.md index fb96c5200..03f059c97 100644 --- a/skills/create-rule/SKILL.md +++ b/skills/create-rule/SKILL.md @@ -11,6 +11,8 @@ metadata: Create a pattern rule for a vulnerability class, then test it against the prepared test project and fix it until every sample passes +Two roles: a **source pass** writes the source lib rules the requirements name (a source isn't testable alone — the joins that ref it verify it later, so author and stop); a **sink pass**, per vuln class, writes that class's sink lib rules and the one join that refs every source, then tests + ## Inputs From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default @@ -86,14 +88,14 @@ rules: ### 3. Create the security rule (join mode) -Write it at `/java/security/.yaml` — name the file and `id` after the rule name from the tracking file. Wire the sources and sinks (built-in or custom) via `refs`: +One join per vuln class — write it at `/java/security/.yaml`, naming the file and `id` after the class (the requirement's `name`). Ref every source (built-in + the new per-package ones) and the class's sink group, and wire each source to the sink in `on:`, like the built-in `java/security/ssrf.yaml`: ```yaml rules: - - id: my-vulnerability + - id: sql-injection severity: ERROR message: >- - Untrusted data flows to dangerous operation + Untrusted data flows to a dangerous operation metadata: cwe: CWE-89 short-description: SQL Injection via untrusted input @@ -101,14 +103,19 @@ rules: mode: join join: refs: - - rule: java/lib/generic/my-source.yaml#my-custom-source - as: source - - rule: java/lib/generic/my-sink.yaml#my-custom-sink + - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + as: servlet-source + - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source + as: spring-source + - rule: java/lib//my-new-sink.yaml#my-new-sink as: sink on: - - 'source.$UNTRUSTED -> sink.$UNTRUSTED' + - 'servlet-source.$UNTRUSTED -> sink.$UNTRUSTED' + - 'spring-source.$UNTRUSTED -> sink.$UNTRUSTED' ``` +Wire only combinations with a new end — a built-in source → built-in sink pair is already covered by the built-in join, so repeating it here double-reports + ### 4. Test until success Run the rule tests against the compiled test project; iterate the rule and re-run `test rule run` until every sample passes: diff --git a/skills/create-test-project/SKILL.md b/skills/create-test-project/SKILL.md index 0e6a41be5..8ef03cf1b 100644 --- a/skills/create-test-project/SKILL.md +++ b/skills/create-test-project/SKILL.md @@ -44,9 +44,9 @@ opentaint test approximation init \ --dependency "io.projectreactor:reactor-core:3.8.5" ``` -### 2. Read the real flow, then write samples +### 2. Read the real signatures, then write samples -The requirements only name the source/sink and its framework. Before writing, find that source and sink in `` and read the actual method signatures, annotations, and how the tainted value is built. The samples must mirror that code, not a guess — a sample built on the wrong signature compiles but verifies nothing +The requirements name sources and sinks. For each new source and new sink, find it in `` and read its real method signature and annotations — the pattern matches on those, so a sample built on the wrong signature compiles but verifies nothing. The flow itself is minimal, not the app's real path: to exercise a new sink, pass a known (built-in) source's value straight into it; to exercise a new source, pass its value straight into a known (built-in) sink Write Java samples under `/src/main/java/test/`, each annotated with its expected verdict — `@PositiveRuleSample` (must flag) or `@NegativeRuleSample` (must not). `value` is the rule path relative to the ruleset root (with `.yaml`), `id` the short id from the YAML — not the full `--rule-id` used by `opentaint scan`. One expected verdict per sample. Split the samples across files however groups most logically — don't cram unrelated ones into a single class diff --git a/skills/create-test-project/references/rule.md b/skills/create-test-project/references/rule.md index d44953266..3b824fb56 100644 --- a/skills/create-test-project/references/rule.md +++ b/skills/create-test-project/references/rule.md @@ -2,8 +2,8 @@ ## Samples -- `@PositiveRuleSample` — reproduce the vulnerability from the requirements: tainted input from the real source flowing through the real hops into the dangerous sink, mirroring the actual signatures and annotations -- `@NegativeRuleSample` — a flow the rule must not flag: the safe (sanitized or parameterized) version of the same operation, or a confirmed false positive you're narrowing the rule against. Keep it realistic, not stripped to constants +- `@PositiveRuleSample` — a minimal flow that must flag: a known (built-in) source's value passed straight into the new sink, or the new source's value into a known (built-in) sink — real signatures, no extra hops. One per new source and per new sink; `value`/`id` point at the join rule +- `@NegativeRuleSample` — the safe (sanitized or parameterized) variant of the same, which must not flag; or a confirmed false positive you're narrowing the rule against. Keep it realistic, not stripped to constants ```java package test; diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md index 9636643bf..9a8496d86 100644 --- a/skills/discover-attack-surface/SKILL.md +++ b/skills/discover-attack-surface/SKILL.md @@ -1,6 +1,6 @@ --- name: discover-attack-surface -description: Analyze a dependency package for potential sources and sinks not covered by the built-in rules. Use for the depth pass of attack-surface discovery +description: Analyze a dependency package for potential sources and sinks not covered by the built-in rules. Use for the depth pass of attack-surface discovery, one package at a time, after triage-dependencies flags it license: Apache-2.0 metadata: author: opentaint @@ -9,7 +9,7 @@ metadata: # Skill: Discover Attack Surface -Take one library the triage flagged, find how the project actually uses it, and follow untrusted data from the sources it introduces to the dangerous sinks they reach — recording every flow(s) the built-in rules miss as one rule requirement +Take one library the triage flagged and record the untrusted-data sources and dangerous sinks it introduces ## Inputs @@ -18,8 +18,8 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Package `` — the flagged library to drill (a `pending` entry in `coverage.yaml`) - Project root `` — the project sources. Default: current directory - Project model `` — the built model. Default: `.opentaint/project` -- Tracking directory `` — where the coverage record and rule files live. Default: `.opentaint/tracking` -- Loose pieces `` — the running list of sources/sinks no package pass could pair yet. Default: `.opentaint/tracking/lib-pieces.yaml` +- Tracking directory `` — where the coverage record and surface inventory live. Default: `.opentaint/tracking` +- Surface inventory `` — the running list of discovered sources/sinks. Default: `.opentaint/tracking/surface.yaml` ## Workflow @@ -27,89 +27,59 @@ Requires a built project model — without it you can miss entry points the anal ### 1. Find how the project uses the package -Search threw `` sources for ``'s imports and call sites. List the distinct methods of it the app calls — these, not the library's whole API, are the surface that matters +Search through `` sources for ``'s imports and call sites. List the distinct methods of it the app calls — these, not the library's whole API, are the surface that matters -### 2. Source-first: find the sources and the sinks they feed +### 2. Identify sources and sinks -Among the used methods, find the ones that return attacker-controlled data — e.g. HTTP/RPC request data, message-broker payloads, or second-order rows read back from storage — and the ones that are dangerous operations — e.g. query construction, command/file/path ops, deserialization, template/EL evaluation, LDAP/JNDI, reflection. You don't trace every usage by hand: the analyzer does taint propagation at scan time. Identify what the library introduces, and for a source which dangerous sink classes the app exposes that it could feed — at the category level, not call by call. The sink may live in a different library than the source +Among the used methods, pick out the **sources** — methods returning attacker-controlled data (HTTP/RPC request data, message-broker payloads, second-order rows read back) — and the **sinks** — dangerous operations (query construction, command/file/path ops, deserialization, template/EL evaluation, LDAP/JNDI, reflection). Catalogue each end on its own; don't trace a flow between them — the analyzer pairs them at scan time -Don't drop a library that only introduces sources — a request or HTTP-client library is the common case: its tainted data is consumed by sinks elsewhere you can't all see, so record the source as a loose piece in `` (step 3) for assemble-lib-rules to pair. Drop only a candidate that isn't genuinely untrusted +For each, check whether a built-in rule already matches it (`opentaint health --rules` + `.opentaint/rules`); a built-in match records its ref instead of a new idea. Tag each sink with its vuln class (`ssrf`, `sqli`, `path-traversal`, …); sources aren't class-tagged -Verify each source is genuinely attacker-controlled (a request param, header, body, or message payload is; an app constant or server config is not) and each sink genuinely dangerous with tainted input (string-built SQL is; a parameterized query is not) +Verify each is real before recording it: a source is genuinely attacker-controlled (a request param, header, body, or message payload is; an app constant or server config is not), a sink genuinely dangerous with tainted input (string-built SQL is; a parameterized query is not) -### 3. Check coverage, record each gap +### 3. Record into the surface inventory -Check the sources, sinks, and their pairings against the built-in rules (`opentaint health --rules`) and anything in `.opentaint/rules`: - -- a built-in already covers the source→sink end to end → no rule -- a source you can pair to a dangerous sink for a real vuln class, not covered → a **join** rule requirement: write one `/rules/.yaml`, named `-` in kebab-case (e.g. `mybatis-sqli`, `webclient-ssrf`), unique and stable — the name is the tracking file and follows the rule downstream. create-rule writes any missing source/sink lib rule and wires the join, referencing a built-in where one fits -- a genuine untrusted source you can't pair to a sink here, or a dangerous sink with no source in reach → append it to ``, not a rule. assemble-lib-rules pairs the loose pieces across packages into joins once every package is drilled — a source whose sink sits in a part of the app this pass doesn't see is the common case - -State only what a rule author needs: the vuln class, which end a built-in covers and which must be written, and where it lives. Name the framework and the class, not a full traced flow with line numbers — the test project built later reads the real code. List every library the flow crosses under `dependencies` - -Flip the package's `coverage.yaml` entry to `status: done` and add a one-line `notes` of what you found — write it the moment you finish so the walk resumes cleanly +Append each source and sink to `` (schema below) — for a new one, a short idea of the pattern and where it lives; for a covered one, the built-in ref. Then flip the package's `coverage.yaml` entry to `status: done` with a one-line `notes`. Write it the moment you finish so the walk resumes cleanly ## Output -- One `/rules/.yaml` per paired flow, with `stages.description: done`, a short `requirements`, and `dependencies` (exact Maven GAVs from the build files — every library the flow crosses) -- Any unpaired source or sink appended to `` with `disposition: pending` +- Sources and sinks the package introduces appended to `` - The package's `coverage.yaml` entry set `status: done` with a one-line `notes` -- A brief summary to the caller: one line per proposed rule (name, source→sink) and a count of loose pieces left for assembly. The tracking files hold the detail — don't paste it back +- A brief summary to the caller: the sources and sinks found (one line each, new vs built-in-covered). The inventory holds the detail — don't paste it back ## Tracking `/coverage.yaml` — flip this package's entry when done: ```yaml - - package: org.springframework.web.reactive.function + - package: org.springframework.web.reactive.function.client status: done - notes: ServerRequest source not covered by built-ins; reaches WebClient (SSRF) — webclient-ssrf -``` - -`/rules/.yaml` — discovery-stage fields only: - -```yaml -name: webclient-ssrf -rule_id: null # filled later -finding: null # filled later -requirements: > - CWE-918 SSRF via Spring WebClient. - source: user-supplied URL from request body — built-in spring source covers it - sink: WebClient.get().uri($UNTRUSTED) — no built-in; needs a new sink rule - lives in: run.halo.app.core.attachment.DefaultAttachmentService / ProxyFilter -dependencies: # every library the flow crosses, exact GAV from the build files - - org.springframework:spring-webflux:6.1.0 -stages: - description: done - test_project: pending - tests_passing: pending -notes: > - free-form + notes: WebClient request methods — SSRF sink not covered by built-ins; no new source ``` -`/lib-pieces.yaml` — append a source or sink you couldn't pair; assemble-lib-rules joins it: +`/surface.yaml` — append what the package introduces (`builtin: null` ⇒ new, to be written next phase): ```yaml -sources: - - role: Apache HttpClient response body — data from a server the app calls - package: org.apache.hc.client5.http - dependency: org.apache.httpcomponents.client5:httpclient5:5.3 - disposition: pending # pending | once assembled | dropped: -sinks: - - role: SnakeYAML load — untrusted YAML deserialization - package: org.yaml.snakeyaml - dependency: org.yaml:snakeyaml:2.2 - disposition: pending +sources: # general untrusted-data sources + - package: org.springframework.web.reactive.function.server + idea: ServerRequest body/params/headers — untrusted request data; in RouterFunctions + builtin: null + dependency: org.springframework:spring-webflux:6.1.0 +sinks: # tagged by vuln class + - package: org.springframework.web.reactive.function.client + vuln_class: ssrf + idea: WebClient.get().uri($UNTRUSTED); in DefaultAttachmentService / ProxyFilter + builtin: null + dependency: org.springframework:spring-webflux:6.1.0 ``` ## Engine notes -- Spring projects: the analyzer auto-discovers Spring endpoints, so `network` inbound sources are largely ones the built-ins already see — focus on which sinks those flows reach +- Spring projects: the analyzer auto-discovers Spring endpoints, so `network` inbound sources are largely ones the built-ins already see — focus on the sinks - Generic projects: the analyzer treats all public/protected methods of public classes as entry points ## Gotchas -- Propose a rule only for a real gap; if a built-in already covers the source→sink, don't duplicate it -- Don't drop a source-only library because you can't trace its sinks — append the source to `` for assemble-lib-rules to pair; drop only a candidate that isn't genuinely untrusted -- Requirements name the missing source/sink and where it lives, not a full traced flow — keep them short; the test project reads the real code +- Describe, don't write — record source/sink ideas only; rules are written and tested in the next phase +- Don't re-declare a built-in source or sink — record its ref instead - Don't grep dependency jars to find usage — read the app's own sources in `` From 7b1610dfede2d519b16ad4df9ef5ae0441b6d7cd Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Thu, 4 Jun 2026 20:39:49 +0300 Subject: [PATCH 10/54] fix: a few more fixes --- cli/cmd/test_init.go | 47 +++++-- .../example/src/main/java/test/Taint.java | 21 +++ cli/internal/testrule/testrule.go | 50 +++++++ skills/analyze-findings/SKILL.md | 2 +- skills/appsec-agent/SKILL.md | 132 +++++++++--------- .../appsec-agent/references/approximations.md | 4 +- .../appsec-agent/references/discover-rules.md | 22 ++- skills/appsec-agent/references/poc.md | 4 +- .../references/reproduce-vulnerability.md | 14 -- skills/appsec-agent/references/scan.md | 2 + skills/appsec-agent/references/suppress-fp.md | 9 -- skills/assemble-lib-rules/SKILL.md | 92 +++++++----- .../SKILL.md | 35 ++++- skills/create-rule/SKILL.md | 82 ++++++----- skills/create-test-project/SKILL.md | 40 +++--- skills/create-test-project/references/rule.md | 29 ++-- skills/discover-attack-surface/SKILL.md | 71 ++++++---- skills/generate-poc/SKILL.md | 2 + 18 files changed, 401 insertions(+), 257 deletions(-) create mode 100644 cli/internal/testrule/example/src/main/java/test/Taint.java create mode 100644 cli/internal/testrule/testrule.go delete mode 100644 skills/appsec-agent/references/reproduce-vulnerability.md delete mode 100644 skills/appsec-agent/references/suppress-fp.md diff --git a/cli/cmd/test_init.go b/cli/cmd/test_init.go index 8db775ac5..c1b8dc2bf 100644 --- a/cli/cmd/test_init.go +++ b/cli/cmd/test_init.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/seqra/opentaint/internal/testapprox" + "github.com/seqra/opentaint/internal/testrule" "github.com/seqra/opentaint/internal/testutil" "github.com/seqra/opentaint/internal/utils" "github.com/spf13/cobra" @@ -15,23 +16,45 @@ import ( var initRuleProjectDeps []string var initApproxProjectDeps []string +var initRuleSinksOnly bool +var initRuleSourcesOnly bool var testRuleInitCmd = &cobra.Command{ Use: "init ", - Short: "Bootstrap a rule test project with build.gradle.kts and test utility JAR", - Long: `Creates a minimal Gradle project structure for testing OpenTaint rules. - -The project includes: - - build.gradle.kts with compile-only dependencies - - settings.gradle.kts - - libs/opentaint-sast-test-util.jar (provides @PositiveRuleSample and @NegativeRuleSample annotations) - - src/main/java/test/ directory for test sample sources + Short: "Bootstrap rule test projects (sinks and/or sources) with the generic Taint marker", + Long: `Creates the rule test projects under : a 'sinks' project (a package's sink +lib rules tested against the generic Taint source) and a 'sources' project (a package's source +lib rules tested against the generic Taint sink). Pass --sinks-only or --sources-only for a +package that has only one side. + +Each project includes: + - build.gradle.kts with compile-only dependencies, settings.gradle.kts + - libs/opentaint-sast-test-util.jar (provides @PositiveRuleSample and @NegativeRuleSample) + - src/main/java/test/ with Taint.java (the generic source()/sink()) for test sample sources + - test-rules/java/lib/test/generic-{source,sink}.yaml — the marker lib rules an agent refs + from a test join; these and the test join live only here, never in .opentaint/rules, so + they never reach the main project scan Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { - bootstrapTestProject(args[0], "opentaint-rule-test", initRuleProjectDeps) - fmt.Printf("Rule test project initialized at %s\n", args[0]) + if initRuleSinksOnly && initRuleSourcesOnly { + out.Fatalf("--sinks-only and --sources-only are mutually exclusive") + } + kinds := []string{"sinks", "sources"} + if initRuleSinksOnly { + kinds = []string{"sinks"} + } else if initRuleSourcesOnly { + kinds = []string{"sources"} + } + for _, kind := range kinds { + dir := filepath.Join(args[0], kind) + bootstrapTestProject(dir, "opentaint-rule-test-"+kind, initRuleProjectDeps) + if err := testrule.Scaffold(dir); err != nil { + out.Fatalf("Failed to scaffold rule test project: %s", err) + } + fmt.Printf("Rule test project (%s) initialized at %s\n", kind, dir) + } }, } @@ -65,6 +88,10 @@ func init() { testRuleCmd.AddCommand(testRuleInitCmd) testRuleInitCmd.Flags().StringArrayVar(&initRuleProjectDeps, "dependency", nil, "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") + testRuleInitCmd.Flags().BoolVar(&initRuleSinksOnly, "sinks-only", false, + "Scaffold only the sinks test project (a package with no sources)") + testRuleInitCmd.Flags().BoolVar(&initRuleSourcesOnly, "sources-only", false, + "Scaffold only the sources test project (a package with no sinks)") testApproximationCmd.AddCommand(testApproximationInitCmd) testApproximationInitCmd.Flags().StringArrayVar(&initApproxProjectDeps, "dependency", nil, diff --git a/cli/internal/testrule/example/src/main/java/test/Taint.java b/cli/internal/testrule/example/src/main/java/test/Taint.java new file mode 100644 index 000000000..8df75faa1 --- /dev/null +++ b/cli/internal/testrule/example/src/main/java/test/Taint.java @@ -0,0 +1,21 @@ +package test; + +/** + * Generic taint marker for rule test projects. {@code source()} is generic so it + * assigns to any type without a cast (it erases to {@code Object}); {@code sink(Object)} + * accepts any value. Matched by the bundled generic-source / generic-sink lib rules, so a + * package's source/sink lib rules can be exercised against a fixed, type-agnostic counterpart. + */ +public final class Taint { + + private Taint() { + } + + @SuppressWarnings("unchecked") + public static T source() { + return (T) new Object(); + } + + public static void sink(Object value) { + } +} diff --git a/cli/internal/testrule/testrule.go b/cli/internal/testrule/testrule.go new file mode 100644 index 000000000..613fe2845 --- /dev/null +++ b/cli/internal/testrule/testrule.go @@ -0,0 +1,50 @@ +// Package testrule bundles the generic Taint source/sink lib rules and the Taint +// helper scaffolded into a rule test project, so a package's source/sink lib rules can +// be exercised against a fixed, type-agnostic counterpart (the generic marker), the way +// testapprox bundles the fixed approximation rule. +package testrule + +import ( + _ "embed" + "fmt" + "os" + "path/filepath" +) + +//go:embed example/src/main/java/test/Taint.java +var taintJava []byte + +//go:embed example/rules/java/lib/test/generic-source.yaml +var genericSource []byte + +//go:embed example/rules/java/lib/test/generic-sink.yaml +var genericSink []byte + +// Marker locations, relative to the test project root. The marker lib rules and the +// test join an agent writes alongside them live only under MarkersDir — never in +// .opentaint/rules — so they never reach the main project scan. The rule paths double +// as the values an agent refs from a test join (relative to the test-rules root). +const ( + MarkersDir = "test-rules" + GenericSourceRule = "java/lib/test/generic-source.yaml" + GenericSinkRule = "java/lib/test/generic-sink.yaml" +) + +// Scaffold writes the Taint helper into the project sources and the generic +// source/sink marker lib rules into the project's test-rules ruleset. +func Scaffold(projectDir string) error { + files := map[string][]byte{ + filepath.Join(projectDir, "src", "main", "java", "test", "Taint.java"): taintJava, + filepath.Join(projectDir, MarkersDir, filepath.FromSlash(GenericSourceRule)): genericSource, + filepath.Join(projectDir, MarkersDir, filepath.FromSlash(GenericSinkRule)): genericSink, + } + for path, content := range files { + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return fmt.Errorf("create %s: %w", filepath.Dir(path), err) + } + if err := os.WriteFile(path, content, 0o644); err != nil { + return fmt.Errorf("write %s: %w", filepath.Base(path), err) + } + } + return nil +} diff --git a/skills/analyze-findings/SKILL.md b/skills/analyze-findings/SKILL.md index 1b91787ad..da21ea622 100644 --- a/skills/analyze-findings/SKILL.md +++ b/skills/analyze-findings/SKILL.md @@ -37,7 +37,7 @@ The results in the file all fired one rule, but may be several different vulnera Verdict each logical finding from its flow: - TP — the source is attacker-controlled, the sink is genuinely dangerous with that input, and nothing sanitizes it in between -- FP — a sanitizer/validator neutralizes it, the source isn't actually attacker-controlled (config, constant, server-set), the sink is safe for this input (parameterized, escaped), or the path is infeasible. Record which one, so the suppress-FP stage knows what to narrow +- FP — a sanitizer/validator neutralizes it, the source isn't actually attacker-controlled (config, constant, server-set), the sink is safe for this input (parameterized, escaped), or the path is infeasible. Record which one Set `verdict` and append the reasoning to `notes`, below the analyzer report already seeded there diff --git a/skills/appsec-agent/SKILL.md b/skills/appsec-agent/SKILL.md index cc9975423..c88c9bc7b 100644 --- a/skills/appsec-agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -27,40 +27,27 @@ After installing, run `opentaint health` to confirm the autobuilder/analyzer/rul ## Choose a workflow -Begin by asking the user which workflow to run — a single AskUserQuestion offering the presets only, each option's description giving its composition: +Begin by asking the user two things — two separate AskUserQuestion calls, scan level then triage level. Record the chosen `scan_level` and `triage_level` in `state.yaml`: -- fast — scan: lite, triage: static -- default — scan: normal, triage: static, suppress-FP: optional -- ultra — scan: deep, triage: dynamic, suppress-FP: on -- reproduce-vulnerability — anchored on a vulnerability the user asserts exists; deep scan + dynamic triage +1. Scan level — `lite` · `normal` · `deep` + - lite — build + scan with existing rules + - normal — + approximation iteration + - deep — + discover-attack-surface + new rules (fixed first) +2. Triage level — `static` · `dynamic` + - static — classify findings from the model, no running app + - dynamic — + a PoC per confirmed TP. This launches a few test services on the user's current machine (local instances and ports); they're torn down at the end of the run. Make that clear in the option -The tool adds an Other choice; if the user takes it, ask for any custom steps — a custom combination of scan level (lite/normal/deep), triage level (static/dynamic), and suppress-FP (on/off). Record the resolved levels in `state.yaml`. - -Levels, once chosen: - -- scan — lite (build + scan with existing rules) · normal (+ approximation iteration) · deep (+ discover-attack-surface + new rules, fixed first) -- triage — static (classify from the model) · dynamic (+ a PoC per confirmed TP) -- suppress-FP — a post-triage stage that fixes confirmed false positives on rules you own - -The run is one fixed pipeline; the levels decide which steps execute. Walk it top to bottom — when you reach a step your levels include, load its reference and do it; skip the bracketed steps your levels omit. Don't load a step's reference until you reach it. +The run is one fixed pipeline; the two levels decide which steps execute. Walk it top to bottom — when you reach a step your levels include, load its reference and do it; skip the bracketed steps your levels omit. Don't load a step's reference until you reach it. ``` -build → references/build.md every level -[deep] discover + new rules → references/discover-rules.md deep -scan → references/scan.md every level -[normal/deep] approximation iteration → references/approximations.md normal, deep -triage (generate findings + classify) → references/triage.md every level -[suppress-FP] → references/suppress-fp.md when suppress-FP is on -[dynamic] PoC + assemble vulnerabilities → references/poc.md dynamic +build → references/build.md every run +[deep] discover + new rules → references/discover-rules.md deep scan +scan → references/scan.md every run +[normal/deep] approximation iteration → references/approximations.md normal, deep scan +triage (generate findings + classify) → references/triage.md every run +[dynamic] PoC + assemble vulnerabilities → references/poc.md dynamic triage ``` -Which steps each preset runs: - -- fast — build, scan, triage -- default — build, scan, approximations, triage, [suppress-FP] -- ultra — build, discover-rules, scan, approximations, triage, suppress-FP, poc -- reproduce-vulnerability — references/reproduce-vulnerability.md walks the same steps anchored on the asserted vuln - From inside any step, when a rule or approximation won't behave, load references/escalation.md. Only the approximation iteration loops (it re-scans internally); new rules are fixed before it. ## Delegation @@ -93,6 +80,16 @@ Orchestration practices: - Steps within a unit are sequential via the artifact on disk — dispatch step N only after step N−1's named artifact exists; never bundle steps into one dispatch - write `state.yaml` at each fan-out join — a phase flips to `done` only once every unit's artifact exists on disk +## Resource limits + +A fan-out unit that compiles or scans (approximation creation, rule test-projects, discovery) each spawns a heavy `opentaint` JVM, so unbounded parallelism OOMs the machine. At run start, compute a concurrency cap and never dispatch more than that many such subagents at once: + +- cores — `nproc` (Linux) / `sysctl -n hw.ncpu` (macOS) +- free memory in GB — `free -g` (Linux, the `available` column) / `sysctl -n hw.memsize` ÷ 1024³ (macOS) +- cap = `min(cores, floor(free_GB / 4))`, floored at 1 — budget ~4 GB per concurrent JVM + +It's machine state, not run state — recompute on resume, don't track it. Light subagents that only read/write tracking (analyze-external-methods, analyze-findings, assemble-lib-rules) aren't bound by the cap; PoC is already sequential. + ## State and resumption You are the only writer of `.opentaint/tracking/state.yaml` — it records the chosen levels and every phase's status, written after each fan-out join. @@ -100,7 +97,7 @@ You are the only writer of `.opentaint/tracking/state.yaml` — it records the c On start, and after any compaction, reconstruct position from artifacts before doing anything — never replay a completed phase: - read `state.yaml` and the `tracking/` tree -- skip any phase whose artifact exists: `project.yaml` → build; `coverage.yaml` with every entry `done` plus the `tracking/rules` join requirements → discover; `report.sarif` → scan; a rule's `artifact` + `tests_passing: done` → that rule; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd +- skip any phase whose artifact exists: `project.yaml` → build; `coverage.yaml` with every entry `done` → discover; a lib unit's `tests_passing: done` → that package's lib rules, and a `rules/join/.yaml` per vuln class → joins assembled; `report.sarif` → scan; an approximation unit's `artifact` (plus `tests_passing` for dataflow) → that unit; a finding with `verdict` set → triaged; with `poc` set → PoC'd - detect new work from artifacts, not memory: finding files with `verdict: pending` (a fresh or reset scan) → triage; methods in `dropped-external-methods.yaml` not yet in any approximation unit → approximations ## Tracking layout @@ -110,10 +107,10 @@ The single source of truth for the tracking schema; each skill writes only its o ``` .opentaint/tracking/ state.yaml # you only — levels + phase status - coverage.yaml # triage-dependencies seeds, discover-attack-surface fills — one entry per dependency package weighed (deep) - surface.yaml # discover-attack-surface — the sources/sinks each package introduces (deep) + coverage.yaml # triage-dependencies seeds, discover-attack-surface flips — one entry per dependency package weighed (deep) findings/.yaml # one per logical finding (from the SARIF→finding script; split by triage) - rules/.yaml # one per vuln-class join (requirement by assemble-lib-rules; written + tested next phase) + rules/lib/.yaml # per-package rule plan — new source/sink lib rules (discover plans; create-* build + test vs the marker) (deep) + rules/join/.yaml # per-vuln-class security join (assemble-lib-rules writes; main scan verifies) (deep) approximations/-passthrough.yaml # simple from→to copies; write-only, scan-verified approximations/-dataflow.yaml # lambda/callback/async; tested on a test project approximations/skipped.yaml # methods the engine asks for but that carry no taint @@ -123,10 +120,8 @@ The single source of truth for the tracking schema; each skill writes only its o state.yaml: ```yaml -mode: ultra # fast | default | ultra | reproduce-vulnerability | custom scan_level: deep # lite | normal | deep triage_level: dynamic # static | dynamic -suppress_fp: true phases: # pending | in_progress | done build: done discover: done # deep only @@ -134,11 +129,10 @@ phases: # pending | in_progress | done scan: done approximations: in_progress # normal/deep; iterative, rescans within triage: pending - suppress_fp: pending # after triage poc: pending # dynamic triage ``` -coverage.yaml — seeded by triage-dependencies and filled by discover-attack-surface (deep): one entry per dependency package weighed, so you can see which libraries were drilled and which were dismissed. A `pending` entry is a flagged library awaiting its depth pass; the rule names live in `rules/.yaml`, not here: +coverage.yaml — seeded by triage-dependencies and flipped by discover-attack-surface (deep): one entry per dependency package weighed, so you can see which libraries were drilled and which were dismissed. A `pending` entry is a flagged library awaiting its depth pass; the rule plan lives in `rules/lib/.yaml`, not here: ```yaml packages: @@ -148,22 +142,6 @@ packages: free-form — what was found and why ``` -surface.yaml — discover-attack-surface appends the sources/sinks each package introduces; assemble-lib-rules groups them into join requirements (deep). `builtin: null` ⇒ a new pattern to write next phase; sources are general, sinks carry a `vuln_class`: - -```yaml -sources: - - package: org.springframework.web.reactive.function.server - idea: ServerRequest body/params — untrusted request data - builtin: null - dependency: org.springframework:spring-webflux:6.1.0 -sinks: - - package: org.springframework.web.reactive.function.client - vuln_class: ssrf - idea: WebClient.get().uri($UNTRUSTED) - builtin: null - dependency: org.springframework:spring-webflux:6.1.0 -``` - findings/.yaml — created by the SARIF→finding script; `verdict`/`notes` by analyze-findings; `poc`/`poc_script` by generate-poc: ```yaml @@ -177,21 +155,24 @@ poc: pending # pending | confirmed | failed poc_script: null # path under .opentaint/pocs/ once generate-poc writes one ``` -rules/.yaml — one per vuln-class join (`` = the class); `description` + `sources`/`sinks` by assemble-lib-rules; `test_project` by create-test-project; `tests_passing` + `rule_id` + `artifact` by create-rule: +rules/lib/.yaml — per-package rule plan; `description` fields + `sources`/`sinks` by discover-attack-surface, `test_project` by create-test-project, `tests_passing` + `rule_id`s + `artifact` by create-rule. `coverage: new` ⇒ write a pattern, `expand` ⇒ ref the built-in plus the missing methods: ```yaml -name: ssrf # the vuln class; becomes the join rule's file and id -rule_id: null # filled on creation -artifact: null # added once the rule file exists -finding: null # finding_name; non-null only for suppress-FP -requirements: > # short — the class and what the join wires - CWE-918 SSRF — join every untrusted-data source to the SSRF sink group -sources: # ref a built-in, or a new one to write - - ref: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - - new: ServerRequest body/params — org.springframework.web.reactive.function.server -sinks: - - new: WebClient.get().uri($UNTRUSTED) — org.springframework.web.reactive.function.client; in DefaultAttachmentService +package: org.springframework.web.reactive.function.client dependencies: [org.springframework:spring-webflux:6.1.0] +builtin_coverage: partial # partial | none +artifact: null # create-rule +sources: + - idea: ServerRequest body/params — untrusted request data + coverage: new # new | expand + builtin: null + rule_id: null +sinks: + - vuln_class: ssrf + idea: WebClient.post/put().uri($UNTRUSTED) + coverage: expand + builtin: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink + rule_id: null stages: # pending | in_progress | done description: done test_project: pending @@ -200,6 +181,25 @@ notes: > free-form ``` +rules/join/.yaml — one per vuln-class security join, written by assemble-lib-rules after the lib rules exist and verified by the main scan: + +```yaml +name: ssrf # the vuln class; the join rule's file and id +rule_id: java/security/ssrf.yaml:ssrf +artifact: .opentaint/rules/java/security/ssrf.yaml +sources: # built-in + created + - ref: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + - ref: java/lib/spring/webflux-request-source.yaml#webflux-request-source +sinks: # created + built-in + - new: java/lib/spring/webclient-ssrf-sink.yaml#webclient-ssrf-sink + - builtin: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink +stages: # pending | in_progress | done + written: done + verified: pending # done once the main scan confirms it +notes: > + free-form +``` + approximations/-.yaml — created by analyze-external-methods (`description` + `methods`); `` = the dotted package with `.` -> `-` (the YAML `package:` field keeps the real dotted name). The stages differ by kind: ```yaml @@ -232,8 +232,8 @@ methods: # engine asks to approximate these, but they carry no ta rules/java/{lib/generic,lib/spring,security}/ # custom rules pass-through/.yaml # passThrough approximation configs approximations// # code-based (dataflow) approximation sources, per unit - test-projects// # per-unit test project sources - test-compiled// # per-unit compiled test model + test-projects// # per-unit test project sources; a rule unit holds sinks/ and sources/ sub-projects, each with a test-rules/ (the generic markers + that side's test join — test-only, never loaded by the main scan) + test-compiled// # per-unit compiled test model (a rule unit: sinks/ and sources/ models) test-results// # per-unit test outputs results/ report.sarif diff --git a/skills/appsec-agent/references/approximations.md b/skills/appsec-agent/references/approximations.md index db0efcca0..68a280b41 100644 --- a/skills/appsec-agent/references/approximations.md +++ b/skills/appsec-agent/references/approximations.md @@ -3,11 +3,11 @@ Loop until stabilization: 1. analyze-external-methods — Inputs: dropped-file `.opentaint/results/dropped-external-methods.yaml`, tracking-dir `.opentaint/tracking`, ``. Writes one `approximations/-passthrough.yaml` and/or `-dataflow.yaml` per package, plus `skipped.yaml`, only for methods not already in a unit. Returns one line per unit -2. Fan out per unit: +2. Fan out per unit (capped per SKILL.md § Resource limits — these units compile and scan): - passthrough → create-pass-through-approximation — Inputs: `` from the unit, ``, config-file `.opentaint/pass-through/.yaml`. Write-only; sets `written` + `artifact`. No test project - dataflow → two sequential dispatches per unit: first create-test-project (dataflow shape) produces `.opentaint/test-compiled/` and sets `test_project: done`; on its return, dispatch create-dataflow-approximation against that model (approx-src `.opentaint/approximations/`) — sets `tests_passing` + `artifact` (`test approximation run` auto-applies its own fixed rule — nothing to pass) 3. Re-scan (references/scan.md) with both approximation dirs pointing at the parents (`.opentaint/pass-through`, `.opentaint/approximations`) 4. Pass-through verify (no separate skill): the scan agent reports any method you modeled that is still in `dropped-external-methods.yaml`, or any config load error. Re-invoke that package's create-pass-through-approximation agent to fix (matcher / from→to / YAML), then rescan. A dataflow method that still drops despite passing its isolated test is an escalation case (references/escalation.md), not a re-write -5. Stabilization: stop when no method on a source→sink path remains unmodeled and a rescan surfaces no new such methods (equivalently, byte-equal SARIF across rescans). Otherwise feed the newly dropped methods back into step 1 +5. Stabilization: keep classifying until every method in `dropped-external-methods.yaml` is either modeled (a passthrough/dataflow unit) or listed in `skipped.yaml`, and a rescan surfaces no new dropped methods — i.e. the only thing left dropped is the skip set. Otherwise feed the newly dropped methods back into step 1 Set `phases.approximations: in_progress` across the loop, `done` at stabilization. diff --git a/skills/appsec-agent/references/discover-rules.md b/skills/appsec-agent/references/discover-rules.md index 934c0927a..b4e53bbc1 100644 --- a/skills/appsec-agent/references/discover-rules.md +++ b/skills/appsec-agent/references/discover-rules.md @@ -6,23 +6,19 @@ Delegate triage-dependencies. Inputs: ``, model-dir `.opentaint/pr ## Discover attack surface -Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml`. Inputs each: ``, ``, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`, surface `.opentaint/tracking/surface.yaml`. This phase only describes ideas — each agent catalogues the package's untrusted-data sources and dangerous sinks into `surface.yaml` (new patterns vs a built-in ref; sinks tagged by vuln class), writes no rule and runs no test, then flips its `coverage.yaml` entry to `done`. Returns the sources/sinks found. +Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml` (capped per SKILL.md § Resource limits). Inputs each: ``, deps-dir `.opentaint/project/dependencies`, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. Each agent first settles built-in coverage (full ⇒ no unit, just `coverage.yaml` done; partial ⇒ expand only the missing methods; none ⇒ plan from scratch), then enumerates the package's sources/sinks from its **dependency jar** and writes the package's rule plan `tracking/rules/lib/.yaml` (new vs expand; sinks tagged by vuln class), writing no rule and running no test, then flips its `coverage.yaml` entry to `done`. Returns the sources/sinks planned. -## Assemble lib rules +Then a quick area cross-check: across network, persistence, environment, serialization, rendering, naming, execution, messaging — is every boundary a dependency exposes either covered by built-ins or now carrying a lib unit? If a boundary has a relevant dependency but produced no unit and no clear reason, dispatch a depth pass for it. Set `phases.discover: done` once every `coverage.yaml` entry is `done`. -Once the discover fan-out is done, delegate assemble-lib-rules. Inputs: surface `.opentaint/tracking/surface.yaml`, tracking-dir `.opentaint/tracking`. With the whole surface in one view it groups sinks by vuln class and writes one `tracking/rules/.yaml` join requirement per class (every source + that class's sink group, new combinations only) — still description, not rule files. One agent for the global view; fan out by vuln class only if the surface is large. +## Per-package lib rules -Then a quick area cross-check: across network, persistence, environment, serialization, rendering, naming, execution, messaging — is every boundary a dependency exposes either covered by built-ins or now carrying a join requirement? If a boundary has a relevant dependency but produced no requirement and no clear reason, dispatch a depth pass for it. Set `phases.discover: done` once every `coverage.yaml` entry is `done` and the join requirements are written. +Build the lib rules from the `tracking/rules/lib/.yaml` units. Fan out per package (capped per SKILL.md § Resource limits — each unit compiles and scans); each unit is a two-step pipeline, dispatched one step at a time after the prior step's artifact: -## Rules +1. create-test-project — Inputs: `` = the lib unit's sources/sinks, ``, `` `.opentaint/tracking/rules/lib/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the unit. Scaffolds the `sinks/` and/or `sources/` marker projects (`test rule init`, `--sinks-only`/`--sources-only` for a one-sided package), writes the generic-marker counterpart samples, compiles each sub-project. Sets `test_project: done` +2. create-rule — Inputs: requirements (the lib unit), test-compiled `.opentaint/test-compiled/`, test-project `.opentaint/test-projects/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/pass-through` / `.opentaint/approximations`. Writes the package's source lib rules + per-vuln-class sink lib rules into `.opentaint/rules`, the test joins against the markers into each test project's `test-rules`, and iterates `test rule run` per sub-project until every sample passes; sets `tests_passing: done` and the lib rules' `rule_id`s/`artifact` -Write and test the rules from the join requirements in two passes — sources first (one agent, since a new source is shared across every class's join), then sinks + joins in parallel. +If create-rule reports the test project drops a library method on the rule's flow, route the dropped methods through the approximation loop (references/approximations.md), then re-dispatch create-rule with the approximation dirs. If it reports non-convergence with nothing dropped, load references/escalation.md. Set `phases.rules: done` once every lib unit's `tests_passing` is done. -**Pass 1 — source lib rules (one agent).** Dispatch one create-rule to author every new source lib rule the requirements name (the `new:` sources across `tracking/rules/*.yaml`). Inputs: requirements = the `new:` sources, rules-dir `.opentaint/rules`. A source lib rule isn't testable alone — it's verified by the joins that ref it in pass 2. Wait for it to finish before pass 2, so the joins can ref the source rules it wrote. +## Assemble joins -**Pass 2 — sinks + joins (parallel, per vuln class).** Fan out across the `tracking/rules/.yaml` join requirements. Each unit is a two-step pipeline — dispatch the steps one at a time, waiting for the prior step's artifact: - -1. create-test-project — Inputs: spec = the requirement's `sources`/`sinks`, ``, `` `.opentaint/tracking/rules/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the requirement. Writes synthetic samples — each new sink fed by a known built-in source, each source (incl. the pass-1 ones) into the class's sink. Sets `test_project: done` -2. create-rule — Inputs: requirements (the tracking file), test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/pass-through` / `.opentaint/approximations`. Writes the class's new sink lib rules + the join (ref every source), iterates `opentaint test rule run` until every sample passes; sets `tests_passing: done`, `rule_id`, `artifact` - -If create-rule reports the test project drops a library method on the rule's flow, the rule can't be verified until that method is modeled — route the dropped methods through the approximation loop (references/approximations.md; they're real library methods the main scan needs too), then re-dispatch create-rule with the approximation dirs. If it reports non-convergence with nothing dropped, load references/escalation.md. A join's `tests_passing` stays `pending` until its samples pass; set `phases.rules: done` once every join's is done. +Once the per-package lib rules are done, delegate assemble-lib-rules. Inputs: lib-units `.opentaint/tracking/rules/lib`, rules-dir `.opentaint/rules`, tracking-dir `.opentaint/tracking`. With every created lib rule in one view it writes one security join per vuln class — `tracking/rules/join/.yaml` + `.opentaint/rules/java/security/.yaml` — merging built-in + created sources with the new sinks, and created sources with built-in sinks (new-end combinations only). These carry no test project; the main scan verifies them (references/scan.md). One agent for the global view; fan out by vuln class only if there are many. diff --git a/skills/appsec-agent/references/poc.md b/skills/appsec-agent/references/poc.md index 1d02d15a1..0ce5f1ba7 100644 --- a/skills/appsec-agent/references/poc.md +++ b/skills/appsec-agent/references/poc.md @@ -5,8 +5,10 @@ Run PoCs one subagent at a time, never in parallel — concurrent exploits race - first finding: generate-poc with no `` — it builds and starts the app and returns the `` it started - every later finding: pass that `` so the agent reuses the running instance +When a finding needs several services (app + DB + broker + …), have generate-poc bring them all up with one `docker compose` on a shared network, registered as a single `compose` entry — one command then tears the stack down. + Inputs each time: `` = the TP finding file, ``, poc-dir `.opentaint/pocs`, and `` once known. Each sets `poc` (`confirmed`/`failed`) + `poc_script`; a `failed` repro does not flip the triage verdict. Each PoC subagent registers any instance it starts in `.opentaint/tracking/poc-servers.yaml` — that registry, not memory, is what's running (so a reuse-or-start decision and teardown both survive compaction). After all PoCs, assemble `.opentaint/vulnerabilities.md` from the confirmed findings yourself (subagents never write it; see SKILL.md). -Then tear down — you own this, run it directly (don't dispatch a subagent). Read `poc-servers.yaml`; if it lists any instance, ask the user keep-vs-shutdown (default: shut down). Unless they say keep, stop each entry from its `kind` + `ref` (`process` → `kill `, `container` → `docker stop `, `compose` → `docker compose -f down`), confirm its `port` is free, and empty the registry. Only after teardown set `phases.poc: done`. +Then tear down — you own this, run it directly (don't dispatch a subagent). Read `poc-servers.yaml` and stop every instance it lists — always terminate, no keep-vs-shutdown prompt. From each entry's `kind` + `ref` (`process` → `kill `, `container` → `docker stop `, `compose` → `docker compose -f down`), confirm its `port` is free, and empty the registry. Only after teardown set `phases.poc: done`. diff --git a/skills/appsec-agent/references/reproduce-vulnerability.md b/skills/appsec-agent/references/reproduce-vulnerability.md deleted file mode 100644 index 5ee497d9c..000000000 --- a/skills/appsec-agent/references/reproduce-vulnerability.md +++ /dev/null @@ -1,14 +0,0 @@ -# Reproduce-vulnerability workflow - -The reproduce-vulnerability workflow: the user asserts a specific vulnerability exists and you must make the analysis surface it. Treat the asserted vuln as ground truth — it gives you confidence while debugging, so a missed detection points at the analysis, not at the vuln being absent. Run at deep scan + dynamic triage, but driven by the one vuln rather than sweeping the whole project. - -It walks the same steps as a deep + dynamic run, narrowed to the asserted flow; dispatch any step per the Delegate template in SKILL.md: - -1. Build (references/build.md) -2. Reproduce the asserted flow as a test project — create-test-project with a `@PositiveRuleSample` mirroring the real source → hops → sink; this is the ground-truth reproducer the rest is debugged against -3. Ensure a rule covers it — reuse a built-in, or discover + author one for that source→sink (references/discover-rules.md). Confirm the sample passes -4. Scan anchored on that rule (references/scan.md, `--rule-id` listing its `refs`), then model any method on the flow's path that the scan drops (references/approximations.md) -5. If the scan still won't flag the known-vulnerable flow once the rule passes its test and no method on the path remains dropped, escalate (references/escalation.md) — this is the case the reproduce mode exists to catch -6. Confirm dynamically (references/poc.md), then assemble `.opentaint/vulnerabilities.md` - -Record `mode: reproduce-vulnerability` in `state.yaml`; phase tracking is the same as a deep + dynamic run. diff --git a/skills/appsec-agent/references/scan.md b/skills/appsec-agent/references/scan.md index f7c597a51..d25f9cc10 100644 --- a/skills/appsec-agent/references/scan.md +++ b/skills/appsec-agent/references/scan.md @@ -1,3 +1,5 @@ # Scan Delegate run-scan. Inputs: model-dir `.opentaint/project`, ruleset `builtin` + `.opentaint/rules`, report `.opentaint/results/report.sarif`; on normal/deep also config-dir `.opentaint/pass-through` and approx-dir `.opentaint/approximations` (both dir flags walk the tree recursively, so the parents apply every unit). Require a concise return — finding counts per rule, the methods still in `dropped-external-methods.yaml` that sit on a source→sink path, and any config load/parse errors — not the SARIF body. The files persist on disk for the next steps. Set `phases.scan: done`. + +On deep runs, if the scan flags an issue with a created rule — a rule that failed to load/parse, a join that should fire but didn't, or an own rule that false-positives — dispatch create-rule to fix that rule (references/discover-rules.md), then rescan before continuing. diff --git a/skills/appsec-agent/references/suppress-fp.md b/skills/appsec-agent/references/suppress-fp.md deleted file mode 100644 index bf7193d1a..000000000 --- a/skills/appsec-agent/references/suppress-fp.md +++ /dev/null @@ -1,9 +0,0 @@ -# Suppress-FP block - -For each confirmed FP on an own/overridable rule, one at a time: - -1. create-test-project — pin the confirmed TPs as `@PositiveRuleSample` and add the FP as `@NegativeRuleSample`, recompile. Inputs: the FP and TP traces as ``, the rule's ``, test-project / test-compiled `.opentaint/test-{projects,compiled}/` -2. create-rule — refine only the rule until the negative stops firing and every positive still passes. Inputs: the rule ``, test-compiled `.opentaint/test-compiled/`, rules-dir `.opentaint/rules` -3. re-scan (references/scan.md), then regenerate finding files and retriage the affected findings (references/triage.md) - -Loop until the FP is gone and the TPs stay. An FP from a built-in rule you can't override is recorded in the finding's `notes`, not suppressed. Set `phases.suppress_fp: done`. diff --git a/skills/assemble-lib-rules/SKILL.md b/skills/assemble-lib-rules/SKILL.md index 6be8ede61..43e935d55 100644 --- a/skills/assemble-lib-rules/SKILL.md +++ b/skills/assemble-lib-rules/SKILL.md @@ -1,6 +1,6 @@ --- name: assemble-lib-rules -description: Group the discovered sources and sinks into per-vuln-class join rule requirements. Use after the discover-attack-surface fan-out, to describe the rules the next phase will write +description: Write the per-vuln-class security join rules that merge the created source/sink lib rules with the built-ins. Use after the per-package lib rules are created and tested, to wire them into project-level joins license: Apache-2.0 metadata: author: opentaint @@ -9,70 +9,92 @@ metadata: # Skill: Assemble Lib Rules -The per-package passes catalogue sources and sinks but never pair them. With the whole surface inventory in front of you, describe the join rules the next phase will build — one per vuln class, each wiring every source to that class's sinks, mirroring the built-in security rules +The per-package passes author source and sink lib rules but never pair them across packages. With every created lib rule and the whole built-in set in front of you, write the security joins — one per vuln class, each merging the created rules with the built-ins, mirroring the built-in security rules. These are verified by the main scan, not a test project ## Inputs From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -- Surface inventory `` — the discovered sources/sinks. Default: `.opentaint/tracking/surface.yaml` -- Tracking directory `` — where the join requirements are written. Default: `.opentaint/tracking` +- Lib units `` — the per-package lib tracking files (`rules/lib/.yaml`) with the created source/sink `rule_id`s and their vuln classes. Default: `.opentaint/tracking/rules/lib/` +- Rules directory `` — where the security joins are written. Default: `.opentaint/rules` +- Tracking directory `` — where the join records are written. Default: `.opentaint/tracking` Built-in rules are available at `opentaint health --rules` ## Workflow -### 1. Read the surface and the built-ins +### 1. Read the created lib rules and the built-ins -Read `` and the built-in rules (`opentaint health --rules`). Note which built-in source/sink lib rules already exist, to ref +Read every per-package lib unit in `` (the source/sink `rule_id`s create-rule wrote, sinks carrying their `vuln_class`) and the built-in source/sink lib rules (`opentaint health --rules`). Collect every source rule (built-in + created) and every sink rule grouped by vuln class -### 2. Group sinks by vuln class +### 2. Write one security join per vuln class -The sinks in `` carry a `vuln_class`; group them. A class needs a join requirement if it has a **new** sink, or if there's any **new** source (a new source must be wired to every class's sink group). Skip a class only when it has no new sink and there's no new source — the built-in join already covers it +For each vuln class that has a **created** (new) sink or for which there is any **created** source, write `/java/security/.yaml` (file and `id` = the class), `mode: join`, refing the relevant sources and sinks and wiring **only new-end combinations** in `on:`: -### 3. Describe one join requirement per class +- built-in sources + created sources → that class's **new** sinks +- created sources → that class's **built-in** sinks +- skip built-in source → built-in sink — the built-in join already covers it, so repeating it double-reports -For each class, write one `/rules/.yaml` (`` = the vuln class), naming: +```yaml +rules: + - id: ssrf + severity: ERROR + message: Untrusted data reaches an SSRF sink + metadata: + cwe: CWE-918 + short-description: SSRF via untrusted input + languages: [java] + mode: join + join: + refs: + - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + as: servlet-source # built-in source + - rule: java/lib/spring/webflux-request-source.yaml#webflux-request-source + as: webflux-source # created source + - rule: java/lib/spring/webclient-ssrf-sink.yaml#webclient-ssrf-sink + as: new-sink # created (new) sink + - rule: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink + as: builtin-sink # built-in sink + on: + - 'servlet-source.$UNTRUSTED -> new-sink.$UNTRUSTED' # built-in source → new sink + - 'webflux-source.$UNTRUSTED -> new-sink.$UNTRUSTED' # created source → new sink + - 'webflux-source.$UNTRUSTED -> builtin-sink.$UNTRUSTED' # created source → built-in sink +``` + +(no `servlet-source -> builtin-sink` line — the built-in join already covers that pair) -- every source (built-in refs + the new ones from ``) — a join aggregates them all, like the built-ins -- that class's sink group (built-in refs + new) -- every library the rule crosses under `dependencies` +### 3. Stop — the main scan verifies -A join wires only combinations with a **new** end (new source → any sink, any source → new sink); a built-in source → built-in sink pair is already covered by the built-in join, so leaving it out keeps the join from double-reporting +These joins carry no test project — the main scan applies them. Write them and stop; if the scan shows a join didn't load or fire, the orchestrator re-dispatches create-rule to fix it ## Output -- One `/rules/.yaml` per vuln-class join, with `stages.description: done`, its `sources`/`sinks`, and `dependencies` -- A brief summary to the caller: one line per join (class, source/sink count, which ends are new). The files hold the detail — don't paste it back +- One `/java/security/.yaml` per vuln-class join, refing the created + built-in lib rules +- One `/rules/join/.yaml` per join, with `stages.written: done` +- A brief summary to the caller: one line per join (class, source/sink count, which ends are new) ## Tracking -`/rules/.yaml` — the join requirement the next phase builds and tests: +`/rules/join/.yaml` — the security join, verified by the main scan: ```yaml -name: ssrf # = the vuln class; becomes the join rule's file and id -rule_id: null # filled later -artifact: null -finding: null -requirements: > - CWE-918 SSRF — join every untrusted-data source to the SSRF sink group -sources: # ref a built-in, or a new one to write +name: ssrf # the vuln class; the join rule's file and id +rule_id: java/security/ssrf.yaml:ssrf +artifact: .opentaint/rules/java/security/ssrf.yaml +sources: # built-in + created - ref: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - - new: ServerRequest body/params — org.springframework.web.reactive.function.server -sinks: - - new: WebClient.get().uri($UNTRUSTED) — org.springframework.web.reactive.function.client; in DefaultAttachmentService / ProxyFilter -dependencies: - - org.springframework:spring-webflux:6.1.0 + - ref: java/lib/spring/webflux-request-source.yaml#webflux-request-source +sinks: # created + built-in + - new: java/lib/spring/webclient-ssrf-sink.yaml#webclient-ssrf-sink + - builtin: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink stages: - description: done - test_project: pending - tests_passing: pending + written: done + verified: pending # done once the main scan confirms it notes: > free-form ``` ## Gotchas -- Describe, don't write — emit requirements only; rules are written and tested in the next phase -- One join per vuln class, aggregating every source — don't write a separate join per source or per package -- Ref a built-in source or sink rather than re-declaring it +- One join per vuln class, aggregating every relevant source — don't write a separate join per source or per package +- Ref the existing lib rules (built-in + created); never re-declare a source or sink diff --git a/skills/create-pass-through-approximation/SKILL.md b/skills/create-pass-through-approximation/SKILL.md index fc2a4fdbc..903cfcf7c 100644 --- a/skills/create-pass-through-approximation/SKILL.md +++ b/skills/create-pass-through-approximation/SKILL.md @@ -99,6 +99,32 @@ passThrough: - .com.example.lib.Parser#parsed#java.lang.Object ``` +Full config — every function in one top-level `passThrough:` list (quote `[*]` — unquoted it parses as a YAML alias): +```yaml +passThrough: +- function: org.springframework.beans.MutablePropertyValues#add + copy: + - from: arg(1) + to: + - this + - .org.springframework.beans.PropertyValue#Value#java.lang.Object +- function: org.springframework.beans.PropertyValue#getValue + overrides: false + copy: + - from: + - this + - .org.springframework.beans.PropertyValue#Value#java.lang.Object + to: result +- function: org.springframework.beans.PropertyValues#getPropertyValues + copy: + - from: + - this + - .java.lang.Iterable#Element#java.lang.Object + to: + - result + - '[*]' +``` + ### 2. Verification is the scan There's no test project for passThrough. The main scan applies `` and the scan agent reports back. You're re-invoked to fix the config when that scan shows: @@ -135,11 +161,11 @@ Do not touch other stages or fields Position bases - `this`, `result`, `arg(0)`, `arg(1)`, … -- `any()` — a single argument bound consistently across every position in the rule; `class()` — a static field. Rare — prefer an explicit `arg(N)` +- `any()` — expands to every argument matching the classifier (a cartesian product across positions, bound consistently), not a single argument. Rare — prefer an explicit `arg(N)` Access-path modifiers (list form `[, ]`) - `.##` — a field or virtual slot; type it `java.lang.Object`. The slot name is arbitrary (a descriptive name, or the conventional `` for a generic carrier) -- `.[*]` — array / collection element +- `[*]` — array element (no leading dot). For `java.util` collections this does *not* carry element taint; route it through the conventional `.java.lang.Iterable#Element#java.lang.Object` slot instead (as the built-in `List`/`Collection` models do) Function matching - Simple: `package.Class#method` @@ -150,7 +176,10 @@ Overrides - `overrides: false`: exact class only Conditions (the only keys that load from YAML) -- `typeIs`, `annotatedWith`, `isConstant`, `isNull`, `constantMatches`, `constantEq`, `constantGt`, `constantLt`, `tainted`, `anyOf`, `allOf`, `not` +- take a `pos: `: `typeIs`, `annotatedWith`, `constantMatches`, `constantEq`, `tainted` +- take the position directly, no `pos:` field: `isConstant`, `isNull` — adding `pos:` fails to load +- nest other conditions: `anyOf`, `allOf`, `not` +- `constantGt` / `constantLt` load but crash the scan when actually evaluated against a constant (their string-typed bound fails an engine type-check) — avoid until fixed ## Gotchas diff --git a/skills/create-rule/SKILL.md b/skills/create-rule/SKILL.md index 03f059c97..af73ee74c 100644 --- a/skills/create-rule/SKILL.md +++ b/skills/create-rule/SKILL.md @@ -9,18 +9,19 @@ metadata: # Skill: Create Rule -Create a pattern rule for a vulnerability class, then test it against the prepared test project and fix it until every sample passes +Per package, author the new source/sink lib rules the requirements name, wire each to the generic `Taint` marker in a test join, and verify against the package's marker test projects until every sample passes -Two roles: a **source pass** writes the source lib rules the requirements name (a source isn't testable alone — the joins that ref it verify it later, so author and stop); a **sink pass**, per vuln class, writes that class's sink lib rules and the one join that refs every source, then tests +Two roles: the **main** one authors a package's lib rules (above); a **fix** narrows or broadens a created rule the main scan later flags. The cross-package security joins are written by assemble-lib-rules, not here ## Inputs From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -- Requirements `` — what to detect (source, sink, vuln class); either a rule tracking file or an overall description -- Compiled test project `` — the compiled model to verify against. Default: `.opentaint/test-compiled/` (per rule/approximation ``) -- Rules directory `` — where rules are written. Default: `.opentaint/rules` -- Tracking file `` — the rule file. Default: `.opentaint/tracking/rules/.yaml` +- Requirements `` — the per-package lib unit naming the new sources/sinks (a tracking file), or for a fix the rule to change +- Compiled test projects `` — the marker models to verify against. Default: `.opentaint/test-compiled//sinks` and `.opentaint/test-compiled//sources` (`` = the package-kebab) +- Test project `` — the sources tree; the test joins go in each side's `//test-rules` (only `test rule run` loads them, never the main scan). Default: `.opentaint/test-projects/` +- Rules directory `` — where the lib rules are written. Default: `.opentaint/rules` +- Tracking file `` — the lib unit file. Default: `.opentaint/tracking/rules/lib/.yaml` - Approximation directories `` / `` (optional) — apply on a re-dispatch when the test project needs a library model that's now built. Default: none Built-in rules are available at `opentaint health --rules` @@ -86,47 +87,47 @@ rules: - focus-metavariable: $UNTRUSTED ``` -### 3. Create the security rule (join mode) +### 3. Write the test joins (against the generic marker) -One join per vuln class — write it at `/java/security/.yaml`, naming the file and `id` after the class (the requirement's `name`). Ref every source (built-in + the new per-package ones) and the class's sink group, and wire each source to the sink in `on:`, like the built-in `java/security/ssrf.yaml`: +A lib rule emits nothing alone — to exercise it you need a join. Write one test join per sub-project into its `test-rules/java/security/`, wiring your new lib rules to the generic `Taint` marker. These live only in the test project (never ``), so the main scan never loads them. Name each `-sinks` / `-sources` so the samples' `value`/`id` resolve: + +- `sinks/` → `-sinks`: ref the generic source + every new sink lib rule, wiring `src.$UNTRUSTED -> .$UNTRUSTED` for each +- `sources/` → `-sources`: ref every new source lib rule + the generic sink, wiring `.$UNTRUSTED -> sink.$VALUE` for each ```yaml +# .opentaint/test-projects//sinks/test-rules/java/security/-sinks.yaml rules: - - id: sql-injection + - id: -sinks severity: ERROR - message: >- - Untrusted data flows to a dangerous operation + message: Tainted value reaches a sink under test metadata: - cwe: CWE-89 - short-description: SQL Injection via untrusted input + cwe: CWE-000 + short-description: test join for the package's sinks languages: [java] mode: join join: refs: - - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - as: servlet-source - - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source - as: spring-source - - rule: java/lib//my-new-sink.yaml#my-new-sink + - rule: java/lib/test/generic-source.yaml#generic-taint-source + as: src + - rule: java/lib//my-new-sink.yaml#my-new-sink as: sink on: - - 'servlet-source.$UNTRUSTED -> sink.$UNTRUSTED' - - 'spring-source.$UNTRUSTED -> sink.$UNTRUSTED' + - 'src.$UNTRUSTED -> sink.$UNTRUSTED' ``` -Wire only combinations with a new end — a built-in source → built-in sink pair is already covered by the built-in join, so repeating it here double-reports +The marker rules resolve from the sub-project's `test-rules` root, your lib rules from `` — pass both to `test rule run`. Metavariable names must match across `refs` and `on` ### 4. Test until success -Run the rule tests against the compiled test project; iterate the rule and re-run `test rule run` until every sample passes: +Run the tests against each compiled sub-project, loading your lib rules (``) and the test joins + markers (`//test-rules`); iterate until every sample passes: ```bash -opentaint test rule run \ - -o .opentaint/test-results/ \ - --ruleset +opentaint test rule run /sinks \ + -o .opentaint/test-results//sinks \ + --ruleset --ruleset /sinks/test-rules ``` -`test rule run` auto-loads the built-in rules, so pass only your custom `` — a literal `builtin` here would be treated as a path. When the caller passed `` / ``, append `--passthrough-approximations ` / `--dataflow-approximations ` — without them a library method the test flow relies on drops taint and the positive can't pass. Read `.opentaint/test-results//test-result.json`: +`test rule run` auto-loads the built-in rules, so pass only your custom rulesets — a literal `builtin` here would be treated as a path. When the caller passed `` / ``, append `--passthrough-approximations ` / `--dataflow-approximations ` — without them a library method the test flow relies on drops taint and the positive can't pass. Read `.opentaint/test-results//sinks/test-result.json`: - `falseNegative` (positive didn't trigger) → patterns too narrow; broaden `pattern-either`, check metavariable names match across branches and between `refs` and `on` - `falsePositive` (negative triggered) → patterns too broad; add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or `metavariable-regex` @@ -134,12 +135,12 @@ opentaint test rule run \ ### 5. When a positive won't pass after a couple of fixes -A `@PositiveRuleSample` that won't trigger after ~2 fix attempts may have a cause no rule edit can fix — a library method on its flow killing taint. Before escalating, scan your own test model with `--track-external-methods`: +A `@PositiveRuleSample` that won't trigger after ~2 fix attempts may have a cause no rule edit can fix — a library method on its flow killing taint. Before escalating, scan that sub-project's model with `--track-external-methods` (add the marker `test-rules` so the join resolves): ```bash -opentaint scan --project-model \ - -o .opentaint/test-results//diag.sarif \ - --ruleset builtin --ruleset \ +opentaint scan --project-model /sinks \ + -o .opentaint/test-results//sinks/diag.sarif \ + --ruleset builtin --ruleset --ruleset /sinks/test-rules \ --track-external-methods ``` @@ -148,24 +149,19 @@ Read `dropped-external-methods.yaml` next to it; either way leave `tests_passing - a dropped method on the failing sample's source→sink path → that's the cause, not the rule: report which methods need a model, to be approximated before you're re-dispatched - nothing dropped and no clear rule cause → report non-convergence for escalation, rather than editing blindly -### 6. Refining for a false positive (suppress-FP) - -The test project already pins the confirmed TPs as `@PositiveRuleSample` and reproduces the FP as a `@NegativeRuleSample` — refine only the rule. Narrow it (step 4's `falsePositive` handling) until the negative stops triggering while every positive still passes - ## Output -- The rule file(s) under `` -- Tracking updated: `rule_id`, `artifact`, `stages.tests_passing` (per Tracking) -- Report the full rule id, a one-line test summary, and the exact `test rule run` command used +- The new lib rule file(s) under ``, and the test join(s) under each test project's `test-rules/` +- Tracking updated: the lib rules' `rule_id`s/`artifact`, `stages.tests_passing` (per Tracking) +- Report the lib rule ids, a one-line test summary per sub-project, and the exact `test rule run` command used - If blocked (step 5): leave `tests_passing: pending` and report the cause instead ## Tracking -In ``, once the rule exists and its samples pass: +In ``, once the lib rules exist and every sub-project's samples pass: ```yaml -rule_id: java/security/my-vuln.yaml:my-vulnerability -artifact: .opentaint/rules/java/security/my-vuln.yaml +artifact: .opentaint/rules/java/lib/generic/my-sink.yaml stages: tests_passing: done ``` @@ -173,12 +169,12 @@ stages: ## Constraints - Library rules MUST have `options.lib: true` and `severity: NOTE` -- Security rules MUST have `metadata.cwe` and `metadata.short-description` +- Security rules (the joins) MUST have `metadata.cwe` and `metadata.short-description` - Source/sink metavariable names must match across `refs` and `on` clauses, or the join won't connect -- The `rule:` path in `refs` is relative to the ruleset root +- The `rule:` path in `refs` is relative to the ruleset root — a marker ref resolves under the test project's `test-rules`, a lib ref under `` - Rule IDs must be globally unique - For simple structural patterns (no dataflow), omit `mode:` (uses default mode) -- Custom library rules go under `/java/lib/generic/` or `/java/lib/spring/` (for Spring-specific), mirroring the built-in layout — never directly under `java/lib/` +- Custom library rules go under `/java/lib/generic/` or `/java/lib/spring/` (for Spring-specific), mirroring the built-in layout — never directly under `java/lib/`; the test joins go in the test project's `test-rules/java/security/`, never `` ## Gotchas diff --git a/skills/create-test-project/SKILL.md b/skills/create-test-project/SKILL.md index 8ef03cf1b..214d9102e 100644 --- a/skills/create-test-project/SKILL.md +++ b/skills/create-test-project/SKILL.md @@ -17,12 +17,12 @@ From the caller; if omitted, fall back to the default. Ask only when a required - What to test `` — a rule's requirements, or the package's methods to exercise - Project root `` — the real sources the requirements point into. Default: current directory -- Tracking file `` — the rule or approximation file this test serves. Default: `.opentaint/tracking/rules/.yaml` or `.opentaint/tracking/approximations/.yaml` -- Test project `` — sources. Default: `.opentaint/test-projects/` -- Compiled output `` — the model. Default: `.opentaint/test-compiled/` +- Tracking file `` — the rule or approximation file this test serves. Default: `.opentaint/tracking/rules/lib/.yaml` or `.opentaint/tracking/approximations/.yaml` +- Test project `` — sources. Default: `.opentaint/test-projects/` (a rule project holds a `sinks/` and/or `sources/` sub-project under it) +- Compiled output `` — the model. Default: `.opentaint/test-compiled/` (one model per sub-project: `/sinks`, `/sources`) - Dependencies — exact Maven coordinates the samples need; default: the `dependencies` list in ``; with no tracking file, derive them from the project's `build.gradle`/`pom.xml` -`` is the rule name for a rule, or the dataflow approximation unit (`-dataflow`, e.g. `reactor-core-publisher-dataflow`) for an approximation; the two never share a folder +`` is the package (``) for a rule, or the dataflow approximation unit (`-dataflow`, e.g. `reactor-core-publisher-dataflow`) for an approximation; the two never share a folder ## Workflow @@ -30,14 +30,16 @@ From the caller; if omitted, fall back to the default. Ask only when a required Pick the scaffold by shape, then pass each coordinate from the tracking file's `dependencies` as a `--dependency`: -- a rule → `test rule init` (Gradle build + the test-util jar) -- a dataflow approximation → `test approximation init` (the same, plus `Taint.java` and the fixed `approximation-rule.yaml` the harness applies) +- a rule → `test rule init` — scaffolds a `sinks/` and a `sources/` sub-project under ``, each with `Taint.java` (the generic `source()`/`sink()`) and the generic marker lib rules in its `test-rules/`. Pass `--sinks-only` / `--sources-only` for a package with only one side, so you get a single sub-project +- a dataflow approximation → `test approximation init` (Gradle build + the test-util jar, plus `Taint.java` and the fixed `approximation-rule.yaml` the harness applies) ```bash -# rule test project +# rule test projects — both sides (this package has new sinks and new sources) opentaint test rule init \ - --dependency "org.mybatis:mybatis:3.5.13" \ - --dependency "javax.servlet:javax.servlet-api:4.0.1" + --dependency "org.springframework:spring-webflux:6.1.0" +# sink-only package +opentaint test rule init --sinks-only \ + --dependency "org.mybatis:mybatis:3.5.13" # dataflow approximation test project opentaint test approximation init \ @@ -46,26 +48,32 @@ opentaint test approximation init \ ### 2. Read the real signatures, then write samples -The requirements name sources and sinks. For each new source and new sink, find it in `` and read its real method signature and annotations — the pattern matches on those, so a sample built on the wrong signature compiles but verifies nothing. The flow itself is minimal, not the app's real path: to exercise a new sink, pass a known (built-in) source's value straight into it; to exercise a new source, pass its value straight into a known (built-in) sink +The requirements name sources and sinks. For each new source and new sink, read its real method signature from the package jar in `.opentaint/project/dependencies` (with `javap`) — the pattern matches on that, so a sample built on the wrong signature compiles but verifies nothing. The flow is minimal, not the app's real path, and the counterpart is always the generic `Taint` marker (so types always fit — never a real source/sink): -Write Java samples under `/src/main/java/test/`, each annotated with its expected verdict — `@PositiveRuleSample` (must flag) or `@NegativeRuleSample` (must not). `value` is the rule path relative to the ruleset root (with `.yaml`), `id` the short id from the YAML — not the full `--rule-id` used by `opentaint scan`. One expected verdict per sample. Split the samples across files however groups most logically — don't cram unrelated ones into a single class +- a **sink** sample (in the `sinks/` sub-project): assign `test.Taint.source()` to a local of the sink argument's type, then pass it in — `String t = test.Taint.source(); pkg.theSink(t);` (the generic `source()` infers the type, no cast) +- a **source** sample (in the `sources/` sub-project): call the new source, then pass its value into `test.Taint.sink(...)` — `var v = pkg.theSource(); test.Taint.sink(v);` (`sink` takes `Object`, so any type fits) -What the positive and negative samples must contain depends on the shape — load and follow the matching reference: +Write Java samples under `//src/main/java/test/`, each annotated with its expected verdict — `@PositiveRuleSample` (must flag) or `@NegativeRuleSample` (must not). `value`/`id` point at that sub-project's test join, which create-rule writes: `value = "java/security/-sinks.yaml", id = "-sinks"` for sink samples, `-sources` for source samples (`` = the package-kebab). `value` is the rule path relative to the test-rules root, `id` the short id — not the full `--rule-id` used by `opentaint scan`. One expected verdict per sample -- a rule → `references/rule.md` -- a dataflow approximation → `references/approximation.md` +Load and follow `references/rule.md` (for a rule) or `references/approximation.md` (for a dataflow approximation) ### 3. Compile +Compile each project to its own model — a rule's `sinks/` and `sources/` sub-projects separately; an approximation's single project once: + ```bash +# rule +opentaint compile /sinks -o /sinks +opentaint compile /sources -o /sources +# approximation opentaint compile -o ``` -A clean compile is the deliverable. If it won't build, fix the samples or dependencies before handing off +A clean compile is the deliverable. If one won't build, fix that project's samples or dependencies before handing off ## Output -- A compiled test project (``) plus its sources (``); report both paths and the exact `compile` command used +- The compiled model(s) (``, per sub-project for a rule) plus their sources (``); report the paths and the exact `compile` command(s) used - The tracking file's `test_project` stage marked done (see Tracking) ## Tracking diff --git a/skills/create-test-project/references/rule.md b/skills/create-test-project/references/rule.md index 3b824fb56..20f4041c9 100644 --- a/skills/create-test-project/references/rule.md +++ b/skills/create-test-project/references/rule.md @@ -2,31 +2,36 @@ ## Samples -- `@PositiveRuleSample` — a minimal flow that must flag: a known (built-in) source's value passed straight into the new sink, or the new source's value into a known (built-in) sink — real signatures, no extra hops. One per new source and per new sink; `value`/`id` point at the join rule -- `@NegativeRuleSample` — the safe (sanitized or parameterized) variant of the same, which must not flag; or a confirmed false positive you're narrowing the rule against. Keep it realistic, not stripped to constants +The fixed counterpart is always the generic `Taint` marker (scaffolded by `test rule init`), never a real source/sink — so types fit cast-free and the sample only exercises the rule under test. + +- `@PositiveRuleSample` — a minimal flow that must flag, with real sink/source signatures and no extra hops: + - **sink** under test → ` t = test.Taint.source(); pkg.theSink(t);` — declare the local as the sink argument's type; the generic `source()` infers it, no cast + - **source** under test → `var v = pkg.theSource(); test.Taint.sink(v);` — `sink` takes `Object`, so any type fits + One positive per new sink (in `sinks/`) and per new source (in `sources/`); `value`/`id` point at that sub-project's test join (`-sinks` / `-sources`, `` = the package-kebab) +- `@NegativeRuleSample` — the safe (sanitized or parameterized) variant of the same, which must not flag. Keep it realistic, not stripped to constants ```java package test; import org.opentaint.sast.test.util.PositiveRuleSample; import org.opentaint.sast.test.util.NegativeRuleSample; -import javax.servlet.http.HttpServletRequest; import java.sql.Connection; import java.sql.Statement; -public class MyVulnTest { +// sinks/ sub-project — a SQL sink fed by the generic marker source +public class SqlSinkTest { private Connection db; - @PositiveRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") - public void vulnerable(HttpServletRequest req) throws Exception { - String input = req.getParameter("id"); + @PositiveRuleSample(value = "java/security/jdbc-sinks.yaml", id = "jdbc-sinks") + public void vulnerable() throws Exception { + String input = test.Taint.source(); // generic marker: infers String, no cast Statement stmt = db.createStatement(); stmt.executeQuery("SELECT * FROM users WHERE id = " + input); } - @NegativeRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") - public void safe(HttpServletRequest req) throws Exception { - String input = req.getParameter("id"); + @NegativeRuleSample(value = "java/security/jdbc-sinks.yaml", id = "jdbc-sinks") + public void safe() throws Exception { + String input = test.Taint.source(); var pstmt = db.prepareStatement("SELECT * FROM users WHERE id = ?"); pstmt.setString(1, input); pstmt.executeQuery(); @@ -34,10 +39,6 @@ public class MyVulnTest { } ``` -## Suppress-FP - -When narrowing a rule after triage confirms a false positive, add that FP as a `@NegativeRuleSample` and pin every confirmed true positive as a `@PositiveRuleSample`, so the rule edit can't silently drop a real finding. Then recompile - ## Spring-entry flows If the flow only fires through a Spring entry point (controller → bean → sink), a plain method sample will be a `falseNegative`. Use the multi-module Spring layout — read `spring-multimodule.md` and follow it diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md index 9a8496d86..d32b53103 100644 --- a/skills/discover-attack-surface/SKILL.md +++ b/skills/discover-attack-surface/SKILL.md @@ -9,43 +9,45 @@ metadata: # Skill: Discover Attack Surface -Take one library the triage flagged and record the untrusted-data sources and dangerous sinks it introduces +Take one library the triage flagged, settle what the built-in rules already cover, and write the package's rule plan — the untrusted-data sources and dangerous sinks it introduces — for the next phase to build ## Inputs From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default - Package `` — the flagged library to drill (a `pending` entry in `coverage.yaml`) -- Project root `` — the project sources. Default: current directory +- Dependency jars `` — the project's resolved dependency jars, one per library. Default: `.opentaint/project/dependencies` - Project model `` — the built model. Default: `.opentaint/project` -- Tracking directory `` — where the coverage record and surface inventory live. Default: `.opentaint/tracking` -- Surface inventory `` — the running list of discovered sources/sinks. Default: `.opentaint/tracking/surface.yaml` +- Tracking directory `` — where the coverage record and the per-package lib units live. Default: `.opentaint/tracking` ## Workflow -Requires a built project model — without it you can miss entry points the analyzer actually sees +### 1. Settle built-in coverage first -### 1. Find how the project uses the package +Before enumerating anything, see what the built-ins already match for this package — read the lib rules (`opentaint health --rules`) plus `.opentaint/rules`. Decide one of: -Search through `` sources for ``'s imports and call sites. List the distinct methods of it the app calls — these, not the library's whole API, are the surface that matters +- **full** — the built-ins already match the package's relevant sources/sinks → write no lib unit, flip the `coverage.yaml` entry to `done` with a `builtin_coverage: full` note, and stop. Don't drill further +- **partial** — built-ins match some but miss methods/overloads/classes → plan only the missing ones (`coverage: expand`, ref the built-in for the rest) +- **none** — plan the package's surface from scratch -### 2. Identify sources and sinks +### 2. Enumerate sources and sinks from the package jar -Among the used methods, pick out the **sources** — methods returning attacker-controlled data (HTTP/RPC request data, message-broker payloads, second-order rows read back) — and the **sinks** — dangerous operations (query construction, command/file/path ops, deserialization, template/EL evaluation, LDAP/JNDI, reflection). Catalogue each end on its own; don't trace a flow between them — the analyzer pairs them at scan time +Find the package's jar in `` (match the artifact from the dependency GAV; `unzip -l | grep ` confirms it owns the package) and read its compiled API with `javap` / `unzip` — capture as many real sources and sinks as the package exposes, not just the ones the app happens to call today. Never read the analyzer jar — only dependency jars -For each, check whether a built-in rule already matches it (`opentaint health --rules` + `.opentaint/rules`); a built-in match records its ref instead of a new idea. Tag each sink with its vuln class (`ssrf`, `sqli`, `path-traversal`, …); sources aren't class-tagged +- **sources** — methods returning attacker-controlled data (HTTP/RPC request data, message-broker payloads, second-order rows read back); general, not class-tagged +- **sinks** — dangerous operations (query construction, command/file/path ops, deserialization, template/EL, LDAP/JNDI, reflection); tag each with its vuln class (`ssrf`, `sqli`, `path-traversal`, …) -Verify each is real before recording it: a source is genuinely attacker-controlled (a request param, header, body, or message payload is; an app constant or server config is not), a sink genuinely dangerous with tainted input (string-built SQL is; a parameterized query is not) +Verify each is real before recording: a source genuinely attacker-controlled, a sink genuinely dangerous with tainted input. Don't trace a flow between them — the analyzer pairs them at scan time -### 3. Record into the surface inventory +### 3. Write the package's rule plan -Append each source and sink to `` (schema below) — for a new one, a short idea of the pattern and where it lives; for a covered one, the built-in ref. Then flip the package's `coverage.yaml` entry to `status: done` with a one-line `notes`. Write it the moment you finish so the walk resumes cleanly +Write `/rules/lib/.yaml` — its new sources, its sinks grouped by `vuln_class`, the dependency GAV, `stages.description: done`, and each `coverage: new` or `expand`. Then flip the package's `coverage.yaml` entry to `status: done`. `` is the dotted package with `.` → `-`; the `package:` field keeps the real dotted name ## Output -- Sources and sinks the package introduces appended to `` +- A `/rules/lib/.yaml` rule plan (or, for `full` coverage, none — just the coverage note) - The package's `coverage.yaml` entry set `status: done` with a one-line `notes` -- A brief summary to the caller: the sources and sinks found (one line each, new vs built-in-covered). The inventory holds the detail — don't paste it back +- A brief summary to the caller: the sources and sinks planned (one line each, marked `new` / `expand`). The unit holds the detail — don't paste it back ## Tracking @@ -54,23 +56,33 @@ Append each source and sink to `` (schema below) — for a new one, a s ```yaml - package: org.springframework.web.reactive.function.client status: done - notes: WebClient request methods — SSRF sink not covered by built-ins; no new source + notes: WebClient request methods — SSRF sink; built-ins cover get(), expand with post()/put(); no new source ``` -`/surface.yaml` — append what the package introduces (`builtin: null` ⇒ new, to be written next phase): +`/rules/lib/.yaml` — the rule plan; fill only the discovery-stage fields (create-test-project and create-rule fill the rest): ```yaml -sources: # general untrusted-data sources - - package: org.springframework.web.reactive.function.server - idea: ServerRequest body/params/headers — untrusted request data; in RouterFunctions +package: org.springframework.web.reactive.function.client +dependencies: + - org.springframework:spring-webflux:6.1.0 +builtin_coverage: partial # partial | none +sources: # general, not class-tagged + - idea: ServerRequest body/params/headers — untrusted request data + coverage: new # new | expand builtin: null - dependency: org.springframework:spring-webflux:6.1.0 -sinks: # tagged by vuln class - - package: org.springframework.web.reactive.function.client - vuln_class: ssrf - idea: WebClient.get().uri($UNTRUSTED); in DefaultAttachmentService / ProxyFilter - builtin: null - dependency: org.springframework:spring-webflux:6.1.0 + rule_id: null +sinks: # grouped by vuln class + - vuln_class: ssrf + idea: WebClient.get/post/put().uri($UNTRUSTED) + coverage: expand + builtin: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink + rule_id: null +stages: + description: done + test_project: pending + tests_passing: pending +notes: > + free-form ``` ## Engine notes @@ -80,6 +92,5 @@ sinks: # tagged by vuln class ## Gotchas -- Describe, don't write — record source/sink ideas only; rules are written and tested in the next phase -- Don't re-declare a built-in source or sink — record its ref instead -- Don't grep dependency jars to find usage — read the app's own sources in `` +- Plan, don't write — record source/sink ideas only; the lib rules are written and tested in the next phase +- Don't re-declare a source or sink a built-in already matches — `coverage: expand` with only the missing methods, or fold it into `full` coverage diff --git a/skills/generate-poc/SKILL.md b/skills/generate-poc/SKILL.md index f6a2802bd..27fa85d5e 100644 --- a/skills/generate-poc/SKILL.md +++ b/skills/generate-poc/SKILL.md @@ -26,6 +26,8 @@ From the caller; if omitted, fall back to the default. Ask only when a required Reuse `` if given. Otherwise build and start the app the way the project expects (`spring-boot:run`, `java -jar`, `docker compose`, …), wait until it's listening, and note the base URL. The PoC must hit a live instance +When the app needs backing services (DB, broker, cache, …), bring them all up with one `docker compose` on a shared network rather than starting each by hand, and register it as a single `compose` entry + Bind to `127.0.0.1` (`--server.address=127.0.0.1`, `docker run -p 127.0.0.1:8080:8080`, a compose override on the port mapping) — never `0.0.0.0` or a public interface: a live exploit must not be reachable off-host. A specific non-local IP is fine when the test genuinely needs one, but never the public wildcard Once it's listening, record it in the registry (see § Tracking) so the orchestrator can reap it later From 957f1045076ae80d26473b3d15e91e361d770371 Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Fri, 5 Jun 2026 00:56:02 +0300 Subject: [PATCH 11/54] fix: gitignore --- cli/.gitignore | 2 +- .../example/rules/java/lib/test/generic-sink.yaml | 13 +++++++++++++ .../example/rules/java/lib/test/generic-source.yaml | 13 +++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 cli/internal/testrule/example/rules/java/lib/test/generic-sink.yaml create mode 100644 cli/internal/testrule/example/rules/java/lib/test/generic-source.yaml diff --git a/cli/.gitignore b/cli/.gitignore index 5a99dd502..8329bd621 100644 --- a/cli/.gitignore +++ b/cli/.gitignore @@ -3,7 +3,7 @@ bin/ !npm/bin/ dist/ dist-npm/ -lib/ +/lib/ /opentaint # Operating system files diff --git a/cli/internal/testrule/example/rules/java/lib/test/generic-sink.yaml b/cli/internal/testrule/example/rules/java/lib/test/generic-sink.yaml new file mode 100644 index 000000000..af016b3bc --- /dev/null +++ b/cli/internal/testrule/example/rules/java/lib/test/generic-sink.yaml @@ -0,0 +1,13 @@ +rules: + - id: generic-taint-sink + options: + lib: true + severity: NOTE + message: Generic dangerous-operation marker (test.Taint.sink) + metadata: + short-description: Generic test taint sink + languages: + - java + patterns: + - pattern: test.Taint.sink($VALUE) + - focus-metavariable: $VALUE diff --git a/cli/internal/testrule/example/rules/java/lib/test/generic-source.yaml b/cli/internal/testrule/example/rules/java/lib/test/generic-source.yaml new file mode 100644 index 000000000..8a1432fda --- /dev/null +++ b/cli/internal/testrule/example/rules/java/lib/test/generic-source.yaml @@ -0,0 +1,13 @@ +rules: + - id: generic-taint-source + options: + lib: true + severity: NOTE + message: Generic untrusted-data marker (test.Taint.source) + metadata: + short-description: Generic test taint source + languages: + - java + patterns: + - pattern: | + $UNTRUSTED = test.Taint.source(); From be96c68aa247907221aa140fbdabfbff62185ac3 Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Fri, 5 Jun 2026 02:24:52 +0300 Subject: [PATCH 12/54] feat: add some improvements --- skills/appsec-agent/SKILL.md | 35 ++++++----- .../appsec-agent/references/approximations.md | 2 + .../appsec-agent/references/discover-rules.md | 2 +- skills/assemble-lib-rules/SKILL.md | 62 ++++++++++--------- skills/create-dataflow-approximation/SKILL.md | 2 + .../SKILL.md | 19 +++++- skills/create-rule/SKILL.md | 5 +- skills/create-test-project/SKILL.md | 2 + skills/debug-rule/SKILL.md | 3 + 9 files changed, 85 insertions(+), 47 deletions(-) diff --git a/skills/appsec-agent/SKILL.md b/skills/appsec-agent/SKILL.md index c88c9bc7b..2ebf94396 100644 --- a/skills/appsec-agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -27,7 +27,7 @@ After installing, run `opentaint health` to confirm the autobuilder/analyzer/rul ## Choose a workflow -Begin by asking the user two things — two separate AskUserQuestion calls, scan level then triage level. Record the chosen `scan_level` and `triage_level` in `state.yaml`: +Begin by asking the user both things in a single AskUserQuestion call — two questions, scan level and triage level, presented together (never one call then another). Record the chosen `scan_level` and `triage_level` in `state.yaml`: 1. Scan level — `lite` · `normal` · `deep` - lite — build + scan with existing rules @@ -82,13 +82,16 @@ Orchestration practices: ## Resource limits -A fan-out unit that compiles or scans (approximation creation, rule test-projects, discovery) each spawns a heavy `opentaint` JVM, so unbounded parallelism OOMs the machine. At run start, compute a concurrency cap and never dispatch more than that many such subagents at once: +Two limits apply to every fan-out — a global one against rate-limiting, and a tighter one against memory: -- cores — `nproc` (Linux) / `sysctl -n hw.ncpu` (macOS) -- free memory in GB — `free -g` (Linux, the `available` column) / `sysctl -n hw.memsize` ÷ 1024³ (macOS) -- cap = `min(cores, floor(free_GB / 4))`, floored at 1 — budget ~4 GB per concurrent JVM +- Global cap of 7 — never dispatch more than 7 subagents at once, of any kind. Bursting more reliably trips transient rate-limiting (a fan-out of 20 left half the agents rate-limited mid-run). It binds light and heavy agents alike +- RAM-heavy agents each spawn a heavy `opentaint` JVM, so they take a tighter memory bound on top of the global cap. The heavy set is exactly `build-project`, `run-scan`, `create-rule`, `create-dataflow-approximation`, and sometimes `debug-rule` (when it traces a real scan). Compute the bound at run start and never dispatch more than this many heavy subagents at once: + - cores — `nproc` (Linux) / `sysctl -n hw.ncpu` (macOS) + - free memory in GB — `free -g` (Linux, the `available` column) / `sysctl -n hw.memsize` ÷ 1024³ (macOS) + - `cap_heavy = max(1, min(cores, floor(free_GB / 2), 7))` — budget ~2 GB per concurrent JVM +- Every other agent is not RAM-bound — discover-attack-surface (reads jars + the built model), create-test-project (compiles once), triage-dependencies, analyze-external-methods, analyze-findings, create-pass-through-approximation, assemble-lib-rules, generate-poc. They're held only by the global cap of 7 -It's machine state, not run state — recompute on resume, don't track it. Light subagents that only read/write tracking (analyze-external-methods, analyze-findings, assemble-lib-rules) aren't bound by the cap; PoC is already sequential. +It's machine state, not run state — recompute on resume, don't track it. PoC is already sequential. ## State and resumption @@ -181,21 +184,23 @@ notes: > free-form ``` -rules/join/.yaml — one per vuln-class security join, written by assemble-lib-rules after the lib rules exist and verified by the main scan: +rules/join/.yaml — one file per vuln class, written by assemble-lib-rules after the lib rules exist and verified by the main scan. A join references exactly ONE sink rule, so a class with several sinks holds several joins — one entry under `joins:` per sink rule, each its own file/id: ```yaml -name: ssrf # the vuln class; the join rule's file and id -rule_id: java/security/ssrf.yaml:ssrf -artifact: .opentaint/rules/java/security/ssrf.yaml -sources: # built-in + created +name: ssrf +sources: - ref: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - ref: java/lib/spring/webflux-request-source.yaml#webflux-request-source -sinks: # created + built-in - - new: java/lib/spring/webclient-ssrf-sink.yaml#webclient-ssrf-sink - - builtin: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink +joins: + - rule_id: java/security/ssrf-webclient-ssrf-sink-lib-ext.yaml:ssrf-webclient-ssrf-sink-lib-ext + artifact: .opentaint/rules/java/security/ssrf-webclient-ssrf-sink-lib-ext.yaml + sink: { new: java/lib/spring/webclient-ssrf-sink.yaml#webclient-ssrf-sink } + - rule_id: java/security/ssrf-java-ssrf-sink-lib-ext.yaml:ssrf-java-ssrf-sink-lib-ext + artifact: .opentaint/rules/java/security/ssrf-java-ssrf-sink-lib-ext.yaml + sink: { builtin: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink } stages: # pending | in_progress | done written: done - verified: pending # done once the main scan confirms it + verified: pending notes: > free-form ``` diff --git a/skills/appsec-agent/references/approximations.md b/skills/appsec-agent/references/approximations.md index 68a280b41..8db89ffea 100644 --- a/skills/appsec-agent/references/approximations.md +++ b/skills/appsec-agent/references/approximations.md @@ -1,5 +1,7 @@ # Approximation iteration +Every dropped method MUST end up either modeled (a passthrough/dataflow unit) or in `skipped.yaml` — no exceptions, no "good enough". This loop does not finish while any method in `dropped-external-methods.yaml` is still unclassified. Do not stop early because the important-looking ones are done, because a batch is large, or because the remaining methods seem minor — an unmodeled method silently kills taint and hides real findings. Keep iterating until the only thing left dropped is the skip set. + Loop until stabilization: 1. analyze-external-methods — Inputs: dropped-file `.opentaint/results/dropped-external-methods.yaml`, tracking-dir `.opentaint/tracking`, ``. Writes one `approximations/-passthrough.yaml` and/or `-dataflow.yaml` per package, plus `skipped.yaml`, only for methods not already in a unit. Returns one line per unit diff --git a/skills/appsec-agent/references/discover-rules.md b/skills/appsec-agent/references/discover-rules.md index b4e53bbc1..b5189499e 100644 --- a/skills/appsec-agent/references/discover-rules.md +++ b/skills/appsec-agent/references/discover-rules.md @@ -21,4 +21,4 @@ If create-rule reports the test project drops a library method on the rule's flo ## Assemble joins -Once the per-package lib rules are done, delegate assemble-lib-rules. Inputs: lib-units `.opentaint/tracking/rules/lib`, rules-dir `.opentaint/rules`, tracking-dir `.opentaint/tracking`. With every created lib rule in one view it writes one security join per vuln class — `tracking/rules/join/.yaml` + `.opentaint/rules/java/security/.yaml` — merging built-in + created sources with the new sinks, and created sources with built-in sinks (new-end combinations only). These carry no test project; the main scan verifies them (references/scan.md). One agent for the global view; fan out by vuln class only if there are many. +Once the per-package lib rules are done, delegate assemble-lib-rules. Inputs: lib-units `.opentaint/tracking/rules/lib`, rules-dir `.opentaint/rules`, tracking-dir `.opentaint/tracking`. With every created lib rule in one view it writes the security joins — one `tracking/rules/join/.yaml` per vuln class (listing its joins) plus one `.opentaint/rules/java/security/--lib-ext.yaml` per join (a join refs exactly one sink, so a class with several sinks yields several joins) — merging built-in + created sources with the new sinks, and created sources with built-in sinks (new-end combinations only). These carry no test project; the main scan verifies them (references/scan.md). One agent for the global view; fan out by vuln class only if there are many. diff --git a/skills/assemble-lib-rules/SKILL.md b/skills/assemble-lib-rules/SKILL.md index 43e935d55..f53d77ada 100644 --- a/skills/assemble-lib-rules/SKILL.md +++ b/skills/assemble-lib-rules/SKILL.md @@ -27,17 +27,24 @@ Built-in rules are available at `opentaint health --rules` Read every per-package lib unit in `` (the source/sink `rule_id`s create-rule wrote, sinks carrying their `vuln_class`) and the built-in source/sink lib rules (`opentaint health --rules`). Collect every source rule (built-in + created) and every sink rule grouped by vuln class -### 2. Write one security join per vuln class +### 2. Write one security join per (vuln class, sink rule) -For each vuln class that has a **created** (new) sink or for which there is any **created** source, write `/java/security/.yaml` (file and `id` = the class), `mode: join`, refing the relevant sources and sinks and wiring **only new-end combinations** in `on:`: +A join references exactly ONE right-hand (sink) rule — you cannot merge several sinks into one join. So a vuln class with more than one relevant sink becomes several joins: one per sink rule, each refing all the relevant sources on the left. Sources are many; the sink is always one. -- built-in sources + created sources → that class's **new** sinks -- created sources → that class's **built-in** sinks -- skip built-in source → built-in sink — the built-in join already covers it, so repeating it double-reports +For each vuln class, and within it each sink rule that needs new wiring, write `/java/security/--lib-ext.yaml` with `mode: join`, refing the relevant sources + that one sink, wiring only new-end combinations in `on:`: + +- a created (new) sink ← from every relevant source (built-in + created) +- a built-in sink ← from created sources only (built-in source → built-in sink is already covered by the built-in join — repeating it double-reports) + +Two rules that bite here: + +- Unique id — use `id: --lib-ext`, never the bare class name; a custom join named `ssrf`/`xxe`/`path-traversal` collides silently with the built-in join of that id and is dropped with no error (only the scan's rule statistics reveal it) +- Same metavariable both sides — every `on:` clause connects the metavariable both lib rules bind (`$UNTRUSTED` by convention) as `source.$UNTRUSTED -> sink.$UNTRUSTED`; don't invent a new name on either end, or the join won't connect ```yaml +# java/security/ssrf-webclient-ssrf-sink-lib-ext.yaml rules: - - id: ssrf + - id: ssrf-webclient-ssrf-sink-lib-ext severity: ERROR message: Untrusted data reaches an SSRF sink metadata: @@ -48,20 +55,17 @@ rules: join: refs: - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - as: servlet-source # built-in source + as: servlet-source - rule: java/lib/spring/webflux-request-source.yaml#webflux-request-source - as: webflux-source # created source + as: webflux-source - rule: java/lib/spring/webclient-ssrf-sink.yaml#webclient-ssrf-sink - as: new-sink # created (new) sink - - rule: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink - as: builtin-sink # built-in sink + as: sink on: - - 'servlet-source.$UNTRUSTED -> new-sink.$UNTRUSTED' # built-in source → new sink - - 'webflux-source.$UNTRUSTED -> new-sink.$UNTRUSTED' # created source → new sink - - 'webflux-source.$UNTRUSTED -> builtin-sink.$UNTRUSTED' # created source → built-in sink + - 'servlet-source.$UNTRUSTED -> sink.$UNTRUSTED' + - 'webflux-source.$UNTRUSTED -> sink.$UNTRUSTED' ``` -(no `servlet-source -> builtin-sink` line — the built-in join already covers that pair) +The same class's built-in sink is a second file (`ssrf-java-ssrf-sink-lib-ext.yaml`), refing only the created sources → that built-in sink. The `#` comments in these examples are for you — don't copy them into the rules you write ### 3. Stop — the main scan verifies @@ -69,32 +73,34 @@ These joins carry no test project — the main scan applies them. Write them and ## Output -- One `/java/security/.yaml` per vuln-class join, refing the created + built-in lib rules -- One `/rules/join/.yaml` per join, with `stages.written: done` -- A brief summary to the caller: one line per join (class, source/sink count, which ends are new) +- One `/java/security/--lib-ext.yaml` per (vuln class, sink rule), each refing all relevant sources + its one sink +- One `/rules/join/.yaml` per vuln class, listing every join it produced, with `stages.written: done` +- A brief summary to the caller: one line per join (class, sink, source count, which ends are new) ## Tracking -`/rules/join/.yaml` — the security join, verified by the main scan: +`/rules/join/.yaml` — one file per vuln class, listing each join (one per sink rule), verified by the main scan: ```yaml -name: ssrf # the vuln class; the join rule's file and id -rule_id: java/security/ssrf.yaml:ssrf -artifact: .opentaint/rules/java/security/ssrf.yaml -sources: # built-in + created +name: ssrf +sources: - ref: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - ref: java/lib/spring/webflux-request-source.yaml#webflux-request-source -sinks: # created + built-in - - new: java/lib/spring/webclient-ssrf-sink.yaml#webclient-ssrf-sink - - builtin: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink +joins: + - rule_id: java/security/ssrf-webclient-ssrf-sink-lib-ext.yaml:ssrf-webclient-ssrf-sink-lib-ext + artifact: .opentaint/rules/java/security/ssrf-webclient-ssrf-sink-lib-ext.yaml + sink: { new: java/lib/spring/webclient-ssrf-sink.yaml#webclient-ssrf-sink } + - rule_id: java/security/ssrf-java-ssrf-sink-lib-ext.yaml:ssrf-java-ssrf-sink-lib-ext + artifact: .opentaint/rules/java/security/ssrf-java-ssrf-sink-lib-ext.yaml + sink: { builtin: java/lib/generic/ssrf-sinks.yaml#java-ssrf-sink } stages: written: done - verified: pending # done once the main scan confirms it + verified: pending notes: > free-form ``` ## Gotchas -- One join per vuln class, aggregating every relevant source — don't write a separate join per source or per package +- One join references exactly one sink — a class with N relevant sinks yields N joins, each aggregating every relevant source; never pack two sinks into one join - Ref the existing lib rules (built-in + created); never re-declare a source or sink diff --git a/skills/create-dataflow-approximation/SKILL.md b/skills/create-dataflow-approximation/SKILL.md index d27a5dcab..6f13dfd07 100644 --- a/skills/create-dataflow-approximation/SKILL.md +++ b/skills/create-dataflow-approximation/SKILL.md @@ -121,6 +121,8 @@ Do not touch other stages or fields ## Constraints - Java 8 source compatibility +- Put the `@Approximate` classes in a neutral package (e.g. `com.example.approximations`) — never the target library's own package. Inside the library's package every bare FQN resolves to your approximation's non-generic class instead of the real type, breaking compilation wholesale +- Model every method and overload the unit lists, not only the shapes you happen to have a sample for — an under-covered unit silently drops taint through the overloads you skipped - One approximation class per target class — a strict bijection enforced at load (duplicates throw `IllegalArgumentException`). Built-in dataflow approximations are first-priority and presumed correct; you cannot override them — see Troubleshooting if debug-rule traces a kill to one - Method signatures must match the target class methods exactly - Don't unpack or grep the analyzer JAR for built-in models or signatures — its internals aren't a stable API; go through the CLI diff --git a/skills/create-pass-through-approximation/SKILL.md b/skills/create-pass-through-approximation/SKILL.md index 903cfcf7c..2ea2044c1 100644 --- a/skills/create-pass-through-approximation/SKILL.md +++ b/skills/create-pass-through-approximation/SKILL.md @@ -18,6 +18,7 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Methods to model `` — the target method(s) and what each propagates, from the tracking file's `methods` (all `type: passthrough`) - Tracking file `` — the passThrough approximation unit. Default: `.opentaint/tracking/approximations/.yaml` - Config output `` — where to write the passThrough approximation. Default: `.opentaint/pass-through/.yaml` +- Test model `` (optional) — any compiled model to dry-run the config against for a load/parse check. Default: `.opentaint/project` if it exists, else any `.opentaint/test-compiled/*` model ## Workflow @@ -125,7 +126,20 @@ passThrough: - '[*]' ``` -### 2. Verification is the scan +### 2. Optional — dry-run the config for load errors + +There's no dedicated load-check command, but if a compiled `` is present you can catch YAML load/parse errors before the main scan by running a quick scan with the config applied (won't verify propagation — there's no matching flow — only that the config loads): + +```bash +opentaint scan --project-model \ + -o .opentaint/test-results//passthrough-loadcheck.sarif \ + --ruleset builtin \ + --passthrough-approximations +``` + +A config error aborts the scan with the parse/load message — fix the YAML and re-run. Nice-to-have, not required; skip it when no model is around + +### 3. Verification is the scan There's no test project for passThrough. The main scan applies `` and the scan agent reports back. You're re-invoked to fix the config when that scan shows: @@ -135,7 +149,7 @@ There's no test project for passThrough. The main scan applies `` a Never invoke or grep the analyzer JAR — its internals aren't a stable API; for built-in rules use `opentaint health --rules`, for everything else the CLI -### 3. When the config won't converge +### 4. When the config won't converge After ~2 fix re-invocations without a clearer cause — matcher fields and `from`/`to` checked, writer/reader slots confirmed identical, the modeled method no longer in `dropped-external-methods.yaml`, but the scan still doesn't surface the flow — don't keep guessing. Report non-convergence to the caller; the orchestrator escalates to debug-rule for a fact-reachability trace of where taint dies @@ -183,6 +197,7 @@ Conditions (the only keys that load from YAML) ## Gotchas +- The `#` comments in the examples here are for you — don't copy them into the config you write; keep produced YAML comment-free - The approximation merges with built-ins at the rule level — a provided rule overrides a built-in only if it matches one. Don't redefine a method already in `approximated-external-methods.yaml` unless debug-rule shows the built-in isn't propagating taint here, then override deliberately - A wrong argument position copies the wrong value — point `from`/`to` at the tainted one - In doubt about how a method moves taint — which argument or field reaches the result — read the library's source rather than guessing diff --git a/skills/create-rule/SKILL.md b/skills/create-rule/SKILL.md index af73ee74c..682d76989 100644 --- a/skills/create-rule/SKILL.md +++ b/skills/create-rule/SKILL.md @@ -170,7 +170,7 @@ stages: - Library rules MUST have `options.lib: true` and `severity: NOTE` - Security rules (the joins) MUST have `metadata.cwe` and `metadata.short-description` -- Source/sink metavariable names must match across `refs` and `on` clauses, or the join won't connect +- Source/sink metavariable names must match across `refs` and `on` clauses, or the join won't connect; bind the tainted value as `$UNTRUSTED` in every lib source/sink rule, so the security joins assemble-lib-rules writes later reference one consistent name - The `rule:` path in `refs` is relative to the ruleset root — a marker ref resolves under the test project's `test-rules`, a lib ref under `` - Rule IDs must be globally unique - For simple structural patterns (no dataflow), omit `mode:` (uses default mode) @@ -182,4 +182,7 @@ stages: - A wrong argument position in `(..., $UNTRUSTED, ...)` focuses the wrong parameter — point `focus-metavariable` at the tainted one - Refine the rule, never the test project — don't edit or weaken samples here; if one is wrong, hand it back upstream - A positive that won't pass because a library method drops taint is not a rule bug — don't broaden the rule to force it; surface it for approximation (step 5) +- The `#` comments in the examples here are for you — don't copy them into the rule files you write; keep produced YAML comment-free +- An implicit-receiver pattern `this.method(...)` is unsupported ("Failed to transform pattern: ThisExpr") — match the unqualified call as a bare `method($X)` pattern instead +- A structural (no-source) sink and a taint-flow sink can't share one join id — the engine forbids one id being both; if a class needs both, split them into separate rules/joins - Don't unpack or grep the analyzer JAR for built-in rules — its internals aren't a stable API; read the YAMLs from `opentaint health --rules` diff --git a/skills/create-test-project/SKILL.md b/skills/create-test-project/SKILL.md index 214d9102e..54d8cf3da 100644 --- a/skills/create-test-project/SKILL.md +++ b/skills/create-test-project/SKILL.md @@ -91,4 +91,6 @@ Do not touch other stages or fields - One expected verdict per sample - One unit per `` folder — never write into another unit's project, so concurrent agents don't race +- The scaffold (`test rule init` / `test approximation init`) defaults to Java 8 — bump `source/targetCompatibility` when the samples use a library needing Java 17/21 (Spring 7, spring-data 4, Lucene 10, Jackson 3). Set `release` on the running JDK; a Gradle `toolchain{}` block fails here (only JDK 21 is locatable, with no download repo) +- A positive must route the marker `source()` into the sink — a sink whose only untrusted input is a bare method parameter with no in-sample source (e.g. `getValue(Expression e)`) can't be satisfied by any taint-flow join; feed the parameter from `test.Taint.source()` or the sample is unprovable - For library-method behavior the requirements don't pin down (does it sanitize? propagate taint?), read the dependency or its docs rather than guessing diff --git a/skills/debug-rule/SKILL.md b/skills/debug-rule/SKILL.md index 1dca302b5..7961e657f 100644 --- a/skills/debug-rule/SKILL.md +++ b/skills/debug-rule/SKILL.md @@ -59,6 +59,8 @@ A finding that appears here but not in the full run points to entry-point discov ### 4. Classify the cause +An engine bug is the least likely outcome by far — assume it last. Nearly every taint kill is a missing or wrong library model (an un-approximated method, or an approximation whose signature/from→to is off) or a rule defect; both are tedious to rule out, but that's not a reason to jump to "engine". Exhaust the first two before you even consider the third. + The killing instruction decides who owns the fix: - external library method → missing or broken model. If the method is NOT in `approximated-external-methods.yaml`, step 1 should have caught it (route to analyze-external-methods + create-*-approximation). If it IS listed (a built-in claims to model it) yet taint dies here, the built-in is wrong for this case — write your own override: passthrough overrides at the rule level, so prefer a passthrough config for the specific method; a dataflow override conflicts with built-ins at load, so fall back to passthrough on that method, or if only a dataflow shape can express the propagation, treat it as an engine issue @@ -77,5 +79,6 @@ None — diagnostic, writes no tracking file ## Gotchas +- Don't reach for an "engine" verdict because ruling out a model or rule cause is tedious — a missing/wrong approximation or a rule gap is overwhelmingly more likely. Classify engine only when the killing instruction is a plain propagation (assignment, cast, field read, an already-modeled call) with the model proven complete and the rule proven correct - One rule per fact-reachability run; across many rules the report is unusably huge - Debug the exact run that showed the problem — same model, rulesets, approximation dirs — or you debug something else; never swap the model mid-analysis From 670fd6c1502b55a904f31fdc22c8825c5411d6ed Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Fri, 5 Jun 2026 17:55:20 +0300 Subject: [PATCH 13/54] fix: improve approximations writing, add instructions about stored injections --- skills/analyze-external-methods/SKILL.md | 20 ++++++++-------- skills/appsec-agent/SKILL.md | 6 +++-- .../appsec-agent/references/approximations.md | 6 ++--- .../appsec-agent/references/discover-rules.md | 2 +- skills/appsec-agent/references/scan.md | 2 +- skills/create-dataflow-approximation/SKILL.md | 5 ++-- .../SKILL.md | 23 +++++++++++++++++-- skills/debug-rule/SKILL.md | 2 +- skills/discover-attack-surface/SKILL.md | 3 ++- skills/run-scan/SKILL.md | 2 +- skills/triage-dependencies/SKILL.md | 2 +- 11 files changed, 48 insertions(+), 25 deletions(-) diff --git a/skills/analyze-external-methods/SKILL.md b/skills/analyze-external-methods/SKILL.md index e223729f3..b73aa2bca 100644 --- a/skills/analyze-external-methods/SKILL.md +++ b/skills/analyze-external-methods/SKILL.md @@ -9,13 +9,13 @@ metadata: # Skill: Analyze External Methods -Read the methods where the analyzer killed taint, group them by library and kind, and record per group what to model and how — so the right skill can build each approximation +Read the methods where the analyzer lost track of the data, group them by library and kind, and record per group what to model and how — so the right skill can build each approximation ## Inputs From the caller; if omitted, fall back to the default. Ask only when a required input is missing and has no sensible default -- Dropped methods `` — methods where the analyzer killed dataflow facts for lack of a model. Default: `.opentaint/results/dropped-external-methods.yaml` +- Dropped methods `` — methods where the analyzer dropped the data for lack of a model. Default: `.opentaint/results/dropped-external-methods.yaml` - Tracking directory `` — where approximation tracking files are written. Default: `.opentaint/tracking` - Project root `` — sources and build files, to resolve which library owns each method. Default: current directory @@ -25,16 +25,16 @@ Requires ``, without it there's nothing to group ### 1. Group by package and kind -Every method in `` is a taint-killing path — model all of them. First decide each method's kind: +Every method in `` is a place the data is lost for lack of a model — model all of them. First decide each method's kind: -- passthrough — taint moves by a simple from→to copy: a getter, arg→result, builder, container field, collection `add`/`get`, `StringBuilder.append`, `Stream.collect` -- dataflow — taint flows through a lambda/callback/functional interface or an async chain +- passthrough — data moves by a simple from→to copy: a getter, arg→result, builder, container field, collection `add`/`get`, `StringBuilder.append`, `Stream.collect` +- dataflow — data flows through a lambda/callback/functional interface or an async chain Group by package AND kind — one tracking file per (package, kind): `-passthrough.yaml` for the simple copies, `-dataflow.yaml` for the lambda/callback/async ones. `` is the dotted Java package with `.` replaced by `-` (e.g. `reactor.core.publisher` → `reactor-core-publisher`) so it's filesystem-friendly; the YAML `package:` field keeps the real dotted name. Kind is the only split (no finer sub-groups). Each unit is one agent's work ### 2. Flag methods to skip -The one exception: a few methods the engine asks about don't carry taint — logging, metrics, sanitizers (e.g. `org.slf4j.Logger#info`). List those in `skipped.yaml` instead of an approximation group; the default call-to-return behavior is already correct for them +The one exception: a few methods the engine asks about don't affect the data flow — logging, metrics (e.g. `org.slf4j.Logger#info`). List those in `skipped.yaml` instead of an approximation group; the default call-to-return behavior is already correct for them ## Output @@ -53,7 +53,7 @@ stages: description: done written: pending notes: > - DTO getters returning tainted fields + DTO getters returning fields that carry the data methods: - target: "com.foo.Wrapper#getValue" type: passthrough @@ -69,14 +69,14 @@ stages: test_project: pending tests_passing: pending notes: > - Reactor operators carrying taint through the mapper + Reactor operators carrying data through the mapper methods: - target: "com.foo.Reactor#flatMap" type: dataflow ``` ```yaml -# skipped.yaml — engine asks to approximate these, but they don't carry taint +# skipped.yaml — engine asks to approximate these, but they don't affect the data flow methods: - "org.slf4j.Logger#info" - "org.slf4j.Logger#debug" @@ -84,6 +84,6 @@ methods: ## Gotchas -- Model every method in `` — each is a real taint-killing path; don't second-guess the list. The only exceptions are the obvious non-taint methods you move to `skipped.yaml` +- Model every method in `` — each is a real place the data is lost; don't second-guess the list. The only exceptions are the obvious methods that don't move data, which you move to `skipped.yaml` - Approximate only external library methods — never an application-internal class. If one shows up as a candidate, drop it - One file = one (package, kind) = one agent: passThrough and dataflow go in separate files; never put a method in two, or two agents collide diff --git a/skills/appsec-agent/SKILL.md b/skills/appsec-agent/SKILL.md index 2ebf94396..ff26283ca 100644 --- a/skills/appsec-agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -105,7 +105,7 @@ On start, and after any compaction, reconstruct position from artifacts before d ## Tracking layout -The single source of truth for the tracking schema; each skill writes only its own slice (named in its block reference). +The single source of truth for the tracking schema; each skill writes only its own slice (named in its block reference). The `#` comments in the YAML below are for understanding only — never copy them into produced files. ``` .opentaint/tracking/ @@ -236,7 +236,7 @@ methods: # engine asks to approximate these, but they carry no ta project/ # built project model (project.yaml) rules/java/{lib/generic,lib/spring,security}/ # custom rules pass-through/.yaml # passThrough approximation configs - approximations// # code-based (dataflow) approximation sources, per unit + dataflow// # code-based (dataflow) approximation sources, per unit test-projects// # per-unit test project sources; a rule unit holds sinks/ and sources/ sub-projects, each with a test-rules/ (the generic markers + that side's test join — test-only, never loaded by the main scan) test-compiled// # per-unit compiled test model (a rule unit: sinks/ and sources/ models) test-results// # per-unit test outputs @@ -252,8 +252,10 @@ methods: # engine asks to approximate these, but they carry no ta ## Key constraints +- the engine models stored / second-order injection (data persisted then read back) on its own — no source, sink-side, or propagator needs to be added for the store→read path - approximations apply only to external library methods — never an application-internal class - `--passthrough-approximations` merges with built-ins at the rule level; a provided rule overrides a built-in only when it matches one already there — it does not replace the built-in set - both approximation dir flags walk the tree recursively, so the final scan points at the parent dirs and applies every unit - `--rule-id` drops every rule not named, including library `refs` — list them all when restricting - a custom approximation targeting a class that already has a built-in one errors at load +- a custom dataflow approximation overrides a passThrough for the same method — the passThrough→dataflow fallback when a passThrough won't converge; remove that method's passThrough config when re-planning it as dataflow, before the dataflow one is tested or scanned, to avoid override issues diff --git a/skills/appsec-agent/references/approximations.md b/skills/appsec-agent/references/approximations.md index 8db89ffea..ca53c10c1 100644 --- a/skills/appsec-agent/references/approximations.md +++ b/skills/appsec-agent/references/approximations.md @@ -7,9 +7,9 @@ Loop until stabilization: 1. analyze-external-methods — Inputs: dropped-file `.opentaint/results/dropped-external-methods.yaml`, tracking-dir `.opentaint/tracking`, ``. Writes one `approximations/-passthrough.yaml` and/or `-dataflow.yaml` per package, plus `skipped.yaml`, only for methods not already in a unit. Returns one line per unit 2. Fan out per unit (capped per SKILL.md § Resource limits — these units compile and scan): - passthrough → create-pass-through-approximation — Inputs: `` from the unit, ``, config-file `.opentaint/pass-through/.yaml`. Write-only; sets `written` + `artifact`. No test project - - dataflow → two sequential dispatches per unit: first create-test-project (dataflow shape) produces `.opentaint/test-compiled/` and sets `test_project: done`; on its return, dispatch create-dataflow-approximation against that model (approx-src `.opentaint/approximations/`) — sets `tests_passing` + `artifact` (`test approximation run` auto-applies its own fixed rule — nothing to pass) -3. Re-scan (references/scan.md) with both approximation dirs pointing at the parents (`.opentaint/pass-through`, `.opentaint/approximations`) -4. Pass-through verify (no separate skill): the scan agent reports any method you modeled that is still in `dropped-external-methods.yaml`, or any config load error. Re-invoke that package's create-pass-through-approximation agent to fix (matcher / from→to / YAML), then rescan. A dataflow method that still drops despite passing its isolated test is an escalation case (references/escalation.md), not a re-write + - dataflow → two sequential dispatches per unit: first create-test-project (dataflow shape) produces `.opentaint/test-compiled/` and sets `test_project: done`; on its return, dispatch create-dataflow-approximation against that model (approx-src `.opentaint/dataflow/`) — sets `tests_passing` + `artifact` (`test approximation run` auto-applies its own fixed rule — nothing to pass) +3. Re-scan (references/scan.md) with both approximation dirs pointing at the parents (`.opentaint/pass-through`, `.opentaint/dataflow`) +4. Pass-through verify (no separate skill): the scan agent reports any method you modeled that is still in `dropped-external-methods.yaml`, or any config load error. Re-invoke that package's create-pass-through-approximation agent to fix (matcher / from→to / YAML), then rescan. When that agent reports the passThrough won't converge (after ~2 fixes, no clear cause), don't keep re-invoking it — a passThrough copy can't express this method's propagation. Re-plan that method as a dataflow unit (drop its passThrough config first so the two don't collide) and run it through the create-test-project → create-dataflow-approximation pipeline; the custom dataflow overrides the passThrough. A dataflow method that still drops despite passing its isolated test is an escalation case (references/escalation.md), not a re-write 5. Stabilization: keep classifying until every method in `dropped-external-methods.yaml` is either modeled (a passthrough/dataflow unit) or listed in `skipped.yaml`, and a rescan surfaces no new dropped methods — i.e. the only thing left dropped is the skip set. Otherwise feed the newly dropped methods back into step 1 Set `phases.approximations: in_progress` across the loop, `done` at stabilization. diff --git a/skills/appsec-agent/references/discover-rules.md b/skills/appsec-agent/references/discover-rules.md index b5189499e..3d77a5ffb 100644 --- a/skills/appsec-agent/references/discover-rules.md +++ b/skills/appsec-agent/references/discover-rules.md @@ -15,7 +15,7 @@ Then a quick area cross-check: across network, persistence, environment, seriali Build the lib rules from the `tracking/rules/lib/.yaml` units. Fan out per package (capped per SKILL.md § Resource limits — each unit compiles and scans); each unit is a two-step pipeline, dispatched one step at a time after the prior step's artifact: 1. create-test-project — Inputs: `` = the lib unit's sources/sinks, ``, `` `.opentaint/tracking/rules/lib/.yaml`, test-project `.opentaint/test-projects/`, test-compiled `.opentaint/test-compiled/`, dependencies from the unit. Scaffolds the `sinks/` and/or `sources/` marker projects (`test rule init`, `--sinks-only`/`--sources-only` for a one-sided package), writes the generic-marker counterpart samples, compiles each sub-project. Sets `test_project: done` -2. create-rule — Inputs: requirements (the lib unit), test-compiled `.opentaint/test-compiled/`, test-project `.opentaint/test-projects/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/pass-through` / `.opentaint/approximations`. Writes the package's source lib rules + per-vuln-class sink lib rules into `.opentaint/rules`, the test joins against the markers into each test project's `test-rules`, and iterates `test rule run` per sub-project until every sample passes; sets `tests_passing: done` and the lib rules' `rule_id`s/`artifact` +2. create-rule — Inputs: requirements (the lib unit), test-compiled `.opentaint/test-compiled/`, test-project `.opentaint/test-projects/`, rules-dir `.opentaint/rules`, ``, and on a re-dispatch the approximation dirs `.opentaint/pass-through` / `.opentaint/dataflow`. Writes the package's source lib rules + per-vuln-class sink lib rules into `.opentaint/rules`, the test joins against the markers into each test project's `test-rules`, and iterates `test rule run` per sub-project until every sample passes; sets `tests_passing: done` and the lib rules' `rule_id`s/`artifact` If create-rule reports the test project drops a library method on the rule's flow, route the dropped methods through the approximation loop (references/approximations.md), then re-dispatch create-rule with the approximation dirs. If it reports non-convergence with nothing dropped, load references/escalation.md. Set `phases.rules: done` once every lib unit's `tests_passing` is done. diff --git a/skills/appsec-agent/references/scan.md b/skills/appsec-agent/references/scan.md index d25f9cc10..04ed80bbd 100644 --- a/skills/appsec-agent/references/scan.md +++ b/skills/appsec-agent/references/scan.md @@ -1,5 +1,5 @@ # Scan -Delegate run-scan. Inputs: model-dir `.opentaint/project`, ruleset `builtin` + `.opentaint/rules`, report `.opentaint/results/report.sarif`; on normal/deep also config-dir `.opentaint/pass-through` and approx-dir `.opentaint/approximations` (both dir flags walk the tree recursively, so the parents apply every unit). Require a concise return — finding counts per rule, the methods still in `dropped-external-methods.yaml` that sit on a source→sink path, and any config load/parse errors — not the SARIF body. The files persist on disk for the next steps. Set `phases.scan: done`. +Delegate run-scan. Inputs: model-dir `.opentaint/project`, ruleset `builtin` + `.opentaint/rules`, report `.opentaint/results/report.sarif`; on normal/deep also config-dir `.opentaint/pass-through` and approx-dir `.opentaint/dataflow` (both dir flags walk the tree recursively, so the parents apply every unit). Require a concise return — finding counts per rule, the methods still in `dropped-external-methods.yaml` that sit on a source→sink path, and any config load/parse errors — not the SARIF body. The files persist on disk for the next steps. Set `phases.scan: done`. On deep runs, if the scan flags an issue with a created rule — a rule that failed to load/parse, a join that should fire but didn't, or an own rule that false-positives — dispatch create-rule to fix that rule (references/discover-rules.md), then rescan before continuing. diff --git a/skills/create-dataflow-approximation/SKILL.md b/skills/create-dataflow-approximation/SKILL.md index 6f13dfd07..0953de1ea 100644 --- a/skills/create-dataflow-approximation/SKILL.md +++ b/skills/create-dataflow-approximation/SKILL.md @@ -17,7 +17,7 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Methods to model `` — the target method(s) and how taint flows through them, from the tracking file's `methods` (all `type: dataflow`) - Tracking file `` — the dataflow approximation unit (`-dataflow`, e.g. `reactor-core-publisher-dataflow`). Default: `.opentaint/tracking/approximations/.yaml` -- Approximation sources `` — this package's own directory for the `.java` approximation files. Default: `.opentaint/approximations/` +- Approximation sources `` — this package's own directory for the `.java` approximation files. Default: `.opentaint/dataflow/` - Compiled test project `` — the per-package compiled model to test against. Default: `.opentaint/test-compiled/` ## Workflow @@ -111,7 +111,7 @@ After ~2 fix attempts without a clearer cause — `@Approximate` target matches In ``, once the source exists and its sample passes: ```yaml -artifact: .opentaint/approximations//com/example/approximations/ReactiveProcessor.java +artifact: .opentaint/dataflow//com/example/approximations/ReactiveProcessor.java stages: tests_passing: done ``` @@ -120,6 +120,7 @@ Do not touch other stages or fields ## Constraints +- Also the passThrough fallback — when a passThrough for a method won't converge, the orchestrator re-plans it here; target the same dropped class and the dataflow approximation overrides the passThrough (the orchestrator removes the stale passThrough config before this one is tested) - Java 8 source compatibility - Put the `@Approximate` classes in a neutral package (e.g. `com.example.approximations`) — never the target library's own package. Inside the library's package every bare FQN resolves to your approximation's non-generic class instead of the real type, breaking compilation wholesale - Model every method and overload the unit lists, not only the shapes you happen to have a sample for — an under-covered unit silently drops taint through the overloads you skipped diff --git a/skills/create-pass-through-approximation/SKILL.md b/skills/create-pass-through-approximation/SKILL.md index 2ea2044c1..df501fb1c 100644 --- a/skills/create-pass-through-approximation/SKILL.md +++ b/skills/create-pass-through-approximation/SKILL.md @@ -86,6 +86,25 @@ passThrough: - .org.springframework.ldap.query.LdapQuery#filter#java.lang.Object ``` +Builder terminal — a no-arg `build()` / `toX()` that returns a new object carrying what the builder accumulated; no argument is involved, so copy each slot from `this` to the matching slot on `result` (the setters that filled the builder slot are separate rules of their own): +```yaml +passThrough: +- function: com.google.common.collect.ImmutableMap$Builder#build + copy: + - from: + - this + - .java.util.Map#MapKey#java.lang.Object + to: + - result + - .java.util.Map#MapKey#java.lang.Object + - from: + - this + - .java.util.Map#MapValue#java.lang.Object + to: + - result + - .java.util.Map#MapValue#java.lang.Object +``` + Conditional propagation — gate a rule with a `condition` (the copy still routes through a slot): ```yaml passThrough: @@ -151,7 +170,7 @@ Never invoke or grep the analyzer JAR — its internals aren't a stable API; for ### 4. When the config won't converge -After ~2 fix re-invocations without a clearer cause — matcher fields and `from`/`to` checked, writer/reader slots confirmed identical, the modeled method no longer in `dropped-external-methods.yaml`, but the scan still doesn't surface the flow — don't keep guessing. Report non-convergence to the caller; the orchestrator escalates to debug-rule for a fact-reachability trace of where taint dies +After ~2 fix re-invocations without a clearer cause — matcher fields and `from`/`to` checked, writer/reader slots confirmed identical, the modeled method no longer in `dropped-external-methods.yaml`, but the scan still doesn't surface the flow — don't keep guessing at the copy. Report non-convergence to the caller: a passThrough can't express this method's propagation, so the fix is a dataflow approximation for it (a custom dataflow overrides the passThrough). The orchestrator re-plans the method as a dataflow unit and removes this passThrough config before the dataflow one is tested ## Output @@ -190,7 +209,7 @@ Overrides - `overrides: false`: exact class only Conditions (the only keys that load from YAML) -- take a `pos: `: `typeIs`, `annotatedWith`, `constantMatches`, `constantEq`, `tainted` +- take a `pos: `: `typeIs`, `constantMatches`, `constantEq`, `tainted` - take the position directly, no `pos:` field: `isConstant`, `isNull` — adding `pos:` fails to load - nest other conditions: `anyOf`, `allOf`, `not` - `constantGt` / `constantLt` load but crash the scan when actually evaluated against a constant (their string-typed bound fails an engine type-check) — avoid until fixed diff --git a/skills/debug-rule/SKILL.md b/skills/debug-rule/SKILL.md index 7961e657f..c938e35f5 100644 --- a/skills/debug-rule/SKILL.md +++ b/skills/debug-rule/SKILL.md @@ -20,7 +20,7 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Ruleset `` — Default: `builtin` plus `.opentaint/rules` - Output directory `` — where the debug SARIF lands. Default: `.opentaint/test-results/` for a test model, or `.opentaint/results` for a main scan - Dropped external methods `` — the list from the run that showed the problem. Default: `dropped-external-methods.yaml` next to that run's SARIF -- Approximation directories `` / `` (optional) — apply when the behavior depends on them, so the debug run matches the run that showed the problem. Default: `.opentaint/pass-through`, `.opentaint/approximations` +- Approximation directories `` / `` (optional) — apply when the behavior depends on them, so the debug run matches the run that showed the problem. Default: `.opentaint/pass-through`, `.opentaint/dataflow` ## Workflow diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md index d32b53103..b1b4b74b2 100644 --- a/skills/discover-attack-surface/SKILL.md +++ b/skills/discover-attack-surface/SKILL.md @@ -34,7 +34,7 @@ Before enumerating anything, see what the built-ins already match for this packa Find the package's jar in `` (match the artifact from the dependency GAV; `unzip -l | grep ` confirms it owns the package) and read its compiled API with `javap` / `unzip` — capture as many real sources and sinks as the package exposes, not just the ones the app happens to call today. Never read the analyzer jar — only dependency jars -- **sources** — methods returning attacker-controlled data (HTTP/RPC request data, message-broker payloads, second-order rows read back); general, not class-tagged +- **sources** — the exact place untrusted data first enters from a boundary (network, persistence, serialization, messaging, execution): a method that *returns* attacker-controlled data — HTTP/RPC request data, a message-broker payload. NOT a method that merely passes data it was handed along — that's a propagator the engine already handles, not a source. General, not class-tagged - **sinks** — dangerous operations (query construction, command/file/path ops, deserialization, template/EL, LDAP/JNDI, reflection); tag each with its vuln class (`ssrf`, `sqli`, `path-traversal`, …) Verify each is real before recording: a source genuinely attacker-controlled, a sink genuinely dangerous with tainted input. Don't trace a flow between them — the analyzer pairs them at scan time @@ -89,6 +89,7 @@ notes: > - Spring projects: the analyzer auto-discovers Spring endpoints, so `network` inbound sources are largely ones the built-ins already see — focus on the sinks - Generic projects: the analyzer treats all public/protected methods of public classes as entry points +- Stored / second-order injection (data persisted then read back) is modeled by the engine on its own — don't plan a source for the read-back or a propagator for the store→read path ## Gotchas diff --git a/skills/run-scan/SKILL.md b/skills/run-scan/SKILL.md index 4aafe7073..0f9bd32f1 100644 --- a/skills/run-scan/SKILL.md +++ b/skills/run-scan/SKILL.md @@ -20,7 +20,7 @@ From the caller; if omitted, fall back to the default. Ask only when a required - Rule IDs `` (optional) — full IDs to restrict the scan to, omit to run all loaded rules - SARIF output `` — Default: `.opentaint/results/report.sarif` - PassThrough config `` (optional) — a passThrough YAML file or a directory of them. Default: `.opentaint/pass-through` -- Dataflow approximations directory `` (optional) — Default: `.opentaint/approximations` +- Dataflow approximations directory `` (optional) — Default: `.opentaint/dataflow` ## Workflow diff --git a/skills/triage-dependencies/SKILL.md b/skills/triage-dependencies/SKILL.md index 4a2cd5a95..633e30bfa 100644 --- a/skills/triage-dependencies/SKILL.md +++ b/skills/triage-dependencies/SKILL.md @@ -27,7 +27,7 @@ Read `/project.yaml` — its `dependencies:` is every jar on the clas ### 2. Mark each library -For each library decide: could it introduce an attacker-controlled source (e.g. HTTP/RPC request data, message-broker payloads, second-order rows read back and so on) or a dangerous sink (e.g. query construction, command/file/path ops, deserialization, template/EL, LDAP/JNDI, reflection and so on)? +For each library decide: could it introduce an attacker-controlled source (e.g. HTTP/RPC request data, message-broker payloads and so on) or a dangerous sink (e.g. query construction, command/file/path ops, deserialization, template/EL, LDAP/JNDI, reflection and so on)? - clearly irrelevant — build/Gradle plugins, logging, annotations, bytecode tooling (ASM, byte-buddy), test libraries, pure data structures: dismiss - clearly relevant — web frameworks, query/ORM libraries, HTTP clients, deserializers, template engines, LDAP/JNDI, scripting: flag From 5492e867b22be9ed2ce5e8e259c9504a909c60af Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Sat, 6 Jun 2026 15:21:15 +0200 Subject: [PATCH 14/54] fix(cli): resolve bundled lib/jre for FHS install layout The bare `opentaint` binary failed to auto-load the bundled analyzer, autobuilder and rules, falling back to ~/.opentaint/install/lib instead. GetBundledLibPath/GetBundledJREPath resolved artifacts at /lib, which only matches the flat managed layout (~/.opentaint/install/). The `make install` FHS layout puts the binary in /bin and artifacts in /lib (siblings), so the bundled tier never matched. Only the opentaint-dev wrapper worked, via its explicit --analyzer-jar ../lib flag. resolveBundledDir now checks both / (flat) and /../ (FHS), preferring whichever exists. Also bundle the ruleset in `make install` (rules/ruleset -> $(LIBDIR)/rules) so all three artifacts ship in the install lib and load relative to the binary. --- Makefile | 5 +++ cli/internal/utils/bundled_path_test.go | 60 +++++++++++++++++++++++++ cli/internal/utils/opentaint_home.go | 35 +++++++++++---- 3 files changed, 92 insertions(+), 8 deletions(-) create mode 100644 cli/internal/utils/bundled_path_test.go diff --git a/Makefile b/Makefile index ada6d30ea..3c4eb4c52 100644 --- a/Makefile +++ b/Makefile @@ -18,8 +18,10 @@ TEST_UTIL_TASK := :opentaint-sast-test-util:jar ANALYZER_JAR := $(CORE_DIR)/build/libs/opentaint-project-analyzer.jar AUTOBUILDER_JAR := $(CORE_DIR)/opentaint-jvm-autobuilder/build/libs/opentaint-project-auto-builder.jar TEST_UTIL_JAR := $(CORE_DIR)/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar +RULES_SRC := rules/ruleset INSTALLED_ANALYZER_JAR := $(LIBDIR)/$(notdir $(ANALYZER_JAR)) INSTALLED_AUTOBUILDER_JAR := $(LIBDIR)/$(notdir $(AUTOBUILDER_JAR)) +INSTALLED_RULES_DIR := $(LIBDIR)/rules INSTALLED_CLI_BINARY := $(BINDIR)/$(CLI_BINARY_NAME) INSTALLED_DEV_BINARY := $(BINDIR)/$(CLI_DEV_BINARY_NAME) @@ -47,6 +49,9 @@ install: core cli $(INSTALL) -m 0644 $(ANALYZER_JAR) $(INSTALLED_ANALYZER_JAR) $(INSTALL) -m 0644 $(AUTOBUILDER_JAR) $(INSTALLED_AUTOBUILDER_JAR) $(INSTALL) -m 0644 $(TEST_UTIL_JAR) $(LIBDIR)/$(notdir $(TEST_UTIL_JAR)) + rm -rf $(INSTALLED_RULES_DIR) + mkdir -p $(INSTALLED_RULES_DIR) + cp -R $(RULES_SRC)/. $(INSTALLED_RULES_DIR)/ printf '%s\n' \ '#!/bin/sh' \ 'set -eu' \ diff --git a/cli/internal/utils/bundled_path_test.go b/cli/internal/utils/bundled_path_test.go new file mode 100644 index 000000000..c02f4693f --- /dev/null +++ b/cli/internal/utils/bundled_path_test.go @@ -0,0 +1,60 @@ +package utils + +import ( + "os" + "path/filepath" + "testing" +) + +// FHS layout: `make install` puts the binary in /bin and the artifacts +// in /lib, so lib is a sibling of the binary's directory. +func TestResolveBundledDir_FHSLayout(t *testing.T) { + prefix := t.TempDir() + binDir := filepath.Join(prefix, "bin") + libDir := filepath.Join(prefix, "lib") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(libDir, 0o755); err != nil { + t.Fatal(err) + } + + if got := resolveBundledDir(binDir, "lib"); got != libDir { + t.Errorf("resolveBundledDir(FHS) = %q, want %q (sibling lib)", got, libDir) + } +} + +// Flat layout: the managed install (~/.opentaint/install/) keeps the binary, +// lib/ and jre/ in the same directory. +func TestResolveBundledDir_FlatLayout(t *testing.T) { + dir := t.TempDir() + libDir := filepath.Join(dir, "lib") + if err := os.MkdirAll(libDir, 0o755); err != nil { + t.Fatal(err) + } + + if got := resolveBundledDir(dir, "lib"); got != libDir { + t.Errorf("resolveBundledDir(flat) = %q, want %q", got, libDir) + } +} + +// When neither layout has the directory, fall back to the flat path so callers +// keep a stable default probe/download target (preserves prior behavior). +func TestResolveBundledDir_NoneFallsBackToFlat(t *testing.T) { + binDir := filepath.Join(t.TempDir(), "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatal(err) + } + + got := resolveBundledDir(binDir, "jre") + want := filepath.Join(binDir, "jre") + if got != want { + t.Errorf("resolveBundledDir(none) = %q, want %q (flat default)", got, want) + } +} + +func TestResolveBundledDir_EmptyExeDir(t *testing.T) { + if got := resolveBundledDir("", "lib"); got != "" { + t.Errorf("resolveBundledDir(\"\") = %q, want empty", got) + } +} diff --git a/cli/internal/utils/opentaint_home.go b/cli/internal/utils/opentaint_home.go index 44377f4b4..f34c13fef 100644 --- a/cli/internal/utils/opentaint_home.go +++ b/cli/internal/utils/opentaint_home.go @@ -50,22 +50,41 @@ func exeDir() string { return filepath.Dir(exe) } +// resolveBundledDir locates a bundled artifact directory (e.g. "lib" or "jre") +// relative to the binary, supporting both supported install layouts: +// +// - flat: / — the managed install (~/.opentaint/install/) keeps +// the binary, lib/ and jre/ in the same directory. +// - FHS: /../ — `make install` puts the binary in /bin +// and artifacts in /lib, so the directory is a sibling of bin/. +// +// The first layout whose directory exists wins. When neither exists it falls +// back to the flat path so callers keep a stable default probe/download target. +// Returns empty string if exeDir is empty (executable path undeterminable). +func resolveBundledDir(exeDir, name string) string { + if exeDir == "" { + return "" + } + flat := filepath.Join(exeDir, name) + if pathExists(flat) { + return flat + } + if sibling := filepath.Join(exeDir, "..", name); pathExists(sibling) { + return sibling + } + return flat +} + // GetBundledLibPath returns the path to the bundled lib directory next to the binary. // Returns empty string if the path cannot be determined. func GetBundledLibPath() string { - if dir := exeDir(); dir != "" { - return filepath.Join(dir, "lib") - } - return "" + return resolveBundledDir(exeDir(), "lib") } // GetBundledJREPath returns the path to the bundled JRE directory next to the binary. // Returns empty string if the path cannot be determined. func GetBundledJREPath() string { - if dir := exeDir(); dir != "" { - return filepath.Join(dir, "jre") - } - return "" + return resolveBundledDir(exeDir(), "jre") } // GetInstallDir returns the path to ~/.opentaint/install/. From 9c27966f7c5a2cdafdd5f57bf8f6f3138bc1b2cf Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Sat, 6 Jun 2026 15:53:40 +0200 Subject: [PATCH 15/54] fix(cli): show bundled artifacts as "custom" not a nominal version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When scanning with a make-install (FHS) build, the header printed `Bundled ruleset: rules/v0.2.0` — a nominal version that looked like the stock release even though the bundled lib holds the user's own modified rules. Same for the analyzer/autobuilder jars. ArtifactDisplayVersion is now tier-aware: an artifact resolved from the bundled tier (the lib next to the binary, which the user controls) renders as `custom ()`, since its nominal version may not match its content. Managed install/cache releases still display their version string. Adds resolveArtifactTier (resolveArtifactPath now delegates to it) and a bundled-tier display test case. --- cli/internal/utils/display_version.go | 19 ++++++++++++++----- cli/internal/utils/display_version_test.go | 21 +++++++++++++++++---- cli/internal/utils/opentaint_home.go | 20 ++++++++++++++------ 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/cli/internal/utils/display_version.go b/cli/internal/utils/display_version.go index ce585a205..32e2e1a25 100644 --- a/cli/internal/utils/display_version.go +++ b/cli/internal/utils/display_version.go @@ -7,17 +7,26 @@ import ( ) func ArtifactDisplayVersion(def globals.ArtifactDef, jarPathOverride string) string { - resolvedPath := "" - if jarPathOverride == "" && def.Version == "" { - resolvedPath, _ = resolveArtifactPath(def) + resolvedTier, resolvedPath := "", "" + if jarPathOverride == "" { + resolvedTier, resolvedPath, _ = resolveArtifactTier(def) } - return displayVersion(def.Version, jarPathOverride, resolvedPath) + return displayVersion(def.Version, jarPathOverride, resolvedTier, resolvedPath) } -func displayVersion(version, overridePath, resolvedPath string) string { +// displayVersion renders an artifact's display label: +// - an explicit jar-path override always wins -> custom () +// - resolved from the bundled tier (a user-controlled build next to the binary, +// whose nominal version may not match its actual content) -> custom () +// - an empty/unpinned version -> custom () +// - otherwise (a managed install/cache release) -> the version string +func displayVersion(version, overridePath, resolvedTier, resolvedPath string) string { if overridePath != "" { return customLabel(overridePath) } + if resolvedTier == TierBundled { + return customLabel(resolvedPath) + } if version == "" { return customLabel(resolvedPath) } diff --git a/cli/internal/utils/display_version_test.go b/cli/internal/utils/display_version_test.go index 2d612f3b4..99c601a22 100644 --- a/cli/internal/utils/display_version_test.go +++ b/cli/internal/utils/display_version_test.go @@ -11,13 +11,15 @@ func TestDisplayVersion(t *testing.T) { name string version string overridePath string + resolvedTier string resolvedPath string want string }{ { - name: "pinned version, no override", + name: "pinned version, no override, managed install tier", version: "analyzer/2026.05.27.68ab20a", overridePath: "", + resolvedTier: TierInstall, resolvedPath: "/opt/opentaint/lib/opentaint-project-analyzer.jar", want: "analyzer/2026.05.27.68ab20a", }, @@ -25,6 +27,7 @@ func TestDisplayVersion(t *testing.T) { name: "jar-path override wins over a present version", version: "analyzer/2026.05.27.68ab20a", overridePath: "/home/dev/build/analyzer.jar", + resolvedTier: TierBundled, resolvedPath: "/home/dev/build/analyzer.jar", want: "custom (/home/dev/build/analyzer.jar)", }, @@ -32,6 +35,7 @@ func TestDisplayVersion(t *testing.T) { name: "empty pin falls back to resolved path", version: "", overridePath: "", + resolvedTier: TierCache, resolvedPath: "/opt/opentaint/lib/opentaint-project-analyzer.jar", want: "custom (/opt/opentaint/lib/opentaint-project-analyzer.jar)", }, @@ -39,16 +43,25 @@ func TestDisplayVersion(t *testing.T) { name: "override takes precedence over empty pin", version: "", overridePath: "/home/dev/build/analyzer.jar", + resolvedTier: TierInstall, resolvedPath: "/opt/opentaint/lib/opentaint-project-analyzer.jar", want: "custom (/home/dev/build/analyzer.jar)", }, + { + name: "bundled tier shows custom path even with a pinned version", + version: "rules/v0.2.0", + overridePath: "", + resolvedTier: TierBundled, + resolvedPath: "/opt/opentaint/lib/rules", + want: "custom (/opt/opentaint/lib/rules)", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := displayVersion(tt.version, tt.overridePath, tt.resolvedPath) + got := displayVersion(tt.version, tt.overridePath, tt.resolvedTier, tt.resolvedPath) if got != tt.want { - t.Errorf("displayVersion(%q, %q, %q) = %q, want %q", - tt.version, tt.overridePath, tt.resolvedPath, got, tt.want) + t.Errorf("displayVersion(%q, %q, %q, %q) = %q, want %q", + tt.version, tt.overridePath, tt.resolvedTier, tt.resolvedPath, got, tt.want) } }) } diff --git a/cli/internal/utils/opentaint_home.go b/cli/internal/utils/opentaint_home.go index f34c13fef..5e92e6746 100644 --- a/cli/internal/utils/opentaint_home.go +++ b/cli/internal/utils/opentaint_home.go @@ -177,20 +177,28 @@ func ReconcileInstallMarker() { _ = WriteInstallVersionMarker() } -// resolveArtifactPath resolves the path for an artifact by checking tiers in order: +// resolveArtifactTier resolves both the storage tier and path for an artifact by +// checking tiers in order: // 1. Bundled path (next to binary) — only if version matches bindVersion // 2. Install path (~/.opentaint/install/lib/) — only if version matches bindVersion // 3. Cache path (~/.opentaint/) -func resolveArtifactPath(def globals.ArtifactDef) (string, error) { +// When no tier exists yet, it returns the last tier as the default download target. +func resolveArtifactTier(def globals.ArtifactDef) (string, string, error) { tiers, err := ArtifactTiers(def) if err != nil { - return "", err + return "", "", err } if found := FindExisting(CurrentTiers(tiers, IsInstallCurrent())); found != nil { - return found.Path, nil + return found.Name, found.Path, nil } - // Return last tier as default download target (even if artifact not yet downloaded) - return tiers[len(tiers)-1].Path, nil + last := tiers[len(tiers)-1] + return last.Name, last.Path, nil +} + +// resolveArtifactPath resolves the path for an artifact. See resolveArtifactTier. +func resolveArtifactPath(def globals.ArtifactDef) (string, error) { + _, path, err := resolveArtifactTier(def) + return path, err } func GetAutobuilderJarPath(version string) (string, error) { From 6f3c34b0c072f3845c60ed5455d982191a342570 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Mon, 8 Jun 2026 21:07:32 +0200 Subject: [PATCH 16/54] docs: document agent skills and CLI tooling --- README.md | 18 +++++++++++++ docs/README.md | 3 +++ docs/usage.md | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/README.md b/README.md index ae3444225..d8dcfd05f 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,24 @@ For more options, see [Installation](docs/README.md#installation) and [Usage](do --- +## AI Agent Workflows + +OpenTaint now ships agent skills for turning static analysis into an end-to-end application-security workflow. Install them with: + +```bash +npx skills add https://github.com/seqra/opentaint +``` + +The `appsec-agent` skill orchestrates a full JVM project assessment: build the project, run OpenTaint, model missing library data flows, triage findings, and optionally generate dynamic proof-of-concept checks for confirmed vulnerabilities. + +Included skills cover the common security-analysis loop: + +- **Scan and triage:** `build-project`, `run-scan`, `analyze-findings`, `generate-poc` +- **Coverage expansion:** `triage-dependencies`, `discover-attack-surface`, `create-test-project`, `create-rule`, `assemble-lib-rules` +- **Dataflow modeling:** `analyze-external-methods`, `create-pass-through-approximation`, `create-dataflow-approximation`, `debug-rule`, `report-analyzer-issue` + +--- + ## Documentation Full guides — installation, usage, configuration, CI/CD integration: **[Documentation](docs/README.md)**. diff --git a/docs/README.md b/docs/README.md index ae2eaf161..638a2e910 100644 --- a/docs/README.md +++ b/docs/README.md @@ -130,6 +130,9 @@ opentaint summary --show-findings --verbose-flow --show-code-snippets results.sa | `opentaint compile` | Build project model separately | | `opentaint project` | Create model from precompiled JARs | | `opentaint summary` | View SARIF results | +| `opentaint health` | Show resolved analyzer, autobuilder, rules, and runtime paths | +| `opentaint test rule` | Scaffold, test, and debug detection rules | +| `opentaint test approximation` | Scaffold and test dataflow approximations | | `opentaint pull` | Download dependencies | | `opentaint update` | Update to latest version | | `opentaint prune` | Remove stale artifacts and cached models | diff --git a/docs/usage.md b/docs/usage.md index 295a71fa7..f6794f987 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -80,6 +80,9 @@ Use [CodeChecker](https://github.com/Ericsson/codechecker) for advanced result m | `opentaint compile` | Build project model separately from scanning | | `opentaint project` | Create project model from precompiled JARs/classes | | `opentaint summary` | View SARIF analysis results | +| `opentaint health` | Print resolved dependency paths for the analyzer, autobuilder, rules, and Java runtime | +| `opentaint test rule` | Scaffold, test, and debug detection rules | +| `opentaint test approximation` | Scaffold and test dataflow approximations | | `opentaint pull` | Download analyzer dependencies | | `opentaint update` | Update to latest version | | `opentaint prune` | Remove stale downloaded artifacts and cached models | @@ -102,6 +105,72 @@ On the first run, the compiled project model is cached in `~/.opentaint/cache/`. | `--dry-run` | Validate inputs and show what would run without compiling or scanning | | `--log-file` | Path to the log file (default: `/logs/.log`) | +#### Agent and rule-authoring flags + +These flags support custom rule development and AI-agent workflows: + +| Flag | Description | +|------|-------------| +| `--track-external-methods` | Write `dropped-external-methods.yaml` and `approximated-external-methods.yaml` next to the SARIF report | +| `--passthrough-approximations` | Apply pass-through approximation YAML files or directories (repeatable) | +| `--dataflow-approximations` | Apply compiled dataflow approximation classes or Java source directories (repeatable) | + +Use external-method tracking when a scan may miss flows through library methods. The dropped-methods file shows where taint was killed because no model was available; the approximated-methods file shows methods already covered by built-in or custom models. + +### opentaint health + +Print the on-disk paths OpenTaint resolves for its dependencies: + +```bash +opentaint health +opentaint health --rules +opentaint health --analyzer +``` + +With no flags, `health` prints the autobuilder, analyzer, built-in rules, and Java runtime. With a single component flag, it prints only the bare path, which is useful for scripts and agents. + +| Flag | Description | +|------|-------------| +| `--autobuilder` | Show only the autobuilder JAR path | +| `--analyzer` | Show only the analyzer JAR path | +| `--rules` | Show only the built-in rules path, downloading rules on demand | +| `--runtime` | Show only the Java runtime path | + +### opentaint test + +The `test` command group is experimental tooling for rule and approximation development. + +#### Rule tests + +```bash +opentaint test rule init .opentaint/test-projects/my-rule +opentaint compile .opentaint/test-projects/my-rule/sinks -o .opentaint/test-compiled/my-rule/sinks +opentaint test rule run .opentaint/test-compiled/my-rule/sinks --ruleset .opentaint/rules +opentaint test rule reachability java/security/my-rule.yaml:my-rule --project-model .opentaint/test-compiled/my-rule/sinks --ruleset .opentaint/rules +``` + +| Command | Description | +|---------|-------------| +| `opentaint test rule init ` | Bootstrap source and sink test projects with annotated sample support | +| `opentaint test rule run ` | Run rules against annotated positive and negative samples | +| `opentaint test rule reachability [source-path]` | Trace fact reachability for a single rule and its referenced library rules | + +#### Approximation tests + +```bash +opentaint test approximation init .opentaint/test-projects/my-approximation +opentaint compile .opentaint/test-projects/my-approximation -o .opentaint/test-compiled/my-approximation +opentaint test approximation run .opentaint/test-compiled/my-approximation \ + --dataflow-approximations .opentaint/dataflow/my-approximation +``` + +| Command | Description | +|---------|-------------| +| `opentaint test approximation init ` | Bootstrap a test project with the fixed `Taint.source()` to `Taint.sink(...)` harness | +| `opentaint test approximation run ` | Run annotated samples with dataflow approximations applied | + +Rule and approximation test runs write `test-result.json` and `test-results.sarif` to the selected output directory. + ### opentaint compile Compiles Java and Kotlin projects and generates project models for analysis. Useful when you want to separate compilation from scanning or need to inspect the project model. From 96f4aa6e86970823e66eb6d278942cf94830a044 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Mon, 8 Jun 2026 21:19:55 +0200 Subject: [PATCH 17/54] clean(docs): Clean tmp files --- agent-mode/design/agent-mode-design.md | 1407 ----------- agent-mode/impl/agent-mode-impl.md | 1275 ---------- agent-mode/info/agent-pipeline.md | 672 ------ agent-mode/info/approximations-config.md | 487 ---- agent-mode/info/pattern-rules.md | 313 --- agent-mode/mismatch.md | 358 --- agent-mode/mitigation-plan.md | 407 ---- agent-mode/plan.md | 303 --- agent-mode/test-status.md | 95 - agent-mode/test/agent-mode-test.md | 2085 ----------------- agent-mode/test/conftest.py | 497 ---- .../java/PdfBoxDocumentApprox.java | 50 - .../yaml/custom-propagators.yaml | 37 - .../rules/java/lib/stirling-source.yaml | 10 - .../security/stirling-path-traversal.yaml | 18 - .../src/main/java/test/PathTraversalTest.java | 38 - agent-mode/test/pytest.ini | 5 - agent-mode/test/test_approximations.py | 358 --- agent-mode/test/test_build.py | 152 -- agent-mode/test/test_external_methods.py | 260 -- agent-mode/test/test_full_loop.py | 290 --- agent-mode/test/test_rules.py | 409 ---- task.md | 86 - 23 files changed, 9612 deletions(-) delete mode 100644 agent-mode/design/agent-mode-design.md delete mode 100644 agent-mode/impl/agent-mode-impl.md delete mode 100644 agent-mode/info/agent-pipeline.md delete mode 100644 agent-mode/info/approximations-config.md delete mode 100644 agent-mode/info/pattern-rules.md delete mode 100644 agent-mode/mismatch.md delete mode 100644 agent-mode/mitigation-plan.md delete mode 100644 agent-mode/plan.md delete mode 100644 agent-mode/test-status.md delete mode 100644 agent-mode/test/agent-mode-test.md delete mode 100644 agent-mode/test/conftest.py delete mode 100644 agent-mode/test/fixtures/approximations/java/PdfBoxDocumentApprox.java delete mode 100644 agent-mode/test/fixtures/approximations/yaml/custom-propagators.yaml delete mode 100644 agent-mode/test/fixtures/rules/java/lib/stirling-source.yaml delete mode 100644 agent-mode/test/fixtures/rules/java/security/stirling-path-traversal.yaml delete mode 100644 agent-mode/test/fixtures/test-samples/src/main/java/test/PathTraversalTest.java delete mode 100644 agent-mode/test/pytest.ini delete mode 100644 agent-mode/test/test_approximations.py delete mode 100644 agent-mode/test/test_build.py delete mode 100644 agent-mode/test/test_external_methods.py delete mode 100644 agent-mode/test/test_full_loop.py delete mode 100644 agent-mode/test/test_rules.py delete mode 100644 task.md diff --git a/agent-mode/design/agent-mode-design.md b/agent-mode/design/agent-mode-design.md deleted file mode 100644 index ad6ee61ef..000000000 --- a/agent-mode/design/agent-mode-design.md +++ /dev/null @@ -1,1407 +0,0 @@ -# Agent Mode Design - -## Table of Contents - -1. [Required Engine Changes](#1-required-engine-changes) -2. [Go CLI API Design](#2-go-cli-api-design) -3. [Agent Skills](#3-agent-skills) -4. [Meta Prompt](#4-meta-prompt) - ---- - -## 1. Required Engine Changes - -### 1.1 External Methods List Output - -**Problem**: The engine currently performs call-to-return passthrough for unresolved external methods — taint is silently preserved. There is no reporting of which external methods were encountered, making it impossible for the agent to know where taint propagation models are missing. - -**Current behavior** (in `JIRMethodCallFlowFunction.applyPassRulesOrCallSkip()`): -1. Taint fact arrives at a call to an unresolved method -2. `unresolvedCallDefaultFactPropagation()` copies the fact unchanged to the return site -3. If YAML pass-through rules exist for the method, those are also applied -4. No record is kept of this event - -**Required change**: Collect external method call information during analysis and output it as a YAML file. - -**Collection architecture**: Follow the `TaintSinkTracker` / `TaintAnalysisUnitStorage` pattern: - -``` -ExternalMethodTracker (like TaintSinkTracker) - └── backed by per-unit storage in TaintAnalysisUnitStorage - └── ConcurrentLinkedQueue - -Wiring: - TaintAnalysisContext (already carries TaintSinkTracker) - └── + val externalMethodTracker: ExternalMethodTracker - - TaintAnalysisUnitRunnerManager - └── spawnNewRunner() - ├── creates ExternalMethodTracker(storage) per unit - └── passes it into TaintAnalysisContext - └── getExternalMethods() (aggregates across all units, like getVulnerabilities()) -``` - -**Collection point**: `JIRMethodCallFlowFunction.applyPassRulesOrCallSkip()` — this is called for every taint fact that encounters an unresolved method. At this point we know: -- The called method (class, name, signature) -- The taint fact position that was passthrough-ed (the `factReader`/`factAp` tells us `this`, `arg(N)`, etc.) -- Whether YAML pass-through rules were found for this method - -The tracker records each encounter. Deduplication (by method identity) and aggregation (merging fact positions, counting call sites) happen at collection time via `ConcurrentHashMap`, same pattern as `TaintSinkTracker`'s `reportedVulnerabilities`. - -**Output format** (`external-methods.yaml`): - -Two separate lists — methods without rules (agent's priority list) and methods with rules (already modeled, for review): - -```yaml -withoutRules: - - method: com.example.lib.DataWrapper#getValue - signature: "() java.lang.String" - factPositions: - - this - callSites: 5 - - - method: com.example.lib.Processor#transform - signature: "(java.lang.Object) java.lang.Object" - factPositions: - - arg(0) - - this - callSites: 12 - -withRules: - - method: java.lang.StringBuilder#append - signature: "(java.lang.String) java.lang.StringBuilder" - factPositions: - - arg(0) - callSites: 87 -``` - -Fields: -- `method`: Fully qualified `Class#method` (class and method name are derivable from this, no need to store separately) -- `signature`: JVM-style `(paramTypes) returnType` -- `factPositions`: Deduplicated list of taint positions that were passthrough-ed at this method -- `callSites`: Number of distinct call sites where this method was encountered with taint - -The split into `withoutRules` / `withRules` reduces the agent's effort — it can focus on `withoutRules` first (methods with no propagation model at all), and only review `withRules` if specific traces look suspicious. - -**Kotlin CLI flag**: `--track-external-methods` (boolean, on `ProjectAnalyzerRunner`). Output filenames are fixed: `<--output-dir>/external-methods-without-rules.yaml` and `<--output-dir>/external-methods-with-rules.yaml`. The path is not configurable. -**Go CLI flag**: `--track-external-methods` (boolean, on `scan`). The two YAMLs are written into the same directory as the SARIF file specified by `-o`. - -### 1.2 Allow `--approximations-config` + `--semgrep-rule-set` Together - -**Problem**: `--config` and `--semgrep-rule-set` are mutually exclusive (`check(options.customConfig == null)` in `ProjectAnalyzer.preloadRules()`). The agent needs both: -- `--semgrep-rule-set` for pattern rules (sources, sinks, vulnerability patterns) -- `--approximations-config` for YAML propagation rules (passThrough) - -**Required change**: Rename the existing `--config` flag to `--approximations-config` to clarify its purpose. When both `--approximations-config` and `--semgrep-rule-set` are provided, load Semgrep rules as the pattern-matching layer and use the custom config to **override** the default propagation config. - -**Implementation**: In `ProjectAnalyzer.preloadRules()`, add a fourth branch: - -```kotlin -if (options.semgrepRuleSet.isNotEmpty() && options.approximationsConfig != null) { - val semgrepRules = loadSemgrepRules(...) - val customConfig = loadSerializedTaintConfig(options.approximationsConfig) - return PreloadedRules.SemgrepRulesWithCustomConfig(semgrepRules, customConfig) -} -``` - -In `loadTaintConfig()`, the new `SemgrepRulesWithCustomConfig` case should: -1. Load default pass-through rules into a `TaintConfiguration` -2. Load the custom config into another `TaintConfiguration` -3. Merge via `JIRCombinedTaintRulesProvider(defaultRules, customRules)` with **OVERRIDE** mode for all categories - -The agent's custom config intentionally overrides the default config — when the agent provides rules for a method, it means the agent has determined the correct behavior and the default should be replaced, not merged. Using EXTEND would mix the agent's corrections with the (possibly wrong) defaults, defeating the purpose. - -**Note**: Despite the YAML config schema supporting a `cleaner` section, the analyzer currently cannot use sanitizers from the config. The `--approximations-config` is used exclusively for `passThrough` rules. - -**Kotlin CLI**: `--approximations-config` is repeatable (`List`). Every occurrence is OVERRIDE-merged with the default config. -**Go CLI**: Exposes `--approximations-config ` on the `scan` command as a repeatable flag; each occurrence is forwarded to the analyzer. - -### 1.3 Custom Code-Based Approximations via CLI - -**Problem**: There is no way to pass custom approximation source code via CLI. The agent needs to provide code-based approximations for complex methods (lambdas, async, callbacks). - -**Required change**: The `--dataflow-approximations ` flag on `scan` accepts a directory of Java source files. The CLI automatically compiles them during scan and passes the resulting `.class` files to the analyzer. - -**Design**: Custom approximations are **dataflow approximations** — they go through the same `useDataflowApproximation` path as the built-in ones (Stream, CompletableFuture, etc.), not through the separate `useOpentaintApproximations` / environment variable mechanism. - -**Implementation in `DataFlowApproximationLoader`**: - -1. Add `customApproximationPaths: List = emptyList()` to `Options` -2. In `approximationFiles()`, append custom paths **after** built-in ones: - -```kotlin -private fun approximationFiles(options: Options): List { - val result = mutableListOf() - if (options.useDataflowApproximation) { - result += listOfNotNull(dataflowApproximationsPath?.toFile()) - } - result += options.customApproximationPaths.map { it.toFile() } - return result -} -``` - -No changes needed to `installApproximations()` or `createCpWithApproximations()` — they already consume whatever `approximationFiles()` returns. The `Approximations` feature indexes `@Approximate` annotations from all paths uniformly. - -**Conflict behavior**: If a custom approximation targets the same class as a built-in one, the `ApproximationIndexer`'s bijection `require()` assertions will fire and **report an error**. This is intentional — the agent must not silently override built-in approximations. If the agent needs different behavior for a class that already has a built-in approximation, this indicates a design problem that should be escalated, not silently resolved. - -**Kotlin CLI flag**: `--dataflow-approximations ` (repeatable, accepts directories of compiled `.class` files) -**Go CLI flag**: `--dataflow-approximations ` on `scan`, accepts source directory, compiles automatically (see 1.4) - -### 1.4 Automatic Approximation Compilation During Scan - -**Problem**: The agent writes Java source files for approximations. These need to be compiled to `.class` files before the analyzer can use them. This should be seamless. - -**Design**: The Go CLI's `--dataflow-approximations ` flag: - -1. Scans the directory for `.java` files -2. If `.java` files are found, compiles them automatically: - - Resolves `opentaint-analyzer.jar` (same tier resolution as `scan`) - - Resolves `javac` from managed JRE - - Resolves additional classpath from the target project's dependencies (from `project.yaml`) - - Runs: `javac -source 8 -target 8 -cp : -d ` -3. If compilation fails, reports errors to the agent and aborts scan -4. If compilation succeeds, passes the compiled `.class` directory to the analyzer via `--dataflow-approximations` -5. If only `.class` files are found (no `.java`), passes them directly (pre-compiled) - -**Why this is better than a separate command**: The agent writes source → runs scan → gets results. One command. No intermediate compile step to manage. If compilation fails, the error is reported in the context of the scan attempt. - -**Error reporting**: The CLI captures `javac` stderr and presents compilation errors clearly: -``` -Approximation compilation failed: - agent-approximations/src/ReactiveProcessor.java:12: error: cannot find symbol - com.example.lib.ReactiveProcessor self = ... - ^ - symbol: class ReactiveProcessor - -Hint: Ensure the library being approximated is in the project's dependencies. -``` - -### 1.5 Rule Test Command via Go CLI - -**Problem**: Running rule tests currently requires invoking the Kotlin analyzer JAR directly with `--debug-run-rule-tests`. The Go CLI doesn't expose this capability. - -**Required change**: Add a `test-rules` command to the Go CLI. - -**Go CLI**: -``` -opentaint agent test-rules \ - --ruleset # required, rule files to test - --output # output directory for test-result.json -``` - -The positional argument is the directory produced by `opentaint compile` (contains `project.yaml`). - -**Behavior**: -1. If input is a project directory (not project.yaml), auto-compile via autobuilder -2. Invoke analyzer JAR with `--debug-run-rule-tests --semgrep-rule-set ` -3. Parse and display `test-result.json` summary -4. Exit with non-zero code if any `falsePositive` or `falseNegative` entries exist - -### 1.6 Rule ID Filter - -**Problem**: The agent creates its own rules and may reference built-in library rules. When running analysis, the agent wants to execute **only its rules** (plus the referenced built-in library rules they depend on), without all other built-in security rules firing and producing noise. - -**Current state**: The `--semgrep-rule-severity` flag filters rules by severity. There is no way to filter by rule ID. When `--ruleset builtin --ruleset ./agent-rules` is used, ALL rules from both rulesets are active. - -**Required change**: Add a `--semgrep-rule-id` filter flag (repeatable) that restricts which rules are active. Only rules whose **full** ID is in the filter are kept; every other rule (including library rules referenced via `refs`) is dropped. The filter is intentionally exact: callers must list every rule they want active. - -The full rule ID has the form `.yaml:`, e.g. -`java/security/my-vuln.yaml:my-vulnerability`. - -**Kotlin CLI flag**: `--semgrep-rule-id ` (repeatable) -**Go CLI flag**: `--rule-id ` (repeatable, on `scan` command) - -**Example**: -```bash -# The agent's own rule plus every library rule it depends on must be listed explicitly. -opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin \ - --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --rule-id java/lib/generic/servlet-untrusted-data-source.yaml:java-servlet-untrusted-data-source \ - --rule-id java/lib/generic/jdbc-sql-sink.yaml:java-jdbc-sql-sink -``` - -**Implementation**: In `SemgrepRuleLoader.loadRules`, the filter is applied per-rule via -`ruleIdAllow(rule, filter)`: a rule is kept iff the filter is empty or -`rule.info.ruleId in filter`. Referenced library rules are NOT auto-included. If -`--semgrep-rule-id` is not provided, all loaded rules are active (current behavior preserved). - -### 1.7 Hidden Local JAR Path Flags (Development) - -**Problem**: The CLI resolves analyzer and autobuilder JARs via a 3-tier path system (bundled > install > cache), tied to a version string embedded at compile time. For development, this means the CLI is unusable without publishing the JARs to GitHub Releases. Developers building the analyzer locally cannot test through the CLI. - -**Current state**: Hidden `--analyzer-version` and `--autobuilder-version` flags exist but only change the version tag used for resolution/download — they still require the JAR to be published. - -**Required change**: Add hidden flags that accept a direct filesystem path to the JAR, bypassing version-based resolution entirely. - -**New hidden persistent flags on root command** (`root.go`): - -| Flag | Type | Viper Key | Description | -|---|---|---|---| -| `--analyzer-jar` | string | `analyzer.jar` | Direct path to analyzer JAR (bypasses version resolution) | -| `--autobuilder-jar` | string | `autobuilder.jar` | Direct path to autobuilder JAR (bypasses version resolution) | - -**Implementation in `scan.go`**: -```go -func ensureAnalyzerAvailable() (string, error) { - // Direct path takes priority — skip all resolution and download - if directPath := globals.Config.Analyzer.Jar; directPath != "" { - if _, err := os.Stat(directPath); err != nil { - return "", fmt.Errorf("analyzer JAR not found at %s", directPath) - } - return directPath, nil - } - // Fall back to version-based resolution - analyzerJarPath, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version) - // ...existing logic... -} -``` - -Identical pattern in `compile.go` for `ensureAutobuilderAvailable()`. - -**Usage**: -```bash -# Use locally-built analyzer -opentaint scan --project-model ./opentaint-project -o report.sarif \ - --analyzer-jar ./core/build/libs/opentaint-project-analyzer.jar - -# Use locally-built autobuilder -opentaint compile ./project -o ./opentaint-project \ - --autobuilder-jar ./autobuilder/build/libs/opentaint-project-auto-builder.jar - -# Both -opentaint scan --project-model ./opentaint-project -o report.sarif \ - --analyzer-jar /path/to/local/analyzer.jar \ - --autobuilder-jar /path/to/local/autobuilder.jar - -# Via environment variables (viper binding) -export OPENTAINT_ANALYZER_JAR=/path/to/local/analyzer.jar -opentaint scan --project-model ./opentaint-project -o report.sarif -``` - -**Note**: These flags are hidden (not shown in `--help`) — they are for development use only. When set, no download is attempted. - -### 1.8 Builtin Rules Path Command - -**Problem**: The agent needs to read built-in rules (to understand existing sources/sinks/patterns, to reference them via `refs`, and to decide whether custom rules are needed). Rules are a separate artifact (`opentaint-rules.tar.gz`) resolved via a 3-tier path system (bundled > install > cache) and downloaded lazily. The agent has no way to discover where the rules directory is on disk. - -**Required change**: Add a `rules-path` command to the Go CLI that prints the resolved filesystem path to the built-in rules directory, downloading the rules if not already present. - -**Go CLI**: -``` -opentaint agent rules-path -``` - -**Behavior**: -1. Resolves the rules path using the same 3-tier logic as `scan --ruleset builtin` -2. If rules are not present on disk, downloads `opentaint-rules.tar.gz` from GitHub Releases and extracts -3. Prints the absolute path to stdout (e.g., `/home/user/.opentaint/install/lib/rules`) -4. Exit code 0 on success - -**Usage by the agent**: -```bash -# Get the rules path -RULES_DIR=$(opentaint agent rules-path) - -# Read builtin rules to understand available sources/sinks -ls $RULES_DIR/java/lib/generic/ -cat $RULES_DIR/java/lib/generic/servlet-untrusted-data-source.yaml - -# Read builtin security rules to check coverage -ls $RULES_DIR/java/security/ -``` - -**Implementation**: New command in `cli/cmd/rules_path.go`. Reuses `utils.GetRulesPath()` and the existing download logic from `scan.go:214-224`. - -### 1.8 Test Project Bootstrap Command - -**Problem**: Creating a test project for rule testing requires setting up a Gradle project with the correct `opentaint-sast-test-util` dependency. The agent needs to know how to obtain this JAR and wire it into the build script. This is error-prone. - -**Required change**: Add an `init-test-project` command to the Go CLI that bootstraps a ready-to-use test project. - -**Go CLI**: -``` -opentaint agent init-test-project \ - [--dependency ] ... # additional maven dependencies for test code -``` - -**Behavior**: -1. Creates the directory structure: - ``` - / - ├── build.gradle.kts - ├── settings.gradle.kts - ├── libs/ - │ └── opentaint-sast-test-util.jar - └── src/main/java/test/ - └── .gitkeep - ``` -2. Downloads `opentaint-sast-test-util.jar` from the same artifact source as the analyzer (GitHub releases, tiered resolution: bundled > install > cache). Alternatively, extracts it from the `opentaint-analyzer.jar` if bundled inside. -3. Generates `build.gradle.kts` referencing the local JAR: - ```kotlin - plugins { java } - java { - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 - } - repositories { mavenCentral() } - dependencies { - compileOnly(files("libs/opentaint-sast-test-util.jar")) - // User-requested dependencies: - compileOnly("javax.servlet:javax.servlet-api:4.0.1") - } - ``` -4. Generates `settings.gradle.kts` with a project name derived from the directory. -5. Prints next steps: - ``` - Test project created at ./agent-test-project - - Next steps: - 1. Add test samples in src/main/java/test/ - 2. Build: opentaint compile ./agent-test-project -o ./agent-test-compiled - 3. Test: opentaint agent test-rules ./agent-test-compiled --ruleset -o ./test-output - ``` - ---- - -## 2. Go CLI API Design - -All agent operations flow through the Go CLI (`opentaint`). The design adds 4 new commands and 4 new flags to existing commands. - -### 2.1 Complete Command Reference (Existing + New) - -#### `opentaint compile` (existing) -Build project and create project model. -``` -opentaint compile -o [--dry-run] -``` - -#### `opentaint project` (existing) -Create project model from precompiled artifacts. -``` -opentaint project \ - --output \ - --source-root \ - --classpath ... \ - --package ... \ - [--dependency ...] -``` - -#### `opentaint scan` (existing, extended) -Run analysis. **New flags** marked with ★. -``` -opentaint scan [] \ - [--project-model ] \ - -o \ - [--ruleset builtin] \ - [--ruleset ] \ - [--rule-id ] ★ filter: only run these rule IDs (repeatable) - [--approximations-config ] ★ YAML passThrough config, OVERRIDE mode (repeatable) - [--dataflow-approximations ] ★ approximation source/class dir (auto-compiles .java) - [--track-external-methods] ★ write external-methods-{without,with}-rules.yaml next to SARIF - [--timeout ] \ - [--max-memory ] \ - [--severity ] \ - [--code-flow-limit ] -``` - -Flag interactions: -- Pass either the source project as a positional argument (will be compiled) or a pre-compiled project model via `--project-model ` (contains `project.yaml`). Not both. -- `--ruleset` and `--approximations-config` can be used together (engine change 1.2). -- `--dataflow-approximations` accepts `.java` source dir (auto-compiled) or `.class` dir (passed directly). -- `--track-external-methods` is a boolean; output filenames and directory are fixed (next to the SARIF). -- `--rule-id` takes the FULL rule ID `.yaml:`; rules whose full ID is not listed are dropped, including library rules referenced via join-mode `refs`. - -#### `opentaint agent test-rules` ★ NEW -Run rule tests against a test project. Registered under the `agent` command group. -``` -opentaint agent test-rules \ - --ruleset \ - -o \ - [--timeout ] \ - [--max-memory ] -``` - -The positional argument is the **directory** that contains `project.yaml` (e.g. -`./agent-test-compiled`), not the `project.yaml` file path. - -Output: `/test-result.json` with verdicts per test sample. - -Exit codes: -- `0`: All tests pass (only `success` and `disabled` entries) -- `1`: Test failures exist (`falsePositive`, `falseNegative`, or `skipped` entries) - -Prints a summary table: -``` -Rule Tests Summary: - ✓ success: 12 - ✗ false positive: 1 - ✗ false negative: 2 - - skipped: 0 - - disabled: 1 -``` - -#### `opentaint agent rules-path` ★ NEW -Print the resolved filesystem path to built-in rules (downloads if needed). -Registered under the `agent` command group. -``` -opentaint agent rules-path -``` - -Prints absolute path to stdout. The agent uses this to read built-in rule YAML files. - -#### `opentaint agent init-test-project` ★ NEW -Bootstrap a test project for rule testing. -Registered under the `agent` command group. -``` -opentaint agent init-test-project \ - [--dependency ] ... -``` - -Downloads `opentaint-sast-test-util.jar`, generates `build.gradle.kts` and directory structure. - -#### `opentaint summary` (existing) -Print SARIF results. -``` -opentaint summary \ - [--show-findings] \ - [--show-code-snippets] \ - [--verbose-flow] -``` - -### 2.2 Command Builder Changes - -The `AnalyzerBuilder` in `command_builder.go` needs new methods for the new flags: - -```go -func (b *AnalyzerBuilder) SetApproximationsConfig(configPath string) *AnalyzerBuilder -func (b *AnalyzerBuilder) AddDataflowApproximations(approxPath string) *AnalyzerBuilder -func (b *AnalyzerBuilder) SetExternalMethodsOutput(path string) *AnalyzerBuilder -func (b *AnalyzerBuilder) SetDebugRunRuleTests(enabled bool) *AnalyzerBuilder -func (b *AnalyzerBuilder) AddRuleIdFilter(ruleId string) *AnalyzerBuilder -``` - -These translate to: -| Go CLI flag | Analyzer CLI flag | -|---|---| -| `--approximations-config ` (repeatable) | `--approximations-config ` (repeatable) | -| `--dataflow-approximations ` | `--dataflow-approximations ` (compiled classes dir) | -| `--track-external-methods` | `--track-external-methods` | -| `--rule-id ` | `--semgrep-rule-id ` | -| (`opentaint agent test-rules` command) | `--debug-run-rule-tests` | - -The Go CLI `AnalyzerBuilder` methods: `AddApproximationsConfig(path)`, -`AddDataflowApproximations(path)`, `SetTrackExternalMethods(bool)`, `AddRuleID(id)`, -`EnableRunRuleTests()`. - ---- - -## 3. Agent Skills - -Skills are self-contained instruction sets the agent loads to perform specific operations. Each skill contains: purpose, prerequisites, step-by-step instructions, CLI commands with examples, expected outputs, and error handling. - -### 3.1 Skill: `build-project` - -**Purpose**: Build a target project and prepare it for analysis. - -**Instructions**: - -1. Determine the project type by examining the project directory: - - Look for `build.gradle`, `build.gradle.kts` → Gradle project - - Look for `pom.xml` → Maven project - - Look for pre-compiled JARs → classpath mode - -2. For Gradle/Maven projects, use the autobuilder: - ```bash - opentaint compile /path/to/project -o ./opentaint-project - ``` - -3. For pre-compiled artifacts, use the project command: - ```bash - opentaint project \ - --output ./opentaint-project \ - --source-root /path/to/src \ - --classpath /path/to/app.jar \ - --package com.example.app - ``` - -4. Verify `./opentaint-project/project.yaml` was created. - -5. If compilation fails: - - Check build tool is installed and project builds independently - - Check Java version compatibility (OpenTaint uses Java 21) - - Examine the autobuilder log for specific errors - - Fall back to `opentaint project` with pre-compiled artifacts - -**Expected output**: A directory containing `project.yaml` and compiled class files. - -### 3.2 Skill: `discover-entry-points` - -**Purpose**: Identify entry points and attack surface of the target project by reading source code and analyzing project structure. - -**Instructions**: - -The agent discovers entry points itself — no special CLI command is needed. The analysis engine automatically selects entry points (all public/protected methods for generic projects, Spring endpoints for Spring projects). The agent's role is to **understand** the attack surface to plan rules effectively. - -1. Read the project's source code and identify: - - **Spring controllers**: Search for `@RestController`, `@Controller` annotations. Read `@RequestMapping`, `@GetMapping`, `@PostMapping`, `@PutMapping`, `@DeleteMapping` to understand routes and parameters. - - **Servlet handlers**: Search for classes extending `HttpServlet` with `doGet`, `doPost`, `doPut`, `doDelete` methods. - - **JAX-RS endpoints**: Search for `@Path`, `@GET`, `@POST`, `@PUT`, `@DELETE` annotations. - - **Message handlers**: Search for `@JmsListener`, `@KafkaListener`, `@RabbitListener` annotations. - - **CLI entry points**: Find `main(String[])` methods that process external input (command-line args, stdin, files). - - **Scheduled tasks**: Search for `@Scheduled` methods that read external state (files, DB, network). - -2. For each entry point, determine: - - What external data it receives (HTTP params, headers, body, path variables, message payloads) - - What operations it performs (DB queries, file I/O, command execution, HTTP responses, serialization) - - Which vulnerability classes are relevant (SQLi, XSS, command injection, path traversal, SSRF, XXE, etc.) - -3. Examine the project's dependencies (from `build.gradle`, `pom.xml`, or `project.yaml`) to understand: - - Which frameworks are used (Spring, Servlets, JAX-RS, etc.) - - Which database libraries (JDBC, JPA, MyBatis, etc.) - - Which template engines (Thymeleaf, JSP, Freemarker) - - Which HTTP clients (OkHttp, Apache HttpClient, RestTemplate) - -4. Record findings in `opentaint-analysis-plan.md`. - -**Note**: The engine handles entry point selection automatically during analysis: -- For `--project-kind spring-web`: Uses Spring endpoint discovery (`SpringWebProject.kt`) -- For `--project-kind unknown` (default): Uses all public/protected methods from public project classes -- For targeted analysis: Agent can use `--debug-run-analysis-on-selected-entry-points "com.example.Class#method"` via the Kotlin CLI directly - -### 3.3 Skill: `create-rule` - -**Purpose**: Create a pattern rule for detecting a specific vulnerability class. - -**Instructions**: - -1. Determine the rule architecture: - - **Source**: Where does untrusted data enter? (HTTP params, headers, body, etc.) - - **Sink**: Where is the data dangerous? (SQL query, command exec, file path, HTML output, etc.) - - **Sanitizers**: What makes the data safe? (encoding, escaping, parameterized queries, etc.) - -2. Read built-in rules to check existing coverage: - ```bash - RULES_DIR=$(opentaint agent rules-path) - # List available source/sink library rules - ls $RULES_DIR/java/lib/generic/ - ls $RULES_DIR/java/lib/spring/ - # Read specific rules to understand their patterns and IDs - cat $RULES_DIR/java/lib/generic/servlet-untrusted-data-source.yaml - cat $RULES_DIR/java/lib/generic/jdbc-sql-sink.yaml - # List existing security rules to check what's already covered - ls $RULES_DIR/java/security/ - ``` - - Sources: `$RULES_DIR/java/lib/generic/` and `$RULES_DIR/java/lib/spring/` - - Sinks: Same directories - - If existing rules cover the needed source/sink, skip to step 4 (join-mode composition referencing built-in rules) - -3. If new source/sink patterns are needed, create library rules: - - **Source library rule** (`agent-rules/java/lib/my-source.yaml`): - ```yaml - rules: - - id: my-custom-source - options: - lib: true - severity: NOTE - message: Custom untrusted data source - languages: [java] - patterns: - - pattern-either: - - patterns: - - pattern: | - $RETURNTYPE $METHOD(HttpServletRequest $UNTRUSTED, ...) { ... } - - metavariable-pattern: - metavariable: $METHOD - pattern-either: - - pattern: doGet - - pattern: doPost - ``` - - **Sink library rule** (`agent-rules/java/lib/my-sink.yaml`): - ```yaml - rules: - - id: my-custom-sink - options: - lib: true - severity: NOTE - message: Custom dangerous operation - languages: [java] - mode: taint - pattern-sinks: - - patterns: - - pattern-either: - - pattern: (java.sql.Statement $S).executeQuery($UNTRUSTED) - - pattern: (java.sql.Statement $S).execute($UNTRUSTED) - - focus-metavariable: $UNTRUSTED - ``` - -4. Create the join-mode security rule (`agent-rules/java/security/my-vuln.yaml`): - ```yaml - rules: - - id: my-vulnerability - severity: ERROR - message: >- - Untrusted data flows to dangerous operation - metadata: - cwe: CWE-89 - short-description: SQL Injection via untrusted input - languages: [java] - mode: join - join: - refs: - - rule: java/lib/my-source.yaml#my-custom-source - as: source - - rule: java/lib/my-sink.yaml#my-custom-sink - as: sink - on: - - 'source.$UNTRUSTED -> sink.$UNTRUSTED' - ``` - - You can reference built-in library rules — they will be auto-included when the agent's rule is active: - ```yaml - refs: - - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - as: servlet-source - - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source - as: spring-source - ``` - -5. For simple structural patterns (no dataflow), use default mode: - ```yaml - rules: - - id: weak-crypto - severity: WARNING - message: Use of weak cryptographic algorithm - metadata: - cwe: CWE-327 - short-description: Weak cryptography - languages: [java] - patterns: - - pattern: Cipher.getInstance("DES") - ``` - -6. When running analysis, use `--rule-id` to activate only the agent's rules. The flag - takes the FULL rule ID (`.yaml:`). Library rules referenced - via `refs` are NOT auto-included — the filter drops every rule whose full ID is missing, - so either list every library rule explicitly or omit `--rule-id` to keep all loaded rules active. - ```bash - opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --rule-id java/security/weak-crypto.yaml:weak-crypto - ``` - -**Constraints**: -- Rule IDs must be globally unique -- Library rules must have `options.lib: true` and `severity: NOTE` -- Security rules must have `metadata.cwe` and `metadata.short-description` -- Source/sink metavariable names must match across `refs` + `on` clauses (convention: `$UNTRUSTED`) -- The `rule:` path in `refs` is relative to the ruleset root; when using `--ruleset`, the root is the ruleset directory -- `--rule-id` does not auto-include rules referenced via `refs`; list every library rule explicitly or omit the flag - -### 3.4 Skill: `test-rule` - -**Purpose**: Create test samples for a rule and verify it works correctly. - -**Instructions**: - -1. Bootstrap a test project: - ```bash - opentaint agent init-test-project ./agent-test-project \ - --dependency "javax.servlet:javax.servlet-api:4.0.1" - ``` - - This creates the directory structure with `build.gradle.kts`, `settings.gradle.kts`, and the `opentaint-sast-test-util.jar` in `libs/`. Add more `--dependency` flags for additional libraries your test code needs (e.g., Spring, JDBC drivers). - -2. Create test samples in `src/main/java/test/MyVulnTest.java`: - ```java - package test; - - import org.opentaint.sast.test.util.PositiveRuleSample; - import org.opentaint.sast.test.util.NegativeRuleSample; - import javax.servlet.http.HttpServletRequest; - import java.sql.Connection; - import java.sql.Statement; - - public class MyVulnTest { - - private Connection db; - - @PositiveRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") - public void vulnerable(HttpServletRequest req) throws Exception { - String input = req.getParameter("id"); - Statement stmt = db.createStatement(); - stmt.executeQuery("SELECT * FROM users WHERE id = " + input); - } - - @NegativeRuleSample(value = "java/security/my-vuln.yaml", id = "my-vulnerability") - public void safe(HttpServletRequest req) throws Exception { - String input = req.getParameter("id"); - var pstmt = db.prepareStatement("SELECT * FROM users WHERE id = ?"); - pstmt.setString(1, input); - pstmt.executeQuery(); - } - } - ``` - - Annotation fields: - - `value`: Path to the rule YAML file, relative to the ruleset root - - `id`: The rule ID within that file - -3. Build the test project: - ```bash - opentaint compile ./agent-test-project -o ./agent-test-compiled - ``` - -4. Run rule tests (positional argument is the project-model **directory**, not `project.yaml`): - ```bash - opentaint agent test-rules ./agent-test-compiled \ - --ruleset ./agent-rules \ - -o ./test-output - ``` - -5. Check results in `./test-output/test-result.json`: - ```json - { - "success": [ - {"className": "test.MyVulnTest", "methodName": "vulnerable", - "rule": {"rulePath": "java/security/my-vuln.yaml", "ruleId": "my-vulnerability"}}, - {"className": "test.MyVulnTest", "methodName": "safe", - "rule": {"rulePath": "java/security/my-vuln.yaml", "ruleId": "my-vulnerability"}} - ], - "falsePositive": [], - "falseNegative": [], - "skipped": [], - "disabled": [] - } - ``` - -6. If tests fail: - - `falseNegative` (positive sample didn't trigger): Rule patterns too narrow, or missing source/sink patterns - - `falsePositive` (negative sample triggered): Rule patterns too broad, need `pattern-not` or sanitizer exclusion - - `skipped` (rule not found): Check that `value` path and `id` in annotations match the rule file - -7. Fix the rule or test samples and repeat from step 3. - -### 3.5 Skill: `run-analysis` - -**Purpose**: Run OpenTaint analysis on the target project and collect results. - -**Instructions**: - -1. Run analysis with the agent's rules. Pass the pre-compiled model via `--project-model`: - ```bash - opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin \ - --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --track-external-methods \ - --timeout 900s \ - --severity warning,error - ``` - - If you have custom passThrough config: - ```bash - opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --approximations-config ./agent-config/custom-propagators.yaml \ - --track-external-methods - ``` - - If you have approximation source files: - ```bash - opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --approximations-config ./agent-config/custom-propagators.yaml \ - --dataflow-approximations ./agent-approximations/src \ - --track-external-methods - ``` - - The `--dataflow-approximations` flag accepts a directory. If it contains `.java` files, the CLI auto-compiles them using `opentaint-analyzer.jar` as the classpath (which contains `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) plus the target project's dependencies. Compilation errors are reported before analysis starts. - -2. View results summary: - ```bash - opentaint summary ./results/report.sarif --show-findings --verbose-flow - ``` - -3. Collect outputs for the decision loop (all next to the SARIF file): - - `./results/report.sarif` — vulnerability findings with traces - - `./results/external-methods-without-rules.yaml` — priority list (killed dataflow) - - `./results/external-methods-with-rules.yaml` — already modeled (for review) - - The `--track-external-methods` flag is a boolean; filenames and directory are fixed. - -### 3.6 Skill: `analyze-findings` - -**Purpose**: Interpret SARIF findings and decide on TP/FP/FN actions. - -**Instructions**: - -For each finding in the SARIF report: - -1. **Read the trace** (codeFlows in SARIF): - - First location = source (where tainted data enters) - - Last location = sink (where tainted data is used dangerously) - - Intermediate locations = dataflow path - -2. **Classify the finding**: - - **TRUE POSITIVE (TP)**: The trace represents a real vulnerability. - - The source genuinely provides attacker-controlled data - - The sink genuinely performs a dangerous operation with that data - - No sanitization occurs between source and sink - - Action: Generate a proof-of-concept, document in `vulnerabilities.md` - - **FALSE POSITIVE (FP) — fixable via Rule**: The trace is invalid due to over-broad pattern matching. - - The sink pattern is too broad (matches safe methods) - - A sanitizer is not recognized by the pattern - - The source pattern matches non-attacker-controlled data - - Action: Add `pattern-not`, `pattern-not-inside`, `pattern-sanitizers`, or narrow `metavariable-regex`. Update tests. Re-run. - - **FALSE POSITIVE (FP) — fixable via Approximation** (non-preferred): The trace is invalid due to imprecise taint propagation modeling. - - A library method is modeled as propagating taint when it actually transforms data in a way that neutralizes the threat - - Action: Override the passThrough approximation to remove the incorrect propagation. Re-run. - -3. **For external methods list** (FN discovery): - - Focus on the `withoutRules` section first — these methods have no propagation model at all. Classify each: - - **PROPAGATOR**: The method passes taint from input to output. - - Example: `DataWrapper#getValue()` — taint on `this` flows to `result` - - Action: Create a `passThrough` YAML rule via `--approximations-config` - - **TRANSFORMER with lambdas**: The method invokes callbacks/lambdas. - - Example: `ReactiveStream#map(Function)` — taint flows through the function - - Action: Create a code-based approximation via `--dataflow-approximations` - - **NEUTRAL**: The method is irrelevant to taint flow (logging, metrics, sanitizers, etc.) - - Action: Skip — the default call-to-return passthrough is correct - - The `withRules` section can be reviewed if specific traces look suspicious (existing rules may be incorrect or incomplete). - -### 3.7 Skill: `create-yaml-config` - -**Purpose**: Create YAML propagation rules (passThrough) for library methods. - -**Instructions**: - -1. Create a YAML config file (`agent-config/custom-propagators.yaml`): - - **Simple getter propagation** (taint on `this` → `result`): - ```yaml - passThrough: - - function: com.example.lib.DataWrapper#getValue - copy: - - from: this - to: result - ``` - - **Argument-to-result propagation**: - ```yaml - passThrough: - - function: com.example.lib.Converter#convert - copy: - - from: arg(0) - to: result - ``` - - **Builder pattern** (taint flows through builder chain): - ```yaml - passThrough: - - function: com.example.lib.Builder#withName - copy: - - from: arg(0) - to: this - - from: arg(0) - to: result - - from: this - to: result - ``` - - **Object with internal state** (using ``): - ```yaml - passThrough: - # Store taint - - function: com.example.lib.Container#put - copy: - - from: arg(0) - to: - - this - - .com.example.lib.Container##java.lang.Object - # Retrieve taint - - function: com.example.lib.Container#get - copy: - - from: - - this - - .com.example.lib.Container##java.lang.Object - to: result - ``` - - **Package-wide getter pattern** (all getters in a package): - ```yaml - passThrough: - - function: - package: com.example.dto - class: - pattern: .* - name: - pattern: get.* - copy: - - from: this - to: result - ``` - - **Conditional propagation**: - ```yaml - passThrough: - - function: com.example.lib.Parser#parse - condition: - typeIs: - position: arg(0) - type: java.lang.String - copy: - - from: arg(0) - to: result - ``` - -2. Use with analysis (`--approximations-config` is repeatable, each OVERRIDE-merged): - ```bash - opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --approximations-config ./agent-config/custom-propagators.yaml - ``` - -**Constraints**: -- The `function` field format is `package.Class#method` (simple) or `{package, class, name}` (complex with patterns) -- Position values: `this`, `result`, `arg(0)`, `arg(1)`, ..., `arg(*)`, `any(classifier)` -- Position modifiers (YAML list): `.[*]` (array element), `.ClassName#fieldName#fieldType` (field access), `.` (synthetic internal state) -- `overrides: true` (default) means the rule applies to subclasses too -- Custom config rules **override** the default config when passed via `--approximations-config` -- Only `passThrough` rules are supported; the analyzer cannot use sanitizers from the config - -### 3.8 Skill: `create-approximation` - -**Purpose**: Create code-based approximations for complex library methods (lambdas, async, callbacks). - -**Instructions**: - -1. Create a Java source file for the approximation in `agent-approximations/src/`: - - ```java - package agent.approximations; - - import org.opentaint.ir.approximation.annotation.Approximate; - // For methods with lambda parameters: - import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; - // For non-deterministic branching: - import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; - - import java.util.function.Function; - - @Approximate(com.example.lib.ReactiveProcessor.class) - public class ReactiveProcessor { - - // Model: taint on this flows through the function to the result - public Object transform(@ArgumentTypeContext Function fn) throws Throwable { - com.example.lib.ReactiveProcessor self = - (com.example.lib.ReactiveProcessor) (Object) this; - if (OpentaintNdUtil.nextBool()) return null; // async failure path - Object input = self.getValue(); - return fn.apply(input); - } - - // Model: taint on this flows to the consumer argument - public void subscribe(@ArgumentTypeContext java.util.function.Consumer consumer) { - com.example.lib.ReactiveProcessor self = - (com.example.lib.ReactiveProcessor) (Object) this; - if (OpentaintNdUtil.nextBool()) { - consumer.accept(self.getValue()); - } - } - } - ``` - - **Key patterns**: - - `@Approximate(TargetClass.class)` or `@ApproximateByName("fqn")` on the class - - `(TargetClass) (Object) this` cast to access the real object's methods - - `@ArgumentTypeContext` on lambda/functional interface parameters - - `OpentaintNdUtil.nextBool()` for non-deterministic branching (models both success and failure paths) - - Java 8 source compatibility - - One approximation class per target class (strict bijection) - - Must NOT target a class that already has a built-in approximation (will error) - -2. Use with analysis — compilation is automatic: - ```bash - opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --dataflow-approximations ./agent-approximations/src - ``` - - The `--dataflow-approximations` flag detects `.java` files and auto-compiles them using: - - `opentaint-analyzer.jar` as classpath (contains `@Approximate`, `OpentaintNdUtil`, `ArgumentTypeContext`) - - Target project's dependencies from `project.yaml` (so `javac` can resolve the library being approximated) - - If compilation fails, errors are reported before analysis starts. - If a custom approximation targets a class that already has a built-in approximation, the analyzer reports an error and aborts. - -**When to use code-based approximations vs YAML config**: -- Lambda/callback invocation → code-based (YAML cannot model lambda calls) -- Non-deterministic branching (async paths) → code-based (`OpentaintNdUtil.nextBool()`) -- Complex internal state with multiple method interactions → code-based (more expressive) -- Simple from→to propagation → YAML passThrough (simpler, faster to write) - -### 3.9 Skill: `generate-poc` - -**Purpose**: Generate a proof-of-concept exploit for a confirmed true positive vulnerability. - -**Instructions**: - -1. Extract the vulnerability trace from SARIF: - - Source: entry point method and parameter (e.g., HTTP request parameter `id`) - - Path: sequence of method calls through which taint flows - - Sink: dangerous operation (e.g., SQL query execution) - -2. Construct a PoC: - - For HTTP-based sources: a `curl` command or HTTP request demonstrating the attack - - For command injection: the payload that achieves command execution - - For SQL injection: the payload that demonstrates data extraction - - For path traversal: the payload that reads/writes unauthorized files - - For XSS: the payload that executes JavaScript in the browser - -3. Document in `vulnerabilities.md`: - ```markdown - ## VULN-001: SQL Injection in UserController.getUser - - **Severity**: Critical (CWE-89) - **Location**: `src/main/java/com/example/controller/UserController.java:45` - **Rule**: `my-vulnerability` - - ### Description - User-controlled input from HTTP parameter `id` flows unsanitized into - a SQL query via `Statement.executeQuery()`. - - ### Trace - 1. **Source**: `UserController.getUser()` — `request.getParameter("id")` (line 42) - 2. **Flow**: String concatenation `"SELECT * FROM users WHERE id = " + input` (line 44) - 3. **Sink**: `Statement.executeQuery(query)` (line 45) - - ### Proof of Concept - ``` - curl "http://target:8080/api/users/1' OR '1'='1" - ``` - - ### Remediation - Use parameterized queries: - ```java - PreparedStatement pstmt = conn.prepareStatement("SELECT * FROM users WHERE id = ?"); - pstmt.setString(1, input); - ``` - ``` - ---- - -## 4. Meta Prompt - -The meta prompt orchestrates the agent through the complete workflow. It references skills and implements the decision loop from task.md steps 1-10. - -``` -You are a security analysis agent using OpenTaint, a dataflow-based SAST analyzer for JVM projects. -OpenTaint is available on PATH as `opentaint`. - -Your goal: Perform comprehensive security analysis of a target project, discovering all vulnerabilities -and minimizing false positives and false negatives. - -## Your Capabilities - -You can: -- Generate pattern rules (YAML) defining vulnerability patterns (sources, sinks, sanitizers) -- Generate YAML passThrough config for library methods -- Generate code-based approximations (Java stubs) for complex methods with lambdas/callbacks -- Test rules against sample code -- Run analysis and interpret results -- Override existing passThrough rules via --approximations-config - -You cannot: -- Modify framework support (Spring detection is automatic) -- Change the analysis algorithm itself -- Add sanitizers via YAML config (sanitizers are handled via pattern rules only) -- Override built-in code-based approximations (will error on conflict) - -## Available Skills - -Load these skills as needed during your workflow: -- `build-project` — Build and prepare the target project -- `discover-entry-points` — Analyze source code to find entry points and attack surface -- `create-rule` — Create pattern rules for vulnerability detection -- `test-rule` — Test rules with annotated samples -- `run-analysis` — Run OpenTaint and collect results -- `analyze-findings` — Interpret SARIF findings and external methods list -- `create-yaml-config` — Create YAML passThrough rules -- `create-approximation` — Create code-based approximations for complex methods -- `generate-poc` — Generate proof-of-concept for confirmed vulnerabilities - -## Workflow - -### Phase 1: Project Setup - -1. Load `build-project` skill. Build the target project: - ```bash - opentaint compile -o ./opentaint-project - ``` - -2. Load `discover-entry-points` skill. Read source code, analyze project structure, identify: - - Framework in use (Spring, Servlets, JAX-RS, etc.) - - Entry points (controllers, servlets, listeners, CLI entry points) - - Attack surface (what external data enters, what dangerous operations are performed) - - Relevant vulnerability classes to test - -3. Create `opentaint-analysis-plan.md` with: - - Project description and technology stack - - Identified entry points and attack surface - - Relevant vulnerability classes to test - - Plan for rule creation - -### Phase 2: Rule Creation - -4. For each relevant vulnerability class (SQLi, XSS, command injection, path traversal, etc.): - - a. Load `create-rule` skill. Read built-in rules to check coverage: - ```bash - RULES_DIR=$(opentaint agent rules-path) - ls $RULES_DIR/java/security/ # existing security rules - ls $RULES_DIR/java/lib/generic/ # available source/sink libraries - ``` - - b. Create rules in `./agent-rules/`: - - Library rules in `./agent-rules/java/lib/` - - Security rules in `./agent-rules/java/security/` - - Reference built-in library rules where applicable - - c. Load `test-rule` skill. Bootstrap and test: - ```bash - opentaint agent init-test-project ./agent-test-project --dependency "javax.servlet:javax.servlet-api:4.0.1" - ``` - - Add `@PositiveRuleSample` and `@NegativeRuleSample` test methods - - Run: `opentaint agent test-rules ./agent-test-compiled --ruleset ./agent-rules -o ./test-output` - - Fix until `test-result.json` shows zero failures - -### Phase 3: Analysis Loop - -5. Load `run-analysis` skill. Run initial analysis: - ```bash - opentaint scan --project-model ./opentaint-project \ - -o ./results/report.sarif \ - --ruleset builtin --ruleset ./agent-rules \ - --rule-id java/security/my-vuln.yaml:my-vulnerability \ - --track-external-methods - ``` - -6. Load `analyze-findings` skill. For each SARIF finding: - - **If TRUE POSITIVE**: - - Load `generate-poc` skill - - Generate proof-of-concept exploit - - Document in `vulnerabilities.md` - - **If FALSE POSITIVE (fixable via rule)**: - - Load `create-rule` skill - - Add `pattern-not`, `pattern-sanitizers`, or narrow patterns - - Load `test-rule` skill — add `@NegativeRuleSample` for the FP case - - Re-run tests, then goto step 5 - - **If FALSE POSITIVE (fixable via approximation)** (non-preferred): - - Load `create-yaml-config` skill - - Override the passThrough approximation to remove incorrect propagation - - Goto step 5 - -7. For each entry in `external-methods.yaml` (focus on `withoutRules` section): - - Classify the method (propagator / transformer / neutral): - - **If PROPAGATOR** (simple taint flow): - - Load `create-yaml-config` skill - - Create passThrough rule - - Goto step 5 - - **If TRANSFORMER** (involves lambdas/callbacks): - - Load `create-approximation` skill - - Create approximation source file in `./agent-approximations/src/` - - Goto step 5 - - **If NEUTRAL** (logging, metrics, sanitizers, irrelevant): - - Skip — default passthrough is correct - -### Phase 4: Finalization - -8. When the agent determines analysis is complete: - - All traces have been reviewed and classified - - All identified FP have been fixed - - All relevant external methods have been addressed - - Remaining external methods are classified as NEUTRAL - -9. Update `opentaint-analysis-plan.md` with final status. - -10. Deliver: - - `vulnerabilities.md` — confirmed vulnerabilities with PoCs - - `opentaint-analysis-plan.md` — analysis log - - `./agent-rules/` — custom pattern rules - - `./agent-config/` — custom YAML passThrough rules (if any) - - `./agent-approximations/src/` — custom code-based approximation sources (if any) - -## Working Directory Layout - -``` -/ -├── opentaint-analysis-plan.md # Analysis progress tracking -├── vulnerabilities.md # Confirmed vulnerabilities -├── opentaint-project/ # Compiled project model -│ └── project.yaml -├── agent-rules/ # Agent-created pattern rules -│ └── java/ -│ ├── security/ # Executable security rules -│ └── lib/ # Reusable library rules -├── agent-config/ # Agent-created YAML passThrough config -│ └── custom-propagators.yaml -├── agent-approximations/ # Agent-created code-based approximations -│ └── src/ # Java source files (auto-compiled by CLI) -├── agent-test-project/ # Test project (bootstrapped via init-test-project) -│ ├── build.gradle.kts -│ ├── libs/opentaint-sast-test-util.jar -│ └── src/main/java/test/ -└── results/ # Analysis outputs - ├── report.sarif - └── external-methods.yaml -``` - -## Decision Priorities - -When fixing FN: -1. YAML passThrough rule (simplest, covers most cases) -2. Code-based approximation (for lambdas/callbacks only) -3. Rule pattern fix (only if FN is due to missing source/sink pattern, not missing propagation) - -When fixing FP: -1. Rule fix via `pattern-not` / `pattern-sanitizers` (preferred, scoped to one rule) -2. PassThrough override (non-preferred, affects all rules globally) - -## Iteration Strategy - -- Process findings batch by batch (don't try to fix everything at once) -- After each batch of fixes, re-run analysis and check for regressions -- Group external methods by library/package for efficient batch processing -- Stop when the external methods list stabilizes (no new entries between iterations) - and all SARIF findings are classified -``` - ---- - -## Appendix A: Sample Test Project Bootstrap - -```bash -# Bootstrap test project with servlet API dependency -opentaint agent init-test-project ./agent-test-project \ - --dependency "javax.servlet:javax.servlet-api:4.0.1" - -# Add test samples -cat > ./agent-test-project/src/main/java/test/SampleTest.java << 'EOF' -package test; - -import org.opentaint.sast.test.util.PositiveRuleSample; -import org.opentaint.sast.test.util.NegativeRuleSample; - -public class SampleTest { - - @PositiveRuleSample(value = "java/security/my-rule.yaml", id = "my-rule-id") - public void vulnerableMethod() { - // Write code that demonstrates the vulnerability pattern - } - - @NegativeRuleSample(value = "java/security/my-rule.yaml", id = "my-rule-id") - public void safeMethod() { - // Write code that is safe (sanitized, parameterized, etc.) - } -} -EOF - -# Build and test — the test-rules argument is the project-model directory -opentaint compile ./agent-test-project -o ./agent-test-compiled -opentaint agent test-rules ./agent-test-compiled \ - --ruleset ./agent-rules -o ./test-output -cat ./test-output/test-result.json -``` - ---- - -## Appendix B: SARIF Output Structure (Quick Reference) - -```json -{ - "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json", - "version": "2.1.0", - "runs": [{ - "tool": { "driver": { "name": "OpenTaint", "rules": [...] } }, - "results": [{ - "ruleId": "my-vulnerability", - "level": "error", - "message": { "text": "Untrusted data flows to SQL query" }, - "locations": [{ - "physicalLocation": { - "artifactLocation": { "uri": "src/main/java/com/example/UserController.java" }, - "region": { "startLine": 45, "startColumn": 9 } - } - }], - "codeFlows": [{ - "threadFlows": [{ - "locations": [ - { "location": { "physicalLocation": { "region": { "startLine": 42 } }, "message": { "text": "source" } } }, - { "location": { "physicalLocation": { "region": { "startLine": 44 } }, "message": { "text": "flow" } } }, - { "location": { "physicalLocation": { "region": { "startLine": 45 } }, "message": { "text": "sink" } } } - ] - }] - }], - "relatedLocations": [...] - }] - }] -} -``` - ---- - -## Appendix C: External Methods Output Structure (Quick Reference) - -```yaml -withoutRules: - - method: com.example.lib.DataWrapper#getValue - signature: "() java.lang.String" - factPositions: - - this - callSites: 5 - - - method: com.example.lib.Processor#transform - signature: "(java.lang.Object) java.lang.Object" - factPositions: - - arg(0) - - this - callSites: 12 - -withRules: - - method: java.lang.StringBuilder#append - signature: "(java.lang.String) java.lang.StringBuilder" - factPositions: - - arg(0) - callSites: 87 -``` diff --git a/agent-mode/impl/agent-mode-impl.md b/agent-mode/impl/agent-mode-impl.md deleted file mode 100644 index bcb5cc3da..000000000 --- a/agent-mode/impl/agent-mode-impl.md +++ /dev/null @@ -1,1275 +0,0 @@ -# Agent Mode — Implementation Plan - -This document translates the design in `agent-mode/design/agent-mode-design.md` into a concrete, file-level implementation plan. It covers every module that needs modification, where skills and meta-prompt live, how they're distributed, and how to test without the CLI installed on PATH. - ---- - -## Table of Contents - -1. [Implementation Overview](#1-implementation-overview) -2. [Kotlin Analyzer Changes](#2-kotlin-analyzer-changes) - - 2.1 [External Methods Tracker](#21-external-methods-tracker) - - 2.2 [Rule ID Filter](#22-rule-id-filter) - - 2.3 [Approximations Config + Semgrep Rules Together](#23-approximations-config--semgrep-rules-together) - - 2.4 [Custom Dataflow Approximations Path](#24-custom-dataflow-approximations-path) - - 2.5 [New CLI Flags Wiring](#25-new-cli-flags-wiring) -3. [Go CLI Changes](#3-go-cli-changes) - - 3.1 [New Flags on `scan` Command](#31-new-flags-on-scan-command) - - 3.2 [Approximation Auto-Compilation](#32-approximation-auto-compilation) - - 3.3 [`opentaint agent` Command Group](#33-opentaint-agent-command-group) - - 3.4 [Hidden Dev Flags](#34-hidden-dev-flags) - - 3.5 [AnalyzerBuilder Extensions](#35-analyzerbuilder-extensions) -4. [Skills and Meta-Prompt Location](#4-skills-and-meta-prompt-location) - - 4.1 [Source Layout](#41-source-layout) - - 4.2 [Bundling and Distribution](#42-bundling-and-distribution) - - 4.3 [Runtime Access (Direct File Read)](#43-runtime-access-direct-file-read) -5. [Testing Without CLI on PATH](#5-testing-without-cli-on-path) - - 5.1 [Hidden `--analyzer-jar` / `--autobuilder-jar` Flags](#51-hidden---analyzer-jar----autobuilder-jar-flags) - - 5.2 [Environment Variables](#52-environment-variables) - - 5.3 [Python Test Infrastructure (conftest.py)](#53-python-test-infrastructure-conftestpy) - - 5.4 [Local Dev Workflow](#54-local-dev-workflow) -6. [Implementation Order](#6-implementation-order) -7. [File Change Summary](#7-file-change-summary) - ---- - -## 1. Implementation Overview - -The implementation spans two main codebases (Kotlin analyzer, Go CLI) plus a new `agent/` directory for distributable agent artifacts (skills, meta-prompt). The `agent-mode/` directory remains for design docs and tests only — it is not distributed. - -| Area | Scope | Effort | -|------|-------|--------| -| Kotlin analyzer | 4 features: external methods tracker, rule ID filter, combined config+rules, custom approximations path | Medium-Large | -| Go CLI | 4 new flags on `scan`, `opentaint agent` command group (5 subcommands), hidden dev flags, auto-compilation logic | Medium | -| Skills + Meta-prompt | 9 skill files + 1 meta-prompt in `agent/`, bundled into CLI distribution | Small | -| Distribution | Release pipeline changes to bundle `lib/agent/` | Small | -| Test infrastructure | Already built in Phase 3; needs hidden flag support | Small | - ---- - -## 2. Kotlin Analyzer Changes - -### 2.1 External Methods Tracker - -**Goal**: Collect all external (unresolved) method calls during taint analysis and output them as YAML. - -#### New files - -**`core/opentaint-dataflow-core/opentaint-dataflow/src/main/kotlin/org/opentaint/dataflow/ap/ifds/taint/ExternalMethodTracker.kt`** - -```kotlin -package org.opentaint.dataflow.ap.ifds.taint - -import java.util.concurrent.ConcurrentHashMap -import java.util.concurrent.ConcurrentLinkedQueue - -data class ExternalMethodRecord( - val method: String, // "com.example.Foo#bar" - val signature: String, // JVM-style: "(Ljava/lang/String;)V" - val factPositions: Set, // "this", "arg(0)", "arg(1)", "result" - val passRulesApplied: Boolean, // true if passThrough rules were actually applied for this method -) - -class ExternalMethodTracker { - // Dedup key: method+signature+factPosition - private val seen = ConcurrentHashMap.newKeySet() - - // Per-method aggregation: method+signature → (factPositions, passRulesApplied, callSiteCount) - private val records = ConcurrentHashMap() - - fun report( - method: String, - signature: String, - factPosition: String, - passRulesApplied: Boolean, - ) { - val key = "$method|$signature|$factPosition" - if (!seen.add(key)) return - - records.computeIfAbsent("$method|$signature") { - ExternalMethodAggregation(method, signature, passRulesApplied) - }.apply { - addFactPosition(factPosition) - if (passRulesApplied) markPassRulesApplied() - } - } - - fun reportCallSite(method: String, signature: String) { - records.computeIfAbsent("$method|$signature") { - ExternalMethodAggregation(method, signature, false) - }.incrementCallSites() - } - - fun getResults(): ExternalMethodResults { - val withoutRules = mutableListOf() - val withRules = mutableListOf() - - for (agg in records.values) { - val record = agg.toRecord() - if (record.passRulesApplied) withRules.add(record) else withoutRules.add(record) - } - - return ExternalMethodResults( - withoutRules.sortedByDescending { it.callSites }, - withRules.sortedByDescending { it.callSites }, - ) - } -} -``` - -**Pattern**: Modeled after `TaintSinkTracker` (same file location, same `ConcurrentHashMap` dedup pattern, same wiring through storage). - -#### Modified files - -| File | Change | -|------|--------| -| `core/opentaint-dataflow-core/opentaint-dataflow/.../taint/TaintAnalysisUnitStorage.kt` | Add `externalMethodTracker: ExternalMethodTracker` field | -| `core/opentaint-dataflow-core/opentaint-dataflow/.../taint/TaintAnalysisContext.kt` | Expose `externalMethodTracker` from storage | -| `core/opentaint-dataflow-core/opentaint-jvm-dataflow/.../JIRMethodCallFlowFunction.kt` | In `applyPassRulesOrCallSkip()` at line ~617: after resolving `callExpr.callee`, call `externalMethodTracker.report(...)` | -| `core/opentaint-jvm-sast-dataflow/.../TaintAnalysisUnitRunnerManager.kt` | Wire `ExternalMethodTracker` into unit storage creation (same pattern as `TaintSinkTracker`) | -| `core/src/main/kotlin/.../project/ProjectAnalyzer.kt` | After analysis completes, if `externalMethodsOutput` path is set, serialize tracker results to YAML | -| `core/src/main/kotlin/.../project/ProjectAnalysisOptions.kt` | Add `externalMethodsOutput: Path? = null` field | - -#### Integration point in `JIRMethodCallFlowFunction` - -The key insertion point is `applyPassRulesOrCallSkip()`. The existing code already computes whether pass-through rules were applied via `passThroughFacts.onSome { ... }` (line 651). We use this result directly — no separate lookup needed. - -```kotlin -// EXISTING: line 617 -val method = callExpr.callee - -// EXISTING: lines 642-649 -val passThroughFacts = applyPassThrough( - config, method, statement, - fact = passFactReader.factAp, - simpleConditionEvaluator, passEvaluator -) - -// NEW: report to tracker using the actual applyPassThrough result -val tracker = analysisContext.taint.externalMethodTracker -if (tracker != null) { - val methodName = "${method.declaringClass.name}#${method.name}" - val signature = method.jvmSignature - val factPosition = resolveFactPosition(factAp) // "this", "arg(0)", etc. - val passRulesApplied = passThroughFacts.isSome - tracker.report(methodName, signature, factPosition, passRulesApplied) -} -``` - -The `resolveFactPosition` helper maps `FinalFactAp` base to a human-readable position string. The `passRulesApplied` boolean comes directly from checking whether `applyPassThrough` returned `Some` (rules matched and were applied) vs `None` (no matching rules). This is more accurate than checking whether rules *exist* for the method — it reflects whether rules actually *fired* for the given fact position. - -#### Output format - -YAML file written by `ProjectAnalyzer` after analysis: - -```yaml -withoutRules: - - method: "org.apache.pdfbox.pdmodel.PDDocument#save" - signature: "(Ljava/io/OutputStream;)V" - factPositions: ["arg(0)", "this"] - callSites: 12 - - method: "com.fasterxml.jackson.databind.ObjectMapper#readValue" - signature: "(Ljava/lang/String;Ljava/lang/Class;)Ljava/lang/Object;" - factPositions: ["arg(0)", "result"] - callSites: 7 - -withRules: - - method: "java.lang.String#substring" - signature: "(I)Ljava/lang/String;" - factPositions: ["this", "result"] - callSites: 45 -``` - -Serialization uses `kaml` (already a dependency) or `snakeyaml` — consistent with `Project.kt` pattern. - ---- - -### 2.2 Rule ID Filter - -**Goal**: Filter loaded rules by ID. Same mechanism as the existing severity filter. - -#### Modified files - -| File | Change | -|------|--------| -| `core/opentaint-java-querylang/.../SemgrepRuleLoader.kt` | Add `ruleIdFilter` parameter to `loadRules()`, add ID check to `skip()` predicate | -| `core/src/main/kotlin/.../project/ProjectAnalysisOptions.kt` | Add `semgrepRuleId: List = emptyList()` field | -| `core/src/main/kotlin/.../runner/ProjectAnalyzerRunner.kt` | Add `--semgrep-rule-id` Clikt option, wire to `ProjectAnalysisOptions` | - -#### Implementation in `SemgrepRuleLoader.loadRules()` - -The existing `loadRules()` (line 106) already has a `skip()` predicate that filters by severity and library/disabled status: - -```kotlin -fun loadRules(severity: List = emptyList()): RuleLoadResult { - fun Rule<*>.skip(): Boolean = - info.isDisabled || info.isLibraryRule || !ruleSeverityAllow(this, severity) -``` - -The rule ID filter works the same way — just another predicate in `skip()`: - -```kotlin -fun loadRules( - severity: List = emptyList(), - ruleIdFilter: List = emptyList(), -): RuleLoadResult { - fun Rule<*>.skip(): Boolean = - info.isDisabled || info.isLibraryRule - || !ruleSeverityAllow(this, severity) - || !ruleIdAllow(this, ruleIdFilter) - // ... rest unchanged -} - -private fun ruleIdAllow(rule: Rule<*>, ruleIdFilter: List): Boolean = - ruleIdFilter.isEmpty() || rule.id in ruleIdFilter -``` - -Library rules (`isLibraryRule = true`) are already excluded by the existing `skip()` logic — they are loaded but not run directly. They only participate when referenced by join-mode rules. The ID filter does not need to walk `refs` because the existing rule resolution pipeline already handles library rule inclusion for join-mode rules independently of the skip filter. - ---- - -### 2.3 Approximations Config + Semgrep Rules Together - -**Goal**: Remove the mutual exclusion between `--config` (approximations) and `--semgrep-rule-set`. - -#### Modified files - -| File | Change | -|------|--------| -| `core/src/main/kotlin/.../project/ProjectAnalyzer.kt` | Modify `preloadRules()` at lines 54-80: add 4th variant, remove `check()` at line 62 | -| `core/src/main/kotlin/.../runner/ProjectAnalyzerRunner.kt` | Rename `--config` flag to `--approximations-config` | - -#### Implementation in `ProjectAnalyzer.preloadRules()` - -Current code (lines 54-80): -```kotlin -private sealed interface PreloadedRules { - data class SemgrepRules(val rules: List) : PreloadedRules - data class Custom(val config: SerializedTaintConfig) : PreloadedRules - data object DefaultRules : PreloadedRules -} - -private fun preloadRules(): PreloadedRules { - if (options.semgrepRuleSet.isNotEmpty()) { - check(options.customConfig == null) { "Unsupported custom config" } // ← REMOVE THIS - val loadedRules = options.loadSemgrepRules() - ruleMetadatas += loadedRules.rulesWithMeta.map { it.second } - return PreloadedRules.SemgrepRules(loadedRules.rulesWithMeta.map { it.first }) - } - // ... -} -``` - -New code: -```kotlin -private sealed interface PreloadedRules { - data class SemgrepRules(val rules: List) : PreloadedRules - data class Custom(val config: SerializedTaintConfig) : PreloadedRules - data class SemgrepRulesWithCustomConfig( - val rules: List, - val config: SerializedTaintConfig, - ) : PreloadedRules - data object DefaultRules : PreloadedRules -} - -private fun preloadRules(): PreloadedRules { - val customConfig = options.customConfig?.let { cfg -> - cfg.inputStream().use { loadSerializedTaintConfig(it) } - } - - if (options.semgrepRuleSet.isNotEmpty()) { - val loadedRules = options.loadSemgrepRules(ruleIdFilter = options.semgrepRuleId) - ruleMetadatas += loadedRules.rulesWithMeta.map { it.second } - val rules = loadedRules.rulesWithMeta.map { it.first } - - return if (customConfig != null) { - PreloadedRules.SemgrepRulesWithCustomConfig(rules, customConfig) - } else { - PreloadedRules.SemgrepRules(rules) - } - } - - if (customConfig != null) { - return PreloadedRules.Custom(customConfig) - } - - return PreloadedRules.DefaultRules -} -``` - -Then in `loadTaintConfig()` (lines 82-103), add a branch for `SemgrepRulesWithCustomConfig`: - -```kotlin -is PreloadedRules.SemgrepRulesWithCustomConfig -> { - // Load default config, override with custom, then layer semgrep rules on top - val defaultConfig = loadDefaultConfig() - val mergedConfig = JIRCombinedTaintRulesProvider(defaultConfig, rules.config) // OVERRIDE mode - // Then apply semgrep rules to mergedConfig - // ... (same as SemgrepRules branch but with mergedConfig as base) -} -``` - -The OVERRIDE semantics means: custom config entries for the same method signature replace (not extend) the default config entries. This is already how `JIRCombinedTaintRulesProvider` works — later entries take precedence. - ---- - -### 2.4 Custom Dataflow Approximations Path - -**Goal**: Accept external directories of compiled approximation `.class` files via CLI flag. - -#### Modified files - -| File | Change | -|------|--------| -| `core/opentaint-jvm-sast-dataflow/.../DataFlowApproximationLoader.kt` | Add `customApproximationPaths: List` to `Options`, append in `approximationFiles()` | -| `core/src/main/kotlin/.../project/ProjectAnalysisOptions.kt` | Already has `approximationOptions: DataFlowApproximationLoader.Options` — new paths flow through this | -| `core/src/main/kotlin/.../runner/ProjectAnalyzerRunner.kt` | Add `--dataflow-approximations` Clikt option | - -#### Implementation in `DataFlowApproximationLoader` - -Current `Options` (line 20-23): -```kotlin -data class Options( - val useDataflowApproximation: Boolean = true, - val useOpentaintApproximations: Boolean = false, -) -``` - -New `Options`: -```kotlin -data class Options( - val useDataflowApproximation: Boolean = true, - val useOpentaintApproximations: Boolean = false, - val customApproximationPaths: List = emptyList(), -) -``` - -Modified `approximationFiles()` (lines 52-63): -```kotlin -private fun approximationFiles(options: Options): List { - val result = mutableListOf() - if (options.useDataflowApproximation) { - result += listOfNotNull(dataflowApproximationsPath?.toFile()) - } - if (options.useOpentaintApproximations) { - result += approximationPaths.presentPaths.map { File(it) } - } - // NEW: append custom paths AFTER built-in ones - result += options.customApproximationPaths.map { it.toFile() } - return result -} -``` - -Custom paths are appended **after** built-in ones. The `ApproximationIndexer` (which scans `@Approximate` annotations) maintains a bijection map from target class → approximation class. If a custom approximation targets the same class as a built-in one, the bijection's `require()` will throw — this is intentional (no silent override of built-in approximations). - ---- - -### 2.5 New CLI Flags Wiring - -**File**: `core/src/main/kotlin/org/opentaint/jvm/sast/runner/ProjectAnalyzerRunner.kt` - -Current flag definitions (lines 23-52): - -```kotlin -class ProjectAnalyzerRunner : AbstractAnalyzerRunner(name = "analyze") { - // existing flags... - private val config: Path? by option("--config").file(mustExist = true) - private val semgrepRuleSet: List by option("--semgrep-rule-set").file(mustExist = true).multiple() - private val semgrepSeverity: List by option("--semgrep-rule-severity").enum().multiple() - // ... -} -``` - -New flags to add: - -```kotlin -// Rename --config → --approximations-config (keep --config as hidden alias for backward compat) -private val approximationsConfig: Path? by option("--approximations-config", "--config") - .file(mustExist = true) - -// New: Rule ID filter -private val semgrepRuleId: List by option("--semgrep-rule-id") - .multiple() - -// New: External methods output path -private val externalMethodsOutput: Path? by option("--external-methods-output") - .newFile() - -// New: Custom dataflow approximation directories -private val dataflowApproximations: List by option("--dataflow-approximations") - .file(mustExist = true, canBeDir = true) - .multiple() -``` - -In `analyzeProject()` (line ~80), wire to `ProjectAnalysisOptions`: - -```kotlin -val options = ProjectAnalysisOptions( - customConfig = approximationsConfig, // was: config - semgrepRuleSet = semgrepRuleSet, - semgrepRuleId = semgrepRuleId, // NEW - externalMethodsOutput = externalMethodsOutput, // NEW - // ... - approximationOptions = DataFlowApproximationLoader.Options( - useDataflowApproximation = true, - customApproximationPaths = dataflowApproximations, // NEW - ), -) -``` - ---- - -## 3. Go CLI Changes - -### 3.1 New Flags on `scan` Command - -**File**: `cli/cmd/scan.go` - -Add to `init()`: - -```go -// New flags -var RuleId []string -var ApproximationsConfig string -var DataflowApproximations string -var ExternalMethods string - -func init() { - rootCmd.AddCommand(scanCmd) - // ... existing flags ... - - // NEW - scanCmd.Flags().StringArrayVar(&RuleId, "rule-id", nil, - "Filter active rules by ID (repeatable)") - scanCmd.Flags().StringVar(&ApproximationsConfig, "approximations-config", "", - "Path to YAML passThrough approximations config (OVERRIDE mode)") - scanCmd.Flags().StringVar(&DataflowApproximations, "dataflow-approximations", "", - "Directory of .java or .class approximation files") - scanCmd.Flags().StringVar(&ExternalMethods, "external-methods", "", - "Output path for external methods YAML list") -} -``` - -In the `scan` command's `Run` function, before building the analyzer command: - -```go -// Handle --dataflow-approximations auto-compilation -compiledApproxDir := "" -if DataflowApproximations != "" { - compiledApproxDir = compileApproximationsIfNeeded(DataflowApproximations, projectPath) -} - -// Build analyzer command -nativeBuilder := NewAnalyzerBuilder(). - // ... existing ... - -// NEW: wire flags -for _, id := range RuleId { - nativeBuilder.AddRuleIdFilter(id) -} -if ApproximationsConfig != "" { - nativeBuilder.SetApproximationsConfig(ApproximationsConfig) -} -if compiledApproxDir != "" { - nativeBuilder.AddDataflowApproximations(compiledApproxDir) -} else if DataflowApproximations != "" { - nativeBuilder.AddDataflowApproximations(DataflowApproximations) -} -if ExternalMethods != "" { - nativeBuilder.SetExternalMethodsOutput(ExternalMethods) -} -``` - ---- - -### 3.2 Approximation Auto-Compilation - -**New file**: `cli/cmd/compile_approximations.go` - -This function handles the `--dataflow-approximations` flag when the directory contains `.java` sources: - -```go -package cmd - -func compileApproximationsIfNeeded(approxDir string, projectPath string) string { - // 1. Scan dir for .java files - javaFiles := findJavaFiles(approxDir) - if len(javaFiles) == 0 { - return approxDir // .class files only — pass directly - } - - // 2. Resolve javac (from managed JRE or system) - javaRunner := java.NewJavaRunner().TrySystem().TrySpecificVersion(globals.DefaultJavaVersion) - javacPath := resolveJavac(javaRunner) - - // 3. Resolve analyzer JAR (for @Approximate annotation classes) - analyzerJar := resolveAnalyzerJar() - - // 4. Resolve project dependencies (from project.yaml) - projectDeps := resolveProjectDeps(projectPath) - - // 5. Create temp output dir - outputDir := createTempDir("opentaint-approx-compiled-") - - // 6. Build classpath: analyzer.jar + project deps - classpath := analyzerJar + string(os.PathListSeparator) + strings.Join(projectDeps, string(os.PathListSeparator)) - - // 7. Run javac - args := []string{ - "-source", "8", "-target", "8", - "-cp", classpath, - "-d", outputDir, - } - args = append(args, javaFiles...) - - err := exec.Command(javacPath, args...).Run() - if err != nil { - // Report javac stderr, abort - out.Fatalf("Failed to compile approximations: %v\n%s", err, stderr) - } - - return outputDir -} -``` - -**Dependencies**: Uses `java.NewJavaRunner()` (existing) for JDK resolution. Uses `utils.GetAnalyzerJarPath()` (existing) for analyzer JAR. Reads `project.yaml` via `utils/project/config.go` (existing) for dependency classpath. - -**Key detail**: The `-source 8 -target 8` ensures compatibility with the analyzer's classloader. The analyzer JAR is needed on the compilation classpath because it contains `@Approximate`, `@ApproximateByName`, `@ArgumentTypeContext`, and `OpentaintNdUtil` classes. - ---- - -### 3.3 `opentaint agent` Command Group - -All agent-related commands are grouped under `opentaint agent`: - -| Command | Purpose | -|---------|---------| -| `opentaint agent skills` | Print resolved path to bundled skill files | -| `opentaint agent prompt` | Print resolved path to the meta-prompt file | -| `opentaint agent rules-path` | Print resolved path to builtin rules (downloads on demand) | -| `opentaint agent test-rules` | Run rule tests against annotated test samples | -| `opentaint agent init-test-project` | Bootstrap a rule test project with build.gradle.kts and test utility JAR | - -#### Parent command - -**New file**: `cli/cmd/agent.go` - -```go -package cmd - -import "github.com/spf13/cobra" - -var agentCmd = &cobra.Command{ - Use: "agent", - Short: "Agent mode commands: skills, prompts, rule testing", -} - -func init() { - rootCmd.AddCommand(agentCmd) -} -``` - -#### `opentaint agent skills` - -**New file**: `cli/cmd/agent_skills.go` - -```go -package cmd - -var agentSkillsCmd = &cobra.Command{ - Use: "skills", - Short: "Print the resolved path to bundled agent skill files", - Args: cobra.NoArgs, - Run: func(cmd *cobra.Command, args []string) { - skillsDir, err := utils.GetAgentPath("skills") - if err != nil { - out.Fatalf("Skills not found: %v", err) - } - fmt.Println(skillsDir) - }, -} - -func init() { - agentCmd.AddCommand(agentSkillsCmd) -} -``` - -#### `opentaint agent prompt` - -**New file**: `cli/cmd/agent_prompt.go` - -```go -package cmd - -var agentPromptCmd = &cobra.Command{ - Use: "prompt", - Short: "Print the resolved path to the agent meta-prompt", - Args: cobra.NoArgs, - Run: func(cmd *cobra.Command, args []string) { - promptPath, err := utils.GetAgentPath("meta-prompt.md") - if err != nil { - out.Fatalf("Meta-prompt not found: %v", err) - } - fmt.Println(promptPath) - }, -} - -func init() { - agentCmd.AddCommand(agentPromptCmd) -} -``` - -#### `opentaint agent rules-path` - -**New file**: `cli/cmd/agent_rules_path.go` - -```go -package cmd - -var agentRulesPathCmd = &cobra.Command{ - Use: "rules-path", - Short: "Print the resolved path to builtin rules", - Args: cobra.NoArgs, - Run: func(cmd *cobra.Command, args []string) { - version := globals.Config.Rules.Version - if version == "" { - version = globals.RulesBindVersion - } - - rulesPath, err := utils.GetRulesPath(version) - if err != nil { - err = ensureArtifactAvailable("rules", version, rulesPath, downloadRules) - if err != nil { - out.Fatalf("Failed to resolve rules: %v", err) - } - rulesPath, _ = utils.GetRulesPath(version) - } - - fmt.Println(rulesPath) - }, -} - -func init() { - agentCmd.AddCommand(agentRulesPathCmd) -} -``` - -Reuses `utils.GetRulesPath()` and download logic already present in `scan.go:214-224`. Downloads rules on demand (same 3-tier resolution: bundled > install > cache). - -#### `opentaint agent test-rules` - -**New file**: `cli/cmd/agent_test_rules.go` - -```go -package cmd - -var TestRulesRuleset string -var TestRulesOutput string - -var agentTestRulesCmd = &cobra.Command{ - Use: "test-rules ", - Short: "Run rule tests against annotated test samples", - Args: cobra.ExactArgs(1), - Annotations: map[string]string{"PrintConfig": "true"}, - Run: func(cmd *cobra.Command, args []string) { - projectPath := args[0] - - // 1. If projectPath is a directory (not project.yaml), auto-compile - if isDirectory(projectPath) { - projectPath = autoCompile(projectPath, TestRulesOutput) - } - - // 2. Build analyzer command with --debug-run-rule-tests - builder := NewAnalyzerBuilder(). - SetProject(projectPath). - SetOutputDir(TestRulesOutput). - SetDebugRunRuleTests(true) - - if TestRulesRuleset != "" { - builder.AddRuleSet(resolveRuleset(TestRulesRuleset)) - } - - // 3. Execute - err := executeAnalyzer(builder) - - // 4. Parse test-result.json - result := parseTestResult(filepath.Join(TestRulesOutput, "test-result.json")) - - // 5. Print summary table - printTestSummary(result) - - // 6. Exit code: 0 if only success/disabled, 1 if any falsePositive/falseNegative/skipped - if result.HasFailures() { - os.Exit(1) - } - }, -} - -func init() { - agentCmd.AddCommand(agentTestRulesCmd) - agentTestRulesCmd.Flags().StringVar(&TestRulesRuleset, "ruleset", "", "Path to rules directory") - agentTestRulesCmd.Flags().StringVarP(&TestRulesOutput, "output", "o", "", "Output directory") - _ = agentTestRulesCmd.MarkFlagRequired("output") -} -``` - -#### `opentaint agent init-test-project` - -**New file**: `cli/cmd/agent_init_test_project.go` - -```go -package cmd - -var InitTestProjectDeps []string - -var agentInitTestProjectCmd = &cobra.Command{ - Use: "init-test-project ", - Short: "Bootstrap a rule test project with build.gradle.kts and test utility JAR", - Args: cobra.ExactArgs(1), - Run: func(cmd *cobra.Command, args []string) { - outputDir := args[0] - - // 1. Create directory structure - os.MkdirAll(filepath.Join(outputDir, "libs"), 0755) - os.MkdirAll(filepath.Join(outputDir, "src", "main", "java", "test"), 0755) - - // 2. Resolve and copy opentaint-sast-test-util.jar - testUtilJar := resolveTestUtilJar() - copyFile(testUtilJar, filepath.Join(outputDir, "libs", "opentaint-sast-test-util.jar")) - - // 3. Generate build.gradle.kts - generateBuildGradle(outputDir, InitTestProjectDeps) - - // 4. Generate settings.gradle.kts - generateSettingsGradle(outputDir) - }, -} - -func init() { - agentCmd.AddCommand(agentInitTestProjectCmd) - agentInitTestProjectCmd.Flags().StringArrayVar(&InitTestProjectDeps, "dependency", nil, - "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") -} -``` - -The `opentaint-sast-test-util.jar` is bundled in the CLI distribution as `lib/opentaint-sast-test-util.jar`. It's tiny (just 2 annotation classes). The release workflow's "Download bundled artifacts" step fetches it alongside the analyzer and autobuilder JARs. - -Generated `build.gradle.kts`: -```kotlin -plugins { - java -} - -java { - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 -} - -repositories { - mavenCentral() -} - -dependencies { - compileOnly(files("libs/opentaint-sast-test-util.jar")) - // User-specified dependencies: - // compileOnly("javax.servlet:javax.servlet-api:4.0.1") -} -``` - -#### Resolution logic - -**File**: `cli/internal/utils/opentaint_home.go` - -```go -// GetAgentPath resolves a path within the bundled agent directory. -// Checks bundled tier (exe-dir/lib/agent/) then install tier (~/.opentaint/install/lib/agent/). -func GetAgentPath(subpath string) (string, error) { - exeDir := getExeDir() - bundled := filepath.Join(exeDir, "lib", "agent", subpath) - if _, err := os.Stat(bundled); err == nil { - return bundled, nil - } - - install := filepath.Join(OpentaintHome(), "install", "lib", "agent", subpath) - if _, err := os.Stat(install); err == nil { - return install, nil - } - - return "", fmt.Errorf("agent resource '%s' not found; reinstall opentaint or run 'opentaint pull'", subpath) -} -``` - ---- - -### 3.4 Hidden Dev Flags - -**File**: `cli/cmd/root.go` - -Add persistent flags (hidden): - -```go -func init() { - // ... existing PersistentFlags ... - - // Hidden: direct JAR paths for development - rootCmd.PersistentFlags().StringVar(&globals.Config.Analyzer.JarPath, "analyzer-jar", "", - "Direct path to analyzer JAR (bypasses version resolution)") - rootCmd.PersistentFlags().StringVar(&globals.Config.Autobuilder.JarPath, "autobuilder-jar", "", - "Direct path to autobuilder JAR (bypasses version resolution)") - - rootCmd.PersistentFlags().MarkHidden("analyzer-jar") - rootCmd.PersistentFlags().MarkHidden("autobuilder-jar") - - _ = viper.BindPFlag("analyzer.jar", rootCmd.PersistentFlags().Lookup("analyzer-jar")) - _ = viper.BindPFlag("autobuilder.jar", rootCmd.PersistentFlags().Lookup("autobuilder-jar")) -} -``` - -**File**: `cli/internal/globals/global.go` - -Add fields to `ConfigType`: - -```go -type ConfigType struct { - // ... existing ... - Analyzer struct { - Version string `mapstructure:"version"` - JarPath string `mapstructure:"jar"` // NEW - } - Autobuilder struct { - Version string `mapstructure:"version"` - JarPath string `mapstructure:"jar"` // NEW - } -} -``` - -**File**: `cli/cmd/artifacts.go` or `scan.go` - -In `ensureAnalyzerAvailable()` or wherever the JAR path is resolved: - -```go -func resolveAnalyzerJar() string { - // NEW: check direct path first - if globals.Config.Analyzer.JarPath != "" { - if _, err := os.Stat(globals.Config.Analyzer.JarPath); err == nil { - return globals.Config.Analyzer.JarPath - } - out.Fatalf("Analyzer JAR not found at specified path: %s", globals.Config.Analyzer.JarPath) - } - - // Existing: 3-tier resolution - return existingResolutionLogic() -} -``` - -**Environment variables**: Via viper's env binding, these are also settable as: -- `OPENTAINT_ANALYZER_JAR=/path/to/jar` -- `OPENTAINT_AUTOBUILDER_JAR=/path/to/jar` - ---- - -### 3.5 AnalyzerBuilder Extensions - -**File**: `cli/cmd/command_builder.go` - -Add fields to `AnalyzerBuilder`: - -```go -type AnalyzerBuilder struct { - *BaseCommandBuilder - // ... existing fields ... - - // NEW - ruleIdFilters []string - approximationsConfig string - dataflowApproximations []string - externalMethodsOutput string - debugRunRuleTests bool -} -``` - -Add setter methods: - -```go -func (a *AnalyzerBuilder) AddRuleIdFilter(ruleId string) *AnalyzerBuilder { - a.ruleIdFilters = append(a.ruleIdFilters, ruleId) - return a -} - -func (a *AnalyzerBuilder) SetApproximationsConfig(path string) *AnalyzerBuilder { - a.approximationsConfig = path - return a -} - -func (a *AnalyzerBuilder) AddDataflowApproximations(path string) *AnalyzerBuilder { - a.dataflowApproximations = append(a.dataflowApproximations, path) - return a -} - -func (a *AnalyzerBuilder) SetExternalMethodsOutput(path string) *AnalyzerBuilder { - a.externalMethodsOutput = path - return a -} - -func (a *AnalyzerBuilder) SetDebugRunRuleTests(enabled bool) *AnalyzerBuilder { - a.debugRunRuleTests = enabled - return a -} -``` - -Modify `BuildNativeCommand()`: - -```go -func (a *AnalyzerBuilder) BuildNativeCommand() []string { - flags := []string{...} // existing - - // NEW: append new flags - for _, id := range a.ruleIdFilters { - flags = append(flags, "--semgrep-rule-id", id) - } - if a.approximationsConfig != "" { - flags = append(flags, "--approximations-config", a.approximationsConfig) - } - for _, path := range a.dataflowApproximations { - flags = append(flags, "--dataflow-approximations", path) - } - if a.externalMethodsOutput != "" { - flags = append(flags, "--external-methods-output", a.externalMethodsOutput) - } - if a.debugRunRuleTests { - flags = append(flags, "--debug-run-rule-tests") - } - - return flags -} -``` - ---- - -## 4. Skills and Meta-Prompt Location - -### 4.1 Source Layout - -Skills and meta-prompt are Markdown files in a dedicated `agent/` directory at the repository root. This directory contains **distributable artifacts only** — design docs and tests remain in `agent-mode/`. - -``` -opentaint/ -├── agent/ # Distributable agent artifacts -│ ├── meta-prompt.md # The system prompt for the agent -│ └── skills/ # Individual skill files -│ ├── build-project.md # Skill 3.1 -│ ├── discover-entry-points.md # Skill 3.2 -│ ├── create-rule.md # Skill 3.3 -│ ├── test-rule.md # Skill 3.4 -│ ├── run-analysis.md # Skill 3.5 -│ ├── analyze-findings.md # Skill 3.6 -│ ├── create-yaml-config.md # Skill 3.7 -│ ├── create-approximation.md # Skill 3.8 -│ └── generate-poc.md # Skill 3.9 -├── agent-mode/ # Design docs and tests (NOT distributed) -│ ├── design/ -│ ├── impl/ -│ ├── info/ -│ └── test/ -└── ... -``` - -Each skill file is a self-contained Markdown document with: -- **Title and purpose** — what the skill does -- **Prerequisites** — what must be true before using this skill -- **Procedure** — step-by-step instructions with CLI commands -- **Examples** — concrete YAML/Java/command examples -- **Troubleshooting** — common errors and fixes - -The meta-prompt (`meta-prompt.md`) is the top-level system prompt that references skills by name and defines the 4-phase agent workflow. - -### 4.2 Bundling and Distribution - -Skills are bundled into the CLI distribution archive as `lib/agent/`, following the same pattern as rules. - -**Archive layout**: - -``` -opentaint_linux_amd64.tar.gz -├── opentaint # Go binary -└── lib/ - ├── opentaint-project-analyzer.jar - ├── opentaint-project-auto-builder.jar - ├── opentaint-sast-test-util.jar # NEW - ├── rules/ # Extracted rules - └── agent/ # NEW - ├── meta-prompt.md - └── skills/ - ├── build-project.md - ├── create-rule.md - ├── test-rule.md - ├── run-analysis.md - ├── analyze-findings.md - ├── create-yaml-config.md - ├── create-approximation.md - ├── discover-entry-points.md - └── generate-poc.md -``` - -All three archive variants (`cli`, `default`, `full`) include agent files. - -**Release workflow change** (`.github/workflows/release-cli.yaml`): - -Add step after "Download bundled artifacts": - -```yaml -- name: Bundle agent skills and prompt - run: | - mkdir -p cli/lib/agent/skills - cp agent/meta-prompt.md cli/lib/agent/ - cp agent/skills/*.md cli/lib/agent/skills/ - -- name: Bundle test utility JAR - run: | - # Download from analyzer release (built alongside the analyzer) - cp opentaint-sast-test-util.jar cli/lib/ -``` - -No changes to installation scripts needed. The `install.sh`/`install.ps1` scripts download and extract the archive — agent files and test-util JAR come along automatically. - -### 4.3 Runtime Access (Direct File Read) - -The agent reads skill files directly from the filesystem. The meta-prompt instructs: - -```markdown -## Setup -1. Run `opentaint agent skills` to get the skills directory path -2. Run `opentaint agent prompt` to get the meta-prompt file path -3. Read the meta-prompt for the overall workflow -4. Read individual skill files as needed during each phase -``` - -This works with any agent framework (Cursor, Cline, Aider, custom). The agent resolves paths via CLI commands and reads files using its native file-read capabilities. - ---- - -## 5. Testing Without CLI on PATH - -### 5.1 Hidden `--analyzer-jar` / `--autobuilder-jar` Flags - -When `opentaint` IS on PATH but JARs haven't been downloaded (no `~/.opentaint`), the hidden flags allow pointing directly to locally-built JARs: - -```bash -opentaint scan /path/to/project.yaml \ - --analyzer-jar ./core/build/libs/opentaint-jvm-sast.jar \ - -o report.sarif -``` - -This skips the 3-tier resolution entirely. - -### 5.2 Environment Variables - -Via viper's env binding (prefix `OPENTAINT_`, `_` separator): - -```bash -export OPENTAINT_ANALYZER_JAR=/home/sobol/IdeaProjects/opentaint/core/build/libs/opentaint-jvm-sast.jar -export OPENTAINT_AUTOBUILDER_JAR=/home/sobol/IdeaProjects/opentaint/core/opentaint-jvm-autobuilder/build/libs/opentaint-project-auto-builder.jar - -# Now scan just works -opentaint scan /path/to/project.yaml -o report.sarif -``` - -### 5.3 Python Test Infrastructure (`conftest.py`) - -The test infrastructure already handles the "no CLI on PATH" case with a dual-mode strategy: - -1. **`_find_opentaint_cli()`** — calls `shutil.which("opentaint")`. Returns `None` if not found. -2. **`OpenTaintCLI.has_cli`** — `True` if Go CLI found, `False` otherwise. -3. **Each command method** (`.scan()`, `.compile()`, etc.) branches: - - CLI mode: `opentaint scan ...` - - JAR mode: `java -jar analyzer.jar --project ... --output-dir ...` - -**Flag translation** (Go CLI → Kotlin CLI): - -| Go CLI | Kotlin CLI (JAR mode) | -|--------|----------------------| -| `--ruleset ` | `--semgrep-rule-set ` | -| `--rule-id ` | `--semgrep-rule-id ` | -| `--approximations-config ` | `--approximations-config ` (same name after rename) | -| `--dataflow-approximations ` | `--dataflow-approximations ` (same name) | -| `--external-methods ` | `--external-methods-output ` | -| `--severity ` | `--semgrep-rule-severity=` | - -**Limitation in JAR mode**: Agent subcommands (`opentaint agent rules-path`, `opentaint agent init-test-project`, `opentaint agent skills`) have no JAR equivalent. They return hardcoded results or skip: - -```python -def agent_rules_path(self) -> CLIResult: - if self.has_cli: - return self._run(["agent", "rules-path"]) - # Fallback: return known path in dev environment - return CLIResult(0, str(BUILTIN_RULES_DIR), "", []) - -def agent_init_test_project(self, output_dir, dependencies=None) -> CLIResult: - if self.has_cli: - args = ["agent", "init-test-project", str(output_dir)] - for dep in (dependencies or []): - args += ["--dependency", dep] - return self._run(args) - # No JAR equivalent — skip - return CLIResult(1, "", "init-test-project not available in direct JAR mode", []) -``` - -**For `--dataflow-approximations` in JAR mode**: The auto-compilation step (which the Go CLI does) must be done manually. The test infrastructure should detect `.java` files and compile them before passing the compiled directory to the JAR. This is already handled in the test fixture setup. - -### 5.4 Local Dev Workflow - -#### Option 1: Build Go CLI locally + use hidden flags - -```bash -# Build CLI -cd cli && go build -o opentaint . - -# Build analyzer (if needed) -cd core && ./gradlew :projectAnalyzerJar - -# Run scan with direct JAR paths -./cli/opentaint scan /path/to/project.yaml \ - --analyzer-jar ./core/build/libs/opentaint-jvm-sast.jar \ - -o report.sarif - -# Agent commands work too -./cli/opentaint agent rules-path -./cli/opentaint agent test-rules ./test-project/project.yaml \ - --ruleset ./agent-rules -o ./test-output -``` - -#### Option 2: Direct JAR mode (no Go CLI) - -```bash -# Build analyzer -cd core && ./gradlew :projectAnalyzerJar - -# Run directly -java -Xmx8G \ - -Dorg.opentaint.ir.impl.storage.defaultBatchSize=2000 \ - -Djdk.util.jar.enableMultiRelease=false \ - -jar core/build/libs/opentaint-jvm-sast.jar \ - --project /path/to/project.yaml \ - --output-dir ./output \ - --semgrep-rule-set ./rules/ruleset \ - --semgrep-rule-id my-rule-id \ - --approximations-config ./my-config.yaml \ - --external-methods-output ./external-methods.yaml -``` - -#### Option 3: Python tests with auto-detection - -```bash -# Build analyzer -cd core && ./gradlew :projectAnalyzerJar - -# Run tests — conftest.py auto-detects JAR, falls back from Go CLI -cd agent-mode/test -pytest test_build.py -v -k "not slow" -``` - -The `conftest.py` tries `shutil.which("opentaint")` first. If not found, it searches for the JAR at: -- `core/build/libs/opentaint-jvm-sast.jar` -- `core/build/libs/opentaint-project-analyzer.jar` - -Both paths are relative to `OPENTAINT_ROOT` (3 levels up from `conftest.py`). - ---- - -## 6. Implementation Order - -Recommended sequence based on dependency analysis: - -### Phase A: Kotlin Analyzer Core (can be parallelized internally) - -| # | Task | Files | Depends On | -|---|------|-------|------------| -| A1 | Add `ExternalMethodTracker` class | `ExternalMethodTracker.kt` (new) | — | -| A2 | Wire tracker into analysis pipeline | `TaintAnalysisUnitStorage.kt`, `TaintAnalysisContext.kt`, `TaintAnalysisUnitRunnerManager.kt` | A1 | -| A3 | Report external methods from flow function | `JIRMethodCallFlowFunction.kt` | A2 | -| A4 | Add `--external-methods-output` flag + YAML serialization | `ProjectAnalyzerRunner.kt`, `ProjectAnalysisOptions.kt`, `ProjectAnalyzer.kt` | A3 | -| A5 | Add `--semgrep-rule-id` flag + filtering in loader | `SemgrepRuleLoader.kt`, `ProjectAnalyzerRunner.kt`, `ProjectAnalysisOptions.kt` | — | -| A6 | Rename `--config` → `--approximations-config`, remove mutual exclusion, add `SemgrepRulesWithCustomConfig` variant | `ProjectAnalyzerRunner.kt`, `ProjectAnalyzer.kt` | — | -| A7 | Add `customApproximationPaths` to `DataFlowApproximationLoader.Options`, add `--dataflow-approximations` flag | `DataFlowApproximationLoader.kt`, `ProjectAnalyzerRunner.kt`, `ProjectAnalysisOptions.kt` | — | - -A1-A4 are sequential (pipeline). A5, A6, A7 are independent of each other and of A1-A4. - -### Phase B: Go CLI (depends on Phase A for flag names) - -| # | Task | Files | Depends On | -|---|------|-------|------------| -| B1 | Add hidden `--analyzer-jar`/`--autobuilder-jar` flags | `root.go`, `global.go`, `artifacts.go` | — | -| B2 | Add `AnalyzerBuilder` extensions | `command_builder.go` | — | -| B3 | Add new flags to `scan` command | `scan.go` | B2 | -| B4 | Implement approximation auto-compilation | `compile_approximations.go` (new) | B2 | -| B5 | Implement `opentaint agent` parent command | `agent.go` (new) | — | -| B6 | Implement `opentaint agent rules-path` | `agent_rules_path.go` (new) | B5 | -| B7 | Implement `opentaint agent test-rules` | `agent_test_rules.go` (new) | B2, B5 | -| B8 | Implement `opentaint agent init-test-project` | `agent_init_test_project.go` (new) | B5 | -| B9 | Implement `opentaint agent skills` + `opentaint agent prompt` | `agent_skills.go`, `agent_prompt.go` (new) | B5 | - -B1, B5 are independent starting points. B2 must precede B3, B4, B7. B5 must precede B6-B9. - -### Phase C: Skills and Meta-Prompt - -| # | Task | Files | Depends On | -|---|------|-------|------------| -| C1 | Write 9 skill files | `agent/skills/*.md` | A, B (need final CLI flag names) | -| C2 | Write meta-prompt | `agent/meta-prompt.md` | C1 | -| C3 | Update release workflow | `.github/workflows/release-cli.yaml` | C1 | -| C4 | Publish test-util JAR as release asset | `.github/workflows/publish-analyzer.yaml` | — | - -### Phase D: Validation - -| # | Task | Depends On | -|---|------|------------| -| D1 | Run existing Python test suite (6 passing tests) | A, B | -| D2 | Run `new_feature` tests (20 tests) | A, B | -| D3 | Run full agent loop test | A, B, C | - ---- - -## 7. File Change Summary - -### New Files (17) - -| File | Purpose | -|------|---------| -| `core/.../taint/ExternalMethodTracker.kt` | External method collection during analysis | -| `cli/cmd/agent.go` | `opentaint agent` parent command | -| `cli/cmd/agent_skills.go` | `opentaint agent skills` subcommand | -| `cli/cmd/agent_prompt.go` | `opentaint agent prompt` subcommand | -| `cli/cmd/agent_rules_path.go` | `opentaint agent rules-path` subcommand | -| `cli/cmd/agent_test_rules.go` | `opentaint agent test-rules` subcommand | -| `cli/cmd/agent_init_test_project.go` | `opentaint agent init-test-project` subcommand | -| `cli/cmd/compile_approximations.go` | Auto-compile .java approximations to .class | -| `agent/meta-prompt.md` | Agent system prompt | -| `agent/skills/build-project.md` | Skill: build project | -| `agent/skills/discover-entry-points.md` | Skill: discover entry points | -| `agent/skills/create-rule.md` | Skill: create pattern rules | -| `agent/skills/test-rule.md` | Skill: test rules | -| `agent/skills/run-analysis.md` | Skill: run analysis | -| `agent/skills/analyze-findings.md` | Skill: analyze SARIF findings | -| `agent/skills/create-yaml-config.md` | Skill: create YAML passThrough config | -| `agent/skills/create-approximation.md` | Skill: create code-based approximations | -| `agent/skills/generate-poc.md` | Skill: generate proof-of-concept | - -### Modified Files (15) - -| File | Change Summary | -|------|----------------| -| `core/.../taint/TaintAnalysisUnitStorage.kt` | Add `externalMethodTracker` field | -| `core/.../taint/TaintAnalysisContext.kt` | Expose tracker from storage | -| `core/.../TaintAnalysisUnitRunnerManager.kt` | Wire tracker into unit storage creation | -| `core/.../JIRMethodCallFlowFunction.kt` | Report to tracker in `applyPassRulesOrCallSkip()` | -| `core/.../project/ProjectAnalyzer.kt` | New `PreloadedRules` variant, YAML output, combined config+rules | -| `core/.../project/ProjectAnalysisOptions.kt` | New fields: `externalMethodsOutput`, `semgrepRuleId` | -| `core/.../runner/ProjectAnalyzerRunner.kt` | 4 new Clikt flags | -| `core/.../dataflow/DataFlowApproximationLoader.kt` | `customApproximationPaths` in `Options` | -| `core/.../semgrep/pattern/SemgrepRuleLoader.kt` | Rule ID filter in `loadRules()` | -| `cli/cmd/root.go` | Hidden `--analyzer-jar`, `--autobuilder-jar` flags | -| `cli/cmd/scan.go` | 4 new flags: `--rule-id`, `--approximations-config`, `--dataflow-approximations`, `--external-methods` | -| `cli/cmd/command_builder.go` | 5 new `AnalyzerBuilder` methods + fields | -| `cli/internal/globals/global.go` | `JarPath` fields in `Analyzer`/`Autobuilder` config structs | -| `cli/internal/utils/opentaint_home.go` | `GetAgentPath()` function | -| `.github/workflows/release-cli.yaml` | Bundle agent files + test-util JAR | diff --git a/agent-mode/info/agent-pipeline.md b/agent-mode/info/agent-pipeline.md deleted file mode 100644 index 0b876db31..000000000 --- a/agent-mode/info/agent-pipeline.md +++ /dev/null @@ -1,672 +0,0 @@ -# Agent Pipeline Design Document - -## Overview - -This document describes the end-to-end pipeline for an LLM agent to perform security analysis of a JVM project using OpenTaint. The agent builds the project, creates rules, tests them, runs analysis, interprets results (SARIF + external methods list), and iterates to fix FP/FN until coverage is satisfactory. - -## Agent Capabilities Summary - -| Capability | Artifact Type | Reference | -|-----------|---------------|-----------| -| Generate vulnerability detection rules | Pattern rules (YAML) | `pattern-rules.md` | -| Debug/fix rules (FP/FN) | Pattern rules (YAML) | `pattern-rules.md` | -| Generate taint propagation rules | YAML config rules | `approximations-config.md` | -| Generate complex propagators | Code-based approximations (Java) | `approximations-config.md` | -| Override existing propagation | Either YAML or Java stubs | `approximations-config.md` | -| Framework support | Not configurable | Provided as-is | - -## CLI Interfaces - -OpenTaint provides two CLI interfaces. The agent uses them at different pipeline stages. - -### Go CLI (`opentaint`) - -High-level wrapper. Manages Java runtime, downloads artifacts, invokes the analyzer JAR. - -| Command | Purpose | -|---------|---------| -| `opentaint compile -o ` | Build project and create `project.yaml` | -| `opentaint project --output --source-root --classpath --package ` | Create `project.yaml` from precompiled JARs/classes | -| `opentaint scan -o [--ruleset builtin] [--ruleset ]` | Run full analysis (optionally compile first) | -| `opentaint summary [--show-findings] [--show-code-snippets] [--verbose-flow]` | Print SARIF results summary | -| `opentaint pull` | Download all artifacts + JRE | - -**Key flags for `opentaint scan`:** -- `--output ` — SARIF output file (required) -- `--ruleset builtin` — use built-in rules (default) -- `--ruleset ` — custom Semgrep rule file/directory (can specify multiple times; combinable with `builtin`) -- `--timeout ` — analysis timeout (default: 900) -- `--max-memory ` — JVM memory limit (default: `8G`) -- `--severity ` — severity filter (default: `warning,error`) -- `--code-flow-limit ` — max code flows per finding - -### Kotlin CLI (`opentaint-project-analyzer.jar`) - -Low-level analyzer JAR. Invoked by the Go CLI, but can be used directly for advanced features. - -| Flag | Purpose | -|------|---------| -| `--project ` | Project model (required) | -| `--output-dir ` | Output directory (required) | -| `--semgrep-rule-set ` | Semgrep rule files/directories (multiple) | -| `--config ` | Custom passThrough/approximation YAML (**mutually exclusive** with `--semgrep-rule-set`) | -| `--debug-run-rule-tests` | Run rule tests instead of project analysis | -| `--debug-run-analysis-on-selected-entry-points ` | `*` for all methods or `com.example.Class#method` | -| `--semgrep-rule-load-trace ` | Output rule loader diagnostics | -| `--sarif-file-name ` | SARIF filename (default: `report-ifds.sarif`) | -| `--ifds-analysis-timeout ` | IFDS timeout (default: 10000) | -| `--project-kind ` | `unknown` or `spring-web` | - -**Important**: `--config` and `--semgrep-rule-set` are **mutually exclusive**. The `--config` flag is the only way to pass custom passThrough/cleaner YAML rules directly. The Go CLI does not expose `--config` — it only passes `--semgrep-rule-set` via `--ruleset`. - -### Autobuilder (`opentaint-project-auto-builder.jar`) - -| Flag | Purpose | -|------|---------| -| `--project-root-dir ` | Project root (required) | -| `--build portable` | Build + create self-contained project directory | -| `--result-dir ` | Output directory for portable build | -| `--build simple` | Just dump `project.yaml` | -| `--result ` | Output path for simple build | - -## Full Agent Workflow - -### Step 1: Project Setup - -Agent takes the path to the target project and prepares it for analysis. - -**Option A: Use Go CLI (recommended)** -```bash -# Build and create project model -opentaint compile /path/to/project -o ./opentaint-project - -# Result: ./opentaint-project/project.yaml -``` - -**Option B: Use Autobuilder directly** -```bash -java -jar opentaint-project-auto-builder.jar \ - --project-root-dir /path/to/project \ - --build portable \ - --result-dir ./opentaint-project \ - --logs-file autobuild.log \ - --verbosity debug -``` - -**Option C: Create project.yaml manually** - -For projects that don't use standard Gradle/Maven builds, or for pre-compiled artifacts: - -```bash -opentaint project \ - --output ./opentaint-project \ - --source-root /path/to/sources \ - --classpath /path/to/classes.jar \ - --classpath /path/to/dependency.jar \ - --package com.example.app -``` - -The generated `project.yaml` follows this schema: -```yaml -sourceRoot: sources -javaToolchain: toolchain/jdk-17 -modules: - - moduleSourceRoot: sources/src/main/java - packages: [com.example.app] - moduleClasses: - - classes/c0_main -dependencies: - - dependencies/spring-web-5.3.39.jar - - dependencies/javax.servlet-api-4.0.1.jar -``` - -### Step 2: Entry Point Discovery - -Agent searches for entry points and potentially vulnerable places. This is a code-level analysis step. - -The agent should examine: -- **Spring controllers**: `@RestController`/`@Controller` classes with `@RequestMapping`/`@GetMapping` etc. -- **Servlet handlers**: Classes extending `HttpServlet` with `doGet`/`doPost`/etc. -- **JAX-RS endpoints**: Classes with `@Path` and `@GET`/`@POST` annotations -- **Message handlers**: JMS/Kafka/RabbitMQ listeners -- **CLI entry points**: `main()` methods that process external input - -The engine automatically discovers Spring entry points (via `SpringWebProject.kt`) and for unknown projects selects all public/protected methods from public project classes. The agent can also use `--debug-run-analysis-on-selected-entry-points "com.example.Class#method"` to target specific methods. - -### Step 3: Analysis Planning - -Agent creates `opentaint-analysis-plan.md` to track progress. This document records: -- Target project description -- Identified entry points and attack surface -- Rules to create/apply -- Analysis iterations with findings -- FP/FN tracking and resolution status -- Final vulnerability inventory - -### Step 4: Rule Creation - -Agent creates pattern rules for the vulnerability classes relevant to the target project. See `pattern-rules.md` for the full rule language. - -**Typical rule structure:** - -``` -rules/ -├── agent-rules/ # Agent-created rules -│ ├── java/ -│ │ ├── security/ -│ │ │ └── custom-sqli.yaml # Security rule (join mode) -│ │ └── lib/ -│ │ └── custom-sinks.yaml # Sink library rule -│ └── test/ -│ └── CustomSqliTest.java # Test samples -``` - -The agent composes rules using the three modes: -1. **Simple patterns** — for structural issues (no dataflow) -2. **Taint mode** — for defining sinks with `focus-metavariable` -3. **Join mode** — for composing source + sink library rules via `refs` and `on` clauses - -### Step 5: Rule Testing - -Agent creates test samples and validates rules work before running on the real project. - -**5a. Create a simple test project:** - -A minimal Gradle project with Java source files containing annotated test samples: - -```java -import org.opentaint.sast.test.util.PositiveRuleSample; -import org.opentaint.sast.test.util.NegativeRuleSample; - -public class CustomSqliTest { - - @PositiveRuleSample(value = "java/security/custom-sqli.yaml", id = "custom-sql-injection") - public void vulnerable(HttpServletRequest req) { - String input = req.getParameter("id"); - db.execute("SELECT * FROM users WHERE id = " + input); - } - - @NegativeRuleSample(value = "java/security/custom-sqli.yaml", id = "custom-sql-injection") - public void safe(HttpServletRequest req) { - String input = req.getParameter("id"); - db.execute("SELECT * FROM users WHERE id = ?", input); - } -} -``` - -**5b. Build the test project:** -```bash -opentaint compile ./test-project -o ./test-opentaint-project -``` - -**5c. Run rule tests (via Kotlin CLI directly):** -```bash -java -Xmx8G -jar opentaint-project-analyzer.jar \ - --project ./test-opentaint-project/project.yaml \ - --output-dir ./test-result \ - --semgrep-rule-set ./agent-rules \ - --debug-run-rule-tests \ - --verbosity debug -``` - -This produces `test-result/test-result.json` with per-sample verdicts: -```json -{ - "success": [...], - "falsePositive": [...], - "falseNegative": [...], - "skipped": [...], - "disabled": [...] -} -``` - -**5d. Fix and repeat** until all tests pass (no falsePositive/falseNegative entries). - -### Step 6: Run Analysis on Target Project - -```bash -# Option A: Go CLI -opentaint scan ./opentaint-project/project.yaml \ - -o ./results/report.sarif \ - --ruleset builtin \ - --ruleset ./agent-rules - -# Option B: Kotlin CLI (if custom --config needed) -java -Xmx8G -jar opentaint-project-analyzer.jar \ - --project ./opentaint-project/project.yaml \ - --output-dir ./results \ - --semgrep-rule-set ./agent-rules \ - --ifds-analysis-timeout 900 \ - --verbosity info -``` - -### Step 7: Interpret Results - -The analyzer produces two output files: - -**7a. SARIF report** (`results/report-ifds.sarif`) - -Standard SARIF 2.1.0 format containing: -- `runs[0].results[]` — each result is a vulnerability finding with: - - `ruleId` — which rule triggered - - `message.text` — human-readable description - - `level` — severity (error/warning/note) - - `locations[]` — sink location (file, line, column) - - `codeFlows[]` — taint traces from source to sink - - `relatedLocations[]` — HTTP endpoints, parameter info - -View results: -```bash -opentaint summary ./results/report.sarif --show-findings --show-code-snippets --verbose-flow -``` - -**7b. External methods list** (`results/external-methods.json`) - -JSON list of external methods where a dataflow fact was killed during analysis. Each entry contains: -- Method signature (class, name, parameter types) -- Fact position information (the taint flow position from the passThrough rule perspective) - -This is the primary signal for fixing FN caused by missing taint propagation models. - -### Step 8: Decision Loop - -For each analysis result, the agent decides between the following actions: - -``` -For each finding in SARIF: -│ -├── Analyze the trace (codeFlow) -│ │ -│ ├── Trace is a TRUE POSITIVE (TP) -│ │ → Generate POC exploit -│ │ → Save to vulnerabilities.md -│ │ -│ ├── Trace contains FALSE POSITIVE (FP) — fixable via Rule -│ │ → Add pattern-not / pattern-not-inside to exclude the safe pattern -│ │ → Update tests (add @NegativeRuleSample) -│ │ → Re-run analysis (go to Step 6) -│ │ -│ └── Trace contains FALSE POSITIVE (FP) — fixable via Approximation (non-preferred) -│ → Override approximation to remove impossible dataflow path -│ → Re-run analysis (go to Step 6) -│ -For each entry in external methods list: -│ -├── Method is a taint PROPAGATOR -│ → Generate passThrough YAML rule (preferred) -│ → Re-run analysis (go to Step 6) -│ -├── Method is a complex TRANSFORMER (lambdas/callbacks) -│ → Generate code-based approximation (Java stub) -│ → Re-run analysis (go to Step 6) -│ -├── Method is a SANITIZER -│ → Generate cleaner YAML rule -│ → Re-run analysis (go to Step 6) -│ -└── Method is NEUTRAL (logging, metrics) - → Skip (default call-to-return passthrough is correct) -``` - -**FN fix via Rule** (non-preferred): If the FN is due to a missing source/sink pattern (not a missing approximation), the agent can add more patterns and tests to the rule. This is less common since most FN stem from taint being lost at external method calls. - -### Step 9: Iteration - -Steps 6-8 repeat until the agent determines: -- All traces have been reviewed -- All identified FP have been fixed -- All relevant external methods have been addressed -- Remaining external methods are classified as NEUTRAL -- All TPs have been documented with POCs in `vulnerabilities.md` - -## Detailed Sub-Scenarios - -### Fixing FN via External Methods List - -This is the most common and impactful iteration. Each external methods list entry provides: - -```json -{ - "method": "com.example.lib.DataWrapper#getValue", - "signature": "() java.lang.String", - "factPosition": "this" -} -``` - -The `factPosition` tells the agent **from where** taint should propagate. The agent uses this to generate the correct `copy.from` in the passThrough rule: - -```yaml -# factPosition: "this" means taint is on the receiver → should flow to result -passThrough: - - function: com.example.lib.DataWrapper#getValue - copy: - - from: this - to: result -``` - -```yaml -# factPosition: "arg(0)" means taint is on first argument → should flow to result/this -passThrough: - - function: com.example.lib.DataWrapper#process - copy: - - from: arg(0) - to: result -``` - -#### Decision Tree: YAML Config vs Code-Based Approximation - -``` -Does the method involve lambdas/callbacks/functional interfaces? -├── YES → Code-based approximation required -│ (YAML cannot model lambda invocation) -└── NO - Does the method involve complex internal state? - ├── YES → YAML with pattern - │ (model internal state with synthetic fields) - └── NO - Is it a simple from→to propagation? - ├── YES → YAML passThrough rule (simplest, preferred) - └── NO - Does it require non-deterministic branching? - ├── YES → Code-based approximation - │ (use OpentaintNdUtil.nextBool()) - └── NO → YAML passThrough rule with conditions -``` - -#### Batch Processing Strategy - -When the external methods list is large, process library-by-library: - -``` -1. Group by package/library - com.fasterxml.jackson.* → 47 methods - org.springframework.* → 23 methods - org.apache.commons.* → 15 methods - -2. For each library: - ├── Check if built-in config already covers it (jar-split/ configs) - ├── Look up library documentation / source - ├── Generate comprehensive passThrough rules - └── Save as agent-config/.yaml - -3. Re-run analysis after each library batch -``` - -### Fixing FP via Rule - -When a SARIF trace shows a false positive: - -**Common causes and fixes:** - -| Cause | Fix | Example | -|-------|-----|---------| -| Sanitization not recognized | Add `pattern-not-inside` or `pattern-sanitizers` | `Encoder.htmlEncode()` not recognized | -| Safe type not excluded | Add `metavariable-regex` with negative lookahead | Primitive types flowing to sink | -| Context makes it safe | Add `pattern-not-inside` for the safe context | Inside validation block | -| Wrong method matched | Narrow `metavariable-regex` or pattern | Too broad `$OBJ.$METHOD(...)` | - -**Example: Adding a sanitizer exclusion** -```yaml -# Before: sink matches all calls -pattern-sinks: - - pattern: response.getWriter().write($UNTRUSTED) - -# After: exclude sanitized paths -pattern-sinks: - - patterns: - - pattern: response.getWriter().write($UNTRUSTED) - - pattern-not-inside: | - $X = Encoder.htmlEncode(...); - ... - response.getWriter().write($X); -``` - -### Fixing FP via Approximation (Non-Preferred) - -Sometimes a false dataflow path exists because an approximation is too permissive (e.g., models a method as propagating taint when it actually transforms data in a way that neutralizes it). - -**Fix**: Override with a more precise approximation or add a cleaner rule: - -```yaml -# Add a cleaner rule to kill taint at the sanitizing method -cleaner: - - function: com.example.security.Sanitizer#clean - clean: - - position: result - mark: tainted -``` - -**Note**: This is non-preferred because approximation changes affect all rules globally, not just the specific FP case. - -### Overriding Existing Approximations - -The agent can override built-in approximations at two levels: - -**Override YAML config rules**: Provide a custom config via `--config` flag (Kotlin CLI only). PassThrough and cleaner rules are **extended** (merged with built-in), while source/sink/entryPoint rules are **overridden** (replace built-in). - -**Override with code-based approximations**: Create a Java stub class with `@Approximate`. Code-based approximations always take priority over YAML config for the same method. However, custom code-based approximations are **not currently passable via CLI flags** — they require building a custom approximations JAR and setting environment variables (`opentaint.jvm.api.jar.path`, `opentaint.jvm.approximations.jar.path`). - -### Priority chain: - -``` -Code-based approximation ← Highest (analyzed as actual code) - overrides -YAML config rules ← Agent CAN provide via --config - merged with -Auto-generated defaults ← Engine auto-generates for get* on non-project classes - fall back to -Intra-procedural analysis ← Engine analyzes callee body if available - fall back to -Call-to-return passthrough ← Taint preserved, method treated as no-op -``` - -## Passing Custom Approximations to the Analyzer - -### Via `--semgrep-rule-set` (Go CLI `--ruleset`) - -Pattern rules (source/sink/sanitizer patterns). This is the primary path for agent-generated rules. - -```bash -opentaint scan project.yaml -o report.sarif \ - --ruleset builtin \ - --ruleset ./agent-rules/ -``` - -### Via `--config` (Kotlin CLI only, not exposed in Go CLI) - -PassThrough/cleaner/source/sink YAML in `SerializedTaintConfig` format. Use this when the agent needs to add custom taint propagation models. - -```bash -java -jar opentaint-project-analyzer.jar \ - --project project.yaml \ - --output-dir ./results \ - --config ./agent-config/custom-propagators.yaml -``` - -**Limitation**: `--config` and `--semgrep-rule-set` are **mutually exclusive**. If the agent needs both custom pattern rules and custom propagation config, it must either: -1. Use `--semgrep-rule-set` — pattern rules include only passThrough from the default built-in config (agent cannot add extra passThrough rules this way) -2. Use `--config` — loses the ability to provide Semgrep-format pattern rules - -This is a current limitation that may need to be addressed (see "Required Engine Enhancements" below). - -### Via Environment Variables (Code-Based Approximations) - -Custom compiled Java stub JARs. Requires `useOpentaintApproximations=true` which is not exposed via CLI. - -```bash -export opentaint.jvm.api.jar.path=/path/to/api.jar -export opentaint.jvm.approximations.jar.path=/path/to/approximations.jar -``` - -**Current status**: Not practically usable via CLI. Requires programmatic API access. - -## Common Templates - -### Template: PassThrough for Simple Getter - -```yaml -passThrough: - - function: com.example.Type#getField - copy: - - from: this - to: result -``` - -### Template: PassThrough for Builder Pattern - -```yaml -passThrough: - - function: com.example.Builder#withField - copy: - - from: arg(0) - to: this - - from: arg(0) - to: result - - from: this - to: result -``` - -### Template: PassThrough for Collection Wrapper - -```yaml -passThrough: - - function: com.example.Collection#add - copy: - - from: arg(0) - to: - - this - - .com.example.Collection##java.lang.Object - - function: com.example.Collection#get - copy: - - from: - - this - - .com.example.Collection##java.lang.Object - to: result -``` - -### Template: PassThrough for Generic Pattern (All Getters in a Package) - -```yaml -passThrough: - - function: - package: com.example.dto - class: - pattern: .* - name: - pattern: get.* - copy: - - from: this - to: result -``` - -### Template: Code-Based Approximation for Functional API - -```java -@Approximate(com.example.FunctionalApi.class) -public class FunctionalApi { - public Object transform(@ArgumentTypeContext Function fn) throws Throwable { - FunctionalApi self = (FunctionalApi) (Object) this; - if (OpentaintNdUtil.nextBool()) return null; - Object input = self.getValue(); - return fn.apply(input); - } - - public void consume(@ArgumentTypeContext Consumer consumer) { - FunctionalApi self = (FunctionalApi) (Object) this; - if (OpentaintNdUtil.nextBool()) { - consumer.accept(self.getValue()); - } - } -} -``` - -### Template: Cleaner Rule (Sanitizer) - -```yaml -cleaner: - - function: com.example.security.HtmlEncoder#encode - clean: - - position: result - mark: tainted -``` - -## Required Engine Enhancements - -Based on the requirements in the task specification, the following features need to be implemented: - -### 1. External Methods List Output (JSON) - -**Requirement**: "Engine will return a list of external methods, where dataflow fact was killed" (task.md line 13). - -**Current state**: The engine does not produce this output. When a method is unresolvable (external), the fact is preserved via call-to-return passthrough, not killed. The engine needs a new mechanism to: -- Track which external methods were encountered during analysis -- Record the fact position (from which taint was propagating) at each external call -- Output this as a JSON file alongside the SARIF report - -**Proposed format**: -```json -[ - { - "method": "com.example.lib.Wrapper#getValue", - "signature": "() java.lang.String", - "factPosition": "this", - "callCount": 5 - } -] -``` - -A CLI flag (e.g., `--external-methods-output `) should be added to both CLIs. - -### 2. Combined `--config` + `--semgrep-rule-set` - -**Current state**: These flags are mutually exclusive. - -**Requirement**: The agent needs to provide both custom pattern rules (`--semgrep-rule-set`) and custom passThrough/approximation YAML (`--config`) simultaneously. - -**Proposed fix**: Allow both flags. When both are provided, load Semgrep rules as the pattern-matching layer and merge the custom config's passThrough/cleaner rules with the default config. - -### 3. Custom Code-Based Approximations via CLI - -**Current state**: No CLI flag to pass custom approximation JARs. The `useOpentaintApproximations` flag is hardcoded to `false`. - -**Requirement**: Agent must be able to provide code-based approximations for complex methods. - -**Proposed fix**: Add a CLI flag (e.g., `--approximations-jar `) that enables `useOpentaintApproximations` and sets the JAR paths. Expose this in both CLIs. - -## Integration Constraints - -### What the Agent CAN Do - -1. Create/modify pattern rules (YAML) in custom rule directories -2. Create/modify YAML config rules (passThrough, source, sink, cleaner) via `--config` -3. Create code-based approximation Java stubs (pending CLI support) -4. Generate test cases for rules -5. Override YAML config rules with more specific rules -6. Override YAML config rules with code-based approximations (pending CLI support) -7. Use `--ruleset` with multiple custom rule directories alongside `builtin` - -### What the Agent CANNOT Do - -1. Modify framework support (Spring, etc.) — provided as-is -2. Change the IFDS analysis algorithm -3. Change the access path abstraction mode -4. Change how the call graph is constructed -5. Modify the pattern matching engine semantics -6. Currently: combine `--config` and `--semgrep-rule-set` in a single run -7. Currently: pass custom code-based approximations via CLI - -### Validation Checklist - -Before submitting any artifact, the agent should verify: - -- [ ] YAML is valid and parseable -- [ ] Rule IDs are globally unique -- [ ] Library rules have `options.lib: true` -- [ ] Security rules have `metadata.cwe` and `metadata.short-description` -- [ ] Source/sink rules use consistent metavariable names (`$UNTRUSTED`) -- [ ] Join-mode `on` clauses reference valid aliases defined in `refs` -- [ ] Test cases exist for all enabled non-library rules -- [ ] passThrough `from`/`to` positions are valid (`this`, `arg(N)`, `result`, etc.) -- [ ] Code-based approximations compile and use `@Approximate` annotation -- [ ] No regressions: existing test cases still pass -- [ ] `opentaint-analysis-plan.md` is updated with current iteration status diff --git a/agent-mode/info/approximations-config.md b/agent-mode/info/approximations-config.md deleted file mode 100644 index 974d50f36..000000000 --- a/agent-mode/info/approximations-config.md +++ /dev/null @@ -1,487 +0,0 @@ -# Approximations Configuration Design Document - -## Overview - -Approximations tell the dataflow engine **how taint propagates** through library and framework methods that the engine cannot analyze directly (because their source code is not part of the project). There are two layers: - -| Layer | Format | Location | Scope | Override Priority | -|-------|--------|----------|-------|-------------------| -| **YAML config rules** | Declarative passThrough YAML | `core/opentaint-config/config/` | Tens of thousands of library methods | Base layer (lowest priority) | -| **Code-based approximations** | Java stub classes | `core/opentaint-jvm-sast-dataflow/dataflow-approximations/` | Complex functional/async APIs | Overrides YAML config (highest priority) | - -When both exist for the same method, **code-based approximations always win** — the engine analyzes the stub body directly instead of applying config rules. - -## Layer 1: YAML Config Rules (passThrough) - -### File Layout - -``` -core/opentaint-config/config/config/ -├── stdlib.yaml # ~21,000 lines — java.io, java.lang, java.util, java.net, java.nio -├── config.yaml # ~18,000 lines — javax.*, org.json -├── jmod.yaml # ~3,400 lines — javax.naming, javax.script, javax.sql -├── unverified.yaml # ~1,100 lines — Jackson, Spring utils, Reactor, XML parsers -└── jar-split/ # Per-library configs (29 files) - ├── spring-web-7.0.2.yaml - ├── spring-webmvc-7.0.2.yaml - ├── reactor-core-3.8.2.yaml - ├── guava-33.5.0-jre.yaml - ├── jackson-databind-2.20.1.yaml - ├── netty-buffer-4.2.0.Final.yaml - └── ... -``` - -### YAML Schema - -All config files share the same top-level `passThrough:` schema: - -```yaml -passThrough: - - function: - signature: - overrides: - condition: - copy: - - from: - to: -``` - -### Function Name Matching - -#### Simple Form (String) - -```yaml -function: java.lang.String#concat -``` - -Parsed as: package = `java.lang`, class = `String`, method = `concat`. - -#### Complex Form (Map with Patterns) - -```yaml -function: - package: org.apache.axis.types - class: - pattern: .* - name: - pattern: get.* -``` - -Each of `package`, `class`, `name` can be either a plain string (exact match) or `pattern: ` (substring regex match). - -#### Pattern Matching Semantics - -- **Exact name** (`"get"`) — matches only methods named exactly `get` -- **Pattern** (`pattern: "get.*"`) — matches any method whose name contains a substring matching `get.*` (uses `containsMatchIn`, not `fullMatch`) -- **Wildcard** (`pattern: ".*"`) — matches any name - -### Signature Matching - -```yaml -signature: (java.lang.String) java.lang.String -``` - -Format: `() `. Used to disambiguate overloaded methods. - -Alternative structured form: -```yaml -signature: - params: - - index: 0 - type: java.lang.String -``` - -### Taint Flow Positions - -Positions describe where taint lives on a method's interface: - -| Position | Meaning | -|-----------------------------------------------|----------------------------------------------------| -| `this` | The receiver object (`this` reference) | -| `arg(0)`, `arg(1)`, ... | Method arguments (0-indexed) | -| `arg(*)` | All arguments (expanded to individual arg rules) | -| `result` | The method's return value | -| `[*]` | Array element access (appended to a base position) | -| `.#$` | Field access (appended to a base position) | - -#### Internal State Tracking (Rule Storage) - -For modeling taint that persists inside an object across method calls: - -```yaml -from: - - this - - .java.io.ByteArrayOutputStream##java.lang.Object -to: result -``` - -The `` is a synthetic field — it doesn't exist in the real class. The engine uses it as a virtual container to track taint flow through an object's internal state. When a method stores taint into the object, the `to` side points to the rule-storage field. When another method retrieves it, the `from` side reads from the same field. - -#### Named Field Access - -```yaml -from: - - this - - .java.lang.Throwable#message#java.lang.Object -to: result -``` - -This models taint flowing from a specific named field (`message`) of the `this` object to the result. - -### Actions - -#### `copy` — Propagate Taint - -```yaml -copy: - - from: arg(0) - to: result - - from: this - to: result -``` - -Copies all taint marks from `from` position to `to` position. The most common action. - -### Conditions - -Conditions restrict when a rule applies: - -```yaml -condition: - typeIs: - position: arg(0) - type: java.lang.String - -condition: - anyOf: - - typeIs: - position: arg(0) - type: java.lang.String - - typeIs: - position: arg(0) - type: java.lang.CharSequence - -condition: - not: - isConstant: - position: arg(0) - -condition: - allOf: - - annotatedWith: - position: arg(0) - type: javax.annotation.Nonnull - - numberOfArgs: 2 -``` - -#### Condition Types - -| Condition | YAML Key | Description | -|-----------|----------|-------------| -| Type check | `typeIs` | Position's type matches a name/pattern | -| Annotation | `annotatedWith` | Position has an annotation | -| Constant | `isConstant` | Position is a compile-time constant | -| Null | `isNull` | Position is null | -| Constant regex | `constantMatches` | Constant value matches regex | -| Constant comparison | `constantEq`, `constantGt`, `constantLt` | Compare constant value | -| Taint check | `tainted` | Position already carries a taint mark | -| Arg count | `numberOfArgs` | Method has N parameters | -| Method annotation | `methodAnnotated` | Method has annotation | -| Class annotation | `classAnnotated` | Enclosing class has annotation | -| Method name | `methodNameMatches` | Method name matches pattern | -| Class name | `classNameMatches` | Class name matches pattern | -| Static field | `isStaticField` | Position is a specific static field | -| Combinators | `anyOf`, `allOf`, `not` | Boolean logic | - -### The `overrides` Field and Hierarchy - -The `overrides` field (default: `true`) controls **class hierarchy inheritance**: - -- `overrides: true` — Rule applies to the specified class **and all subclasses**. When looking up rules for a method, the engine walks the class hierarchy upward and includes matching rules from superclasses. -- `overrides: false` — Rule applies **only** to the exact specified class. - -#### Hierarchical Matching (Method Name Level) - -The `MethodTaintRulesStorage` indexes rules in three tiers: - -1. **Concrete name rules** — exact method name match (e.g., `getEntry`) -2. **Pattern method rules** — regex match on method name (e.g., `get.*`) -3. **Any method rules** — wildcard `.*` match - -When resolving rules for a specific method: -1. Check concrete name match first -2. Evaluate all pattern matches -3. Include any-method wildcard matches -4. All matching rules are **merged** (not prioritized) — they all apply - -**Important**: There is no priority between concrete and pattern rules at this level. If both a `get*` pattern rule and a `getEntry` concrete rule match `getEntry`, **both apply**. To make a specific rule override a general pattern, use conditions to restrict the general rule, or ensure the specific rule's actions make the general rule's actions redundant. - -#### Hierarchical Matching (Class Name Level) - -The `MethodClassTaintRulesStorage` resolves rules by: - -1. **Exact class match** — highest specificity -2. **Pattern class match** — four strategies: - - Concrete class name, any package - - Concrete class name, package pattern - - Concrete package, class pattern - - Both class and package patterns -3. **Any-class wildcard** — lowest specificity -4. **Hierarchy walk** — for superclasses, only `overrides: true` rules propagate -5. **Subclass push** — rules are pushed to supertypes with added `typeIs: This` conditions - -### Complete Example - -```yaml -passThrough: - # String.concat: taint on this or arg flows to result - - function: java.lang.String#concat - signature: (java.lang.String) java.lang.String - copy: - - from: arg(0) - to: result - - from: this - to: result - - # StringBuilder.append: taint on arg flows to this and result - - function: java.lang.StringBuilder#append - copy: - - from: arg(0) - to: this - - from: arg(0) - to: result - - from: this - to: result - - # ByteArrayOutputStream: write stores taint, toString retrieves it - - function: java.io.ByteArrayOutputStream#write - copy: - - from: arg(0) - to: - - this - - .java.io.ByteArrayOutputStream##java.lang.Object - - function: java.io.ByteArrayOutputStream#toString - copy: - - from: - - this - - .java.io.ByteArrayOutputStream##java.lang.Object - to: result - - # Generic getter pattern for Axis types: any get* method propagates this to result - - function: - package: org.apache.axis.types - class: - pattern: .* - name: - pattern: get.* - copy: - - from: this - to: result -``` - -## Layer 2: Code-Based Approximations - -### Purpose - -Code-based approximations replace complex library method bodies with simplified Java implementations that the IFDS taint analyzer can reason about. They are essential for: - -- **Functional APIs** (Stream, Optional) — make lambda data flow explicit -- **Async APIs** (CompletableFuture, CompletionStage) — linearize async composition -- **Threading** (Thread, Executor) — make cross-thread data flow visible -- **Coroutines** (Kotlin builders) — linearize coroutine control flow - -### File Layout - -``` -core/opentaint-jvm-sast-dataflow/dataflow-approximations/ -└── src/main/java/org/opentaint/jvm/dataflow/approximations/ - ├── OpentaintNdUtil.java # Non-deterministic boolean utility - ├── ArgumentTypeContext.java # Annotation for type context parameters - └── stdlib/ - ├── Stream.java # java.util.stream.Stream - ├── Optional.java # java.util.Optional - ├── CompletableFuture.java # java.util.concurrent.CompletableFuture - ├── CompletionStage.java # java.util.concurrent.CompletionStage - ├── Executor.java # java.util.concurrent.Executor - ├── ExecutorService.java # java.util.concurrent.ExecutorService - └── Thread.java # java.lang.Thread - └── kotlin/ - ├── Builders.java # kotlinx.coroutines builders - ├── BuildersBuilders.java - └── BuildersBuildersCommon.java -``` - -### How Approximations Work - -#### Annotation-Based Registration - -```java -@Approximate(java.util.stream.Stream.class) -public class Stream { - // Methods here replace the real Stream methods during analysis -} -``` - -The `@Approximate` annotation binds this stub class to the real `java.util.stream.Stream`. The analyzer loads approximation bytecode from a JAR resource and installs them as `JIRClasspathFeature`. When the analyzer encounters a call to a method that has an approximation, it analyzes the stub body instead of: -- Looking up YAML config rules -- Treating the method as opaque external - -#### Key Infrastructure - -**`OpentaintNdUtil.nextBool()`** — Non-deterministic choice. The analyzer considers **both** branches, enabling modeling of success + failure paths: - -```java -if (OpentaintNdUtil.nextBool()) { - // success path -} else { - // failure path (or return null/empty) -} -``` - -**`@ArgumentTypeContext`** — Marks parameters that carry generic type context (e.g., lambda types). The analyzer uses this to resolve lambda parameter/return types for dataflow through higher-order functions. - -#### Common Patterns - -**Functional transformation** (explicit lambda data flow): - -```java -// Approximation for Stream.map(Function) -public java.util.stream.Stream map(@ArgumentTypeContext Function mapper) { - java.util.stream.Stream t = (java.util.stream.Stream) (Object) this; - Iterator it = t.iterator(); - if (it.hasNext()) { - Object result = mapper.apply(it.next()); - return java.util.stream.Stream.of(result); - } - return java.util.stream.Stream.empty(); -} -``` - -This makes explicit: element extracted from stream → passed to lambda → result wrapped in new stream. - -**Async linearization** (flatten future composition): - -```java -// Approximation for CompletableFuture.thenApply(Function) -public CompletableFuture thenApply(@ArgumentTypeContext Function fn) throws Throwable { - CompletableFuture t = (CompletableFuture) (Object) this; - if (OpentaintNdUtil.nextBool()) return null; - Object result = fn.apply(t.get()); - return CompletableFuture.completedFuture(result); -} -``` - -This linearizes: future value extracted via `.get()` → passed to function → wrapped in completed future. - -**Threading** (potential direct invocation): - -```java -// Approximation for Thread.start() -public void start() { - Thread t = (Thread) (Object) this; - if (OpentaintNdUtil.nextBool()) { - t.run(); - } -} -``` - -Models `Thread.start()` as a potential direct call to `run()` so the analyzer can trace data through threads. - -### Covered API Surface - -| API | Methods Approximated | -|-----|---------------------| -| `Stream` | filter, map, flatMap, mapToInt/Long/Double, peek, sorted, forEach, reduce, collect, min, max, anyMatch/allMatch/noneMatch, takeWhile/dropWhile, toArray, mapMulti | -| `Optional` | ifPresent, ifPresentOrElse, filter, map, flatMap, or, orElseGet, orElseThrow | -| `CompletableFuture` | supplyAsync, thenApply/Accept/Run, thenCombine/AcceptBoth, thenCompose, handle, whenComplete, exceptionally, all `*Async` variants | -| `CompletionStage` | All corresponding CompletionStage methods | -| `Executor` | execute | -| `ExecutorService` | submit, invokeAll, invokeAny | -| `Thread` | start, constructors with Runnable | -| Kotlin Coroutines | runBlocking, launch, async, withContext | - -## Override Hierarchy - -The engine resolves taint propagation rules in this priority order: - -``` -1. Code-based approximations (HIGHEST — analyzed as actual code) - ↓ if no approximation exists -2. YAML passThrough config (applied at call sites as summary edges) - ↓ if no config rule exists -3. Auto-generated defaults (JIRMethodGetDefaultProvider: get* methods → copy this to result) - ↓ if none of the above -4. Intra-procedural analysis (analyze the actual callee body if available) - ↓ if callee is external/unknown -5. Call-to-return passthrough (taint preserved, method treated as no-op) -``` - -### Provider Chain (Runtime) - -``` -JIRTaintRulesProvider ← loads from TaintConfiguration (YAML) - └── StringConcatRuleProvider ← adds synthetic rules for string concat - └── JIRMethodGetDefaultProvider ← auto-generates get* passthrough - └── JIRCombinedTaintRulesProvider ← merges base + custom config - └── JIRFilteredTaintRulesProvider ← applies TaintRuleFilter -``` - -### JIRCombinedTaintRulesProvider - -When a custom config is provided alongside the default config, `JIRCombinedTaintRulesProvider` merges them with configurable per-category modes: - -| Category | Default Mode | Behavior | -|----------|-------------|----------| -| Entry points | OVERRIDE | Custom replaces base | -| Sources | OVERRIDE | Custom replaces base | -| Sinks | OVERRIDE | Custom replaces base | -| PassThrough | EXTEND | Custom + base merged | -| Cleaners | EXTEND | Custom + base merged | - -Modes: `EXTEND` (union), `OVERRIDE` (only custom), `IGNORE` (only base). - -## Agent Interaction Points - -### Generating YAML Config Rules - -An agent can create new passThrough rules to fix false negatives where taint is lost through library method calls: - -```yaml -# Agent-generated rule for a missed library method -passThrough: - - function: com.example.lib.DataProcessor#transform - copy: - - from: arg(0) - to: result -``` - -### Generating Code-Based Approximations - -For complex APIs with lambdas/callbacks, the agent can write Java stub classes: - -```java -@Approximate(com.example.lib.AsyncProcessor.class) -public class AsyncProcessor { - public CompletableFuture processAsync(@ArgumentTypeContext Function fn) { - AsyncProcessor self = (AsyncProcessor) (Object) this; - if (OpentaintNdUtil.nextBool()) return null; - Object result = fn.apply(self.getData()); - return CompletableFuture.completedFuture(result); - } -} -``` - -### Overriding Existing Rules - -An agent can override existing rules by: - -1. **For YAML config**: Add rules with more specific function/class matchers. Since all matching rules are merged, add a `cleaner` rule to cancel out an incorrect passthrough, or provide a corrected passthrough with more specific conditions. -2. **For code-based approximations**: Create a new approximation class for the same target class. Code-based approximations always override YAML config for the same methods. - -### Important Constraints - -1. YAML config rules follow a **merge** (not replace) model — all matching rules contribute -2. Code-based approximations require compilation to bytecode and inclusion in the approximations JAR -3. The `` pattern must be used consistently for object state tracking -4. Conditions are resolved at rule load time for structural checks (annotations, class names) and at runtime for value checks (constants, types, taint marks) -5. Framework support (Spring) is provided as-is and is **not** configurable through config rules diff --git a/agent-mode/info/pattern-rules.md b/agent-mode/info/pattern-rules.md deleted file mode 100644 index 1c1583da6..000000000 --- a/agent-mode/info/pattern-rules.md +++ /dev/null @@ -1,313 +0,0 @@ -# Pattern Rules Design Document - -## Overview - -Pattern rules are Semgrep-compatible YAML files that define **what** the analyzer should look for in the target codebase. They describe vulnerable dataflow patterns by composing sources, sinks, sanitizers, and structural code patterns. All rules live under `rules/ruleset/`. - -## Directory Layout - -``` -rules/ruleset/ -├── java/ -│ ├── security/ # Executable rules (one per vulnerability class) -│ │ ├── sqli.yaml -│ │ ├── xss.yaml -│ │ ├── command-injection.yaml -│ │ ├── path-traversal.yaml -│ │ └── ... (22 files) -│ └── lib/ # Reusable library rules (non-executable) -│ ├── generic/ # Framework-agnostic sources/sinks -│ │ ├── servlet-untrusted-data-source.yaml -│ │ ├── command-injection-sinks.yaml -│ │ ├── path-traversal-sinks.yaml -│ │ └── ... (17 files) -│ └── spring/ # Spring-specific sources/sinks -│ ├── untrusted-data-source.yaml -│ ├── jdbc-sqli-sinks.yaml -│ └── ... (6 files) -└── test/ # Test samples and coverage enforcement -``` - -## Rule File Structure - -Every rule file is a YAML document with a top-level `rules:` list. Each entry is a single rule. - -### Common Fields (All Modes) - -```yaml -rules: - - id: # Required. Globally unique identifier. - severity: ERROR | WARNING | NOTE # Required. ERROR = critical, WARNING = medium, NOTE = library/informational. - message: >- # Required. Human-readable finding message. - Description of the vulnerability - metadata: # Required for security rules. Structured metadata. - cwe: CWE-xxx # CWE identifier(s) - short-description: ... # One-line summary - full-description: |- # Multiline markdown with code examples (vulnerable + safe) - ... - references: # External links (OWASP, CWE, etc.) - - https://... - provenance: ... # Upstream rule source - license: ... # License info - languages: - - java # Target language - options: # Optional flags - lib: true # Marks as non-executable library rule - disabled: "reason" # Disables rule with explanation -``` - -## Three Pattern Modes - -### Mode 1: Simple Pattern Matching (Default) - -No `mode:` key needed. Uses structural code patterns to find matches. - -#### Pattern Operators - -| Operator | Semantics | -|----------|-----------| -| `pattern` | Match a single code pattern | -| `patterns` | Conjunction (AND) — all sub-patterns must match | -| `pattern-either` | Disjunction (OR) — any sub-pattern matches | -| `pattern-inside` | Match must occur inside another pattern | -| `pattern-not` | Negation — exclude matches fitting this pattern | -| `pattern-not-inside` | Exclude matches inside another pattern | -| `metavariable-regex` | Constrain a captured metavariable by regex | -| `metavariable-pattern` | Constrain a captured metavariable by sub-pattern | -| `focus-metavariable` | Narrow the match region to a specific metavariable | - -#### Metavariables - -Metavariables (prefixed with `$`) capture parts of the matched code: - -- `$VAR` — single expression or identifier -- `$...ARGS` — zero or more expressions (variadic) -- `$_` — anonymous wildcard (don't-care) - -#### Example: Structural Pattern - -```yaml -rules: - - id: wicket-xss - severity: WARNING - message: XSS via Wicket setEscapeModelStrings - languages: [java] - patterns: - - pattern: | - (org.apache.wicket.$A $OBJ).setEscapeModelStrings(false); -``` - -#### Example: Pattern with Metavariable Constraints - -```yaml -patterns: - - pattern-either: - - pattern: | - $RETURNTYPE $METHOD(HttpServletRequest $UNTRUSTED, ...) { ... } - - metavariable-pattern: - metavariable: $METHOD - pattern-either: - - pattern: doGet - - pattern: doPost - - pattern: doPut -``` - -### Mode 2: Taint Mode - -Explicitly declares `mode: taint`. Used to define source/sink/sanitizer triples within a single rule file. - -```yaml -rules: - - id: rule-id - mode: taint - pattern-sources: # Where tainted data originates - - patterns: - - pattern: ... - pattern-sinks: # Where tainted data is dangerous - - patterns: - - pattern-either: - - pattern: $DB.execute($UNTRUSTED, ...) - - focus-metavariable: $UNTRUSTED # Narrow to the tainted arg - pattern-sanitizers: # What makes data safe - - pattern: Encoder.encode(...) - pattern-propagators: # Custom propagation through methods - - pattern: ... - from: $FROM - to: $TO -``` - -**Key feature**: `focus-metavariable` in sinks narrows the match to the specific tainted expression, not the entire call. - -**Used primarily for library sink rules** that define only `pattern-sinks` (no sources), relying on join-mode composition to supply sources. - -### Mode 3: Join Mode (Primary Composition Mechanism) - -Explicitly declares `mode: join`. Composes library rules to form complete vulnerability detectors. - -```yaml -rules: - - id: sql-injection - mode: join - join: - refs: - - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source - as: servlet-source - - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source - as: spring-source - - rule: java/lib/spring/jdbc-sqli-sinks.yaml#spring-sqli-sink - as: sink - on: - - 'servlet-source.$UNTRUSTED -> sink.$UNTRUSTED' - - 'spring-source.$UNTRUSTED -> sink.$UNTRUSTED' -``` - -#### Reference Syntax - -``` -rule: # -as: -``` - -The path is relative to `rules/ruleset/`. The `#rule-id` fragment selects which rule from a multi-rule file. - -#### `on` Clause Syntax - -``` -'.$METAVAR -> .$METAVAR' -``` - -- `->` denotes a **dataflow relationship**: the value captured by `$METAVAR` in the source must flow (through taint propagation) to the same `$METAVAR` in the sink. -- Multiple `on` clauses act as **alternatives (OR)** — any match triggers a finding. -- The metavariable `$UNTRUSTED` is the conventional name for the tainted data binding point across source and sink rules. - -## Library Rules - -Library rules are reusable building blocks marked with `options.lib: true`. They are **never executed standalone** — they exist only to be referenced by join-mode rules. - -### Source Library Rules - -Define where untrusted data enters the application: - -```yaml -rules: - - id: java-servlet-untrusted-data-source - options: - lib: true - severity: NOTE - patterns: - - pattern-either: - - patterns: - - pattern: | - $RETURNTYPE $ENTRYPOINT(HttpServletRequest $UNTRUSTED, ...) { ... } - - metavariable-pattern: - metavariable: $ENTRYPOINT - pattern-either: - - pattern: doGet - - pattern: doPost -``` - -Captures `$UNTRUSTED` at the point where user-controlled data enters. - -### Sink Library Rules - -Define where tainted data becomes dangerous. Can use either pattern mode or taint mode: - -**Pattern-based sink** (simple structure): -```yaml -rules: - - id: command-injection-sinks - options: - lib: true - patterns: - - pattern-either: - - pattern: new ProcessBuilder($UNTRUSTED, ...) - - pattern: Runtime.$EXEC($UNTRUSTED, ...) - - metavariable-regex: - metavariable: $EXEC - regex: (exec|loadLibrary|load) -``` - -**Taint-mode sink** (for complex matching with focus): -```yaml -rules: - - id: spring-sqli-sink - mode: taint - options: - lib: true - pattern-sinks: - - patterns: - - pattern-either: - - pattern: (Statement $S).execute($UNTRUSTED) - - pattern: (JdbcTemplate $T).$METHOD($UNTRUSTED, ...) - - metavariable-regex: - metavariable: $METHOD - regex: (query|update|execute|batchUpdate) - - focus-metavariable: $UNTRUSTED -``` - -## How the Engine Processes Pattern Rules - -1. **Rule loading**: YAML files are parsed and rules are categorized by mode -2. **Join resolution**: Join-mode rules resolve their `refs` to load referenced library rules -3. **Pattern compilation**: Code patterns are compiled into Semgrep-compatible matchers -4. **Dataflow binding**: In join mode, `$UNTRUSTED` from sources and sinks are linked via the `->` operator. The engine performs taint analysis to determine if data flows from source to sink. -5. **Result generation**: Matches produce SARIF findings with vulnerability traces - -## Agent Interaction Points - -### Generating New Rules - -An agent can generate new security rules by: - -1. **Creating source library rules** — define new entry points for untrusted data -2. **Creating sink library rules** — define new dangerous operations -3. **Creating join-mode rules** — compose sources and sinks into vulnerability detectors - -### Fixing False Positives (FP) - -An agent can fix FP by: - -1. **Adding `pattern-not` / `pattern-not-inside`** to exclude safe patterns -2. **Adding `pattern-sanitizers`** (in taint mode) to recognize sanitization -3. **Adding `metavariable-regex`** with negative lookaheads to exclude safe types/methods -4. **Setting `options.disabled`** with a reason to disable overly broad rules - -### Fixing False Negatives (FN) - -An agent can fix FN by: - -1. **Adding new patterns to `pattern-either`** in source/sink library rules -2. **Creating new library rules** for uncovered frameworks/APIs -3. **Adding new `on` clauses** in join-mode rules to link new source/sink combinations -4. **Widening `metavariable-regex`** to accept more matching patterns - -### Constraints for Agent-Generated Rules - -1. All rules **must** follow the YAML schema above -2. Library rules **must** have `options.lib: true` and `severity: NOTE` -3. Security rules **must** have `metadata.cwe` and `metadata.short-description` -4. Source rules **must** capture `$UNTRUSTED` (or equivalent metavariable) -5. Sink rules **must** use the same metavariable name for the tainted position -6. Join-mode `on` clauses **must** reference aliases defined in `refs` -7. Rule ids **must** be globally unique -8. Each enabled non-lib rule **must** have corresponding test coverage (`@PositiveRuleSample` / `@NegativeRuleSample`) - -## Testing Rules - -Rules are tested via annotated Java code samples in `rules/test/`: - -```java -@PositiveRuleSample(ruleId = "sql-injection") -public void vulnerable(HttpServletRequest req) { - String input = req.getParameter("id"); - db.execute("SELECT * FROM users WHERE id = " + input); -} - -@NegativeRuleSample(ruleId = "sql-injection") -public void safe(HttpServletRequest req) { - String input = req.getParameter("id"); - db.execute("SELECT * FROM users WHERE id = ?", input); -} -``` - -The `checkRulesCoverage` Gradle task enforces that all enabled, non-library rules have test samples. diff --git a/agent-mode/mismatch.md b/agent-mode/mismatch.md deleted file mode 100644 index 572318a92..000000000 --- a/agent-mode/mismatch.md +++ /dev/null @@ -1,358 +0,0 @@ -# Agent Mode — Design vs Implementation Mismatches - -Scope of review: -- Design: `agent-mode/design/agent-mode-design.md` -- Agent prompt: `agent/meta-prompt.md` -- Agent skills: `agent/skills/*.md` -- Go CLI: `cli/cmd/*.go` -- Core analyzer CLI: `core/src/main/kotlin/org/opentaint/jvm/sast/runner/{AbstractAnalyzerRunner,ProjectAnalyzerRunner}.kt` - -The classification below splits findings into: -- **CLI ↔ Core mismatches**: Go CLI passes a wrong flag name, wrong semantics, or wrong value to the Kotlin analyzer. -- **Skill ↔ CLI mismatches**: The skill / meta-prompt tells the agent to use a CLI surface that does not exist or works differently. -- **Skill ↔ Design mismatches**: The design promises a behavior that the skill contradicts (even if the skill happens to match the implementation). - -Severities: -- **BLOCKER** — user command fails (non-zero exit) or produces no output. -- **MAJOR** — produces incorrect behavior or wrong output path. -- **MINOR** — misleading documentation; commands still work. - ---- - -## 1. CLI ↔ Core analyzer mismatches - -### 1.1 BLOCKER — `--external-methods-output` flag does not exist on the analyzer - -**Go CLI** (`cli/cmd/command_builder.go:255`): -```go -if a.externalMethodsOutput != "" { - flags = append(flags, "--external-methods-output", a.externalMethodsOutput) -} -``` - -**Kotlin analyzer** (`core/src/main/kotlin/org/opentaint/jvm/sast/runner/ProjectAnalyzerRunner.kt:51`): -```kotlin -private val trackExternalMethods: Boolean by option(help = "Track external methods, produce external methods YAML lists") - .flag() -``` - -The analyzer exposes a **boolean flag** (`--track-external-methods`), not a path option. The -Go CLI sends `--external-methods-output `, which Clikt will reject because no such -option is declared. - -Additionally, the output **location is not configurable** in the analyzer — it always writes -into the analyzer output directory (`ProjectAnalyzer.writeExternalMethodsYaml`, lines 222–237): - -```kotlin -val withoutRulesPath = resultDir / "external-methods-without-rules.yaml" -val withRulesPath = resultDir / "external-methods-with-rules.yaml" -``` - -**Consequence**: the Go CLI flag `--external-methods` is broken end-to-end. Passing it fails -the scan (unknown option). Even if the flag name were fixed, the user-supplied base path -would be ignored. - -**Fix options**: -- Change the Go CLI to pass `--track-external-methods` when `ExternalMethodsOutput != ""`, and - surface the files from the analyzer output dir (`/external-methods-{without,with}-rules.yaml`); -- Or extend the analyzer to accept `--external-methods-output ` (matching the design). - -> Historical note: `core/bin/main/.../ProjectAnalyzerRunner.kt:50–51` did declare -> `--external-methods-output` as `Path? by option(...).newFile()`. The current -> source has replaced it with a boolean `trackExternalMethods`. The Go CLI still -> targets the old contract. - -### 1.2 MAJOR — Design / CLI disagreement on `--approximations-config` cardinality - -**Design (1.2)**: -> **Kotlin CLI**: Rename `--config` to `--approximations-config`. - -Design implies a **single** config path (consistent with pre-existing `customConfig: Path?`). - -**Kotlin analyzer** (current source, `ProjectAnalyzerRunner.kt:37`): -```kotlin -private val approximationsConfig: List by option(help = "...") - .file() - .multiple() -``` - -So the real core API is now **repeatable** (`List`), but: - -**Go CLI** (`cli/cmd/scan.go:35`): -```go -ApproximationsConfig string -... -scanCmd.Flags().StringVar(&ApproximationsConfig, "approximations-config", "", "...") -``` - -and builder (`command_builder.go:60, 246–248`): -```go -approximationsConfig string -... -if a.approximationsConfig != "" { - flags = append(flags, "--approximations-config", a.approximationsConfig) -} -``` - -The Go CLI exposes a **single-valued** flag and passes at most one occurrence. - -**Consequence**: agents relying on "OVERRIDE mode" semantics documented in the design can -only ever supply a single file. If the analyzer expects multiple (it accepts `.multiple()`) -there is no way to supply them through the Go CLI. - -**Fix options**: make the Go CLI flag repeatable (`StringArrayVar`) and proxy every value, -or revert the Kotlin option to `Path?` and update the design to make the single-path -contract explicit. - -### 1.3 MINOR — `--dataflow-approximations` accepts different path kinds - -**Design (1.3/1.4)** and all skills: `--dataflow-approximations ` — "Directory of compiled -approximation class files" (or sources which the Go CLI auto-compiles). - -**Kotlin analyzer** current source (`ProjectAnalyzerRunner.kt:54`): -```kotlin -private val dataflowApproximations: List by option(help = "Directory of compiled approximation class files") - .directory() - .multiple() -``` - -The `core/bin/...` copy uses `.path()` instead of `.directory()`. Not a behavioural -mismatch between the Go CLI and the analyzer (the CLI does compile sources to a directory -and passes the directory path), but there is an inconsistency between the compiled artifact -and the source, which can silently bite integrators using the `bin` classpath. - ---- - -## 2. Skill / meta-prompt ↔ Go CLI mismatches - -### 2.1 BLOCKER — `opentaint rules-path` does not exist - -**Design (1.8, 2.1)** and `agent/skills/create-rule.md:15` prescribe: -```bash -RULES_DIR=$(opentaint rules-path) -``` - -**Meta prompt** (`agent/meta-prompt.md:24`): -``` -1. **Check built-in rules** -- read rules in `$(opentaint agent rules-path)` -``` - -**Actual CLI** (`cli/cmd/agent_rules_path.go`): the command is registered under -the `agent` command group, i.e. `opentaint agent rules-path`, not `opentaint rules-path`. - -- The meta-prompt uses the correct form (`opentaint agent rules-path`). -- The `create-rule.md` skill uses the **wrong** form (`opentaint rules-path`), - matching the design document verbatim. - -**Fix**: change `create-rule.md:15` to `opentaint agent rules-path`. - -### 2.2 BLOCKER — `opentaint test-rules` does not exist as a top-level command - -**Design (1.5, 2.1)** and all design examples: -```bash -opentaint test-rules --ruleset ... -o ... -``` - -**Actual CLI** (`cli/cmd/agent_test_rules.go:24`): the command is registered under the -`agent` group: -```go -agentCmd.AddCommand(agentTestRulesCmd) -``` -Real invocation is `opentaint agent test-rules ...`. - -- The `agent/skills/test-rule.md:64` uses the correct form (`opentaint agent test-rules`). -- The design document and every `run-analysis`/phased example in the design file use the - incorrect top-level form. - -### 2.3 BLOCKER — `opentaint init-test-project` does not exist as a top-level command - -Same pattern as 2.2. Design says `opentaint init-test-project `; the implementation -registers it as `opentaint agent init-test-project` (`cli/cmd/agent_init_test_project.go:67`). - -- `agent/skills/test-rule.md:16` uses the correct `opentaint agent init-test-project`. -- Design document uses `opentaint init-test-project` in Appendix A and §2.1 / §3.4. - -### 2.4 MAJOR — `opentaint agent test-rules` argument is a directory, not `project.yaml` - -**Design (1.5, 2.1)** and **skill `run-analysis.md`** repeatedly say: -```bash -opentaint test-rules ./agent-test-compiled/project.yaml --ruleset ... -o ... -``` - -**Actual CLI** (`cli/cmd/agent_test_rules.go:37-42`): -```go -projectPath := log.AbsPathOrExit(args[0], "project-model") -nativeProjectPath := filepath.Join(projectPath, "project.yaml") - -if _, err := os.Stat(nativeProjectPath); os.IsNotExist(err) { - out.Fatalf("Project model not found: %s", nativeProjectPath) -} -``` - -The CLI joins the argument with `project.yaml` and then stats it. If the user passes -`./agent-test-compiled/project.yaml`, the CLI stats `./agent-test-compiled/project.yaml/project.yaml` -and aborts. - -Skill `test-rule.md:64-66` has the **correct** form (passes a directory). Design file -and the `analyze-findings` narrative in the design have the wrong form. - -### 2.5 MAJOR — `opentaint scan` argument is **not** the directory containing `project.yaml` only - -**Meta prompt** (`agent/meta-prompt.md:36`) and skills `run-analysis.md`, -`create-yaml-config.md`, `debug-rule-reachability.md`, `create-rule.md`, -`create-approximation.md` pass `./opentaint-project` (a directory) to `scan`. -`agent/skills/run-analysis.md:78` even states this as a "Note": -> The scan path is the **directory** containing `project.yaml`, not the path to `project.yaml` itself - -**Actual CLI** (`cli/cmd/scan.go:158-167`): -- Checks `validation.ValidateSourceProject(absUserProjectRoot)` against source-project markers - (`pom.xml`, `build.gradle*`, `mvnw`, `gradlew`, `.mvn`). A directory that contains only - `project.yaml` and compiled classes has **none** of these markers. -- When validation fails it then tests `validation.IsProjectModel(absUserProjectRoot)` and, if - true, **aborts with a suggestion** to use `--project-model`, exit code 1. - -So `opentaint scan ./opentaint-project` (directory with `project.yaml`) **does not scan**; -it prints a suggestion and exits. The correct invocation is either `opentaint scan` on the -source directory (for compile+scan) or `opentaint scan --project-model ./opentaint-project`. - -The design file is particularly bad about this — e.g. §3.5: -```bash -opentaint scan ./opentaint-project/project.yaml -o ./results/report.sarif ... -``` -This passes a file to a command expecting a directory and fails validation in a different -way. - -**Fix**: update every skill and design snippet to use -```bash -opentaint scan --project-model ./opentaint-project ... -``` -(or pass the source directory if a fresh compile is desired). - -### 2.6 MAJOR — `agent/skills/run-analysis.md` claims `--external-methods` produces two files whose base path is user-configurable - -`run-analysis.md:59-60`: -> The `--external-methods` flag specifies the **base path**. The analyzer derives two -> filenames by appending `-without-rules` and `-with-rules` before the `.yaml` extension. - -Both statements are wrong against the current analyzer: -1. Per 1.1 above, the analyzer does not accept a path at all — only a boolean - `--track-external-methods`. The Go CLI itself currently passes an unsupported - `--external-methods-output ` flag, so `--external-methods` in the Go CLI never - actually drives the output path. -2. Output file names are hard-coded (`external-methods-{without,with}-rules.yaml`), - written into the analyzer `resultDir`, not to a user-supplied base path. - -The meta-prompt (`agent/meta-prompt.md:40,81`) and `analyze-findings.md:40-42` repeat the -"two files" expectation; the files do exist but at the fixed location above. - -### 2.7 MINOR — `--rule-id` argument format - -**Implementation** (`ruleIdAllow` in `SemgrepRuleLoader.kt:493-494`) compares the -`--semgrep-rule-id` value against `rule.info.ruleId`, which is built by -`SemgrepRuleUtils.getRuleId(ruleSetName, id)` as `"$ruleSetName:$id"` where `ruleSetName` -is the rule file path relative to the ruleset root (e.g. `java/security/my-vuln.yaml`). - -Skills `run-analysis.md`, `create-rule.md`, `debug-rule-reachability.md`, and `test-rule.md` -all correctly document `--rule-id java/security/my-vuln.yaml:my-vulnerability`. - -**Design file is wrong** — §1.6 and §3.3 examples give: -```bash ---rule-id my-vulnerability -``` -and state "No need to list [refs] in `--rule-id`". With the implementation, the plain short -ID does not match any rule (`ruleIdAllow` will drop every rule), yielding zero findings. - -The design's claim that referenced library rules are auto-included when a join rule is in -the filter is **not** visible in the current `SemgrepRuleLoader.loadRules`: the single filter -check is `ruleIdAllow(this, ruleIdFilter)` applied to every rule independently. If a library -rule's full ID is not in the filter, it is skipped (library rules are also skipped by -`info.isLibraryRule` anyway, regardless of refs). - -The meta-prompt (`agent/meta-prompt.md:39`) writes `--rule-id ` without -specifying the format, which is less wrong but still misleading for an agent; the explicit -full-ID examples in the skills are correct. - -### 2.8 MINOR — `create-rule.md` duplicates the design's wrong `RULES_DIR=$(opentaint rules-path)` - -See 2.1 — `create-rule.md:15` needs the `agent` prefix. - ---- - -## 3. Skill ↔ Design mismatches - -### 3.1 Rule filter semantics re-refs - -**Design (1.6)**: "Library rules (`options.lib: true`) referenced by active rules via `refs` -are automatically included — they don't need to be listed explicitly." and §3.3. - -**Implementation** (`SemgrepRuleLoader.loadRules` and `ruleIdAllow`, lines 105-107, 493-494): -A rule is kept iff it is not disabled, not a library rule, passes severity, **and** passes -the `ruleIdFilter`. Library rules are always skipped (`info.isLibraryRule` skip), so the -"auto-include" only works to the extent that join rules physically carry their refs' -patterns internally. There is no code path that adds library rule IDs to the filter or -treats them as implicitly active via a join rule's `refs`. - -This is a **design ↔ implementation** mismatch; it also explains why skills that repeat the -design's claim (`create-rule.md:127`, `meta-prompt.md:99`) are misleading. - -### 3.2 `--approximations-config` OVERRIDE mode and scope - -**Design (1.2, §3.7)** says the custom config **overrides** the default config and is used -**exclusively for passThrough**, because the analyzer "currently cannot use sanitizers from -the config". - -**Implementation** (`ProjectAnalyzer.approximationConfigCombinationOptions`, lines 246-252): -```kotlin -private val approximationConfigCombinationOptions = CombinationOptions( - entryPoint = CombinationMode.IGNORE, - source = CombinationMode.IGNORE, - sink = CombinationMode.IGNORE, - cleaner = CombinationMode.IGNORE, - passThrough = CombinationMode.OVERRIDE, -) -``` - -Skills match the design (OVERRIDE, passThrough only), so no mismatch between skills and -implementation **for this category**. The design note in §3.7 says *"cleaner ignored"* but -describes it as `conditions` in §3.7 enumeration which includes `cleaner`-ish constructs -implicitly via custom configurations — that part is consistent. - -### 3.3 `agent-approximations/` directory layout - -- **Design § Working Directory Layout** says `agent-approximations/src/` contains Java - sources that are auto-compiled by the CLI. -- **Meta prompt §Working Directory Layout** says `agent-approximations/classes/` (compiled - classes). -- **Skill `create-approximation.md`** uses `agent-approximations/src/` for sources and - `agent-approximations/classes/` for compiled output (compile manually with `javac`). -- **Go CLI** (`cli/cmd/compile_approximations.go`) auto-compiles `.java` files found in the - given `--dataflow-approximations` directory (aligning with design). - -The documentation is internally inconsistent: meta-prompt's layout omits the `src/` -directory and assumes the agent compiles manually, while design/skill direct the agent to -let the CLI auto-compile. Pick one convention; currently the agent may do both, depending -on which file it reads. - -### 3.4 `agent-approximations/classes` vs `src` (skill text) - -`agent/skills/create-approximation.md:52-56` tells the agent to compile manually and pass -`./agent-approximations/classes`, even though the CLI auto-compiles `.java` files. This is -not wrong (CLI accepts `.class` directories unchanged) but contradicts the design's -"one command" story and makes it awkward for agents that wrote only `.java` sources in -`src/`. - ---- - -## 4. Summary of concrete fixes required - -| # | Severity | File(s) | Change | -|---|---|---|---| -| 1 | BLOCKER | `cli/cmd/command_builder.go` | Replace `--external-methods-output ` with boolean `--track-external-methods` **or** add the corresponding option to `ProjectAnalyzerRunner.kt`. Align `ExternalMethodsOutput` semantics with whichever direction is chosen. | -| 2 | BLOCKER | `agent-mode/design/agent-mode-design.md`, `agent/skills/create-rule.md:15` | Use `opentaint agent rules-path`, `opentaint agent test-rules`, `opentaint agent init-test-project` consistently. | -| 3 | MAJOR | design doc, `agent/skills/run-analysis.md`, `agent/skills/create-rule.md`, `agent/skills/create-approximation.md`, `agent/skills/debug-rule-reachability.md`, `agent/skills/create-yaml-config.md`, `agent/meta-prompt.md` | When passing a pre-compiled model, use `opentaint scan --project-model ./opentaint-project ...`. Never pass `./opentaint-project` or `./opentaint-project/project.yaml` as the positional argument. | -| 4 | MAJOR | design doc | For `opentaint agent test-rules`, pass the **directory** (e.g. `./agent-test-compiled`), not `project.yaml`. | -| 5 | MAJOR | `cli/cmd/scan.go`, `cli/cmd/command_builder.go` | Decide single vs multiple `--approximations-config`; align Go CLI and Kotlin analyzer to one cardinality. | -| 6 | MAJOR | `agent/skills/run-analysis.md`, `agent/meta-prompt.md` | Document that the external-methods YAML files are emitted in the analyzer output directory with fixed names `external-methods-{without,with}-rules.yaml`, and that `--external-methods ` has no effect on the output location. | -| 7 | MINOR | `agent-mode/design/agent-mode-design.md` | Replace `--rule-id my-vulnerability` with `--rule-id java/security/my-vuln.yaml:my-vulnerability` everywhere, and remove the "refs are auto-included" claim unless `SemgrepRuleLoader` is updated to implement it. | -| 8 | MINOR | `agent/meta-prompt.md`, `agent/skills/create-approximation.md` | Standardise on `agent-approximations/src/` (auto-compile) and remove `classes/` references or vice versa. | -| 9 | MINOR | `core/bin/main/org/opentaint/jvm/sast/runner/ProjectAnalyzerRunner.kt` | Rebuild — the binary copy is out of sync with `core/src` on several option declarations (see diff in §1.1, §1.3). | diff --git a/agent-mode/mitigation-plan.md b/agent-mode/mitigation-plan.md deleted file mode 100644 index 9a2464ce2..000000000 --- a/agent-mode/mitigation-plan.md +++ /dev/null @@ -1,407 +0,0 @@ -# Agent Mode — Mismatch Mitigation Plan - -## Priority Rules - -1. **Core analyzer API is frozen.** Its current surface (`AbstractAnalyzerRunner` + - `ProjectAnalyzerRunner`) is the source of truth. We do **not** add, rename, or change - the semantics of any Kotlin option. -2. **CLI follows Core.** Any Go CLI flag that does not correctly map onto a Core option is - changed until it does. -3. **Skills and design docs follow CLI.** Anything still documented incorrectly is rewritten - in skills / meta-prompt / design. - -Every fix below is locked to this hierarchy. - ---- - -## 1. Frozen Core API (reference) - -From `core/src/main/kotlin/org/opentaint/jvm/sast/runner/ProjectAnalyzerRunner.kt` -(+ `AbstractAnalyzerRunner.kt`). These are the options the CLI is allowed to use: - -| Kotlin name | CLI flag (Clikt-derived) | Kind | Notes | -|---|---|---|---| -| `approximationsConfig` | `--approximations-config` | `List` (repeatable) | Custom YAML passThrough; OVERRIDE mode. | -| `semgrepRuleSet` | `--semgrep-rule-set` | `List` | Ruleset roots. | -| `semgrepRuleSeverity` | `--semgrep-rule-severity` | `List` | | -| `semgrepRuleId` | `--semgrep-rule-id` | `List` | Full ID `.yaml:`. | -| `trackExternalMethods` | `--track-external-methods` | Boolean flag | Writes fixed-name YAMLs into `outputDir`. | -| `dataflowApproximations` | `--dataflow-approximations` | `List` (directories) | Compiled class dirs. | -| `semgrepRuleLoadTrace` | `--semgrep-rule-load-trace` | `Path?` | | -| `sarifFileName`, `sarifCodeFlowLimit`, `sarifSemgrepStyleId`, `sarifToolVersion`, `sarifToolSemanticVersion`, `sarifGenerateFingerprint`, `sarifUriBase` | corresponding `--sarif-*` flags | … | | -| `debugFactReachabilitySarif` | `--debug-fact-reachability-sarif` | Flag | Output: `outputDir/debug-ifds-fact-reachability.sarif`. | -| `debugRunRuleTests` | `--debug-run-rule-tests` | Flag | Output: `outputDir/test-result.json`. | -| `--project`, `--output-dir`, `--project-kind`, `--ifds-analysis-timeout`, `--ifds-ap-mode`, `--verbosity`, `--logs-file` | inherited | | | - -**Hard consequences** locked in by this surface: -- External-methods output: fixed filenames (`external-methods-{without,with}-rules.yaml`) in - `outputDir`. Users cannot choose a path. -- `--approximations-config` is repeatable. -- No "refs auto-include" for `--semgrep-rule-id`: filtering is purely - `rule.info.ruleId in filter` (`SemgrepRuleLoader.kt:493`). - -Anything in CLI / skills / design that contradicts the table above must yield. - ---- - -## 2. CLI changes (to match Core) - -### 2.1 External methods — decision: boolean `--track-external-methods` - -**File**: `cli/cmd/scan.go`, `cli/cmd/command_builder.go`. - -We considered two CLI shapes: - -| Option | UX | Code | Failure modes | -|---|---|---|---| -| **A. Boolean** `--track-external-methods` | Files always in `/external-methods-{without,with}-rules.yaml` (next to SARIF). | 1:1 with Core; emitter is three lines. | None introduced. | -| B. String `--external-methods ` with post-scan rename | User picks base path; CLI renames/copies after the analyzer exits. | Extra I/O code; partial-failure semantics if analyzer crashes mid-write or user path is on a different volume; must handle two files atomically. | CLI must also decide whether to leave the originals; rename logic has to run even when analyzer returns non-zero (for partial output). | - -**Decision: Option A — boolean `--track-external-methods`.** It matches the frozen Core API 1:1 (Priority 1 rule), removes a class of failure modes, and the agent workflow doesn't need a custom base path — it already knows the output directory because it controls `-o`. - -Concrete changes: - -- Remove from `cli/cmd/command_builder.go`: the `externalMethodsOutput string` field, the `SetExternalMethodsOutput(...)` method, and the `--external-methods-output` emitter (`command_builder.go:254-256`). Add `trackExternalMethods bool` + `SetTrackExternalMethods(bool)`; emit only `"--track-external-methods"` (no value). -- Remove from `cli/cmd/scan.go`: the `ExternalMethodsOutput string` var, the `--external-methods` flag registration, and the absolute-path resolution block that calls `SetExternalMethodsOutput(...)`. Add: - ```go - var TrackExternalMethods bool - scanCmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, - "Write external-methods-{without,with}-rules.yaml next to the SARIF report") - ... - if TrackExternalMethods { - nativeBuilder.SetTrackExternalMethods(true) - } - ``` -- Document the fixed output location in the flag help text and in the updated skill (`run-analysis.md`): `/external-methods-{without,with}-rules.yaml`. - -### 2.2 `--approximations-config` — make repeatable - -**File**: `cli/cmd/scan.go`, `cli/cmd/command_builder.go`. - -Core takes `List`; CLI currently takes `string`. Promote to `[]string`: - -```go -var ApproximationsConfig []string -scanCmd.Flags().StringArrayVar(&ApproximationsConfig, "approximations-config", nil, - "YAML passThrough approximations config (OVERRIDE mode, repeatable)") -``` - -Builder gains `AddApproximationsConfig(path string)`; the emitter loops and appends -`--approximations-config

` per entry. Same treatment for the absolute-path resolution -loop in `scan.go`. - -### 2.3 Verify `--dataflow-approximations` stays compatible - -The current `directory()` typing in Core rejects files. The CLI already produces a -directory (auto-compile branch in `compile_approximations.go`). Keep as-is; add a -defensive check in the CLI that `info.IsDir()` before proxying (we do already). -No changes needed. - -### 2.4 Other flags reviewed — keep as-is - -- `--rule-id` → `--semgrep-rule-id` proxy: correct. -- `--ruleset` → `--semgrep-rule-set`: correct. -- `--severity` → `--semgrep-rule-severity`: correct. -- `--timeout` → `--ifds-analysis-timeout`: correct. -- `--debug-fact-reachability-sarif`: correct. -- `--debug-run-rule-tests` (used via `EnableRunRuleTests`): correct. - -### 2.5 `opentaint agent test-rules` — clarify argument - -The CLI already requires a project-model **directory** (it joins `project.yaml`). No code -change; just ensure the `Short`/`Long` help strings are unambiguous: - -``` -Usage: opentaint agent test-rules -``` - -(`project-model` is already used but is easy to misread as a file.) - -### 2.6 `opentaint scan` with compiled model — keep `--project-model` - -No CLI change required. Validation already tells users the right command via `suggest(...)`. -All misuse is documentation-side; fixed in §3. - -### 2.7 Sync `core/bin/main/...` to `core/src/main/...` - -Clean-rebuild the `core/bin` tree (or delete it if it's a stale IDE artefact). It diverges -from `core/src` (still declares the old `--external-methods-output`, the `--config` alias -on `approximationsConfig`, etc.), which will confuse anyone who runs the binary copy. - -Action: add a pre-commit / CI check or just `./gradlew clean build` + commit the result; if -`core/bin` is in `.gitignore`, drop the stale directory from the workspace. - ---- - -## 3. Skill & design changes (to match CLI) - -For each file, exhaustive edits: - -### 3.1 `agent/meta-prompt.md` - -- Line 36 example `opentaint scan ./opentaint-project \...` → replace with - `opentaint scan --project-model ./opentaint-project \...`. -- Line 40 `--external-methods ./results/external-methods.yaml` → replace with - `--track-external-methods` and add note that files are emitted next to the SARIF in - the analyzer output directory. -- Line 42 "Collect `report.sarif`, `external-methods-without-rules.yaml`..." → update paths - to `/external-methods-{without,with}-rules.yaml`. -- Line 39 `--rule-id ` → add a note that IDs are `.yaml:`. -- Layout block (§Working Directory Layout): drop `agent-approximations/classes/`; use - `agent-approximations/src/` only (the CLI auto-compiles). -- Line 99 remove the "library rules auto-included via join-mode refs" claim; replace with: - "every rule ID you want active (including library rules referenced by join rules) must be - listed explicitly in `--rule-id`". - -### 3.2 `agent/skills/run-analysis.md` - -- Every `opentaint scan ./opentaint-project \` → `opentaint scan --project-model ./opentaint-project \`. -- Replace all `--external-methods ./results/external-methods.yaml` with - `--track-external-methods`. -- Rewrite the "base path" note: the analyzer writes - `/external-methods-{without,with}-rules.yaml`, where `` is the - directory that contains the SARIF file. The user does **not** choose filenames. -- Update the "Outputs" section accordingly. -- "Key Flags" table: replace `--external-methods` row with `--track-external-methods`; - mark `--approximations-config` as repeatable. - -### 3.3 `agent/skills/create-rule.md` - -- Line 15 `RULES_DIR=$(opentaint rules-path)` → `RULES_DIR=$(opentaint agent rules-path)`. -- §6 example: `opentaint scan ./opentaint-project \...` → `opentaint scan --project-model ./opentaint-project \...`. -- Keep the `--rule-id .yaml:` guidance (already correct). - -### 3.4 `agent/skills/create-approximation.md` - -- Remove the manual `javac` block (§2). The Go CLI auto-compiles `.java` from the - `--dataflow-approximations` directory. -- Replace §3 example `opentaint scan ./opentaint-project` with - `opentaint scan --project-model ./opentaint-project`. -- Change `--dataflow-approximations ./agent-approximations/classes` to - `--dataflow-approximations ./agent-approximations/src` (CLI handles compilation). - -### 3.5 `agent/skills/create-yaml-config.md` - -- Replace `opentaint scan ./opentaint-project` with - `opentaint scan --project-model ./opentaint-project`. - -### 3.6 `agent/skills/debug-rule-reachability.md` - -- Same `--project-model` replacement. -- Keep the single `--rule-id` warning. - -### 3.7 `agent/skills/test-rule.md` - -- Already uses `opentaint agent test-rules` and `opentaint agent init-test-project`. No - change needed beyond one pass to verify the project-model argument is a directory - (already is). - -### 3.8 `agent/skills/analyze-findings.md` - -- Under §3 ("Process external methods"), update to fixed filenames in the SARIF output - directory; remove the "base path" wording. - -### 3.9 `agent/skills/build-project.md`, `discover-entry-points.md`, `generate-poc.md` - -- No CLI usages to fix (these are about source reading and documentation). -- Spot-check: `discover-entry-points.md` mentions - `--debug-run-analysis-on-selected-entry-points`; verify it is still in - `AbstractAnalyzerRunner.kt` (it is, line 45). Keep. - -### 3.10 `agent-mode/design/agent-mode-design.md` - -This is the biggest offender; edit inline, do **not** re-open the design question: - -- §1.1 "Kotlin CLI flag: `--external-methods-output `" → replace with - "Kotlin CLI flag: `--track-external-methods` (boolean). Output filenames are fixed - (`external-methods-{without,with}-rules.yaml` in `--output-dir`)." -- §1.1 "Go CLI flag: `--external-methods `" → "Go CLI flag: - `--track-external-methods`." -- §1.2 Kotlin CLI rename note: keep but mark `approximationsConfig` as `List`. -- §1.6 rewrite: remove "referenced library rules are automatically included"; state - explicitly: "Each rule whose ID is not in the filter is dropped, including library rules - referenced via `refs`. Callers must list every rule they want active." -- §2.1 Complete Command Reference: - - Every `opentaint scan ./opentaint-project/project.yaml` → either `opentaint scan ` - (for compile+scan) or `opentaint scan --project-model ./opentaint-project`. - - `opentaint test-rules` → `opentaint agent test-rules`, and argument is a directory. - - `opentaint rules-path` → `opentaint agent rules-path`. - - `opentaint init-test-project` → `opentaint agent init-test-project`. - - `--external-methods ` row → `--track-external-methods`. - - `--rule-id my-vulnerability` → `--rule-id java/security/my-vuln.yaml:my-vulnerability`. -- §2.2 Command Builder mapping: - - `--external-methods ` → `--external-methods-output ` row → delete; replace - with `--track-external-methods` → `--track-external-methods`. -- §3.3 / §3.5 / §3.7 / §3.8 / §3.9 examples: propagate the same three changes - (`--project-model`, `--track-external-methods`, full `--rule-id`). -- Appendix A: `opentaint init-test-project` → `opentaint agent init-test-project`; - `opentaint test-rules ./agent-test-compiled/project.yaml` → - `opentaint agent test-rules ./agent-test-compiled`. -- Appendix C: keep (output format itself is correct, only the path control prose in body is - wrong). - ---- - -## 4. Execution order - -Pick the order to minimise churn re-testing: - -1. **CLI code changes** (§2.1, §2.2, §2.5 help text, §2.7 bin cleanup). - After this step, `opentaint scan --track-external-methods` and repeatable - `--approximations-config` work end-to-end against the unchanged Core. -2. **Test suite sync** (§4.5). Update `conftest.py` + the five `test_*.py` files - so the CLI changes can be validated. Running the suite after step 2 is the - primary regression gate for the whole mitigation. -3. **Skill updates** (§3.1–§3.9). These only touch `.md`; do in one pass with a single - `edit_file` per skill, guided by this plan. -4. **Design doc rewrite** (§3.10). Largest text change; do last so the design reflects - settled CLI + skill wording. -5. **Verification pass** — for each updated file, grep for the banned tokens and fail the - build if any survive: - ``` - opentaint rules-path -> must be absent (except after "agent ") - opentaint test-rules -> must be absent - opentaint init-test-project -> must be absent - --external-methods -> must be absent (replaced by --track-external-methods) - --external-methods-output -> must be absent (Core doesn't have it) - opentaint scan ./opentaint-project -> must be absent - scan .*project\.yaml -> must be absent - --rule-id my-vulnerability -> must be absent (must be full ID) - ``` - Add this grep as a `scripts/check-docs.sh` and run it in CI. - -## 4.5 `agent-mode/test/` — verification suite fixes - -The pytest suite under `agent-mode/test/` is currently wired to the old/wrong -CLI surface. After Core freezes and CLI is corrected, every test file needs targeted -edits; otherwise **no test can pass** against the new CLI (scan rejects the model path, -`--external-methods` no longer exists, etc.). Changes below are keyed to the decisions -made in §1–§3. - -### 4.5.1 `agent-mode/test/conftest.py` - -- `OpenTaintCLI.scan(...)` (lines ~190–230): - - Signature: replace `external_methods: Optional[str] = None` with - `track_external_methods: bool = False`. - - Body: when the incoming `project_path` points at a pre-compiled model (directory - containing `project.yaml` or the file itself), pass `--project-model

` instead of - a positional argument. Pseudocode: - ```python - p = Path(project_path) - if p.name == "project.yaml" and p.is_file(): - p = p.parent - if (p / "project.yaml").is_file(): - cmd = self._base_cmd() + ["scan", "-o", output, "--project-model", str(p)] - else: - cmd = self._base_cmd() + ["scan", str(p), "-o", output] - ``` - - Flag emission: drop `--external-methods`; when `track_external_methods=True`, append - `--track-external-methods` (no value). -- `OpenTaintCLI.test_rules(...)` (lines ~230–245): the current code already passes a - directory — just verify the swap from `project.yaml` file to parent directory stays - (it does). No semantic change required; keep as-is. -- Helper `_derive_external_methods_paths(base_path)` and `load_external_methods(base_path)` - (lines ~335–380): switch them to take the **SARIF path** (or its parent directory) and - return the two fixed filenames in that directory: - ```python - def _derive_external_methods_paths(sarif_path: Path) -> tuple[Path, Path]: - parent = sarif_path.parent - return ( - parent / "external-methods-without-rules.yaml", - parent / "external-methods-with-rules.yaml", - ) - - def load_external_methods(sarif_path: Path) -> dict: ... - def external_methods_exist(sarif_path: Path) -> bool: ... - ``` -- No other helper changes needed; `count_external_methods`, `sarif_*`, and timing helpers - are agnostic to CLI wiring. - -### 4.5.2 `agent-mode/test/test_external_methods.py` - -Every test in this file threads the `ext_methods_path = tmp_output / "external-methods.yaml"` -variable through `external_methods=str(ext_methods_path)` and then loads the pair via -`load_external_methods(ext_methods_path)`. After the CLI change: - -- Drop the `ext_methods_path` computation entirely. -- Call `cli.scan(..., track_external_methods=True)`. -- Pass the SARIF path to `load_external_methods` / `external_methods_exist` (they are the - fixed files next to SARIF). - -All three test classes (`TestExternalMethodsBasic`, `TestExternalMethodsContent`, -`TestExternalMethodsWithApproximations`, `TestExternalMethodsAlongsideSarif`) are updated -the same way; the `run1`/`run2` subdirectory pattern in -`test_approximations_reduce_without_rules` is kept so the two runs don't collide. - -### 4.5.3 `agent-mode/test/test_full_loop.py` - -- `test_full_agent_loop` sets `ext_methods_path = ws["results"] / "external-methods-1.yaml"` - and `-2.yaml`, passes them to `cli.scan(external_methods=...)`, and inspects them. -- The two scan runs share `ws["results"]`, which means the fixed filenames would - collide. Fix: give each scan its own subdir (`ws["results"] / "run-1"` and `run-2`), - and write SARIF into that subdir. Then `load_external_methods(sarif_path)` picks up - the two files next to it. -- Replace `external_methods=str(...)` with `track_external_methods=True`. -- Replace every `cli.scan(project_path=str(stirling_project), ...)` call's argument - handling via conftest (no per-test change needed once conftest uses `--project-model`). - -### 4.5.4 `agent-mode/test/test_approximations.py` - -- No `external_methods=` usage — only scan-with-approx. The fix is entirely indirect via - conftest (project-model routing). The test `test_approximations_change_results` keeps its - separate `run1` / `run2` output dirs; no collision. -- `test_approximations_config_with_custom_ruleset` currently passes a single - `approximations_config` value; this continues to work since CLI accepts the flag once. - If we also promote `cli.scan`'s signature to accept a list (optional), add a follow-up. -- Verify `test_approximation_compilation_failure` against the current - `compile_approximations.go` — keep assertions. - -### 4.5.5 `agent-mode/test/test_rules.py` - -- No `external_methods=` usage. Indirect fix via conftest (`--project-model`). -- `test_rules_path_command`: keep — already uses `cli.rules_path()` which is - `opentaint agent rules-path`. -- `test_init_test_project`, `test_rule_test_all_pass`, - `test_rule_test_detects_false_negative`: keep — already use - `cli.init_test_project` / `cli.test_rules` (both under `agent` subcommand). -- `test_rule_test_all_pass` still passes `project_path=str(compiled_dir / "project.yaml")` - to `cli.test_rules`. Conftest already strips `project.yaml` for that method; keep. - -### 4.5.6 `agent-mode/test/test_build.py` - -- No external-methods or rule-id references that need changing. -- `test_scan_nonexistent_project`, `test_scan_missing_output_flag`: keep as-is (exit-code - checks). -- All other tests pass `stirling_project` which resolves via conftest — will work once - conftest routes pre-compiled models through `--project-model`. - -### 4.5.7 Run/verify procedure - -After CLI and test edits: - -1. `cd cli && go build -o ./bin/opentaint .` -2. Build local JARs once (`cd core && ./gradlew build`) so the hidden `--analyzer-jar` / - `--autobuilder-jar` resolution finds them. -3. `cd agent-mode/test && pytest -m "not slow" -q` for the fast smoke set, then - `pytest -q` for the full (slow) set. -4. Expected outcomes: - - `test_external_methods.py::TestExternalMethodsBasic::test_scan_produces_external_methods_file` passes because the CLI now enables tracking and the helper looks at the fixed file names. - - `test_full_loop.py::test_full_agent_loop` passes after the per-run subdir split. - - `test_rules.py::test_rules_path_command` passes (already correct). - - `test_build.py::test_scan_with_builtin_rules` passes because conftest now uses - `--project-model`. -5. If a test still fails, inspect the CLI's stderr (captured in `CLIResult.stderr`) for - the real Clikt / analyzer error; do not reintroduce the old flag names. - ---- - -## 5. Out of scope / explicitly NOT changed - -- **Core option rename** (e.g. bringing `--external-methods-output` back). Design's §1.1 - lost out to the frozen-Core rule; the external-methods output path is not configurable. -- **"Auto-include library refs" in `SemgrepRuleLoader`**. Implementation requires - explicit IDs; design doc gets rewritten, not the loader. -- **`agent-approximations/classes/` as the canonical compiled directory**. The CLI - auto-compiles; we commit to `src/`. -- **Changing `opentaint agent ...` subcommands to top-level**. Current grouping stays; - docs follow the grouping. diff --git a/agent-mode/plan.md b/agent-mode/plan.md deleted file mode 100644 index 7f0075b62..000000000 --- a/agent-mode/plan.md +++ /dev/null @@ -1,303 +0,0 @@ -# Agent Mode — Implementation Progress - -Tracking document for the implementation of agent mode features. -Refer to `agent-mode/impl/agent-mode-impl.md` for the full design. - ---- - -## Phase A: Kotlin Analyzer Changes - -### A1: ExternalMethodTracker class — [x] -- New file: `ExternalMethodTracker.kt` -- Data classes: `ExternalMethodRecord`, `SkippedExternalMethods`, `ExternalMethodAggregation` -- Thread-safe via ConcurrentHashMap (same pattern as TaintSinkTracker) - -### A2: Wire tracker into analysis pipeline — [x] -- `TaintAnalysisContext.kt` — added `externalMethodTracker: ExternalMethodTracker?` -- `TaintAnalysisUnitRunnerManager.kt` — constructor param, pass through `TaintAnalysisManagerWithContext` -- `JIRTaintAnalyzer.kt` — constructor param, pass to engine, expose `getSkippedExternalMethods()` - -### A3: Report external methods from flow function — [x] -- `JIRMethodCallFlowFunction.kt` — report to tracker in `applyPassRulesOrCallSkip()` -- Uses `passThroughFacts.isSome` to determine `passRulesApplied` - -### A4: External methods output flag + YAML serialization — [x] -- `ProjectAnalysisOptions.kt` — `externalMethodsOutput: Path?` -- `ProjectAnalyzerRunner.kt` — `--external-methods-output` Clikt flag -- `ProjectAnalyzer.kt` — `@Serializable` data classes + kaml `encodeToStream` - -### A5: Rule ID filter — [x] -- `SemgrepRuleLoader.kt` — `ruleIdFilter` parameter, `ruleIdAllow()` in `skip()` -- `ProjectAnalysisOptions.kt` — `semgrepRuleId: List` -- `ProjectAnalyzerRunner.kt` — `--semgrep-rule-id` Clikt flag -- `LoadSemgrepRules.kt` — pass filter through - -### A6: Combined config+rules — [x] -- `ProjectAnalyzer.kt` — removed `check()`, added `SemgrepRulesWithCustomConfig` variant -- `ProjectAnalyzerRunner.kt` — renamed `--config` → `--approximations-config` (with `--config` alias) - -### A7: Custom dataflow approximations path — [x] -- `DataFlowApproximationLoader.kt` — `customApproximationPaths: List` in `Options` -- `ProjectAnalyzerRunner.kt` — `--dataflow-approximations` Clikt flag - ---- - -## Phase B: Go CLI Changes - -### B1: Hidden dev flags — [x] -- `root.go` — `--analyzer-jar`, `--autobuilder-jar` persistent hidden flags -- `global.go` — `JarPath` fields on `Analyzer`/`Autobuilder` structs - -### B2: AnalyzerBuilder extensions — [x] -- `command_builder.go` — new fields, setters, `BuildNativeCommand` entries - -### B3: New scan flags — [x] -- `scan.go` — `--rule-id`, `--approximations-config`, `--dataflow-approximations`, `--external-methods` - -### B4: Agent command group — [x] -- `agent.go`, `agent_skills.go`, `agent_prompt.go`, `agent_rules_path.go`, `agent_test_rules.go` -- `opentaint_home.go` — `GetBundledAgentPath()` -- `compile.go` — autobuilder jar override support - ---- - -## Phase C: Skills and Meta-Prompt - -### C1: Write skill files — [x] -- 9 skill files in `agent/skills/` - -### C2: Write meta-prompt — [x] -- `agent/meta-prompt.md` - -### C3-C4: Release pipeline changes — [ ] -- Bundle agent files + test-util JAR in release (deferred to release work) - ---- - -## Phase D: Validation - -### D1: Run existing tests — [x] -- 6 passed, 1 skipped (quick tests) -- Fixed conftest JAR resolution order - -### D2: Run slow tests — [x] -- 6 passed, 1 failed (autobuilder JAR not built locally — expected) -- Scan tests against Stirling-PDF all pass - -### D3: Run new_feature tests — [x] -- 1 passed (rules-path command) - ---- - -## Phase E: CLI Testing and Fixes - -### E1: Revert ruleIdAllow to match full rule ID only — [x] -- `SemgrepRuleLoader.kt` — removed `shortRuleId` fallback, keep only `info.ruleId` match -- Full rule ID format is `:`, e.g. `java/security/path-traversal.yaml:path-traversal` - -### E2: Skip external method tracking for static fact base — [x] -- `JIRMethodCallFlowFunction.kt` — added `startFactBase !is AccessPathBase.ClassStatic` guard -- External methods YAML reduced from ~10,643 to ~2,246 lines (no `` entries) - -### E3: Update skills with full rule ID format — [x] -- `agent/skills/create-rule.md` — documented full ID format `:`, how to discover IDs -- `agent/skills/run-analysis.md` — updated `--rule-id` examples with full IDs -- `agent/skills/test-rule.md` — clarified annotation `id` field vs full rule ID - -### E4: Rebuild analyzer and CLI, retest — [x] -- Rebuilt `projectAnalyzerJar` + Go CLI binary -- CLI scan with `--rule-id java/security/path-traversal.yaml:path-traversal` → 20 findings -- External methods output confirmed clean (0 `` entries) -- Updated test expectations: full rule IDs, fact position format (``, `arg(N)`, `ret`) -- All pytest tests pass (29 passed, 1 skipped, 5 pre-existing failures excluded) - ---- - -## Phase F: Test Infrastructure and Missing Features - -### F1: Refactor tests to use Go CLI only — [x] -- Removed dual-mode (Go CLI + direct JAR) from `conftest.py` -- All tests now require the Go CLI binary at `cli/bin/opentaint` (dev mode) -- Hidden `--analyzer-jar` / `--autobuilder-jar` flags auto-detected for local builds -- Removed `_find_java()`, `has_cli` branching, direct JAR invocation code paths -- Fixed CLI scan path: auto-strip `project.yaml` from file paths (CLI expects directory) - -### F2: `opentaint agent init-test-project` command — [x] -- New file: `cli/cmd/agent_init_test_project.go` -- Creates directory structure, copies test-util JAR, generates `build.gradle.kts` and `settings.gradle.kts` -- Supports `--dependency` flag for Maven coordinates -- Resolves test-util JAR from bundled, install, or dev build tiers -- `test_init_test_project` now passes (was previously skipping) - -### F3: Add timing instrumentation to all tests — [x] -- Added pytest hooks (`pytest_runtest_setup`/`pytest_runtest_teardown`) for per-test timing -- Added per-phase `time.time()` checkpoints to `test_full_agent_loop` -- All test output now includes `[timing]` lines with elapsed seconds - -### F4: Run all tests via CLI, write test report — [x] -- Full suite: 31 passed, 3 failed (all pre-existing), 0 skipped -- Report written to `agent-mode/test-status.md` -- Pre-existing failures: analyzer exit code 0 on approximation errors (2 tests), autobuilder JAR not built (1 test) - ---- - -## Phase G: Known Issues - -### G1: Fix sink rule ID mismatch in fixture rule and tests — [x] -- Fixed `#java-path-traversal-sink` → `#java-path-traversal-sinks` in fixture rule and inline test YAML -- Tests now produce 4 path-traversal findings on Stirling-PDF - -### G2: Fix `agent test-rules` Go command — missing flags and output — [x] -- Rewrote `agent_test_rules.go`: local flag vars for `--ruleset`, `-o`, `--timeout`, `--max-memory`, `--rule-id` -- Output dir uses `-o` flag (temp dir only as fallback); user rulesets passed to builder - -### G3: Strengthen test assertions — remove vacuous passes — [x] -- `test_rule_test_detects_false_negative`: added `assert result_json.exists()` + `test_result.assert_ok()` -- `test_scan_stirling_with_path_traversal_rule`: added `assert len(findings) > 0` -- `test_approximations_change_results`: added `assert count1 != count2` -- `test_full_agent_loop`: added `assert len(findings) > 0` -- Updated `sarif_findings_for_rule()` to match both exact and semgrep-style dot-separated IDs - -### G4: Analyzer exits non-zero on errors + auto-compile approximations — [x] -- `AbstractAnalyzerRunner.runProjectAnalysisRecursively()`: re-throw exceptions after logging -- `AbstractAnalyzerRunner.main()`: removed `return` on project load failure (let exception propagate) -- NEW: `cli/cmd/compile_approximations.go` — auto-compile `.java` files in `--dataflow-approximations` - - Resolves `javac` from managed JDK - - Extracts approximation utility classes from analyzer JAR (`opentaint-dataflow-approximations/` prefix) - - Resolves project dependencies from `project.yaml` for the compilation classpath - - Compiles with `javac -source 8 -target 8` and returns compiled output directory - - On compilation failure, reports `javac` output and aborts scan -- Wired into `scan.go`: `compileApproximationsIfNeeded()` called for each `--dataflow-approximations` path - -### G5: Build autobuilder JAR or skip test gracefully — [x] -- `test_rule_test_all_pass`: skip with clear message when compilation fails (autobuilder not available) -- `test_rule_test_detects_false_negative`: same skip logic - -### G6: Verify error message in test_invalid_approximations_config_errors — [x] -- Added assertion checking combined stdout+stderr for config/yaml/parse/fail keywords - -### G7: CLI errors go to stdout — update tests to check both — [x] -- `test_approximation_compilation_failure`: check `combined_output` (stdout + stderr) -- `test_invalid_approximations_config_errors`: same approach - -### G8: Better timing breakdown — [x] -- Added `parse_analyzer_timing()` helper to `conftest.py` — parses IFDS elapsed time, phase markers, vulnerability count from analyzer output -- Added `print_timing_breakdown()` helper for formatted output -- Wired into `test_full_agent_loop` for initial scan and rescan phases - ---- - -## Phase H: Discovered Issues (from design-vs-implementation comparison) - -### H1: ~~Release pipeline — bundle agent files~~ → Embed agent files in binary — [x] -- Agent files (~28KB) embedded in Go binary via `go:generate` + `go:embed` -- New package `cli/internal/agent/` with `GetPath()`: - - Tier 1: bundled `/lib/agent/` (release archives) - - Tier 2: extract from embedded FS to `~/.opentaint/agent/` (go install, dev builds) - - SHA-256 content hash marker for staleness detection -- Removed `GetBundledAgentPath()` from `opentaint_home.go` -- Updated `agent_prompt.go` and `agent_skills.go` to use `agent.GetPath()` -- Works with: `go install`, released builds, dev builds - -### H2: Release pipeline — bundle test-util JAR — [ ] -- `.github/workflows/release-cli.yaml` — add step to build/download `opentaint-sast-test-util.jar` to `cli/lib/` -- Without this, `opentaint agent init-test-project` fails in released builds -- `resolveTestUtilJar()` tier 1/2 won't find the JAR in release archives -- **Priority: HIGH** - -### H3: Fix short rule IDs in skill docs — [x] -- `agent/skills/create-yaml-config.md:101` — uses `--rule-id my-vulnerability` instead of full format `java/security/my-vuln.yaml:my-vulnerability` -- `agent/skills/create-approximation.md:66` — same issue -- Inconsistent with the documented full rule ID format in `create-rule.md` and `run-analysis.md` -- **Priority: MEDIUM** - -### H4: ~~Agent path resolution — single-tier only~~ — [x] -- Superseded by H1: agent files are now embedded in binary and extracted on demand -- Two-tier resolution: bundled (release) → embedded extraction (`~/.opentaint/agent/`) -- No longer depends on external file distribution - -### H5: Env var naming mismatch in docs — [ ] -- Design docs say `OPENTAINT_ANALYZER_JAR` / `OPENTAINT_AUTOBUILDER_JAR` -- Actual viper binding uses `OPENTAINT_ANALYZER_JAR_PATH` / `OPENTAINT_AUTOBUILDER_JAR_PATH` -- Update `agent-mode/impl/agent-mode-impl.md` section 5.2 to match actual env var names -- **Priority: LOW** - -### H6: Pre-existing analyzer exit code issues — [ ] -- `test_approximation_compilation_failure` — analyzer still exits 0 on some approximation loading errors -- `test_duplicate_approximation_errors` — same root cause (bijection violation swallowed) -- G4 fix addressed `runProjectAnalysisRecursively` but approximation loading errors in `installApproximations()` may not propagate -- **Priority: LOW** - ---- - -## Phase I: Skill Fixes and Clarifications - -### I1: Test-util JAR not bundled — `init-test-project` broken after `go install` — [x] -- JAR is only 1.8KB (2 annotation classes) — small enough to embed in binary -- New package `cli/internal/testutil/` with `go:generate` + `go:embed`: - - `go:generate` copies JAR from `core/opentaint-sast-test-util/build/libs/` to `jar/` - - `go:embed jar/opentaint-sast-test-util.jar` embeds the JAR data - - `ExtractJar()` extracts to `~/.opentaint/test-util/` with SHA-256 content hash staleness detection -- Added Tier 4 (embedded extraction) to `resolveTestUtilJar()` as fallback after bundled/install/dev-build -- Also fixed `defer os.RemoveAll(tmpDir)` bug in `agent_test_rules.go` — temp dir no longer deleted -- Added output path printing: `Results directory:` and `Test results:` lines - -### I2: `test-rule.md` — unclear where to find test results — [x] -- Updated skill to always specify `-o ./agent-test-results` in the `opentaint agent test-rules` example -- Changed result reading instruction from generic "in the output directory" to explicit `./agent-test-results/test-result.json` - -### I3: `scan` command expects directory, not `project.yaml` path — [x] -- Changed all scan examples from `./opentaint-project/project.yaml` to `./opentaint-project` -- Files fixed: `run-analysis.md` (3 examples), `create-yaml-config.md`, `create-approximation.md`, `create-rule.md` -- Added note to `run-analysis.md`: scan path is the directory containing `project.yaml`, not the file itself - -### I4: `analyze-findings.md` — clarify external methods represent missed *fact propagations* — [x] -- Rewrote section 3 to explain that external methods show where the analyzer killed dataflow facts -- Added priority levels: HIGH (generic propagators like collections/strings), MEDIUM (lambda/callback), LOW (vulnerability-specific) -- Added concrete examples: `List.add/get`, `Map.put/get`, `StringBuilder.append`, `Iterator.next` -- Updated batch processing guidance to start with generic propagators - -### I5: `build-project.md` — add manual build fallback with `opentaint project` and `--package` warning — [x] -- Added section 2b: manual build with `./gradlew build` or `mvn package` followed by `opentaint project` -- Added CRITICAL warning about `--package` being mandatory -- Added multi-module project example with multiple `--classpath` and `--package` flags -- Updated troubleshooting: added "Analysis hangs" entry pointing to missing `--package` - -### I6: Update meta-prompt scan example to use directory path — [x] -- Fixed `meta-prompt.md` line 37: `./opentaint-project/project.yaml` → `./opentaint-project` -- Part of I3 fix - -### I7: Split external methods output into two files — [x] -- **Problem**: Agent sees both `withoutRules` and `withRules` in a single file and doesn't understand only `withoutRules` contains taint-killing methods -- **Kotlin**: Rewrote `writeExternalMethodsYaml()` in `ProjectAnalyzer.kt` to derive two filenames from the base path: - - `-without-rules.yaml` — methods with NO approximation rules (taint killed here) - - `-with-rules.yaml` — methods with existing approximation rules - - Each file has a `methods:` top-level key with the list of records - - Removed `SerializedSkippedExternalMethods` (combined wrapper), added `SerializedExternalMethodRecordList` -- **Go CLI**: Updated `--external-methods` flag help text to document two-file output -- **Skills**: Updated `run-analysis.md` (Outputs section), `analyze-findings.md` (Section 3), `meta-prompt.md` (Phase 3, Phase 4, directory layout) -- **Tests**: Updated `conftest.py`: - - `load_external_methods()` now derives two paths from base, reads both files, recombines into legacy dict - - Added `external_methods_exist()` helper - - Updated `test_external_methods.py` and `test_full_loop.py` to use `external_methods_exist()` instead of `.exists()` -- All 6 external methods tests pass, full loop test passes - ---- - -## Git Commits - -| Commit | Tasks | Description | -|--------|-------|-------------| -| e204e455 | A1-A7 | Phase A: Kotlin analyzer agent-mode features | -| e53f8c16 | Fix | Rename ExternalMethodResults -> SkippedExternalMethods, use kaml | -| 6d445b36 | B1-B4 | Phase B: Go CLI agent-mode features | -| 8734ae31 | C1-C2 | Phase C: Skills and meta-prompt | -| 7d094862 | D1 | Fix conftest JAR resolution order | -| 4e06427b | E-plan | Add Phase E tasks to plan | -| 67b9276f | E1-E4 | Phase E: Filter static facts, update rule ID format in skills/tests | -| 7c3f94ed | F-plan | Add Phase F to plan | -| 195d23a9 | F1 | Refactor tests to CLI-only mode | -| 592f2667 | F2 | Implement opentaint agent init-test-project command | -| 63c84b96 | F3 | Add timing instrumentation to all tests | -| 235af7e3 | F4 | Fix CLI scan path, run full suite, write test report | -| (pending) | G1-G8 | Phase G: Fix known issues, auto-compile approximations, strengthen tests | diff --git a/agent-mode/test-status.md b/agent-mode/test-status.md deleted file mode 100644 index fc12d0210..000000000 --- a/agent-mode/test-status.md +++ /dev/null @@ -1,95 +0,0 @@ -# Agent Mode — Test Status Report - -**Date**: 2026-03-31 -**CLI binary**: `cli/bin/opentaint` (dev build with `--analyzer-jar` override) -**Analyzer JAR**: `core/build/libs/opentaint-project-analyzer.jar` -**Test target**: Stirling-PDF at `/home/sobol/data/Stirling-PDF/seqra-project/project.yaml` -**Total**: 34 tests (8 quick, 26 slow) -**Results**: 31 passed, 3 failed (all pre-existing) - ---- - -## Quick Tests (non-slow) - -| Suite | Test | Scenario | Status | Time | -|-------|------|----------|--------|------| -| build | test_scan_nonexistent_project | Error: scan with bad path | PASS | <0.1s | -| build | test_scan_missing_output_flag | Error: scan without -o | PASS | <0.1s | -| rules | test_builtin_rules_directory_exists | Verify rule directory structure | PASS | <0.1s | -| rules | test_builtin_lib_rules_exist | Verify library rule files | PASS | <0.1s | -| rules | test_rules_path_command | `opentaint agent rules-path` | PASS | <0.1s | -| rules | test_custom_rules_are_valid_yaml | Validate fixture rule YAML | PASS | <0.1s | -| rules | test_library_rule_has_lib_option | Library rule options.lib:true | PASS | <0.1s | -| rules | test_security_rule_has_metadata | Security rule CWE metadata | PASS | <0.1s | - -## Slow Tests — Build (test_build.py) - -| Test | Scenario | Status | Time | -|------|----------|--------|------| -| test_scan_with_builtin_rules | Scan Stirling-PDF, 69 findings across 9 rules | PASS | 44.4s | -| test_scan_with_custom_ruleset_directory | Scan with explicit rules path | PASS | 44.1s | -| test_scan_severity_filter_note | Include note-severity findings | PASS | 50.4s | -| test_scan_from_source_directory | Auto-compile + scan | PASS | 78.6s | -| test_compile_source_project | Compile-only (autobuilder) | PASS | 28.7s | - -## Slow Tests — Rules (test_rules.py) - -| Test | Scenario | Status | Time | Notes | -|------|----------|--------|------|-------| -| test_scan_with_rule_id_filter | `--rule-id` filters SARIF output | PASS | 25.0s | | -| test_scan_without_rule_id_filter_includes_all | No filter → multiple rule IDs | PASS | 42.6s | | -| test_init_test_project | `opentaint agent init-test-project` | PASS | <0.1s | Previously skipped | -| test_rule_test_all_pass | Compile test project + test-rules | **FAIL** | 4.2s | Pre-existing: autobuilder JAR not built locally | -| test_rule_test_detects_false_negative | FN detection in test framework | PASS | 5.1s | | -| test_scan_stirling_with_path_traversal_rule | Custom rule on Stirling-PDF | PASS | 26.1s | | - -## Slow Tests — Approximations (test_approximations.py) - -| Test | Scenario | Status | Time | Notes | -|------|----------|--------|------|-------| -| test_scan_with_approximations_config | YAML passThrough config | PASS | 22.0s | | -| test_approximations_config_with_custom_ruleset | Config + custom ruleset together | PASS | 25.2s | | -| test_invalid_approximations_config_errors | Bad YAML → error | PASS | 0.3s | | -| test_scan_with_java_source_approximations | Code-based .java approximations | PASS | 46.0s | | -| test_approximation_compilation_failure | Bad Java source → error | **FAIL** | 44.7s | Pre-existing: analyzer exits 0 despite error | -| test_duplicate_approximation_errors | Duplicate builtin class → error | **FAIL** | 43.6s | Pre-existing: analyzer exits 0 despite error | -| test_scan_with_both_approximation_types | Combined YAML + Java approx | PASS | 24.4s | | -| test_approximations_change_results | Compare with/without approx | PASS | 49.3s | | - -## Slow Tests — External Methods (test_external_methods.py) - -| Test | Scenario | Status | Time | -|------|----------|--------|------| -| test_scan_produces_external_methods_file | `--external-methods` flag | PASS | 45.5s | -| test_external_methods_structure | YAML structure validation | PASS | 52.8s | -| test_without_rules_nonempty_for_real_project | withoutRules non-empty (324) | PASS | 50.0s | -| test_with_rules_contains_standard_library_methods | withRules has stdlib (167) | PASS | 49.8s | -| test_approximations_reduce_without_rules | Approx reduces withoutRules | PASS | 75.8s | -| test_both_outputs_produced | SARIF + external methods together | PASS | 45.9s | - -## Slow Tests — Full Loop (test_full_loop.py) - -| Test | Scenario | Status | Time | Phase Timing | -|------|----------|--------|------|-------------| -| test_full_agent_loop | End-to-end agent workflow | PASS | 25.6s | P1: 0.0s, P2: 0.0s, P3 (scan): 25.6s, P3b: 25.6s, P4: 25.6s | - ---- - -## Pre-Existing Failures (not caused by agent-mode) - -1. **`test_approximation_compilation_failure`** — The Kotlin analyzer catches the compilation error internally but still exits with code 0. The Go CLI propagates this as success. Fix requires analyzer to exit non-zero on approximation compilation failures. - -2. **`test_duplicate_approximation_errors`** — Same root cause: analyzer detects the bijection violation but exits with code 0. Fix requires analyzer exit code propagation. - -3. **`test_rule_test_all_pass`** — The `compile` step fails because the autobuilder JAR is not built locally. This test requires `./gradlew :autobuilder:jar` to be run first. The test itself is correct; the environment is incomplete. - ---- - -## Phase F Summary - -| Task | Description | Status | -|------|-------------|--------| -| F1 | Refactor tests to CLI-only (remove direct JAR mode) | Done | -| F2 | Implement `opentaint agent init-test-project` command | Done | -| F3 | Add timing instrumentation to all tests | Done | -| F4 | Run all tests via CLI, write test report | Done | diff --git a/agent-mode/test/agent-mode-test.md b/agent-mode/test/agent-mode-test.md deleted file mode 100644 index 8550a50e4..000000000 --- a/agent-mode/test/agent-mode-test.md +++ /dev/null @@ -1,2085 +0,0 @@ -# Agent Mode Test Pipeline - -## Table of Contents - -1. [Overview](#1-overview) -2. [Test Environment Setup](#2-test-environment-setup) -3. [Test Infrastructure (`conftest.py`)](#3-test-infrastructure-conftestpy) -4. [Test Suite 1: Project Build Scenarios](#4-test-suite-1-project-build-scenarios) -5. [Test Suite 2: Rule Generation Pipeline](#5-test-suite-2-rule-generation-pipeline) -6. [Test Suite 3: Approximations Generation/Override](#6-test-suite-3-approximations-generationoverride) -7. [Test Suite 4: External Methods Extraction](#7-test-suite-4-external-methods-extraction) -8. [Test Suite 5: Full Agent Loop (Integration)](#8-test-suite-5-full-agent-loop-integration) -9. [Running Tests](#9-running-tests) - ---- - -## 1. Overview - -This document defines a test pipeline for validating the agent-mode features designed in `agent-mode/design/agent-mode-design.md`. Tests use **Python (pytest)** scripts that invoke the `opentaint` Go CLI and the analyzer JAR directly, validating outputs against expected results. - -### Test target project - -All tests use the Stirling-PDF project at `/home/sobol/data/Stirling-PDF/seqra-project/project.yaml` — a real-world Spring Boot application with 538 Java source files, 3 modules (proprietary, core, common), and 400 dependencies. This project is already compiled (classes + dependencies + sources are in place), so tests can skip the build step for faster iteration, or exercise the build pipeline explicitly. - -### What we are testing - -The test pipeline validates that the **new CLI features** from the design doc work correctly: - -| Feature | Design Section | Test Suite | -|---|---|---| -| `opentaint scan` with pre-compiled project | §2.1 | Suite 1 | -| `opentaint compile` (autobuilder) | §2.1 | Suite 1 | -| `--ruleset` with custom rules | §2.1 | Suite 2 | -| `--rule-id` filter | §1.6, §2.1 | Suite 2 | -| `opentaint test-rules` | §1.5, §2.1 | Suite 2 | -| `opentaint init-test-project` | §1.8 | Suite 2 | -| `--approximations-config` (YAML passThrough) | §1.2, §2.1 | Suite 3 | -| `--dataflow-approximations` (code-based, auto-compile) | §1.3, §1.4, §2.1 | Suite 3 | -| `--external-methods` output | §1.1, §2.1 | Suite 4 | -| `opentaint rules-path` | §1.8, §2.1 | Suite 4 | -| Full loop: rule → test → scan → external methods → approx → rescan | §4 (Meta Prompt) | Suite 5 | - -### Constraints - -Since the new CLI features are **not yet implemented**, the tests serve two purposes: -1. **Specification** — define the expected behavior precisely so implementation can be verified -2. **Incremental validation** — tests that exercise current (existing) functionality can run today; tests for new features are marked `@pytest.mark.new_feature` and will pass once implemented - -Where a new CLI command doesn't exist yet, we fall back to invoking the analyzer JAR directly with the equivalent Kotlin CLI flags. This ensures we can test the **engine behavior** even before the Go CLI wrapper is ready. - ---- - -## 2. Test Environment Setup - -### Directory layout - -``` -agent-mode/test/ -├── agent-mode-test.md # This document -├── conftest.py # Shared fixtures and helpers -├── test_build.py # Suite 1: Project build scenarios -├── test_rules.py # Suite 2: Rule generation pipeline -├── test_approximations.py # Suite 3: Approximations -├── test_external_methods.py # Suite 4: External methods extraction -├── test_full_loop.py # Suite 5: Full agent loop -├── fixtures/ -│ ├── rules/ # Test rule YAML files -│ │ ├── java/ -│ │ │ ├── lib/ -│ │ │ │ └── stirling-source.yaml -│ │ │ └── security/ -│ │ │ ├── stirling-path-traversal.yaml -│ │ │ └── stirling-sqli.yaml -│ │ └── README.md -│ ├── approximations/ -│ │ ├── yaml/ -│ │ │ └── custom-propagators.yaml -│ │ └── java/ -│ │ └── StirlingPDFUtils.java -│ └── test-samples/ -│ └── src/main/java/test/ -│ ├── PathTraversalTest.java -│ └── SqlInjectionTest.java -└── pytest.ini -``` - -### Prerequisites - -```bash -# Python dependencies -pip install pytest pyyaml - -# OpenTaint CLI on PATH (or use --analyzer-jar / --autobuilder-jar flags for local dev) -which opentaint || echo "opentaint not on PATH — will use direct JAR invocation" - -# Stirling-PDF project available -test -f /home/sobol/data/Stirling-PDF/seqra-project/project.yaml -``` - -### `pytest.ini` - -```ini -[pytest] -testpaths = . -markers = - new_feature: Tests for features not yet implemented (deselect with -m "not new_feature") - slow: Tests that run full analysis (>60s) -``` - ---- - -## 3. Test Infrastructure (`conftest.py`) - -```python -""" -Shared fixtures and helpers for agent-mode tests. - -Handles two execution modes: -1. Go CLI mode: when `opentaint` is on PATH (production) -2. Direct JAR mode: when running against locally-built JARs (development) -""" - -import json -import os -import shutil -import subprocess -import tempfile -from dataclasses import dataclass, field -from pathlib import Path -from typing import Optional - -import pytest -import yaml - - -# ─── Paths ─────────────────────────────────────────────────────────────────── - -STIRLING_PROJECT = Path("/home/sobol/data/Stirling-PDF/seqra-project/project.yaml") -STIRLING_PROJECT_DIR = STIRLING_PROJECT.parent -OPENTAINT_ROOT = Path(__file__).resolve().parent.parent.parent # -> opentaint/ -FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures" -BUILTIN_RULES_DIR = OPENTAINT_ROOT / "rules" / "ruleset" - - -# ─── CLI Abstraction ───────────────────────────────────────────────────────── - -def _find_opentaint_cli() -> Optional[str]: - """Check if opentaint is on PATH.""" - return shutil.which("opentaint") - - -def _find_analyzer_jar() -> Optional[Path]: - """Find locally-built analyzer JAR.""" - candidates = [ - OPENTAINT_ROOT / "core" / "build" / "libs" / "opentaint-jvm-sast.jar", - OPENTAINT_ROOT / "core" / "build" / "libs" / "opentaint-project-analyzer.jar", - ] - for c in candidates: - if c.exists(): - return c - return None - - -def _find_autobuilder_jar() -> Optional[Path]: - """Find locally-built autobuilder JAR.""" - candidates = [ - OPENTAINT_ROOT / "autobuilder" / "build" / "libs" / "opentaint-project-auto-builder.jar", - ] - for c in candidates: - if c.exists(): - return c - return None - - -def _find_java() -> str: - """Find Java 21 (analyzer requires it).""" - # Check JAVA_HOME first - java_home = os.environ.get("JAVA_HOME") - if java_home: - java = Path(java_home) / "bin" / "java" - if java.exists(): - return str(java) - # Fall back to PATH - java = shutil.which("java") - if java: - return java - raise RuntimeError("Java not found. Set JAVA_HOME or add java to PATH.") - - -@dataclass -class CLIResult: - """Result of a CLI command execution.""" - returncode: int - stdout: str - stderr: str - command: list[str] - - @property - def ok(self) -> bool: - return self.returncode == 0 - - def assert_ok(self, msg: str = ""): - assert self.ok, ( - f"Command failed (rc={self.returncode}){': ' + msg if msg else ''}\n" - f" cmd: {' '.join(self.command)}\n" - f" stderr: {self.stderr[:2000]}" - ) - - def assert_failed(self, msg: str = ""): - assert not self.ok, ( - f"Command unexpectedly succeeded{': ' + msg if msg else ''}\n" - f" cmd: {' '.join(self.command)}\n" - f" stdout: {self.stdout[:2000]}" - ) - - -@dataclass -class OpenTaintCLI: - """ - Abstraction over the opentaint CLI. - - Supports two modes: - - Go CLI: uses `opentaint` binary from PATH - - Direct JAR: uses `java -jar analyzer.jar` for scan, `java -jar autobuilder.jar` for compile - """ - cli_path: Optional[str] = None - analyzer_jar: Optional[Path] = None - autobuilder_jar: Optional[Path] = None - java_path: str = "java" - timeout: int = 600 # seconds - - @property - def has_cli(self) -> bool: - return self.cli_path is not None - - def run(self, args: list[str], timeout: Optional[int] = None, env: Optional[dict] = None) -> CLIResult: - """Run an arbitrary command and return the result.""" - run_env = {**os.environ, **(env or {})} - t = timeout or self.timeout - try: - proc = subprocess.run( - args, - capture_output=True, - text=True, - timeout=t, - env=run_env, - ) - return CLIResult(proc.returncode, proc.stdout, proc.stderr, args) - except subprocess.TimeoutExpired: - return CLIResult(-1, "", f"Timeout after {t}s", args) - - def scan( - self, - project_path: str, - output: str, - rulesets: list[str] = None, - rule_ids: list[str] = None, - approximations_config: Optional[str] = None, - dataflow_approximations: Optional[str] = None, - external_methods: Optional[str] = None, - severity: list[str] = None, - timeout: int = 900, - max_memory: str = "8G", - extra_flags: list[str] = None, - ) -> CLIResult: - """Run opentaint scan (or direct analyzer JAR invocation).""" - - if self.has_cli: - cmd = [self.cli_path, "scan", project_path, "-o", output] - for rs in (rulesets or ["builtin"]): - cmd.extend(["--ruleset", rs]) - for rid in (rule_ids or []): - cmd.extend(["--rule-id", rid]) - if approximations_config: - cmd.extend(["--approximations-config", approximations_config]) - if dataflow_approximations: - cmd.extend(["--dataflow-approximations", dataflow_approximations]) - if external_methods: - cmd.extend(["--external-methods", external_methods]) - for sev in (severity or ["warning", "error"]): - cmd.extend(["--severity", sev]) - cmd.extend(["--timeout", f"{timeout}s", "--max-memory", max_memory]) - cmd.extend(extra_flags or []) - return self.run(cmd, timeout=timeout + 60) - - # Direct JAR invocation - assert self.analyzer_jar, "No analyzer JAR found" - output_dir = str(Path(output).parent) - sarif_name = Path(output).name - cmd = [ - self.java_path, f"-Xmx{max_memory}", - "-Dorg.opentaint.ir.impl.storage.defaultBatchSize=2000", - "-Djdk.util.jar.enableMultiRelease=false", - "-jar", str(self.analyzer_jar), - "--project", project_path, - "--output-dir", output_dir, - "--sarif-file-name", sarif_name, - f"--ifds-analysis-timeout={timeout}", - "--verbosity=info", - ] - for rs in (rulesets or []): - if rs == "builtin": - cmd.extend(["--semgrep-rule-set", str(BUILTIN_RULES_DIR)]) - else: - cmd.extend(["--semgrep-rule-set", rs]) - for rid in (rule_ids or []): - cmd.extend(["--semgrep-rule-id", rid]) - if approximations_config: - cmd.extend(["--config", approximations_config]) - if external_methods: - cmd.extend(["--external-methods-output", external_methods]) - for sev in (severity or ["warning", "error"]): - cmd.extend([f"--semgrep-rule-severity={sev}"]) - # Note: --dataflow-approximations needs auto-compile in Go CLI; - # for direct JAR, pass pre-compiled classes directory - if dataflow_approximations: - cmd.extend(["--dataflow-approximations", dataflow_approximations]) - cmd.extend(extra_flags or []) - return self.run(cmd, timeout=timeout + 60) - - def test_rules( - self, - project_path: str, - rulesets: list[str], - output_dir: str, - timeout: int = 300, - max_memory: str = "8G", - ) -> CLIResult: - """Run opentaint test-rules (or direct JAR with --debug-run-rule-tests).""" - - if self.has_cli: - cmd = [self.cli_path, "test-rules", project_path] - for rs in rulesets: - cmd.extend(["--ruleset", rs]) - cmd.extend(["-o", output_dir]) - cmd.extend(["--timeout", f"{timeout}s", "--max-memory", max_memory]) - return self.run(cmd, timeout=timeout + 60) - - # Direct JAR invocation - assert self.analyzer_jar, "No analyzer JAR found" - cmd = [ - self.java_path, f"-Xmx{max_memory}", - "-Dorg.opentaint.ir.impl.storage.defaultBatchSize=2000", - "-Djdk.util.jar.enableMultiRelease=false", - "-jar", str(self.analyzer_jar), - "--project", project_path, - "--output-dir", output_dir, - "--debug-run-rule-tests", - f"--ifds-analysis-timeout={timeout}", - "--verbosity=info", - ] - for rs in rulesets: - cmd.extend(["--semgrep-rule-set", rs]) - return self.run(cmd, timeout=timeout + 60) - - def compile( - self, - project_path: str, - output_dir: str, - timeout: int = 300, - ) -> CLIResult: - """Run opentaint compile (or direct autobuilder JAR invocation).""" - - if self.has_cli: - cmd = [self.cli_path, "compile", project_path, "-o", output_dir] - return self.run(cmd, timeout=timeout + 60) - - # Direct JAR invocation - assert self.autobuilder_jar, "No autobuilder JAR found" - cmd = [ - self.java_path, "-Xmx1G", - "-jar", str(self.autobuilder_jar), - "--project-root-dir", project_path, - "--result-dir", output_dir, - "--build", "portable", - "--verbosity=info", - ] - return self.run(cmd, timeout=timeout + 60) - - def rules_path(self) -> CLIResult: - """Run opentaint rules-path.""" - if self.has_cli: - return self.run([self.cli_path, "rules-path"]) - # Fall back to known builtin path - return CLIResult(0, str(BUILTIN_RULES_DIR), "", ["echo", str(BUILTIN_RULES_DIR)]) - - def init_test_project( - self, - output_dir: str, - dependencies: list[str] = None, - ) -> CLIResult: - """Run opentaint init-test-project.""" - if self.has_cli: - cmd = [self.cli_path, "init-test-project", output_dir] - for dep in (dependencies or []): - cmd.extend(["--dependency", dep]) - return self.run(cmd) - # Fallback: not available without Go CLI - return CLIResult(1, "", "init-test-project not available in direct JAR mode", []) - - -# ─── Fixtures ───────────────────────────────────────────────────────────────── - -@pytest.fixture(scope="session") -def cli() -> OpenTaintCLI: - """Provide an OpenTaintCLI instance configured for the current environment.""" - return OpenTaintCLI( - cli_path=_find_opentaint_cli(), - analyzer_jar=_find_analyzer_jar(), - autobuilder_jar=_find_autobuilder_jar(), - java_path=_find_java(), - ) - - -@pytest.fixture(scope="session") -def stirling_project() -> Path: - """Path to the Stirling-PDF project.yaml.""" - assert STIRLING_PROJECT.exists(), f"Stirling-PDF project not found at {STIRLING_PROJECT}" - return STIRLING_PROJECT - - -@pytest.fixture -def tmp_output(tmp_path) -> Path: - """Provide a temporary output directory for test results.""" - return tmp_path - - -@pytest.fixture(scope="session") -def builtin_rules() -> Path: - """Path to the built-in rules directory.""" - assert BUILTIN_RULES_DIR.exists(), f"Builtin rules not found at {BUILTIN_RULES_DIR}" - return BUILTIN_RULES_DIR - - -# ─── Helpers ────────────────────────────────────────────────────────────────── - -def load_sarif(path: Path) -> dict: - """Load and validate a SARIF file.""" - assert path.exists(), f"SARIF file not found: {path}" - with open(path) as f: - data = json.load(f) - assert data.get("version") == "2.1.0", "Not a valid SARIF 2.1.0 file" - assert "runs" in data and len(data["runs"]) > 0, "SARIF has no runs" - return data - - -def sarif_results(data: dict) -> list[dict]: - """Extract results from a SARIF report.""" - return data["runs"][0].get("results", []) - - -def sarif_rule_ids(data: dict) -> set[str]: - """Extract unique rule IDs from SARIF results.""" - return {r["ruleId"] for r in sarif_results(data)} - - -def sarif_findings_for_rule(data: dict, rule_id: str) -> list[dict]: - """Get findings for a specific rule ID.""" - return [r for r in sarif_results(data) if r["ruleId"] == rule_id] - - -def load_external_methods(path: Path) -> dict: - """Load and validate an external methods YAML file.""" - assert path.exists(), f"External methods file not found: {path}" - with open(path) as f: - data = yaml.safe_load(f) - assert isinstance(data, dict), "External methods file must be a YAML mapping" - assert "withoutRules" in data or "withRules" in data, "Missing withoutRules/withRules sections" - return data - - -def count_external_methods(data: dict) -> tuple[int, int]: - """Return (without_rules_count, with_rules_count).""" - without = len(data.get("withoutRules", [])) - with_rules = len(data.get("withRules", [])) - return without, with_rules - - -def write_yaml(path: Path, content: dict): - """Write a YAML file.""" - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w") as f: - yaml.dump(content, f, default_flow_style=False, sort_keys=False) - - -def write_text(path: Path, content: str): - """Write a text file.""" - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content) -``` - ---- - -## 4. Test Suite 1: Project Build Scenarios - -**File: `test_build.py`** - -Tests that `opentaint scan` and `opentaint compile` work with different project input modes. - -```python -""" -Suite 1: Project Build Scenarios - -Tests: -1.1 Scan with pre-compiled project model (project.yaml) -1.2 Scan with source project (triggers auto-compile) -1.3 Compile-only (autobuilder) -1.4 Scan with invalid project path (error handling) -1.5 Scan with pre-compiled project, custom output directory -""" - -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, load_sarif, sarif_results, sarif_rule_ids, - STIRLING_PROJECT_DIR, BUILTIN_RULES_DIR, -) - - -class TestScanPreCompiledProject: - """1.1: Scan using the pre-compiled Stirling-PDF project model.""" - - @pytest.mark.slow - def test_scan_with_builtin_rules(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): - """Basic scan with builtin rules produces a valid SARIF with findings.""" - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with builtin rules failed") - - # Validate SARIF output - data = load_sarif(sarif_path) - results = sarif_results(data) - assert len(results) > 0, "Scan produced no findings — expected some on Stirling-PDF" - - # Should contain known vulnerability types - rule_ids = sarif_rule_ids(data) - # Stirling-PDF is known to have path-traversal and XSS issues - print(f"Found {len(results)} findings across rules: {rule_ids}") - - @pytest.mark.slow - def test_scan_with_custom_ruleset_directory(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): - """Scan with a custom ruleset directory works alongside builtin.""" - sarif_path = tmp_output / "report.sarif" - - # Use the builtin rules directory directly as a "custom" ruleset - # This is equivalent to --ruleset builtin but tests the custom path logic - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=[str(BUILTIN_RULES_DIR)], - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with custom ruleset directory failed") - data = load_sarif(sarif_path) - assert len(sarif_results(data)) > 0 - - @pytest.mark.slow - def test_scan_severity_filter_note(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): - """Scan with severity=note should include more findings.""" - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with note severity failed") - - -class TestScanFromSourceProject: - """1.2: Scan from source (auto-compiles via autobuilder first).""" - - @pytest.mark.slow - def test_scan_from_source_directory(self, cli: OpenTaintCLI, tmp_output: Path): - """ - Scan the Stirling-PDF source directory (not pre-compiled). - This triggers auto-compilation via autobuilder. - - Uses the mirrored source tree inside seqra-project/sources/ which - is a full copy of the Stirling-PDF repo. - """ - sarif_path = tmp_output / "report.sarif" - source_dir = STIRLING_PROJECT_DIR / "sources" - - if not source_dir.exists(): - pytest.skip("Stirling-PDF source directory not available") - - result = cli.scan( - project_path=str(source_dir), - output=str(sarif_path), - rulesets=["builtin"], - timeout=900, - ) - # This may fail if the autobuilder can't build Stirling-PDF - # (requires Java 17+, Gradle wrapper). That's acceptable — the test - # validates the auto-compile → scan pipeline. - if result.ok: - data = load_sarif(sarif_path) - assert len(sarif_results(data)) > 0 - - -class TestCompileOnly: - """1.3: Test the compile command separately.""" - - @pytest.mark.slow - def test_compile_source_project(self, cli: OpenTaintCLI, tmp_output: Path): - """Compile a source project into a project model.""" - source_dir = STIRLING_PROJECT_DIR / "sources" - model_dir = tmp_output / "project-model" - - if not source_dir.exists(): - pytest.skip("Stirling-PDF source directory not available") - - result = cli.compile( - project_path=str(source_dir), - output_dir=str(model_dir), - timeout=300, - ) - if result.ok: - project_yaml = model_dir / "project.yaml" - assert project_yaml.exists(), "compile did not produce project.yaml" - - -class TestErrorHandling: - """1.4: Error handling for invalid inputs.""" - - def test_scan_nonexistent_project(self, cli: OpenTaintCLI, tmp_output: Path): - """Scan with nonexistent project path should fail gracefully.""" - sarif_path = tmp_output / "report.sarif" - result = cli.scan( - project_path="/nonexistent/project/path", - output=str(sarif_path), - ) - result.assert_failed("Scan should fail for nonexistent project") - - def test_scan_missing_output_flag(self, cli: OpenTaintCLI, stirling_project: Path): - """Scan without -o flag should fail (it's required).""" - if not cli.has_cli: - pytest.skip("Requires Go CLI for flag validation") - # Invoke without -o - result = cli.run([cli.cli_path, "scan", str(stirling_project)]) - result.assert_failed("Scan should require -o flag") -``` - ---- - -## 5. Test Suite 2: Rule Generation Pipeline - -**File: `test_rules.py`** - -Tests the full rule lifecycle: create rule → create test samples → build test project → run rule tests → run scan with rule. - -### Fixture rules used by tests - -**`fixtures/rules/java/lib/stirling-source.yaml`** — a library rule defining a source for Spring `@PostMapping` multipart file parameters: - -```yaml -rules: - - id: stirling-multipart-file-source - options: - lib: true - severity: NOTE - message: Untrusted multipart file data from Spring controller - languages: [java] - patterns: - - pattern: | - $RETURNTYPE $METHOD(..., @RequestParam MultipartFile $UNTRUSTED, ...) { ... } -``` - -**`fixtures/rules/java/security/stirling-path-traversal.yaml`** — a security rule joining the source with a built-in path traversal sink: - -```yaml -rules: - - id: stirling-path-traversal - severity: ERROR - message: >- - User-uploaded file name flows to file system operation without sanitization - metadata: - cwe: CWE-22 - short-description: Path Traversal via uploaded file name - languages: [java] - mode: join - join: - refs: - - rule: java/lib/stirling-source.yaml#stirling-multipart-file-source - as: source - - rule: java/lib/generic/path-traversal-sinks.yaml#java-path-traversal-sink - as: sink - on: - - 'source.$UNTRUSTED -> sink.$UNTRUSTED' -``` - -### Test samples - -**`fixtures/test-samples/src/main/java/test/PathTraversalTest.java`**: - -```java -package test; - -import org.opentaint.sast.test.util.PositiveRuleSample; -import org.opentaint.sast.test.util.NegativeRuleSample; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; -import org.springframework.web.multipart.MultipartFile; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; - -@RestController -public class PathTraversalTest { - - @PositiveRuleSample(value = "java/security/stirling-path-traversal.yaml", id = "stirling-path-traversal") - @PostMapping("/upload-vulnerable") - public String vulnerable(@RequestParam MultipartFile file) throws IOException { - // Directly use original filename — path traversal possible - String filename = file.getOriginalFilename(); - Path dest = Paths.get("/uploads/" + filename); - Files.copy(file.getInputStream(), dest); - return "uploaded"; - } - - @NegativeRuleSample(value = "java/security/stirling-path-traversal.yaml", id = "stirling-path-traversal") - @PostMapping("/upload-safe") - public String safe(@RequestParam MultipartFile file) throws IOException { - // Use sanitized filename — only the base name, no path components - String filename = new File(file.getOriginalFilename()).getName(); - Path dest = Paths.get("/uploads/").resolve(filename); - Files.copy(file.getInputStream(), dest); - return "uploaded"; - } -} -``` - -### Test script - -```python -""" -Suite 2: Rule Generation Pipeline - -Tests: -2.1 Read builtin rules via `opentaint rules-path` (or known path) -2.2 Create custom library + security rules, verify YAML validity -2.3 Run scan with custom ruleset + --rule-id filter -2.4 Run scan with custom ruleset without --rule-id filter (all rules active) -2.5 Bootstrap test project, build, and run rule tests -2.6 Rule test: false negative detected (positive sample with wrong pattern) -2.7 Rule test: false positive detected (negative sample with too-broad pattern) -2.8 Run scan on Stirling-PDF with custom path-traversal rule -""" - -import json -import shutil -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, load_sarif, sarif_results, sarif_rule_ids, - sarif_findings_for_rule, write_yaml, write_text, - BUILTIN_RULES_DIR, FIXTURES_DIR, -) - - -class TestReadBuiltinRules: - """2.1: Agent can discover and read builtin rules.""" - - def test_builtin_rules_directory_exists(self, builtin_rules: Path): - """Builtin rules directory exists and contains rule files.""" - security_dir = builtin_rules / "java" / "security" - assert security_dir.exists(), f"No security rules at {security_dir}" - rule_files = list(security_dir.glob("*.yaml")) - assert len(rule_files) > 10, f"Expected >10 security rules, found {len(rule_files)}" - - def test_builtin_lib_rules_exist(self, builtin_rules: Path): - """Library rules (sources/sinks) exist.""" - lib_generic = builtin_rules / "java" / "lib" / "generic" - assert lib_generic.exists() - assert (lib_generic / "servlet-untrusted-data-source.yaml").exists() - assert (lib_generic / "path-traversal-sinks.yaml").exists() - - @pytest.mark.new_feature - def test_rules_path_command(self, cli: OpenTaintCLI): - """opentaint rules-path prints the rules directory.""" - result = cli.rules_path() - result.assert_ok("rules-path command failed") - rules_dir = Path(result.stdout.strip()) - assert rules_dir.exists(), f"rules-path returned non-existent dir: {rules_dir}" - assert (rules_dir / "java" / "security").is_dir() - - -class TestCustomRuleCreation: - """2.2: Create and validate custom rules.""" - - def test_custom_rules_are_valid_yaml(self): - """Fixture rule files are syntactically valid YAML with expected structure.""" - import yaml - for rule_file in FIXTURES_DIR.rglob("*.yaml"): - if rule_file.parent.name == "yaml": - continue # skip approximation configs - with open(rule_file) as f: - data = yaml.safe_load(f) - assert "rules" in data, f"Rule file {rule_file} missing 'rules' key" - for rule in data["rules"]: - assert "id" in rule, f"Rule in {rule_file} missing 'id'" - assert "severity" in rule, f"Rule {rule['id']} missing 'severity'" - assert "languages" in rule, f"Rule {rule['id']} missing 'languages'" - - def test_library_rule_has_lib_option(self): - """Library rules must have options.lib: true.""" - import yaml - lib_rule = FIXTURES_DIR / "rules" / "java" / "lib" / "stirling-source.yaml" - if not lib_rule.exists(): - pytest.skip("Library rule fixture not created yet") - with open(lib_rule) as f: - data = yaml.safe_load(f) - for rule in data["rules"]: - assert rule.get("options", {}).get("lib") is True, \ - f"Library rule {rule['id']} missing options.lib: true" - - def test_security_rule_has_metadata(self): - """Security rules must have metadata.cwe and metadata.short-description.""" - import yaml - sec_rule = FIXTURES_DIR / "rules" / "java" / "security" / "stirling-path-traversal.yaml" - if not sec_rule.exists(): - pytest.skip("Security rule fixture not created yet") - with open(sec_rule) as f: - data = yaml.safe_load(f) - for rule in data["rules"]: - if rule.get("options", {}).get("lib"): - continue - meta = rule.get("metadata", {}) - assert "cwe" in meta, f"Security rule {rule['id']} missing metadata.cwe" - assert "short-description" in meta, f"Security rule {rule['id']} missing metadata.short-description" - - -class TestScanWithRuleIdFilter: - """2.3-2.4: Scan with --rule-id filter.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_with_rule_id_filter(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): - """ - Scan with --rule-id should only produce findings for the specified rule. - Library rules referenced via refs should be auto-included. - """ - sarif_path = tmp_output / "report.sarif" - custom_rules = FIXTURES_DIR / "rules" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(custom_rules)], - rule_ids=["stirling-path-traversal"], - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with --rule-id filter failed") - - data = load_sarif(sarif_path) - rule_ids = sarif_rule_ids(data) - # Only our rule should appear (lib rules don't produce top-level findings) - for rid in rule_ids: - assert rid == "stirling-path-traversal", \ - f"Unexpected rule '{rid}' in output — --rule-id filter not working" - - @pytest.mark.slow - def test_scan_without_rule_id_filter_includes_all(self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path): - """ - Scan without --rule-id should include findings from all active rules. - """ - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan without rule-id filter failed") - - data = load_sarif(sarif_path) - rule_ids = sarif_rule_ids(data) - # Should have multiple rule IDs - assert len(rule_ids) > 1, f"Expected multiple rule IDs, got: {rule_ids}" - - -class TestRuleTests: - """2.5-2.7: Rule test workflow.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_init_test_project(self, cli: OpenTaintCLI, tmp_output: Path): - """ - opentaint init-test-project bootstraps a valid Gradle test project. - """ - test_project_dir = tmp_output / "test-project" - - result = cli.init_test_project( - output_dir=str(test_project_dir), - dependencies=["org.springframework:spring-web:6.2.12", "jakarta.servlet:jakarta.servlet-api:6.0.0"], - ) - if not result.ok: - pytest.skip("init-test-project not available (new feature)") - - # Verify structure - assert (test_project_dir / "build.gradle.kts").exists() - assert (test_project_dir / "settings.gradle.kts").exists() - assert (test_project_dir / "libs" / "opentaint-sast-test-util.jar").exists() - assert (test_project_dir / "src" / "main" / "java" / "test").is_dir() - - @pytest.mark.slow - @pytest.mark.new_feature - def test_rule_test_all_pass(self, cli: OpenTaintCLI, tmp_output: Path): - """ - Create a test project with correct positive/negative samples. - Rule tests should all pass. - """ - # Setup: copy fixture test samples and rules - test_project_dir = tmp_output / "test-project" - compiled_dir = tmp_output / "test-compiled" - test_output = tmp_output / "test-output" - rules_dir = FIXTURES_DIR / "rules" - - # Bootstrap (or manually create if CLI not available) - result = cli.init_test_project( - output_dir=str(test_project_dir), - dependencies=[ - "org.springframework:spring-web:6.2.12", - "jakarta.servlet:jakarta.servlet-api:6.0.0", - ], - ) - if not result.ok: - pytest.skip("init-test-project not available") - - # Copy test samples - samples_src = FIXTURES_DIR / "test-samples" / "src" - samples_dst = test_project_dir / "src" - if samples_src.exists(): - shutil.copytree(samples_src, samples_dst, dirs_exist_ok=True) - - # Compile test project - compile_result = cli.compile(str(test_project_dir), str(compiled_dir)) - compile_result.assert_ok("Failed to compile test project") - - # Run rule tests - test_result = cli.test_rules( - project_path=str(compiled_dir / "project.yaml"), - rulesets=[str(rules_dir)], - output_dir=str(test_output), - ) - test_result.assert_ok("Rule tests failed") - - # Check test-result.json - result_json = test_output / "test-result.json" - assert result_json.exists(), "test-result.json not produced" - with open(result_json) as f: - results = json.load(f) - - assert len(results.get("falsePositive", [])) == 0, \ - f"Unexpected false positives: {results['falsePositive']}" - assert len(results.get("falseNegative", [])) == 0, \ - f"Unexpected false negatives: {results['falseNegative']}" - assert len(results.get("success", [])) > 0, \ - "No successful tests — something is wrong" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_rule_test_detects_false_negative(self, cli: OpenTaintCLI, tmp_output: Path): - """ - A @PositiveRuleSample that doesn't match the rule → false negative. - This tests that the test framework correctly detects missing findings. - """ - test_project_dir = tmp_output / "test-project-fn" - compiled_dir = tmp_output / "test-compiled-fn" - test_output = tmp_output / "test-output-fn" - - # Create a rule that intentionally won't match the test sample - rules_dir = tmp_output / "broken-rules" / "java" / "security" - rules_dir.mkdir(parents=True) - write_text(rules_dir / "broken-rule.yaml", """\ -rules: - - id: broken-path-traversal - severity: ERROR - message: This rule intentionally won't match - metadata: - cwe: CWE-22 - short-description: Broken rule for testing FN detection - languages: [java] - patterns: - - pattern: ThisClassDoesNotExist.neverCalled($X) -""") - - # Create test sample that references the rule - result = cli.init_test_project( - output_dir=str(test_project_dir), - dependencies=["jakarta.servlet:jakarta.servlet-api:6.0.0"], - ) - if not result.ok: - pytest.skip("init-test-project not available") - - test_file = test_project_dir / "src" / "main" / "java" / "test" / "FalseNegativeTest.java" - write_text(test_file, """\ -package test; - -import org.opentaint.sast.test.util.PositiveRuleSample; - -public class FalseNegativeTest { - - @PositiveRuleSample(value = "java/security/broken-rule.yaml", id = "broken-path-traversal") - public void shouldTriggerButWont() { - String x = System.getenv("USER_INPUT"); - System.out.println(x); // not a real sink for the broken rule - } -} -""") - - compile_result = cli.compile(str(test_project_dir), str(compiled_dir)) - if not compile_result.ok: - pytest.skip("Cannot compile test project") - - test_result = cli.test_rules( - project_path=str(compiled_dir / "project.yaml"), - rulesets=[str(tmp_output / "broken-rules")], - output_dir=str(test_output), - ) - - # The test framework should detect this as a false negative - result_json = test_output / "test-result.json" - if result_json.exists(): - with open(result_json) as f: - results = json.load(f) - assert len(results.get("falseNegative", [])) > 0, \ - "Expected false negative not detected" - - -class TestScanStirlingWithCustomRule: - """2.8: Run custom path-traversal rule on Stirling-PDF.""" - - @pytest.mark.slow - def test_scan_stirling_with_path_traversal_rule( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Scan Stirling-PDF with our custom path-traversal rule. - Stirling-PDF handles file uploads in several controllers — - we expect the rule to find some findings. - """ - sarif_path = tmp_output / "report.sarif" - custom_rules = FIXTURES_DIR / "rules" - - if not custom_rules.exists(): - pytest.skip("Fixture rules not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(custom_rules)], - rule_ids=["stirling-path-traversal"], - severity=["note", "warning", "error"], - timeout=600, - ) - - if result.ok: - data = load_sarif(sarif_path) - findings = sarif_findings_for_rule(data, "stirling-path-traversal") - print(f"Found {len(findings)} path-traversal findings in Stirling-PDF") - for f in findings[:5]: - locs = f.get("locations", [{}]) - if locs: - uri = locs[0].get("physicalLocation", {}).get("artifactLocation", {}).get("uri", "?") - line = locs[0].get("physicalLocation", {}).get("region", {}).get("startLine", "?") - print(f" - {uri}:{line}") - else: - # Rule might not match if patterns are wrong — that's part of testing - print(f"Scan failed or produced no output: {result.stderr[:500]}") -``` - ---- - -## 6. Test Suite 3: Approximations Generation/Override - -**File: `test_approximations.py`** - -Tests YAML passThrough config (`--approximations-config`) and code-based approximations (`--dataflow-approximations`). Both types of approximations are **only applicable to external methods** — library classes without source code in the project. The agent discovers which methods need approximations via the `--external-methods` output. - -### Key constraint: External methods only - -**Approximations (both YAML passThrough and code-based) are ONLY applicable to external methods** — library classes whose source code is NOT part of the analyzed project. Project classes with source code are analyzed directly by the engine; approximations for them would be ignored or cause errors. - -The agent's workflow is: run scan → get external methods list → create approximations for methods in `withoutRules` → rescan. The external methods list drives which methods need approximations. - -### Fixture: YAML approximation config - -**`fixtures/approximations/yaml/custom-propagators.yaml`** — models external library methods from Stirling-PDF's dependencies: - -```yaml -# Custom passThrough rules for external library methods encountered by the engine. -# -# IMPORTANT: Approximations are ONLY applicable to external methods — library -# classes whose source code is NOT part of the project. These methods would -# appear in the external-methods.yaml output under withoutRules. - -passThrough: - # org.apache.pdfbox.pdmodel.PDDocument#getPage — taint on this flows to result - # PDFBox is an external dependency of Stirling-PDF - - function: org.apache.pdfbox.pdmodel.PDDocument#getPage - copy: - - from: this - to: result - - # org.apache.pdfbox.text.PDFTextStripper#getText — taint on arg(0) flows to result - - function: org.apache.pdfbox.text.PDFTextStripper#getText - copy: - - from: arg(0) - to: result - - # com.fasterxml.jackson.databind.ObjectMapper#readValue — taint flows through deserialization - - function: com.fasterxml.jackson.databind.ObjectMapper#readValue - copy: - - from: arg(0) - to: result - - # org.jsoup.Jsoup#parse — taint on arg(0) flows to result - - function: org.jsoup.Jsoup#parse - copy: - - from: arg(0) - to: result -``` - -### Fixture: Code-based approximation - -**`fixtures/approximations/java/PdfBoxDocumentApprox.java`** — approximation for PDFBox's `PDDocument` (an external library class): - -```java -package agent.approximations; - -import org.opentaint.ir.approximation.annotation.ApproximateByName; -import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; -import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; - -/** - * Code-based approximation for PDFBox's PDDocument class. - * - * IMPORTANT: Approximations are ONLY applicable to external methods — - * library classes whose source code is NOT part of the project being analyzed. - * PDFBox is an external dependency of Stirling-PDF (pdfbox-3.0.6.jar). - */ -@ApproximateByName("org.apache.pdfbox.pdmodel.PDDocument") -public class PdfBoxDocumentApprox { - - /** - * Model save(OutputStream) — taint on this flows to arg(0). - * A tainted document writes tainted bytes to the output stream. - */ - public void save(java.io.OutputStream output) throws java.io.IOException { - org.apache.pdfbox.pdmodel.PDDocument self = - (org.apache.pdfbox.pdmodel.PDDocument) (Object) this; - if (OpentaintNdUtil.nextBool()) { - throw new java.io.IOException("approximation: failure path"); - } - byte[] data = new byte[1]; - output.write(data); - } - - /** - * Model getPage(int) — taint on this flows to result. - * A tainted document produces tainted pages. - */ - public Object getPage(int pageIndex) { - org.apache.pdfbox.pdmodel.PDDocument self = - (org.apache.pdfbox.pdmodel.PDDocument) (Object) this; - if (OpentaintNdUtil.nextBool()) { - return null; - } - return self.getPages().get(pageIndex); - } -} -``` - -### Test script - -```python -""" -Suite 3: Approximations Generation/Override - -Tests: -3.1 Scan with --approximations-config (YAML passThrough) -3.2 Scan with --approximations-config + --ruleset together (§1.2) -3.3 Scan with --dataflow-approximations (pre-compiled .class files) -3.4 Scan with --dataflow-approximations from .java sources (auto-compile, §1.4) -3.5 Approximation compilation failure handling (bad Java source) -3.6 Duplicate approximation targeting built-in class (error) -3.7 Scan with both --approximations-config and --dataflow-approximations -3.8 Verify approximation changes analysis results -""" - -import pytest -import shutil -from pathlib import Path -from conftest import ( - OpenTaintCLI, load_sarif, sarif_results, sarif_rule_ids, - sarif_findings_for_rule, write_text, write_yaml, - FIXTURES_DIR, BUILTIN_RULES_DIR, -) - - -class TestYAMLApproximationsConfig: - """3.1-3.2: YAML passThrough config.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_with_approximations_config( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Scan with --approximations-config applies custom passThrough rules. - We verify the scan completes successfully (the config is accepted). - """ - sarif_path = tmp_output / "report.sarif" - config_path = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - - if not config_path.exists(): - pytest.skip("Fixture approximation config not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - approximations_config=str(config_path), - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with --approximations-config failed") - data = load_sarif(sarif_path) - assert len(sarif_results(data)) >= 0 # May have results, may not - - @pytest.mark.slow - @pytest.mark.new_feature - def test_approximations_config_with_custom_ruleset( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - --approximations-config and --ruleset can be used together (§1.2). - Previously these were mutually exclusive. - """ - sarif_path = tmp_output / "report.sarif" - config_path = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - custom_rules = FIXTURES_DIR / "rules" - - if not config_path.exists() or not custom_rules.exists(): - pytest.skip("Fixture files not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(custom_rules)], - rule_ids=["stirling-path-traversal"], - approximations_config=str(config_path), - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok( - "Scan with both --approximations-config and --ruleset failed. " - "These should work together per design §1.2" - ) - - @pytest.mark.slow - @pytest.mark.new_feature - def test_invalid_approximations_config_errors( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """Invalid YAML config should produce a clear error.""" - sarif_path = tmp_output / "report.sarif" - bad_config = tmp_output / "bad-config.yaml" - write_text(bad_config, "this is not: [valid: yaml: config") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - approximations_config=str(bad_config), - timeout=120, - ) - result.assert_failed("Scan should fail with invalid approximations config") - - -class TestCodeBasedApproximations: - """3.3-3.6: Code-based approximations via --dataflow-approximations.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_with_java_source_approximations( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - --dataflow-approximations with .java source files auto-compiles them. - The scan should complete successfully. - """ - sarif_path = tmp_output / "report.sarif" - approx_dir = FIXTURES_DIR / "approximations" / "java" - - if not approx_dir.exists(): - pytest.skip("Fixture approximation source not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - dataflow_approximations=str(approx_dir), - severity=["warning", "error"], - timeout=600, - ) - # If the auto-compile works, scan should succeed - if result.ok: - data = load_sarif(sarif_path) - print(f"Scan with code-based approximations: {len(sarif_results(data))} findings") - - @pytest.mark.slow - @pytest.mark.new_feature - def test_approximation_compilation_failure( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - --dataflow-approximations with invalid Java source should fail - with compilation errors before analysis starts. - """ - sarif_path = tmp_output / "report.sarif" - bad_approx_dir = tmp_output / "bad-approximations" - bad_approx_dir.mkdir() - write_text(bad_approx_dir / "BrokenApprox.java", """\ -package agent.approximations; - -import org.opentaint.ir.approximation.annotation.Approximate; - -// This won't compile — referencing nonexistent class -@Approximate(com.nonexistent.library.DoesNotExist.class) -public class BrokenApprox { - public void broken() { - com.nonexistent.library.DoesNotExist x = null; // compile error - } -} -""") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - dataflow_approximations=str(bad_approx_dir), - timeout=120, - ) - result.assert_failed("Scan should fail when approximation compilation fails") - # Error message should mention compilation - assert "compil" in result.stderr.lower() or "error" in result.stderr.lower(), \ - f"Error message should mention compilation failure: {result.stderr[:500]}" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_duplicate_approximation_errors( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - A custom approximation targeting a class that already has a built-in - approximation should produce an error (bijection violation). - """ - sarif_path = tmp_output / "report.sarif" - dup_approx_dir = tmp_output / "dup-approximations" - dup_approx_dir.mkdir() - - # java.util.stream.Stream already has a built-in approximation - write_text(dup_approx_dir / "StreamDuplicate.java", """\ -package agent.approximations; - -import org.opentaint.ir.approximation.annotation.Approximate; - -@Approximate(java.util.stream.Stream.class) -public class StreamDuplicate { - public Object map(java.util.function.Function fn) throws Throwable { - return fn.apply(null); - } -} -""") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - dataflow_approximations=str(dup_approx_dir), - timeout=300, - ) - # Should fail due to ApproximationIndexer bijection assertion - result.assert_failed("Duplicate approximation should produce an error") - - -class TestCombinedApproximations: - """3.7-3.8: Combining YAML config + code-based approximations.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_with_both_approximation_types( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Both --approximations-config and --dataflow-approximations can be - used in the same scan. YAML handles simple passThrough, code-based - handles complex methods. - """ - sarif_path = tmp_output / "report.sarif" - yaml_config = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - java_approx = FIXTURES_DIR / "approximations" / "java" - custom_rules = FIXTURES_DIR / "rules" - - if not yaml_config.exists() or not java_approx.exists(): - pytest.skip("Fixture files not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(custom_rules)], - rule_ids=["stirling-path-traversal"], - approximations_config=str(yaml_config), - dataflow_approximations=str(java_approx), - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with combined approximation types failed") - - @pytest.mark.slow - @pytest.mark.new_feature - def test_approximations_change_results( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Running the same scan with and without custom approximations should - produce different results (the approximations add propagation paths - that weren't there before). - - This is a differential test — we compare finding counts. - """ - custom_rules = FIXTURES_DIR / "rules" - yaml_config = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - - if not custom_rules.exists() or not yaml_config.exists(): - pytest.skip("Fixture files not created yet") - - # Run 1: without approximations - sarif_no_approx = tmp_output / "no-approx" / "report.sarif" - (tmp_output / "no-approx").mkdir() - r1 = cli.scan( - project_path=str(stirling_project), - output=str(sarif_no_approx), - rulesets=["builtin", str(custom_rules)], - rule_ids=["stirling-path-traversal"], - severity=["note", "warning", "error"], - timeout=600, - ) - - # Run 2: with approximations - sarif_with_approx = tmp_output / "with-approx" / "report.sarif" - (tmp_output / "with-approx").mkdir() - r2 = cli.scan( - project_path=str(stirling_project), - output=str(sarif_with_approx), - rulesets=["builtin", str(custom_rules)], - rule_ids=["stirling-path-traversal"], - approximations_config=str(yaml_config), - severity=["note", "warning", "error"], - timeout=600, - ) - - if r1.ok and r2.ok: - data1 = load_sarif(sarif_no_approx) - data2 = load_sarif(sarif_with_approx) - count1 = len(sarif_results(data1)) - count2 = len(sarif_results(data2)) - print(f"Without approximations: {count1} findings") - print(f"With approximations: {count2} findings") - # We don't assert which is larger — just that they're potentially different - # The agent would analyze the difference to validate the approximations -``` - ---- - -## 7. Test Suite 4: External Methods Extraction - -**File: `test_external_methods.py`** - -Tests the `--external-methods` output functionality. - -```python -""" -Suite 4: External Methods Extraction - -Tests: -4.1 Scan with --external-methods produces a YAML file -4.2 External methods file has correct structure (withoutRules/withRules) -4.3 External methods contain expected fields (method, signature, factPositions, callSites) -4.4 withoutRules list is non-empty for a real project (Stirling-PDF has many unmodeled methods) -4.5 withRules list contains known standard library methods -4.6 Scan with custom approximations reduces withoutRules count -4.7 External methods extraction alongside SARIF output -""" - -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, load_sarif, sarif_results, - load_external_methods, count_external_methods, - FIXTURES_DIR, BUILTIN_RULES_DIR, -) - - -class TestExternalMethodsBasic: - """4.1-4.3: Basic external methods output.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_produces_external_methods_file( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - --external-methods flag produces a YAML file alongside SARIF output. - """ - sarif_path = tmp_output / "report.sarif" - ext_methods_path = tmp_output / "external-methods.yaml" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - external_methods=str(ext_methods_path), - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with --external-methods failed") - assert ext_methods_path.exists(), "External methods file not produced" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_external_methods_structure( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - External methods file has two sections: withoutRules and withRules. - Each entry has: method, signature, factPositions, callSites. - """ - sarif_path = tmp_output / "report.sarif" - ext_methods_path = tmp_output / "external-methods.yaml" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - external_methods=str(ext_methods_path), - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok() - - data = load_external_methods(ext_methods_path) - - # Validate structure - for section_name in ["withoutRules", "withRules"]: - section = data.get(section_name, []) - for entry in section[:5]: # Check first 5 entries - assert "method" in entry, f"Entry in {section_name} missing 'method'" - assert "signature" in entry, f"Entry in {section_name} missing 'signature'" - assert "factPositions" in entry, f"Entry in {section_name} missing 'factPositions'" - assert "callSites" in entry, f"Entry in {section_name} missing 'callSites'" - - # Validate method format: Class#method - assert "#" in entry["method"], \ - f"Method should be in Class#method format: {entry['method']}" - - # Validate factPositions is a list - assert isinstance(entry["factPositions"], list), \ - f"factPositions should be a list: {entry['factPositions']}" - - # Validate callSites is a positive integer - assert isinstance(entry["callSites"], int) and entry["callSites"] > 0, \ - f"callSites should be a positive integer: {entry['callSites']}" - - # Validate factPositions values - valid_positions = {"this", "result"} - for pos in entry["factPositions"]: - assert pos == "this" or pos == "result" or pos.startswith("arg("), \ - f"Invalid fact position: {pos}" - - -class TestExternalMethodsContent: - """4.4-4.5: External methods content validation.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_without_rules_nonempty_for_real_project( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Stirling-PDF uses many libraries without built-in approximations. - The withoutRules list should be non-empty. - """ - sarif_path = tmp_output / "report.sarif" - ext_methods_path = tmp_output / "external-methods.yaml" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - external_methods=str(ext_methods_path), - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok() - - data = load_external_methods(ext_methods_path) - without_count, with_count = count_external_methods(data) - print(f"External methods: {without_count} without rules, {with_count} with rules") - - assert without_count > 0, \ - "Expected non-empty withoutRules for Stirling-PDF (it uses many unmodeled libraries)" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_with_rules_contains_standard_library_methods( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - The withRules section should contain standard library methods that - have built-in approximations (e.g., StringBuilder, String methods). - """ - sarif_path = tmp_output / "report.sarif" - ext_methods_path = tmp_output / "external-methods.yaml" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - external_methods=str(ext_methods_path), - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok() - - data = load_external_methods(ext_methods_path) - with_rules = data.get("withRules", []) - with_rules_methods = {e["method"] for e in with_rules} - - # Known methods that should have rules in the default config - # (these are common and Stirling-PDF definitely calls them) - print(f"Methods with rules ({len(with_rules_methods)}):") - for m in sorted(list(with_rules_methods))[:20]: - print(f" - {m}") - - -class TestExternalMethodsWithApproximations: - """4.6: Custom approximations reduce withoutRules count.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_approximations_reduce_without_rules( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Adding custom passThrough rules for methods that were in withoutRules - should move them to withRules (or remove them from withoutRules entirely). - """ - yaml_config = FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - custom_rules = FIXTURES_DIR / "rules" - - if not yaml_config.exists(): - pytest.skip("Fixture approximation config not created yet") - - # Run 1: without custom approximations - sarif1 = tmp_output / "run1" / "report.sarif" - ext1 = tmp_output / "run1" / "external-methods.yaml" - (tmp_output / "run1").mkdir() - r1 = cli.scan( - project_path=str(stirling_project), - output=str(sarif1), - rulesets=["builtin"], - external_methods=str(ext1), - severity=["warning", "error"], - timeout=600, - ) - - # Run 2: with custom approximations - sarif2 = tmp_output / "run2" / "report.sarif" - ext2 = tmp_output / "run2" / "external-methods.yaml" - (tmp_output / "run2").mkdir() - r2 = cli.scan( - project_path=str(stirling_project), - output=str(sarif2), - rulesets=["builtin"], - approximations_config=str(yaml_config), - external_methods=str(ext2), - severity=["warning", "error"], - timeout=600, - ) - - if r1.ok and r2.ok: - data1 = load_external_methods(ext1) - data2 = load_external_methods(ext2) - wo1, _ = count_external_methods(data1) - wo2, _ = count_external_methods(data2) - print(f"Without custom approx: {wo1} methods without rules") - print(f"With custom approx: {wo2} methods without rules") - - # Methods we added rules for should no longer be in withoutRules - methods_without_1 = {e["method"] for e in data1.get("withoutRules", [])} - methods_without_2 = {e["method"] for e in data2.get("withoutRules", [])} - newly_covered = methods_without_1 - methods_without_2 - if newly_covered: - print(f"Newly covered methods ({len(newly_covered)}):") - for m in sorted(newly_covered): - print(f" + {m}") - - -class TestExternalMethodsAlongsideSarif: - """4.7: External methods and SARIF are produced together.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_both_outputs_produced( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - A single scan produces both SARIF report and external methods file. - """ - sarif_path = tmp_output / "report.sarif" - ext_methods_path = tmp_output / "external-methods.yaml" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - external_methods=str(ext_methods_path), - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok() - - # Both files should exist - assert sarif_path.exists(), "SARIF report not produced" - assert ext_methods_path.exists(), "External methods file not produced" - - # Both should be non-trivial - sarif_data = load_sarif(sarif_path) - ext_data = load_external_methods(ext_methods_path) - assert len(sarif_results(sarif_data)) > 0, "SARIF has no results" - wo, wr = count_external_methods(ext_data) - assert wo + wr > 0, "External methods file is empty" -``` - ---- - -## 8. Test Suite 5: Full Agent Loop (Integration) - -**File: `test_full_loop.py`** - -End-to-end test simulating the agent's workflow from the meta prompt: create rule → test → scan → analyze external methods → create approximation → rescan. - -```python -""" -Suite 5: Full Agent Loop (Integration) - -This test simulates the complete agent workflow on Stirling-PDF: -1. Discover entry points (by reading source) -2. Create a custom path-traversal rule -3. Test the rule with samples -4. Run initial scan on Stirling-PDF -5. Analyze external methods -6. Create YAML approximation for an unmodeled method -7. Re-scan and verify the approximation has effect - -This is a single large integration test, not meant for fast CI. -""" - -import json -import shutil -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, load_sarif, sarif_results, sarif_rule_ids, - sarif_findings_for_rule, load_external_methods, count_external_methods, - write_text, write_yaml, - FIXTURES_DIR, BUILTIN_RULES_DIR, STIRLING_PROJECT_DIR, -) - - -@pytest.mark.slow -@pytest.mark.new_feature -class TestFullAgentLoop: - """ - Simulates the agent's analysis workflow on Stirling-PDF. - - This test class follows the meta prompt phases: - Phase 1 → discover entry points (manual) - Phase 2 → create rule + test - Phase 3 → scan + analyze + create approx + rescan - """ - - def _setup_workspace(self, tmp_output: Path) -> dict: - """Create the agent workspace directory layout.""" - workspace = { - "root": tmp_output, - "rules": tmp_output / "agent-rules", - "config": tmp_output / "agent-config", - "approximations": tmp_output / "agent-approximations" / "src", - "results": tmp_output / "results", - "test_project": tmp_output / "agent-test-project", - "test_compiled": tmp_output / "agent-test-compiled", - "test_output": tmp_output / "agent-test-output", - } - for d in workspace.values(): - if isinstance(d, Path): - d.mkdir(parents=True, exist_ok=True) - return workspace - - def test_full_agent_loop( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """Full end-to-end agent loop on Stirling-PDF.""" - ws = self._setup_workspace(tmp_output) - - # ── Phase 1: Source Discovery (simulated) ───────────────────── - # The agent would read source files to identify controllers and - # attack surface. Here we simulate the discovery result. - controllers = [ - "stirling.software.SPDF.controller.api.misc.PrintFileController", - "stirling.software.SPDF.controller.api.MergeController", - "stirling.software.SPDF.controller.api.SplitPDFController", - "stirling.software.SPDF.controller.api.security.*", - ] - print(f"Phase 1: Discovered {len(controllers)} controller groups") - - # ── Phase 2: Create Rule ────────────────────────────────────── - - # 2a: Read builtin rules to check coverage - builtin_path_traversal = BUILTIN_RULES_DIR / "java" / "security" / "path-traversal.yaml" - assert builtin_path_traversal.exists(), "Builtin path-traversal rule not found" - print("Phase 2a: Read builtin path-traversal rule") - - # 2b: Create custom source library rule for Stirling's multipart upload - lib_dir = ws["rules"] / "java" / "lib" - lib_dir.mkdir(parents=True, exist_ok=True) - write_text(lib_dir / "stirling-source.yaml", """\ -rules: - - id: stirling-multipart-file-source - options: - lib: true - severity: NOTE - message: Untrusted multipart file data from Spring controller - languages: [java] - patterns: - - pattern: | - $RETURNTYPE $METHOD(..., @RequestParam MultipartFile $UNTRUSTED, ...) { ... } -""") - - # 2c: Create join-mode security rule - sec_dir = ws["rules"] / "java" / "security" - sec_dir.mkdir(parents=True, exist_ok=True) - write_text(sec_dir / "stirling-path-traversal.yaml", """\ -rules: - - id: stirling-path-traversal - severity: ERROR - message: >- - User-uploaded file name flows to file system operation without sanitization - metadata: - cwe: CWE-22 - short-description: Path Traversal via uploaded file name - languages: [java] - mode: join - join: - refs: - - rule: java/lib/stirling-source.yaml#stirling-multipart-file-source - as: source - - rule: java/lib/generic/path-traversal-sinks.yaml#java-path-traversal-sink - as: sink - on: - - 'source.$UNTRUSTED -> sink.$UNTRUSTED' -""") - print("Phase 2b-c: Created custom rules") - - # ── Phase 3: Initial Scan ───────────────────────────────────── - - sarif_path = ws["results"] / "report-1.sarif" - ext_methods_path = ws["results"] / "external-methods-1.yaml" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(ws["rules"])], - rule_ids=["stirling-path-traversal"], - external_methods=str(ext_methods_path), - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok("Initial scan failed") - - # Analyze results - sarif_data = load_sarif(sarif_path) - findings = sarif_findings_for_rule(sarif_data, "stirling-path-traversal") - print(f"Phase 3: Initial scan found {len(findings)} path-traversal findings") - - for f in findings[:5]: - locs = f.get("locations", [{}]) - if locs: - uri = locs[0].get("physicalLocation", {}).get("artifactLocation", {}).get("uri", "?") - line = locs[0].get("physicalLocation", {}).get("region", {}).get("startLine", "?") - print(f" Finding: {uri}:{line}") - - # ── Phase 3b: Analyze External Methods ──────────────────────── - - if ext_methods_path.exists(): - ext_data = load_external_methods(ext_methods_path) - wo_count, wr_count = count_external_methods(ext_data) - print(f"Phase 3b: External methods — {wo_count} without rules, {wr_count} with rules") - - # Identify methods the agent would want to model - without_rules = ext_data.get("withoutRules", []) - priority_methods = [ - m for m in without_rules - if m.get("callSites", 0) > 5 - ] - priority_methods.sort(key=lambda m: m.get("callSites", 0), reverse=True) - print(f" Priority unmodeled methods (>5 call sites): {len(priority_methods)}") - for m in priority_methods[:10]: - print(f" {m['method']} ({m['callSites']} call sites, positions: {m['factPositions']})") - - # ── Phase 4: Create Approximation and Rescan ────────────────── - # Approximations are ONLY for external methods (from withoutRules). - # These are library methods without source code in the project. - - # Create YAML approximation for top unmodeled external methods - if ext_methods_path.exists() and priority_methods: - pass_through_rules = [] - for m in priority_methods[:5]: - method_name = m["method"] - positions = m["factPositions"] - - # Simple heuristic: if taint is on arg(0), propagate to result - copies = [] - for pos in positions: - if pos.startswith("arg("): - copies.append({"from": pos, "to": "result"}) - elif pos == "this": - copies.append({"from": "this", "to": "result"}) - - if copies: - pass_through_rules.append({ - "function": method_name, - "copy": copies, - }) - - if pass_through_rules: - config_file = ws["config"] / "custom-propagators.yaml" - write_yaml(config_file, {"passThrough": pass_through_rules}) - print(f"Phase 4: Created {len(pass_through_rules)} custom passThrough rules") - - # Rescan with approximations - sarif_path_2 = ws["results"] / "report-2.sarif" - ext_methods_path_2 = ws["results"] / "external-methods-2.yaml" - - result2 = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path_2), - rulesets=["builtin", str(ws["rules"])], - rule_ids=["stirling-path-traversal"], - approximations_config=str(config_file), - external_methods=str(ext_methods_path_2), - severity=["note", "warning", "error"], - timeout=600, - ) - - if result2.ok: - sarif_data_2 = load_sarif(sarif_path_2) - findings_2 = sarif_findings_for_rule(sarif_data_2, "stirling-path-traversal") - print(f"Phase 4: Rescan found {len(findings_2)} findings (was {len(findings)})") - - if ext_methods_path_2.exists(): - ext_data_2 = load_external_methods(ext_methods_path_2) - wo2, wr2 = count_external_methods(ext_data_2) - print(f" External methods after approx: {wo2} without (was {wo_count}), {wr2} with (was {wr_count})") - - # Verify the approximations had some effect - delta_findings = len(findings_2) - len(findings) - delta_methods = wo_count - wo2 - print(f" Delta: {delta_findings:+d} findings, {delta_methods:+d} newly modeled methods") - - print("\n=== Full agent loop completed ===") -``` - ---- - -## 9. Running Tests - -### Quick validation (existing features only) - -```bash -cd agent-mode/test - -# Run only tests that work with current implementation -pytest test_build.py -m "not new_feature and not slow" -v - -# Run build tests including slow ones (actual scans) -pytest test_build.py -m "not new_feature" -v --timeout=900 -``` - -### Full test suite (after new features are implemented) - -```bash -cd agent-mode/test - -# Run all tests -pytest -v --timeout=900 - -# Run specific suite -pytest test_rules.py -v --timeout=900 -pytest test_approximations.py -v --timeout=900 -pytest test_external_methods.py -v --timeout=900 - -# Run the full integration loop -pytest test_full_loop.py -v --timeout=1800 - -# Exclude slow tests for quick checks -pytest -m "not slow" -v -``` - -### Development mode (direct JAR invocation) - -When `opentaint` is not on PATH, tests automatically fall back to invoking the analyzer/autobuilder JARs directly. Set environment variables if the JARs are in non-default locations: - -```bash -# Point to locally-built JARs -export JAVA_HOME=/path/to/java-21 -export OPENTAINT_ANALYZER_JAR=/path/to/opentaint-project-analyzer.jar -export OPENTAINT_AUTOBUILDER_JAR=/path/to/opentaint-project-auto-builder.jar - -pytest -v --timeout=900 -``` - -### CI Integration - -For CI, use a matrix of test suites to parallelize: - -```yaml -# .github/workflows/ci-agent-mode-tests.yaml -jobs: - test: - strategy: - matrix: - suite: [test_build, test_rules, test_approximations, test_external_methods] - steps: - - name: Run agent-mode tests - run: | - cd agent-mode/test - pytest ${{ matrix.suite }}.py -v --timeout=900 -m "not new_feature" -``` - ---- - -## Summary - -| Suite | Tests | Markers | Purpose | -|---|---|---|---| -| `test_build.py` | 5 | `slow` | Project build: pre-compiled, auto-compile, error handling | -| `test_rules.py` | 8 | `slow`, `new_feature` | Rule creation, rule-id filter, rule tests, custom rules on Stirling | -| `test_approximations.py` | 8 | `slow`, `new_feature` | YAML config, code-based approximations, compilation errors, combined | -| `test_external_methods.py` | 7 | `slow`, `new_feature` | External methods output, structure validation, coverage changes | -| `test_full_loop.py` | 1 | `slow`, `new_feature` | Full agent workflow: rule → test → scan → approx → rescan | -| **Total** | **29** | | | - -Tests marked `new_feature` will pass once the corresponding engine/CLI changes from `agent-mode-design.md` are implemented. Tests without that marker can run today against the existing codebase (using direct JAR invocation). diff --git a/agent-mode/test/conftest.py b/agent-mode/test/conftest.py deleted file mode 100644 index 4fcc07c9f..000000000 --- a/agent-mode/test/conftest.py +++ /dev/null @@ -1,497 +0,0 @@ -""" -Shared fixtures and helpers for agent-mode tests. - -All tests use the Go CLI binary (`opentaint`). In development mode, the binary -is located at `cli/bin/opentaint` relative to the repo root, and hidden -`--analyzer-jar` / `--autobuilder-jar` flags are passed automatically to point -at locally-built JARs. -""" - -import json -import os -import shutil -import subprocess -import tempfile -import time -from dataclasses import dataclass, field -from pathlib import Path -from typing import Optional - -import pytest -import yaml - - -# ─── Timing ────────────────────────────────────────────────────────────────── - - -@pytest.hookimpl(tryfirst=True) -def pytest_runtest_setup(item): - """Record start time before each test.""" - item._start_time = time.time() - - -@pytest.hookimpl(trylast=True) -def pytest_runtest_teardown(item, nextitem): - """Print elapsed time after each test.""" - start = getattr(item, "_start_time", None) - if start is not None: - elapsed = time.time() - start - print(f"\n [timing] {item.nodeid}: {elapsed:.1f}s") - - -# ─── Paths ─────────────────────────────────────────────────────────────────── - -STIRLING_PROJECT = Path("/home/sobol/data/Stirling-PDF/seqra-project/project.yaml") -STIRLING_PROJECT_DIR = STIRLING_PROJECT.parent -OPENTAINT_ROOT = Path(__file__).resolve().parent.parent.parent # -> opentaint/ -FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures" -BUILTIN_RULES_DIR = OPENTAINT_ROOT / "rules" / "ruleset" - - -# ─── CLI Resolution ────────────────────────────────────────────────────────── - - -def _find_cli_binary() -> str: - """ - Find the opentaint CLI binary. Resolution order: - 1. OPENTAINT_CLI env var - 2. Local dev build at cli/bin/opentaint - 3. opentaint on PATH - """ - env_cli = os.environ.get("OPENTAINT_CLI") - if env_cli: - p = Path(env_cli) - if p.exists(): - return str(p) - - dev_binary = OPENTAINT_ROOT / "cli" / "bin" / "opentaint" - if dev_binary.exists(): - return str(dev_binary) - - on_path = shutil.which("opentaint") - if on_path: - return on_path - - pytest.exit( - "opentaint CLI binary not found. Build it with: cd cli && go build -o ./bin/opentaint .", - returncode=1, - ) - - -def _find_local_jar(env_var: str, candidates: list) -> Optional[str]: - """Find a locally-built JAR by env var or candidate paths.""" - env_jar = os.environ.get(env_var) - if env_jar: - p = Path(env_jar) - if p.exists(): - return str(p) - - for c in candidates: - if c.exists(): - return str(c) - return None - - -def _find_analyzer_jar() -> Optional[str]: - """Find locally-built analyzer JAR for --analyzer-jar hidden flag.""" - return _find_local_jar( - "OPENTAINT_ANALYZER_JAR", - [ - OPENTAINT_ROOT - / "core" - / "build" - / "libs" - / "opentaint-project-analyzer.jar", - ], - ) - - -def _find_autobuilder_jar() -> Optional[str]: - """Find locally-built autobuilder JAR for --autobuilder-jar hidden flag.""" - return _find_local_jar( - "OPENTAINT_AUTOBUILDER_JAR", - [ - OPENTAINT_ROOT - / "autobuilder" - / "build" - / "libs" - / "opentaint-project-auto-builder.jar", - ], - ) - - -# ─── CLI Abstraction ───────────────────────────────────────────────────────── - - -@dataclass -class CLIResult: - """Result of a CLI command execution.""" - - returncode: int - stdout: str - stderr: str - command: list - - @property - def ok(self) -> bool: - return self.returncode == 0 - - def assert_ok(self, msg: str = ""): - assert self.ok, ( - f"Command failed (rc={self.returncode}){': ' + msg if msg else ''}\n" - f" cmd: {' '.join(str(c) for c in self.command)}\n" - f" stderr: {self.stderr[:2000]}" - ) - - def assert_failed(self, msg: str = ""): - assert not self.ok, ( - f"Command unexpectedly succeeded{': ' + msg if msg else ''}\n" - f" cmd: {' '.join(str(c) for c in self.command)}\n" - f" stdout: {self.stdout[:2000]}" - ) - - -@dataclass -class OpenTaintCLI: - """ - Abstraction over the opentaint Go CLI binary. - - All commands go through the CLI. In dev mode, hidden --analyzer-jar and - --autobuilder-jar flags are passed to point at locally-built JARs. - """ - - cli_path: str = "" - analyzer_jar: Optional[str] = None - autobuilder_jar: Optional[str] = None - timeout: int = 600 # seconds - - def _base_cmd(self) -> list: - """Return the base command with hidden JAR flags if set.""" - cmd = [self.cli_path] - if self.analyzer_jar: - cmd.extend(["--analyzer-jar", self.analyzer_jar]) - if self.autobuilder_jar: - cmd.extend(["--autobuilder-jar", self.autobuilder_jar]) - return cmd - - def run( - self, args: list, timeout: Optional[int] = None, env: Optional[dict] = None - ) -> CLIResult: - """Run an arbitrary command and return the result.""" - str_args = [str(a) for a in args] - run_env = {**os.environ, **(env or {})} - t = timeout or self.timeout - try: - proc = subprocess.run( - str_args, - capture_output=True, - text=True, - timeout=t, - env=run_env, - ) - return CLIResult(proc.returncode, proc.stdout, proc.stderr, str_args) - except subprocess.TimeoutExpired: - return CLIResult(-1, "", f"Timeout after {t}s", str_args) - - def scan( - self, - project_path: str, - output: str, - rulesets: list = None, - rule_ids: list = None, - approximations_config: Optional[str] = None, - dataflow_approximations: Optional[str] = None, - track_external_methods: bool = False, - severity: list = None, - timeout: int = 900, - max_memory: str = "8G", - extra_flags: list = None, - ) -> CLIResult: - """Run opentaint scan. - - If ``project_path`` points at a pre-compiled project model (either the - directory containing ``project.yaml`` or the ``project.yaml`` file - itself), the scan is invoked with ``--project-model ``. Otherwise - the path is forwarded as a source project. - """ - p = Path(project_path) - if p.name == "project.yaml" and p.is_file(): - p = p.parent - if p.is_dir() and (p / "project.yaml").is_file(): - cmd = self._base_cmd() + ["scan", "-o", output, "--project-model", str(p)] - else: - cmd = self._base_cmd() + ["scan", str(p), "-o", output] - for rs in rulesets or ["builtin"]: - cmd.extend(["--ruleset", rs]) - for rid in rule_ids or []: - cmd.extend(["--rule-id", rid]) - if approximations_config: - cmd.extend(["--approximations-config", approximations_config]) - if dataflow_approximations: - cmd.extend(["--dataflow-approximations", dataflow_approximations]) - if track_external_methods: - cmd.append("--track-external-methods") - for sev in severity or ["warning", "error"]: - cmd.extend(["--severity", sev]) - cmd.extend(["--timeout", f"{timeout}s", "--max-memory", max_memory]) - cmd.extend(extra_flags or []) - return self.run(cmd, timeout=timeout + 60) - - def test_rules( - self, - project_path: str, - rulesets: list, - output_dir: str, - timeout: int = 300, - max_memory: str = "8G", - ) -> CLIResult: - """Run opentaint agent test-rules. - - The CLI expects a directory path (it looks for project.yaml inside). - If project_path points to a project.yaml file, the parent directory is used. - """ - p = Path(project_path) - if p.name == "project.yaml" and p.is_file(): - project_path = str(p.parent) - cmd = self._base_cmd() + ["agent", "test-rules", project_path] - for rs in rulesets: - cmd.extend(["--ruleset", rs]) - cmd.extend(["-o", output_dir]) - cmd.extend(["--timeout", f"{timeout}s", "--max-memory", max_memory]) - return self.run(cmd, timeout=timeout + 60) - - def compile( - self, - project_path: str, - output_dir: str, - timeout: int = 300, - ) -> CLIResult: - """Run opentaint compile.""" - cmd = self._base_cmd() + ["compile", project_path, "-o", output_dir] - return self.run(cmd, timeout=timeout + 60) - - def rules_path(self) -> CLIResult: - """Run opentaint agent rules-path.""" - return self.run(self._base_cmd() + ["agent", "rules-path"]) - - def init_test_project( - self, - output_dir: str, - dependencies: list = None, - ) -> CLIResult: - """Run opentaint agent init-test-project.""" - cmd = self._base_cmd() + ["agent", "init-test-project", output_dir] - for dep in dependencies or []: - cmd.extend(["--dependency", dep]) - return self.run(cmd) - - -# ─── Fixtures ───────────────────────────────────────────────────────────────── - - -@pytest.fixture(scope="session") -def cli() -> OpenTaintCLI: - """Provide an OpenTaintCLI instance configured for the current environment.""" - return OpenTaintCLI( - cli_path=_find_cli_binary(), - analyzer_jar=_find_analyzer_jar(), - autobuilder_jar=_find_autobuilder_jar(), - ) - - -@pytest.fixture(scope="session") -def stirling_project() -> Path: - """Path to the Stirling-PDF project.yaml.""" - assert STIRLING_PROJECT.exists(), ( - f"Stirling-PDF project not found at {STIRLING_PROJECT}" - ) - return STIRLING_PROJECT - - -@pytest.fixture -def tmp_output(tmp_path) -> Path: - """Provide a temporary output directory for test results.""" - return tmp_path - - -@pytest.fixture(scope="session") -def builtin_rules() -> Path: - """Path to the built-in rules directory.""" - assert BUILTIN_RULES_DIR.exists(), f"Builtin rules not found at {BUILTIN_RULES_DIR}" - return BUILTIN_RULES_DIR - - -# ─── Helpers ────────────────────────────────────────────────────────────────── - - -def load_sarif(path: Path) -> dict: - """Load and validate a SARIF file.""" - assert path.exists(), f"SARIF file not found: {path}" - with open(path) as f: - data = json.load(f) - assert data.get("version") == "2.1.0", "Not a valid SARIF 2.1.0 file" - assert "runs" in data and len(data["runs"]) > 0, "SARIF has no runs" - return data - - -def sarif_results(data: dict) -> list: - """Extract results from a SARIF report.""" - return data["runs"][0].get("results", []) - - -def sarif_rule_ids(data: dict) -> set: - """Extract unique rule IDs from SARIF results.""" - return {r["ruleId"] for r in sarif_results(data)} - - -def sarif_findings_for_rule(data: dict, rule_id: str) -> list: - """Get findings for a specific rule ID. - - Matches both exact ID and semgrep-style dot-separated ID (e.g. - 'stirling-path-traversal' matches 'java.security.stirling-path-traversal'). - """ - return [ - r - for r in sarif_results(data) - if r["ruleId"] == rule_id or r["ruleId"].endswith("." + rule_id) - ] - - -def _derive_external_methods_paths(sarif_path: Path) -> tuple: - """Return the two fixed external-methods file paths next to the SARIF. - - The analyzer always writes ``dropped-external-methods.yaml`` and - ``approximated-external-methods.yaml`` into its output directory. Here we - key off the SARIF path (or its parent directory), matching how the - Go CLI routes ``-o`` to ``--output-dir``. - """ - parent = sarif_path if sarif_path.is_dir() else sarif_path.parent - return ( - parent / "dropped-external-methods.yaml", - parent / "approximated-external-methods.yaml", - ) - - -def load_external_methods(sarif_path: Path) -> dict: - """Load external methods from the two fixed files next to the SARIF. - - Returns ``{"withoutRules": [...], "withRules": [...]}``. - """ - wo_path, wr_path = _derive_external_methods_paths(sarif_path) - - without_rules = [] - with_rules = [] - - def _read(path: Path) -> list: - if not path.exists(): - return [] - with open(path) as f: - data = yaml.safe_load(f) - if data is None: - return [] - # Analyzer writes a top-level YAML list; tolerate {methods: [...]} too. - if isinstance(data, list): - return data - if isinstance(data, dict): - return data.get("methods", []) - return [] - - without_rules = _read(wo_path) - with_rules = _read(wr_path) - - assert wo_path.exists() or wr_path.exists(), ( - f"Neither external methods file found: {wo_path}, {wr_path}" - ) - return {"withoutRules": without_rules, "withRules": with_rules} - - -def external_methods_exist(sarif_path: Path) -> bool: - """Check if at least one external-methods file exists next to the SARIF.""" - wo_path, wr_path = _derive_external_methods_paths(sarif_path) - return wo_path.exists() or wr_path.exists() - - -def count_external_methods(data: dict) -> tuple: - """Return (without_rules_count, with_rules_count).""" - without = len(data.get("withoutRules", [])) - with_rules = len(data.get("withRules", [])) - return without, with_rules - - -def write_yaml(path: Path, content: dict): - """Write a YAML file.""" - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w") as f: - yaml.dump(content, f, default_flow_style=False, sort_keys=False) - - -def write_text(path: Path, content: str): - """Write a text file.""" - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content) - - -import re - - -def parse_analyzer_timing(output: str) -> dict: - """ - Parse timing information from CLI/analyzer output. - - Looks for patterns like: - - "Compiling project model" (Go CLI spinner phase) - - "Analyzing project" (Go CLI spinner phase) - - "Start IFDS analysis" / "Finish IFDS analysis" (analyzer log) - - "Analysis done in " (IFDS elapsed) - - "Start SARIF report generation" / "Finish SARIF report" - - "Start vulnerability confirmation" - - "Start trace generation" / "Finish trace generation" - - Returns a dict with discovered timing info (best-effort, may be empty - if the analyzer doesn't log at info level to stdout). - """ - timing = {} - - # Look for IFDS analysis elapsed time: "Analysis done in 12.345s" or "Analysis done in 1m 23s" etc. - m = re.search(r"Analysis done in (.+?)$", output, re.MULTILINE) - if m: - timing["ifds_elapsed"] = m.group(1).strip() - - # Count phase markers - phases = [ - ("ifds_start", r"Start IFDS analysis"), - ("ifds_finish", r"Finish IFDS analysis"), - ("sarif_start", r"Start SARIF report generation"), - ("sarif_finish", r"Finish SARIF report"), - ("vuln_confirm", r"Start vulnerability confirmation"), - ("trace_gen_start", r"Start trace generation"), - ("trace_gen_finish", r"Finish trace generation"), - ("se_start", r"Start SE for project"), - ("se_finish", r"Finish SE for project"), - ] - for key, pattern in phases: - if re.search(pattern, output): - timing[key] = True - - # Total vulnerabilities count - m = re.search(r"Total vulnerabilities:\s*(\d+)", output) - if m: - timing["total_vulnerabilities"] = int(m.group(1)) - - return timing - - -def print_timing_breakdown(label: str, result: "CLIResult"): - """Print a timing breakdown from CLI output if available.""" - timing = parse_analyzer_timing(result.stdout + result.stderr) - if timing: - parts = [] - if "ifds_elapsed" in timing: - parts.append(f"IFDS: {timing['ifds_elapsed']}") - if "total_vulnerabilities" in timing: - parts.append(f"vulns: {timing['total_vulnerabilities']}") - detected_phases = [k for k, v in timing.items() if v is True] - if detected_phases: - parts.append(f"phases: {', '.join(detected_phases)}") - if parts: - print(f" [timing:{label}] {' | '.join(parts)}") diff --git a/agent-mode/test/fixtures/approximations/java/PdfBoxDocumentApprox.java b/agent-mode/test/fixtures/approximations/java/PdfBoxDocumentApprox.java deleted file mode 100644 index fdd16d141..000000000 --- a/agent-mode/test/fixtures/approximations/java/PdfBoxDocumentApprox.java +++ /dev/null @@ -1,50 +0,0 @@ -package agent.approximations; - -import org.opentaint.ir.approximation.annotation.ApproximateByName; -import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; -import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; - -/** - * Code-based approximation for PDFBox's PDDocument class. - * - * IMPORTANT: Approximations are ONLY applicable to external methods — - * library classes whose source code is NOT part of the project being analyzed. - * PDFBox is an external dependency of Stirling-PDF (pdfbox-3.0.6.jar). - * - * This models complex taint propagation through PDDocument methods that - * involve internal state and cannot be expressed with simple YAML passThrough. - * - * PDDocument.save(OutputStream) — taint on the document (this) flows to - * the output stream, modeling the case where a tainted PDF is serialized. - */ -@ApproximateByName("org.apache.pdfbox.pdmodel.PDDocument") -public class PdfBoxDocumentApprox { - - /** - * Model save(OutputStream) — taint on this flows to arg(0). - * A tainted document writes tainted bytes to the output stream. - */ - public void save(java.io.OutputStream output) throws java.io.IOException { - org.apache.pdfbox.pdmodel.PDDocument self = - (org.apache.pdfbox.pdmodel.PDDocument) (Object) this; - if (OpentaintNdUtil.nextBool()) { - throw new java.io.IOException("approximation: failure path"); - } - // Model: taint from document flows to output stream - byte[] data = new byte[1]; - output.write(data); - } - - /** - * Model getPage(int) — taint on this flows to result. - * A tainted document produces tainted pages. - */ - public Object getPage(int pageIndex) { - org.apache.pdfbox.pdmodel.PDDocument self = - (org.apache.pdfbox.pdmodel.PDDocument) (Object) this; - if (OpentaintNdUtil.nextBool()) { - return null; - } - return self.getPages().get(pageIndex); - } -} diff --git a/agent-mode/test/fixtures/approximations/yaml/custom-propagators.yaml b/agent-mode/test/fixtures/approximations/yaml/custom-propagators.yaml deleted file mode 100644 index 6b475284a..000000000 --- a/agent-mode/test/fixtures/approximations/yaml/custom-propagators.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Custom passThrough rules for external library methods encountered by the engine. -# -# IMPORTANT: Approximations (both YAML and code-based) are ONLY applicable to -# external methods — library classes whose source code is NOT part of the project. -# Project classes with source code are analyzed directly by the engine. -# -# These methods would appear in the external-methods.yaml output under -# withoutRules, indicating the engine has no propagation model for them. - -passThrough: - # org.apache.pdfbox.pdmodel.PDDocument#getPage — taint on this flows to result - # PDFBox is an external dependency of Stirling-PDF - - function: org.apache.pdfbox.pdmodel.PDDocument#getPage - copy: - - from: this - to: result - - # org.apache.pdfbox.text.PDFTextStripper#getText — taint on arg(0) flows to result - # Extracts text from a tainted document - - function: org.apache.pdfbox.text.PDFTextStripper#getText - copy: - - from: arg(0) - to: result - - # com.fasterxml.jackson.databind.ObjectMapper#readValue — taint flows through deserialization - # Jackson is an external dependency - - function: com.fasterxml.jackson.databind.ObjectMapper#readValue - copy: - - from: arg(0) - to: result - - # org.jsoup.Jsoup#parse — taint on arg(0) flows to result - # Jsoup is an external dependency used for HTML parsing - - function: org.jsoup.Jsoup#parse - copy: - - from: arg(0) - to: result diff --git a/agent-mode/test/fixtures/rules/java/lib/stirling-source.yaml b/agent-mode/test/fixtures/rules/java/lib/stirling-source.yaml deleted file mode 100644 index 3b3d04763..000000000 --- a/agent-mode/test/fixtures/rules/java/lib/stirling-source.yaml +++ /dev/null @@ -1,10 +0,0 @@ -rules: - - id: stirling-multipart-file-source - options: - lib: true - severity: NOTE - message: Untrusted multipart file data from Spring controller - languages: [java] - patterns: - - pattern: | - $RETURNTYPE $METHOD(..., @RequestParam MultipartFile $UNTRUSTED, ...) { ... } diff --git a/agent-mode/test/fixtures/rules/java/security/stirling-path-traversal.yaml b/agent-mode/test/fixtures/rules/java/security/stirling-path-traversal.yaml deleted file mode 100644 index da645e1a5..000000000 --- a/agent-mode/test/fixtures/rules/java/security/stirling-path-traversal.yaml +++ /dev/null @@ -1,18 +0,0 @@ -rules: - - id: stirling-path-traversal - severity: ERROR - message: >- - User-uploaded file name flows to file system operation without sanitization - metadata: - cwe: CWE-22 - short-description: Path Traversal via uploaded file name - languages: [java] - mode: join - join: - refs: - - rule: java/lib/stirling-source.yaml#stirling-multipart-file-source - as: source - - rule: java/lib/generic/path-traversal-sinks.yaml#java-path-traversal-sinks - as: sink - on: - - 'source.$UNTRUSTED -> sink.$UNTRUSTED' diff --git a/agent-mode/test/fixtures/test-samples/src/main/java/test/PathTraversalTest.java b/agent-mode/test/fixtures/test-samples/src/main/java/test/PathTraversalTest.java deleted file mode 100644 index 94c96ff51..000000000 --- a/agent-mode/test/fixtures/test-samples/src/main/java/test/PathTraversalTest.java +++ /dev/null @@ -1,38 +0,0 @@ -package test; - -import org.opentaint.sast.test.util.PositiveRuleSample; -import org.opentaint.sast.test.util.NegativeRuleSample; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; -import org.springframework.web.multipart.MultipartFile; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; - -@RestController -public class PathTraversalTest { - - @PositiveRuleSample(value = "java/security/stirling-path-traversal.yaml", id = "stirling-path-traversal") - @PostMapping("/upload-vulnerable") - public String vulnerable(@RequestParam MultipartFile file) throws IOException { - // Directly use original filename — path traversal possible - String filename = file.getOriginalFilename(); - Path dest = Paths.get("/uploads/" + filename); - Files.copy(file.getInputStream(), dest); - return "uploaded"; - } - - @NegativeRuleSample(value = "java/security/stirling-path-traversal.yaml", id = "stirling-path-traversal") - @PostMapping("/upload-safe") - public String safe(@RequestParam MultipartFile file) throws IOException { - // Use sanitized filename — only the base name, no path components - String filename = new File(file.getOriginalFilename()).getName(); - Path dest = Paths.get("/uploads/").resolve(filename); - Files.copy(file.getInputStream(), dest); - return "uploaded"; - } -} diff --git a/agent-mode/test/pytest.ini b/agent-mode/test/pytest.ini deleted file mode 100644 index cc24b35fb..000000000 --- a/agent-mode/test/pytest.ini +++ /dev/null @@ -1,5 +0,0 @@ -[pytest] -testpaths = . -markers = - new_feature: Tests for features not yet implemented (deselect with -m "not new_feature") - slow: Tests that run full analysis (>60s) diff --git a/agent-mode/test/test_approximations.py b/agent-mode/test/test_approximations.py deleted file mode 100644 index 6f911c151..000000000 --- a/agent-mode/test/test_approximations.py +++ /dev/null @@ -1,358 +0,0 @@ -""" -Suite 3: Approximations Generation/Override - -Approximations (both YAML passThrough and code-based) are ONLY applicable to -external methods — library classes whose source code is NOT part of the analyzed -project. The agent discovers which methods need approximations via the ---external-methods output (withoutRules section). - -Tests: -3.1 Scan with --approximations-config (YAML passThrough for external library methods) -3.2 Scan with --approximations-config + --ruleset together (§1.2) -3.3 Scan with --dataflow-approximations from .java sources (auto-compile, §1.4) -3.4 Approximation compilation failure handling (bad Java source) -3.5 Duplicate approximation targeting built-in class (error) -3.6 Scan with both --approximations-config and --dataflow-approximations -3.7 Verify approximation changes analysis results -3.8 Invalid YAML config error handling -""" - -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, - load_sarif, - sarif_results, - sarif_rule_ids, - sarif_findings_for_rule, - write_text, - write_yaml, - FIXTURES_DIR, - BUILTIN_RULES_DIR, -) - - -class TestYAMLApproximationsConfig: - """3.1-3.2: YAML passThrough config for external library methods.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_with_approximations_config( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Scan with --approximations-config applies custom passThrough rules - for external library methods (PDFBox, Jackson, etc.). - """ - sarif_path = tmp_output / "report.sarif" - config_path = ( - FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - ) - - if not config_path.exists(): - pytest.skip("Fixture approximation config not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - approximations_config=str(config_path), - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with --approximations-config failed") - data = load_sarif(sarif_path) - assert len(sarif_results(data)) >= 0 - - @pytest.mark.slow - @pytest.mark.new_feature - def test_approximations_config_with_custom_ruleset( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - --approximations-config and --ruleset can be used together (§1.2). - Previously these were mutually exclusive. - """ - sarif_path = tmp_output / "report.sarif" - config_path = ( - FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - ) - custom_rules = FIXTURES_DIR / "rules" - - if not config_path.exists() or not custom_rules.exists(): - pytest.skip("Fixture files not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(custom_rules)], - rule_ids=[ - "java/security/stirling-path-traversal.yaml:stirling-path-traversal" - ], - approximations_config=str(config_path), - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok( - "Scan with both --approximations-config and --ruleset failed. " - "These should work together per design §1.2" - ) - - @pytest.mark.slow - @pytest.mark.new_feature - def test_invalid_approximations_config_errors( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """Invalid YAML config should produce a clear error.""" - sarif_path = tmp_output / "report.sarif" - bad_config = tmp_output / "bad-config.yaml" - write_text(bad_config, "this is not: [valid: yaml: config") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - approximations_config=str(bad_config), - timeout=120, - ) - result.assert_failed("Scan should fail with invalid approximations config") - # Verify the error mentions config/yaml/parse/fail (may be in stdout or stderr) - combined_output = (result.stdout + result.stderr).lower() - assert any( - kw in combined_output - for kw in ["config", "yaml", "parse", "error", "failed", "fail"] - ), ( - f"Error output should mention config/yaml/parse/fail.\n" - f" stdout: {result.stdout[:500]}\n" - f" stderr: {result.stderr[:500]}" - ) - - -class TestCodeBasedApproximations: - """3.3-3.5: Code-based approximations via --dataflow-approximations.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_with_java_source_approximations( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - --dataflow-approximations with .java source files auto-compiles them. - The approximation targets PDFBox's PDDocument (an external library class). - """ - sarif_path = tmp_output / "report.sarif" - approx_dir = FIXTURES_DIR / "approximations" / "java" - - if not approx_dir.exists(): - pytest.skip("Fixture approximation source not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - dataflow_approximations=str(approx_dir), - severity=["warning", "error"], - timeout=600, - ) - if result.ok: - data = load_sarif(sarif_path) - print( - f"Scan with code-based approximations: {len(sarif_results(data))} findings" - ) - - @pytest.mark.slow - @pytest.mark.new_feature - def test_approximation_compilation_failure( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - --dataflow-approximations with invalid Java source should fail - with compilation errors. The Go CLI auto-compiles .java files - using javac before passing them to the analyzer. - """ - sarif_path = tmp_output / "report.sarif" - bad_approx_dir = tmp_output / "bad-approximations" - bad_approx_dir.mkdir() - write_text( - bad_approx_dir / "BrokenApprox.java", - """\ -package agent.approximations; - -import org.opentaint.ir.approximation.annotation.Approximate; - -@Approximate(com.nonexistent.library.DoesNotExist.class) -public class BrokenApprox { - public void broken() { - com.nonexistent.library.DoesNotExist x = null; - } -} -""", - ) - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - dataflow_approximations=str(bad_approx_dir), - timeout=120, - ) - result.assert_failed("Scan should fail when approximation compilation fails") - combined_output = (result.stdout + result.stderr).lower() - assert ( - "compil" in combined_output - or "javac" in combined_output - or "error" in combined_output - ), ( - f"Error should mention compilation failure.\n" - f" stdout: {result.stdout[:500]}\n" - f" stderr: {result.stderr[:500]}" - ) - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - dataflow_approximations=str(bad_approx_dir), - timeout=120, - ) - result.assert_failed( - "Scan should fail when approximation directory has no .class files" - ) - combined_output = (result.stdout + result.stderr).lower() - assert ( - "compil" in combined_output - or ".class" in combined_output - or ".java" in combined_output - ), ( - f"Error should mention compilation or .class/.java files.\n" - f" stdout: {result.stdout[:500]}\n" - f" stderr: {result.stderr[:500]}" - ) - - @pytest.mark.slow - @pytest.mark.new_feature - def test_duplicate_approximation_errors( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - A custom approximation targeting a class that already has a built-in - approximation should produce an error (bijection violation). - - This test requires a pre-compiled fixture class file. If the fixture - directory doesn't contain .class files, the test is skipped. - """ - dup_approx_fixture = FIXTURES_DIR / "approximations" / "duplicate" - if not dup_approx_fixture.exists() or not list( - dup_approx_fixture.rglob("*.class") - ): - pytest.skip( - "Duplicate approximation fixture not available — " - "requires pre-compiled .class file with @Approximate targeting a builtin class" - ) - - sarif_path = tmp_output / "report.sarif" - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - dataflow_approximations=str(dup_approx_fixture), - timeout=300, - ) - result.assert_failed("Duplicate approximation should produce an error") - - -class TestCombinedApproximations: - """3.6-3.7: Combining YAML config + code-based approximations.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_with_both_approximation_types( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Both --approximations-config and --dataflow-approximations can be - used in the same scan. - """ - sarif_path = tmp_output / "report.sarif" - yaml_config = ( - FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - ) - java_approx = FIXTURES_DIR / "approximations" / "java" - custom_rules = FIXTURES_DIR / "rules" - - if not yaml_config.exists() or not java_approx.exists(): - pytest.skip("Fixture files not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(custom_rules)], - rule_ids=[ - "java/security/stirling-path-traversal.yaml:stirling-path-traversal" - ], - approximations_config=str(yaml_config), - dataflow_approximations=str(java_approx), - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with combined approximation types failed") - - @pytest.mark.slow - @pytest.mark.new_feature - def test_approximations_change_results( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Running the same scan with and without custom approximations should - produce different results — the approximations add propagation paths. - """ - custom_rules = FIXTURES_DIR / "rules" - yaml_config = ( - FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - ) - - if not custom_rules.exists() or not yaml_config.exists(): - pytest.skip("Fixture files not created yet") - - # Run 1: without approximations - sarif_no_approx = tmp_output / "no-approx" / "report.sarif" - (tmp_output / "no-approx").mkdir() - r1 = cli.scan( - project_path=str(stirling_project), - output=str(sarif_no_approx), - rulesets=["builtin", str(custom_rules)], - rule_ids=[ - "java/security/stirling-path-traversal.yaml:stirling-path-traversal" - ], - severity=["note", "warning", "error"], - timeout=600, - ) - - # Run 2: with approximations - sarif_with_approx = tmp_output / "with-approx" / "report.sarif" - (tmp_output / "with-approx").mkdir() - r2 = cli.scan( - project_path=str(stirling_project), - output=str(sarif_with_approx), - rulesets=["builtin", str(custom_rules)], - rule_ids=[ - "java/security/stirling-path-traversal.yaml:stirling-path-traversal" - ], - approximations_config=str(yaml_config), - severity=["note", "warning", "error"], - timeout=600, - ) - - r1.assert_ok("Scan without approximations failed") - r2.assert_ok("Scan with approximations failed") - - data1 = load_sarif(sarif_no_approx) - data2 = load_sarif(sarif_with_approx) - count1 = len(sarif_results(data1)) - count2 = len(sarif_results(data2)) - print(f"Without approximations: {count1} findings") - print(f"With approximations: {count2} findings") - assert count1 != count2, ( - f"Approximations had no effect on results — both runs produced {count1} findings. " - "Custom passThrough rules should change dataflow propagation." - ) diff --git a/agent-mode/test/test_build.py b/agent-mode/test/test_build.py deleted file mode 100644 index 60219dd8d..000000000 --- a/agent-mode/test/test_build.py +++ /dev/null @@ -1,152 +0,0 @@ -""" -Suite 1: Project Build Scenarios - -Tests: -1.1 Scan with pre-compiled project model (project.yaml) -1.2 Scan with source project (triggers auto-compile) -1.3 Compile-only (autobuilder) -1.4 Scan with invalid project path (error handling) -1.5 Scan with custom output directory -""" - -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, - load_sarif, - sarif_results, - sarif_rule_ids, - STIRLING_PROJECT_DIR, - BUILTIN_RULES_DIR, -) - - -class TestScanPreCompiledProject: - """1.1: Scan using the pre-compiled Stirling-PDF project model.""" - - @pytest.mark.slow - def test_scan_with_builtin_rules( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """Basic scan with builtin rules produces a valid SARIF with findings.""" - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with builtin rules failed") - - # Validate SARIF output - data = load_sarif(sarif_path) - results = sarif_results(data) - assert len(results) > 0, ( - "Scan produced no findings — expected some on Stirling-PDF" - ) - - # Should contain known vulnerability types - rule_ids = sarif_rule_ids(data) - print(f"Found {len(results)} findings across rules: {rule_ids}") - - @pytest.mark.slow - def test_scan_with_custom_ruleset_directory( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """Scan with a custom ruleset directory works alongside builtin.""" - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=[str(BUILTIN_RULES_DIR)], - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with custom ruleset directory failed") - data = load_sarif(sarif_path) - assert len(sarif_results(data)) > 0 - - @pytest.mark.slow - def test_scan_severity_filter_note( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """Scan with severity=note should include more findings.""" - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with note severity failed") - - -class TestScanFromSourceProject: - """1.2: Scan from source (auto-compiles via autobuilder first).""" - - @pytest.mark.slow - def test_scan_from_source_directory(self, cli: OpenTaintCLI, tmp_output: Path): - """ - Scan the Stirling-PDF source directory (not pre-compiled). - This triggers auto-compilation via autobuilder. - """ - sarif_path = tmp_output / "report.sarif" - source_dir = STIRLING_PROJECT_DIR / "sources" - - if not source_dir.exists(): - pytest.skip("Stirling-PDF source directory not available") - - result = cli.scan( - project_path=str(source_dir), - output=str(sarif_path), - rulesets=["builtin"], - timeout=900, - ) - if result.ok: - data = load_sarif(sarif_path) - assert len(sarif_results(data)) > 0 - - -class TestCompileOnly: - """1.3: Test the compile command separately.""" - - @pytest.mark.slow - def test_compile_source_project(self, cli: OpenTaintCLI, tmp_output: Path): - """Compile a source project into a project model.""" - source_dir = STIRLING_PROJECT_DIR / "sources" - model_dir = tmp_output / "project-model" - - if not source_dir.exists(): - pytest.skip("Stirling-PDF source directory not available") - - result = cli.compile( - project_path=str(source_dir), - output_dir=str(model_dir), - timeout=300, - ) - if result.ok: - project_yaml = model_dir / "project.yaml" - assert project_yaml.exists(), "compile did not produce project.yaml" - - -class TestErrorHandling: - """1.4: Error handling for invalid inputs.""" - - def test_scan_nonexistent_project(self, cli: OpenTaintCLI, tmp_output: Path): - """Scan with nonexistent project path should fail gracefully.""" - sarif_path = tmp_output / "report.sarif" - result = cli.scan( - project_path="/nonexistent/project/path", - output=str(sarif_path), - ) - result.assert_failed("Scan should fail for nonexistent project") - - def test_scan_missing_output_flag(self, cli: OpenTaintCLI, stirling_project: Path): - """Scan without -o flag should fail (it's required).""" - result = cli.run([cli.cli_path, "scan", str(stirling_project)]) - result.assert_failed("Scan should require -o flag") diff --git a/agent-mode/test/test_external_methods.py b/agent-mode/test/test_external_methods.py deleted file mode 100644 index 0d46530e8..000000000 --- a/agent-mode/test/test_external_methods.py +++ /dev/null @@ -1,260 +0,0 @@ -""" -Suite 4: External Methods Extraction - -Tests: -4.1 Scan with --external-methods produces two YAML files (without-rules / with-rules) -4.2 External methods files have correct structure (methods list with method, signature, factPositions, callSites) -4.3 External methods contain expected fields -4.4 without-rules list is non-empty for a real project -4.5 with-rules list contains known standard library methods -4.6 Scan with custom approximations reduces without-rules count -4.7 External methods extraction alongside SARIF output -""" - -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, - load_sarif, - sarif_results, - load_external_methods, - count_external_methods, - external_methods_exist, - FIXTURES_DIR, - BUILTIN_RULES_DIR, -) - - -class TestExternalMethodsBasic: - """4.1-4.3: Basic external methods output.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_produces_external_methods_file( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """--track-external-methods produces YAML files alongside SARIF output.""" - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - track_external_methods=True, - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with --track-external-methods failed") - assert external_methods_exist(sarif_path), ( - "External methods files not produced" - ) - - @pytest.mark.slow - @pytest.mark.new_feature - def test_external_methods_structure( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - External methods are split into two files (-without-rules.yaml and -with-rules.yaml). - Each entry has: method, signature, factPositions, callSites. - """ - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - track_external_methods=True, - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok() - - data = load_external_methods(sarif_path) - - for section_name in ["withoutRules", "withRules"]: - section = data.get(section_name, []) - for entry in section[:5]: - assert "method" in entry, f"Entry in {section_name} missing 'method'" - assert "signature" in entry, ( - f"Entry in {section_name} missing 'signature'" - ) - assert "factPositions" in entry, ( - f"Entry in {section_name} missing 'factPositions'" - ) - assert "callSites" in entry, ( - f"Entry in {section_name} missing 'callSites'" - ) - - assert "#" in entry["method"], ( - f"Method should be in Class#method format: {entry['method']}" - ) - - assert isinstance(entry["factPositions"], list), ( - f"factPositions should be a list: {entry['factPositions']}" - ) - - assert isinstance(entry["callSites"], int) and entry["callSites"] > 0, ( - f"callSites should be a positive integer: {entry['callSites']}" - ) - - for pos in entry["factPositions"]: - assert pos == "" or pos == "ret" or pos.startswith("arg("), ( - f"Invalid fact position: {pos} — expected '', 'ret', or 'arg(N)'" - ) - - -class TestExternalMethodsContent: - """4.4-4.5: External methods content validation.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_without_rules_nonempty_for_real_project( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Stirling-PDF uses many libraries without built-in approximations. - The withoutRules list should be non-empty. - """ - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - track_external_methods=True, - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok() - - data = load_external_methods(sarif_path) - without_count, with_count = count_external_methods(data) - print( - f"External methods: {without_count} without rules, {with_count} with rules" - ) - - assert without_count > 0, "Expected non-empty withoutRules for Stirling-PDF" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_with_rules_contains_standard_library_methods( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - The withRules section should contain standard library methods that - have built-in approximations. - """ - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - track_external_methods=True, - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok() - - data = load_external_methods(sarif_path) - with_rules = data.get("withRules", []) - with_rules_methods = {e["method"] for e in with_rules} - - print(f"Methods with rules ({len(with_rules_methods)}):") - for m in sorted(list(with_rules_methods))[:20]: - print(f" - {m}") - - -class TestExternalMethodsWithApproximations: - """4.6: Custom approximations reduce withoutRules count.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_approximations_reduce_without_rules( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Adding custom passThrough rules for methods that were in withoutRules - should move them to withRules. - """ - yaml_config = ( - FIXTURES_DIR / "approximations" / "yaml" / "custom-propagators.yaml" - ) - - if not yaml_config.exists(): - pytest.skip("Fixture approximation config not created yet") - - # Run 1: without custom approximations - sarif1 = tmp_output / "run1" / "report.sarif" - (tmp_output / "run1").mkdir() - r1 = cli.scan( - project_path=str(stirling_project), - output=str(sarif1), - rulesets=["builtin"], - track_external_methods=True, - severity=["warning", "error"], - timeout=600, - ) - - # Run 2: with custom approximations - sarif2 = tmp_output / "run2" / "report.sarif" - (tmp_output / "run2").mkdir() - r2 = cli.scan( - project_path=str(stirling_project), - output=str(sarif2), - rulesets=["builtin"], - approximations_config=str(yaml_config), - track_external_methods=True, - severity=["warning", "error"], - timeout=600, - ) - - if r1.ok and r2.ok: - data1 = load_external_methods(sarif1) - data2 = load_external_methods(sarif2) - wo1, _ = count_external_methods(data1) - wo2, _ = count_external_methods(data2) - print(f"Without custom approx: {wo1} methods without rules") - print(f"With custom approx: {wo2} methods without rules") - - methods_without_1 = {e["method"] for e in data1.get("withoutRules", [])} - methods_without_2 = {e["method"] for e in data2.get("withoutRules", [])} - newly_covered = methods_without_1 - methods_without_2 - if newly_covered: - print(f"Newly covered methods ({len(newly_covered)}):") - for m in sorted(newly_covered): - print(f" + {m}") - - -class TestExternalMethodsAlongsideSarif: - """4.7: External methods and SARIF are produced together.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_both_outputs_produced( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """A single scan produces both SARIF report and external methods file.""" - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - track_external_methods=True, - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok() - - assert sarif_path.exists(), "SARIF report not produced" - assert external_methods_exist(sarif_path), ( - "External methods files not produced" - ) - - sarif_data = load_sarif(sarif_path) - ext_data = load_external_methods(sarif_path) - assert len(sarif_results(sarif_data)) > 0, "SARIF has no results" - wo, wr = count_external_methods(ext_data) - assert wo + wr > 0, "External methods file is empty" diff --git a/agent-mode/test/test_full_loop.py b/agent-mode/test/test_full_loop.py deleted file mode 100644 index ecadcdbcc..000000000 --- a/agent-mode/test/test_full_loop.py +++ /dev/null @@ -1,290 +0,0 @@ -""" -Suite 5: Full Agent Loop (Integration) - -Simulates the complete agent workflow on Stirling-PDF: -1. Discover entry points (by reading source) -2. Create a custom path-traversal rule -3. Test the rule with samples -4. Run initial scan on Stirling-PDF -5. Analyze external methods -6. Create YAML approximation for an unmodeled method -7. Re-scan and verify the approximation has effect -""" - -import json -import shutil -import time -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, - load_sarif, - sarif_results, - sarif_rule_ids, - sarif_findings_for_rule, - load_external_methods, - count_external_methods, - external_methods_exist, - write_text, - write_yaml, - print_timing_breakdown, - FIXTURES_DIR, - BUILTIN_RULES_DIR, - STIRLING_PROJECT_DIR, -) - - -@pytest.mark.slow -@pytest.mark.new_feature -class TestFullAgentLoop: - """ - Simulates the agent's analysis workflow on Stirling-PDF. - - Phase 1 → discover entry points (manual) - Phase 2 → create rule + test - Phase 3 → scan + analyze + create approx + rescan - """ - - def _setup_workspace(self, tmp_output: Path) -> dict: - """Create the agent workspace directory layout.""" - workspace = { - "root": tmp_output, - "rules": tmp_output / "agent-rules", - "config": tmp_output / "agent-config", - "approximations": tmp_output / "agent-approximations" / "src", - "results": tmp_output / "results", - "test_project": tmp_output / "agent-test-project", - "test_compiled": tmp_output / "agent-test-compiled", - "test_output": tmp_output / "agent-test-output", - } - for d in workspace.values(): - if isinstance(d, Path): - d.mkdir(parents=True, exist_ok=True) - return workspace - - def test_full_agent_loop( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """Full end-to-end agent loop on Stirling-PDF.""" - ws = self._setup_workspace(tmp_output) - t0 = time.time() - - def _phase_time(label): - elapsed = time.time() - t0 - print(f" [{elapsed:6.1f}s] {label}") - - # ── Phase 1: Source Discovery (simulated) ───────────────────── - controllers = [ - "stirling.software.SPDF.controller.api.misc.PrintFileController", - "stirling.software.SPDF.controller.api.MergeController", - "stirling.software.SPDF.controller.api.SplitPDFController", - "stirling.software.SPDF.controller.api.security.*", - ] - print(f"Phase 1: Discovered {len(controllers)} controller groups") - _phase_time("Phase 1 complete (source discovery)") - - # ── Phase 2: Create Rule ────────────────────────────────────── - - builtin_path_traversal = ( - BUILTIN_RULES_DIR / "java" / "security" / "path-traversal.yaml" - ) - assert builtin_path_traversal.exists(), "Builtin path-traversal rule not found" - print("Phase 2a: Read builtin path-traversal rule") - - # Create custom source library rule - lib_dir = ws["rules"] / "java" / "lib" - lib_dir.mkdir(parents=True, exist_ok=True) - write_text( - lib_dir / "stirling-source.yaml", - """\ -rules: - - id: stirling-multipart-file-source - options: - lib: true - severity: NOTE - message: Untrusted multipart file data from Spring controller - languages: [java] - patterns: - - pattern: | - $RETURNTYPE $METHOD(..., @RequestParam MultipartFile $UNTRUSTED, ...) { ... } -""", - ) - - # Create join-mode security rule - sec_dir = ws["rules"] / "java" / "security" - sec_dir.mkdir(parents=True, exist_ok=True) - write_text( - sec_dir / "stirling-path-traversal.yaml", - """\ -rules: - - id: stirling-path-traversal - severity: ERROR - message: >- - User-uploaded file name flows to file system operation without sanitization - metadata: - cwe: CWE-22 - short-description: Path Traversal via uploaded file name - languages: [java] - mode: join - join: - refs: - - rule: java/lib/stirling-source.yaml#stirling-multipart-file-source - as: source - - rule: java/lib/generic/path-traversal-sinks.yaml#java-path-traversal-sinks - as: sink - on: - - 'source.$UNTRUSTED -> sink.$UNTRUSTED' -""", - ) - print("Phase 2b-c: Created custom rules") - _phase_time("Phase 2 complete (rule creation)") - - # ── Phase 3: Initial Scan ───────────────────────────────────── - - # Per-run subdirectory so the fixed external-methods filenames do not - # collide between the initial scan and the rescan further down. - run1_dir = ws["results"] / "run-1" - run1_dir.mkdir(parents=True, exist_ok=True) - sarif_path = run1_dir / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(ws["rules"])], - rule_ids=[ - "java/security/stirling-path-traversal.yaml:stirling-path-traversal" - ], - track_external_methods=True, - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok("Initial scan failed") - print_timing_breakdown("initial-scan", result) - _phase_time("Phase 3 complete (initial scan)") - - sarif_data = load_sarif(sarif_path) - findings = sarif_findings_for_rule(sarif_data, "stirling-path-traversal") - print(f"Phase 3: Initial scan found {len(findings)} path-traversal findings") - assert len(findings) > 0, ( - "Expected path-traversal findings from initial scan but got 0. " - "Check that the join rule's sink ref matches the builtin sink rule ID." - ) - - for f in findings[:5]: - locs = f.get("locations", [{}]) - if locs: - uri = ( - locs[0] - .get("physicalLocation", {}) - .get("artifactLocation", {}) - .get("uri", "?") - ) - line = ( - locs[0] - .get("physicalLocation", {}) - .get("region", {}) - .get("startLine", "?") - ) - print(f" Finding: {uri}:{line}") - - # ── Phase 3b: Analyze External Methods ──────────────────────── - - priority_methods = [] - wo_count, wr_count = 0, 0 - if external_methods_exist(sarif_path): - ext_data = load_external_methods(sarif_path) - wo_count, wr_count = count_external_methods(ext_data) - print( - f"Phase 3b: External methods — {wo_count} without rules, {wr_count} with rules" - ) - - without_rules = ext_data.get("withoutRules", []) - priority_methods = [m for m in without_rules if m.get("callSites", 0) > 5] - priority_methods.sort(key=lambda m: m.get("callSites", 0), reverse=True) - print( - f" Priority unmodeled methods (>5 call sites): {len(priority_methods)}" - ) - for m in priority_methods[:10]: - print( - f" {m['method']} ({m['callSites']} call sites, positions: {m['factPositions']})" - ) - _phase_time("Phase 3b complete (external methods analysis)") - - # ── Phase 4: Create Approximation and Rescan ────────────────── - # Approximations are ONLY for external methods (from withoutRules). - # These are library methods without source code in the project. - - if priority_methods: - pass_through_rules = [] - for m in priority_methods[:5]: - method_name = m["method"] - positions = m["factPositions"] - - copies = [] - for pos in positions: - if pos.startswith("arg("): - copies.append({"from": pos, "to": "result"}) - elif pos == "": - copies.append({"from": "", "to": "result"}) - - if copies: - pass_through_rules.append( - { - "function": method_name, - "copy": copies, - } - ) - - if pass_through_rules: - config_file = ws["config"] / "custom-propagators.yaml" - write_yaml(config_file, {"passThrough": pass_through_rules}) - print( - f"Phase 4: Created {len(pass_through_rules)} custom passThrough rules" - ) - - # Rescan with approximations — own subdir keeps the fixed - # external-methods filenames from overwriting run-1 outputs. - run2_dir = ws["results"] / "run-2" - run2_dir.mkdir(parents=True, exist_ok=True) - sarif_path_2 = run2_dir / "report.sarif" - - result2 = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path_2), - rulesets=["builtin", str(ws["rules"])], - rule_ids=[ - "java/security/stirling-path-traversal.yaml:stirling-path-traversal" - ], - approximations_config=str(config_file), - track_external_methods=True, - severity=["note", "warning", "error"], - timeout=600, - ) - - print_timing_breakdown("rescan-with-approx", result2) - - if result2.ok: - sarif_data_2 = load_sarif(sarif_path_2) - findings_2 = sarif_findings_for_rule( - sarif_data_2, "stirling-path-traversal" - ) - print( - f"Phase 4: Rescan found {len(findings_2)} findings (was {len(findings)})" - ) - - if external_methods_exist(sarif_path_2): - ext_data_2 = load_external_methods(sarif_path_2) - wo2, wr2 = count_external_methods(ext_data_2) - print( - f" External methods after approx: {wo2} without (was {wo_count}), {wr2} with (was {wr_count})" - ) - - delta_findings = len(findings_2) - len(findings) - delta_methods = wo_count - wo2 - print( - f" Delta: {delta_findings:+d} findings, {delta_methods:+d} newly modeled methods" - ) - - _phase_time("Phase 4 complete (approximation + rescan)") - total = time.time() - t0 - print(f"\n=== Full agent loop completed in {total:.1f}s ===") diff --git a/agent-mode/test/test_rules.py b/agent-mode/test/test_rules.py deleted file mode 100644 index 9dbcab5bc..000000000 --- a/agent-mode/test/test_rules.py +++ /dev/null @@ -1,409 +0,0 @@ -""" -Suite 2: Rule Generation Pipeline - -Tests: -2.1 Read builtin rules via `opentaint rules-path` (or known path) -2.2 Create custom library + security rules, verify YAML validity -2.3 Run scan with custom ruleset + --rule-id filter -2.4 Run scan with custom ruleset without --rule-id filter (all rules active) -2.5 Bootstrap test project, build, and run rule tests -2.6 Rule test: false negative detected (positive sample with wrong pattern) -2.7 Rule test: false positive detected (negative sample with too-broad pattern) -2.8 Run scan on Stirling-PDF with custom path-traversal rule -""" - -import json -import shutil -import pytest -from pathlib import Path -from conftest import ( - OpenTaintCLI, - load_sarif, - sarif_results, - sarif_rule_ids, - sarif_findings_for_rule, - write_text, - BUILTIN_RULES_DIR, - FIXTURES_DIR, -) - - -class TestReadBuiltinRules: - """2.1: Agent can discover and read builtin rules.""" - - def test_builtin_rules_directory_exists(self, builtin_rules: Path): - """Builtin rules directory exists and contains rule files.""" - security_dir = builtin_rules / "java" / "security" - assert security_dir.exists(), f"No security rules at {security_dir}" - rule_files = list(security_dir.glob("*.yaml")) - assert len(rule_files) > 10, ( - f"Expected >10 security rules, found {len(rule_files)}" - ) - - def test_builtin_lib_rules_exist(self, builtin_rules: Path): - """Library rules (sources/sinks) exist.""" - lib_generic = builtin_rules / "java" / "lib" / "generic" - assert lib_generic.exists() - assert (lib_generic / "servlet-untrusted-data-source.yaml").exists() - assert (lib_generic / "path-traversal-sinks.yaml").exists() - - @pytest.mark.new_feature - def test_rules_path_command(self, cli: OpenTaintCLI): - """opentaint rules-path prints the rules directory.""" - result = cli.rules_path() - result.assert_ok("rules-path command failed") - rules_dir = Path(result.stdout.strip()) - assert rules_dir.exists(), f"rules-path returned non-existent dir: {rules_dir}" - assert (rules_dir / "java" / "security").is_dir() - - -class TestCustomRuleCreation: - """2.2: Create and validate custom rules.""" - - def test_custom_rules_are_valid_yaml(self): - """Fixture rule files are syntactically valid YAML with expected structure.""" - import yaml - - rules_dir = FIXTURES_DIR / "rules" - for rule_file in rules_dir.rglob("*.yaml"): - with open(rule_file) as f: - data = yaml.safe_load(f) - assert "rules" in data, f"Rule file {rule_file} missing 'rules' key" - for rule in data["rules"]: - assert "id" in rule, f"Rule in {rule_file} missing 'id'" - assert "severity" in rule, f"Rule {rule['id']} missing 'severity'" - assert "languages" in rule, f"Rule {rule['id']} missing 'languages'" - - def test_library_rule_has_lib_option(self): - """Library rules must have options.lib: true.""" - import yaml - - lib_rule = FIXTURES_DIR / "rules" / "java" / "lib" / "stirling-source.yaml" - if not lib_rule.exists(): - pytest.skip("Library rule fixture not created yet") - with open(lib_rule) as f: - data = yaml.safe_load(f) - for rule in data["rules"]: - assert rule.get("options", {}).get("lib") is True, ( - f"Library rule {rule['id']} missing options.lib: true" - ) - - def test_security_rule_has_metadata(self): - """Security rules must have metadata.cwe and metadata.short-description.""" - import yaml - - sec_rule = ( - FIXTURES_DIR - / "rules" - / "java" - / "security" - / "stirling-path-traversal.yaml" - ) - if not sec_rule.exists(): - pytest.skip("Security rule fixture not created yet") - with open(sec_rule) as f: - data = yaml.safe_load(f) - for rule in data["rules"]: - if rule.get("options", {}).get("lib"): - continue - meta = rule.get("metadata", {}) - assert "cwe" in meta, f"Security rule {rule['id']} missing metadata.cwe" - assert "short-description" in meta, ( - f"Security rule {rule['id']} missing metadata.short-description" - ) - - -class TestScanWithRuleIdFilter: - """2.3-2.4: Scan with --rule-id filter.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_scan_with_rule_id_filter( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Scan with --rule-id should only produce findings for the specified rule. - Library rules referenced via refs should be auto-included. - """ - sarif_path = tmp_output / "report.sarif" - custom_rules = FIXTURES_DIR / "rules" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(custom_rules)], - rule_ids=[ - "java/security/stirling-path-traversal.yaml:stirling-path-traversal" - ], - severity=["note", "warning", "error"], - timeout=600, - ) - result.assert_ok("Scan with --rule-id filter failed") - - data = load_sarif(sarif_path) - rule_ids = sarif_rule_ids(data) - for rid in rule_ids: - # With --semgrep-compatibility-sarif (default), rule IDs use dot-separated paths - assert rid in ( - "stirling-path-traversal", - "java.security.stirling-path-traversal", - ), f"Unexpected rule '{rid}' in output — --rule-id filter not working" - - @pytest.mark.slow - def test_scan_without_rule_id_filter_includes_all( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Scan without --rule-id should include findings from all active rules. - """ - sarif_path = tmp_output / "report.sarif" - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin"], - severity=["warning", "error"], - timeout=600, - ) - result.assert_ok("Scan without rule-id filter failed") - - data = load_sarif(sarif_path) - rule_ids = sarif_rule_ids(data) - assert len(rule_ids) > 1, f"Expected multiple rule IDs, got: {rule_ids}" - - -class TestRuleTests: - """2.5-2.7: Rule test workflow.""" - - @pytest.mark.slow - @pytest.mark.new_feature - def test_init_test_project(self, cli: OpenTaintCLI, tmp_output: Path): - """opentaint init-test-project bootstraps a valid Gradle test project.""" - test_project_dir = tmp_output / "test-project" - - result = cli.init_test_project( - output_dir=str(test_project_dir), - dependencies=[ - "org.springframework:spring-web:6.2.12", - "jakarta.servlet:jakarta.servlet-api:6.0.0", - ], - ) - if not result.ok: - pytest.skip("init-test-project not available (new feature)") - - assert (test_project_dir / "build.gradle.kts").exists() - assert (test_project_dir / "settings.gradle.kts").exists() - assert (test_project_dir / "libs" / "opentaint-sast-test-util.jar").exists() - assert (test_project_dir / "src" / "main" / "java" / "test").is_dir() - - @pytest.mark.slow - @pytest.mark.new_feature - def test_rule_test_all_pass(self, cli: OpenTaintCLI, tmp_output: Path): - """ - Create a test project with correct positive/negative samples. - Rule tests should all pass. - """ - test_project_dir = tmp_output / "test-project" - compiled_dir = tmp_output / "test-compiled" - test_output = tmp_output / "test-output" - rules_dir = FIXTURES_DIR / "rules" - - result = cli.init_test_project( - output_dir=str(test_project_dir), - dependencies=[ - "org.springframework:spring-web:6.2.12", - "jakarta.servlet:jakarta.servlet-api:6.0.0", - ], - ) - if not result.ok: - pytest.skip("init-test-project not available") - - # Copy test samples - samples_src = FIXTURES_DIR / "test-samples" / "src" - samples_dst = test_project_dir / "src" - if samples_src.exists(): - shutil.copytree(samples_src, samples_dst, dirs_exist_ok=True) - - compile_result = cli.compile(str(test_project_dir), str(compiled_dir)) - if not compile_result.ok: - combined = (compile_result.stdout + compile_result.stderr).lower() - # Autobuilder JAR may not be built locally — skip gracefully - if ( - "autobuilder" in combined - or "compile" in combined - or "compilation" in combined - ): - pytest.skip( - "Compilation failed (autobuilder JAR may not be available). " - "Build it with: cd core && ./gradlew :autobuilder:jar" - ) - compile_result.assert_ok("Failed to compile test project") - - test_result = cli.test_rules( - project_path=str(compiled_dir / "project.yaml"), - rulesets=[str(rules_dir)], - output_dir=str(test_output), - ) - test_result.assert_ok("Rule tests failed") - - result_json = test_output / "test-result.json" - assert result_json.exists(), "test-result.json not produced" - with open(result_json) as f: - results = json.load(f) - - assert len(results.get("falsePositive", [])) == 0, ( - f"Unexpected false positives: {results['falsePositive']}" - ) - assert len(results.get("falseNegative", [])) == 0, ( - f"Unexpected false negatives: {results['falseNegative']}" - ) - assert len(results.get("success", [])) > 0, ( - "No successful tests — something is wrong" - ) - - @pytest.mark.slow - @pytest.mark.new_feature - def test_rule_test_detects_false_negative( - self, cli: OpenTaintCLI, tmp_output: Path - ): - """ - A @PositiveRuleSample that doesn't match the rule → false negative. - This tests that the test framework correctly detects missing findings. - """ - test_project_dir = tmp_output / "test-project-fn" - compiled_dir = tmp_output / "test-compiled-fn" - test_output = tmp_output / "test-output-fn" - - rules_dir = tmp_output / "broken-rules" / "java" / "security" - rules_dir.mkdir(parents=True) - write_text( - rules_dir / "broken-rule.yaml", - """\ -rules: - - id: broken-path-traversal - severity: ERROR - message: This rule intentionally won't match - metadata: - cwe: CWE-22 - short-description: Broken rule for testing FN detection - languages: [java] - patterns: - - pattern: ThisClassDoesNotExist.neverCalled($X) -""", - ) - - result = cli.init_test_project( - output_dir=str(test_project_dir), - dependencies=["jakarta.servlet:jakarta.servlet-api:6.0.0"], - ) - if not result.ok: - pytest.skip("init-test-project not available") - - test_file = ( - test_project_dir - / "src" - / "main" - / "java" - / "test" - / "FalseNegativeTest.java" - ) - write_text( - test_file, - """\ -package test; - -import org.opentaint.sast.test.util.PositiveRuleSample; - -public class FalseNegativeTest { - - @PositiveRuleSample(value = "java/security/broken-rule.yaml", id = "broken-path-traversal") - public void shouldTriggerButWont() { - String x = System.getenv("USER_INPUT"); - System.out.println(x); - } -} -""", - ) - - compile_result = cli.compile(str(test_project_dir), str(compiled_dir)) - if not compile_result.ok: - if "autobuilder" in (compile_result.stdout + compile_result.stderr).lower(): - pytest.skip( - "Autobuilder JAR not available. " - "Build it with: cd core && ./gradlew :autobuilder:jar" - ) - pytest.skip("Cannot compile test project") - - test_result = cli.test_rules( - project_path=str(compiled_dir / "project.yaml"), - rulesets=[str(tmp_output / "broken-rules")], - output_dir=str(test_output), - ) - - result_json = test_output / "test-result.json" - assert result_json.exists(), ( - "test-result.json not produced — test-rules command may have failed.\n" - f" stdout: {test_result.stdout[:1000]}\n" - f" stderr: {test_result.stderr[:1000]}" - ) - test_result.assert_ok("test-rules command failed") - with open(result_json) as f: - results = json.load(f) - assert len(results.get("falseNegative", [])) > 0, ( - "Expected false negative not detected" - ) - - -class TestScanStirlingWithCustomRule: - """2.8: Run custom path-traversal rule on Stirling-PDF.""" - - @pytest.mark.slow - def test_scan_stirling_with_path_traversal_rule( - self, cli: OpenTaintCLI, stirling_project: Path, tmp_output: Path - ): - """ - Scan Stirling-PDF with our custom path-traversal rule. - Stirling-PDF handles file uploads in several controllers — - we expect the rule to find some findings. - """ - sarif_path = tmp_output / "report.sarif" - custom_rules = FIXTURES_DIR / "rules" - - if not custom_rules.exists(): - pytest.skip("Fixture rules not created yet") - - result = cli.scan( - project_path=str(stirling_project), - output=str(sarif_path), - rulesets=["builtin", str(custom_rules)], - rule_ids=[ - "java/security/stirling-path-traversal.yaml:stirling-path-traversal" - ], - severity=["note", "warning", "error"], - timeout=600, - ) - - result.assert_ok("Scan with custom path-traversal rule failed") - data = load_sarif(sarif_path) - findings = sarif_findings_for_rule(data, "stirling-path-traversal") - print(f"Found {len(findings)} path-traversal findings in Stirling-PDF") - assert len(findings) > 0, ( - "Expected path-traversal findings in Stirling-PDF but got 0. " - "Check that the join rule's sink ref matches the builtin sink rule ID." - ) - for f in findings[:5]: - locs = f.get("locations", [{}]) - if locs: - uri = ( - locs[0] - .get("physicalLocation", {}) - .get("artifactLocation", {}) - .get("uri", "?") - ) - line = ( - locs[0] - .get("physicalLocation", {}) - .get("region", {}) - .get("startLine", "?") - ) - print(f" - {uri}:{line}") diff --git a/task.md b/task.md deleted file mode 100644 index af654ee90..000000000 --- a/task.md +++ /dev/null @@ -1,86 +0,0 @@ -We have dataflow based SAST analyzer. - -We have multiple configuration points: -1. Patterns in `rules/ruleset`. Vulnerable patterns to search in analyzed code. We treat all such patterns as a patterns on a dataflow trace and match them similar to the taint rules. -2. Approximations in `core/opentaint-config/config`. Default dataflow propagators for common libraries. -3. Approximations in `core/opentaint-jvm-sast-dataflow/dataflow-approximations`. Complex code-based propagators for complex methods. -4. Framework support like in `core/src/main/kotlin/org/opentaint/jvm/sast/project/spring/SpringWebProject.kt`. Special handling of frameworks. - -We are trying to make all our rules and approximations customizable (e.g. via llm agent). Here are our requirements: -1. Agent should be able to generate patterns (1). We need to specialize all requirement to the pattern language -2. Agent should be able to debug and fix patterns (1). For example, fix FP or FN. -3. To work with FN, engine will return a list of external methods, where dataflow fact was killed -4. Agent should be able to generate approximations (2) and (3), mainly to fix FN -5. Approximations (2) must be hierarchical. For example, if we have rule for method `get*` and a rule for `getEntry`, the rule for `getEntry` must override the rule for `get*` -6. Approximations (3) always override (2) -7. Agent must be able to override (2) and (3) -8. Agent must be able to generate required missed approximations based on the list of external methods, where dataflow fact was killed -9. Frameworks support (4) provided as-is and is not configurable by the agent - -Expected agent workflow: -1. Agent takes path to the project -2. Agent builds project via autobuilder, or create project.yaml himself (via dedicated CLI API). -3. Agent search for entry points and potentially vulnerable places -4. Agent start working on security analysis. Each step and progress is tracked in `opentaint-analysis-plan.md` -5. Agent creates Rule -6. Agent creates Tests for the rule and verify rule works as expected. - - Take simple test sample project (like in rules/test but with few samples) - - Write tests for designed rule - - Run opentaint analyzer on the test like in .github/workflows/ci-rules.yaml (via dedicated CLI API) - - Fix rule or test and repeat -7. Agent runs opentaint with Rule on the project -8. Opentaint produces 2 files: - - Sarif with discovered vulnerabilities wrt Rule. Each vulnerability may contain multiple traces. - - List (yaml format) with external methods, where dataflow fact was killed -9. Agent decide between the following options: - - Fix FN according to missed external methods list. Each list entry contains info about method and fact position (pass rule from). Agent can generate more Approximations and override current, then rerun analysis. - - Fix FN in rule (non preferred option). Add more patters and tests into Rule. Then rerun analysis. - - Analyze trace. - a. If trace contains FP, try fix it via Rule (e.g. pattern-not). Update rule and test, then rerun analysis - b. If trace contains FP, try fix it via Approximations (non preferred option). Override approximation to remove impossible dataflow - c. If trace is TP, try to generate POC. Then save it to `vulnerabilities.md` -10. Steps 7-10 are repeated until agent decides that all vulnerabilities discovered. - -Analyze current analyzer impl and requirements and propose the following documents: -1. agent-mode/info/pattern-rules.md: design of all thing related to pattern rules -2. agent-mode/info/approximations-config.md: all things related to approximations -3. agent-mode/info/agent-pipeline.md: whole pipeline for the agent to work with rules, common scenarios - -Consider we have opentaint installed on PATH and want to use it from the agent via skills. -We need to design the following things: -1. Changes in the opentaint that are required to match expected agent workflow -2. All opentaint operations must be available via Go CLI (implemented in Go or proxied to the Analyzer CLI) - - Consider code-based approximations. Opentaint CLI must have an API to take approximation source code and compile it to further use it in the analysis. -3. Skills that can be used via agent. - - Skills must include all the required examples - - For the rule-test skill we must provide simple sample test project -4. Meta prompt to run agent wrt expected workflow using skills. - -Write all your findings into `agent-mode/design/agent-mode-design.md`. - -No we need to design the test pipeline. -Let's start with project `/home/sobol/data/Stirling-PDF/seqra-project/project.yaml` -1. We need to test various project build scenarios -2. Check that rule generations pipeline works -3. Check approximations (including code based) generation/override -4. Check external methods extraction - -Write all your findings into `agent-mode/test/agent-mode-test.md`. - -No we need to understand implementation details: -1. Which modules/files will be modified and how -2. Projects structure: where skills and meta-prompt will be located -3. How skills and meta prompt will be accessible via agent? How we distribute them and provide to the end user -4. How we can test implementation without CLI installation in PATH - -Write all your findings into `agent-mode/impl/agent-mode-impl.md`. - -OK, WE ARE HERE. We have all required design done. - -Now we can implement things. Use `agent-mode/plan.md` to track implementation process. -Track all tasks status. If you have a new task (e.g. fix failing test) add it to the plan before start working on it. -Use git to fix project state after each task completed. - -1. Make all planned changes into analyzer and CLI -2. Write skills and meta-prompt -3. Verify everything works using tests From 17c2c77f8d14c54b8f591739c51a73e9830e791b Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Mon, 8 Jun 2026 21:43:20 +0200 Subject: [PATCH 18/54] fix(cli): avoid generated jar requirement in go build --- cli/internal/testutil/.gitignore | 2 +- cli/internal/testutil/generate_jar.go | 48 +++++++++++++++++++++++++++ cli/internal/testutil/jar/README.md | 4 +++ cli/internal/testutil/testutil.go | 26 +++++++++++---- 4 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 cli/internal/testutil/generate_jar.go create mode 100644 cli/internal/testutil/jar/README.md diff --git a/cli/internal/testutil/.gitignore b/cli/internal/testutil/.gitignore index cc77386b1..7a1fe0c3a 100644 --- a/cli/internal/testutil/.gitignore +++ b/cli/internal/testutil/.gitignore @@ -1,2 +1,2 @@ # Generated by go:generate from core/opentaint-sast-test-util/build/libs/ -jar/ +jar/*.jar diff --git a/cli/internal/testutil/generate_jar.go b/cli/internal/testutil/generate_jar.go new file mode 100644 index 000000000..4bf69196f --- /dev/null +++ b/cli/internal/testutil/generate_jar.go @@ -0,0 +1,48 @@ +//go:build ignore + +package main + +import ( + "fmt" + "io" + "os" + "path/filepath" +) + +const ( + jarName = "opentaint-sast-test-util.jar" + sourceJar = "../../../core/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar" + outputDir = "jar" +) + +func main() { + if err := copyJar(); err != nil { + fmt.Fprintf(os.Stderr, "generate test-util jar: %v\n", err) + os.Exit(1) + } +} + +func copyJar() error { + if err := os.MkdirAll(outputDir, 0o755); err != nil { + return fmt.Errorf("create %s: %w", outputDir, err) + } + + in, err := os.Open(sourceJar) + if err != nil { + return fmt.Errorf("open %s: %w; build it with 'cd ../../../core && ./gradlew :opentaint-sast-test-util:jar'", sourceJar, err) + } + defer in.Close() + + outPath := filepath.Join(outputDir, jarName) + out, err := os.Create(outPath) + if err != nil { + return fmt.Errorf("create %s: %w", outPath, err) + } + defer out.Close() + + if _, err := io.Copy(out, in); err != nil { + return fmt.Errorf("copy %s to %s: %w", sourceJar, outPath, err) + } + + return nil +} diff --git a/cli/internal/testutil/jar/README.md b/cli/internal/testutil/jar/README.md new file mode 100644 index 000000000..60f06c0c4 --- /dev/null +++ b/cli/internal/testutil/jar/README.md @@ -0,0 +1,4 @@ +This directory intentionally contains a tracked placeholder so `go build ./...` +works before `go generate` creates `opentaint-sast-test-util.jar`. + +Generated JAR files in this directory are ignored by Git. diff --git a/cli/internal/testutil/testutil.go b/cli/internal/testutil/testutil.go index b6b759b0d..f4d3b1832 100644 --- a/cli/internal/testutil/testutil.go +++ b/cli/internal/testutil/testutil.go @@ -4,23 +4,24 @@ package testutil import ( "crypto/sha256" - _ "embed" + "embed" "encoding/hex" "fmt" "os" + "path" "path/filepath" "strings" ) -//go:generate sh -c "mkdir -p jar && cp ../../../core/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar jar/" +//go:generate go run ./generate_jar.go -//go:embed jar/opentaint-sast-test-util.jar -var jarData []byte +//go:embed jar/* +var jarFiles embed.FS // JarName is the filename of the test-util JAR. const JarName = "opentaint-sast-test-util.jar" -func contentHash() string { +func contentHash(jarData []byte) string { h := sha256.Sum256(jarData) return hex.EncodeToString(h[:]) } @@ -30,6 +31,11 @@ func contentHash() string { // marker for staleness detection so the extracted copy is refreshed when the // binary is rebuilt with a newer JAR. func ExtractJar() (string, error) { + jarData, err := embeddedJarData() + if err != nil { + return "", err + } + home, err := os.UserHomeDir() if err != nil { return "", fmt.Errorf("cannot determine home directory: %w", err) @@ -37,7 +43,7 @@ func ExtractJar() (string, error) { extractDir := filepath.Join(home, ".opentaint", "test-util") extractPath := filepath.Join(extractDir, JarName) markerPath := filepath.Join(extractDir, ".content-hash") - wantHash := contentHash() + wantHash := contentHash(jarData) if !needsExtract(markerPath, wantHash) && fileExists(extractPath) { return extractPath, nil @@ -55,6 +61,14 @@ func ExtractJar() (string, error) { return extractPath, nil } +func embeddedJarData() ([]byte, error) { + jarData, err := jarFiles.ReadFile(path.Join("jar", JarName)) + if err != nil { + return nil, fmt.Errorf("embedded %s is missing; build it with 'cd core && ./gradlew :opentaint-sast-test-util:jar', then run 'cd cli && go generate ./internal/testutil': %w", JarName, err) + } + return jarData, nil +} + func needsExtract(markerPath, wantHash string) bool { data, err := os.ReadFile(markerPath) if err != nil { From 59a022757af28b0966669cbef534b6ded8a6772c Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Mon, 8 Jun 2026 23:18:34 +0200 Subject: [PATCH 19/54] fix(cli): check file Close errors in copyFile --- cli/cmd/test_init.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cli/cmd/test_init.go b/cli/cmd/test_init.go index c1b8dc2bf..9cd873a5c 100644 --- a/cli/cmd/test_init.go +++ b/cli/cmd/test_init.go @@ -183,7 +183,7 @@ func copyFile(src, dst string) error { if err != nil { return fmt.Errorf("open source: %w", err) } - defer in.Close() + defer func() { _ = in.Close() }() if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil { return fmt.Errorf("create parent dir: %w", err) @@ -193,11 +193,15 @@ func copyFile(src, dst string) error { if err != nil { return fmt.Errorf("create destination: %w", err) } - defer outFile.Close() if _, err := io.Copy(outFile, in); err != nil { + _ = outFile.Close() return fmt.Errorf("copy: %w", err) } + + if err := outFile.Close(); err != nil { + return fmt.Errorf("close destination: %w", err) + } return nil } From a20dc530b88510d7953f6dea6c5dd055f0e238f6 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Mon, 8 Jun 2026 23:29:49 +0200 Subject: [PATCH 20/54] refactor(cli): dedupe path/scaffold helpers and clarify naming - Add exported utils.PathExists; route health and testutil through it, removing duplicate pathExistsCmd/fileExists copies - Rename health.displayVersion to shortVersion to avoid clashing with utils.displayVersion - Add utils.WriteFiles and use it from testrule/testapprox Scaffold, dropping the repeated mkdir+write loops - Document the shared-global coupling between test rule reachability and the scan command --- cli/cmd/health.go | 22 +++++++--------------- cli/cmd/test_rule_reachability.go | 6 ++++++ cli/internal/testapprox/testapprox.go | 15 ++++----------- cli/internal/testrule/testrule.go | 19 +++++-------------- cli/internal/testutil/testutil.go | 9 +++------ cli/internal/utils/opentaint_home.go | 11 ++++++----- cli/internal/utils/updater.go | 4 ++-- cli/internal/utils/write_files.go | 21 +++++++++++++++++++++ 8 files changed, 54 insertions(+), 53 deletions(-) create mode 100644 cli/internal/utils/write_files.go diff --git a/cli/cmd/health.go b/cli/cmd/health.go index 1faf0eb39..4e6606360 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -89,7 +89,7 @@ func runHealth() { for _, c := range components { value := c.path if c.version != "" { - value = displayVersion(c.version) + " " + c.path + value = shortVersion(c.version) + " " + c.path } if !c.present { value += " " + out.Theme().Error.Render("missing") @@ -105,10 +105,10 @@ func resolveHealthComponent(key string) healthComponent { switch key { case "autobuilder": path, err := utils.GetAutobuilderJarPath(globals.Config.Autobuilder.Version) - return healthComponent{"Autobuilder", globals.Config.Autobuilder.Version, path, err == nil && pathExistsCmd(path)} + return healthComponent{"Autobuilder", globals.Config.Autobuilder.Version, path, err == nil && utils.PathExists(path)} case "analyzer": path, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version) - return healthComponent{"Analyzer", globals.Config.Analyzer.Version, path, err == nil && pathExistsCmd(path)} + return healthComponent{"Analyzer", globals.Config.Analyzer.Version, path, err == nil && utils.PathExists(path)} case "rules": return resolveRulesComponent() case "runtime": @@ -127,7 +127,7 @@ func resolveRulesComponent() healthComponent { return c } c.path = path - if pathExistsCmd(path) { + if utils.PathExists(path) { c.present = true return c } @@ -139,7 +139,7 @@ func resolveRulesComponent() healthComponent { fmt.Fprintf(os.Stderr, "Error downloading rules: %s\n", dlErr) return c } - c.present = pathExistsCmd(path) + c.present = utils.PathExists(path) return c } @@ -166,18 +166,10 @@ func resolveRuntimeComponent() healthComponent { return c } -// displayVersion strips the artifact-kind prefix (e.g. "rules/v0.1.1" → "v0.1.1"). -func displayVersion(v string) string { +// shortVersion strips the artifact-kind prefix (e.g. "rules/v0.1.1" → "v0.1.1"). +func shortVersion(v string) string { if idx := strings.LastIndex(v, "/"); idx >= 0 { return v[idx+1:] } return v } - -func pathExistsCmd(p string) bool { - if p == "" { - return false - } - _, err := os.Stat(p) - return err == nil -} diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index 17eb90038..4cc6c09bf 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -29,6 +29,12 @@ Use --project-model to scan a pre-compiled project model instead of compiling fr Annotations: map[string]string{"PrintConfig": "true"}, Args: cobra.RangeArgs(1, 2), Run: func(cmd *cobra.Command, args []string) { + // `reachability` is `scan` with a forced flag preset. scan reads its + // inputs from package-level vars (bound to its cobra flags), so we set + // those vars here and delegate to scanCmd.Run rather than duplicating + // the scan pipeline. This relies on shared mutable state: it assumes a + // single, non-concurrent command invocation per process (the CLI + // contract), and any new scan input must be wired through the same vars. RuleID = []string{args[0]} DebugFactReachabilitySarif = true expandRuleRefs = true diff --git a/cli/internal/testapprox/testapprox.go b/cli/internal/testapprox/testapprox.go index d208ca1ee..0bc13c390 100644 --- a/cli/internal/testapprox/testapprox.go +++ b/cli/internal/testapprox/testapprox.go @@ -7,6 +7,8 @@ import ( "fmt" "os" "path/filepath" + + "github.com/seqra/opentaint/internal/utils" ) // FixedRuleFileName is the rule's path relative to the ruleset root, and the value @@ -33,17 +35,8 @@ func WriteFixedRule(dir string) (string, error) { // and the Taint source/sink helper. Samples are the agent's to write; the approximation under test // lives in its own unit folder (.opentaint/approximations/), never inside this test project. func Scaffold(projectDir string) error { - files := map[string][]byte{ + return utils.WriteFiles(map[string][]byte{ filepath.Join(projectDir, FixedRuleFileName): fixedRule, filepath.Join(projectDir, "src", "main", "java", "test", "Taint.java"): taintJava, - } - for path, content := range files { - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return fmt.Errorf("create %s: %w", filepath.Dir(path), err) - } - if err := os.WriteFile(path, content, 0o644); err != nil { - return fmt.Errorf("write %s: %w", filepath.Base(path), err) - } - } - return nil + }) } diff --git a/cli/internal/testrule/testrule.go b/cli/internal/testrule/testrule.go index 613fe2845..bb3fbd52e 100644 --- a/cli/internal/testrule/testrule.go +++ b/cli/internal/testrule/testrule.go @@ -6,9 +6,9 @@ package testrule import ( _ "embed" - "fmt" - "os" "path/filepath" + + "github.com/seqra/opentaint/internal/utils" ) //go:embed example/src/main/java/test/Taint.java @@ -33,18 +33,9 @@ const ( // Scaffold writes the Taint helper into the project sources and the generic // source/sink marker lib rules into the project's test-rules ruleset. func Scaffold(projectDir string) error { - files := map[string][]byte{ - filepath.Join(projectDir, "src", "main", "java", "test", "Taint.java"): taintJava, + return utils.WriteFiles(map[string][]byte{ + filepath.Join(projectDir, "src", "main", "java", "test", "Taint.java"): taintJava, filepath.Join(projectDir, MarkersDir, filepath.FromSlash(GenericSourceRule)): genericSource, filepath.Join(projectDir, MarkersDir, filepath.FromSlash(GenericSinkRule)): genericSink, - } - for path, content := range files { - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return fmt.Errorf("create %s: %w", filepath.Dir(path), err) - } - if err := os.WriteFile(path, content, 0o644); err != nil { - return fmt.Errorf("write %s: %w", filepath.Base(path), err) - } - } - return nil + }) } diff --git a/cli/internal/testutil/testutil.go b/cli/internal/testutil/testutil.go index f4d3b1832..30c20262f 100644 --- a/cli/internal/testutil/testutil.go +++ b/cli/internal/testutil/testutil.go @@ -11,6 +11,8 @@ import ( "path" "path/filepath" "strings" + + "github.com/seqra/opentaint/internal/utils" ) //go:generate go run ./generate_jar.go @@ -45,7 +47,7 @@ func ExtractJar() (string, error) { markerPath := filepath.Join(extractDir, ".content-hash") wantHash := contentHash(jarData) - if !needsExtract(markerPath, wantHash) && fileExists(extractPath) { + if !needsExtract(markerPath, wantHash) && utils.PathExists(extractPath) { return extractPath, nil } @@ -76,8 +78,3 @@ func needsExtract(markerPath, wantHash string) bool { } return strings.TrimSpace(string(data)) != wantHash } - -func fileExists(path string) bool { - _, err := os.Stat(path) - return err == nil -} diff --git a/cli/internal/utils/opentaint_home.go b/cli/internal/utils/opentaint_home.go index 5e92e6746..dd66af1e2 100644 --- a/cli/internal/utils/opentaint_home.go +++ b/cli/internal/utils/opentaint_home.go @@ -30,8 +30,8 @@ func GetOpenTaintHome() (string, error) { return path, nil } -// pathExists reports whether a path exists on disk. -func pathExists(p string) bool { +// PathExists reports whether a path exists on disk. +func PathExists(p string) bool { _, err := os.Stat(p) return err == nil } @@ -66,10 +66,10 @@ func resolveBundledDir(exeDir, name string) string { return "" } flat := filepath.Join(exeDir, name) - if pathExists(flat) { + if PathExists(flat) { return flat } - if sibling := filepath.Join(exeDir, "..", name); pathExists(sibling) { + if sibling := filepath.Join(exeDir, "..", name); PathExists(sibling) { return sibling } return flat @@ -170,7 +170,7 @@ func ReconcileInstallMarker() { return } for _, def := range globals.Artifacts() { - if !pathExists(filepath.Join(installLib, def.LibSubpath)) { + if !PathExists(filepath.Join(installLib, def.LibSubpath)) { return } } @@ -182,6 +182,7 @@ func ReconcileInstallMarker() { // 1. Bundled path (next to binary) — only if version matches bindVersion // 2. Install path (~/.opentaint/install/lib/) — only if version matches bindVersion // 3. Cache path (~/.opentaint/) +// // When no tier exists yet, it returns the last tier as the default download target. func resolveArtifactTier(def globals.ArtifactDef) (string, string, error) { tiers, err := ArtifactTiers(def) diff --git a/cli/internal/utils/updater.go b/cli/internal/utils/updater.go index 96b603702..f9584ff16 100644 --- a/cli/internal/utils/updater.go +++ b/cli/internal/utils/updater.go @@ -199,8 +199,8 @@ func SelfUpdate(archivePath, installDir string) error { // Preserve the installation style: if bundled artifacts exist next to the // binary, update them in place. Otherwise, place into the install tier // (~/.opentaint/install/) so bare-binary installations stay bare. - libBundled := pathExists(filepath.Join(installDir, "lib")) - jreBundled := pathExists(filepath.Join(installDir, "jre")) + libBundled := PathExists(filepath.Join(installDir, "lib")) + jreBundled := PathExists(filepath.Join(installDir, "jre")) if err := updateArtifactDir(tmpDir, "lib", libBundled, installDir); err != nil { output.LogInfof("Failed to update lib directory: %v", err) diff --git a/cli/internal/utils/write_files.go b/cli/internal/utils/write_files.go new file mode 100644 index 000000000..48b8cff52 --- /dev/null +++ b/cli/internal/utils/write_files.go @@ -0,0 +1,21 @@ +package utils + +import ( + "fmt" + "os" +) + +// WriteFiles writes each path->content entry to disk, creating parent +// directories as needed. It is the shared primitive behind the test-project +// scaffolders (see internal/testrule and internal/testapprox). +func WriteFiles(files map[string][]byte) error { + for path, content := range files { + if err := EnsureParentDir(path); err != nil { + return err + } + if err := os.WriteFile(path, content, 0o644); err != nil { + return fmt.Errorf("write %s: %w", path, err) + } + } + return nil +} From 41107ca8b8448beeb963c62eaedc1e781abc422f Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 00:05:43 +0200 Subject: [PATCH 21/54] refactor(cli): reuse parent directory helper --- README.md | 2 +- cli/cmd/test_init.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d8dcfd05f..6634e753b 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ For more options, see [Installation](docs/README.md#installation) and [Usage](do ## AI Agent Workflows -OpenTaint now ships agent skills for turning static analysis into an end-to-end application-security workflow. Install them with: +OpenTaint includes agent skills that turn static analysis into an end-to-end application-security workflow. Install them with: ```bash npx skills add https://github.com/seqra/opentaint diff --git a/cli/cmd/test_init.go b/cli/cmd/test_init.go index 9cd873a5c..fa080f003 100644 --- a/cli/cmd/test_init.go +++ b/cli/cmd/test_init.go @@ -185,8 +185,8 @@ func copyFile(src, dst string) error { } defer func() { _ = in.Close() }() - if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil { - return fmt.Errorf("create parent dir: %w", err) + if err := utils.EnsureParentDir(dst); err != nil { + return err } outFile, err := os.Create(dst) From c5077cfc004af3774321fa028810741280c55c7a Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 00:21:37 +0200 Subject: [PATCH 22/54] fix(cli): clarify new command descriptions --- cli/cmd/health.go | 22 +++++++++---------- cli/cmd/scan.go | 9 +++----- cli/cmd/test.go | 8 +++---- cli/cmd/test_approximation_run.go | 16 +++++++------- cli/cmd/test_init.go | 36 +++++++++++++++---------------- cli/cmd/test_rule_reachability.go | 19 ++++++++-------- cli/cmd/test_rule_run.go | 19 ++++++++-------- docs/usage.md | 36 +++++++++++++++---------------- 8 files changed, 81 insertions(+), 84 deletions(-) diff --git a/cli/cmd/health.go b/cli/cmd/health.go index 4e6606360..1a64488ed 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -27,16 +27,16 @@ type healthComponent struct { present bool } -// healthCmd represents the health command +// healthCmd represents the health command. var healthCmd = &cobra.Command{ Use: "health", - Short: "Print the resolved dependency paths", - Long: `Print the on-disk paths OpenTaint resolves for its dependencies: autobuilder, -analyzer, rules, and the Java runtime. + Short: "Show resolved dependency paths", + Long: `Show the on-disk paths OpenTaint uses for the autobuilder, analyzer, +built-in rules, and Java runtime. -Pass --autobuilder, --analyzer, --rules or --runtime to show only those; with a -single flag just the bare path is printed. Nothing is downloaded except the -rules, which are fetched on demand.`, +Use --autobuilder, --analyzer, --rules, or --runtime to select components. When +exactly one component is selected, only its path is printed. The command does +not download artifacts except built-in rules, which are fetched on demand.`, Args: cobra.NoArgs, Run: func(cmd *cobra.Command, args []string) { runHealth() @@ -45,10 +45,10 @@ rules, which are fetched on demand.`, func init() { rootCmd.AddCommand(healthCmd) - healthCmd.Flags().BoolVar(&healthAutobuilder, "autobuilder", false, "Show only the autobuilder JAR path") - healthCmd.Flags().BoolVar(&healthAnalyzer, "analyzer", false, "Show only the analyzer JAR path") - healthCmd.Flags().BoolVar(&healthRules, "rules", false, "Show only the built-in rules path (downloads on demand)") - healthCmd.Flags().BoolVar(&healthRuntime, "runtime", false, "Show only the Java runtime path") + healthCmd.Flags().BoolVar(&healthAutobuilder, "autobuilder", false, "Print only the autobuilder JAR path") + healthCmd.Flags().BoolVar(&healthAnalyzer, "analyzer", false, "Print only the analyzer JAR path") + healthCmd.Flags().BoolVar(&healthRules, "rules", false, "Print only the built-in rules path, downloading rules if needed") + healthCmd.Flags().BoolVar(&healthRuntime, "runtime", false, "Print only the Java runtime path") } func runHealth() { diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 77bd6eb82..e4dbb5f64 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -145,14 +145,11 @@ func addScanFlags(cmd *cobra.Command) { cmd.Flags().StringVar(&ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") cmd.Flags().StringVar(&ScanLogFile, "log-file", "", "Path to the log file (default: /logs/.log)") - cmd.Flags().StringArrayVar(&PassthroughApproximations, "passthrough-approximations", nil, "passThrough approximation YAML file or directory of them (OVERRIDE mode, repeatable)") - _ = cmd.Flags().MarkHidden("passthrough-approximations") + cmd.Flags().StringArrayVar(&PassthroughApproximations, "passthrough-approximations", nil, "Pass-through approximation YAML file or directory (repeatable)") - cmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") - _ = cmd.Flags().MarkHidden("dataflow-approximations") + cmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Dataflow approximation class directory or Java source directory (repeatable)") - cmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write dropped-external-methods.yaml and approximated-external-methods.yaml next to the SARIF report") - _ = cmd.Flags().MarkHidden("track-external-methods") + cmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-method coverage files next to the SARIF report") } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. diff --git a/cli/cmd/test.go b/cli/cmd/test.go index 9e47bf593..81b9130fc 100644 --- a/cli/cmd/test.go +++ b/cli/cmd/test.go @@ -7,20 +7,20 @@ import ( // testCmd groups the rule/approximation authoring, testing, and debugging tools (experimental). var testCmd = &cobra.Command{ Use: "test", - Short: "Author, test, and debug rules and approximations (experimental)", - Long: `Utilities for the rule and approximation test-driven loop: scaffold a test project, run tests against annotated samples, and trace fact reachability when a rule misbehaves (experimental)`, + Short: "Create and run rule and approximation tests (experimental)", + Long: `Experimental tools for creating test projects, running annotated rule and approximation tests, and debugging rule reachability.`, } // testRuleCmd groups the rule-authoring subcommands (init/run/reachability). var testRuleCmd = &cobra.Command{ Use: "rule", - Short: "Scaffold, test, and debug detection rules", + Short: "Create, run, and debug detection-rule tests", } // testApproximationCmd groups the approximation-authoring subcommands (init/run). var testApproximationCmd = &cobra.Command{ Use: "approximation", - Short: "Scaffold and test dataflow approximations", + Short: "Create and run dataflow-approximation tests", } func init() { diff --git a/cli/cmd/test_approximation_run.go b/cli/cmd/test_approximation_run.go index 7c266cccc..6d7b40292 100644 --- a/cli/cmd/test_approximation_run.go +++ b/cli/cmd/test_approximation_run.go @@ -17,14 +17,14 @@ var ( var testApproximationRunCmd = &cobra.Command{ Use: "run ", - Short: "Run rule tests against annotated test samples with approximations applied", - Long: `Run rule tests against annotated test samples with the given approximations applied. + Short: "Run dataflow approximation tests on a compiled project model", + Long: `Run annotated samples with the supplied dataflow approximations applied. -The fixed source->sink harness rule is applied automatically; samples reference it as +A built-in source-to-sink harness rule is applied automatically; positive samples reference it as ` + "`@PositiveRuleSample(value = \"approximation-rule.yaml\", id = \"approximation-rule\")`" + `. Exit codes: - 0 All rule tests passed + 0 All approximation tests passed 1 General failure (configuration or infrastructure error) 252 Unhandled analyzer exception 253 Out of memory (try increasing --max-memory) @@ -55,8 +55,8 @@ Exit codes: func init() { testApproximationCmd.AddCommand(testApproximationRunCmd) - testApproximationRunCmd.Flags().StringVarP(&testApproxOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") - testApproximationRunCmd.Flags().DurationVar(&testApproxTimeout, "timeout", 600*time.Second, "Timeout for analysis") - testApproximationRunCmd.Flags().StringVar(&testApproxMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") - testApproximationRunCmd.Flags().StringArrayVar(&testApproxDataflow, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") + testApproximationRunCmd.Flags().StringVarP(&testApproxOutputDir, "output", "o", "", "Directory for test-result.json and test-results.sarif") + testApproximationRunCmd.Flags().DurationVar(&testApproxTimeout, "timeout", 600*time.Second, "Analysis timeout") + testApproximationRunCmd.Flags().StringVar(&testApproxMaxMemory, "max-memory", "8G", "Maximum analyzer heap size (e.g., 8G)") + testApproximationRunCmd.Flags().StringArrayVar(&testApproxDataflow, "dataflow-approximations", nil, "Dataflow approximation class directory or Java source directory (repeatable)") } diff --git a/cli/cmd/test_init.go b/cli/cmd/test_init.go index fa080f003..478597e61 100644 --- a/cli/cmd/test_init.go +++ b/cli/cmd/test_init.go @@ -21,21 +21,19 @@ var initRuleSourcesOnly bool var testRuleInitCmd = &cobra.Command{ Use: "init ", - Short: "Bootstrap rule test projects (sinks and/or sources) with the generic Taint marker", - Long: `Creates the rule test projects under : a 'sinks' project (a package's sink -lib rules tested against the generic Taint source) and a 'sources' project (a package's source -lib rules tested against the generic Taint sink). Pass --sinks-only or --sources-only for a -package that has only one side. + Short: "Create rule test projects with source and sink harnesses", + Long: `Create one or two Gradle test projects under . The sinks +project tests sink rules against a generic Taint source; the sources project +tests source rules against a generic Taint sink. Use --sinks-only or +--sources-only when only one project is needed. Each project includes: - build.gradle.kts with compile-only dependencies, settings.gradle.kts - libs/opentaint-sast-test-util.jar (provides @PositiveRuleSample and @NegativeRuleSample) - src/main/java/test/ with Taint.java (the generic source()/sink()) for test sample sources - - test-rules/java/lib/test/generic-{source,sink}.yaml — the marker lib rules an agent refs - from a test join; these and the test join live only here, never in .opentaint/rules, so - they never reach the main project scan + - test-rules/java/lib/test/generic-{source,sink}.yaml marker rules for test-only joins -Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, +Use --dependency to add compile-only Maven dependencies for the samples.`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { if initRuleSinksOnly && initRuleSourcesOnly { @@ -60,20 +58,20 @@ Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, var testApproximationInitCmd = &cobra.Command{ Use: "init ", - Short: "Bootstrap a dataflow approximation test project with the fixed Taint source/sink and rule", - Long: `Creates a minimal Gradle project structure for testing OpenTaint dataflow approximations. + Short: "Create a dataflow approximation test project", + Long: `Create a minimal Gradle project for testing OpenTaint dataflow approximations. The project includes: - build.gradle.kts with compile-only dependencies - settings.gradle.kts - libs/opentaint-sast-test-util.jar (provides @PositiveRuleSample and @NegativeRuleSample annotations) - - approximation-rule.yaml, the fixed source->sink rule the samples are checked against + - approximation-rule.yaml, the fixed source-to-sink rule the samples are checked against - src/main/java/test/ with Taint.java (the fixed source() and sink()) for test sample sources -The approximation under test is NOT part of this project: it lives in its own unit folder -(.opentaint/approximations/) and is applied at test time via --dataflow-approximations. +The approximation under test is supplied separately at test time with +--dataflow-approximations. -Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, +Use --dependency to add compile-only Maven dependencies for the samples.`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { bootstrapTestProject(args[0], "approximation-test-project", initApproxProjectDeps) @@ -87,15 +85,15 @@ Use --dependency to add Maven dependencies (e.g., servlet-api, Spring Web).`, func init() { testRuleCmd.AddCommand(testRuleInitCmd) testRuleInitCmd.Flags().StringArrayVar(&initRuleProjectDeps, "dependency", nil, - "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") + "Compile-only Maven dependency coordinates for generated samples (repeatable)") testRuleInitCmd.Flags().BoolVar(&initRuleSinksOnly, "sinks-only", false, - "Scaffold only the sinks test project (a package with no sources)") + "Create only the sinks test project") testRuleInitCmd.Flags().BoolVar(&initRuleSourcesOnly, "sources-only", false, - "Scaffold only the sources test project (a package with no sinks)") + "Create only the sources test project") testApproximationCmd.AddCommand(testApproximationInitCmd) testApproximationInitCmd.Flags().StringArrayVar(&initApproxProjectDeps, "dependency", nil, - "Maven dependency coordinates to add (e.g., 'javax.servlet:javax.servlet-api:4.0.1')") + "Compile-only Maven dependency coordinates for generated samples (repeatable)") } // bootstrapTestProject creates the shared Gradle layout (dirs, test-util JAR, build files) diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index 4cc6c09bf..952ea0b69 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -8,21 +8,22 @@ var reachabilityEntryPoint string var testRuleReachabilityCmd = &cobra.Command{ Use: "reachability [source-path]", - Short: "Trace fact reachability for a single rule (why it does or does not fire)", - Long: `This command scans the project for one rule and writes a sibling SARIF report with fact-reachability info to debug why the rule does or does not fire + Short: "Trace why a rule can or cannot reach its facts", + Long: `Scan a project with one rule and write a sibling SARIF report with +fact-reachability details. Use this to debug why a rule does or does not fire. Arguments: - rule-id - Full rule ID, e.g. security/SqlInjection.yaml:tainted-sql-from-http (required) + rule-id - Full rule ID, e.g. security/SqlInjection.yaml:tainted-sql-from-http source-path - Path to the project sources (default: current directory) -The rule's library source/sink dependencies (its join refs) are collected and analyzed automatically. +Referenced library source and sink rules are collected and analyzed automatically. The fact-reachability report is written next to the main SARIF as debug-ifds-fact-reachability.sarif. -Use --entry-points to seed the analysis at a specific method while tracing reachability: - Non-Spring: RESTRICTS the entry-point set to this method only. - Spring: ADDS this method to Spring's auto-discovered entry-point set (the set can't be narrowed on Spring). -The value is '*' for all methods or a method FQN like com.example.Class#method. +Use --entry-points to start analysis from a specific method while tracing reachability. +The value is '*' for all methods or a fully qualified method such as com.example.Class#method. +For non-Spring projects this restricts the entry-point set. For Spring projects it adds to +the auto-discovered entry points because Spring entry points cannot be narrowed. Use --project-model to scan a pre-compiled project model instead of compiling from sources. `, @@ -50,5 +51,5 @@ func init() { testRuleCmd.AddCommand(testRuleReachabilityCmd) addScanFlags(testRuleReachabilityCmd) testRuleReachabilityCmd.Flags().StringVar(&reachabilityEntryPoint, "entry-points", "", - "Seed analysis at this method ('*' or FQN like com.example.Class#method); restricts on non-Spring, adds on Spring") + "Start from '*' or a fully qualified method such as com.example.Class#method") } diff --git a/cli/cmd/test_rule_run.go b/cli/cmd/test_rule_run.go index 6f477eff2..2b51570e8 100644 --- a/cli/cmd/test_rule_run.go +++ b/cli/cmd/test_rule_run.go @@ -25,8 +25,9 @@ var ( var testRuleRunCmd = &cobra.Command{ Use: "run ", - Short: "Run rule tests against annotated test samples", - Long: `Run rule tests against annotated test samples in the given project model. + Short: "Run detection-rule tests on a compiled project model", + Long: `Run detection rules against samples annotated with @PositiveRuleSample and +@NegativeRuleSample in the compiled project model. Exit codes: 0 All rule tests passed @@ -188,11 +189,11 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { func init() { testRuleCmd.AddCommand(testRuleRunCmd) - testRuleRunCmd.Flags().StringArrayVar(&testRulesRuleset, "ruleset", nil, "Additional ruleset path (repeatable)") - testRuleRunCmd.Flags().StringVarP(&testRulesOutputDir, "output", "o", "", "Output directory for test results (test-result.json)") - testRuleRunCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Timeout for analysis") - testRuleRunCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 8G)") - testRuleRunCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") - testRuleRunCmd.Flags().StringArrayVar(&testRulesDataflow, "dataflow-approximations", nil, "Directory of compiled approximation class files or .java sources (repeatable)") - testRuleRunCmd.Flags().StringArrayVar(&testRulesPassthrough, "passthrough-approximations", nil, "passThrough approximation YAML file or directory of them (repeatable)") + testRuleRunCmd.Flags().StringArrayVar(&testRulesRuleset, "ruleset", nil, "Ruleset file or directory to test (repeatable)") + testRuleRunCmd.Flags().StringVarP(&testRulesOutputDir, "output", "o", "", "Directory for test-result.json and test-results.sarif") + testRuleRunCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Analysis timeout") + testRuleRunCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum analyzer heap size (e.g., 8G)") + testRuleRunCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Run only rules with this ID (repeatable)") + testRuleRunCmd.Flags().StringArrayVar(&testRulesDataflow, "dataflow-approximations", nil, "Dataflow approximation class directory or Java source directory (repeatable)") + testRuleRunCmd.Flags().StringArrayVar(&testRulesPassthrough, "passthrough-approximations", nil, "Pass-through approximation YAML file or directory (repeatable)") } diff --git a/docs/usage.md b/docs/usage.md index f6794f987..a805bee0f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -80,9 +80,9 @@ Use [CodeChecker](https://github.com/Ericsson/codechecker) for advanced result m | `opentaint compile` | Build project model separately from scanning | | `opentaint project` | Create project model from precompiled JARs/classes | | `opentaint summary` | View SARIF analysis results | -| `opentaint health` | Print resolved dependency paths for the analyzer, autobuilder, rules, and Java runtime | -| `opentaint test rule` | Scaffold, test, and debug detection rules | -| `opentaint test approximation` | Scaffold and test dataflow approximations | +| `opentaint health` | Show resolved paths for the analyzer, autobuilder, rules, and Java runtime | +| `opentaint test rule` | Create, run, and debug detection-rule tests | +| `opentaint test approximation` | Create and run dataflow-approximation tests | | `opentaint pull` | Download analyzer dependencies | | `opentaint update` | Update to latest version | | `opentaint prune` | Remove stale downloaded artifacts and cached models | @@ -105,21 +105,21 @@ On the first run, the compiled project model is cached in `~/.opentaint/cache/`. | `--dry-run` | Validate inputs and show what would run without compiling or scanning | | `--log-file` | Path to the log file (default: `/logs/.log`) | -#### Agent and rule-authoring flags +#### Rule-authoring flags -These flags support custom rule development and AI-agent workflows: +These experimental flags support custom rules and approximations: | Flag | Description | |------|-------------| -| `--track-external-methods` | Write `dropped-external-methods.yaml` and `approximated-external-methods.yaml` next to the SARIF report | +| `--track-external-methods` | Write external-method coverage files next to the SARIF report | | `--passthrough-approximations` | Apply pass-through approximation YAML files or directories (repeatable) | -| `--dataflow-approximations` | Apply compiled dataflow approximation classes or Java source directories (repeatable) | +| `--dataflow-approximations` | Apply dataflow approximation classes or Java source directories (repeatable) | Use external-method tracking when a scan may miss flows through library methods. The dropped-methods file shows where taint was killed because no model was available; the approximated-methods file shows methods already covered by built-in or custom models. ### opentaint health -Print the on-disk paths OpenTaint resolves for its dependencies: +Show the on-disk paths OpenTaint uses for its dependencies: ```bash opentaint health @@ -127,14 +127,14 @@ opentaint health --rules opentaint health --analyzer ``` -With no flags, `health` prints the autobuilder, analyzer, built-in rules, and Java runtime. With a single component flag, it prints only the bare path, which is useful for scripts and agents. +With no flags, `health` shows the autobuilder, analyzer, built-in rules, and Java runtime. With a single component flag, it prints only the bare path, which is useful for scripts. | Flag | Description | |------|-------------| -| `--autobuilder` | Show only the autobuilder JAR path | -| `--analyzer` | Show only the analyzer JAR path | -| `--rules` | Show only the built-in rules path, downloading rules on demand | -| `--runtime` | Show only the Java runtime path | +| `--autobuilder` | Print only the autobuilder JAR path | +| `--analyzer` | Print only the analyzer JAR path | +| `--rules` | Print only the built-in rules path, downloading rules if needed | +| `--runtime` | Print only the Java runtime path | ### opentaint test @@ -151,9 +151,9 @@ opentaint test rule reachability java/security/my-rule.yaml:my-rule --project-mo | Command | Description | |---------|-------------| -| `opentaint test rule init ` | Bootstrap source and sink test projects with annotated sample support | -| `opentaint test rule run ` | Run rules against annotated positive and negative samples | -| `opentaint test rule reachability [source-path]` | Trace fact reachability for a single rule and its referenced library rules | +| `opentaint test rule init ` | Create source and sink test projects with annotated sample support | +| `opentaint test rule run ` | Run detection-rule tests on a compiled project model | +| `opentaint test rule reachability [source-path]` | Trace why a rule can or cannot reach its facts | #### Approximation tests @@ -166,8 +166,8 @@ opentaint test approximation run .opentaint/test-compiled/my-approximation \ | Command | Description | |---------|-------------| -| `opentaint test approximation init ` | Bootstrap a test project with the fixed `Taint.source()` to `Taint.sink(...)` harness | -| `opentaint test approximation run ` | Run annotated samples with dataflow approximations applied | +| `opentaint test approximation init ` | Create a test project with a fixed `Taint.source()` to `Taint.sink(...)` harness | +| `opentaint test approximation run ` | Run dataflow approximation tests on a compiled project model | Rule and approximation test runs write `test-result.json` and `test-results.sarif` to the selected output directory. From a278b1fd2cca7db136e570180d0e07d9d5f3da30 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 00:42:31 +0200 Subject: [PATCH 23/54] docs: clarify appsec-agent description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6634e753b..b270f7a0a 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ OpenTaint includes agent skills that turn static analysis into an end-to-end app npx skills add https://github.com/seqra/opentaint ``` -The `appsec-agent` skill orchestrates a full JVM project assessment: build the project, run OpenTaint, model missing library data flows, triage findings, and optionally generate dynamic proof-of-concept checks for confirmed vulnerabilities. +The `appsec-agent` skill orchestrates a full project assessment: build the project, run OpenTaint, discover the attack surface, add targeted rules, model missing library data flows, triage findings, and optionally generate dynamic proof-of-concept checks for confirmed vulnerabilities. Included skills cover the common security-analysis loop: From 808c2050da00ef5f37be4793d3ecdb7f8d464bea Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 15:55:34 +0200 Subject: [PATCH 24/54] refactor(cli): resolve artifact jar paths via one override-aware resolver Model the configured jar_path override as a first-class field on ArtifactDef and resolve it through a single utils.ResolveJarPath helper, replacing the duplicated per-artifact resolveXJarPath functions. health, scan, compile and the autobuilder project flow now honor the override consistently and skip the download when it is set; runAutobuilder reuses ensureAutobuilderAvailable instead of duplicating the resolve+download block. Also fix the reachability help example to a real rule id (security/sqli.yaml:sql-injection) and ignore /install/. --- .gitignore | 1 + cli/cmd/compile.go | 10 +++++----- cli/cmd/health.go | 4 ++-- cli/cmd/health_test.go | 29 ++++++++++++++++++++++++++++ cli/cmd/project.go | 8 +------- cli/cmd/scan.go | 10 +++++----- cli/cmd/test_rule_reachability.go | 2 +- cli/internal/globals/artifacts.go | 3 +++ cli/internal/utils/opentaint_home.go | 14 ++++++++------ 9 files changed, 55 insertions(+), 26 deletions(-) create mode 100644 cli/cmd/health_test.go diff --git a/.gitignore b/.gitignore index 2d3487922..8ec53bdf4 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ config.local.* **/.gradle **/build +/install/ core/**/bin/ # Ignore all hidden files and directories diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go index 531dbff3b..2d2f61dce 100644 --- a/cli/cmd/compile.go +++ b/cli/cmd/compile.go @@ -120,14 +120,14 @@ func init() { } func ensureAutobuilderAvailable() (string, error) { - if globals.Config.Autobuilder.JarPath != "" { - return globals.Config.Autobuilder.JarPath, nil - } - - autobuilderJarPath, err := utils.GetAutobuilderJarPath(globals.Config.Autobuilder.Version) + def := globals.ArtifactByKind("autobuilder") + autobuilderJarPath, err := utils.ResolveJarPath(def) if err != nil { return "", fmt.Errorf("failed to construct path to the autobuilder: %w", err) } + if def.Override != "" { + return autobuilderJarPath, nil + } if err = ensureArtifactAvailable("autobuilder", globals.Config.Autobuilder.Version, autobuilderJarPath, func() error { return utils.DownloadGithubReleaseAsset(globals.Config.Owner, globals.Config.Repo, globals.Config.Autobuilder.Version, globals.AutobuilderAssetName, autobuilderJarPath, globals.Config.Github.Token, globals.Config.SkipVerify, out) diff --git a/cli/cmd/health.go b/cli/cmd/health.go index 1a64488ed..d9414abd6 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -104,10 +104,10 @@ func runHealth() { func resolveHealthComponent(key string) healthComponent { switch key { case "autobuilder": - path, err := utils.GetAutobuilderJarPath(globals.Config.Autobuilder.Version) + path, err := utils.ResolveJarPath(globals.ArtifactByKind("autobuilder")) return healthComponent{"Autobuilder", globals.Config.Autobuilder.Version, path, err == nil && utils.PathExists(path)} case "analyzer": - path, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version) + path, err := utils.ResolveJarPath(globals.ArtifactByKind("analyzer")) return healthComponent{"Analyzer", globals.Config.Analyzer.Version, path, err == nil && utils.PathExists(path)} case "rules": return resolveRulesComponent() diff --git a/cli/cmd/health_test.go b/cli/cmd/health_test.go new file mode 100644 index 000000000..dca28c3b3 --- /dev/null +++ b/cli/cmd/health_test.go @@ -0,0 +1,29 @@ +package cmd + +import ( + "testing" + + "github.com/seqra/opentaint/internal/globals" +) + +func TestResolveHealthComponentUsesAnalyzerJarOverride(t *testing.T) { + orig := globals.Config.Analyzer.JarPath + t.Cleanup(func() { globals.Config.Analyzer.JarPath = orig }) + globals.Config.Analyzer.JarPath = "/tmp/custom-analyzer.jar" + + c := resolveHealthComponent("analyzer") + if c.path != globals.Config.Analyzer.JarPath { + t.Fatalf("health analyzer path = %q, want override %q", c.path, globals.Config.Analyzer.JarPath) + } +} + +func TestResolveHealthComponentUsesAutobuilderJarOverride(t *testing.T) { + orig := globals.Config.Autobuilder.JarPath + t.Cleanup(func() { globals.Config.Autobuilder.JarPath = orig }) + globals.Config.Autobuilder.JarPath = "/tmp/custom-autobuilder.jar" + + c := resolveHealthComponent("autobuilder") + if c.path != globals.Config.Autobuilder.JarPath { + t.Fatalf("health autobuilder path = %q, want override %q", c.path, globals.Config.Autobuilder.JarPath) + } +} diff --git a/cli/cmd/project.go b/cli/cmd/project.go index 9b7564ad0..124111c1f 100644 --- a/cli/cmd/project.go +++ b/cli/cmd/project.go @@ -121,14 +121,8 @@ func (c *JavaAutobuilderConfig) validate() error { } func (c *JavaAutobuilderConfig) runAutobuilder() error { - autobuilderJarPath, err := utils.GetAutobuilderJarPath(globals.Config.Autobuilder.Version) + autobuilderJarPath, err := ensureAutobuilderAvailable() if err != nil { - return fmt.Errorf("failed to construct path to the autobuilder: %w", err) - } - - if err = ensureArtifactAvailable("autobuilder", globals.Config.Autobuilder.Version, autobuilderJarPath, func() error { - return utils.DownloadGithubReleaseAsset(globals.Config.Owner, globals.Config.Repo, globals.Config.Autobuilder.Version, globals.AutobuilderAssetName, autobuilderJarPath, globals.Config.Github.Token, globals.Config.SkipVerify, out) - }); err != nil { return err } diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index e4dbb5f64..c7133f5d0 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -603,14 +603,14 @@ func setupSemgrepRuleLoadTrace() string { } func ensureAnalyzerAvailable() (string, error) { - if globals.Config.Analyzer.JarPath != "" { - return globals.Config.Analyzer.JarPath, nil - } - - analyzerJarPath, err := utils.GetAnalyzerJarPath(globals.Config.Analyzer.Version) + def := globals.ArtifactByKind("analyzer") + analyzerJarPath, err := utils.ResolveJarPath(def) if err != nil { return "", fmt.Errorf("failed to construct path to the analyzer: %w", err) } + if def.Override != "" { + return analyzerJarPath, nil + } if err := ensureArtifactAvailable("analyzer", globals.Config.Analyzer.Version, analyzerJarPath, func() error { return utils.DownloadGithubReleaseAsset(globals.Config.Owner, globals.Config.Repo, globals.Config.Analyzer.Version, globals.AnalyzerAssetName, analyzerJarPath, globals.Config.Github.Token, globals.Config.SkipVerify, out) diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index 952ea0b69..3dce48cd4 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -13,7 +13,7 @@ var testRuleReachabilityCmd = &cobra.Command{ fact-reachability details. Use this to debug why a rule does or does not fire. Arguments: - rule-id - Full rule ID, e.g. security/SqlInjection.yaml:tainted-sql-from-http + rule-id - Full rule ID, e.g. security/sqli.yaml:sql-injection source-path - Path to the project sources (default: current directory) Referenced library source and sink rules are collected and analyzed automatically. diff --git a/cli/internal/globals/artifacts.go b/cli/internal/globals/artifacts.go index 1bdef8c6c..fb33d4781 100644 --- a/cli/internal/globals/artifacts.go +++ b/cli/internal/globals/artifacts.go @@ -12,6 +12,7 @@ type ArtifactDef struct { CacheSuffix string // cache filename suffix (".jar", "") BindVersion string // compile-time bind version Version string // user-configured version + Override string // explicit jar path override; bypasses version resolution and download Unpack bool // unpack tar.gz; also implies dir-based cache entry } @@ -49,6 +50,7 @@ func Artifacts() []ArtifactDef { CacheSuffix: ".jar", BindVersion: AutobuilderBindVersion, Version: Config.Autobuilder.Version, + Override: Config.Autobuilder.JarPath, }, { Name: "Analyzer", @@ -59,6 +61,7 @@ func Artifacts() []ArtifactDef { CacheSuffix: ".jar", BindVersion: AnalyzerBindVersion, Version: Config.Analyzer.Version, + Override: Config.Analyzer.JarPath, }, { Name: "Rules", diff --git a/cli/internal/utils/opentaint_home.go b/cli/internal/utils/opentaint_home.go index dd66af1e2..b3cbee5fc 100644 --- a/cli/internal/utils/opentaint_home.go +++ b/cli/internal/utils/opentaint_home.go @@ -202,12 +202,14 @@ func resolveArtifactPath(def globals.ArtifactDef) (string, error) { return path, err } -func GetAutobuilderJarPath(version string) (string, error) { - return resolveArtifactPath(globals.ArtifactByKind("autobuilder").WithVersion(version)) -} - -func GetAnalyzerJarPath(version string) (string, error) { - return resolveArtifactPath(globals.ArtifactByKind("analyzer").WithVersion(version)) +// ResolveJarPath resolves an artifact's jar path, honoring an explicit override +// (which bypasses version-based resolution) and otherwise falling back to the +// versioned artifact path. +func ResolveJarPath(def globals.ArtifactDef) (string, error) { + if def.Override != "" { + return def.Override, nil + } + return resolveArtifactPath(def) } func GetRulesPath(version string) (string, error) { From 9a7083ba528bf0bb29d274c690f5404785ed15df Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 16:47:06 +0200 Subject: [PATCH 25/54] refactor(cli): extract ScanConfig and remove reachability's global coupling Introduce a ScanConfig struct holding the per-invocation scan inputs; cobra flags bind to a single scanFlags instance and runScan reads everything from the passed config instead of package globals. `test rule reachability` now builds an explicit ScanConfig via reachabilityScanConfig and runs the shared pipeline, replacing the previous pattern of mutating scan's package-level vars and delegating to scanCmd.Run. The shared source-path/--project-model preflight moves into prepareScanConfig so both entry points reuse it. Rename the prior scanConfig type (resolved mode/model/cache-lock) to scanPlan to avoid colliding with the new input struct, and add unit tests covering the reachability preset construction. --- cli/cmd/scan.go | 229 +++++++++++++------------ cli/cmd/test_rule_reachability.go | 31 ++-- cli/cmd/test_rule_reachability_test.go | 40 +++++ 3 files changed, 183 insertions(+), 117 deletions(-) create mode 100644 cli/cmd/test_rule_reachability_test.go diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index c7133f5d0..c0d2090e8 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -24,24 +24,33 @@ import ( "github.com/seqra/opentaint/internal/utils/log" ) -var ( - UserProjectPath string - ProjectModelPath string - SarifReportPath string - SemgrepCompatibilitySarif bool - Severity []string - Ruleset []string - DryRunScan bool - Recompile bool - ScanLogFile string - RuleID []string - PassthroughApproximations []string - DataflowApproximations []string - TrackExternalMethods bool +// ScanConfig holds the per-invocation inputs for a scan. The scan command +// populates it from its flags; sibling commands such as `test rule reachability` +// construct it directly with preset overrides instead of mutating shared +// package state. +type ScanConfig struct { + UserProjectPath string + ProjectModelPath string + SarifReportPath string + SemgrepCompatibilitySarif bool + Severity []string + Ruleset []string + DryRun bool + Recompile bool + LogFile string + RuleID []string + PassthroughApproximations []string + DataflowApproximations []string + TrackExternalMethods bool + DebugFactReachabilitySarif bool DebugRunAnalysisOnSelectedEntryPoints string - expandRuleRefs bool -) + ExpandRuleRefs bool +} + +// scanFlags is the ScanConfig bound to the scan command's flags. Read it only +// from a command's Run; pass an explicit ScanConfig everywhere else. +var scanFlags ScanConfig type RulesetType struct { Path string @@ -71,8 +80,9 @@ func (m ScanMode) String() string { } } -// scanConfig holds the resolved paths and flags for a scan invocation. -type scanConfig struct { +// scanPlan holds the resolved compilation/cache plan for a scan invocation, +// derived from a ScanConfig and the on-disk model cache. +type scanPlan struct { mode ScanMode absProjectModel string // absolute path to the project model (always the cache dir when projectCachePath is set) projectCachePath string // cache dir for this project (empty for explicit model / dry-run) @@ -94,24 +104,30 @@ Use --project-model to scan a pre-compiled project model instead of compiling fr `, Annotations: map[string]string{"PrintConfig": "true"}, Run: func(cmd *cobra.Command, args []string) { - if len(args) > 0 && ProjectModelPath != "" { - out.Error("Cannot use both a source path argument and --project-model flag") - suggest("Use either a source path or --project-model", - utils.NewScanCommand("").Build()+"\n "+utils.NewScanCommand("").WithProjectModel("").Build()) - os.Exit(1) - } - if Recompile && ProjectModelPath != "" { - out.Fatalf("Cannot use --recompile with --project-model; the flag only applies when compiling from sources") - } - if len(args) > 0 { - UserProjectPath = args[0] - } else { - UserProjectPath = "." - } - scan(cmd) + runScan(cmd, prepareScanConfig(scanFlags, args)) }, } +// prepareScanConfig validates the source-path-vs-model invariants shared by +// every scan entry point and resolves the project path argument into cfg. +func prepareScanConfig(cfg ScanConfig, args []string) ScanConfig { + if len(args) > 0 && cfg.ProjectModelPath != "" { + out.Error("Cannot use both a source path argument and --project-model flag") + suggest("Use either a source path or --project-model", + utils.NewScanCommand("").Build()+"\n "+utils.NewScanCommand("").WithProjectModel("").Build()) + os.Exit(1) + } + if cfg.Recompile && cfg.ProjectModelPath != "" { + out.Fatalf("Cannot use --recompile with --project-model; the flag only applies when compiling from sources") + } + if len(args) > 0 { + cfg.UserProjectPath = args[0] + } else { + cfg.UserProjectPath = "." + } + return cfg +} + func init() { rootCmd.AddCommand(scanCmd) addScanFlags(scanCmd) @@ -122,57 +138,57 @@ func init() { // that `test rule reachability` can omit it (it takes the rule ID // positionally and supports only one rule at a time). func addRuleIDFlag(cmd *cobra.Command) { - cmd.Flags().StringArrayVar(&RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") + cmd.Flags().StringArrayVar(&scanFlags.RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") } func addScanFlags(cmd *cobra.Command) { cmd.Flags().DurationVarP(&globals.Config.Scan.Timeout, "timeout", "t", 900*time.Second, "Timeout for analysis") _ = viper.BindPFlag("scan.timeout", cmd.Flags().Lookup("timeout")) - cmd.Flags().StringArrayVar(&Ruleset, "ruleset", []string{"builtin"}, "YAML rules file, directory of YAML rules files ending in .yml or .yaml, or `builtin` to scan with built-in rules") + cmd.Flags().StringArrayVar(&scanFlags.Ruleset, "ruleset", []string{"builtin"}, "YAML rules file, directory of YAML rules files ending in .yml or .yaml, or `builtin` to scan with built-in rules") _ = viper.BindPFlag("scan.ruleset", cmd.Flags().Lookup("ruleset")) - cmd.Flags().BoolVar(&SemgrepCompatibilitySarif, "semgrep-compatibility-sarif", true, "Use Semgrep compatible ruleId") - cmd.Flags().StringVarP(&SarifReportPath, "output", "o", "", "Path to the SARIF-report output file") + cmd.Flags().BoolVar(&scanFlags.SemgrepCompatibilitySarif, "semgrep-compatibility-sarif", true, "Use Semgrep compatible ruleId") + cmd.Flags().StringVarP(&scanFlags.SarifReportPath, "output", "o", "", "Path to the SARIF-report output file") - cmd.Flags().StringArrayVar(&Severity, "severity", []string{"warning", "error"}, "Report findings only from rules matching the supplied severity level. By default only warning and error rules are run (note, warning, error)") + cmd.Flags().StringArrayVar(&scanFlags.Severity, "severity", []string{"warning", "error"}, "Report findings only from rules matching the supplied severity level. By default only warning and error rules are run (note, warning, error)") cmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)") _ = viper.BindPFlag("scan.max_memory", cmd.Flags().Lookup("max-memory")) cmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)") _ = viper.BindPFlag("scan.code_flow_limit", cmd.Flags().Lookup("code-flow-limit")) - cmd.Flags().BoolVar(&DryRunScan, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") - cmd.Flags().BoolVar(&Recompile, "recompile", false, "Force recompilation even if a cached project model exists") - cmd.Flags().StringVar(&ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") - cmd.Flags().StringVar(&ScanLogFile, "log-file", "", "Path to the log file (default: /logs/.log)") + cmd.Flags().BoolVar(&scanFlags.DryRun, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") + cmd.Flags().BoolVar(&scanFlags.Recompile, "recompile", false, "Force recompilation even if a cached project model exists") + cmd.Flags().StringVar(&scanFlags.ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") + cmd.Flags().StringVar(&scanFlags.LogFile, "log-file", "", "Path to the log file (default: /logs/.log)") - cmd.Flags().StringArrayVar(&PassthroughApproximations, "passthrough-approximations", nil, "Pass-through approximation YAML file or directory (repeatable)") + cmd.Flags().StringArrayVar(&scanFlags.PassthroughApproximations, "passthrough-approximations", nil, "Pass-through approximation YAML file or directory (repeatable)") - cmd.Flags().StringArrayVar(&DataflowApproximations, "dataflow-approximations", nil, "Dataflow approximation class directory or Java source directory (repeatable)") + cmd.Flags().StringArrayVar(&scanFlags.DataflowApproximations, "dataflow-approximations", nil, "Dataflow approximation class directory or Java source directory (repeatable)") - cmd.Flags().BoolVar(&TrackExternalMethods, "track-external-methods", false, "Write external-method coverage files next to the SARIF report") + cmd.Flags().BoolVar(&scanFlags.TrackExternalMethods, "track-external-methods", false, "Write external-method coverage files next to the SARIF report") } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. // All scan command suggestions should use this as the base to ensure that adding a new // flag in one place automatically propagates to every suggestion. -func currentScanBuilder(sourcePath string) *utils.OpentaintCommandBuilder { +func currentScanBuilder(cfg ScanConfig, sourcePath string) *utils.OpentaintCommandBuilder { return utils.NewScanCommand(sourcePath). - WithOutput(SarifReportPath). + WithOutput(cfg.SarifReportPath). WithTimeout(globals.Config.Scan.Timeout). - WithRuleset(Ruleset). - WithSemgrepCompatibility(SemgrepCompatibilitySarif) + WithRuleset(cfg.Ruleset). + WithSemgrepCompatibility(cfg.SemgrepCompatibilitySarif) } // dockerScanSuggestion builds the "try Docker-based scan" fallback hint. -func dockerScanSuggestion(projectRoot, sarifReportPath string) output.Suggestion { +func dockerScanSuggestion(cfg ScanConfig, projectRoot, sarifReportPath string) output.Suggestion { return output.Suggestion{ Description: dockerFallbackHintPrefix + "scan:", - Command: utils.BuildScanCommandWithDocker(currentScanBuilder(""), projectRoot, sarifReportPath, Ruleset), + Command: utils.BuildScanCommandWithDocker(currentScanBuilder(cfg, ""), projectRoot, sarifReportPath, cfg.Ruleset), } } -func scan(cmd *cobra.Command) { - userProjectPath := filepath.Clean(UserProjectPath) +func runScan(cmd *cobra.Command, cfg ScanConfig) { + userProjectPath := filepath.Clean(cfg.UserProjectPath) absUserProjectRoot := log.AbsPathOrExit(userProjectPath, "project path") if !utils.IsSupportedArch() { @@ -180,33 +196,33 @@ func scan(cmd *cobra.Command) { } // When compiling from sources, validate the source folder looks like a Java/Kotlin project - if ProjectModelPath == "" { + if cfg.ProjectModelPath == "" { if err := validation.ValidateSourceProject(absUserProjectRoot); err != nil { if validation.IsProjectModel(absUserProjectRoot) { out.ErrorErr(err) - suggest("Use --project-model to scan a pre-compiled model", currentScanBuilder("").WithProjectModel(absUserProjectRoot).Build()) + suggest("Use --project-model to scan a pre-compiled model", currentScanBuilder(cfg, "").WithProjectModel(absUserProjectRoot).Build()) os.Exit(1) } out.FatalErr(err) } } - cfg := resolveScanConfig(absUserProjectRoot) + plan := resolveScanPlan(cfg, absUserProjectRoot) defer func() { - if cfg.cacheLock != nil { - cfg.cacheLock.Unlock() + if plan.cacheLock != nil { + plan.cacheLock.Unlock() } }() // Activate logging - if !DryRunScan { - activateLoggingForProject(ScanLogFile, absUserProjectRoot) + if !cfg.DryRun { + activateLoggingForProject(cfg.LogFile, absUserProjectRoot) } - absProjectModelPath := cfg.absProjectModel + absProjectModelPath := plan.absProjectModel var absRuleSetPaths []RulesetType - var userRuleSetPath = Ruleset + var userRuleSetPath = cfg.Ruleset for _, ruleset := range userRuleSetPath { switch ruleset { @@ -225,8 +241,8 @@ func scan(cmd *cobra.Command) { } var absSarifReportPath string - if SarifReportPath != "" { - absSarifReportPath = log.AbsPathOrExit(SarifReportPath, "output") + if cfg.SarifReportPath != "" { + absSarifReportPath = log.AbsPathOrExit(cfg.SarifReportPath, "output") } else { absSarifReportPath = utils.DefaultSarifReportPath(absProjectModelPath) } @@ -237,7 +253,7 @@ func scan(cmd *cobra.Command) { localSemanticVersion := version.GetVersion() var sourceRoot string - if !cfg.needsCompilation { + if !plan.needsCompilation { if parsedSourceRoot, err := project.GetSourceRoot(absProjectModelPath); err != nil { out.Fatalf("Failed to parse sourceRoot from project.yaml: %v", err) } else { @@ -250,14 +266,14 @@ func scan(cmd *cobra.Command) { uriBase := fmt.Sprintf("%s%s", sourceRoot, string(filepath.Separator)) var absSemgrepRuleLoadTracePath string - if DryRunScan { + if cfg.DryRun { absSemgrepRuleLoadTracePath = filepath.Join(os.TempDir(), dryRunRuleLoadTraceFileName) } else { absSemgrepRuleLoadTracePath = setupSemgrepRuleLoadTrace() } // Display scan information in tree format - printScanInfo(cmd, cfg, absSemgrepRuleLoadTracePath, absUserProjectRoot, absRuleSetPaths, localVersion) + printScanInfo(cmd, plan, absSemgrepRuleLoadTracePath, absUserProjectRoot, absRuleSetPaths, localVersion) var nonBuiltinRulesetPaths []string for _, r := range absRuleSetPaths { @@ -266,12 +282,12 @@ func scan(cmd *cobra.Command) { } } - maxMemory, err := validation.ValidateScanInputs(absUserProjectRoot, absProjectModelPath, absSarifReportPath, nonBuiltinRulesetPaths, Severity, globals.Config.Scan.MaxMemory, cfg.mode == Scan) + maxMemory, err := validation.ValidateScanInputs(absUserProjectRoot, absProjectModelPath, absSarifReportPath, nonBuiltinRulesetPaths, cfg.Severity, globals.Config.Scan.MaxMemory, plan.mode == Scan) if err != nil { out.Fatalf("Input validation failed: %s", err) } - if DryRunScan { + if cfg.DryRun { runDryRun("Compilation and analysis") return } @@ -288,7 +304,7 @@ func scan(cmd *cobra.Command) { } } - if cfg.needsCompilation { + if plan.needsCompilation { autobuilderJarPath, err := ensureAutobuilderAvailable() if err != nil { out.Fatalf("Native compile preparation failed: %s", err) @@ -304,30 +320,30 @@ func scan(cmd *cobra.Command) { } // Wipe any residue from a prior crashed compile before writing new output. - if cfg.projectCachePath != "" { - if err := os.RemoveAll(cfg.absProjectModel); err != nil { + if plan.projectCachePath != "" { + if err := os.RemoveAll(plan.absProjectModel); err != nil { out.Fatalf("Failed to prepare cache directory: %s", err) } } if err := out.RunWithSpinner("Compiling project model", func() error { - return compile(absUserProjectRoot, cfg.absProjectModel, autobuilderJarPath, compileJavaRunner) + return compile(absUserProjectRoot, plan.absProjectModel, autobuilderJarPath, compileJavaRunner) }); err != nil { - if cfg.projectCachePath != "" { - _ = os.RemoveAll(cfg.absProjectModel) + if plan.projectCachePath != "" { + _ = os.RemoveAll(plan.absProjectModel) } - failWith(1, "Native compile has failed: "+err.Error(), dockerScanSuggestion(absUserProjectRoot, absSarifReportPath)) + failWith(1, "Native compile has failed: "+err.Error(), dockerScanSuggestion(cfg, absUserProjectRoot, absSarifReportPath)) } out.Blank() // Mark the cache as valid, then downgrade to a reader so other scans // can run the analyzer against the freshly-compiled model in parallel. - if cfg.projectCachePath != "" { - if err := utils.MarkCompileComplete(cfg.projectCachePath); err != nil { - _ = os.RemoveAll(cfg.absProjectModel) + if plan.projectCachePath != "" { + if err := utils.MarkCompileComplete(plan.projectCachePath); err != nil { + _ = os.RemoveAll(plan.absProjectModel) out.Fatalf("Failed to mark model complete: %s", err) } - if err := cfg.cacheLock.Downgrade(); err != nil { + if err := plan.cacheLock.Downgrade(); err != nil { output.LogInfof("Cache lock downgrade failed, continuing under exclusive: %v", err) } } @@ -353,10 +369,10 @@ func scan(cmd *cobra.Command) { SetIfdsAnalysisTimeout(int64(globals.Config.Scan.Timeout / time.Second)). SetRuleLoadTracePath(absSemgrepRuleLoadTracePath). EnablePartialFingerprints() - if SemgrepCompatibilitySarif { + if cfg.SemgrepCompatibilitySarif { nativeBuilder.EnableSemgrepCompatibility() } - for _, severity := range Severity { + for _, severity := range cfg.Severity { nativeBuilder.AddSeverity(severity) } for _, absRuleSetPath := range absRuleSetPaths { @@ -365,28 +381,29 @@ func scan(cmd *cobra.Command) { if maxMemory != "" { nativeBuilder.SetMaxMemory(maxMemory) } - if expandRuleRefs && len(RuleID) > 0 { + ruleIDs := cfg.RuleID + if cfg.ExpandRuleRefs && len(ruleIDs) > 0 { var roots []string for _, r := range absRuleSetPaths { roots = append(roots, r.Path) } - RuleID = rules.ExpandRuleIDs(RuleID, roots) + ruleIDs = rules.ExpandRuleIDs(ruleIDs, roots) } - for _, ruleID := range RuleID { + for _, ruleID := range ruleIDs { nativeBuilder.AddRuleID(ruleID) } - for _, passthrough := range PassthroughApproximations { + for _, passthrough := range cfg.PassthroughApproximations { absPassthrough := log.AbsPathOrExit(passthrough, "passthrough-approximations") nativeBuilder.AddPassthroughApproximations(absPassthrough) } - if TrackExternalMethods { + if cfg.TrackExternalMethods { nativeBuilder.SetTrackExternalMethods(true) } - if DebugFactReachabilitySarif { + if cfg.DebugFactReachabilitySarif { nativeBuilder.EnableDebugFactReachabilitySarif() } - if DebugRunAnalysisOnSelectedEntryPoints != "" { - nativeBuilder.SetDebugRunAnalysisOnSelectedEntryPoints(DebugRunAnalysisOnSelectedEntryPoints) + if cfg.DebugRunAnalysisOnSelectedEntryPoints != "" { + nativeBuilder.SetDebugRunAnalysisOnSelectedEntryPoints(cfg.DebugRunAnalysisOnSelectedEntryPoints) } analyzerJarPath, err := ensureAnalyzerAvailable() @@ -396,7 +413,7 @@ func scan(cmd *cobra.Command) { nativeBuilder.SetJarPath(analyzerJarPath) // Process --dataflow-approximations: auto-compile .java sources if needed - for _, approxPath := range DataflowApproximations { + for _, approxPath := range cfg.DataflowApproximations { absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations") compiledPath, compileErr := compileApproximationsIfNeeded(absApproxPath, analyzerJarPath, absProjectModelPath) if compileErr != nil { @@ -483,17 +500,17 @@ func scan(cmd *cobra.Command) { } } -func resolveScanConfig(absUserProjectRoot string) scanConfig { - if ProjectModelPath != "" { - return scanConfig{ +func resolveScanPlan(cfg ScanConfig, absUserProjectRoot string) scanPlan { + if cfg.ProjectModelPath != "" { + return scanPlan{ mode: Scan, - absProjectModel: log.AbsPathOrExit(filepath.Clean(ProjectModelPath), "project model path"), + absProjectModel: log.AbsPathOrExit(filepath.Clean(cfg.ProjectModelPath), "project model path"), } } - if DryRunScan { + if cfg.DryRun { dryRunPath := filepath.Join(os.TempDir(), dryRunScanProjectModelPath) - return scanConfig{ + return scanPlan{ mode: CompileAndScan, absProjectModel: dryRunPath, needsCompilation: true, @@ -510,12 +527,12 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { // Fast path: if we're not forced to recompile and the cache looks // complete on disk, take a shared lock and re-check under the lock. - if !Recompile && utils.IsCachedModelComplete(projectCachePath) { + if !cfg.Recompile && utils.IsCachedModelComplete(projectCachePath) { sharedLock, sharedErr := utils.TryLockShared(cacheLockPath) if sharedErr == nil { if utils.IsCachedModelComplete(projectCachePath) { output.LogDebugf("Reusing cached model at: %s", cachedModelPath) - return scanConfig{ + return scanPlan{ mode: Scan, absProjectModel: cachedModelPath, projectCachePath: projectCachePath, @@ -552,7 +569,7 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { out.Fatalf("Failed to acquire cache lock: %s", lockErr) } - return scanConfig{ + return scanPlan{ mode: CompileAndScan, absProjectModel: cachedModelPath, projectCachePath: projectCachePath, @@ -561,21 +578,21 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { } } -func printScanInfo(cmd *cobra.Command, cfg scanConfig, absSemgrepRuleLoadTracePath string, absUserProjectRoot string, absRuleSetPaths []RulesetType, analyzerVersion string) { - sb := out.Section(cfg.mode.String()) +func printScanInfo(cmd *cobra.Command, plan scanPlan, absSemgrepRuleLoadTracePath string, absUserProjectRoot string, absRuleSetPaths []RulesetType, analyzerVersion string) { + sb := out.Section(plan.mode.String()) addConfigFields(cmd, sb) if globals.Config.Output.Debug { sb.FieldNode("Rule load trace", absSemgrepRuleLoadTracePath) sb.Line() } - if cfg.needsCompilation { + if plan.needsCompilation { sb.FieldNode("Project", absUserProjectRoot) - if cfg.projectCachePath != "" { - sb.FieldNode("Project model", cfg.absProjectModel) + if plan.projectCachePath != "" { + sb.FieldNode("Project model", plan.absProjectModel) } sb.FieldNode("Autobuilder", utils.ArtifactDisplayVersion(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)) } else { - sb.FieldNode("Project model", cfg.absProjectModel) + sb.FieldNode("Project model", plan.absProjectModel) } sb.FieldNode("Analyzer", analyzerVersion) for _, r := range absRuleSetPaths { diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index 3dce48cd4..f8a918695 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -30,23 +30,32 @@ Use --project-model to scan a pre-compiled project model instead of compiling fr Annotations: map[string]string{"PrintConfig": "true"}, Args: cobra.RangeArgs(1, 2), Run: func(cmd *cobra.Command, args []string) { - // `reachability` is `scan` with a forced flag preset. scan reads its - // inputs from package-level vars (bound to its cobra flags), so we set - // those vars here and delegate to scanCmd.Run rather than duplicating - // the scan pipeline. This relies on shared mutable state: it assumes a - // single, non-concurrent command invocation per process (the CLI - // contract), and any new scan input must be wired through the same vars. - RuleID = []string{args[0]} - DebugFactReachabilitySarif = true - expandRuleRefs = true + // `reachability` is `scan` with a forced preset. It shares the scan + // flags (so scanFlags carries the parsed --ruleset, --output, ... ) but + // builds an explicit ScanConfig with the reachability overrides applied + // instead of mutating shared state, then runs the same scan pipeline. if reachabilityEntryPoint != "" { out.Warn("on Spring projects this method is added to the auto-discovered entry points, not used to restrict them") - DebugRunAnalysisOnSelectedEntryPoints = reachabilityEntryPoint } - scanCmd.Run(scanCmd, args[1:]) + cfg := reachabilityScanConfig(scanFlags, args[0], reachabilityEntryPoint) + runScan(cmd, prepareScanConfig(cfg, args[1:])) }, } +// reachabilityScanConfig returns the scan config for a `test rule reachability` +// run: the base scan flags with the reachability-specific presets applied +// (single rule, fact-reachability SARIF, rule-ref expansion, optional +// entry-point restriction). +func reachabilityScanConfig(base ScanConfig, ruleID, entryPoint string) ScanConfig { + base.RuleID = []string{ruleID} + base.DebugFactReachabilitySarif = true + base.ExpandRuleRefs = true + if entryPoint != "" { + base.DebugRunAnalysisOnSelectedEntryPoints = entryPoint + } + return base +} + func init() { testRuleCmd.AddCommand(testRuleReachabilityCmd) addScanFlags(testRuleReachabilityCmd) diff --git a/cli/cmd/test_rule_reachability_test.go b/cli/cmd/test_rule_reachability_test.go new file mode 100644 index 000000000..4d59ef35d --- /dev/null +++ b/cli/cmd/test_rule_reachability_test.go @@ -0,0 +1,40 @@ +package cmd + +import "testing" + +func TestReachabilityScanConfigAppliesPresets(t *testing.T) { + base := ScanConfig{ + Ruleset: []string{"builtin"}, + Severity: []string{"warning"}, + } + + cfg := reachabilityScanConfig(base, "security/sqli.yaml:sql-injection", "com.example.A#m") + + if len(cfg.RuleID) != 1 || cfg.RuleID[0] != "security/sqli.yaml:sql-injection" { + t.Fatalf("RuleID = %v, want [security/sqli.yaml:sql-injection]", cfg.RuleID) + } + if !cfg.DebugFactReachabilitySarif { + t.Error("DebugFactReachabilitySarif = false, want true") + } + if !cfg.ExpandRuleRefs { + t.Error("ExpandRuleRefs = false, want true") + } + if cfg.DebugRunAnalysisOnSelectedEntryPoints != "com.example.A#m" { + t.Errorf("entry points = %q, want com.example.A#m", cfg.DebugRunAnalysisOnSelectedEntryPoints) + } + + // Base scan flags must be preserved, not clobbered by the preset. + if len(cfg.Ruleset) != 1 || cfg.Ruleset[0] != "builtin" { + t.Errorf("Ruleset = %v, want base [builtin]", cfg.Ruleset) + } + if len(cfg.Severity) != 1 || cfg.Severity[0] != "warning" { + t.Errorf("Severity = %v, want base [warning]", cfg.Severity) + } +} + +func TestReachabilityScanConfigOmitsEmptyEntryPoint(t *testing.T) { + cfg := reachabilityScanConfig(ScanConfig{}, "r", "") + if cfg.DebugRunAnalysisOnSelectedEntryPoints != "" { + t.Errorf("entry points = %q, want empty when no entry point given", cfg.DebugRunAnalysisOnSelectedEntryPoints) + } +} From 798d3c87e4461b7af523761d31120aab6e125657 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 17:05:03 +0200 Subject: [PATCH 26/54] feat(cli): render health report as a nested tree like scan Match the scan command's section style: each dependency is its own tree node with the version and path as separate child nodes, instead of a flat "Key: version path" line. Reuses the same GroupItem primitive and FieldKey/FieldValue theme styling as scan's printScanInfo. The missing marker now hangs off the path node, and the single-flag scripting path (bare path on stdout) is unchanged. --- cli/cmd/health.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cli/cmd/health.go b/cli/cmd/health.go index d9414abd6..2468d439a 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -86,15 +86,18 @@ func runHealth() { } sb := out.Section("OpenTaint Health") + th := out.Theme() for _, c := range components { - value := c.path + node := out.GroupItem(th.FieldKey.Render(c.name + ":")) if c.version != "" { - value = shortVersion(c.version) + " " + c.path + node.Child(th.FieldValue.Render(shortVersion(c.version))) } + path := c.path if !c.present { - value += " " + out.Theme().Error.Render("missing") + path += " " + th.Error.Render("missing") } - sb.Field(c.name, value) + node.Child(th.FieldValue.Render(path)) + sb.Child(node) } sb.Render() } From fa2092afd5770f2976f1b83943d159f63669a009 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 17:25:29 +0200 Subject: [PATCH 27/54] refactor(cli): share artifact version label across scan/compile/health MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add utils.ArtifactVersionShort, which is ArtifactDisplayVersion without the redundant "/" prefix (custom (...) is unchanged), and use it for every on-screen artifact node in scan, compile, and health so the kind is no longer shown twice (e.g. "Autobuilder: autobuilder/" -> "Autobuilder: "). The full prefixed version is still used for the SARIF tool version metadata. Drops printScanInfo's now-redundant analyzerVersion parameter. Reword the health runtime line: the Java source moves from a "· managed"/ "· " tail into an inline "(builtin)" / "(system)" parenthetical, and the doc comment explains that builtin is the JRE OpenTaint manages itself and system is a Java on the user's PATH. --- cli/cmd/compile.go | 2 +- cli/cmd/health.go | 36 ++++++++++++--------------- cli/cmd/health_test.go | 5 ++++ cli/cmd/scan.go | 10 ++++---- cli/internal/utils/display_version.go | 9 +++++++ 5 files changed, 36 insertions(+), 26 deletions(-) diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go index 2d2f61dce..6a6cc739f 100644 --- a/cli/cmd/compile.go +++ b/cli/cmd/compile.go @@ -73,7 +73,7 @@ Arguments: } sb.FieldNode("Project", absProjectRoot). FieldNode("Output project model", absOutputProjectModelPath). - FieldNode("Autobuilder", utils.ArtifactDisplayVersion(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)). + FieldNode("Autobuilder", utils.ArtifactVersionShort(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)). Render() if DryRunCompile { diff --git a/cli/cmd/health.go b/cli/cmd/health.go index 2468d439a..340ab7792 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -4,7 +4,6 @@ import ( "fmt" "os" "strconv" - "strings" "github.com/seqra/opentaint/internal/globals" "github.com/seqra/opentaint/internal/utils" @@ -90,7 +89,7 @@ func runHealth() { for _, c := range components { node := out.GroupItem(th.FieldKey.Render(c.name + ":")) if c.version != "" { - node.Child(th.FieldValue.Render(shortVersion(c.version))) + node.Child(th.FieldValue.Render(c.version)) } path := c.path if !c.present { @@ -107,11 +106,15 @@ func runHealth() { func resolveHealthComponent(key string) healthComponent { switch key { case "autobuilder": - path, err := utils.ResolveJarPath(globals.ArtifactByKind("autobuilder")) - return healthComponent{"Autobuilder", globals.Config.Autobuilder.Version, path, err == nil && utils.PathExists(path)} + def := globals.ArtifactByKind("autobuilder") + path, err := utils.ResolveJarPath(def) + version := utils.ArtifactVersionShort(def, globals.Config.Autobuilder.JarPath) + return healthComponent{"Autobuilder", version, path, err == nil && utils.PathExists(path)} case "analyzer": - path, err := utils.ResolveJarPath(globals.ArtifactByKind("analyzer")) - return healthComponent{"Analyzer", globals.Config.Analyzer.Version, path, err == nil && utils.PathExists(path)} + def := globals.ArtifactByKind("analyzer") + path, err := utils.ResolveJarPath(def) + version := utils.ArtifactVersionShort(def, globals.Config.Analyzer.JarPath) + return healthComponent{"Analyzer", version, path, err == nil && utils.PathExists(path)} case "rules": return resolveRulesComponent() case "runtime": @@ -124,7 +127,7 @@ func resolveHealthComponent(key string) healthComponent { // resolveRulesComponent resolves the built-in rules directory, downloading it // on demand so `health --rules` replaces `dev rules-path`. func resolveRulesComponent() healthComponent { - c := healthComponent{name: "Rules", version: globals.Config.Rules.Version} + c := healthComponent{name: "Rules", version: utils.ArtifactVersionShort(globals.ArtifactByKind("rules"), "")} path, err := utils.GetRulesPath(globals.Config.Rules.Version) if err != nil { return c @@ -146,33 +149,26 @@ func resolveRulesComponent() healthComponent { return c } -// resolveRuntimeComponent reports the Java the analyzer uses: a managed JRE if -// present, otherwise system Java. +// resolveRuntimeComponent reports the Java the analyzer runs on, and where it +// comes from: "builtin" is the JRE OpenTaint manages itself (downloaded/bundled +// into its own install), "system" is a Java already on the user's PATH. func resolveRuntimeComponent() healthComponent { c := healthComponent{name: "Runtime"} if jre := utils.FindExistingJRE(utils.ManagedJRETiers()); jre != nil { c.path = utils.JavaBinaryPath(jre.Path) - c.version = "Java " + strconv.Itoa(globals.DefaultJavaVersion) + " · managed" + c.version = "Java " + strconv.Itoa(globals.DefaultJavaVersion) + " (builtin)" c.present = true return c } if sys := java.DetectSystemJava(); sys != nil { c.path = sys.Path - c.version = "Java " + sys.FullVersion + " · " + sys.Vendor + c.version = "Java " + sys.FullVersion + " (system)" c.present = true return c } - c.version = "Java " + strconv.Itoa(globals.DefaultJavaVersion) + c.version = "Java " + strconv.Itoa(globals.DefaultJavaVersion) + " (builtin)" if jre := utils.GetInstallJREPath(); jre != "" { c.path = utils.JavaBinaryPath(jre) } return c } - -// shortVersion strips the artifact-kind prefix (e.g. "rules/v0.1.1" → "v0.1.1"). -func shortVersion(v string) string { - if idx := strings.LastIndex(v, "/"); idx >= 0 { - return v[idx+1:] - } - return v -} diff --git a/cli/cmd/health_test.go b/cli/cmd/health_test.go index dca28c3b3..4d38d28f8 100644 --- a/cli/cmd/health_test.go +++ b/cli/cmd/health_test.go @@ -15,6 +15,11 @@ func TestResolveHealthComponentUsesAnalyzerJarOverride(t *testing.T) { if c.path != globals.Config.Analyzer.JarPath { t.Fatalf("health analyzer path = %q, want override %q", c.path, globals.Config.Analyzer.JarPath) } + // A jar-path override is a custom build, so health must report it as such + // (matching scan), not as the nominal configured version. + if want := "custom (" + globals.Config.Analyzer.JarPath + ")"; c.version != want { + t.Fatalf("health analyzer version = %q, want %q", c.version, want) + } } func TestResolveHealthComponentUsesAutobuilderJarOverride(t *testing.T) { diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index c0d2090e8..ac50ac8fb 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -273,7 +273,7 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { } // Display scan information in tree format - printScanInfo(cmd, plan, absSemgrepRuleLoadTracePath, absUserProjectRoot, absRuleSetPaths, localVersion) + printScanInfo(cmd, plan, absSemgrepRuleLoadTracePath, absUserProjectRoot, absRuleSetPaths) var nonBuiltinRulesetPaths []string for _, r := range absRuleSetPaths { @@ -578,7 +578,7 @@ func resolveScanPlan(cfg ScanConfig, absUserProjectRoot string) scanPlan { } } -func printScanInfo(cmd *cobra.Command, plan scanPlan, absSemgrepRuleLoadTracePath string, absUserProjectRoot string, absRuleSetPaths []RulesetType, analyzerVersion string) { +func printScanInfo(cmd *cobra.Command, plan scanPlan, absSemgrepRuleLoadTracePath string, absUserProjectRoot string, absRuleSetPaths []RulesetType) { sb := out.Section(plan.mode.String()) addConfigFields(cmd, sb) if globals.Config.Output.Debug { @@ -590,14 +590,14 @@ func printScanInfo(cmd *cobra.Command, plan scanPlan, absSemgrepRuleLoadTracePat if plan.projectCachePath != "" { sb.FieldNode("Project model", plan.absProjectModel) } - sb.FieldNode("Autobuilder", utils.ArtifactDisplayVersion(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)) + sb.FieldNode("Autobuilder", utils.ArtifactVersionShort(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)) } else { sb.FieldNode("Project model", plan.absProjectModel) } - sb.FieldNode("Analyzer", analyzerVersion) + sb.FieldNode("Analyzer", utils.ArtifactVersionShort(globals.ArtifactByKind("analyzer"), globals.Config.Analyzer.JarPath)) for _, r := range absRuleSetPaths { if r.Builtin { - sb.FieldNode("Bundled ruleset", utils.ArtifactDisplayVersion(globals.ArtifactByKind("rules"), "")) + sb.FieldNode("Bundled ruleset", utils.ArtifactVersionShort(globals.ArtifactByKind("rules"), "")) } else { sb.FieldNode("User ruleset", r.Path) } diff --git a/cli/internal/utils/display_version.go b/cli/internal/utils/display_version.go index 32e2e1a25..19e1bb602 100644 --- a/cli/internal/utils/display_version.go +++ b/cli/internal/utils/display_version.go @@ -2,10 +2,19 @@ package utils import ( "fmt" + "strings" "github.com/seqra/opentaint/internal/globals" ) +// ArtifactVersionShort is ArtifactDisplayVersion without the leading "/" +// prefix. Use it where the value is already labelled with the artifact kind +// (e.g. an "Analyzer:" tree node), so the kind isn't shown twice. The +// "custom (...)" form carries no such prefix and is returned unchanged. +func ArtifactVersionShort(def globals.ArtifactDef, jarPathOverride string) string { + return strings.TrimPrefix(ArtifactDisplayVersion(def, jarPathOverride), def.Kind()+"/") +} + func ArtifactDisplayVersion(def globals.ArtifactDef, jarPathOverride string) string { resolvedTier, resolvedPath := "", "" if jarPathOverride == "" { From 85b31a51b62d51741246f8c8559c382e258bdd76 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 17:43:29 +0200 Subject: [PATCH 28/54] fix(cli): drop path duplication in health's custom artifact version ArtifactDisplayVersion embeds the jar path in the custom label ("custom ()"), which health then repeated on its own path node. Split the display helpers by context: ArtifactVersionWithPath keeps the path inline for single-line displays (scan, compile), while the new ArtifactVersion collapses a custom build to a bare "custom" for displays that show the path separately (health). The custom-vs-release decision is now a shared isCustomArtifact predicate. --- cli/cmd/compile.go | 2 +- cli/cmd/health.go | 6 +- cli/cmd/health_test.go | 9 +-- cli/cmd/scan.go | 6 +- cli/internal/utils/display_version.go | 69 ++++++++++++++-------- cli/internal/utils/display_version_test.go | 21 +++++++ 6 files changed, 79 insertions(+), 34 deletions(-) diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go index 6a6cc739f..191eb4ba9 100644 --- a/cli/cmd/compile.go +++ b/cli/cmd/compile.go @@ -73,7 +73,7 @@ Arguments: } sb.FieldNode("Project", absProjectRoot). FieldNode("Output project model", absOutputProjectModelPath). - FieldNode("Autobuilder", utils.ArtifactVersionShort(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)). + FieldNode("Autobuilder", utils.ArtifactVersionWithPath(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)). Render() if DryRunCompile { diff --git a/cli/cmd/health.go b/cli/cmd/health.go index 340ab7792..b3f146898 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -108,12 +108,12 @@ func resolveHealthComponent(key string) healthComponent { case "autobuilder": def := globals.ArtifactByKind("autobuilder") path, err := utils.ResolveJarPath(def) - version := utils.ArtifactVersionShort(def, globals.Config.Autobuilder.JarPath) + version := utils.ArtifactVersion(def, globals.Config.Autobuilder.JarPath) return healthComponent{"Autobuilder", version, path, err == nil && utils.PathExists(path)} case "analyzer": def := globals.ArtifactByKind("analyzer") path, err := utils.ResolveJarPath(def) - version := utils.ArtifactVersionShort(def, globals.Config.Analyzer.JarPath) + version := utils.ArtifactVersion(def, globals.Config.Analyzer.JarPath) return healthComponent{"Analyzer", version, path, err == nil && utils.PathExists(path)} case "rules": return resolveRulesComponent() @@ -127,7 +127,7 @@ func resolveHealthComponent(key string) healthComponent { // resolveRulesComponent resolves the built-in rules directory, downloading it // on demand so `health --rules` replaces `dev rules-path`. func resolveRulesComponent() healthComponent { - c := healthComponent{name: "Rules", version: utils.ArtifactVersionShort(globals.ArtifactByKind("rules"), "")} + c := healthComponent{name: "Rules", version: utils.ArtifactVersion(globals.ArtifactByKind("rules"), "")} path, err := utils.GetRulesPath(globals.Config.Rules.Version) if err != nil { return c diff --git a/cli/cmd/health_test.go b/cli/cmd/health_test.go index 4d38d28f8..153c612b1 100644 --- a/cli/cmd/health_test.go +++ b/cli/cmd/health_test.go @@ -15,10 +15,11 @@ func TestResolveHealthComponentUsesAnalyzerJarOverride(t *testing.T) { if c.path != globals.Config.Analyzer.JarPath { t.Fatalf("health analyzer path = %q, want override %q", c.path, globals.Config.Analyzer.JarPath) } - // A jar-path override is a custom build, so health must report it as such - // (matching scan), not as the nominal configured version. - if want := "custom (" + globals.Config.Analyzer.JarPath + ")"; c.version != want { - t.Fatalf("health analyzer version = %q, want %q", c.version, want) + // A jar-path override is a custom build, so health reports the version as + // "custom" — bare, since the path is already shown on its own node (no + // duplication), unlike scan's single-line "custom ()". + if c.version != "custom" { + t.Fatalf("health analyzer version = %q, want %q", c.version, "custom") } } diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index ac50ac8fb..3ba6eda73 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -590,14 +590,14 @@ func printScanInfo(cmd *cobra.Command, plan scanPlan, absSemgrepRuleLoadTracePat if plan.projectCachePath != "" { sb.FieldNode("Project model", plan.absProjectModel) } - sb.FieldNode("Autobuilder", utils.ArtifactVersionShort(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)) + sb.FieldNode("Autobuilder", utils.ArtifactVersionWithPath(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)) } else { sb.FieldNode("Project model", plan.absProjectModel) } - sb.FieldNode("Analyzer", utils.ArtifactVersionShort(globals.ArtifactByKind("analyzer"), globals.Config.Analyzer.JarPath)) + sb.FieldNode("Analyzer", utils.ArtifactVersionWithPath(globals.ArtifactByKind("analyzer"), globals.Config.Analyzer.JarPath)) for _, r := range absRuleSetPaths { if r.Builtin { - sb.FieldNode("Bundled ruleset", utils.ArtifactVersionShort(globals.ArtifactByKind("rules"), "")) + sb.FieldNode("Bundled ruleset", utils.ArtifactVersionWithPath(globals.ArtifactByKind("rules"), "")) } else { sb.FieldNode("User ruleset", r.Path) } diff --git a/cli/internal/utils/display_version.go b/cli/internal/utils/display_version.go index 19e1bb602..d8815afa3 100644 --- a/cli/internal/utils/display_version.go +++ b/cli/internal/utils/display_version.go @@ -7,37 +7,60 @@ import ( "github.com/seqra/opentaint/internal/globals" ) -// ArtifactVersionShort is ArtifactDisplayVersion without the leading "/" -// prefix. Use it where the value is already labelled with the artifact kind -// (e.g. an "Analyzer:" tree node), so the kind isn't shown twice. The -// "custom (...)" form carries no such prefix and is returned unchanged. -func ArtifactVersionShort(def globals.ArtifactDef, jarPathOverride string) string { +// ArtifactDisplayVersion renders an artifact's full display label, keeping the +// "/" version prefix. Used for the SARIF tool version, where the prefix +// is part of the identifier. +func ArtifactDisplayVersion(def globals.ArtifactDef, jarPathOverride string) string { + tier, path := artifactResolution(def, jarPathOverride) + return displayVersion(def.Version, jarPathOverride, tier, path) +} + +// ArtifactVersionWithPath is the version with the redundant "/" prefix +// stripped, for a single-line display that has no separate path field (e.g. +// scan's "Analyzer:" node). A custom build keeps its jar path — "custom ()" +// — since that line is the only place the path appears. +func ArtifactVersionWithPath(def globals.ArtifactDef, jarPathOverride string) string { return strings.TrimPrefix(ArtifactDisplayVersion(def, jarPathOverride), def.Kind()+"/") } -func ArtifactDisplayVersion(def globals.ArtifactDef, jarPathOverride string) string { - resolvedTier, resolvedPath := "", "" +// ArtifactVersion is the version for a display that shows the resolved path on +// its own line (e.g. health's tree). A managed release yields the bare version; +// a custom build collapses to "custom", so the path isn't repeated. +func ArtifactVersion(def globals.ArtifactDef, jarPathOverride string) string { + tier, _ := artifactResolution(def, jarPathOverride) + if isCustomArtifact(def.Version, jarPathOverride, tier) { + return "custom" + } + return strings.TrimPrefix(def.Version, def.Kind()+"/") +} + +// artifactResolution resolves the artifact's tier and path, unless an explicit +// jar override is set (in which case neither is needed). +func artifactResolution(def globals.ArtifactDef, jarPathOverride string) (tier, path string) { if jarPathOverride == "" { - resolvedTier, resolvedPath, _ = resolveArtifactTier(def) + tier, path, _ = resolveArtifactTier(def) } - return displayVersion(def.Version, jarPathOverride, resolvedTier, resolvedPath) + return tier, path +} + +// isCustomArtifact reports whether the artifact is a custom build — an explicit +// jar override, a bundled build next to the binary (whose nominal version may +// not match its content), or an unpinned version — rather than a managed +// install/cache release. +func isCustomArtifact(version, overridePath, resolvedTier string) bool { + return overridePath != "" || resolvedTier == TierBundled || version == "" } -// displayVersion renders an artifact's display label: -// - an explicit jar-path override always wins -> custom () -// - resolved from the bundled tier (a user-controlled build next to the binary, -// whose nominal version may not match its actual content) -> custom () -// - an empty/unpinned version -> custom () -// - otherwise (a managed install/cache release) -> the version string +// displayVersion renders an artifact's label: a custom build as "custom ()" +// (the override path if set, otherwise the resolved path), and a managed release +// as its version string. func displayVersion(version, overridePath, resolvedTier, resolvedPath string) string { - if overridePath != "" { - return customLabel(overridePath) - } - if resolvedTier == TierBundled { - return customLabel(resolvedPath) - } - if version == "" { - return customLabel(resolvedPath) + if isCustomArtifact(version, overridePath, resolvedTier) { + path := overridePath + if path == "" { + path = resolvedPath + } + return customLabel(path) } return version } diff --git a/cli/internal/utils/display_version_test.go b/cli/internal/utils/display_version_test.go index 99c601a22..1ea4b8992 100644 --- a/cli/internal/utils/display_version_test.go +++ b/cli/internal/utils/display_version_test.go @@ -80,3 +80,24 @@ func TestArtifactDisplayVersion(t *testing.T) { t.Errorf("pinned case: got %q, want %q", got, "analyzer/2026.05.27.68ab20a") } } + +func TestArtifactVersionShortVariants(t *testing.T) { + analyzer := globals.ArtifactByKind("analyzer").WithVersion("analyzer/2026.05.27.68ab20a") + + // Pinned release: kind prefix stripped, identical for both helpers. + if got := ArtifactVersionWithPath(analyzer, ""); got != "2026.05.27.68ab20a" { + t.Errorf("WithPath pinned: got %q, want %q", got, "2026.05.27.68ab20a") + } + if got := ArtifactVersion(analyzer, ""); got != "2026.05.27.68ab20a" { + t.Errorf("bare pinned: got %q, want %q", got, "2026.05.27.68ab20a") + } + + // Custom (jar override): WithPath keeps the path (single-line display), + // bare collapses to "custom" (the path is shown separately, no dup). + if got := ArtifactVersionWithPath(analyzer, "/home/dev/analyzer.jar"); got != "custom (/home/dev/analyzer.jar)" { + t.Errorf("WithPath custom: got %q, want %q", got, "custom (/home/dev/analyzer.jar)") + } + if got := ArtifactVersion(analyzer, "/home/dev/analyzer.jar"); got != "custom" { + t.Errorf("bare custom: got %q, want %q", got, "custom") + } +} From 763d7ea1673794caf7ff815a71ca7c6545f4ae44 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 17:43:54 +0200 Subject: [PATCH 29/54] style(cli): drop trailing colon from tree node labels The colon dangled awkwardly on a parent node whose value hangs on the line below. Remove it from the shared FieldNode primitive (and health's label), so scan, compile, and health tree nodes read "Autobuilder" rather than "Autobuilder:". The inline Field "key: value" form keeps its colon. --- cli/cmd/health.go | 2 +- cli/internal/output/output_test.go | 8 ++++---- cli/internal/output/section.go | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cli/cmd/health.go b/cli/cmd/health.go index b3f146898..c9a8d82a8 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -87,7 +87,7 @@ func runHealth() { sb := out.Section("OpenTaint Health") th := out.Theme() for _, c := range components { - node := out.GroupItem(th.FieldKey.Render(c.name + ":")) + node := out.GroupItem(th.FieldKey.Render(c.name)) if c.version != "" { node.Child(th.FieldValue.Render(c.version)) } diff --git a/cli/internal/output/output_test.go b/cli/internal/output/output_test.go index 4dedf2504..ab497b199 100644 --- a/cli/internal/output/output_test.go +++ b/cli/internal/output/output_test.go @@ -117,8 +117,8 @@ func TestSectionFieldNodeRendersValueAsLeaf(t *testing.T) { Render() got := buf.String() - if !strings.Contains(got, "Project:") { - t.Errorf("expected 'Project:' parent node, got %q", got) + if !strings.Contains(got, "Project") { + t.Errorf("expected 'Project' parent node, got %q", got) } if !strings.Contains(got, "/home/me/projects/hertzbeat") { t.Errorf("expected path value in output, got %q", got) @@ -126,7 +126,7 @@ func TestSectionFieldNodeRendersValueAsLeaf(t *testing.T) { keyLine := -1 valLine := -1 for i, line := range strings.Split(got, "\n") { - if strings.Contains(line, "Project:") { + if strings.Contains(line, "Project") { keyLine = i } if strings.Contains(line, "/home/me/projects/hertzbeat") { @@ -134,7 +134,7 @@ func TestSectionFieldNodeRendersValueAsLeaf(t *testing.T) { } } if keyLine == -1 || valLine == -1 || valLine != keyLine+1 { - t.Errorf("expected path on the line directly below 'Project:', got key=%d val=%d in %q", keyLine, valLine, got) + t.Errorf("expected path on the line directly below 'Project', got key=%d val=%d in %q", keyLine, valLine, got) } } diff --git a/cli/internal/output/section.go b/cli/internal/output/section.go index 6aa20ad77..6b19b6cb8 100644 --- a/cli/internal/output/section.go +++ b/cli/internal/output/section.go @@ -61,7 +61,7 @@ func (sb *SectionBuilder) StyledField(key string, value any, valueStyle lipgloss func (sb *SectionBuilder) FieldNode(key string, value any) *SectionBuilder { th := sb.printer.theme - root := th.FieldKey.Render(key + ":") + root := th.FieldKey.Render(key) leaf := th.FieldValue.Render(fmt.Sprint(value)) sb.items = append(sb.items, sb.printer.GroupItem(root, leaf)) return sb From 728563c6df44339d79393fb769a3f618b3567506 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Tue, 9 Jun 2026 18:55:27 +0200 Subject: [PATCH 30/54] refactor(cli): dedupe rule/approximation plumbing into shared helpers Collapse logic that had been copy-pasted across the scan and test commands into reusable libs, keeping the command layer thin: - utils.EnsureRulesPath centralizes the built-in rules resolve+download dance (was duplicated in scan, health, test rule run) - addDataflowApproximations / addPassthroughApproximations share the approximation-input loops between scan and test rule run - utils.CopyFile replaces an ad-hoc file copy in test init - testutil.ResolveJar moves the 4-tier test-util JAR lookup out of the cmd layer into its owning lib Also tidy up a few supportability issues surfaced in review: - drop the never-read analyzerError.message field and fold the duplicated print+return in classifyAnalyzerError into one path - gate scan's viper "scan.*" bindings behind a bindViper flag so the reachability alias no longer re-binds them and steal config precedence - normalize a stray 0755 to 0o755 Behavior-preserving except health now reports a rules-resolution error to stderr instead of swallowing it. Build, vet, and tests pass. --- cli/cmd/analyzer_exit.go | 21 ++++---- cli/cmd/analyzer_inputs.go | 28 ++++++++++ cli/cmd/health.go | 19 ++----- cli/cmd/scan.go | 44 ++++++++-------- cli/cmd/test_init.go | 83 ++---------------------------- cli/cmd/test_rule_reachability.go | 2 +- cli/cmd/test_rule_run.go | 30 ++--------- cli/internal/testutil/testutil.go | 44 ++++++++++++++++ cli/internal/utils/copy_file.go | 34 ++++++++++++ cli/internal/utils/ensure_rules.go | 28 ++++++++++ 10 files changed, 179 insertions(+), 154 deletions(-) create mode 100644 cli/cmd/analyzer_inputs.go create mode 100644 cli/internal/utils/copy_file.go create mode 100644 cli/internal/utils/ensure_rules.go diff --git a/cli/cmd/analyzer_exit.go b/cli/cmd/analyzer_exit.go index 8dd5a3cf4..d81095057 100644 --- a/cli/cmd/analyzer_exit.go +++ b/cli/cmd/analyzer_exit.go @@ -24,7 +24,6 @@ const ( // exitCode is the process exit code to forward to os.Exit. type analyzerError struct { exitCode int - message string } // analyzerExitMessage returns a human-readable description for a known @@ -44,25 +43,23 @@ func analyzerExitMessage(code int) string { } } -// classifyAnalyzerError converts a *JavaCommandError into an *analyzerError -// with a human-readable message. Returns nil when cmdErr is nil. +// classifyAnalyzerError prints a human-readable description of an analyzer +// failure and returns the *analyzerError carrying its exit code. Returns nil +// when cmdErr is nil. // -// The error message is printed immediately. The caller is responsible for -// eventually calling os.Exit with the returned exit code after performing -// any post-failure work (e.g. printing summaries). +// The message is printed immediately. The caller is responsible for eventually +// calling os.Exit with the returned exit code after performing any post-failure +// work (e.g. printing summaries). func classifyAnalyzerError(cmdErr *java.JavaCommandError) *analyzerError { if cmdErr == nil { return nil } code := cmdErr.ExitCode + formatted := fmt.Sprintf("Analysis failed with exit code %d", code) if msg := analyzerExitMessage(code); msg != "" { - formatted := fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg) - out.Error(formatted) - return &analyzerError{exitCode: code, message: formatted} + formatted = fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg) } - - formatted := fmt.Sprintf("Analysis failed with exit code %d", code) out.Error(formatted) - return &analyzerError{exitCode: code, message: formatted} + return &analyzerError{exitCode: code} } diff --git a/cli/cmd/analyzer_inputs.go b/cli/cmd/analyzer_inputs.go new file mode 100644 index 000000000..6328a6b62 --- /dev/null +++ b/cli/cmd/analyzer_inputs.go @@ -0,0 +1,28 @@ +package cmd + +import ( + "github.com/seqra/opentaint/internal/utils/log" +) + +// addDataflowApproximations resolves each --dataflow-approximations entry, +// auto-compiling a Java source directory into class files when needed, and +// registers the result on the builder. Shared by `scan` and the `test * run` +// commands so the two stay in lockstep. +func addDataflowApproximations(b *AnalyzerBuilder, paths []string, analyzerJarPath, projectModelDir string) { + for _, approxPath := range paths { + absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations") + compiledPath, err := compileApproximationsIfNeeded(absApproxPath, analyzerJarPath, projectModelDir) + if err != nil { + out.Fatalf("Approximation compilation failed: %s", err) + } + b.AddDataflowApproximations(compiledPath) + } +} + +// addPassthroughApproximations resolves each --passthrough-approximations entry +// to an absolute path and registers it on the builder. +func addPassthroughApproximations(b *AnalyzerBuilder, paths []string) { + for _, passthrough := range paths { + b.AddPassthroughApproximations(log.AbsPathOrExit(passthrough, "passthrough-approximations")) + } +} diff --git a/cli/cmd/health.go b/cli/cmd/health.go index c9a8d82a8..a2e0145d9 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -128,21 +128,12 @@ func resolveHealthComponent(key string) healthComponent { // on demand so `health --rules` replaces `dev rules-path`. func resolveRulesComponent() healthComponent { c := healthComponent{name: "Rules", version: utils.ArtifactVersion(globals.ArtifactByKind("rules"), "")} - path, err := utils.GetRulesPath(globals.Config.Rules.Version) - if err != nil { - return c - } + // EnsureRulesPath returns the expected path even on failure, so the report + // can still show where the rules belong, flagged as missing. + path, err := utils.EnsureRulesPath(out) c.path = path - if utils.PathExists(path) { - c.present = true - return c - } - if dlErr := utils.DownloadAndUnpackGithubReleaseAsset( - globals.Config.Owner, globals.Config.Repo, - globals.Config.Rules.Version, globals.RulesAssetName, - path, globals.Config.Github.Token, globals.Config.SkipVerify, out, - ); dlErr != nil { - fmt.Fprintf(os.Stderr, "Error downloading rules: %s\n", dlErr) + if err != nil { + fmt.Fprintf(os.Stderr, "Error resolving rules: %s\n", err) return c } c.present = utils.PathExists(path) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 3ba6eda73..df4d4a44d 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -130,7 +130,7 @@ func prepareScanConfig(cfg ScanConfig, args []string) ScanConfig { func init() { rootCmd.AddCommand(scanCmd) - addScanFlags(scanCmd) + addScanFlags(scanCmd, true) addRuleIDFlag(scanCmd) } @@ -141,21 +141,34 @@ func addRuleIDFlag(cmd *cobra.Command) { cmd.Flags().StringArrayVar(&scanFlags.RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") } -func addScanFlags(cmd *cobra.Command) { +// addScanFlags registers the flags shared by `scan` and `test rule +// reachability`. Only the canonical `scan` command binds them to viper config +// keys (bindViper); the reachability alias shares the same scanFlags target but +// must not re-bind the global "scan.*" keys, or the last init() to run would +// silently steal config precedence from the command the user actually invoked. +func addScanFlags(cmd *cobra.Command, bindViper bool) { cmd.Flags().DurationVarP(&globals.Config.Scan.Timeout, "timeout", "t", 900*time.Second, "Timeout for analysis") - _ = viper.BindPFlag("scan.timeout", cmd.Flags().Lookup("timeout")) + if bindViper { + _ = viper.BindPFlag("scan.timeout", cmd.Flags().Lookup("timeout")) + } cmd.Flags().StringArrayVar(&scanFlags.Ruleset, "ruleset", []string{"builtin"}, "YAML rules file, directory of YAML rules files ending in .yml or .yaml, or `builtin` to scan with built-in rules") - _ = viper.BindPFlag("scan.ruleset", cmd.Flags().Lookup("ruleset")) + if bindViper { + _ = viper.BindPFlag("scan.ruleset", cmd.Flags().Lookup("ruleset")) + } cmd.Flags().BoolVar(&scanFlags.SemgrepCompatibilitySarif, "semgrep-compatibility-sarif", true, "Use Semgrep compatible ruleId") cmd.Flags().StringVarP(&scanFlags.SarifReportPath, "output", "o", "", "Path to the SARIF-report output file") cmd.Flags().StringArrayVar(&scanFlags.Severity, "severity", []string{"warning", "error"}, "Report findings only from rules matching the supplied severity level. By default only warning and error rules are run (note, warning, error)") cmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)") - _ = viper.BindPFlag("scan.max_memory", cmd.Flags().Lookup("max-memory")) + if bindViper { + _ = viper.BindPFlag("scan.max_memory", cmd.Flags().Lookup("max-memory")) + } cmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)") - _ = viper.BindPFlag("scan.code_flow_limit", cmd.Flags().Lookup("code-flow-limit")) + if bindViper { + _ = viper.BindPFlag("scan.code_flow_limit", cmd.Flags().Lookup("code-flow-limit")) + } cmd.Flags().BoolVar(&scanFlags.DryRun, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") cmd.Flags().BoolVar(&scanFlags.Recompile, "recompile", false, "Force recompilation even if a cached project model exists") cmd.Flags().StringVar(&scanFlags.ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") @@ -296,10 +309,7 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { if !ruleSetPath.Builtin { continue } - if _, err := os.Stat(ruleSetPath.Path); err == nil { - continue - } - if err := utils.DownloadAndUnpackGithubReleaseAsset(globals.Config.Owner, globals.Config.Repo, globals.Config.Rules.Version, globals.RulesAssetName, ruleSetPath.Path, globals.Config.Github.Token, globals.Config.SkipVerify, out); err != nil { + if _, err := utils.EnsureRulesPath(out); err != nil { out.Fatalf("Unexpected error occurred while trying to download ruleset: %s", err) } } @@ -392,10 +402,7 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { for _, ruleID := range ruleIDs { nativeBuilder.AddRuleID(ruleID) } - for _, passthrough := range cfg.PassthroughApproximations { - absPassthrough := log.AbsPathOrExit(passthrough, "passthrough-approximations") - nativeBuilder.AddPassthroughApproximations(absPassthrough) - } + addPassthroughApproximations(nativeBuilder, cfg.PassthroughApproximations) if cfg.TrackExternalMethods { nativeBuilder.SetTrackExternalMethods(true) } @@ -413,14 +420,7 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { nativeBuilder.SetJarPath(analyzerJarPath) // Process --dataflow-approximations: auto-compile .java sources if needed - for _, approxPath := range cfg.DataflowApproximations { - absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations") - compiledPath, compileErr := compileApproximationsIfNeeded(absApproxPath, analyzerJarPath, absProjectModelPath) - if compileErr != nil { - out.Fatalf("Approximation compilation failed: %s", compileErr) - } - nativeBuilder.AddDataflowApproximations(compiledPath) - } + addDataflowApproximations(nativeBuilder, cfg.DataflowApproximations, analyzerJarPath, absProjectModelPath) analyzerJavaRunner := java.NewJavaRunner(). WithSkipVerify(globals.Config.SkipVerify). diff --git a/cli/cmd/test_init.go b/cli/cmd/test_init.go index 478597e61..0d41beca8 100644 --- a/cli/cmd/test_init.go +++ b/cli/cmd/test_init.go @@ -2,7 +2,6 @@ package cmd import ( "fmt" - "io" "os" "path/filepath" "strings" @@ -109,12 +108,12 @@ func bootstrapTestProject(outputDir, projectName string, dependencies []string) } } - testUtilJarSrc, err := resolveTestUtilJar() + testUtilJarSrc, err := testutil.ResolveJar() if err != nil { out.Fatalf("Failed to resolve test-util JAR: %s", err) } - testUtilJarDst := filepath.Join(outputDir, "libs", "opentaint-sast-test-util.jar") - if err := copyFile(testUtilJarSrc, testUtilJarDst); err != nil { + testUtilJarDst := filepath.Join(outputDir, "libs", testutil.JarName) + if err := utils.CopyFile(testUtilJarSrc, testUtilJarDst); err != nil { out.Fatalf("Failed to copy test-util JAR: %s", err) } @@ -127,82 +126,6 @@ func bootstrapTestProject(outputDir, projectName string, dependencies []string) } } -// resolveTestUtilJar finds the opentaint-sast-test-util.jar. -// Resolution order: -// 1. Bundled path next to binary: /lib/opentaint-sast-test-util.jar -// 2. Install path: ~/.opentaint/install/lib/opentaint-sast-test-util.jar -// 3. Dev build: /core/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar -func resolveTestUtilJar() (string, error) { - const jarName = "opentaint-sast-test-util.jar" - - // Tier 1: Bundled next to binary - if libPath := utils.GetBundledLibPath(); libPath != "" { - candidate := filepath.Join(libPath, jarName) - if _, err := os.Stat(candidate); err == nil { - return candidate, nil - } - } - - // Tier 2: Install path - if libPath := utils.GetInstallLibPath(); libPath != "" { - candidate := filepath.Join(libPath, jarName) - if _, err := os.Stat(candidate); err == nil { - return candidate, nil - } - } - - // Tier 3: Dev build — walk up from exe dir to find core/opentaint-sast-test-util/build/libs/ - if exe, err := os.Executable(); err == nil { - exe, _ = filepath.EvalSymlinks(exe) - // exe is typically at cli/bin/opentaint, so repo root is ../../ - dir := filepath.Dir(exe) - for i := 0; i < 4; i++ { - candidate := filepath.Join(dir, "core", "opentaint-sast-test-util", "build", "libs", jarName) - if _, err := os.Stat(candidate); err == nil { - return candidate, nil - } - dir = filepath.Dir(dir) - } - } - - // Tier 4: Extract from embedded binary - if extracted, err := testutil.ExtractJar(); err == nil { - return extracted, nil - } - - return "", fmt.Errorf( - "%s not found; build it with 'cd core && ./gradlew :opentaint-sast-test-util:jar' or reinstall opentaint", - jarName, - ) -} - -func copyFile(src, dst string) error { - in, err := os.Open(src) - if err != nil { - return fmt.Errorf("open source: %w", err) - } - defer func() { _ = in.Close() }() - - if err := utils.EnsureParentDir(dst); err != nil { - return err - } - - outFile, err := os.Create(dst) - if err != nil { - return fmt.Errorf("create destination: %w", err) - } - - if _, err := io.Copy(outFile, in); err != nil { - _ = outFile.Close() - return fmt.Errorf("copy: %w", err) - } - - if err := outFile.Close(); err != nil { - return fmt.Errorf("close destination: %w", err) - } - return nil -} - func generateBuildGradle(outputDir string, dependencies []string) error { var sb strings.Builder sb.WriteString(`plugins { diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index f8a918695..f09275607 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -58,7 +58,7 @@ func reachabilityScanConfig(base ScanConfig, ruleID, entryPoint string) ScanConf func init() { testRuleCmd.AddCommand(testRuleReachabilityCmd) - addScanFlags(testRuleReachabilityCmd) + addScanFlags(testRuleReachabilityCmd, false) testRuleReachabilityCmd.Flags().StringVar(&reachabilityEntryPoint, "entry-points", "", "Start from '*' or a fully qualified method such as com.example.Class#method") } diff --git a/cli/cmd/test_rule_run.go b/cli/cmd/test_rule_run.go index 2b51570e8..d2a03bf82 100644 --- a/cli/cmd/test_rule_run.go +++ b/cli/cmd/test_rule_run.go @@ -91,25 +91,15 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { // The agent should always specify -o to control the output location. } else { outputDir = log.AbsPathOrExit(outputDir, "output") - if err := os.MkdirAll(outputDir, 0755); err != nil { + if err := os.MkdirAll(outputDir, 0o755); err != nil { out.Fatalf("Failed to create output directory: %s", err) } } // Ensure builtin rules are available - rulesPath, err := utils.GetRulesPath(globals.Config.Rules.Version) + rulesPath, err := utils.EnsureRulesPath(out) if err != nil { - out.Fatalf("Failed to resolve rules path: %s", err) - } - if _, err := os.Stat(rulesPath); os.IsNotExist(err) { - if dlErr := utils.DownloadAndUnpackGithubReleaseAsset( - globals.Config.Owner, globals.Config.Repo, - globals.Config.Rules.Version, globals.RulesAssetName, - rulesPath, globals.Config.Github.Token, - globals.Config.SkipVerify, out, - ); dlErr != nil { - out.Fatalf("Failed to download rules: %s", dlErr) - } + out.Fatalf("Failed to prepare built-in rules: %s", err) } timeoutSeconds := int64(opts.timeout / time.Second) @@ -147,18 +137,8 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { builder.SetJarPath(analyzerJarPath) // Auto-compile .java sources in a --dataflow-approximations dir, as `scan` does. - for _, approxPath := range opts.dataflowApprox { - absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations") - compiledPath, compileErr := compileApproximationsIfNeeded(absApproxPath, analyzerJarPath, projectPath) - if compileErr != nil { - out.Fatalf("Approximation compilation failed: %s", compileErr) - } - builder.AddDataflowApproximations(compiledPath) - } - for _, passthrough := range opts.passthroughApprox { - absPassthrough := log.AbsPathOrExit(passthrough, "passthrough-approximations") - builder.AddPassthroughApproximations(absPassthrough) - } + addDataflowApproximations(builder, opts.dataflowApprox, analyzerJarPath, projectPath) + addPassthroughApproximations(builder, opts.passthroughApprox) javaRunner := java.NewJavaRunner(). WithSkipVerify(globals.Config.SkipVerify). diff --git a/cli/internal/testutil/testutil.go b/cli/internal/testutil/testutil.go index 30c20262f..88d20d9f4 100644 --- a/cli/internal/testutil/testutil.go +++ b/cli/internal/testutil/testutil.go @@ -23,6 +23,50 @@ var jarFiles embed.FS // JarName is the filename of the test-util JAR. const JarName = "opentaint-sast-test-util.jar" +// ResolveJar locates the opentaint-sast-test-util.jar, checking, in order: +// 1. Bundled next to the binary: /lib/ +// 2. Managed install: ~/.opentaint/install/lib/ +// 3. Dev build: /core/opentaint-sast-test-util/build/libs/ +// 4. The copy embedded in this binary, extracted on demand. +func ResolveJar() (string, error) { + if libPath := utils.GetBundledLibPath(); libPath != "" { + candidate := filepath.Join(libPath, JarName) + if utils.PathExists(candidate) { + return candidate, nil + } + } + + if libPath := utils.GetInstallLibPath(); libPath != "" { + candidate := filepath.Join(libPath, JarName) + if utils.PathExists(candidate) { + return candidate, nil + } + } + + // Dev build: walk up from the exe dir (typically cli/bin/opentaint, so the + // repo root is a few levels up) to find core/.../build/libs/. + if exe, err := os.Executable(); err == nil { + exe, _ = filepath.EvalSymlinks(exe) + dir := filepath.Dir(exe) + for range 4 { + candidate := filepath.Join(dir, "core", "opentaint-sast-test-util", "build", "libs", JarName) + if utils.PathExists(candidate) { + return candidate, nil + } + dir = filepath.Dir(dir) + } + } + + if extracted, err := ExtractJar(); err == nil { + return extracted, nil + } + + return "", fmt.Errorf( + "%s not found; build it with 'cd core && ./gradlew :opentaint-sast-test-util:jar' or reinstall opentaint", + JarName, + ) +} + func contentHash(jarData []byte) string { h := sha256.Sum256(jarData) return hex.EncodeToString(h[:]) diff --git a/cli/internal/utils/copy_file.go b/cli/internal/utils/copy_file.go new file mode 100644 index 000000000..2aaefcae2 --- /dev/null +++ b/cli/internal/utils/copy_file.go @@ -0,0 +1,34 @@ +package utils + +import ( + "fmt" + "io" + "os" +) + +// CopyFile copies the file at src to dst, creating parent directories as +// needed. dst is truncated if it already exists. +func CopyFile(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return fmt.Errorf("open source: %w", err) + } + defer func() { _ = in.Close() }() + + if err := EnsureParentDir(dst); err != nil { + return err + } + + out, err := os.Create(dst) + if err != nil { + return fmt.Errorf("create destination: %w", err) + } + if _, err := io.Copy(out, in); err != nil { + _ = out.Close() + return fmt.Errorf("copy: %w", err) + } + if err := out.Close(); err != nil { + return fmt.Errorf("close destination: %w", err) + } + return nil +} diff --git a/cli/internal/utils/ensure_rules.go b/cli/internal/utils/ensure_rules.go new file mode 100644 index 000000000..33609eb07 --- /dev/null +++ b/cli/internal/utils/ensure_rules.go @@ -0,0 +1,28 @@ +package utils + +import ( + "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/output" +) + +// EnsureRulesPath returns the on-disk path to the built-in rules for the +// configured version, downloading and unpacking them if they are not already +// present. The path is returned even when the download fails, so callers can +// still report where the rules were expected. +func EnsureRulesPath(printer *output.Printer) (string, error) { + path, err := GetRulesPath(globals.Config.Rules.Version) + if err != nil { + return "", err + } + if PathExists(path) { + return path, nil + } + if err := DownloadAndUnpackGithubReleaseAsset( + globals.Config.Owner, globals.Config.Repo, + globals.Config.Rules.Version, globals.RulesAssetName, + path, globals.Config.Github.Token, globals.Config.SkipVerify, printer, + ); err != nil { + return path, err + } + return path, nil +} From e8d2005d35911d41a76d3eaa3cfdc1b6e2ef7c74 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 07:59:42 +0200 Subject: [PATCH 31/54] docs: document npm/npx install across README and docs Surface the npm package and install-free npx workflow alongside the existing Homebrew and install-script methods, and note install-free npx usage in the usage guide. --- README.md | 10 ++++++++++ docs/README.md | 2 ++ docs/installation.md | 6 +++++- docs/usage.md | 4 +++- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b270f7a0a..3fea7ddf8 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,16 @@ brew install --cask seqra/tap/opentaint irm https://opentaint.org/install.ps1 | iex ``` +**Install via npm (Linux/macOS/Windows):** +```bash +npm install -g @seqra/opentaint +``` + +**Or run instantly with npx — no install required (needs Node.js):** +```bash +npx @seqra/opentaint scan +``` + **Scan your project:** ```bash opentaint scan diff --git a/docs/README.md b/docs/README.md index 638a2e910..35484236d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -103,6 +103,7 @@ Each finding includes the HTTP endpoint, making it easy to map your application' | Method | Command | |--------|---------| | **Homebrew** (Linux/macOS) | `brew install --cask seqra/tap/opentaint` | +| **npm / npx** (Linux/macOS/Windows) | `npm install -g @seqra/opentaint` — or `npx @seqra/opentaint scan` to run without installing (needs Node.js) | | **Install script** (Linux/macOS) | `curl -fsSL https://opentaint.org/install.sh \| bash` | | **Install script** (Windows PowerShell) | `irm https://opentaint.org/install.ps1 \| iex` | | **Install script** (Windows CMD) | `curl -fsSL https://opentaint.org/install.cmd -o install.cmd && install.cmd && del install.cmd` | @@ -119,6 +120,7 @@ For detailed instructions, see [Installation Guide](installation.md). ```bash opentaint scan # Scan current directory +npx @seqra/opentaint scan # Run without installing (needs Node.js) opentaint scan --output results.sarif # Scan with explicit output path opentaint summary --show-findings results.sarif # View results opentaint summary --show-findings --verbose-flow --show-code-snippets results.sarif # Full detail diff --git a/docs/installation.md b/docs/installation.md index c31fa2999..d8d6b2d45 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -12,9 +12,13 @@ brew install --cask seqra/tap/opentaint If you have Node.js installed, you can install opentaint from npm. The package bundles the analyzer, rules, and a Java runtime, so no separate Java install is required. -Run without installing: +Run without installing — `npx` downloads the package and runs any command directly: ```bash +# Scan the current directory +npx @seqra/opentaint scan + +# Quick smoke test npx @seqra/opentaint --version ``` diff --git a/docs/usage.md b/docs/usage.md index a805bee0f..92e540232 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,5 +1,7 @@ # Usage +> **Run without installing:** every `opentaint ` below can be run install-free with `npx @seqra/opentaint ` (requires Node.js), e.g. `npx @seqra/opentaint scan`. See [Installation](installation.md#npm). + ## Scanning Projects ```bash @@ -107,7 +109,7 @@ On the first run, the compiled project model is cached in `~/.opentaint/cache/`. #### Rule-authoring flags -These experimental flags support custom rules and approximations: +These flags are to work with custom approximations: | Flag | Description | |------|-------------| From 9c2694e93a73237302d7d7d1aa0d692625fba342 Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Tue, 9 Jun 2026 14:23:48 +0300 Subject: [PATCH 32/54] feat: add script for discovering package usages --- skills/discover-attack-surface/SKILL.md | 2 +- .../scripts/package-usages.py | 264 ++++++++++++++++++ 2 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 skills/discover-attack-surface/scripts/package-usages.py diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md index b1b4b74b2..a21524474 100644 --- a/skills/discover-attack-surface/SKILL.md +++ b/skills/discover-attack-surface/SKILL.md @@ -32,7 +32,7 @@ Before enumerating anything, see what the built-ins already match for this packa ### 2. Enumerate sources and sinks from the package jar -Find the package's jar in `` (match the artifact from the dependency GAV; `unzip -l | grep ` confirms it owns the package) and read its compiled API with `javap` / `unzip` — capture as many real sources and sinks as the package exposes, not just the ones the app happens to call today. Never read the analyzer jar — only dependency jars +Find the package's jar in `` (match the artifact from the dependency GAV; `unzip -l | grep ` confirms it owns the package) and read its compiled API with `javap` / `unzip` — capture as many real sources and sinks as the package exposes, not just the ones the app happens to call today. `scripts/package-usages.py --package --model-dir --output ` can reduce this to functions the project actually calls; use it only as a prioritization aid. Still confirm against source/API for framework entrypoints, type-only/annotation/config APIs, reflection/proxies, and library-internal behavior behind a public call. Never read the analyzer jar — only dependency jars - **sources** — the exact place untrusted data first enters from a boundary (network, persistence, serialization, messaging, execution): a method that *returns* attacker-controlled data — HTTP/RPC request data, a message-broker payload. NOT a method that merely passes data it was handed along — that's a propagator the engine already handles, not a source. General, not class-tagged - **sinks** — dangerous operations (query construction, command/file/path ops, deserialization, template/EL, LDAP/JNDI, reflection); tag each with its vuln class (`ssrf`, `sqli`, `path-traversal`, …) diff --git a/skills/discover-attack-surface/scripts/package-usages.py b/skills/discover-attack-surface/scripts/package-usages.py new file mode 100644 index 000000000..931a71f0a --- /dev/null +++ b/skills/discover-attack-surface/scripts/package-usages.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python3 +"""List package methods used by compiled project classes via javap.""" +import argparse +import json +import os +import re +import shutil +import subprocess +import sys +from bisect import bisect_right +from pathlib import Path + + +CLASSFILE_RE = re.compile(r"^Classfile (.+)$") +SOURCE_RE = re.compile(r'^(?:SourceFile:\s+|Compiled from\s+)"(.+)"$') +CALL_RE = re.compile( + r"^\s*(\d+):\s+invoke\w+\s+#\d+(?:,\s+\d+)?\s+//\s+" + r"(?:Method|InterfaceMethod)\s+(.+)$" +) +DYNAMIC_RE = re.compile( + r"^\s*(\d+):\s+invokedynamic\s+#\d+(?:,\s+\d+)?\s+//\s+InvokeDynamic\s+#(\d+):" +) +LINE_RE = re.compile(r"^line\s+(\d+):\s+(\d+)$") +BOOTSTRAP_INDEX_RE = re.compile(r"^\s*(\d+):") +REF_INVOKE_RE = re.compile(r"REF_invoke\w+\s+([^:]+):(\S+)") + + +def q(value): + if value is None: + return "null" + if isinstance(value, int): + return str(value) + return json.dumps(str(value), ensure_ascii=False) + + +def source_root(model_dir): + project_yaml = model_dir / "project.yaml" + if project_yaml.exists(): + for line in project_yaml.read_text().splitlines(): + if line.strip().startswith("sourceRoot:"): + return model_dir / line.split(":", 1)[1].strip() + return model_dir / "sources" + + +def source_index(root): + index = {} + markers = ( + ("src", "main", "java"), ("src", "test", "java"), + ("src", "main", "kotlin"), ("src", "test", "kotlin"), + ("src", "main", "groovy"), ("src", "test", "groovy"), + ) + if not root.exists(): + return index + for pattern in ("*.java", "*.kt", "*.groovy"): + for path in root.rglob(pattern): + rel = str(path.resolve().relative_to(root.resolve())) + index.setdefault(rel, rel) + parts = Path(rel).parts + for marker in markers: + for i in range(0, len(parts) - len(marker)): + if parts[i:i + len(marker)] == marker: + index.setdefault(str(Path(*parts[i + len(marker):])), rel) + break + return index + + +def class_roots(model_dir): + classes_dir = model_dir / "classes" + return sorted(path.resolve() for path in classes_dir.iterdir() if path.is_dir()) + + +def class_from_path(path, roots): + path = Path(path).resolve() + for root in roots: + try: + return str(path.relative_to(root).with_suffix("")).replace(os.sep, ".") + except ValueError: + pass + return None + + +def source_for(index, cls, source_file): + if not cls or not source_file: + return None + package = cls.rsplit(".", 1)[0] if "." in cls else "" + key = str(Path(*package.split(".")) / source_file) if package else source_file + return index.get(key) + + +def line_for(lines, offset): + if not lines: + return None + offsets = [item[0] for item in lines] + pos = bisect_right(offsets, offset) - 1 + return lines[pos][1] if pos >= 0 else None + + +def target(ref, package): + if ":" not in ref: + return None + name, descriptor = ref.split(":", 1) + name = name.replace('"', "").replace("/", ".").strip() + if "." not in name: + return None + owner, method = name.rsplit(".", 1) + if owner != package and not owner.startswith(package + "."): + return None + return f"{owner}#{method}{descriptor.strip()}" + + +def javap_usages(package, model_dir, deps_dir): + if not shutil.which("javap"): + raise SystemExit("javap not found on PATH; install/use a JDK") + + roots = class_roots(model_dir) + classes = [path for root in roots for path in sorted(root.rglob("*.class"))] + if not classes: + raise SystemExit(f"no .class files found under {model_dir / 'classes'}") + + jars = sorted(str(path.resolve()) for path in deps_dir.glob("*.jar")) if deps_dir.exists() else [] + classpath = os.pathsep.join([str(root) for root in roots] + jars) + sources = source_index(source_root(model_dir)) + + found = {} + cls = src_file = None + calls = [] + dynamic_calls = [] + pending_dynamic = [] + line_table = [] + in_line_table = False + in_bootstrap = False + bootstrap = None + bootstrap_targets = {} + + def add_found(fn, src, line): + found.setdefault(fn, {"function": fn, "source": src, "line": line}) + + def flush(): + nonlocal calls, dynamic_calls, line_table + src = source_for(sources, cls, src_file) + for offset, fn in calls: + add_found(fn, src, line_for(line_table, offset)) + for offset, index in dynamic_calls: + pending_dynamic.append((index, src, line_for(line_table, offset))) + calls, dynamic_calls, line_table = [], [], [] + + def flush_dynamic(): + for index, src, line in pending_dynamic: + for fn in bootstrap_targets.get(index, []): + add_found(fn, src, line) + pending_dynamic.clear() + + def parse(line): + nonlocal cls, src_file, calls, dynamic_calls, line_table, in_line_table + nonlocal in_bootstrap, bootstrap + stripped = line.strip() + + match = CLASSFILE_RE.match(stripped) + if match: + flush() + flush_dynamic() + cls = class_from_path(match.group(1), roots) + src_file = None + bootstrap_targets.clear() + bootstrap = None + in_bootstrap = False + in_line_table = False + return + + match = SOURCE_RE.match(stripped) + if match: + src_file = match.group(1) + return + + if stripped == "BootstrapMethods:": + flush() + in_bootstrap = True + bootstrap = None + return + if in_bootstrap: + match = BOOTSTRAP_INDEX_RE.match(line) + if match: + bootstrap = int(match.group(1)) + match = REF_INVOKE_RE.search(line) + if match and bootstrap is not None: + fn = target(f"{match.group(1)}:{match.group(2)}", package) + if fn: + bootstrap_targets.setdefault(bootstrap, set()).add(fn) + return + + if line.startswith(" ") and not line.startswith(" "): + if "(" in stripped or stripped == "static {};": + flush() + in_line_table = False + return + + if stripped == "LineNumberTable:": + in_line_table = True + return + if in_line_table: + match = LINE_RE.match(stripped) + if match: + line_table.append((int(match.group(2)), int(match.group(1)))) + return + in_line_table = False + + match = CALL_RE.match(line) + if match: + fn = target(match.group(2), package) + if fn: + calls.append((int(match.group(1)), fn)) + return + + match = DYNAMIC_RE.match(line) + if match: + dynamic_calls.append((int(match.group(1)), int(match.group(2)))) + + for i in range(0, len(classes), 100): + batch = [str(path) for path in classes[i:i + 100]] + proc = subprocess.run( + ["javap", "-classpath", classpath, "-verbose", "-p", "-c", "-l", *batch], + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if proc.returncode: + sys.stderr.write(proc.stderr) + raise SystemExit(proc.returncode) + for line in proc.stdout.splitlines(): + parse(line) + flush() + flush_dynamic() + return [found[key] for key in sorted(found)] + + +def write_yaml(path, functions): + lines = ["functions:"] + if not functions: + lines.append(" []") + for item in functions: + lines.append(f" - function: {q(item['function'])}") + lines.append(f" source: {q(item['source'])}") + lines.append(f" line: {q(item['line'])}") + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("\n".join(lines) + "\n") + + +def main(): + parser = argparse.ArgumentParser(description="Extract javap method calls for a package prefix.") + parser.add_argument("--package", required=True, help="package prefix, e.g. org.pf4j") + parser.add_argument("--model-dir", default=".opentaint/project") + parser.add_argument("--deps-dir", help="default: /dependencies") + parser.add_argument("--output", required=True, help="YAML output file") + args = parser.parse_args() + + model_dir = Path(args.model_dir) + deps_dir = Path(args.deps_dir) if args.deps_dir else model_dir / "dependencies" + functions = javap_usages(args.package.replace("/", "."), model_dir, deps_dir) + write_yaml(Path(args.output), functions) + print(f"wrote {args.output} ({len(functions)} functions)") + + +if __name__ == "__main__": + main() From 6746b5593a827e43938c1be4b23582ec9646f1fb Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Wed, 10 Jun 2026 02:35:03 +0300 Subject: [PATCH 33/54] feat: replace python script with jar for better methods usage detection --- skills/appsec-agent/SKILL.md | 11 +- .../appsec-agent/references/discover-rules.md | 4 +- skills/discover-attack-surface/SKILL.md | 33 ++- .../scripts/package-usages.jar | Bin 0 -> 155088 bytes .../scripts/package-usages.py | 264 ------------------ 5 files changed, 30 insertions(+), 282 deletions(-) create mode 100644 skills/discover-attack-surface/scripts/package-usages.jar delete mode 100644 skills/discover-attack-surface/scripts/package-usages.py diff --git a/skills/appsec-agent/SKILL.md b/skills/appsec-agent/SKILL.md index ff26283ca..422819b5d 100644 --- a/skills/appsec-agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -32,7 +32,7 @@ Begin by asking the user both things in a single AskUserQuestion call — two qu 1. Scan level — `lite` · `normal` · `deep` - lite — build + scan with existing rules - normal — + approximation iteration - - deep — + discover-attack-surface + new rules (fixed first) + - deep — + discover-attack-surface for project-used dependency members + new rules (fixed first) 2. Triage level — `static` · `dynamic` - static — classify findings from the model, no running app - dynamic — + a PoC per confirmed TP. This launches a few test services on the user's current machine (local instances and ports); they're torn down at the end of the run. Make that clear in the option @@ -41,7 +41,7 @@ The run is one fixed pipeline; the two levels decide which steps execute. Walk i ``` build → references/build.md every run -[deep] discover + new rules → references/discover-rules.md deep scan +[deep] discover project-used lib rules → references/discover-rules.md deep scan scan → references/scan.md every run [normal/deep] approximation iteration → references/approximations.md normal, deep scan triage (generate findings + classify) → references/triage.md every run @@ -89,7 +89,7 @@ Two limits apply to every fan-out — a global one against rate-limiting, and a - cores — `nproc` (Linux) / `sysctl -n hw.ncpu` (macOS) - free memory in GB — `free -g` (Linux, the `available` column) / `sysctl -n hw.memsize` ÷ 1024³ (macOS) - `cap_heavy = max(1, min(cores, floor(free_GB / 2), 7))` — budget ~2 GB per concurrent JVM -- Every other agent is not RAM-bound — discover-attack-surface (reads jars + the built model), create-test-project (compiles once), triage-dependencies, analyze-external-methods, analyze-findings, create-pass-through-approximation, assemble-lib-rules, generate-poc. They're held only by the global cap of 7 +- Every other agent is not RAM-bound — discover-attack-surface (reads the built model plus dependency jars for signatures/metadata), create-test-project (compiles once), triage-dependencies, analyze-external-methods, analyze-findings, create-pass-through-approximation, assemble-lib-rules, generate-poc. They're held only by the global cap of 7 It's machine state, not run state — recompute on resume, don't track it. PoC is already sequential. @@ -111,8 +111,9 @@ The single source of truth for the tracking schema; each skill writes only its o .opentaint/tracking/ state.yaml # you only — levels + phase status coverage.yaml # triage-dependencies seeds, discover-attack-surface flips — one entry per dependency package weighed (deep) + usage/.yaml # discover-attack-surface writes project-used package members (deep) findings/.yaml # one per logical finding (from the SARIF→finding script; split by triage) - rules/lib/.yaml # per-package rule plan — new source/sink lib rules (discover plans; create-* build + test vs the marker) (deep) + rules/lib/.yaml # per-package project-used rule plan — new source/sink lib rules (discover plans; create-* build + test vs the marker) (deep) rules/join/.yaml # per-vuln-class security join (assemble-lib-rules writes; main scan verifies) (deep) approximations/-passthrough.yaml # simple from→to copies; write-only, scan-verified approximations/-dataflow.yaml # lambda/callback/async; tested on a test project @@ -158,7 +159,7 @@ poc: pending # pending | confirmed | failed poc_script: null # path under .opentaint/pocs/ once generate-poc writes one ``` -rules/lib/.yaml — per-package rule plan; `description` fields + `sources`/`sinks` by discover-attack-surface, `test_project` by create-test-project, `tests_passing` + `rule_id`s + `artifact` by create-rule. `coverage: new` ⇒ write a pattern, `expand` ⇒ ref the built-in plus the missing methods: +rules/lib/.yaml — per-package rule plan for project-used sources/sinks only; `description` fields + `sources`/`sinks` by discover-attack-surface, `test_project` by create-test-project, `tests_passing` + `rule_id`s + `artifact` by create-rule. `coverage: new` ⇒ write a pattern, `expand` ⇒ ref the built-in plus the missing used methods: ```yaml package: org.springframework.web.reactive.function.client diff --git a/skills/appsec-agent/references/discover-rules.md b/skills/appsec-agent/references/discover-rules.md index 3d77a5ffb..a707bf920 100644 --- a/skills/appsec-agent/references/discover-rules.md +++ b/skills/appsec-agent/references/discover-rules.md @@ -6,9 +6,9 @@ Delegate triage-dependencies. Inputs: ``, model-dir `.opentaint/pr ## Discover attack surface -Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml` (capped per SKILL.md § Resource limits). Inputs each: ``, deps-dir `.opentaint/project/dependencies`, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. Each agent first settles built-in coverage (full ⇒ no unit, just `coverage.yaml` done; partial ⇒ expand only the missing methods; none ⇒ plan from scratch), then enumerates the package's sources/sinks from its **dependency jar** and writes the package's rule plan `tracking/rules/lib/.yaml` (new vs expand; sinks tagged by vuln class), writing no rule and running no test, then flips its `coverage.yaml` entry to `done`. Returns the sources/sinks planned. +Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml` (capped per SKILL.md § Resource limits). Inputs each: ``, deps-dir `.opentaint/project/dependencies`, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. Each agent first scopes the package to functions/classes used by the project, saving `java -jar scripts/package-usages.jar` output to `tracking/usage/.yaml`, then reviews source/config for indirect reachability. It settles built-in coverage for that used scope (full ⇒ no unit, just `coverage.yaml` done; partial ⇒ expand only the missing used methods; none ⇒ plan used members from scratch). It writes the package's project-used rule plan `tracking/rules/lib/.yaml` (new vs expand; sinks tagged by vuln class), writing no rule and running no test, then flips its `coverage.yaml` entry to `done`. Returns the sources/sinks planned. -Then a quick area cross-check: across network, persistence, environment, serialization, rendering, naming, execution, messaging — is every boundary a dependency exposes either covered by built-ins or now carrying a lib unit? If a boundary has a relevant dependency but produced no unit and no clear reason, dispatch a depth pass for it. Set `phases.discover: done` once every `coverage.yaml` entry is `done`. +Then a quick area cross-check over project-used boundaries only: across network, persistence, environment, serialization, rendering, naming, execution, messaging — is every boundary the project reaches through a dependency either covered by built-ins or now carrying a lib unit? If a reachable boundary has a relevant dependency but produced no unit and no clear reason, dispatch a depth pass for it. Set `phases.discover: done` once every `coverage.yaml` entry is `done`. ## Per-package lib rules diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md index a21524474..7d7085f8b 100644 --- a/skills/discover-attack-surface/SKILL.md +++ b/skills/discover-attack-surface/SKILL.md @@ -1,6 +1,6 @@ --- name: discover-attack-surface -description: Analyze a dependency package for potential sources and sinks not covered by the built-in rules. Use for the depth pass of attack-surface discovery, one package at a time, after triage-dependencies flags it +description: Analyze project-used members of a dependency package for potential sources and sinks not covered by the built-in rules. Use for the depth pass of attack-surface discovery, one package at a time, after triage-dependencies flags it license: Apache-2.0 metadata: author: opentaint @@ -9,7 +9,7 @@ metadata: # Skill: Discover Attack Surface -Take one library the triage flagged, settle what the built-in rules already cover, and write the package's rule plan — the untrusted-data sources and dangerous sinks it introduces — for the next phase to build +Take one library the triage flagged, settle what the built-in rules already cover for the package members this project uses, and write that project-used rule plan — the untrusted-data sources and dangerous sinks actually relevant to this project — for the next phase to build ## Inputs @@ -24,15 +24,15 @@ From the caller; if omitted, fall back to the default. Ask only when a required ### 1. Settle built-in coverage first -Before enumerating anything, see what the built-ins already match for this package — read the lib rules (`opentaint health --rules`) plus `.opentaint/rules`. Decide one of: +Before planning anything, see what the built-ins already match for this package's project-used members — read the lib rules (`opentaint health --rules`) plus `.opentaint/rules`. Decide one of: -- **full** — the built-ins already match the package's relevant sources/sinks → write no lib unit, flip the `coverage.yaml` entry to `done` with a `builtin_coverage: full` note, and stop. Don't drill further -- **partial** — built-ins match some but miss methods/overloads/classes → plan only the missing ones (`coverage: expand`, ref the built-in for the rest) -- **none** — plan the package's surface from scratch +- **full** — the built-ins already match the project-used package sources/sinks → write no lib unit, flip the `coverage.yaml` entry to `done` with a `builtin_coverage: full` note, and stop. Don't drill further +- **partial** — built-ins match some project-used methods/overloads/classes but miss others → plan only the missing used members (`coverage: expand`, ref the built-in for the rest) +- **none** — plan the package's project-used surface from scratch -### 2. Enumerate sources and sinks from the package jar +### 2. Scope project-used sources and sinks -Find the package's jar in `` (match the artifact from the dependency GAV; `unzip -l | grep ` confirms it owns the package) and read its compiled API with `javap` / `unzip` — capture as many real sources and sinks as the package exposes, not just the ones the app happens to call today. `scripts/package-usages.py --package --model-dir --output ` can reduce this to functions the project actually calls; use it only as a prioritization aid. Still confirm against source/API for framework entrypoints, type-only/annotation/config APIs, reflection/proxies, and library-internal behavior behind a public call. Never read the analyzer jar — only dependency jars +Find the package's jar in `` only to confirm the dependency identity and inspect signatures/docs for members already in scope (match the artifact from the dependency GAV; `unzip -l | grep ` confirms it owns the package). Run `java -jar scripts/package-usages.jar --package --model-dir --output /usage/.yaml` first to get the minimal bytecode-derived list of package functions and classes the project statically references; create `usage/` if needed. It parses project `.class` files, including lambda bodies, method references, fluent chains, signatures, annotations, class literals, casts, and dependency-hierarchy owner resolution. The tool can still miss APIs reached only through reflection, dynamic proxies, framework/container dispatch, config strings, generated code absent from the model, or runtime polymorphic targets not named in bytecode. Treat the output as the main used-in-project scope, then inspect app source, dependency API/source, and framework configuration only to classify those used members and to add indirectly reached members that the bytecode list cannot show. Do not enumerate the whole package API. Never read the analyzer jar — only dependency jars - **sources** — the exact place untrusted data first enters from a boundary (network, persistence, serialization, messaging, execution): a method that *returns* attacker-controlled data — HTTP/RPC request data, a message-broker payload. NOT a method that merely passes data it was handed along — that's a propagator the engine already handles, not a source. General, not class-tagged - **sinks** — dangerous operations (query construction, command/file/path ops, deserialization, template/EL, LDAP/JNDI, reflection); tag each with its vuln class (`ssrf`, `sqli`, `path-traversal`, …) @@ -41,11 +41,12 @@ Verify each is real before recording: a source genuinely attacker-controlled, a ### 3. Write the package's rule plan -Write `/rules/lib/.yaml` — its new sources, its sinks grouped by `vuln_class`, the dependency GAV, `stages.description: done`, and each `coverage: new` or `expand`. Then flip the package's `coverage.yaml` entry to `status: done`. `` is the dotted package with `.` → `-`; the `package:` field keeps the real dotted name +Write `/rules/lib/.yaml` — only the project-used new sources and sinks, grouped by `vuln_class`, the dependency GAV, `stages.description: done`, and each `coverage: new` or `expand`. Then flip the package's `coverage.yaml` entry to `status: done`. `` is the dotted package with `.` → `-`; the `package:` field keeps the real dotted name ## Output -- A `/rules/lib/.yaml` rule plan (or, for `full` coverage, none — just the coverage note) +- A `/rules/lib/.yaml` rule plan for project-used members only (or, for `full` coverage, none — just the coverage note) +- A `/usage/.yaml` package usage snapshot from `package-usages.jar` - The package's `coverage.yaml` entry set `status: done` with a one-line `notes` - A brief summary to the caller: the sources and sinks planned (one line each, marked `new` / `expand`). The unit holds the detail — don't paste it back @@ -59,6 +60,15 @@ Write `/rules/lib/.yaml` — its new sources, its s notes: WebClient request methods — SSRF sink; built-ins cover get(), expand with post()/put(); no new source ``` +`/usage/.yaml` — temporary-but-persisted project-used scope. Keep it next to the rule plans so resumed agents can reuse it instead of rerunning extraction: + +```yaml +functions: + - function: "org.springframework.web.reactive.function.client.WebClient#get()Lorg/springframework/web/reactive/function/client/WebClient$RequestHeadersUriSpec;" +classes: + - class: "org.springframework.web.reactive.function.client.WebClient" +``` + `/rules/lib/.yaml` — the rule plan; fill only the discovery-stage fields (create-test-project and create-rule fill the rest): ```yaml @@ -94,4 +104,5 @@ notes: > ## Gotchas - Plan, don't write — record source/sink ideas only; the lib rules are written and tested in the next phase -- Don't re-declare a source or sink a built-in already matches — `coverage: expand` with only the missing methods, or fold it into `full` coverage +- Don't re-declare a source or sink a built-in already matches — `coverage: expand` with only the missing used methods, or fold it into `full` coverage +- Don't add unused package APIs just because they look security-relevant — this phase scopes rules to what the project uses or reaches indirectly diff --git a/skills/discover-attack-surface/scripts/package-usages.jar b/skills/discover-attack-surface/scripts/package-usages.jar new file mode 100644 index 0000000000000000000000000000000000000000..ba75c1965538280297b5a4ecfcc72dce4afd9a49 GIT binary patch literal 155088 zcmb5V1F$g7k|um?yvMd}+wZY$+qP}nwr$(CZTp=6yR#D;JNw5>+^y)Yh_33asE*Fe zdNS)NIZ0p;WB>>V2!J(&2`zyCqaXu-07#1{3DAhkh|)<5$cT%IC@RrPi~dXi0Kksb zLk;5p0hrUi!@B`bBE3FZF@lE3FW!!ILkZ97XA1uD*50|}cKC>6ek`FZuM6zgY099| zFVri^1&-Ao5B#mPQtY22-EB}F_z_s z)VAMbSdio_NLKNclLUvT+OKPK`Dd2?|J-u_?c5>%>HXhx2ma?paEO0>8PLC~f69L^ z2=Y&_t%E7u|BDRVf6Ca}8QVDNo7*`3KS`tf|D+u)&8@5)|DOgC`R@ji(>Ju#H#Jsv z{HJmx7IAZO&^L6lbs+wa25Dm_Gg~7Sb4T-knrIEJ^c@|G)qY*El~I1TZcI#Fn5+kj z!mR|M!vzw?TO-sKKr{>&nk*NK5?IrK28>)Yq#C(8Z)6fwD0GXMDdl=7lTeomP``l4 ziK`vp#C-3;e+gqpiFRI1TwD^j_rN?ivR{vTKHs-KZ@O>0e-8P4-;w*e-yD%Q|0w)f zkqd>`*JI^d&7D?{vD}E+)K#lnJQw`6XKL4*Q741`QaodKTA9uiXUn{e10JPz<;~1! zfo6qAAiU|)K3kJK!`h9gApoT$piQ01sZY3RVA-@qW&BQTH)1tL5ex%jSQ1jK(;kGo zBHq%W=#HQAZZYfxhuEJHUTvy-Mup|XbSE`0SwxLIJ2aglY2>r!bo^Gd*>O1u3DQao zF`-Z$qdiGffoUbI$rBNxvLV`1&`7k?vXmiLNSS&nU~$iwo65~AJ4tN8 ziJVgtkrLsW?L2jW+3@E=@e&!-J|k}Wwe>|bP(QDQWE#B4TtT{@j%0ur5@Pc}55*Y{ zNX$+)NG8&5zB)suG795;qFSrmuWZYlYfS!&UuHtdlE4aWHREqFWMMqdQhk6cxNW{E zQL1!Bv@~gA>#|<|zyPDy8z2BH)D~-}8m*DO({Me5*PM;K|3mp?@tOb?GX)ll80yAo zd%EVc-)U=xjvCY1d%x9yVB0IhP7WoU9$6miI%l$Msm?P2C*V(_MB zAUq5llzsK&XiQEiQwMC?sNUMwpLm(gmaVjbxVDIiJLC!>Kf+5W25Fdmm<&3YxfL-C zC38PwRU$q-*ae+XIx5=l+D@nxk-G;P;g$lwXwGCC*~5p9x(YCG zm(r%xE(c>D;0Zh$`8i_YN*lh8h%|6BBb>a2&SqNOWF)IqasgG7?)simUrwxwQ?7ob zG(f@rKwWzi6XrUH=rV?4SPDizZxmBAY2*BBBM9Ng^n?axGtL7Gh`V^E$Fu~*R|pe$8I-Y0YrF(I9v6D^RIO66N;fe2ZCSi+^t z!fYXeTKonmnZ;dG8)LV5>VLoUH%4G16{#9?k|i-5Obp<3J)moBxa5e%cPa(cT3hrP z690rpA7zVZJz#ApXprL~cc!Z5>Y_r`6EV2k)6nrb5W(}ES7)}ip1xP=&3b@FyNl{KDv_q2AgHvxn}=8&`S*`cnCY8p96RMs>{m2q zHQRl$@)hlVgew^N>}`i2h6;0EM1wP$TC^EYWF)n;T7Rt9Fj;zYoGmji*fY8Z*5&U5 zX2;50CxQV6XEsfs84Nz=y&+x!!u10Efb3ls{5@u;Y+G`(@AN9K8rlAV^T+C{9i3sg z4foXdr}3{Iq$b)->1h7Ed(#5aSqT|QC~(e*+Jb&cc>PjDKHehPs{6wxF4O$=@lDcI zPE_b`Oz7c$|6lHhb6%1rS!;#NB+I`;MA8@Af%9%pwrTBAZB29^s#A9ppq25^T*6=Lq)uI%Si){EoEp)l zkqEg$KskXzX^TL2)4-a)q<}wZ-czxL6IcH!3(o8vvGr;~wk@wK&>leVkVZV%BU4L5 z5c(N|HR9ch^INS;+@X-{djY7^_6Rq+8MoZK~ zhH!h&oJ8=m0)OiA1=pJjcc&HvgsPwO3%v8H&OSw`Wz^q{uiH=93mN&EJ`A1^yl+l! zcrVabfKG<%61KUGjfDGBC`VG;B1@0o=W^e>J$x(O_GNx{)9&9$auy~hiI?2oN#kcd z7~0dh$zF+NZ2dwxkb4*Z<+5iePBIlf@2}i+r-rxErU7I33-*fE=8N><3-~X1^e)p~ z`3w#KK!p51!6S};4`&Gd3rdLFnArXoY*DRjt%#+J>~nKsEUpGc>@C?J0Lm32w1&-5v97@@rRV;RFY$>T;E#=XB~ial|f!}B%E zGv_hOarXPMmaG@JEk+v$Q)s|G(%jTiHSI^$fNHwi!K2xL6LcT+4`2ZVL6s2OTrkAo zvCQQ=;%isev3k!TV~{D$=EEs^!1$C~57sul(-S&-piT%=r&n%iWskCiDb1|uT$Xi+ zngjRdhdyb5&ydp)Y}PPGh&f6#p;fMCOYX}{mBP5>?(qpOU4$p@w|gO!=*W%>rd)v$ z)t0plRBV1NPf!dr%=c(y{Lu9koos#V_ z@qHZDJH{Hh16Dfdi3$^i0~dzuN9{>k%1U`)!v)F%WvT&f0>$z9%7i@PK{bcQackv! z?!|tYsEJg@J4{k^+NAfKG*_#{&&!lvp^Q_O8ti;88i5pV+s?Y^EyI)3i47Xuic}%4QD2 z9#)azQYVUG+ThC;>AV7t(u)j(euZO8ijkx!i4m%SM>L$-)qBXnq)Zx=N!O_%*tHqbX(RTHho<_0@ij|RmAU4P!;1Ju4Smo`ymUqHd zXcCL;s>3T~s#(&eVf$+V5$@3ImHp+eVQ#u)hZMRnQzFC70p;wly-4yq9y zawHrsCuN?^q>UDns7hu!L1qP^xuZFJ>BK~>@%oM;O9+@o312h@cRc2u(?7)GY+%(} z#MUT)3x2|ssO*CD&*<|)PsKjN@6kkB+(Ui-4xX_;h6~D{aQfGpPiFA;@}Ic3iyiWt z0Il@03H!iXHMT!V+|CJu<>Nvy#fbSrQALRRe1G!k;pdYf_(rIZWkw*Fhu;7Rt`Rz# zfnm(~D$paFy)PJwTKu>uZ|J={h3tz+!y(9Ngf!&L6O!T*`^tBKO=#WlIH4Rzkw!VuB;b=3CKeVePVoX!~^lD zI-2{%P?brUBUY3&#a`qc!p_-yNy!NrrCTnUheO2W1S(~m!wND8&qDBcBhDk#;2ewy zU?r5I&-qwV&g7U^5zYp-l|2+@(e?>|HHa50S`cCGyHvh$=B_~_TA_*Xe>?%SP7fX}} zINU?6w4g@3N}|wdfl!nQh(866&&lW&Z(F;dE#)5c&J9T(&{%xsk$w#S8gesjKM{&- zl5X>Onw)UorgJboe!phI1I!pOK;Ufv-G$D_zzq}`*OpW#Fl+0l*=(q@L4lHaOCGHZ zS9D$84PvDGTx}4ZYP^KGXt797qSWZ8Plg!QWzOx_V9boM>r-z^bV&hZ2l?W#5c@BgbpmDW@ z5))@2bPQ>$8`9<5Qa#TkCR`2S3JNQ|6=oLH5eS`)@&g3w!|3Dfn!vav+QO(_wicnSmuj@?wT=PYmICIj^`dzAh?WnPEmxM3Wzj;30M1#>l6k z{O4c+UAdS=v<=U;+C#G-b`>q6&iX#6(mvrbtCZL>&B4B~jqsYwptvu`Y-J*RprH=- z`I_U~A`_uRGogLDWV$W~^t?v>U2_0c>zB<(GZqnfVij4s6;4AshkjS-OgPZMmN_`z z;p@SGRbksHkHm6tW*kF^&SzU8raaG!o!a4gMk5BR4}1UaSJLlK4wJOG=V!QSn@Q}e zwQJI+@~}Zxs|cDdIxjEt`x*!#g0pU2Y4&fI=09#rvvt{e;5O%|J~hvX`3*DQ=z@fB z_IVHKD{26K4y$QV(`lYd_>4!Up*MEZ9baajIgbYPVIO8wAeXcP@4nbMCO4vGCLh_F z{28({*Fg3Gl>8Z?^BpJrrF&pUb*;9~8na9BxmXrr&J<<=4VFg@SJxkIpPSas$=KK~ z{yz1|4b$ipgw}Hp9Ok7`i&_3ql$lm#S}~hAul6k87BiVTwk)pzMG0AT?ZKRC5cSXw zWtDpcQ??!vqp~5vct!K=hQf6C6|>yb_uiNO*HpTq9*R$8f0Ubd_b2V4%uz}?!s8PE zyq5rn65zW+y>AC9Zxy&nU*k2m-}yUd7}f{A?iXP5(dvC|!r|fjtJ}|??_Zz=mMy~0 z3m5<(`=9L2e{b@1|DMS!7&|&!IsIoEL_zyshRxHc-C?CpQLUCAfyYlsGxR(iim3A! zf*6arz&p3b5J$~qe5MMLZxUQ^Ads(b7vMuS%C$srp-1g-aw|67bEe((=2cg97ocmz z0~VJO@C$;ejFB4qk&b(&4Rgd6Y?P1%X4$&DXkEX|%ISl^1Oco}Bu!{&Zah*yD;LtZ zO^I}cjB(QGE2Wgoi=5Yt^XY>te=^E$K(<&SdMcSTW9U9f)Wn{!`CNVL9l{wJvj}5V zW3KV;FB}S+F#);<63ON`@)o#g){Bb=j3riLRHA+9?B4AOUU)^$UlO>NEhDnJ6+7LScOg3YjV0sr7Q8(%eBT- z)O7t9IkU!GOQb!>!HeB}i~`_d;d;-{)a7{ypen|$GzHx0HO#Jp>pL@D9_qDjV4Ao{ zOruLf1|qIN()ZVKs%wYDYFE1cj!ZO&4o$SS*JK40>V@?n_&UqCD=>mjfA<$8c=BH> zr4UqHpc$-rjaftwHNxIw;q}guZSBGECW92n!s{+Ex2r3t98Q_o4RwMT)|^Cbp-;Ad zDTcn?T4Wa>06^?N%JJVThVZ|q82{H=@E_pzzbpgQs+O8qs|Y^e#MNpm3~_>0Z;0T5 zG)945khFlIAkaw2^d+(Z66qDr+M z(&w9Yp(+AH9RvkwF?z0#xvNI4H{GNxm`-qT*|t5K0VpvGD%MF40NSnUOGSucSUX$+w;?D&`n!Jc0}}Oo;H9{@F-Kms)+h#sm(i z3T3|H515NX@{~R!QL$aGs4yUNp;)WdhsPzrE(?gFSY%qc`glMZuMX*~*jJ{;3pd@ zh=n_lfi%x3$pm&(R==Av;+Ae&_A$q?M1IN@TFE|3-|L563Kuk&klWceRC5#|UPlpg z<^Vfihb|Ljq9Sd`z5l~8NWr?wcH017QwU$<4J3Eo4k5J-U&5n*Y)wVxb;gmi;mCti z6>&e^6Rt0%oLvW>ITgmXf5!E65Cm2I2LrZa?13k<@vCV`d-j>#!RrAGbzpBue{5ub zO;roj>kpqgSLBZjU^Hx2bVAFlyY=TNRRQ+6ws4q_h?DdNU; z&&l-2WcTn4$A(Hu$u4+2RBU9PR3_oWFtrAM)}VI$qv3}Aw`gVKTNtq_lWlOSG*jpr zZqo`rI*82@?i}9gRFX=l8U)PZ+aHRV?T(Hfe62~W#>lMt&Et<}qVzf4{t+oZhsA8; zNsbwa<>w2;ru!}!zG-@Bhtfu`IqR)!FGkJ&rn!uz?qI0B%8-IEre}4-n9pvsG@`4H zMH3iZ18f8_`uw=AQL7DlN6{N*ImpH>V;mbNaXQ)^7ZPZdT9NfxFMEw&yu~h{25B+G z+Z{dX)2IeKI+i4kdPA)EooW8}X`Q7{M$ut1=#to!3n3iaQuLMXJwC(*ggj1CS8Psv@h;WHc+)_i5z`Mq(z! z!FX|6j$(&A5s5n850#;&Jn_Prwz6}sM)D)FbQW>D0Nw`h z_X+PbSGR3j%VAd1mlr2xuj*Epkn)1vW0iBpw(3wb?GYGLlKj?SpA>3!P(sFjB5=kK zi)XY*Yj133&%Ps!Ixsiyw4R?%FFkklFMOrDf8{&4084F9QuY6N?S*O$%3B>8cqN`1 zMCs%5K~=u{m7QSpX0V|-KAw2Q?2R(^@a+?$&9>e-`ix)wei|sydbHnA9GIXG&j^?GC721}T<_+)9&h-~pfk)rdp7e3zbiJl;>)Wo+8;=`~ z-Sr#j-M78F)Nr$&kAzHQ4j2cGBoW$@tw%2|Z({*nGf(yJmB%j{2GZ35e)2z>Lr3J) z77mHx>tO9P69kdwChQH})LZ-T!(RS!Rn!}(5MYJImEJVgJ=Z_Y@?XNG%Aze|=?$D- z@nyN@^7|nhJv^9(@%(0^v%OvkXKA|bIYW-!sljGKeiKYI)OLFP?7jK~nF*3cl|Yt$`5u{gHzVk4Nc<Evh$wo_z24X*3j_KLmHl|p;D#9k6sXcDP}=)$ zRYbUsi%uE(b5fB|!ij-BgHS@0s4$0wpBD7_szT84(!`U6KBQKDt~p%%L(2W;G(E3+ChN_iFfRDp)r6p#BV|Lf+pQ zNluF$lu{Y9%-*WR*pFyZv8c#0wlr&~@XbR&`*KZ10Jby&y37}`sM7Gpi|0x;779p|hM60D*}`JMXzwhIEA16oXqe8ZBU2P% z7a_g9Djle|YJ}+AtqG_rqDsY$!1Y_zK3N$zZ# z$#IYnov|&!HG7E6d#vXz6Y}{7qsr`PV89soQF9R;T8N~_H4GL2WP>Y$d`G_N%?Qt( z6Imi&qvI601PY=CmlKT-`v~<742#M$6#t1+v^fL&kJ7G<2< zL@e4wa-IWCiAsLcgX&cTrJ7v%Lx5P9a&iyiT z82Ksug4*O^!FG%nWS%Pq<`FgwYQ$Xo+|?2eMqqqvgS|&$dTkVFig?`cH#txN;KjZd zzq`msbES2S0?Yi0CcX&nux=KDZ4Wck;^^eF`d&KdI*n55hGxFR3(a}dioAx?9)S$S zzYn{+U_JpX7WGfeBZq+*cxODtiNG(K0t9LI=wT^-*DV?MaegD?S44}b511SgsGK*P zA$(-%P1Bh)a zXG9R8dWYvADB7|yP1ADkWWij}`gyJtoSwP!1D2sOEWdAjo2Qu4+vzr)Q5ir^sjZE) ziHjjTO(*OZ)X=FvIm^2(;#n+T*etN+GdFG+ka}Ys&1~R8hPt{;DC{<$83T559l(DW z_-EUNE__9Z?3Uu;he*aH!Jv@B5mwZqdHo=CTg=7Me1yxf`IPz7&#Se}$Koqf7mM>5 zA21SpouYQj^v=PR5uI5AqNS8vx-G-$7R~$mY~fj)pQ3y=o7PcqJdu7cRoSDRo-0UW@VfCdK^GZ%GA+a>^i%@ph|;^9Y{iP8HFo0u?qU|1!&aaJ(-a2Zm*R}XIpV=bXMp;$qZptZlyDJ+HPF#eDG7SCJozZ-dbpYk7 z{N8+6N8eYowv?j6X_@bN%16gN*+%}P?w~HFWN~!Vm6-g&aCmT~@raG-H^@SR3rd^8 zj@fFe-NM8cG>M%DM}3yaNm@pP5&H&nT);c?w{#I6=rroUy0cfiI-(w1S>4z`zdhpz zK|<-I=gp2;f(aUC$HhX;%%X`OeYe-8=j@{(P4C&+%QE6Fo9!Y!c#WhmZB2V%j|H)C zPcpMoRt`x#LU@EkJq?${UcIq=R1FrQw=YO7>n95}fIwusH;WvlR{%GabL_YhI!y6b zu|bM$T_ch)7RKM$+r@?dAS`eu%?(r5$BCnpJ{3omJxR2~hfGv?BUDB#SqW+hF7#fT zO=5tZsZ78uS}Z;HY8X~D+P818n<#yzvAtuqYqKRiqtMh>gh(7@qRtr)-T;t93dkGi zNmJ3F+~Q8|9(!p+UqXAi1%^DMX?8uMgcw=rM$v@&%|ftNQgll#xw-)jgkyXOjN|d$w8v*H z7~DVkN9IgPLdZSU*D{?BDwLRuh5zqQSZ1YrdtFnBtTI@Y!jjRQL`nPL-*JV86@O9m z%38k=y-3c;OLrETb4&AkS3k!^D9g}1Du(6)uym^0J+MeD42~!!KjU;0Sh8p7c=`Bg z>8_bQM{1iCtnhs;<04YYG`RCNcVKV5yOE|TnrN;#H19aEhz{ZJ7PYIxYHy6O1R0$7 znAxgvtP}`Kjo?pyPnG=;l7Wls@#v-`Dk22k(ls?UTcS{LXlc%xBa#Q5&EgN)>YrB0fg zq&X8K>MWT0ni!SO5dGmHA!=UXkV#dT2=fd7VkNB^PwJkCS}-c+)6X>CA{OcQUjLxy>df>M zZihR_gZ`SvH;;QgxHca5-uZ*fp$rUvGiOh+szswCxtV|f*i+1}^ZjjnD1(M~R2?$C zj_m`kH<`E9M{r_0jDftQPR9q{Hm)8^ergutGREV2uh=tPZgl3i9-ntR?JjSsD^#T$ zkw84D5=?W))FEI2>vi0{P?1(*dBKVeVpwfdzJXjMt|dUyCJFA$enT>qxVjL$#y9&P zZj&bbkn*T?OXc(^VlnG2a!XzUOI`}s;6V;`2kS)mF(^f;o0L7&7iAQNBl z9E8MP>e;mMS^u!;`}0QGZ5pR;z5B>7l=0Gmco)1gjdwg9?B~h;cR&vY_^!{4vHRj4 zBWnl_(%N1|s0S>5_?UMZZWE~E-Fh>4Usc_F;|_lMch4|gi;Ij4RTZ{n<|=hf$j^sw zCB?&wiwaEh4}vMH0(0r(49u`2Wp>NX;u0Ha=fQ@F3X4QvAYez8=ZqtCBh2;6)e;?F zIL(M1W0vJ+Nommo{Q{=@7t*GR8jtMp-hnKMB}yzz%1midpm^=(t_}-hW#dE4L|<@8 zO~p{q%ZiH2)MEmz@4z7s)$}j2C+gj;R&K#h?C+ed?^+N3-5B3h;Ds?yXfll+cL2H{ zP|FM}DY&oQCVvXJ?!ly1FD3}QxgT6969>BpMR%h+JgFam(4Wi3*^G$3os9w5OiEN* zN@~v5wz29Y2lNT^a_ZxY)IBl$k8Oh#y(vLXG@IMG`%QafgF9hy<0SKPt6(!9CBtcT z=VZ$jRwdh-ifSt>VRW`5ZbkJx4oq`~<-ca8TripC4kYmXAZfzMp-mFUOf+~ZI&4uz zlSN`uB&uK5`)YD7Sg{_Ag*sKFV?GdVbRTJ-->GFF5?H{SLOp8SK2qY0lr^$gNIfBAQiqG#4(0J?6Q^o7k)3;-FoS6K8ha(^O{ zbMrN8CXP2ztzPeX4gJx6j?gOfN8-xqG#U(v@5}Swh`ikVZB%bl^i+GK&@EIvzSMFr z;w9IYjA;nLzx@GW&gyRWk4sv$XCkubk5V(CK1p|f*qI9$LbBuiNZNASUaRAcbZ!3d zQu`(?!FDX3+$KRow_}AwixTcxyozib#muxzQ#BABz;Em#8@{xPc!OuwBz!Z;OGT}X zLar4>VK$ww{hXa&{UtQpu=5L7GAo-#GvRESVz1_d#aNl{TAbLgvtO&U2e?`mOsO1+ z+ufxM)#0G$oOQd{kD{oE_rj+s%;lSk@}Z|~0D6Jq7@uv!OfdC*>*rGlA(cA+a%28c zeQ^<>O*Oq4th4S~D>h`<1Oe4`=;2-T<0l=e^E5e}zD2vzOr29mrV}8(y(tspw;n6ME>BL=HZJx5ENPgns8e2;~NqW zf&1i&`%ZX{!KhoWEtmOoKqv!tLoEaZaENqcCNQMuJ7Bj?XV=T+m9G+x*aBS2@XkWg zTKRk)D=DD+xuUY$L{I};It^ICgth;yX8qc(ymS;WC_&WhFog5)c6XtSjKGHsy4 zM|(a?TIcBeV`i1CD~SDeDH)nd%?898(vekuQ8f#DJAnlW~@FhugPx`dQIsQ-+@-_*oJwvwx&W?qZ`Ba zwZQ$|WCZiIa~b$TK#0evJQT&@m%^JIHpnGp^UJsylPYhlReBNbde&Ul_fy5=C@0Mb zPp>`19{Jw7jb_K*tNEHuo-iwR)!f$ot6e0}v(l?S&#w`P`rM&XZ0Qu{nH*8^8Qq{t zpwRl_`WA6A!K&@?Hy3uWGa22Dc>GI!+kXc)c9Y^;EZTt@Z*>~XNoq-;Wu$HM|)Ff36J;=nYqgj_bO9S z2F}99+7&eqKQtUdlr21swbjhPWyF#tTeZfj*eWvPy(bhLeJZyTrF6-n8Q65;Fmvw8 ze!D-anMXIPv_(b+t47o2 zVcsdDtA7bJ$7cuu}O;_xdrR7 zKX|5DSB8+`JZmm$rQ+NPh*rh2))KaZ!uz8{Jnczt1ZL&p6Q=s@qETQ54AhGP4n_y1 zE!c44uPq@bbZ@tyc}4D-_8nt(=i`!B7RZ*eu23c@Z7r|hX9oPjcC$`q`|DL<)xr0J zgD*T!Tg4H*cgooxo{GChT>$&NKR7CSj|S5}9Sb0zzS7H*HoEPCAbuLSClRG1LG=y#0&9X;Htb4EX z`BJcC7j&5ck4r|S;_AdLGD3!r*Kr3Ew}pIjPjL@>A}L4Pc-ArJG`T~3c>fl=*Sbdu zT!)_N!eKvTKV%um$~f%S?09y1@Q{d^W75CyDxzhpg|^O*w`Mt*mXEH(qVj2W>Bdcq zaKme8_a{F%$7Nx1NypKaqZm~rC)HY*M^zM#SKzj(OY=IjYFA;NF*b+Xs9K&gEDM7# zb9yLe3)z;Lc!*V>-dh06DxW^UG>1nnndFpSw8>b{vu}#p;L37X0LCQ=+47Ids2=xS zR4PF?CvFzm|{XjrvW#numj#eF19}&3B?l z-^SOXIc9g9N_*yk*;V&1NfIr1_9>3U-ov&0?dQ_hO+8$*o##UuxsN3OOiJ2TyNgz$IyxyeXpWYU&#eT? zcm-YK??+tnJ2YsN4UC$_UuaI=xT9O9BL3*B6vne)4c$6o4}qBYq`m6&4%0ZhpxhI> zr0fmC8A9x_3)-FrU)M-3sU-P~bTmCqaefRN%#u_(z25iI1ZFa;BsUaPSCOE}ual$i z#*f{UOaytF8}!Nn0Ns`(;;vapz|C0`F+?gn%jrI9(S+{s$h~8aO<2*Zh5}s!f;Cix zMOB%7Xv@RSsWO!m=88B|@qj?|O3KLtrnn|Ajc0e`Y9a!m4b8_n9kdZG`zvyejZ&Bv zu_$6erd{bWU#dj5T|t`{I`@lhuvk>g6DlLSwo2F`RH)`P2mKIwmT^iqrrS9Pp1vo} zb5~_Xxeb0D_$)m>meUTybS-Zxgh^OfoMc`22K~6(%{>VW{M~qB|#vW zd#q*R(CY;<%N<8sy44@WbrJZ9K8NU>LQqXa7YImKDqjzSSop#Kvz*r7w;TE8p7Eh# zQs25J&!ZV!en`1VSzFrr0u%p0qeDX*+_XFNrSkHe!Wo#@g~3~Oy9}Wop=(QmNr}j+ zLcp3?ygFE!ML@h!1HAJhfMl~UW7*^E#jGRNv>Z0ik(j5JwZgG^-c8+F0{_%F8gYds zIty9nDm{~BK}TkA5;W(IE5Z-Bh5^)p8b4M z_fbM~BGnZ3?ig1UD(RC2z0t$%QNRX+7Pr%bOEPw0Y9Yuv9i|DVn4y?LaS`4Ts?FSpi}QXb<+FkiWcQj~p=}C}w`{8^LzrbaCi+!jAkuvo$`sZSfVAz{(ez5I1-yCJ096Dp^a#ZF zDq_pEZQ0EEQ}^nc$&_f5XiMShF3DX3>}3hR>yq|Ligt1Fz26t;8!1|5yU6~G!>R6_ zT2gik=vwR7b32Imjep!tx&Nr~N3|`8)mv+U#TSU|62bN8oZO&$BWDbKr4k4IR4S5w z#nNPr$+ZmNLuN6`L5hR)gyfBuguo8B%>jJZyfpud(=7l!RLrASRFrqOd-s!jH?nAK zbNcl*HFtyPi=S!fb)kv|{5@Zpq+PM&z3~z|l2m*%@ag(MeEaK+`xO1u(Bm~zK4>z5 zhV?QlSP$^@WEL{KoP> z)%tRA8E$=LY2indb~ejMy5=1|N)rC%^r>k!=QYk@(o{=mZtG~9Y~ivjnsGR0(-rU%DvNAz{6Fuqz?-2rCGq!Rr8V$&}jwRf0P8teVgta>Y1uT{5#{N ze0R=t$#y^6M@($#_Pp!^*jx5H+^6bC>Q})x-|zxf<$`8L<#&mk9R*QMYPO6O1;oa5 zk&3g#DNJd}YDhHb?}=_$EC@W)x$7EJmY+9ZG_xab9+JrWJ0P!U^i|7m<_B&2A63Gf zD}HQqT@EWi1rRDjf=lWhU6^psevtXl!labzn56FIP4-8wBZXUYOzWg*5vP>WV{Vmc z5oD9mdQuLp5ym+H3SG%n5|_vbxC0LOh-c^QxY#;NJB&lQ&k~!*H5TEl!Y&y){9~)@ z6WiNYxT2C&sC)M1`~@44lk=|(W?1zj^z7WgL(%gJ5WrDHHfmLAH>E@`hKWS-*+MH( zbAk1kg%E|G*hQSt~52lLiL zkdGjuSX%p|((E5f6kv&~#aw=(ok0ilUw@=~WHKEoI|Ii6BnJr@@>8v(_l@a4{17C< z9MHzb2~_W2Vp#{XEy<2`&}W7N!?UsO_@Sd~-EL%Yt2IcxK3PS=qC*dfK#!SPRe!XB+ zOLmL7=M?mY`5H*i!q9w}6aR7&z}qJ3t9C)aoDeXp1!&-)EUuq9R>)p3X%&Hd9a4m< zmPx*$+7vAAZ_+BNmhps5NDChYSRl?Ep6zyjywZMq z#?G|01~8FItLBzM-oY&{qup`i%+f7@==h)daA)3m0nKXOc_onIYo^uY3_LQ*Ir=@s z(L~9~9=3ze+Hlvdd=Wd#;V6iWJ^sfr#(vY55;SZ8fK#6TX|s{$-`Z>>Ry21q{$D$e z*~;>YNUF#`H;4&wHN;>rV1S?mA+UceHK~-%Ej6Gp1)vS8z10)4>rB?CuV`JoE@#hV z&kVU1qu{d7-?s3U&pBLc8CK##Oip$@Z^k)xU2ix}cfK!g5BUJSBJdRE=#%)RM(GcL z(oIhs8?VxA3cI?0rz(Ph?loI1LoI=XhKIWmh#3<2B}T#LKx^E53a^@}*_f@_sKKQ- z?E@Zw8&{aIRto(}^n6C)XapgW$;rtjS8pqe6X zLl*-(YUe4KBR$2ZpkP+9nIIaJ{3kh2>hbCA%~-1=PFJSx>Tz)6kb_FFz<^vo#oQQ| zKP!l9PB$B5xC8BH9TD{ygOm!_wueAz)>#i-I(UqGH zMsU|^yLAA#7MEDQxNpdqAYskm{8^R|XEYEQ_h1vmrLEEh^kf%t^Tx5(oZ{z9v?h~< z6DIi0(G@jWU${xX!#Pwz{<-T!zS7rX*M7@;zQH4n@7k9ESvYuyJK4*7(V3=q;P|u> z)6f}c7m^wV*&b*Ydg_U-xRr=;MA<`t#iRRR?9SD%2&Q+(0Y~vOOXJZZJHavmjHk;f zpO5q$O;CJJgI-4vy_gkoL;=e*>X>x~^IZ+=P}HX_8j=Xl*6@L?Vd)NX+A*pyny4gu zFf3)TjMbmvn)^Wm7p+;G+6>Ju`-+v&>0@RaplE0wio(7tBZ>WhfWm%?fx>=iP~o8M zaHut6R%r>;9ZK&Bo;Vlc-81}LSDeVm$=+nnWbA^}h9XyagTYqg9erKWHmixYlmo*B z*#NUud1B?RJz%{UCp5zoSW$zs#0p>YELbJZER_&Gcw_-Bf*_Pm{D64t9OUC~vP%kv z#KQK>5V0;7y!z=#n%qfo*z;i=4sDHleq6FDwrgn0>m6A02A`y@;;(w=I^UMbhz zVuKva@3P*Vm!AUsnEei0JwgrD6(V%$T>9Kv7tf6*Rw7lI5v|Dim#{f`uQ(SOgY z{13E8Vut^u$tqhaB8emOpaCHrkTdYA%B!cFY$Y`x95V{Z zFqiMOevfRL5KC5h2mX>zY2|7xuTR@Bj*yUkoxZ;Dl-X7F{eC%z_S>!J4giGF7+XUh zpx57l=T!F9^pIfBsR+@GxeQrGncqOfc{d#sQq%G%2bx##kz`p^ zT<##+b_p6?vgOPvJb_48Fm$j60mZJjTN+`&CY!pxW&PzmA=D>LHndRBB0kEsC|MkM z^dd<*^yJ7IFo!l|o!O(_AHek<$nn}Qn+}axCEcQA$qmPmcMe`Huc_#}=WJ%)!5N)J zo!NW?6%izvE`}fz>9qbL8uZw6y`88-Vc#?{r}baSQJ1b!e@m`Qs)dCekwSq@CjeEbfHvnKlL{wU$NhLYi~OU(?)cy7Q>S~ zgvXkHVtnn`=xQ{o+n}!Y$!fUHwjS7H&9SZ8Rm=|Hx}+{P*nme;>yC zPl%v6X@kUo%p+6c&d`+4kH5cNLV_U?3W^x1vaM86h74N7{~~iCvn|6tzb0^?`v!@M zfC$a&N4NhD9cBrxu%K|iHpTP0f9QCt)%*MMg5xI!Au)!Z8G336#}V>C;M+f#R!Q!) zz=iBR7`K@ukwFt;Rk%)5;AenKf_J*dR7GNrOe&NQlR6Rf@pq&iF8ARFA$G`78c%yS zrhTQ;f=~Yr84n_4c;%i3Cs6{$T5ZQ24?3l(OiU=grF==0?oDk-Bm_|#f5CePEyG9# zT~u^LU((`mM)yU&A0_0AKP}KGb^_Ih;u4}!U<(I>!bX&0yet=gotwroGKP9KAxpk_ zksXJR%7OYbsK+GLq$&Ffl1iI$9v9Yw(0jzQK2u}i)IPOz;e#(Aha+z3%;Qqm2V}f@ zDxn+uhgj)%an#oI0nM=SJB4$c{5lfn+#^fA%O;i{c|$OBG@R9a%UJEhu@vT~MuE^7 zR56=sR94bWL^Ku82}RfW^WrU{(T7Y*}v2mHNYgR*5HfR|MNw{TQS` zx74k%^op@}c^@8@%1wi)4?B;O+D`x;o(`NOm`LtQ^+5JqQl?)rDIArOzqHAroE1il z;6)YwQ|Sjcm1Jl5K6JZtCo<(|rI+Gzgo?C-8nKQ&4ZU`pTHGI_kcXN3;PO4-xKj79 zfM~1nRK}EL7N=1rt3;{pb&4T3{a5?%dI-Y9IISBjLp3TAWJMG3Ia1cBEh~faF`zd( zYUpD|@$(f?Rac!u$nBir_dO)7Ir@Oen72vj4X#HMhYYuPrEK2e`dEz+W-xW{;EH@n?)3m8-={^(h5nk69<9&SWQW}S0ct; z{+ZH2EqLRKDj2MDG$`J^3ng~b_X*4w7!TVrX*V^407|4p&RJ$Qpxsd;ns$$d2KSfRDy|4Iu8xfcKqDki zsYUT`EKskpWNu0OM2cOMbqTK?*o{nfi$FVCE~R>Ck<3%1>}?az1*YwyyAM9phu=;s zO>&zdT%CP*&G*^iPbbQLB&OJH{)BR9&Fc5b#7eh61(;3Ta7{9x1tg+ZtF)g(4N?dD zhnC|LSBW1Xu1KXS`QcFKof4!z@+Kkh;$0MZu0TdvG1pSoL^F&{-i9Q-5QkyB%66Dm zYi$`XGz6z>iY_G-e$}oF$lYbywl8CJdEQwjLPwBY=KnQD#PP89WV9PoHewOeIAA;? znzl*!1FvC|4irpRGN;*W0>NhGFrAPTDxpjH9B*))&Zv6B(hoJwN^*u8W&e9kBoMa) zz3i}FPO;JJPk(C_^Kyt|;zddM3{_Hbb;f~$!2Q$%wt8DFr_!bu+88x!VTNffMdh@i zR2wa8L?vH>043?vQw7$9`KO)p*#aVBs!R&1nGdFL=iFsNt!Zq zqOm4}2OW$wa)M!VUZ5wp*Y8Uq$CX(m9?cxxmZ)f_k6vD$xQ+;&ZK3&VBRo5})7O>7 zhd#Kxd=6#iGZfc2iS5vu*MNIBitq^H&*y7FqrFA<2GJX=g{`3hs|W|Z`D^(HzFg91 ziDO~0d`Dce0tbp$s0Ggxn=lsA*E7Q~rh9g@UT#_jZh*q=4;3L9=h`;N&cgJ5EWRL= zxLjv=<=kRf5jKfp`GEO{`Jxcn88L~c`F`JI)iw#t9`Gq7H#GhE^Zk898v}6Hlf~Gb z>;iBW_DBhT=5Tfa8$-LkH5AC=Sny)6C?f!xFo2;r>K^inZe4N3s;XPk5s1j?QG z6w$vTaxug?%#P89!EE*doqEHu1m zm47~#hWbzIAZyzR%!CAWeYsv`jY>0?mR@(h`!GFiICkA%u5%=a>2kc||Gq!GHbBl> z5$q9#C-J|d$|}zaPDQ%5P+zwHvz~^UNF6p*aBAYeca|Q7D(F87YL1T+#bPbqcBNU& z2Msm&hdcma;2#y1N?amE?1_o^VZ(hXynhN83p!+_6->}K{A8R& zlWCV2ex7^i*rfU7W?SSm4| zUdV6DgI(6nMv!x~tcAh2aZWtO9SKt(rB~FkN77oWSe(`+Ee07Vm%$F8P!h0LdK6Yi zdsVNZ_cbs!5Lys{jM}}okta?Zo7yJB+=>%?oS8Sr6(}zU=1UPq z?vj$ZP=;i+t3}%c=qqPX+g92!atn;6+;(eBw~h8YTf#W>GmzFP)!As1qI(ij*pw_p zopSDa!Gcgbrib2cf5z(v&I^>?Z7?a_VXBX{=FGKC4vF0|t9-K^L+xZRpW%u z)cI3NJ4eUf63QVp=CUrzmRx@PNUC(^D4)%te)h4wmHnbGXX!Vku`@ORx4)Q$^;6kT zvSdtzn6%CeD)=6ops%K=>m9yE4N0z1C&_)^Nc61=nSm#3FTGweVI{0g;xwj4VKpE9 zeFz57H{3>bT|q6ogaTjFfP)4%h#nrxdHCz{PPRkwAf+ToZfXOyV{pHh!s${>y6Cfqm{_w1uTQ|7e44(wT99i6>Bj01Up>J^)8=iwd1bKs9PgujIe>Q% zUmQHV@Ccf1*0#Yvv+Nq`nl*%xDO8U^-K%at=@2PI7`OVnN@Kw?BMZOug$t(~pB zt0oA5g`UYuX-*^4?o~iy7WR4$j}xL z;xqAxjYkFUm0WDxJ?mIEDfgT66}$RX=dXVXTeN&r43@9 zd$Jz*r)9at>kh$^9DaQjt{hlul5L6rsVLzRyei{07M+5zGpNya6KTJ&im7A|jg!-g zES2rdFiodJX|1XFdUx>sv9Z*QJi=5Jo+ilFYi%vmQFo`XOzsL7T=$Bx1$LX(o@6NJ zh%t}aj5BG`x5>CdY1H+)@BF}~r=$yu2TC)@tzkOu&>HHru0?BMwPrW?nq+Hk1F*C; z3(46RmuMc|tm@2U6y2jLT#b(aXi${jVODUnKRS_Zu9lBS=195KKt1Eb{Cq0ILEeB`wU@N!Y{o%V zwkKv)H7*C*4bw$RHI(k0#znPsD{m|v8zZLxbtaCcvX5ns)s2VFXILbda1Wz1Q-L7^UQ$5Ug(ypN+o(S9wz$&#jEWmbNXu{T=K zpFpCV)?R0dZrE5GgEP;2HslKL+pP0{(>gYCI~%tyNI0`rM3XhVpU8ktx5)3e`V!h} za*Iu;n<=EW(l~y^rX@SCqJRdR17-&}t3>2zz>+p!@3D)E%QODFqcj&D@9QaHA9@aSnwO!5vfi6SqO1YP91SffWQq;i#s zZ5~HKXrAeqSIw=n%lT9bJ}(z#Glm!FtYZw(U;~_21yh_!Qlpr>GjJx$X?g>fXTbJw z^LOozwa1*|_ia%uLZziHbC3i#SKm?YrV(_~(Hm4Wa=hWvUBeF^MN%l2{Swa|)bPjL zu!;*{hsh*Tc`Z^0EMB5i{mfN9Y+l$rhW$$*k#Tqwo(7O44o!Wg?$pu5BOZa{j4@I< zRG0NfQ%=^5nkX<2i*CtfV9aJ_S$LGJA%+XHDZI)dAFa3cMwO98ovLH(msY^j#0+Ki z#8Q}ipR+wb)2N?=t{vEv7-6j2tURN))eh`y72@-g)y-=<3n(J?mjlOJ9PI!X^EV z9VhF*OMG8Js0#Y=62Jun}nk+F2M#I117`0m*d z*%mBs#QfTJ*rP(g6RAgvz)G6SYOANTgW z_|ogRr86Zw5%!cTL_E4qW@jqj_GB=1^dZgUK3j$e&Qa4m^*p2Ta%%?qv#M<|iU%@D z_C+MFlxeVMoEC5#dN zBTzn%FU3JQuR9>!Ea{7T(|5@bQMhWiMcn1i+i>bF<5@TE{?oKeX55qRn2Efd-3Lkx zt^oZfFPkW=AfcE(fHQOs2pkS>JS0Skun~-&N7401J>er5J)5E{crk8|zxV+!HG5z> zP{}d5ww@`U=OwhvyrKBXGJ5C%3OnW8T&u}bfCtbA9(1s(G?8r*bxS_=z~kG2ra`J z1nI+Bva8bsn(KH112V`*O~7hvD?hH%&aTl+D10>ec_Cu_p3V<9s%?4eX2 zPZViS0VnHXgoC0pr{H&Nfi%qBfk7M&D5r4s#DcBnorXe3q;(AC`M%(4C<=9ZVDxt2>o$W6L@TXfqHgyF z@TUUWPq9^<=kPY#TCV79Ktj*@2;DX)*nPtxH>ZSW1{PW$@K?%<2i9L5&7EBg@=x@V z=fQStP#*evSldM&ww?-0=q1(A(1yMrLdJL0n66k?y%{$y+T&(D=6~X3Av-`=x56>p zu*ybPG*+VSA?kszg^{ZIHMQj&w|?VxWU$)^r%wai#FkK&2nny_PLV&TJ9M|G*f1tH z586XyMlwoe;+`<5kY8@Dr~7<=%jUfT1VE#U?fWJ)`&DlX&H~;y{Hq;AJ8ee3+v{r~28%c*4VUFKdhDS<<3}68V{3aH z0qB%^>Ra@oVCKeDlr|n%zv6eH(Wezd*zKQnwZ3nTHHj2gTPU%o{c zrk_|izix|KQURfK;|<_}-EuSB9A0mYe^R$2q{BQT!#rCg?$u9>crswQ&egawT>Ijd z+q1FmKq)_?MTwX5z}6Ewj|_Vmxr_`)F*rMMi4B@QK)ewDDeEzD06X>{aBuPoFF-5^ zR76g~?N^>|liQ=vL-2$s;jJ&(>pqVJT!yDO)qwS`d6y;hvx8#yj!SW>%K!*U?_L2@2oGj1F( z(TM2srpPK%Ep8W{5+R*gg%q}cH(PpHtd>3sic!a?OsRriA(7Ig$pgz#XOS1TQozc* z(@RnYfSQMw!bFiIk@z^LRud`Vy0B=*Nsg~0l|i3o&vbPT`x2J{(Y036vp9!l;vJp% zWcq{A*^qwb z#wv(Zx^I!Jp1{4$UL;ZF-YcctkTTcBzd*MmvaPjs*bKq4S~Ci4o}t?J+R>7W_2IJe z*6IJOBl*Cyc4hA|2ABGoGg5lPqH${3vBN_2ct$8VO<974N1Zc39g0@4@l8m(dc_3(KyQ9(f!lNJ-*>(5^ZXw)~DiQ^@2m77=7}Zc~@I+AKCJOOJxSG!%I7U&PGx}8R9J4sTuQ`b8 z@{v#FI-O1+pR)XM*Bq#ji=%49F14h(GczO-R3rttDlrK|{Qcz*M5VS`Dk|E`bsKfLjga54e>`(>h9#J5~H#@FWNrEFcR zMM{GO)S`cEB6lpH{Lnx$P*m{yE`$X@`N*-FMLaKUx|FtR>Ft-hQ$R`BrT1IhJJ)}X z86x}dUu+;>;uTdgSf^J;U{hLwv3>iA;y<*0r8>89=~(l<uAt24$ zlmZB7RJB0>(>z5z!ueN<=cm&Ym4=P^noTN{DPS1F0c@hLf&+j9`Hbci6}*($limYc z$*-@|1u8f56U5C=7;FiuP}^||@nmFq0~^kJJqvdv7YPf-I({V*Duanj)^lUA!dY8i zNY0kX6su`R1XP#076lEeVflq*MwK$8K#5Hnw1Lo0M$X6i0u+;@GpYjm=?O7R${vFN zXd13^&y55TXIxe!4_2^kAqoIWn{pyh-Lv*;LP`bhJ#Nh0yl|@8VyWi^?L`0hfvC3g zCz6~-;X0o4wzN(Q956kJb5ohGLoXB&?6_j1A2J}@QBehaR&&~$co~>XX9t*mL72YG zscV$lqtvFE;WTLU8do{C(oP93Aw(bJdu$f1fHVJ$E zE-mAn-h3wsad{t1}8p z()z|+y%m&S?S^a`AQi6O82%cPhSiKN;)a46mT+d&uI3m`Kol33JMepYnh-}f9${=d zQDsHWN)_Q(kSC66ilDYbTF|M83FI{tmBG zH$*?&l6q^pL2@bV`D|qA*9)qpb&}|PoJ4!;lmLeh$U+6Sh9Zp_&@B=>0U$p2@zr`W zCO0yhxE7S_KM4=Mf+)?rBBG0Pmx$N>Zx{kP4@OF5onoFw*5qneiO;|P(%y{u^eXm$ z3L)YRK`gbhs6lERe@JU42~D%^dAl4ReUOA0Bj8?Ygahboe*+G8~gem@bDi)t^&>tiO|D7U00O zQ)PeLxv@?U_o5vQI7~WnMu#AqNQiJm)z>Gy#H=jVxLk%B+4%aj@({*iEN8J=pHr42 zwJE-gXKzhs^oJIM%DKcG2W7gOTI8Tixeek+Tvt#Y9R_pO(ME|aBptUS>h&+zpSfB- zeo!Imn(Z_o>cp~#A!$6eN1Sb|b>N9}hkDkxq(m}>-x7sY@Ay&U;_nxS-!g^0J>Dgp zT^=Jooi-OQtkaQnRc!kt^;X|v0d*%mR+X*u?}#JJb1(pDRddZ#HBVIE?$034~d zvvpf1#Y+)`5$0()p)I%WO~?QErLNk+4h9TMEkS8jq;hxL*zTXfhxjbyIV?ES_>M3H z@Z->xjvaFUQH6OI$!#y4#C*n@p1qH7_^TRNn?)gUqnOUGx_K{QvbYR%?Toj)(cTKX z1;Z;a#Zw4-OPhDV^(HIyzyrE}G;humB(3*FVT?CRnRC$Hro{gaMR6unf^r-C;tBr|28b4{g?R&?f<|&-6xQZ&bjWGMW+!%zhKtZ9hQvmc$eC)l@M@t;rSfDb_#U9O6jE>gKT{ z?T#kzwkF7Z*;%<)1LmZ|ozwGF(tnd1j|z9n>ScFR^A*Aw$2CiS`t=c`(mZf(V$5$D zZOrh13u|d!{E$%DzkGu62~lMoC*=5eiG3?D6u+w#C~yiHNNcqf|N;|od<`gG`KsKu^aX}s)1 z6DAX64Am!33O!dH>6o>juuM?OSdkx8E2s65v{ZA^@X=T^av0nBN(?3PWY1IX355!q z?j~`^UlJ=b&Q4zV8)%Wgj(S*4o1hBsnA!3WcamVYN09=@2Be=nM;_LJqtB6S=oQRnBODr;d~ zt92F|{P8FyDfyQuR%+J#2}cY=IHZomqPoI#mXd2;LLWAHJ987JK-TxwWMrlt2SmVcAqhSW4bczS(U;fv|)nbVc(t+Dp-xldDM=vLRx zi@rDoHf4Ex^+tdo;-9#CAR$vXk#FRa8R-8SqH+HvL=T z!3Y1*!`Gg;@)JAP2>ws7`M?jC&)3%BxS$T@y;66D(}m|XA1Cu7&n#7|k_fZd3I1!w z;QryGe5UPG`rdzHdW%QvqJ_TK^AC%k|G;Yf`0Sq#4A6T|;DIREQt z#P5l|lL(#dzeXE40{*XNp!na;@PClaj#AcgTo*?ETngJ4R@ql?O;r8EZXt=y7N#{; zN!;K(U+8>B8l)}Rg0i}0zc3OXcOVZMjQ69T?>7S+J+!SsFeeTtkS+I30s2%GvrCQ4 zFkO3jfn(QortS1<_t(n@+>aCmefH~?!$>8!frwADj#!2cNm8rzXxrx^qwCb{Q!X3m zNP~kodwpZ-ep)luLQiOKXe68|sJ@t8!ST%oxJ>Wt*5MDHVyEF6Db%N;`{;VT@*(;j zyQ`Nf253fT;7@38(M4l@j^I#xe&!e4JH5hZl6lq-E9j5XjSJX*YszJ%gNx{Jc0@?R z8S<5FCY~_@l+r^P`OKl+u+hcZoW>kLdb9V+_(zK8a4 zY9J=ibPUj*T~=#{Cd0ItpARI#_G4_@kXwn}rMXI%O&Kinl=f}q_mD*l0%%PD&qm8d zdv9IIc*{|N4rxL3MapI===)OH71<(qX9W?+`d{pmc1>E6!#Kx*``rDx*T8%jLy9_O z@^QT+yPg_@#6^K4YQlp$hzTCuMfhRM7!Ok!M7`YGHQr~*Y+;ox-^Mi>)pd6*V>(JB z*6M%}0VJt|=9+H|R97?;YLCdSx-dAI&@|A&3VO?)- zVvCy7WZ42C?XK1$m||lVV}Rq5GoAWvirLuFaADB!|x7!Hq^OMbEHNmQziw?baCC!>9K0tXb(=2jxYB zZ)6ZLTR$|1g{@{w^_WpFg`C2T0(FC9PR=E;sZ0z;hA z!8gEVe}^XgHbJCD@c$6h_inH)jIonk)-Bl&uga=2(p{cfU?{Niz)5IxX@b>R)~zto zUY=S+@C`nw`b4hA-HqZKy#pe-N470~Bx}je_Vl-=Dy6txGDs4)HTrqZ%==^ybiISh z8rP4i(PUCLiXUZKhhGgF?#WV(>YwW+^}}iV zBNwxnV7`=)Yq?A;@yj(X2&-j#>JFsVO9EX_BH0Ok(RenU z8q=bK6pNpzizVE|kX^8Av!-PB>xQMqdjxhJyqmPoHuPrQ$RvstLM$V&fPerA2x+cj z-nCFgHCG3yJ&Sr9Vf=|$_e1^IC3$dQBWPvyMsfdV6JZl5ul4e|*sss9DL%aRBR6pD zUNQ%3^cUz^AI#te(JzpHU{^mDd_6e6ajOsi2^;-iGuppo8vj#9`;UC2=J3s#`ghN> zNi|Qe#Kkn9si(<_lt>^5KL}`v7N}8TAO{9XWO}p$AtQLC(BJKo#A(Dv2^gIBARzV# z5vrc`4UdRci<%^z=1Dq;?ao$>?Gf_Ui)HJ3jm}o%O^sV6oz_%unfKm~1Zl+g(^s~> zFEU+@?$e&xPoLR%KF^!OKdFa9enA=DW}DPTQ}t&Zsq99ZVj*yNTxQdKjCDj_tt+%lg2n?OKu-(^J>CnOFCrvGBhKE`lZQ zo6P&PTRxKC=4U6sZ-Q(QqB#2HSa(yhZV}Aph#QeGcyYr273979X}6#(O$#Hk7Y4Jr z8CE0VBH)Ly3Jx6JG~CZcSnDSY7y?ApZLn%8LvUestxa$rbMhfZrCNdX9VU!)I2*b< zw00ZTM|<)2DBxDCTnHd^*s#lA_(6&CGEdy$!@W2#uLH47#TF>~&4t;p(uWv61Qqa@ zR)gKsli)crYy-t$Td1E1{DeF_k%lYMC?FBm|0`M|rDn#B&1i9JMT*od@j6=9CEsAC z5rGjS?CFre&Q^bNn9NzWF@OtCo&g`>XYvaqud3=*o!KBb|A@5yO}owIMLiNl01n6^ z2+Pdi5C7KRpY?y(P>X#hi$O?POTf$uH1{~f2s5sVwp8Z_+xx<%+>E&>PQr}nkXwux zWkBmMny)rwpIAM=To<|7-Ud^G+WT+q8WO@y@DG$ED)Zc8BKtrNS9?s!CDJdy^*L){ zog+_fL6~OvnGza%(ybvSsr2XGFzw^8(Dn>}_BYJT*A;nZ_c9{5n*NYTOZ%D0W$b~h z9zW>KsLDzn4B4YhRY!|Lj_uvoV{#zQkWwwbma#FaSu4T0YDy@@lzoTq*5C zDSpPi5)QDj?nQa%iUbE2owSonrPWB@tfu+eclM2m8PhjMOJ%`L@`D|N4|55s{+lZ` zwg#cgM+2lwy@UoU0`maw##teOd7uhM*fJNT>m{x4@v7`?OOAV{j8dKx@9dDyt9&g0 zy}lW`w;y>4-8g2)(xZGRJ7VxE903NjvKt&$4jf*;zR@G=N8xlB99fd3pv$l{hpQd1 zX>-xn_-n0+6Ug(xwcM}sZryMP=}yab2lNF7^SSW7ip}o+w6uf*^)gOXoi7iCH)>j( z%1iamoY@YFhay1CNRQfBk~9j5aW#fcoK!DS8WVa$63K+#A{3@XQCD?~o*-r2KIDLL z1Qmb3p=a)ndWsLBfNr05^n{A}a&BNwfB!Z3HJN)uNGmN57p}{aW6rzoXy&f5NCm{= z>FxG677cq(zMh$<7l0dgE$uIjC#7b*Zmf1m^T$Z`u}HoISe%e2OLq)wZOWjBr_FV1 z`=6?WjHpo%&!nOrpZecA*~M*{j+h`3UU(I>KM zeTrrxsk6+(F7nTc0;RX+dMi!;%sp(Z(LqO*x&P9eP_-UP9Qu+bp_uNbpt2~^12H+==c)av^hgC#x=FfdJgzpt?+eNq?{shCpt-`s zWx@CHH9t}@kkD$igr@9eB2yiF zqG3vLk|OZ2_NT_8DWVJO!Y{NGFh?NvwEUIiwX2g}JQ}nQq>nbEkJ8#%8`Y|}ltFOtg2$Xo>xU>Pro;R1Ci1OWGS;Fl>wgnb; zxaGa-LQRv+2*@DG=t6G6s6yd%Oo=onmUu<>mVL25Q9M&fifB-{{Tnnfh8B)JdK~^L zMC@wE)t3zBTJO>Piv=a_#6fmtdoEBf7h()uxv|eQv`xH+C)`_P3*uX4aUM-k!du-# zU0*iYcDL<)UEM<(dyYO}k1BV*Gsh2LL%Fvc-qG_n$W%1k((Z6~RlbEyum%XhC{>N9 zPsmj$>T(f~EU)0{iNUJm6>XRrm}AAFbOfqo6dAR>eP>?39oIzh?rb>%qoV&BSU=zi zj^Y>HD-GpLtLkWd?+`EwKU%d@W3g1^az!p18(~fnY-}+Gyc%`PY;7@mx1;c#z&BA9 zfWkb$6hM(5n%0Dlz-nfQ zI7G8m0uu_tS*){F2=79O-G5;ewb?>RtMryDMw&oyQ?&drL#A)t8kMSD+;w!1z32Kn ztCAK)H5IGlx~y`fptCr63jqY(=U}svKFe|wwicw_m8ji?rlPmrHsVMJ?bCg&bYPig z>t0$zy%ezV+fk_9SdKf*V@GOyxZUtCo5r{86ktA50H5C5XBnbyIgNMTB%p4SWA*nX zDWAcEr`GwGU5UH5j=}4DjvKCX?FhYs1Ga?sF6Qv<;n8c>h;0DJjaJv%-ZK8nBw;s~ z*lk92rX#Q`m8r-RW4jLkSmEPE=SnWbA# z0-pjvk8PvsyR6l))|%|QZhO$S)@`&4+}7gNu;&_)n;utq!rJ+JR~TGF^R3t=@#nzA zquFLi_sRG(50>xZ@~*815bpxrAVpS?-gf^rjNrU2Vq2FU`vXwlpZI+!>7@3T2zrvU z#1eF^TW9pdTGZ_x+^Wk#?5iWkOGw~bnTx|U`#G2$Ep0C0Hn3ZWlf-;<2JQjMz-Wf z#)iR4?RB}r?Li|(g>#|N_~F@Z^`PK_>s@S87M&8hwthLlGUg34hQ>>;vEs^6LvLFbAp?mCEsVs&%omJd8Cj=-CD zV5}?a8h?yYAdf)*`a{>8-i@$?Y@ixR72Y8HweF)OxZnunR_o0&(@FQolJ13BvjZ_Y z+P02eMk++s2iV{@pE4pRj6B@N(va4X2&>8(kC)D%Kx))FD?zSmVx`5#pkYnBP--cv z#61#@a!za= z?{gWjkve~{4Edrj^1%7bHxs?l_G~Orl%a~qZZ1->)KvNQ!i6cZ$UP}uS~wRss*0A+S=u#gqIPDV zijyx$QO+P2qodMr0v)AFl#`rJi6=pEtQKl?P&n44P>WJpXz`lrS3hu!-g#uqnO}Nd z^M>7Z9OJR<8scy|TP#CdOs@ra6jY9#$0FsF4^hC;Rn$%6%qBpK-juU)(59H_NqJ6q zSYqw5n(w)=58^yz3S<&o6+T}1t9b<_w_`zR{7eq@tkvEaCV#_R3VoaH3Ad|d|1C8D z&pEg}OV?G(>`+T0GuEc?zGfp|z}}0hyswA%DP?MYM(e*#9(9nXrLiFqF8iKLgtn|Z zcx}DjEo-^@yfi-tk~DTzUg+;5r-Y{b@@Kb_1E2$cBQ>t&_(Du!3>r7otU8y^R_7+j zEfT^bSQ|5E5hH(LluD32M14yn`gb1r3xgVQdd@vZ==XO0z&*QG$8XGA!mJ&Tg|Hmc zhr`Fxt`6PSK>33GCZYwZl$pg7lf{(rS%}4y<6S_95fqm(RD0cwUGm>1sqPz2$?N`n z$!a5!s07|yWT;ViZZFxHKT!8=iLTID9)GAWS5*ytLv%lAgFIqzKX^^U6DB~f=c4fn z`O)ejHPT&;AhsGm9RGw%l+1d%=|R*XlBqahPGAowe%#%FdFj91rYtf6xs4DosBt}5 z7B5T8L{N23p|H?a_a95tEKv*h4EJp3DEC{>e7g7nKwZ; zFKiF>hT>fLjBvxsAN4CU!0dXZC|dO8r5*! zN@(vj?2DPBT}=S*UrQ*vf+l!Z3$wi+(>x_fRE@U9gAoi$rJEv+YUGVvwGddBjpHwp z2?F~1SbENk!guN&oB&Qr4G@$Ca*ON`>-&*eh(}r$JARP#fHz;Ic+M}de;iuN`2SrKkK}*fM00X-urPFXG9gwn`FDG&C?zR5Bn9M88!b-x5dV$}`K@W*!r!)e zoC3{H8_lDM@~bHCqzW0mDJ`ql&-`_i+&8r zb5Lz25^6|fg=yCbg0N$BxaiaInVFOOlk`mP5c9XCgv&+iM#K!##*m5)4`HZpNI0Bq zn2}ZpZ_WLP#D@@@)TPJIwz$}h`GD}Mw+jBs8xhBF{6$w zB@CrGj-MM9vE@RB9AK$MdYKDe#F~FTM~H?-t}BH=$lYPg)1IEC$t>+I%O4BmSQ5!9 zXV;rOu;qsydA%QgUlnLS5CHE{`tAI6P)BMzw#R*1{(3AKo>;tX}*p z8Y$F^h|HvPr|~UxM};>`2{*IH0+TApGItCLt-^vb#-hwXvW$bk#L;WSaE&=FJTQOE zsI|4BB1Y=8f<$Uud=)}M?YV0QdB2SEytcv^-HVl*f~MH)AVN}A6fbE~!)hyFf%Eu8As`q*WSY;t6?8Q*W&-{v6RZ-W9Ga@O_sw5w6pKYMd+caD$-jiEVs)CC&W=mqW(PRlfvDI~^KD_w zuJQ&Ny@q>}o0hW1Sq*UL*x!D^y@iYZg_&%)GTy!<#xWn(@0v)Qa%tOCjn@_|(AjAZ z9Qal(;v@6D{Q*d>bdpn;lb|}iahdC-XNhn3R~unCP1h8`Ph4B~!-HRydtRY)s@(>! ze^ZYiOmwfrtJq=D7IWf^eCG!OCJ|A@*l=fO{*Z<_u3H^J@y+*&92 z@1yeHxom1yu1L!$n`+f@fz;MU6bOAku_dinckUR&WU!5Z!W7qAz^!{E#?^7hf4K{b z8uc*$oI_mekk~G~Oz|PdOXrREKb1`Pl779iRLxk#n`>N?lX>c7!iAt;vRpd5t2>_R ze9Ek!=6%cN`uyV=H{N<~HhsX*tc^0SO{6eWhd9E-}WXp|4c}W;x7L^zdn6UaYX8w$ARZEBn0~bdgrkNgBBJat%&cJ~# z1jeY)P@sgr!pm`zXe5Q4QOgpn5<2SOrK_*fbr$hjp1``9VrELI=u0@B5^sQdp;4Ea zvsl^tl7?|Qz{WvbL#n4W9#ExFa**tJDLEKC9|}zgzRVR6Lxcw*+p+6tu8Wp*yZ_44XdJSw3m8NsagYg zdTf({+i#eJYn23nLsk_&4lAfwS};)%BXMrmZDmoxO_ndE{_&lHacb9&qgtx*;>((_ zg@($V7?wTMk>NpKEl_M1yjJ?W3AZ0u^l)HyUDMen&k)NIGg?YUGr234E=L!BWPz2Z zIP1wIC2ed>=m|MH+7M9>apq3ygWVV-=>-u?=E6=FSLo2ab;Kq$D(|PgyWnbszq=W4 z9Nl!*5v#2)i5j;Y>UuH}Su;JG3N6l3v81RT7}bM(rwptFv^KwjgHyaprSfu0wPj_1 z36{zvD9pE`R*xO9Nv3J@a`EAZa;@_`ui-SeS4?@}Z))ZIhB!4{5t5ElU0Oy2QT%42 z?=ByUje3${+Uzs|9Z#))+=jo!G^Ps!-jLdXRjvNn-wu<&+svHR`}9!N>NtAZI2fKB zHp(O*l1_e+Au1_U4aYV1VXVYIrK+O(xws=qa%mkxDJma2Eak^!!Q(B2RIkpf9mjyZ z1CIUe4eQlCJ0s6dB)onpHw|6v>KP!I`XZcutEq+aUktLNo1TY1;}+v;&f2_ywiEwu z@43&HgZ1#>1w2w|=XCYNS~8Z0L=-V_^0AqySH)+jXkjc@=6ku4DTTO=ggXwo)hZqg z=+0BGx5=#5B-d70U?y8q)HTd%yKjwQfrXORFrHhzzy$OM)YlUjfFs$b@+^?KdF-49BE4S5X7<}kmSlB%u{_2 zH@PK*Y@y_^nmzYMJr0O5~!Q>7kwS0n{u45}ajCF6FzWB1Ivr8(K9UkqL8~ zMtZx>E9gi%zq**N?-1b@pE|tCN_+ zU~a;qz(ZLB2LseeSPi09w29&ar3&Dz=nI4q^|#|^yrDIFDzF|9J0;e%212{HmyvEB zfhE$mkF^F$Ehh>WDpz__xiB2jm}dh@R&myou-XNP0qHX)q=1%){=6 z^F;C~{y)0DDM%Bb%d)y`+qP}nwr$r}wr$(CZFkwWx@=c({}a0pJ24ZHk$E#RAM@en zx#ynKWyw!8io_4W?XaJ@o6s*Y{S>BhfUvNW3Ek{VYwSl{$QGu2pgK`aL*sEM8c)uF z*?VxA@0T-QHL+#7AR71Alt5e*S4cLdSW1?VF3vcdGfZN%Wa^NLqAqC>1oMCOlJhiPYs>MA0 z?1RAA<2rrbD9It$6%vNp7#2k66pHneT6^Zxkxf|KZwM0Y>n-9xH6W$A^ypibNQezHE+n)+-m+AdQv9Ekn6;ab#FYu<q3(85OHa8Wtb@>sU_ zrna7WBB)|xJ+b2dGq7P5yq+6#?Tf)izEe~5fWncy}&LNyfp4wmU$6U z(osKIk_&8CFkkOe5-*j_@AsL}x9I*Cr)#!TLt}`kgD$4jWp>Bnx^->WVz1a;z~>V@ z0GyW!an^LaI5RPM)$*r>4O#A(yf}Bs(M1KgYRhCR%Yl?z0qrBV+S;7D} zG41JS!(ycmv!8|~LnDA*UtB5`6C@z~-*rR}Qp*=G)+TM-8>NtABI=gn6?doV?QXHB;$Ql|NGoXPwSanz)rKR3&@P)LJdccYRm_CQ+qz z6I+mFvts33lz1z#U8aF10^U^`Uhj8YD*jB2mc#=Err8p(V6-_!g#A^r+Y2FBiXSvE zvL30KFls`x_Pr7UDN#GxpM`8LCt{1l%f-nNtc&tH!Bib1WNLYM?iZo63|G@;DEulo zUe&*1R;lDx-0AJ)aJWW1mDB(ft2l-R)buQYsBvJ-fr5diL|_aG*U^4K zfSQxP1igisSzdRu)<$bsNBXF}*;1rxE!mn6CqKh9HlZ)l%48q)_C91oSMh_TBeM) z#-xr7u+RL{2qpYua&~9|Da(`xXlC;rZ{G6v;G5266nhHQ-! z6hS{e9X)|Yz0B^{=Hd=9wV)M*@4O%)4Xw_rZk!s7ujV2c?}L-N=Sb9R-PlfQ zxC7S#U7b6#1SJ)WuL^VZSzD7w^P`tQ@v`WNfE&>FE7=XD{l;vX9`v_A0ahbJNa_rY;!2jV2L< z>YJFSPkfYKIS=-cH~Q@X)_sT7a@b5p-QHcZl?$CvEEhL5Q#XZ!mnu>*j2`~iJ7=1j zhxC1%RfQ!A2{RP(iF#+*oE0sI54yV;n+Mxs$WiwC#ej+1TK zW^Eq05zX2%RY#;y(aEEIs_CdpHeO@mnfmcb@548n+Q1~QPUZEs5QYPDYbFW$((UMv zSg(rFj(zCPt@^sJeVE_1sbxh1kpUC*o*uJmR{CEey{&aXAi?5Ed`2Asi{7o$c0Qw? zfLUkn+|w8A=en;qz}iNn^^6~B=~~3}54Gx z?CC4)DMHP;fO4FGQV{~>LjFeW-y!*TjpI9xobkPTs;}U)_k6N)_tjLpe}Jr-SkHOR z2KI3-OLw5J+G;ea4UhY#@%1Un`(~e_KNyeB5L)-{z@m*MevM^k)Jt}#I03yRX2kiy zfv267KF7m|s|td_$cf^F|G$=t}-z& z{@G^*Xq0^PROx2?G4B)$5_waGk9pzJZ|QNEx;TuEpM=GGFaX-8Dc?2aX2|#s{lr#X ztx2X&{yl4ScxfCH9q!UFv$INf1jHY?dek9CIA|nhII0i>@Mv#YlTIg@nW@wL+yy=s zee1ir$~7NSgaamDSz$jUqe}k9YwBi-&4wzL&40$*8Oe2INM**qXEn@zLyr5DDJXZT zI((3l^}bBc96t)+>t5_dP7|kVKo;n#fI5d#em~JevLXPet}f#s_PX zEuS_*Sjov0BNZ2~#T#29{3?(9rCN9iQwsrK9n2H=?+^*}yILQq>HVb4iIM)NX?auG`l3z7?T33qkN!%e*Bc&gEG%_3*iHDM6D^8J$|I+Gl#wkeNkr5_5{G=S zH$C=x1Kdg)jtBHr+jhaZ7iS&7;ao#XF0Z2F=?2k+OMpp}$p@{N!>yHdfjD0sp*>9N zO56ipd&X_gUXK4!)e8Sz@u6nXxV$|k0J z`N+2EA+{+@&QVq=JsnF%X>XS13+n8{L)mljFUWs@s&#)?U#x$cuG;^In*TdcMe;u; zTK^^gsBC6r`v2%o%e@djXrmt-Ke8J;&E79hwnKrWq{|74prn?R(ol$9q^_wU$AUpb zky1cP3~lS)7%;?XE_AQ@&(eZQSNqSKX!YOP4epKYJ#)Wr@?4PN&wGFT{{HyA62!YCi15mI}IU|48?jNAuGwYK~ku|f!RoJ_|I6ev55JIwhfL=nJU}z^0@Yg ztb;ADwOTSbb>Q|l#maz)VCxdf{B(Khj6So`>WDIMNsQ4-qk0&rJT0DpdsBfTu)b6# ztHpNAqO;H;s81=3d3-h(Q)q-wFuarQdkmQw%d`bFH*?rPj1x>+Og@j5@%0_gO-^oVy%pf6N;YPDMx84~6cj5f<74&OZL*3!D{^LV)7TbtI;+Sw zc1ShTkRjDLINI9P*xF}p^gNdOYTs3uW1{HgsG7T*x@!%%FwNT~C~NGhwKdfMl~*lZ zz8Ob%`REOnbawW(wib2`wvFJV3JF8A6(#`Dk_p_jj!h#{@g?>j?Qvv9X!Ljj#`u7+ z*QTalkFuCRV&ICHk^$|jt423@7tjjONa!^bUPDGL6XvX{jM!5;>ht z*;{I?t%p8FtRGCHAJUAHAJo-rEp2CY@NDb{g&%nt!FYPADydc#MGO@Ocg?Q#dQ_Gv zQObr`l{Q^bEp6}pP-an~ERgP6*f$eFnV}b1XF`Sz<0PZMIew8x{bbX05y_1nC;a9x zWBtR>+&h@X)UmPYXsAwcR^y&Ey22U6i3 zlRdWzZ&-Zqr@F#k&YZ{ih=i3O!i_ssk<^yefnH=YMxRD#q6&o zB6o$Evz8lC*J8oFjHOp@RMJ*eYSWv@qTJ1zA;@9j@drCRg^jh=T1>pLDz$iIJC>B# zyfoq?zroJAZz2S=tK6c&a^UcuJO_v(DvwJF*1ZriH>g85T{i46k)31q^;A>ESB(vz zZ$20TWmS6Z$~hcB&DngpjU!#Kcbf&L8N;nP%Ycw$Gqm9tS8@2RS!pWGWx0f7ckNCT zc>bVi^S+t3x<19#m^P+1v(o59XRiZ()xAuA&2nJeF%l5HuZ(eGu3kIR7Php)lvJX& z%+66)sjW@0WXM;PTcfKlVq&w)jGBjyp!?75maDW1N5j}E4!kkc(SRftxaU_L8r@z5 z@hv%4e;XTnU1z8Db&XnWm)(#|!x`bHJ8e+`tW|D(yqQnu%&Gyl^ay8ThUA_J6To9fMC?RnR)s} zTOnaNH5zQ(4`o@BmGIz;*%W3Rl}$p_IX@h#mdC{uza=Ioz20-zO1TE9*I|$%eT1TY zvT$>8HJe&+1e2^(H#lS*(?^0(JaArM9wxi14DygX%~&28H`G$6m6b;HoNV$i3euB- zx?T@)UnRvh&Tmi;;5^jfmz3OIA2pSljoF*w@usj<%}3@OAv)k-h!Qqf$y}hS%_{S? z#e!%GNSEQT)DY!bnFeR>yy8+WM`j*+Pct~{94k~>P{-fsF1VFt)Swq(C(WeCMkvKrY!WtCm@U40 zOor__NNV}_>+$hpbgf2*?Y@O$b>f)QjrbSSHK?h>IG2I!GKK*#YRoR#%l zWNe6|`6$VK$W0T5&MMMMKWZ{u6QB$>f$vj8=Blt!f1&iXshx8SiKcmw)sP_ggo9x4 zc&VxR%3z?`Z9Nr((=s1GyeR=)DXfQ^!NE0}Z^$u$P8%pvO5=>Ej_Nl(M?-KoP_vlO z)v93YX!C)}IoEq+A?b9Ys^x_j#3N?n+30JnMX40{tSnlGV=0A*#dq~rxv9DQ7Gb@* z;i5EiqcJfSb=-`#5e%tL5;-8^HdZ}5hrYUym+DzxE@`|b;t#uqP@38dTiw*O(Q6F{ zT?lLJHZiuns5&VfZyCc5(SJ#3Sn{+~hxtvrHGWKjH8SK!mPt7;?>mL9US8?Q0h&_> z^-|^RjvLF>_-D+##-4q+3p(6T++>CtmwHjxcBwjT?2pp)wZ@c`zcL}W4Ul^2FAF(o zHD^B(3nZ+HVH;CK`O@z>omg0*Wi-4PMOKt?yIb)ud}-_)QBER}NycDY!FiFFDPr3V zqIrSXDTO&BP%3m>L@MSH%XO1d$ket#icofOM)7ItTMe;tSIJ%`Xz&5O)(JIAg0U}(Lh&A)gQ4>Gczas{|f||aW z-atSY^Jx)58W0o5@i&fXXJxiNBdutUjhVP~kNJmEqf49_Cy{l~X%^PgoxCrgl4)C$ zRl7Hmp{aT`zwx6OQ6*z&QDPPZVj!g&lstg5=3?QPj4p8>!plk#len9NAt#mMv^Xm1 z)eTINQrTMvoyu)H*rk%?#1R%rwzM__M$i-17pP+vOv^u!jkf;4kml*ZEOje8yX8pk z5+*wtWf4NFk%f5}*OYd3)=6ba{Di5|sHmc35ylOWN7m_K4wi8GG^drGzngxLF39Z- z%jJ*33L-f_lt#*87SRVo*_XktG2GS9ScNNJgDMP#sfKm&_B4a5_snfg8V0O8qX z?vFpVWE^UHbvZAIv$|BRX`dO5P07EKFw0VNW6>&~>{^d_!?BZ)k4~f5uf6x>ym@yd zimAoyx}pj7XiafP%#4dVMI1I&hzU%V^~N#9b0i@`>c52kLu3nCG_B>^c$(**!Vt(>4L$KXXs1{!cV!%LARpX z60*2~*ghNS%0*A^CG-JM9Fsot#G}Q^TX_Wqgjtca>FkI1@Nmh#CrH+F#cLLlcV8G+ zbqv0{)wb5rmM_jLY5wuJ4dc7FwcBqc$SLx9Q7J;ZX}&}|`&$NE{uHYNSMGb0lCd#M@VR;u#=OWNc!W^=UzNCD!<#ER|`FoHL72*+1 zO*t-vRU9H`)qcNg#v(0ViB&i)85gWUvcm|YPxflHj9hi%E!;!!`4@rc*pFd4s2RVLp`p}X<+AP_dKc3ML%LUe~VQ48jft&0Nupm&C6c#=y@ z++FuxovWp)&y@>F8p?ZCC_~R5q9K7t!Tugl*G@Bvr$b%n#8EHmt6S^y6jrr%0epN- zo|QnUmnPeDU;85@ExMMtyM%vN7l$O-*VI+zYpm!}0CuQH;=}7ie>G)O^O=?Rc7ED1 zINO!T73oPtH*@B86op?pqMQFbb%m|Vc6RL}YD7M#W@VA?@Fxf0uFadkYD=wg!-(tY zp7`1td{f*tWBz*J^y;a2$!|bEsXed#U19A^4FEojlX;j=PJ#d@hvIT;3qPl7Lt7;0 zrX$l=AN$VH6y)9F4WEKfhp2}?Qv(5LkRowU;p6x%Epez?JH+-Z8{ zQr*C{QGan2#H-l~k zE>MDQlW&|qU?tpO#)^M*#ulBuviM0j1)|9}clAv3&z8?0>ms$u6}-926R!?w{Pws9 z`ZD4-$5D603{amx(rL_%lQzN3%WUc=9+-W_Og+(7+IOTZ@s?w=v2;l0V%AAMy6jx~ z183wOFxM85ZO~CVS~|5d>K(A3=sd*V!efCr7+VT9tWr+3@GBBy#K_Aom6 z9nk<3*Wv{~{kn4RbFH1KQ3MO|Vs`vQj01yzo^qyv2VoZD$qO6TWO6q9hGz?F46q52EOBaVE%BC-sp3F4j?3G*)VN(Md;R zLOq$iYPkc;<}s{TpIlj^pRI)GX%cZHvS#|TXBAraHlHoC>{LuZca^$V@6k-{G%){B zxvI5ISFzZlZCp^^%e;A*u(q}x)pb*Q_O{v;rzkK?{*y*jxUzA4couM@(K%AIqd zj3lG7#RdSx7&stTeME{{O$DL zecHrTXBjDpY;J5! zMo%+I^TazK7E;>MSlY#mi>4-}*^}_876K=;pi~?i9_&KpskX$*mv)I)eGZVT>!wzs z@$CjQhGyW|LuFmuo6kBbsI=MiXtdcxvx#L4svH7q7xz478d}7;4nOs0s~cMw36AMZpbJ5H_LP zRyFERy^@SDPCUwt{B8~+B?H*?h7wpcaVvjMD{D&BB*U7(gX4^2bLF)ZDn`>(qt#3R zEu0|hRwQIdR5G?qnk*>ZyHqEU9GI+a+C5|o?{J7nL@szH4Q#e_hq}@@*f^IaYH-** z@So<39Tz91nxz-sqb4r`jc(jLdAR`(I!qS!m6P&KP!!$h!c>6D$;}DQg(ejx<)t~Y z*xWeZtbnILD}?&;xu{moDPu_noC(fswC-KFb>>U_T zXy}>PY{=8A)H6to?oyU@g$$@1D4yny-7onVqsASiQG~rz^jwXk)0Jv&mSf&Xy`m1T zwa{?)!Z9;GqIP1=w-dDHLpQc)sU$vI{)HmfmZ)dZV6#E)G=V$1%GYZKG$|n`${dcPOAA9THNwFw~t?Ym@)3cd97J zRX)c#k;I5vos-V2EyMB4<}?I_(#}S+tJtmTnkS*#TPHl1t6 z2%)_l0x-B>d@91)7OyEy9HM3^J7_3cHO8{^d)G|0&JWQGX*-zJHm0_1oMOu!o_DZn z&MVhtux*(-Z1Vk}$4&k2Tq@7L!pjbzw|iZ&%F0j#p0+&-Y*jyiOhlW&l=;^MWPcm0 zV1b=|%-JaMfi2?y7_qn6^=TrJPV^52ZYY_%m*~+#+5ODu*Blt{Y*_n6daoRI&im>S z{z#d|qN4waY`>*F4h&s8&X88t2THLgzPKKL=vp8+{b!6^^!E@e zjkKASXZ3()8#cnvdn$xa>yymAQ zok-rVgaV!!lVcpqqruhn1p${@i9((yAZee6Yb!kep2HK{ z9%pnRdhyqh<3qbEURiKWP5!5(*WG!E-A`cw&)eI17}F19Q$`@@K*d21T=uv_JqB81 zv90!j0v^!|SF|5&=0oy3!JN7g#j;#P@L@zH5?H(dFqFx<2_ucbD^Uke=;)?*bj;n< zExm$4^U2zI`SktM#2G?nS3>oasd-4)$o6rF0mW|64<^cW1i>^6`@mr6Xm*jvH>})d zd@l@hh?OY-CoAU#_{28&5k^4bS=FOwZ27o&N85K?zz-iw;Y$9 zbKO+EI-T50Z3;gu{TX{Ng+`?CjkoD&z8=l^VNo93a3!JM^?|ApqY7=xKf!jDwCuFQ zi}nG!)0C@+38j%&L~37NVg$Bd80iPO@2joDIShE;q4zNC!Vd2t9FV+W@`ozy!+h-o z_wNm+-;wx$^$)C3e(fa?ehD`1V=UtNk#U72^A;uSijs(lQVd53i6b&#iTRk~LM1sP z6QPj?(~_j2wgFOpG?Wsw6?0%|)!c>igZ_l{xrk_{?LoP@yCn38RzQNS&ju2oLo4_^T)I`FO-Xki16oLWTx+!Go=fq5#Mi*mIiW%P@?mhtCcaiY~%D5VqK!*$TnX`Zwp z5r!mWtit;! zAjrTi?Rb)15c5J%Jtzs`*q-{Kdjb4gB+7Aph?2D_Zw)*W_~>BQUSPC zt7Nr=HVS_`VgAzHJFZV(!0)tR7O+w#1REEQ)zl-z)FD1)Ywx3;UGma42wbq&Az$Py zhO|XsW><(da5rA~Q2-N+GU)`OgCP%cNYb&SA`dTXmqf$>`Mj`DN38^>4Gk|qr)a^b zkVF!U(iADflu3F4DYs;tUUSg(FJe}6@&G?P0_P&jb|QW+;$ROt6N^>=^_gw2sr6_Xt1QikxA;!0Ct@D}e63QQ|Q zk>_S15ITWMFNIU;q6wpQVfiM>6b0!+iJh@fO*~k&7;ME{b@nA#4h)b{NG!>FLfRtFL(-LS*6lI_-$QUckt%P3*Bse@x@lB*Vg|gU zX*uWQvj*T8dLkX=P0#JLA`HK~W;WkIs}tjaooCtCW0!t*knvAIIEKTO@|v;ukP*VzC~8bJv(O|A9nvxFsI_7Ar*YWY9D*#D z8+tGqRMOfjTV`k+K~WlvlT+R#BlA;0HJuXB!Zn>T&?GdSz-ihfrO2r!jYoGhol?-; zDaiO7t$mbo^eoM2*ov6B)MvpT7<^E3UQ>>vAZv!&NBOpzR_$Urx?$ zZyMpYnj53LoLo4)A*fMU6gDIwfXNS6DfJvxF-Qfe4LW&qv4||;$g#-`$ong`8V30f z@rcAEW=x7iNG27^15TmqD|bpindpRp$RXEvPGMmw>cha2o2Y~c5j?}^5hUsjd$YCe z1XlY7$LjJGlryABYN(NeFv!Rx zLq#N_-j_cmzd2*#68Nd8YB^EGB?Y89k-gC2V@{)@#L%QSrHrKXSs6A>Sg>$sXfBA( zQ%5)KxbW@`V4^|MX%B*fvEiTE)6kzqeU6cS?A^9 zo0vt_qKdYs8fq-fd9d(iHh+03u~z5e8fRV>_RKS1dz~6eCbB!!rFxTDTJnIK1*+c| zdQW(DJvGR*hx#c<=_|2PY_epn$&+giJ~LL$LvI#gs!}KF!LRy*Jkhl{p%hj-7B3Iv7=;wm?p z;+9mgBmFjLC$W&dj=gn|d1gUHZjH1d&o^u=6yeqaY%c-u@vnx7V9Qxv;P1&|#)zu` zBvXnkmiR%~2Lx6Fe2K@Cd_{@8fR$q@&FBM^iAP7~4Y2Yg|3R!{GtTTD=(EJN1K|gU zj*uR(^knya=?8jUk=^jvW08(LA6Sk=g#+^ku5IzYkp9H_1NjF!Te7`D`3DeQ8NU#X zNrgM`cQk%NygRt6Jb38{X(}is@h&BKFO^VSK{e(mYg69m1T0kwv}Hn>3XB;@6gnUR zEs5`AS!_a#dBV=9un9R<*_#3+XNDOeF-*ow6f+ejd81C#JNd%LeS)+}X9JD29 z0Hxl5^o1~cFun1KE0d0FydjepX?1C;Et%ew`T{k8R&VU*pLa|0h0+zR_8ffy%?4Ua zqFs^PmH6gw`@-5Q`OTrWd6RX%Cl7$YH_DdOS|h~;lr08d!VkM|kiTI5vB7-#Tl=EWglL9>mUMBr||zQ9h}~ zPLfA!aW{IB7nTbhKWsPnHIy%+u>GVfp)N;Qo}|3^ z*M>o)6pCt~&=j(jM-`)RrowDO-7}{dwpXY|0UM_XS4d7M5gs~?NGX%`z+{xrEc(%} za%yZY2ods}LPux%vd6&h0zff>yC_#=;fAMF`P-35^-#IK< zv(KY-C;Nq$Hbh-sv46O*XUI(l#DAc&7RWUn`TiotUeL^kcSiP0j8vEp&V1oHgN*@# zx8AZ%`NE3sIDWW0kw-YbZjS2{svF$B2h@LnJF%@creHZkbV)v>tpaqLgq{c#@2Oz7 z9!_oz6*MUwz9FE8)n|}aYtY3A`{dV~vh0ux$f^tF6+??yHd$LK|1_4IIGIFp{Mmu} zimPY>iK)<_?AD>}!v{y#);(sWi~BQXk!eR>HkfLu(5fVn)z1Y6m}=Xrzltd-v8X*n<`oxi+=wIl?X%?5CXHJH1skBvdkF zR5FxA$3zE*-{dF~G+NX_q9KKb)k%~G6VCw#QD?-qr`SpY0~ZYpYzx>*Cr(y;+Dgp3 znl6w;vxPCP!m`gwQY_LwV2{-?Qi3{AEwI8wPmR#Tve(v%ge^xdv@FTxMZN7H`C~_c zK#{@>JueGX7BuBLpZN>vRE!WYT+xRy;r)ORJ%EW8(nJef)(~CROetfuYuINMBu-E< zsu9=Hywef(5{{yY-x2x}#-jPhk%}h3W&@}r6>Z2s3u$`>e5ltJl6MF`AlH_XcLqKr zeFM@1;^0b!9l6gCVh|s|=rbo>5FAU)jq?we!9*T)Q~*#?nI26LvM0tEmL3zbN8(PE zB~;eLs5Vp{OR~qRHeQs3%@DfTl3N)!Pc+)HbLHZR&ko&z)dcB|o+iQB{_2dRBjsoo zJBDw|u(b$Jme3^g3{p!h-%x&FP#4)XW_n=K5$89Vp5&-Wb8X_D;d>p=6p9tSNzj4F8T z0gX>TU}SX%-R=QNUN~n$vga3T_$m*{=7UUAFoYsnJUos_sLVK$nk)r&;U!y4Bw-p~ z0#1{JJM{o}37LGAkAr269ZRral7NIOIymN1qJIYmFW_2%5P}gJ1~Y)=+L!s1DJW%j zjqIh2FO)gyDuaZQR1qwF7aeGT=AKDqtPuj8${X2qNSK#OgoLG(c&&QDHP zT<|;)MW(2q(Dx2eEv5?Z6&D}SIA&YWI40+WlH<98v;AxQ5OxJe-N$nXw97GJuN&a^C5Z@@S9He+AzBa zx}0PGBS;=v0aqRJU-%XjrP*RYn6pbdiW4q1#2}{}!--4zwMaS|BT_G1^AjF{2yG17 z&Nq%YtCoL8!6bD*j;uX8sB1%wyFco@?%cK#x-~jAR{&sHYcP?OM9&nNfUBq;BN}$7w z?jiJ6DQd8TEL3At=1?nTzAA5Aqw3Q(vDg7l{U_qw%rKs>1GXy^-Yf_IGp$1!@OTPf z)83Er>>^>Gfm{uO!s16JW3J~Tf-4AO@*)}Ck+#Ihhv{F*o=?w)0R!k!8y}4ryAi{h zWcCFCSTW}5!$eom8Ncg7hS${^GW6ginrsT=>_S$qxH+Nd{$ZKVj!fOZ>P$I?Vz0Z&K3Qxn+6R)a)IH+9@r>^1J#t%z?c2b1E!>AKuJA*K>>xv5hZL@U0bdCn8X+JF zcw-XXDGWaCMn$9LkUw|`b0YtPvFh=)eA)^`-&M(+j zru9kcFXURofaIf%?vmKv@cB3X&IsR#`8TG{L|@^b-~1%kS1=)?bCbhY-c7;ZvOkl4 zM03pslhZZ9J)=LBg6t{KmB^;a zsQ$pFB9LW97^Wl(lB|S=M9Rbi9JE5}bWNla;;|LG@_i_hS3tEI!lj_y4WQi-X!bB$ zidc;}=?h^;@$M-T+d9Vub=hYbk{phVsV8DrvukbGcqM-T#)d~jn

gW%LCt)aWzSR! zf)qkY-dQV7mJTTSVbmWt8sZ4T>^E^c($b4n)PCu_>3T%W1yj8HctkP;xW5U%MKA<8 zycxe`63Daqf!;roNTo0kdG`1v`ha1L#uJC%rJ5o;5So2iDi3B)3hxTX9oLpb z5s0MSx0VDF$Y{Si9eeyxIAa?~%pOxDJ-$yKJNqGbX61|5omeE|ylot_`@wf6=gZt2 zoFz-VftgbDNAONE5Ou$e9;5kzdFC3(?Vhd?R@f67+}TXNesN2`Jugo?6!`g}%$)j{ z`FiBQ@3$`ne$jKMIFvy=0=yvigee}oFKqi_=?`BQN!|cnsQv+Mcb^wbUrb)O{y}*Y z`9K%_Q2JdKkNfN)f zLZl4?A0s#T_yVWZD-1k5mkbA<*!y`;%LmQs5!3BR&r9EDY>b3mRnsjavRwK}N(Jg!Yk9-<>2YR-<}kDjWiyTd^RyV5_0Z znTdbkjvmcNQ?0X1^4iC%-Y!ftyy2$L1h>OV^@sq z&E)auu4TZJ2JP$?g)_ed^w7~JBus^}kh2DZd$>%qM>Cx^^fZAs9>aQ2S`CIKL|alh zNH}vo1~p2RoRuZ?5MwF?AX6BEGBqMxns`B-Tw|%uN?gEDO-g(&ocZM?ydYSEq|dEi zW=5J)5{M_>T}-9mB2!agW|L9zOgN*1U{bUS!dg$tk`-zGE!dsBS_=_79@spD+X+n+ z_qSzeMhf> zw0SY#MvH&zp;BXYY@~7#k@S1WvJ8#5C<|gP-uwngfweg|PF^i2QyHqu=d)8+rmGMW zRlL@c<2TRPlryTBItEz=RqDQmY$R%CVpUUfR${dhwAhv5F4%~nab`}fti~@&21A*6&tx)o5(7hKaMhIghq(*4yPJ!4!KQVf#c%bxF_6*@G=bZ#l%{6JHoUPlR7+i^*khMv8=K9uu zWxG*2IE#5kXHRYMR`d*$OnLWYKk&~f@$Y}9D0n3PvoHm{2@Jf8Xck+5!w>P{#uL#x z78@s@2x=`++5|<<5jI{VpD6h@;>;DYFs^`Vy`YuSf$P1nO-$d7PzYb(diyLh!E-H+ z=UNpa_nm5lZ_=#8nO5aU%Zyyl_;+A}f_F)nEaz4eOzWDpvmTAoc!|=87k8aN)lz4} zV>r=>_hz^T*blbqwBxx0{ulY;Z5e@n2C7SL9T2|I<En&!zVBNlROEpwB^vFv{nlio>lsQX%U6>NfwvvmC z6ul)kL)?wYGB!|ce7i$*t)Y3*`O%ptdk*@(Wdi~%5_BWZ04dRWx)1Bs324vytJVL_ zzr6c=D`>HC?ZAWAMK*|Dd1{nbwnyZ z%h2M(**~gvFG5pn&QqJaH>Qm{9{ z+y}@xL+@ME>HCQ#qXk3OW%DJgKIxcnNyvWkEfr%5cyrVUdrGk=|J zxNhG6!`C?lX99F-cw*bOZQGvMwrx!&$rmRR+qP}nwrxAvY;En{?SI#o-PK*)RejF; zJi<76nsdvk=PyVFxM)I<)g{j2rQX^JTAS-_#w_4G1^B^g2#SkG!j67Yzw2U+Ovj_G z>>{vlqs{jQLTA)MxTw*0C4C8U%kjo+mTgxvyGn#c^EQar@;2x=thD$5nyZ%&D;~r3 zjw_Tt66y9OH5P=>jy$>{0QL-e-kt?Wa$@Ga8N^%)Zlu*ba_l9O*o~W~{H${|s*mYX zB(J*oqT=+T7G-R5pRuhb_Y^Vb3Sudx?8r{i(2+CU(ELw6iE~4)yEV$}QGvUH+>MdLkY=`rhbJ;;_k^7tR+> zIX5+QPTWC1cn)1*gnNp~3sQaGCTd7W*NwIU8Y|oZQAvBz)}0r>?MQ^uZLYmK!pcPb zMQj7_yq=la;Br^N`zrpgiE40(>o7)l4924~W?A8vRgo{%^QCX?{dY+9cA0t`_rT9U zV8#8LLvQWw8?5X3yH+L=&mZl2{#h5m{lZaIxzf~cWy&zF;U%fyujUkp|kxp zt@E)$OjDh?MMR6AwVw2>r#Mr0mtag&$?K}i(b( zI~d-a!5}hGIUf175~Xxt6`h6wwAu!^IuQd`3q=BP4908$wYZKu&q+h7OA@_XjTELg zn?o4V6a1bg|8;}{KHv0${6^ceBQAR)HU`ieA90U}qq^Qic}hmT0yB>hyu*|lo8O0m zhD%SSTmDlM1gM3Nk{eKi+Tsuuk2Esiw(rfjwaKlhKk(5GMaBE}?>}%EVjZp}#VSYB z7Lp1WVmwN(=nQrS=Ps_FsgmFg#PL_OR4;O@nZJ{dggy2C{&~eswwEv#$F;@O7|PvQ z1=fl)w+M<=yYmGNLm%6gDSPAM1~od_g$x^-7ni4)XbJ__$?9q*jtHsswU=iuA@TQ<#k3L1UP6%D1$92cw=czkupyE{r|&;m*E>N9p$ z?3v9R9J@YKs#juG(qEr?tG0sG7}lxJ1BnK?NSD=Njo-;3Bi;;g@_g4%l^st48%K`W z`UAJ3sy=5JV*AdPqd~%6ueKoDSY`a<$j>;sRZ)sRetMgpdqpi)564M;YlD3^pZEmz zDNjSD!#~Yn8BTvzS*~b+Ci*S|bz+ZJP{d$=F0&3@06Jpz6!BzV%rJU}clFfZ3me@< zUd~fW*;tN7RN7y7VWeZI$lMvKv^o3HZ2V`w8Qm_|ew$%Q4a{p^zv%;*{UWwyt#+$?Nw zhc~MT^R*$%^)B;tP1`rP^HQp(xVF2IYce<*bBupG$e<=c6UYQHmPIjvR_^1#b=na$ z)hL?vSI8=ZOi3fBh|?hq3U69_XG`&-Ge~6UQPbl^jhFPbVbcB1)HaC{lS#kmr*!e&6XoEvWKFLf=7OzvU(fBZx7H zncjlVFJKwJqbur39Owm!Y!W}ei5GPn&S*tDAL~24u#UA*Yp)VT=7RV@obLT+U_*S6 zq;VU)DVAjBxpvAB7MN+p#D#(OSly!n+$K$w!#twIbx;nCT1pM8#)etwW$@Rk&twsT zMO6T|3Oq*(t`C|Gw0wc_U9U6jvtVeo`izgWZ{0(*GugaM;U4%AORs=`9HPAaCHE}C z=kFT>zm#w4c6rzMwfgoxLMg$G#3I70@!9YP)+5YU*&Euo#%4a^qjhlkHg(ZkW?ZFT z^@EbXB6um@7$#i8D7Ipem27IhW1+Zm0-9D)ozl2cTEmxggj!C;Qg}k+n&6>_ZH?1k zc2O=(oU)8UIQ6MTaq46%2r6|82pXJ5TBx|O78}dMEgdn zF@!ahLv!1tb$6F=~?W>`$qCd;!BH1rAp1s zlExB8+eWYACh^UpO4aM-XjONs;nEoE;qqxqeAa(iED&gd=s4i_24&x1;XAp-kQjoKx0Uwd4ohl za-&2e$G^L-CgpcgZ`pSxZ?$(-@2UOStmmLc3|EkvX{!V9R8$p`S5=0 zwy~F+r*PIXpW*a-R%mhmyzcbf!&1BV+u3WD-`g(*f8P@GKm2nnq6C**KV>VdGJ+$l zDs>O>GZ|ip)OJ3O6ar5>R)NP26TjV6v#Z;U^ZcxLpK$Z`>3z?w$c-$ z{NCpT=j3&S1N9ohi~lL=!Do-=*5``m-sdzu?75#;|CO58+vnc3+dd5chHg2q3!Lvj zyEw%6_!O(F{ISHoerxkNXRH4ifp7geV{5)yX8O6a;Q6kz?D^g|=lR(;{F&i_cpKk0 z=UUtM-!+79sQx0t&HH2SkGv+On_|oRwEq+A4bs0>??OOBU_?NR;4-@+=5cxv;g(?* z;jwyV_*MOE|I_*r>Zd@L_pQ{q;134iIEnvkK2iSgpEZAJKCA!Wd0722f9U_1K6n0L zI6wU2c!d6zc+>gjcY|@@Ncld@>9+wX6VVGot|WAtu0ABPKv{ zljCQ$BlTI)L;rcvWB0kyO zPy0sxb81KAZ96dL%pHtiem&6sL_5&_)E&b8eEo0s$uqeBz&pd6FVReMNXnPsNVx#= zS?M+G9;^KBR6JS`Kss8wWGtgE=?0jsgn0dhXAQxp>& zGDnew9g~?jKD(LNSJ-ng(s^;3-s?h1n2+TEu|7^?Y<3iPE!jvH zAVrukJG@^A$dvh^2+t|dDmZRf(9Wb%iB^0-Jn2rI(lmhy);0tw(wH)b7tA;HZ*EQ< z4zMiRaM*(Qh%xrjNp2RX-w~31V+k5#(&^qM#-jtJ8NlOR%oogy`_wIPu!DK?lM#^0 zyeUuHPcdhS!Mz@@*cggS(o9jmfK(u4r{H!Ulq-Oue;#JF-J^nz$hsZ)ulKJ;xn7s6 z11O9k<+eZXLmW=F@29gN5RA+ka>^*Nx^S7LhHb*SU4PFxmba{@x0SNAJn{nGmkA6r%*`pH_ouI}sq(>Nh zuy{@SbEwb1!-7!b`51!VGmIw`9Fq4U`9#`w*-y}!**TT&mRXN)f=MwAI@5Aq3O{Pg zRg_uxRNVWH8fX$td)P-#wC4+=mC`4 zVlty;GNWy$EroHk_1vo?`vZ(tOKH)O%?~lH;EfN{Sc1bO1_-`8Hkv>9If8_DSSt)1 zCSqI0AO}bGeg|x7p+pF%Vp+ywcf8jJk~}z+9RB9wUqj%dliU3y%eB3nZ$(a}-AQmr z&f>4V)2l#EQy&);HYFL_cwPFdj0sUW(!K(Z;FOcb@TBV^;wEzjI;|1yA>uF)8(b%K|z1SP955ByIgVOQd!KMOp#H1TTqXVGhIdsu0=z zp@gMEA7~0i3$;Ag;o1GmycO^Z_gk!rR3{ewQAYvwfUT^hA|h{WHw0}{@~v}&pNe1K>&rw_u5M}P3suSQvz12qa;8bM zW;qPJgP!M`Gy)h#-S;JXIiZ>1*xnGc)~ShC${qW+?1>SXLWzUQ0!F#e47308QE|JG z;GGp9!JBKTU~-_vEXK|6F`q;rx8adJ_{~H?I9l-r#l1lu;KT?A++yMyd;~UN zE5y1E%^#5NV>uPw(zeccMC+J#2(n|gk4z3lm_*z%IK|w8w@$xsc=o*fvw()%*UplBmB$YJcK zJy(}_5ST}J+6U@2AT_tyCpkn)+A;h{5WI{UC{(X>k2v7P!+41;y`AT$09kwelcUS* zzsQ})l(P|&gA~Lf_kunzfN+7utDFH#4IU6pHIo>+3H14F?Tys0I})j@3j%BXzfapb zCJJM0e23W9^IzIs-}<^HvCd&_aZ?zc&F7}Kkm=o3Da}mSA;kmpz^_5^4?xeQjKkoN zg;BM{GOaOcc18Lv0eqM(%PX?orh{dLT4Ust3=_4}Mu#mDwHGX%gI+kw5m$No8l0nz z#5g=-w_Os*v2D4QuWZQgYfQrn7u3j2AJJ7(cJP1}c#vJLg-qJqSxaxCiS9d%M~SaJ z>K1qc20BorD&3ha@8pTBJ53Kfu2r1M+_AZ)c`|j5GKJW^%;Q?z(OWlp(l;*oA~()6 zMZR|FC=|SCM$Wfd59qFW?~sb#``Z6p+~Q05-O&?ud(lwc(&x9ibFpDZ;nqaB zC!;Shp~IzN5<{w|3_wIf#s~^hk(^gLIpRVF!OZJr4P(KrEleK30y9R)4Ow%J!?o~l zK~@XJs0Yo8sFq$!XW0`Y30ReoEUc0uvnx$E$Z*!t!;pA+WbYLDiP0+MrF{-{5esgk zu!$~F%j`-@{8rz?V}RDVUdd|U8H!Ns0SEy7nr0u=`^rtA4zB@bV27*;hk4~AE*!%D`)rS@LegLX z8biS_IHI`bFJ0p;st9`)v&K~yBgDOCeqfdko?XUkW5(t8mWy&Ojb;9m ztECYR*_x#=KTX2vzHmRy9wP()GZ!Fj@@{mhxz%EMtO?h*K^T@5r&R`(4^72(j)u$; zG>OFPzAef%D6}S7G6ajLewiB{)aoz7u60O>z=ipW@dp06vR2iXoxS_{_b6JOLwWi5%UzcPXVizFapqt@OVAhg*{aHMi*W!>T?N_Tvbhsc~rO z481JoSlUPFk)DCe3nN*T{So1C9jSN2v;mae>Cu>_xUXn_w5NglCtX1KO&jxTM>P!p z&vb2*ys9a{5WQiFhb}q2x0NDQ#nSso4?5L?>=2rG(y1OdVc@1J;Z@*HmZXBb5W5MvK?+jJ^Cny3Ts6 z>IV#OBqMZSdZQ8Dz|9DYvc@Cumr^niD5&>JGfZx7Lt@)qzL8An*@eP!QhR0I99b zlA^6Zuq}j9(d%K8qGyyk#On}m0Wz76?A+U1u+nEgsimDyjcfG-g)P1V(d*{})@uX{ zA0Gw!hc8L`+b>c2=P%)^`!8v`{`c*UA1uN{Cr5gu4HMcOf-;#xI<_Z#wtFi6U0i9r zx*Wr659{b~QH+ln_y~tts7s2W^yDwq3CPD3tOaYcIeatpNw)S`pL!lV1G9OfdI&tz zquE+B2JX=dx$y~pm!j2Ve6y$gh}*6K+Pv!2z?1bOgSRlQAg23YMQMSlmhi?%&Ya_{ z5;ZpUDUU1?bzw(BHp~F?6tapw+`*%C;}xVMjiwFWph^}5QwqZu7iV6c>5LwFR+-hq zBT~0au2fsL;u>2_znP3Ne3s%`|1_4B`Xkw=GOh?;%PBK`;};2McnUMF<0BorlzR^V zS2ji(%nKnb0tdz|vNIEb@sIf@MdA+Y2)S#eVozpLx@$t`N8_?&v2?R&7Fo@fe2+4r zl1(6p z?u&tz%t~{z?m*ZMm$APMA1B$Iq!D0l!AgxC}kim0;* zmpJN3C^NmpG*&dcv-n`I?|`DF2?i$a#D;eDTj6L)2toC`TBp`kyQ{rz^O>IU<_A0U zyou06p=T$qlz?mg1#`$(zhko#*;{k7evV+Ye{2`)?gFP#{^v1M6z4DkCQM_eCH`nL zBVN<%3C2NjA(1Tb4_k3JP+@^5mVeUz6GXzg-7BVzG|{C~u1fu-%#L;yq?-Z^3PUz52uguCZH794G6#eyRlD+ zDGKy|5*hMDtPvTv3|fskBSz3mX8&MMVIM8r!LK-bX6eeAf|D>>GfLG6%EQkVls#

gW%LCt)aWzSR! zf)qkY-dQV7mJTTSVbmWt8sZ4T>^E^c($b4n)PCu_>3T%W1yj8HctkP;xW5U%MKA<8 zycxe`63Daqf!;roNTo0kdG`1v`ha1L#uJC%rJ5o;5So2iDi3B)3hxTX9oLpb z5s0MSx0VDF$Y{Si9eeyxIAa?~%pOxDJ-$yKJNqGbX61|5omeE|ylot_`@wf6=gZt2 zoFz-VftgbDNAONE5Ou$e9;5kzdFC3(?Vhd?R@f67+}TXNesN2`Jugo?6!`g}%$)j{ z`FiBQ@3$`ne$jKMIFvy=0=yvigee}oFKqi_=?`BQN!|cnsQv+Mcb^wbUrb)O{y}*Y z`9K%_Q2JdKkNfN)f zLZl4?A0s#T_yVWZD-1k5mkbA<*!y`;%LmQs5!3BR&r9EDY>b3mRnsjavRwK}N(Jg!Yk9-<>2YR-<}kDjWiyTd^RyV5_0Z znTdbkjvmcNQ?0X1^4iC%-Y!ftyy2$L1h>OV^@sq z&E)auu4TZJ2JP$?g)_ed^w7~JBus^}kh2DZd$>%qM>Cx^^fZAs9>aQ2S`CIKL|alh zNH}vo1~p2RoRuZ?5MwF?AX6BEGBqMxns`B-Tw|%uN?gEDO-g(&ocZM?ydYSEq|dEi zW=5J)5{M_>T}-9mB2!agW|L9zOgN*1U{bUS!dg$tk`-zGE!dsBS_=_79@spD+X+n+ z_qSzeMhf> zw0SY#MvH&zp;BXYY@~7#k@S1WvJ8#5C<|gP-uwngfweg|PF^i2QyHqu=d)8+rmGMW zRlL@c<2TRPlryTBItEz=RqDQmY$R%CVpUUfR${dhwAhv5F4%~nab`}fti~@&21A*6&tx)o5(7hKaMhIghq(*4yPJ!4!KQVf#c%bxF_6*@G=bZ#l%{6JHoUPlR7+i^*khMv8=K9uu zWxG*2IE#5kXHRYMR`d*$OnLWYKk&~f@$Y}9D0n3PvoHm{2@Jf8Xck+5!w>P{#uL#x z78@s@2x=`++5|<<5jI{VpD6h@;>;DYFs^`Vy`YuSf$P1nO-$d7PzYb(diyLh!E-H+ z=UNpa_nm5lZ_=#8nO5aU%Zyyl_;+A}f_F)nEaz4eOzWDpvmTAoc!|=87k8aN)lz4} zV>r=>_hz^T*blbqwBxx0{ulY;Z5e@n2C7SL9T2|I<En&!zVBNlROEpwB^vFv{nlio>lsQX%U6>NfwvvmC z6ul)kL)?wYGB!|ce7i$*t)Y3*`O%ptdk*@(Wdi~%5_BWZ04dRWx)1Bs324vytJVL_ zzr6c=D`>HC?ZAWAMK*|Dd1{nbwnyZ z%h2M(**~gvFG5pn&QqJaH>Qm{9{ z+y}@xL+@ME>HCQ#qXk3OW%DJgKIxcnNyvWkEfr%5cyrVUdrGk=|J zxNhG6!`C?lX99F-cw*bOZQGvMwrx!&$rmRR+qP}nwrxAvY;En{?SI#o-PK*)RejF; zJi<76nsdvk=PyVFxM)I<)g{j2rQX^JTAS-_#w_4G1^B^g2#SkG!j67Yzw2U+Ovj_G z>>{vlqs{jQLTA)MxTw*0C4C8U%kjo+mTgxvyGn#c^EQar@;2x=thD$5nyZ%&D;~r3 zjw_Tt66y9OH5P=>jy$>{0QL-e-kt?Wa$@Ga8N^%)Zlu*ba_l9O*o~W~{H${|s*mYX zB(J*oqT=+T7G-R5pRuhb_Y^Vb3Sudx?8r{i(2+CU(ELw6iE~4)yEV$}QGvUH+>MdLkY=`rhbJ;;_k^7tR+> zIX5+QPTWC1cn)1*gnNp~3sQaGCTd7W*NwIU8Y|oZQAvBz)}0r>?MQ^uZLYmK!pcPb zMQj7_yq=la;Br^N`zrpgiE40(>o7)l4924~W?A8vRgo{%^QCX?{dY+9cA0t`_rT9U zV8#8LLvQWw8?5X3yH+L=&mZl2{#h5m{lZaIxzf~cWy&zF;U%fyujUkp|kxp zt@E)$OjDh?MMR6AwVw2>r#Mr0mtag&$?K}i(b( zI~d-a!5}hGIUf175~Xxt6`h6wwAu!^IuQd`3q=BP4908$wYZKu&q+h7OA@_XjTELg zn?o4V6a1bg|8;}{KHv0${6^ceBQAR)HU`ieA90U}qq^Qic}hmT0yB>hyu*|lo8O0m zhD%SSTmDlM1gM3Nk{eKi+Tsuuk2Esiw(rfjwaKlhKk(5GMaBE}?>}%EVjZp}#VSYB z7Lp1WVmwN(=nQrS=Ps_FsgmFg#PL_OR4;O@nZJ{dggy2C{&~eswwEv#$F;@O7|PvQ z1=fl)w+M<=yYmGNLm%6gDSPAM1~od_g$x^-7ni4)XbJ__$?9q*jtHsswU=iuA@TQ<#k3L1UP6%D1$92cw=czkupyE{r|&;m*E>N9p$ z?3v9R9J@YKs#juG(qEr?tG0sG7}lxJ1BnK?NSD=Njo-;3Bi;;g@_g4%l^st48%K`W z`UAJ3sy=5JV*AdPqd~%6ueKoDSY`a<$j>;sRZ)sRetMgpdqpi)564M;YlD3^pZEmz zDNjSD!#~Yn8BTvzS*~b+Ci*S|bz+ZJP{d$=F0&3@06Jpz6!BzV%rJU}clFfZ3me@< zUd~fW*;tN7RN7y7VWeZI$lMvKv^o3HZ2V`w8Qm_|ew$%Q4a{p^zv%;*{UWwyt#+$?Nw zhc~MT^R*$%^)B;tP1`rP^HQp(xVF2IYce<*bBupG$e<=c6UYQHmPIjvR_^1#b=na$ z)hL?vSI8=ZOi3fBh|?hq3U69_XG`&-Ge~6UQPbl^jhFPbVbcB1)HaC{lS#kmr*!e&6XoEvWKFLf=7OzvU(fBZx7H zncjlVFJKwJqbur39Owm!Y!W}ei5GPn&S*tDAL~24u#UA*Yp)VT=7RV@obLT+U_*S6 zq;VU)DVAjBxpvAB7MN+p#D#(OSly!n+$K$w!#twIbx;nCT1pM8#)etwW$@Rk&twsT zMO6T|3Oq*(t`C|Gw0wc_U9U6jvtVeo`izgWZ{0(*GugaM;U4%AORs=`9HPAaCHE}C z=kFT>zm#w4c6rzMwfgoxLMg$G#3I70@!9YP)+5YU*&Euo#%4a^qjhlkHg(ZkW?ZFT z^@EbXB6um@7$#i8D7Ipem27IhW1+Zm0-9D)ozl2cTEmxggj!C;Qg}k+n&6>_ZH?1k zc2O=(oU)8UIQ6MTaq46%2r6|82pXJ5TBx|O78}dMEgdn zF@!ahLv!1tb$6F=~?W>`$qCd;!BH1rAp1s zlExB8+eWYACh^UpO4aM-XjONs;nEoE;qqxqeAa(iED&gd=s4i_24&x1;XAp-kQjoKx0Uwd4ohl za-&2e$G^L-CgpcgZ`pSxZ?$(-@2UOStmmLc3|EkvX{!V9R8$p`S5=0 zwy~F+r*PIXpW*a-R%mhmyzcbf!&1BV+u3WD-`g(*f8P@GKm2nnq6C**KV>VdGJ+$l zDs>O>GZ|ip)OJ3O6ar5>R)NP26TjV6v#Z;U^ZcxLpK$Z`>3z?w$c-$ z{NCpT=j3&S1N9ohi~lL=!Do-=*5``m-sdzu?75#;|CO58+vnc3+dd5chHg2q3!Lvj zyEw%6_!O(F{ISHoerxkNXRH4ifp7geV{5)yX8O6a;Q6kz?D^g|=lR(;{F&i_cpKk0 z=UUtM-!+79sQx0t&HH2SkGv+On_|oRwEq+A4bs0>??OOBU_?NR;4-@+=5cxv;g(?* z;jwyV_*MOE|I_*r>Zd@L_pQ{q;134iIEnvkK2iSgpEZAJKCA!Wd0722f9U_1K6n0L zI6wU2c!d6zc+>gjcY|@@Ncld@>9+wX6VVGot|WAtu0ABPKv{ zljCQ$BlTI)L;rcvWB0kyO zPy0sxb81KAZ96dL%pHtiem&6sL_5&_)E&b8eEo0s$uqeBz&pd6FVReMNXnPsNVx#= zS?M+G9;^KBR6JS`Kss8wWGtgE=?0jsgn0dhXAQxp>& zGDnew9g~?jKD(LNSJ-ng(s^;3-s?h1n2+TEu|7^?Y<3iPE!jvH zAVrukJG@^A$dvh^2+t|dDmZRf(9Wb%iB^0-Jn2rI(lmhy);0tw(wH)b7tA;HZ*EQ< z4zMiRaM*(Qh%xrjNp2RX-w~31V+k5#(&^qM#-jtJ8NlOR%oogy`_wIPu!DK?lM#^0 zyeUuHPcdhS!Mz@@*cggS(o9jmfK(u4r{H!Ulq-Oue;#JF-J^nz$hsZ)ulKJ;xn7s6 z11O9k<+eZXLmW=F@29gN5RA+ka>^*Nx^S7LhHb*SU4PFxmba{@x0SNAJn{nGmkA6r%*`pH_ouI}sq(>Nh zuy{@SbEwb1!-7!b`51!VGmIw`9Fq4U`9#`w*-y}!**TT&mRXN)f=MwAI@5Aq3O{Pg zRg_uxRNVWH8fX$td)P-#wC4+=mC`4 zVlty;GNWy$EroHk_1vo?`vZ(tOKH)O%?~lH;EfN{Sc1bO1_-`8Hkv>9If8_DSSt)1 zCSqI0AO}bGeg|x7p+pF%Vp+ywcf8jJk~}z+9RB9wUqj%dliU3y%eB3nZ$(a}-AQmr z&f>4V)2l#EQy&);HYFL_cwPFdj0sUW(!K(Z;FOcb@TBV^;wEzjI;|1yA>uF)8(b%K|z1SP955ByIgVOQd!KMOp#H1TTqXVGhIdsu0=z zp@gMEA7~0i3$;Ag;o1GmycO^Z_gk!rR3{ewQAYvwfUT^hA|h{WHw0}{@~v}&pNe1K>&rw_u5M}P3suSQvz12qa;8bM zW;qPJgP!M`Gy)h#-S;JXIiZ>1*xnGc)~ShC${qW+?1>SXLWzUQ0!F#e47308QE|JG z;GGp9!JBKTU~-_vEXK|6F`q;rx8adJ_{~H?I9l-r#l1lu;KT?A++yMyd;~UN zE5y1E%^#5NV>uPw(zeccMC+J#2(n|gk4z3lm_*z%IK|w8w@$xsc=o*fvw()%*UplBmB$YJcK zJy(}_5ST}J+6U@2AT_tyCpkn)+A;h{5WI{UC{(X>k2v7P!+41;y`AT$09kwelcUS* zzsQ})l(P|&gA~Lf_kunzfN+7utDFH#4IU6pHIo>+3H14F?Tys0I})j@3j%BXzfapb zCJJM0e23W9^IzIs-}<^HvCd&_aZ?zc&F7}Kkm=o3Da}mSA;kmpz^_5^4?xeQjKkoN zg;BM{GOaOcc18Lv0eqM(%PX?orh{dLT4Ust3=_4}Mu#mDwHGX%gI+kw5m$No8l0nz z#5g=-w_Os*v2D4QuWZQgYfQrn7u3j2AJJ7(cJP1}c#vJLg-qJqSxaxCiS9d%M~SaJ z>K1qc20BorD&3ha@8pTBJ53Kfu2r1M+_AZ)c`|j5GKJW^%;Q?z(OWlp(l;*oA~()6 zMZR|FC=|SCM$Wfd59qFW?~sb#``Z6p+~Q05-O&?ud(lwc(&x9ibFpDZ;nqaB zC!;Shp~IzN5<{w|3_wIf#s~^hk(^gLIpRVF!OZJr4P(KrEleK30y9R)4Ow%J!?o~l zK~@XJs0Yo8sFq$!XW0`Y30ReoEUc0uvnx$E$Z*!t!;pA+WbYLDiP0+MrF{-{5esgk zu!$~F%j`-@{8rz?V}RDVUdd|U8H!Ns0SEy7nr0u=`^rtA4zB@bV27*;hk4~AE*!%D`)rS@LegLX z8biS_IHI`bFJ0p;st9`)v&K~yBgDOCeqfdko?XUkW5(t8mWy&Ojb;9m ztECYR*_x#=KTX2vzHmRy9wP()GZ!Fj@@{mhxz%EMtO?h*K^T@5r&R`(4^72(j)u$; zG>OFPzAef%D6}S7G6ajLewiB{)aoz7u60O>z=ipW@dp06vR2iXoxS_{_b6JOLwWi5%UzcPXVizFapqt@OVAhg*{aHMi*W!>T?N_Tvbhsc~rO z481JoSlUPFk)DCe3nN*T{So1C9jSN2v;mae>Cu>_xUXn_w5NglCtX1KO&jxTM>P!p z&vb2*ys9a{5WQiFhb}q2x0NDQ#nSso4?5L?>=2rG(y1OdVc@1J;Z@*HmZXBb5W5MvK?+jJ^Cny3Ts6 z>IV#OBqMZSdZQ8Dz|9DYvc@Cumr^niD5&>JGfZx7Lt@)qzL8An*@eP!QhR0I99b zlA^6Zuq}j9(d%K8qGyyk#On}m0Wz76?A+U1u+nEgsimDyjcfG-g)P1V(d*{})@uX{ zA0Gw!hc8L`+b>c2=P%)^`!8v`{`c*UA1uN{Cr5gu4HMcOf-;#xI<_Z#wtFi6U0i9r zx*Wr659{b~QH+ln_y~tts7s2W^yDwq3CPD3tOaYcIeatpNw)S`pL!lV1G9OfdI&tz zquE+B2JX=dx$y~pm!j2Ve6y$gh}*6K+Pv!2z?1bOgSRlQAg23YMQMSlmhi?%&Ya_{ z5;ZpUDUU1?bzw(BHp~F?6tapw+`*%C;}xVMjiwFWph^}5QwqZu7iV6c>5LwFR+-hq zBT~0au2fsL;u>2_znP3Ne3s%`|1_4B`Xkw=GOh?;%PBK`;};2McnUMF<0BorlzR^V zS2ji(%nKnb0tdz|vNIEb@sIf@MdA+Y2)S#eVozpLx@$t`N8_?&v2?R&7Fo@fe2+4r zl1(6p z?u&tz%t~{z?m*ZMm$APMA1B$Iq!D0l!AgxC}kim0;* zmpJN3C^NmpG*&dcv-n`I?|`DF2?i$a#D;eDTj6L)2toC`TBp`kyQ{rz^O>IU<_A0U zyou06p=T$qlz?mg1#`$(zhko#*;{k7evV+Ye{2`)?gFP#{^v1M6z4DkCQM_eCH`nL zBVN<%3C2NjA(1Tb4_k3JP+@^5mVeUz6GXzg-7BVzG|{C~u1fu-%#L;yq?-Z^3PUz52uguCZH794G6#eyRlD+ zDGKy|5*hMDtPvTv3|fskBSz3mX8&MMVIM8r!LK-bX6eeAf|D>>GfLG6%EQkVls#

7wZLEe1+B1^QDTlbe@fz*2c;A8NUy??qj`zRDkuk!BYu;jP9e z%feT@nyHA?lmCqr8huiU+F2K&L9M2RQ6RL}4{1lO0(Y&5h0;;(KEO~`y1qxPk~k)K z!@VNeyfTFiP>jT96A^s}XE||5A0}fGb`J&L;pk9584 znn(&^(#IES~%8aKp; z*sY$5KG}EU&nMS}wQP&1S!Ty_su)q0l|PgV*d)2R3YNVVmM1L!rxs|~)}c*__CraO zVJ+45WA%MnnTSHAE9d*P@yHDH-ru_AiTdKP5->VBf;KKI)H(ElN%s{ z@YX%b6no^aGod$HUP&T-6?rR$Hut!gBvnT&nRc+JRRem|lJh#-VAacM7^UF`AJ3#W zN~-!MtvJUP*-|RRci`EO-OFZMv~@$g7vNqpu>+;q(AlhT@l`l-<)en3Tv^mg!Yov} z`?OqP*h`#jl+b#evMtbQ@6s=;Be9EDztXskWJES6jwGB77&|qar9+K4Oxfda8$P3! z=!YB$jR*A}3Ro}NEppjkuac56=dVQpGnZH(H*9^>Rkd1bx$07L{v0jhEv4A$d27nD zQcC*QR1DoMwWj{YF8ZF7Q_t$F?g}^l>R$}R%*7vqD?>M4B$ic)F(}iLRnh;uT~_EJxQJAk3^8>0 zo)@@ey0!q2f@7*g$381I-CU)qM0aJ@U6k!&jE zZy`O@R&ht3qL!tRD)eloJh1Px0C>Wt0J;-?P*$Ellhmk^3E2arLyN^>yiLTGm6m{fo)uSl62 zQYa$lh4y)+=EKY1Imkb89W?kftmUeV=V?_@vbzW|qTc?yjGy%AEi}a%WA>CqE+Q<* zo+|v3gt*dOo8~V=`OJ~ACLv8js}A}Iv@hzw(m!hpMZ*RjT=#`+OQZw$%szhfmPAhd zc7EWNm=0HMoNQz(|0IYIhoQj0PdvFcmMTIY0!*Z-%!_&U_fpzMFVdHwP+uJkstXh# zY+pF+j$+lz*l-iEL#Ew(*u$&q#czzGeN?mg79sPO^JK`0B zsIoFy8|J^7lA)Nyx-DB$n6)GA(1Y|O(_CwS7s|ww%_JgyFhp=8afEUe#d_@0SH7dL z)>>*R)Bmk%s;NpASQaOw3MQmF{5K}exD4AvT=lp?4J~H6Tw3`#y*B`OH~VE{95Q6Q z-T@PHhmo--*B%_g4aLeKrB_=1P2u*$!Kc@hOv*RVNQnOts@sKom}gsihqoqIE?qAq z)SVA`3r^EE4Vqdu6`F+7fp&Q5YFcO5(dM%1ka3Gi^?NO2lkx$A+$|$sOR7AL{HnMT zzB004u|N;3Z_C7E3w5BgR1?jdplF0qIrntRg1n$`L|o?p0!={rmiEpv@BP5!Td~ld zNM-F{yof!Uk*jpN?Hyu!K3!}#h{=|l+#WsFfZeInJEx;nG~APOxgDdk(-XTs!0#2v z%#vgr_t+S{*Hd2d8krRG8orIEPP9wXB+B$+xrAsmP~Fi}bL5G_B9Is=q%rJyzGUEh z9jHTB8m4+I(L%q>+`i$ft-H?7hGghIVw&du#)@wWZp*bSGUV8lc{oHR$$5^y_guqN z$E>?(;Luj7S;cm+;I`0>xMR>!8%p`dGjknN(+Lr~PZ#unFU)goi0avRaz-b-<_(JB z9pTv63_9Pzz2Hexv*FEqGTLrb6mR?>4F`0Jol!!h8yIi>KmnH)T4ArB$kh^z7QGN} z5Ox^NY`ld$d4r^J5j@NKMM>gvWXD#oN05cya67hNIgMQ8@kfr=pti+QGN3uIE+m!Z z7ZN|}!N59tD_i-L71~*Ps7U2Sl-1P@j zSR0$OXF<%;QK7cnW05adR+r_$2`IhXi?067gIIpXL`4JxwwSd}@gkT)ZTC7pDS`6- zo1RD4OsLJ^!|j@joxGw;M0_^gpqA8`ubk2coYXCT+@3G#;QQ?4J9^<3kj@U?0OK{{ zkXMM}E?%$gmg2@Co=D;|UKy0%2v6=QYj)SWo6D^`K`JoN;2B72HG;{PW85Jy-F>6@ zQTy)m8?SjNIdgp^*euM1=O`rGBB*cH{Gf(T`Ly$0xN{BvG*+QFA1Qn~`ie~Hs$I{eWr3vP#(;qZ@5%i<9KipA3JLKAjdvfnWakHFq7PLAI|VD`er zWW}#y_0;Oud#PEmcf7!)otqaHWvC>_>oB|zK#~i~?!XM?@JFB6N6rM?8QX!E;t{*5R zFR+<~!!&eSy?)OIyGVyKXrL3)hB82)?MAxt&mXp9^lo(;#N(=1)^cX;0Tl5!dWdMSpXuXz} zmSs}!AI5hqLJB0N+y_wCi@mH5QP!#&(+noQ+f)Y2_G_23*_5Ba=N(*+EiH>lqBe`F zsf5NCk0MiRvpG8Mm8Z6JVv9D40s^h#vn|=a0qj1HPj)(dGFv@m`JMsE4aZhhnV?#y z{~tv_>=LOncedeOAi$^rl)Y>I)BG(pjFs0t8iKhrpiNL|F_Fj%bH|2F|Wz~Ugq7x>q&IMDxeuu1>_gRN%iZ25n( znL_n63uPVCry1H8tHJx>*z&OsASx&nRR@=C^1xMS8J&90dh~mTAIQF}tyQ(r> zE=wvoZRiH5!%Yc`QW=j%-pzn9JZ2I9y#5;+SbtDh*zP_?*yD{(FO4m-BI-NHC^oVPkJLVeHE=`S1h8&VF782|VnE~@0w(H}sHYnIoFGn)z zh^Q71OE3%Pn#zI$U}tgVz_q98_59M>1dMd^eB_9jp$PPl4{!tajg}5VA~b6vB_b(L ztujR-BOqH3(TKn&6%s7JV+aM(US>yP!2Li^h=l+2T-BN1$_`1Mr`GClcN=M34Gb2< zy2`Cv{a8b_a5>DiP94{!&+N80fq6pDTs-ePK4iN#6#3kV?ah?H!ORsPeNPG?`HZ+; zZI-55#i_-$2LHZLTIQJ70}_!j@k`jWBMK_;7a9KuvGbD7oJ*8C?egn!(jvOpUgry% z^bEtmk+h^QREpkEuoxvHY2`htkky&M z=8896Vo^=%tVm3X*|C5Ox%~sMokI#{N= zuQE&w0j>wR^{BMhPW4<+i1Qkwc{^wDFAIIa<*IVlQaxBkm2w!9Tx(508OMq_p3K-C z7&0wg+!xL zx*^Uk8WnQ@HA@3}g}zZ)WX=vU#Sc?v$1I{GDZKZ;roW*>lmPJ)shX+N_$w)k);$#b0+t3r`@@H3ZF(TRrLSj}d z3*@KQaC?6NylSbZC<6}GM73_L46m|(aHc_*?O0!TzZG@jH`KC-$#Oc{x zmZh?=Cx`S4>C8H;vb-bf-mPkp;E%SLCMqp4L^Ls@m=|^h*fXy*OH!rB1gOiT$u(U^ zks!~Mo9#bIFEy2nz>wNK=e&UCj(Lb7-E^re&n&Zc{8dkVc9-%VwVz{Uc3GxJ*f86C zZmsqSl}@-r`jP$_Bir8%=9}NCqG_*EElRE$Kvitei*=WeR{xXEU>>!80gC@1qB2Fb zryNVmA!RdEwETd2R(JJCzshvv5z}jsPfub>KX&1-9PdNUYKr$V6Pix_=uTT0(;RV^ zA8FShpNvFW5qm13$TttF_QD2x$t!k;h|-!BUp`eh?cQH#Uz}~Xq@-q_xvko`WAOqU zJRn)!M(-W7o;@VRa$Dx8}laAi=p;?q*rEc*FJ2I!t4~)d$JZHH$ zLokMYrVx2KdmvKWBFfJe^2caqB$*iFM( zV?x@iiRN&=Ugp(nKe_iJb`@+4-ay#jiMU_TF7dA5T{v7g&>6MSbK`mJbaD{&L=mz` zCHNHt%(@6;i+RtuHDG4QtEf)wSVhn`5MC3w@e8mSF zH}{Kf&#QdFGU(#1Vqx?MUogT}62mDQl1Cc$6@ihQ9x$UcT+hf*N63i#RNe|%XRW2< zrxV#MV_Q1=!AO?|y%O=OXYJ$56E>>j$?Xjowh~YQyzykvpL6a+5@LLBQY1?v4iR}H z1&g&SB%=?gqnM`5>!hi`bBh75U`VfINSYMH*Gkc!ZF?^lR7k6VWKyCI^Lc_O(g8Ig zHJkCN8`JgLAQ7~LiJ7Yac)42v!|7d9^au({EOAZI?l5Ck*pW&_{QEyCaExx70 zzoKNnGf_Vwg3q(#285<9!UhQOKo}MwZc*_c~dVtu~UPVW!b9z1iYeMJ!`^RV!YaJ+&5n|X)er}uf% z+2b6)E~)?eHKIRQFXrfiuF2&n0$-BNWODraYy&PV5)06xZb3rGRXwDYxLa4{4hOM7 z&JS{`A@+-ndz3u25Wm4oY((m)*u#h&PTZG>3b7wNq38B#Y%{t+^6Dh79Ds82$~ckz zHy^)3Q)H*fL@QRVza+;myYPpSSwV5qrocTp1Xqn}^0EC-V;Txs9H}^T=3a&<-_a#q zuA7v5FfLk*gF01eyR>UNjQ#HWLDUYtF^#OB%J=~?lP4gB_t~HMm1z@aBB%{(&_q@@ zu@XNXu^ZZ>n45tfC4j~Pi9uB-t1?^=Z&BG!{G=F0H2|@#wCT>K|NBSA0s(oM?{_2h zW5FBxj~2ZDAZJl`vUK@hIm@6nl#j~dqQG(2GjseXi3yGYWF5mOsUd}+V1jC%4S|r! z?ASlqJ_F`J78G_guv%%lMz>j`@|`)O%1RBX9O1+@=p`+!N(*=XPc3b0U23(;rn^k% z>r4)Yc&VNJ^XGRzfmHWx-Kp1X=V`9zjr+a&Y~C->Uy(={L>U+n98uwwdN@@Ic97Nh zXwnUlX{)%>r1TLIal0)Lh`Ih4%1PqLu_lp6n2=yqVkFCer4U4L7d>{q)JO@nX7YZK z^FAqr1HuG1mQfHr&_FZd6&8#seV5a?RrRUtt_pY{Yq&mznLFiAXLN;Zf%a%`u)m8d zxK`4H9!nwnVUo1r|B_c0>bRFWNE_O?WJiu%kF=7r7=w>O&d&S825`kFI9kc${s9xe zAXlFEIex|wgvE74<-XlaXjmsWyk)>+qtRHG7Be9$kRFTnO~S2WjT04e8fC}?Dc9d0 z%)Ra&q+xc#P1LDH{gk3QIN7>M*VgK|arIX;aH6bwV8^0eJT9|?&m!eUZR~xGO6M9!I zOyujdjo>OL4F_8ybR8!NEyQLWh`BhDB-_w1B`>G`GTLJ`XYx@+x?R!8P3ogYH;cxN zI*90=pS>+u+_u?tlFj*{hnySV0#a*}Tx%^-Y#VrC4kD%CpNvZ5WT9W~rkd1+dbsdxh~R0aB5*%3(LeW=SnWn`5>8?{ zRNx1|H*Hk>jD`R>FoR>{U`Q6OSTO&*iX<;Jov~c*r)a&HcR(I4N)RYCG*#41;qm2G$^|Y*WME=`N`+f!vd4DF311Mdz zK1pz@Tq5!@X*$%+LQuBRfKg}KGz;R_oxJ-622InKW!KMrgdbRLds65ymQQ1Ni5HM z6TEs)lXA(`RmDwD@o59S@{msif*k;F=&{)I-Pc890K3nseaL)QTRUBDzsh(7Fx@9l z{>p_T-eUl`_f(QI4*H3%W>HeTs@#aBq(@D!+BOpB)~8cx-V?ZoKKJ0E*6St}g4@1? z*Cpse_ScC)x(_L8m+x<=`#MngOaiDeN0qh$XGFpq1+$5CS9mJ%3Y$*(onA{=T8ni$ z{t4Sto2L4xy=P?(A`TicyjIBQG;!To@Ka8&Rg4I9Lm||Z=^u_;k;h>2T^8}M>$iS;2X75eRR4o^(RHwHT|HQi&w zTwIUumJ-Go#`TrZ!HK<zeqr$|x?@j$w;g*>OPDCtIqr znWmBP;r)9xRpgjq_b0FHFK3r1Pdge+RxN*80Jo1izP-+sw`bJ2C3=Mu_E@6xIx}C8ps9-+M&6?*Z3w9H@Nbjx*E`x^w}-u2Q}vFw%kD zH@QoWb@SJpArp?EU%)x+totN1+&n(qP3lZ<+pc;n>FE=}8(rE&lS9Z!s+?bD>AKCs z`!{0_%TSF!hTawktFJ|lsfa!$*<}7Oz^^PZf)IPzyj$B1aeRok!8uGCVd_j|vJxvS z&y0cwD!M{GuizY%?zjzM)ecB^iL!0FNXHZwA&If*JH2uQSo2VKWjw_1!nWUYCcBx? zp#Rq*b>XB%?j)v*_^b^t!uv6DDp1CqT?In*zH_JyJsth)NO!@TxkPJUe57MZ7Jf_! z`|W3i&mnC1@3%VI=C&kZLtPVRjiRMScKBf=8+OwJ6ECf@U-?I?m=h66QRBJ9U^e=h6CPeV>vMric< z$nQ$mL9z`;sJ{><=bBc!wBSYA+96f#of5?b9mp1bMi|u|d9L-GC6c$Cr7->EIu+$| zw6!!bz#_jh8Bb_QER>3VrggjhS`nG3T- znhSV-K+S<$Rwm7onF4|;58xIK0n=D&&#!1m!*DIJukkSJ4O_R`IMum8cT3iz!7Z>T z6hz2#@#-rrVAbXVodLq^QF00m3&`sp)}0OfIL)QII8?5rX_getx8&q!5^c8WaJeR- z=MyY89boP?G5+F)4q}P9rZnamuYaSm#kvgkk11yhAL%6oUtY0TV(TRbzO#=_E>@0j z-J;3#cjl-pY5F^DKjPMA8E7tx@s@{pJP+hQpjWY@Om4YwuJ@>lZ^|JtvI38o|80f2 z*WYi7eC+MIPUgNj07_?U0d5{*TsXb>9m^8tOTH%9P@WbgfdaYR@+X0mk8AU-o&{dg z%*f3P>TrvdQ@MR_vd`oS;UOrMlM9U6!eQl*Wjy%w;lpuEU1nr2Qz~6`>)P>A+e6ni zurV&5mePytd@m))oo>NSH^V+Jx*OFp=I4+Tk@V+vr6ld%w78r#cnFM}f$Z>uhOifo zPG(qv$0KL)43(nLgPSN10T6Egq=K=rH^^NG@M!JCd=A$H@7%mBhpw? z2=-z)k9a4U_AHzFi(k8N;%9q?2$)0*z(*sERH)l453kKn z-+qfnHKu3qsTvPv@84heF(5kaxt_*x#j~orP8W$d$pAQLp%?YhtoB^i#I)SkUcAMgNk?1dA5U?pDl>4x*%?RE zoaM?jeLHnFPcOo{Z`g+RBKql=q@P~3DKfv5rxK^3`PcLZ1#0EKQ{f9uLgkOQGFemO z%tu@APwk4gd%EkWw@FpSksC{6itqAQ0jU2p(@)$8O3h)-!E#x6$V5XPwoNOp_}n}i zKYo%S@<=~Btv29oGFi@pFHwp&qP8?Mf%s-Zn}%<1Wt$6`HS&VJqXPva*_-e_R0YjL z{Oov|+y5_$fX3xp#jeEu=2%C%>hL7~h-`GQ6}$OmTO(M?Qth8FiW^bG___6U>r$B- z<6E4(9r5mRCd=g2%`qgx{&QReqF*#C^%>f@N_!z0dZqKHR9`_oO^^AzAS>f$vD1}? z6MW`1+-ZW&L(T(ZV%&5@sr|nIc&S$F(3)pdFjRd}qE&5jkr>LNn<{7QsSWd~iKGpf z?vq0lvOJDM8Y~@NVnI|&;<6KOzou$nUBdIA6wJhMKmJ+u42ci}uYam?UK*lM@SgE9 zC@!N2Zq;XG)_b*ZXUK!EVVDOfAcF>TqCs`OuZPh?M+3%mKGml|Aytq@C745@fb|#l zP4E)cC83zHedJ=EXLp?ilZfhD4oIAT)ujw*m{KD4#MAu3bP#=z9pWPlt1{7&DC;airqL6Mo7>U zml61YUk3qjlx8>E7(xhM)9#(mLBwD0z^n8L)5Krl4BIj;-TDIFr{?yb7a|QWs~FOU z_1S(|5TA$6fBfJV?jd>&UUBobL(YyFKXI$FNgg_wDU+vj{M1g&3;oo3C0}t1m!p=Z znY=zVKKbRlV?vx@XjHELv1hRH1sQ~|+o9+pC*%L)8q3H)3$aI4wFJRtZRkpzLz zGw9u$G#L)U*bRpc{@&QX$2zXpfI9=jl>+1Gf*;w#TOz7mM^FOEb5WiHpX6X0tOYCj z`&~(+EMOwf9iWZ4flx2goNxsc(~2RxZy2H;T`~UP8m6{0LqC&;)@NEHt2odaG$~{3 z{kPDz<}X=0ujBwm61r|}VHuvV?e8C1BeKX9Y|7=#0(h)J4>vfzF~dVbry5VgMTiFu zDF~N@p*Z1jn74t|KYZJotmR693+uor@y6ln-R5_LUwrIi$-x1*-R2cq-45*#&skr5 zk9O?OfgnC=W7foeE5^48AStGotW8knF2YJ&O|K|mT}*mz*|j;yqOOb%Gl!HCg; z=(q?RbIuU*LG2F67em;kse`1>Aes|w1qcW-*C!S+55x(T)`efKu6Q=F?+p-D#Yt4! z9KXloD*0;?AM_yCcf>mtD%T9eRdC7hYBSK3oHH^hYLTri)EoxX5aX5AY{aoHEqeD zUL0_Ij%Q(alTgahW^mN|es}bdZ7_x$GJ1%@aL9&JnOJ9|Un#y+0eZ=My_J{jF>I@Y zgazIa`t}PJKHoF4`pb?Tv?f1SKNv;dJJVEj70UGBIvHU>YGeCER~oNP;merAewm)X z-vn(uQ|yKgIZ>n%r<*2jZ-8|sGN`M~?w1DMurm2uWtU&4obe<~RtKnG(6@;|^D z-~tT)+7QwMMXAH;u-`;JrBc5z@s`{E7iaGnoJsU|i-r@M6WgBHwrx&q+qP}nwv&l% zJ#i*{;$)(e|G96~J*VzD?}yv9yQ{mZ`%~||cCX!Q{gz?=_ZiG0S%N^|DeIqTSG+0n zT0HyU(_jo+2i{(x*B;b*P)2k4>FbaJuKTDZ>>odx{0$f!4zT7k%IDESzJI>b(VS&;n}E;-$9j&z`B6LlbyV zatr`N90(hq3p!{HJy;uJNJ;#HN8C{h2T^YY@dfEWkHQy@d^Eq#egM>lf~YU`Sk(lz z7U=QAsxg+eXDZ|HV1JfoeOIm&7VUE>WK|@Xj3KD>>z}9%5WC{o1U$CLl2V6*zN1b& zm?r(TOZrRT)V0He`aFQ%gOzsRi#+;fM9L4Mb1OuB!Ac%VdcjV)Xj6Qs!*$5+p`x?F z{?k`>(gd<5}atPwPADN+v+$!~Y6G8e`EU>f8T6c^z1J7!JH%IRRusue!_c^K~UInOsaEf7!L)bnOkbX_z|65~0EJ~g) zBK1N>ECY-I!UyPad>{i#0xf0KUYG32RYpPBRE|h$v@_nYXNu`Yxb^14$@TipR#Xpt zcFQ1L&L1`y8d|FGr!?ehH{)L=<~19h9}HrwlGO(5+|i}mvo)nX(bVg*q~*iG;|u!7 zHogC`6?2E<|D}4^%L^`r)c4znFIKP3n(U$Cn=)HITUD<#?o|6W zWDgG^L8#?>#RS3;LFhfz#G^NO=at86m6g|ZxI)}OW!83Y`B%Z&zh1-%1BaRHoz}3&{9Gz_? zzy4%*_5e6#%TVqjD0cxJRS)sD@wUFQUJuq6{4$zCzg@T;;qJoGf1{jt7@)fddh~&F z1e1P{&K)JNulznGpRW2I*>a%JwaVbUn&(pMJNh@tTA^&s%tJS7y1d1!Cn;#JSjkT_ znedN*bkv0TL?7m#HNOmnL~T!9p~(r4Av^zgJ(ondmx%g3yYaW@i85!m$Tfhsi*Sz^ z)?)kMnEP4EEAH{#&7ziD)NIo;QN+!X&^u_%Fp?4KXK+$pydj03Z2HQQqD1%_Y@kCZ za)UbdJPlok`q9AS5cpOImK!V~FVUT0eZxNS0{Et=0w*0TKuQT(A(G^!k}41@lu#eD z7qC{u@vZ~`s4rsdp_kszFCz33^`}DxpJ)7GOvnr!9zl$aF48CsKMZ5csTVor*af{v z-wbvdDgBPb44WRg(4ibN=*5h=76F5ewvzI$FfwC|s?S&kKxD=cpq(A(8G-a@??CRS zE;}?arr>8V+kYOl;Uzf%D2(X(2rVDLI5PLqFbr~xemf8729QRCjiSVg$~EbtM00`B zvE8Hb$2dmJW7r+59W{Mc zIT^n52P3v(3?k=n>!Y?}*G92o+()otkw(&E;z#k#5<8ag7~C?|4j7uXeO7o4d}eaU zuh~zdc20yF$8{;L+3L}sGh+|@oNHgYJX>GFI<+p@?9q|3+N12V3{a3V4bYIXy+_-> zor5U%VheVk@_0;NM2K1TQR6euBgFRx2j4cb8y(LBnCfy)<4071h942-HHin|s}Q%O z7bLeWu92K(^q6y+w4?F0tp{2cR<~#u;1uwIav~#jiOee;+2yKXQ}2RM zmF4;d)Hx{#A|bz1!v0HAm4Dz8*haA#_k|0RjAZ+Ic`0Z7dMGB0ihXd-@XtHQH+gU7 zv1l3mc)w@q&jYN|ZgWov5Z=v6ygT!*+hch+y@1#T&x8ZJr)c31LE0dzE50>{$Dm!H z+w*aIr10U3mca4?4z@i|nfF*xFD$Pp6-AQW;D8sN2dtZ6Oc*CL> z?6N5TA(vam3{w6;nQxP#s1~sF#}KTDV6u_m5O6?!IOHm9P3>%bzhsFA$aI~sagA8X zE_A*Tw^yDGv19gHPvSE4-i8MXO$au32mW%@1Dl%4I~xA)U3zk!hIaGTqLh2-^oH+) zJ^#RG6H8-ZppnK^lQ97tJw&dK{s}gN^+|SMnR$KN`yayjiB?`STXjy zI1B{5zypVuQ0D@aI1VVb^+RZ+IQ2BsY;dQze}+AL0JjNI4a<-2YYQX#Cak)FQ(!q+ zQ@%0NRzZQxxu~V*j~({eC3n`5Ll;t|A%lu&%(foEwnN4<@f;_%74;`%=wCU_1f|T`|1x|QC+k8!qK%_XeDMm zD=fBVe4t@F&Zge$@j2-18-FmBVV&@jLpsXVr($+4%+G`ho+e;Wz`G>EuIU%fBvDxZ z$N9-3H4d4VJLsm=Qcz^5I{3$JVZ4Hy>fe$h2qard2E495N&{j9v4XlAcAE!hcsMOM z7w_~+xf`=FmL|sAS&~JS+cCdg>K>#gueQ8|b7Mts_c(4-(Y^xteIa-h_W{|Qhv_@rIs3+OQ zhJ{=|(y~pAO1TK?SSSUjb098eCLDq*`6bvE%~FuB9E0X^K#n4NKP!kHG0OJ+sL^ZxT;a3 z@Aecemau92BNr#6bX<(OGy3X_J2Rbi$$!<0Q#rJ-vX;!ul$sN&657;${rJyMRQsPg zc@KCXAdsT}oxY3m|E%w_w{vzevU3sjv@^1`H2E*ZqS}rMt{VE+P6Cor`ooC0}d$iFiam<*KV=aM!}z zV(R|(cZ0~rI`*YOkB-r4$rT%MXL8j`KHyAjm~~qQUL|rz7tHRd1;2ogYeq3$QohV^}U#XE0YNSq23=eAbS`0RU#~%6bA?Ocl7BTsesO?@F@XZv- zAE7RUFfn$~R;0NSR^VGg>*gU(XLi#|XJxfbf29q6rR4V>V`rgd{#L{x3FDd)Fg7eH z@V1AXMOa!m?U+x7mo1MFoqfr;p>k$~XD*@I3H96|pdc|KNm8$LoTb7ZktuPUZNTS{ zn@e`y5*b|#Ie4D*nm{oFZ}L$YrS(v&;8K9>(10+(R(4Q|@R**gQ<6s5YU>lO6?#mU zh2TV2RbAZ1h1DNtV!oHXx-5}qMMQT?gBAJcxt=bs)v7aw?+v;)4GUJY7<2JV++tQF zK;Lq5O*enV`x{TjlKESFROb&(M-a?DoRChCCR``8E*knCMRMISc|cQhP5PvdQ{4V< zMIv{Nx+v;xWV*^^FK{_XBPS1y^*p2Xn?F3~89~=i@GRSYeGL9_*#rDMy51bh?9$C7 zc{kJ@!DiAKeJvw$=WT%?6y2`>Je) z6AgxYutsl=c8fK3nn*oK)3vl(s5(&DBm@(Ql+QK!lu{W`=QFf_)o1e}n+hz7k{(Hps_`0&-mec?v#Axo{B- zg1+SD2c1JuRkd(G3u^Dznre-sO4awyzQNfZb^tdezu1# z#>@~Gx2v7eZ;VDJleq~Z&$+sQ;L_uqZwFio& zYa~2J#E+v27Nc{jzWs`@1a>lVRY_U8r47wZGNSQGT1F}vn?pQ#iP<`u6%^k6E@vo@ z$J~#{#kN@=eKEAg?=M+%uPP#MaPDRVhrYIGq2luS{EgI$R!uX68-8QV z8$g=s5X;EivEK>&2T;$?MwZ{d#jU2q7*Z#fDNIJ3^Rj9ep66K%%4{M-Pt^w6 z$$ElS%yElwwQ*K>v?pO@>F=3WG2amdsan#xA5r7Ve--I+f5K1?qms*@EAW}>bTRMB zObq>W7hl#@x=#*GQ4K#tiR7DRW@S}uf;JoS{w%mvM+ ztW0@@Boo;ZBlHt*%F0tG+*Uvf(US`^IVrONRl1by0qXbge2s%VJvyr-t5IW#re-x%cgDp1CMiK*$sH{KkF^X6^%E-0;kP6jtBdT~-W zy8CV`UKK%}YEx+?L?KZQE1LT4o)hK zGZe+8kCPdBicWwV_f6P%U`${Od(Mwp$-+Xq#hES($Q9LWvkhM zL#~|I*VK-a&P3rN|5aV6NtcJ1`cy@&#cJcLjJp8GJsk=^+_JK`swX^g{?n9;z1+)L zN{>Bm+6cfh;(mM^(*i@kkjCk2Am z_vk|3%*M6T+vatuCS(KN%#(d1@-4B5nL2eAepDqR zOP~znlDIkGr(Wbz;Ik-sn(n;*0CzFY6s?7RY>u?#L`9Bxjk)sh$93Ui#(a&`j`(tU zRPjb;GbE)AJqu}GOam6ZwYFpw^9wp`_^)E^FPRE^ED`E+MSW zYi>Y5`PKo+TJOAA-PSqD+Nk4cv_}{2_Z_(3ci1vG?YJ5KS-Nsays&fn-rW)}+>Eu3 z(O0#czRO&mZ2A&7KjITOf8-j9&F>vEukBqiuOWF(KfZ0W!TvLf@AMlnsbl=UPE7fX zC=fVfUL(BAxS0BWfDe#R(7$Fr5{1o%` zXfFdTozfp)2Bf>sH@=e(B#*nEFQF7jMDrsD)_wRpMbF}2i^IpUvkvho9NZ@S+6#Zk z39A)Plj6N&iQOM4CvkHZ^66e@)N^iC=o$t!oxcUueUF$4t-OcYk)tNRV&~FEdVIYR zyVuKcyJF|-tgH!P67~}J*7hO(fwyhu&BJv#r`#`R{3$5ifT8#1RLw|WedTcm5OU(va{C?`WUxP&bWkukWO`L* zz$bhqe61$>C!bDP8I-R{D51R3(dVl)Gd1+k%I8$sMG zBgHPjGOQ(@$18}o0M`N8rkXm*sfj<}wj9|cYXz$}7lFiHSeBCBoV!Z3bDI+Hv*0NL zTHxDO77ImPl;(x!>Pl*EqUiBRPfWIPxgsyQ@a+ub438>DAFz$PnND1ujga3wqzyS+ z9=AQZUS682xtmTGY0e|0(1&F?lQ>UY#@sI1kr5`1HNm~Pc#{ZvBeOs^)uvq9-A7eX z+c{YYt~r^g$ZN44qcf<;?;x{N-j@ZEWjpmVY<{3ay2;^SNLT0E4I`S$krmn*qRt!~ zgi)%}mg;FN$JC4sKPh+RzB*``zV8d_vw0(m>CCy7(;vmBc=SoGiv6IS#!ouE*DL~eWfg{^<=geLiyx*_ zd`C|=@U@Vg8HQOiAr~|qH_^3iP44Lt$?!TXr=+{5dKe+9mmxHLY$+!u z4H{d)>yxvULl#!rT=L%@!;G^OvT;L(^@`Kl#hb1g6X*GJ$n|H5&I3Y?;%&0{ZPxLq zAvlYmqL}m{Dzh|92W3S)bp3fDSk^`azc)&M28rKQmfgm0aSGH6K}~CpN;vBg{&3i7 zTX2WlNZTzSj;lA~1m_8~2J(#4?njF(AY1%84vKAxrS_iNKO!ma-oC!Or}XFXdJKgl zGhh56PgHf4IUIkelI3AAMH(WtO-q60Q5=*+t6^h7ETlv{l#H4iQ}pz}+Jh#L>W+4O zqw?pplld+e(K~BAZ|*X}5{@I@NKdFwK38m3igOLcVvjUOBf(S(*APhWw}f4&SXPxu zv@OB2<`1P-QxY8!sOmGE(7hy|N8&*ApbUgQtUd)07APIa2*L)}gVQ4pAo9o^zzWiV z+y&XCa^UF@0n7m81G@)xAa-HIX+*f^LR5CaAW*c?m`h#mz1IDi&{AG%K(0RMs$C^4uN$Odfvy?qSRzyKr$8U;E7 zqyiBds2vD=atAOwh#C+WV0|EbQ~@S{L$H42cap&J0Y=OhazHxlI}30MvX9^$HqZdZ zFLj{)#~u;TdY~TT#TBpy_x=Nj3+adUG6U8R`K}(g_rtGy;0B~$1>hdIM-TKK;DGf3 zd)E)#gYxSiAOzVX140cFqP`3R;-TM_fa4IKoG&E<`5^wv0EIvSdf*IT4*FdScnZ-6 z=~F$B59KceU<~Rf1G2uP43rNVV7{;ee+Kr0eO3*$2hYL2vjNv2KYzSrgYAKSs*An* z=m2;I-PJ~(cfj_*`!oP=1J7{pATL6JgkS=6KvIANq#yiy$ACgWKKctaa0yHR+)o{F zIhM*Bpzv4m+76c1Ea*Npr<5Yidzxb9jU-ai~Ytuk3i29m&i2g``k8e?L_oCY#OA z)S$TK7i3-TS!b??=URPV78H`kGVXKywOq|r9zSQBDWyb~m3Vtz(b;C%Ih60qe(J0- z>$DsH(n!Kwluy=-WLOM7CD8@=vg&EK+ww9I1~4KOjZ5{fNOohE=<#^)EY>33@FaM# zEYb(Mbfa31t~yg5w1mD?jDq8Z!q6+?y0s@)DQ_X77KnL;)V~RNr4S6OP{fw*dX~WG zhI0^R#+##Q%{@1t`2UA)Ahd8W5y69i5aIrJbc68!MmJncJzV~aY$)s4qY9z(i&!d+ zqy;^R+xDvJFgZSUjzLtC$*nFz|AkX%9&V7;Gy8K{lb(N^9EtP^;-7Lm%~>WG`IjQw z{={dmbN2P|ZCnAw19A?=h87=+rt4_h=R`?x$#GtJ5y)5u5+eATMZSQvo6a zA+3Km)C5trj7?uO4Ro%JVKJcOC2v?msvo!eR3Sjq%vH>V{pn62P8CUTlp>fITj7_D z5Uz{%=TQfuO|y(G{e+K$l+A)=Y^f!89vgqvyy6%42?Wi)+Jv4gg^6q{7b)*P^^&nM zdRfIR6LG9ZP@p^J%C#iC`aJ{RWixdt%kt`8G51N>z^DJZvz0i68`emh)s*o|7ZNg! zcrsiSWNcEUBw+*dE}JTD%{}nPrn^(XRPehe1J>;n&cBJdFK%r1$e!q)wgbr6+g~?mvLt&n|$8~tvWRR*-2~SP)`xVzZHH; zK@z|YXipDeB71SL*Ucbvpm*%+9X;E?$*M5%s}_4)$Mr_!#z$6O0S9I`%~DLe*w8-t z5fhRi#uo-vz=z$SZMay+VZ+glyV1&i);LQIPmM<(>@(i(^AkdRcEhp5)`c7ID(|(I z4~RG7^T$VFH`==+UPQPDEOM=B=Pf>B-##&vM4gK@Z$KCA8VrD%*ZCTuT>BIJKYa^T z4wf3}+xsc`|Hu33>f~f<=OXT8WNZ5Wbg)omTk9o7Y(9nT7Hjofz1Ckk&qeCCwA!N_ z4aG#k!i6uUEDQEhJuwz4Du9uY)umETPy*ROFF9j?_eZZPhN~D!&4Tt$%AZQk|y$xs|5?8 z{ZFa0$%6cB&zp|GdCYnf@>Y-FP?16k+0Jf@ve!1qznQN}rz#lL6*^P^3Zsc+2}%+2 z;T+#Awo(clO&M>y``JlS1wS(^RuXi4Heywh+7I^8i`Qn9lyyNlqWg->u} z62`k6W^3#=%^p!Ygk5c(uy4whgL(tbsz5N;o)HRt<}K@5?u^?H6~&$KV%tk#itDlilzV16Q1Bwd|9@I=7Q;U&=0QO~BEbJU^!Z+12C;WCXS6rAGBt5= zH#KH7a<*j@`)&STJ;POTN`6QXoliswqKb&B=w(}~SRJd5)ef2hTNJgG-))K8Sn7(* zw}PjC0+QKrAL>QvAa`;|RL|vP+j%a>+e?6dZx{5^Ft(5+IFI_O&!I@dD4Vy3Bb$RS z5!8h0=@p!V@A`QBai;y~a>>_Us$U6}h?Qf{dV8^m*lue&Us)f|J$B3%7p^iA zD5ZFdR2P3plf_`0xm{#wSj$adAwy685*G1r{LZkE6o}~{)Y^`wHtZs{Kci{UGjKWZ z)`2kJeu|)OEgj~IhN`QQ_gwC$SnBn=P}$TchoLx7QP3GPQhY5PRglM9Q&^3 z%htD7oYp~`qTobTLQ#(T&Gtv?DDq4U{xiZ4j`_mezTcn3_b6AC0f#^bfrkEeRDlS6 z@Bi=ie|C3^TbkPZ{@>i)xoR>hxDv>p;;7J=fkzmCjL|qEPDvIb9=|cpu>*`L6lmNS z(mP$LU`{vfRg+@tMu~FOSEj z!2xCXycxYP$$!6BmyW!qm=I3cgBi1;YJbIusnk0~)j}bUP(u<_9G%+y#E8PP)kUHL z8qG)B4Ma0EzKkwXry(}JSQx<$Fm0-)(WH7PF_K>yyM|}a4khz4@`RcGyX6)!7;Qvo z<}}ar-1*u4he=R%N$ynbHM1p$--|-`$wfP(u{PgZktUw7D-CAB-U;T z-q|q=2ONCz!ePTWX=lX}s3;2BfMeLcTZ%RUWyn66NV*9dKg)x}Rs}RsVD!+FG6*R) zhEyLNdOI=2y|0=O#2R+Xp;&Q<8s3geg@m1A%RA@oMfaL7{}3~ThyMfdh8OIxFiQq@H<{hx0jKA zBU0kpOmLt+Bqt$I`HL?Z&9FzfvbYomb=OB7y=7CI6_&L1>8q|=$o)H0f|IPBd>V_i z81f<#TCG)Xd5k#Z*SN&IS@bPW(P^d03aoTjdM>aeqG*&7{bm+FJT0Eb+1lcPDu0v8 z)e_g%ELC>|1LcDeeg&Zs9grDjLhu|1eaIW?Z~nUV%~@Suv+Ag+G`GrFUM|ELV^ga( z5>*Meo@afuK>*M4stW^vl4xCV)m;P>p!6pd@;}?1}x(lL8 zhr}F9ERX7~=kJ7~JI2F4GF;1fPO@yriKr#I9Z%sXY)XbJ=)&mlfM@Kf>%$&y{qfL3 z?y_?aYlVNmqBw4}DJ`&8@oxnZws}fcsTrGDd`MNpaq}Wi&$!KZ;6p?2_s%f3Lif3^ zuNx^;^I7*(4&0a5tVObgNvNho)k zAaMUgt)I`wgMEn(2rRv(rwK#q86q9)vhvFzCe;Jrj}5ln-l6`3ZWHI@kTKuiF8{tE z?f;H$|7U~f|3tU{)gU^fZf%42pVAfXSH3Rqf7;};)*Y|NTCDByl9dqwvubihh}YX+5v5r+LOh+S#6b`4-+E7rP5-ybbbNILqXEtnSUr4NP&Vx z$k3vYRWH}Mx6_-u9y%e&0&X{%Jg2w0&i&3OeeWmkeQ<`9eH0BF4Dt`_#pAI(-sQ}j z#XKr}P_o5L?7FOa#n{2f7Tpojvo5l0W%8m0mD3jEjvpxu4x?7hy>&XQ5;Ap-w(|}q z6=)V(BGh3Wf+1O8>6}X{#cul*{Mt-so`$U}h86QNzI^!{c&CCw`3?}G>O@9=Rm4|u ze#yhJ#{HDXU^k{!*1k^s?=iH z0ig}XX0fIi|AX4LQgW3=mD%QSuCNlA%PGFe+B9dVQF?aXGwm>%cMAQq(?9pteeq>@dU6mz@rEk;z~O zzUV{wEP2!2^v7IYq4?*P1Q*I!jdF(XN{@4W&>4)1{V@+ywI8Zlh7;`VIp*E8=pH4? za>AtkE)Bft2P~OKpiW(0&m#`MoTsr6^cG*Qt>|h62ioTgtx2C#S+|3pDH2Bm!ps;$ zw;+uDD4mXCpmnQgTAOIYmo{KQOID3`a%%!!>C>Hh!w)M4&{p1_ZG~Ry)95Lu*lUb} zp`JhJvZv-HVjKfIE`k9KdT=D+*%HGJ9jzLZTsd}59lCL`ECsf+*${V^ zP<$>wHJG1tOy1#3d7Q!0kFP3|eM++!tYf!=PxQjPCKzRX{XL&Y9CM1t*N{lQBNf`J zPLos7ssbv>loDAl((xc6ANg{tGae_Kz8GD}fXOcryuahp=~q}D?_5mt$m8fGOiP*9 z3dX?jQ*$n}-D2_brF=bEW6{FAtNpeaPGX8gSaGg{sG)9lN*1+OnSEj?X99wg-4;>@ zn^G%uG3AH(IyfSKZetN=zg%gLuMeU;Z@tr5bJ7-50zpnOe9ezn$N8$mB{A$mbCB0H za*F`_HYAMpIN)K6T2*`L8j>Z#W#T3yb(blpcy zUOlC?wxhcYTw{QiV|kpqOvY@H1?n9x;v3?@R~m6>o5`(Euv+k)VX>{&;{j&|7L+u- zbd0GBPb`?o;(eJ8(n##S>G)Xv30n!ZdWdcNcog@;0L!24sb%7^MgBAL2=<4T*~~Y2kkDFm zraL{f828j2RWr7G_JPV+f=yLiH0p`ZgNQSnA9u=IC!4)KIiq8*n5WxxCnJitTti?$x=iH z!!r3yIEK&vN2h|c73a6w16Pa`|Ji;cJJrYmiEVS?o&STwQU)}B6e>c8C)ursoLNWkq zVel*8(d#4U(hq;_FrDtCeto3htkOT2_#LzT(EaG-#V4H z9*@fZ5agAIU`*gU(x+kKbAsR%t1pn_wg_1hne8Pn!ca-{7ELL7O%%Q#*|CQhM{1s; zFj94$tH>`hAcepr+1YDp&vRwcHs(F#z@Jcmn68v6d=m>OmS5rhF)Q`E$BO?aMV2a7 zj$A}^8p|~qSR3W~q{5OMf{vEeJ;Pitna+Flq{LM}=znH;m6WLYR45P-Yt;X4mdF35 zGyh)>`oAns)bhgkq>kSH+Pn7Kn>qyD-xm=^6%rgtcMw5O$bbOjm5d+*g+RZPBp^=c z8954-5k0CX5bNPQ#+1@A;iExe)AmVpKeRQ3CI z|Kj=Hz7O374;zGp{9qs=K%S81YU?Q|wK*MZ>R9^7X*7rL!R_x7z?8-(Imllq>Yd(rf9yeP@ZA@h3uC^#;7`M;ReU~zfbefp2=g=ZDF@pyouS)uUQe4tM;O1cevJv96)+#dmK`5i5|lha z#7@#?umqws{bUY3F%BWou^MTiE~G9zP;}F`P`~cz97Pim+qPxDr~8wc!lSNWLj0mY zFv#~iLUbotFGYZ(ynu+2!yEPx4Vk+QV2)|M&|`PB9=YMAgpbTUnka?_2m2Vqzc|u? zOl(1tyOlFhth;fe?)&b4%8 zjwOdFU@_i!x(Iy>L7KzG`hz>)5Z4q7(`+tBhe=K#`NQ+hKd#jcPtmg{Lxj!P&#szLh!4 zAIYh>v+#Bk?F18@Tb?C7oslS!I>e2Y)~hQ#YwF>xj6q+WrJ<*Vm#6XbXYSfl0=6$`W~~eyJ9@0xBxx(AP1&)!sL9fB>LW^R zTLMzs5Idty;8iVRc9Ev7Sj%?M7D`txFBhEUAj4(~StlQPG4kTuY3nibzaX~~^YaD% zb#U+A2HdBOm@%<)X=_?5a717L4~p{ELT2`0#8>Dd-V^bvIQaXt z%<8LW_i57;C)~C1l3Li*g^vRVZU>5=)Zj4qb5ix34rX+Udb$|w$y|N(h>wH7RxgU5 z7;3&c0UY8&)v&4QQqip~GIew7W-WfLl@Vdcu)al5IC7FD+3Qr2gRM9GkG5gJGHw*M z09ZWB;kdE@V=fyL(nkji*Tm%jI1@9C8 zX6h{bMD|cIuO?Sii#avRzvKHHJrCy8%W5rI3v_zObu(ojRBz!7*cZX408hrtkUbRz zQ!;G=9Y8RaC+$^;z`Uh~#@qp#3cCT-aveuMy#Mbz4P>wGCo5Ca5?8!lu8i%f8jWa$hazP4S*N)3<3M*QjQy-E$rgB{fiDYfb6D*`Zy^8N9S5vpqgD43 z6ycqlKQ8~>U^WnIH!VMB$d3D;|9R*dpt+3(TX^gIcc<1iqc1RY=85n((~~$QLmqx* z3@}UkAhI{x()0fObncXNxyFrY;sN*IUWpLRwpej2vxjwgcQ%jV47-?aNTS=&#pn*LL&`+g!|Kxxd(%X5B{x8q>d|u`+rkHXG`Y@K8E$a`x7?iI3D9Ej@7FJ~m zXG+!3baqOJT%Egc#TL4zl%~>T)+6pMid-e3W`#JUtNnFIyU?vaS(4OhixaYNR-ANe zTI-2-@jc12;>uFgZt?^28E3%}J9jGol!Xv~*48h=9cY@b>1`zrP`I!VtC_FiXDM;t z{M}BH;+p)cd`&ezn@RN>4R?s^-mzjIzGkshK|!xa$MJXJd$~$9)9d~^7@YUgzPz|2 zl`#Kh^r>ef(v!9CAI9ovu`0St1q%4J)v$C&iV)<>y|~FOp@n6Bc;hTA#sL>?UDl?O z0yU>I_tPj(Zt=w=tMd!Yb=vG*ZnkM8#>ag{-+}|C^Qe1gm(5YJt?Alkp2OwMw|+c$ zGD!_(FC#K_16bPM`5VizHsa0Ra1#6 zn`^q?xk$~nV_f__BWApDq8SECI;XJQCVzW~k?>@qN>pfVv4}wK0ORnW#mlriPB=9;jL+KD=4x_=i zNM)SNeq}?Vn_{G-PKjao6lo@wCNBQNF8no7sX#yUbG76KuF;_riLrb+#;h;M%+3)8 z-=2?iZMf3@j(S%{lN)K4)HLTwXf)XLk`t@Z_cWPbJEHmGoTSyJV0&}(TpJF{qcMnq zAtUOP>6XL^PM#dT=FW(bH(h51dYA$O1CeH${u-k$_904r+-qfJZCQ0~jKs7zO?3qG zfu^>us26#`v8_&{RUY>{5%JesidKUEA1Xp}O?z73MkLD3>vmPiuHridsL$KG_;~*W zfA~+7chc7wz1^o6d}%xGtzc{44spVY7c}BWxz6G@o5JJ{89CeEj$*F9xp6RUi8>4L zw%|zC;7H{lWg=>Iy>m90d5aFYORUwdIh43dTxEyaowmN43FS{eH#vP)7p<&F8zb;t zbM(|V37WLXIF!}ItfngaUSdGcRNbfNA)9?XaK)?^0d9oyW4v}db_G&pvzrYY;4zH|NSJE~SP^CC}Tvr}jOHsbHEUq(L z@ErIkwM}Nh+N|5m^ryINjVdGb-iI?4wFNnP_IJ+m3MTtTn=jF{=QK8aTsIgLqnN(N z{4wz!x5PdvIqGPg^`!i~!r4h)gp}m14S!-6{rpkW9OqO@LMAaO73N<9l8w|lM+DDY z%SKuAzv9xk6Dy;vhcQxHzFgGzWADrK>)UFJM`wS}U#f5DJ!PUTCcinK$@Am_taKWw z%ZnR;o=rbzrl>-kw*=^Q@qAyv`p!o84hcQ1(^D5HN*+b8M%q8BFD&Qs({JN1OI#|I zF=`+Q`>e)a*ZvHy4`L~fJnZLRN$nLRXkx~NT4KrG?Hd5;%Y3%{k}PldbItc$RW=RM zd;_2MyI(ZY_W5RE&rNDVe_5`C`wt`aq`dh2nI6x3DhnZ8Duw<_vs~#)F*K)Jjx!dh zG)}XLRh}N&BMzmxS(#{&o2%wx!ye@ah0Qi5-VF-pM_QFYWML?3(CW-X|dfB z`V?7lDp#JC3A9Vxzh0syp&yE-u=*eM;dctbUqEhY{d~jYAl#h>b7ACN2KAtyx&d_% z`Nu)v5bn-{=4jhHkd!FP!;q9{Ig4hWs#rdVgM4}pj6rj*gS{}t4&)0spAi1 zJefxd9Dy8gl)(djFw^#o(Q!}7m=~#zUAocLdel_a=OhM=U4KhdJ-GVfb2VyXIAPzd~Ilv4W zVe!TTl(BTh9Y!GMC_Abf&!=5hM6F~O%*WOpg%feC} z5c~;BFlEF7@ze)f5$AO9@-hcopbTe-HHRO&L>d^V5C>i1Z8#xtJNovKw+9}_L>dw= zwjvm4{54%9JzbR|qF3rLEM{d}COU|mbTX;O@Rpv1DK~HT0i#^p($cJIOt1Z^j{#8_pct3MNHXKP35(& z02=G9cE3Jx%6sb|_u82BdU}tJ_}8g%KFDJ~J!3zc#J}FhTF|wGtG%Ss*aq{<=24-} z$Whrv${@)iGdaB($!b#99S&%53y&!tzn5K#oaj@JRS`K~=;dcj(Ngu~-x#p^Y_FRgkIin17xXXE*n`3)Y;Da(03jcuA_9`#ku1^#s^{XkZ75a>PD#>Fd0KH8j$PC zd7jyGe!MySz#EwY@aMoF9UQx;-biO<9|`o*U5@Eg* zvcJJ_z=%~mQ|jb9l=8O+SYALYp;OXyfcaTPNJ|Msln!1COV2GcsJ+m@1>+XDe$~m1 zt1}!%&Eq_#7hM6ElfcUkJ_04bZRAig)fd>du#yL@%a|?tN}G!67HF>pIMP6_1Il%n zV+WG7gFwmwzG5VlA@wedz+kWg+nxM3% z0x_%Azt*xc*oh4P2VdtDoC(lw>j@{eZQHhOV`AI3ZQJ(5Hon-l?PP+Jz4y5|r)t+< z)mMGj7jIX+tJZoJEN^rVaP7?~otuz&=h9Pt5l*S$kw$TL5m1tYc;?f=BWw1n28*`8 z^9r|Q0vJn^oLo{{#*m=Zt8AlpAC*J(H6kWt04QFnB+UvKk4TbO5mK{C2jWTcEK!>5 zm?J5|B>9`j));@577S}xqJH0~g+33EH#hI^Jffp7EG9_2u$ev-9&q(tyD^2C0ih4t z>VchpEK9q-ok5#dVC!LYN~I4VyuP$uO>ay+!dF)sd?eN4E!XKJSC-j&f;D4X525Jv)7i~c60~% zf*7J~R`_TuV}kQZu)8x+$pBhFo%s<;4MCO}H#lx!gx+%stXBEd;iAthw`OYjB3Rhn zhj@1p$$@AhN=2B10D~k~@;D`!4HJ1#?Fsl+ISeF_ups1>}&EYgKA0Zr1W8`65fe8>hgS#=zr>#r@ ztG(=`L_->52>0->gn=lGU4-9eNB|m5M`}(tM<9dR13}VO(^4g$ClHR0UyNmH=Y%-h zjjILO$5=pG*6do=Zd%qz#aO%qTN0trF(_@Tr;y6g+YAe*ASPn^R=(%!iID*h- zcIA=uT~hSu7VT*Up?a?}9MJT|&FgxvqSFI6BDN-$3kO;fqcmp9y1;o{O*;aOjf&#M4HhHC z^$s8X&4V0jv}KFXPS^ybqz?0V5)zU)t!bN57TM$yp%XS%$((<%oX44^#uyCNMIQdd zD~g3?;O|_qDC%S*uA@jW8Y0AwLV8o~kGhH1hh+m2<)wSyFt&P8Zc?%^=e2bBbfJG}QZrNR+@4fVF;rl+Cc>F7@TXWKE4&A{fGcIn|0T4Z z(}OU-X1@P|7#@qlu$kIMIMDpCKovc#G?dyA845;sVOROg5Tq zNZoi$PhxYC$KOK_-motHPQi)56)~ku9xWy~%23)Tj^r##HH{yRknj&?$0KNczAFs3 zg}kFGeErYCRMb}6j)`6r-b!6Q^HK%p0BR|JE4D(M3(w1SM32a*n!6YECHSBuk405d zIqt7pRt)%_zQbC0bwyu#9V0#MxSvK<#ztw^lhxRN-<+;Oarta%UF`7YxhO`#wgE&h z3c8it-V5JiS|=qBbXrQYq)gyaps0%NE<)Ktyth1qCeV#Lt1eq^HD-KhZI&(v{cJ+9 z7as(_fw}as7w7VOMcDCR-zl%-8sfIVUEF5*<=NptKOb~o0U&sNoX2Cyd-aHsG76VeUv*mGJYC_{SaMPsmI#qE) ze~`tPRT%O=pq~TD^H0kk&A^iNGGVy8DS@!`k{Y%3uw?dx|0dRPxlSkj+2Ml(SX*J` z!xuQJy_d}=7_>%a`v#WQ7$Tjy%ve8=1tI|mOj=(L`57bHn9c9U>0QBSb?ugu*urC6 z#R!&%)u*}UdSMGk1n=wCHcdJpptKNg>y~6pb3`^fSzA+U)r?18uI`+X^M=+qwxdJfG2|MxN3hnU|I#j6dmEa6(5t1(bwQbqI2c;J~q*{H2Yfw9$I9h6#YlIiIDzx1c zFR-@O#c6pNtxJ|$I$9D^o(O7vvq`a2^1-B}5T<`Z^jd;S0|s5OX}=7xT5=HqbHAY; zDe8D9Hh3zLM^WwPzR%p42vCfbga8b z9HY}jYws%4t-HudvFf4>4`>-D-L-WLy2zzsUPMFg*cC0jDezeN(Bv`m!iz@A_FG5H z_C*hz?i%k(+_zq(x-34FKbBs#yp6qyzihq9d)U87eOY?aaAWX|3-5|9Ewbag!6pa? z;s-;LGla&i4aVb737|vd<&blZXc8!-MbJ6*Urj!90jP|YDb)3`GqwpMCBvLO# zBR5Hvj`SRAc_=(5J4Nw5@=mGo(N4w>3iG@*v+4O!Ebj9R{Xf)T68OUlCJ+Wy0x@bo z41>?fm{jB)3a|+1l!%8VQs_s@Gxm{O+SBs~3=64{lT<^-lv502JCvqW85<;Mky9)~ z!Yh{wwQA5=&&3={Iy4t6YKTf*BRpNgG^%~+Qp>%Dlw5i>YT=@)77-hScxh|ZW+GG0 z!5m^dMXQzU!mXCH4P!j@t5x&DUCyQNke_PWKYX_~aI zSx<&>?JAed>XDzvS`J|z-5h_mNv>J!P~kH+h6ayq@50;Vd2H}d!~tS=(H^}Vrg%v7 z>F`nL0P-RHDg7vQz`;K2gU4ORTjOif8~I)8Td4bnw|bY6FU2-1KjKY>e#8r)ZaD0? zcUT-PJdRe`+mJL|zD91J`GV%I-R1A=gcRa;y=Ro4*5~kwTIOpStH#I11j3Fs@h~K{ zav-Z*A*-NNl|tt7(R}0a1Zb6xXHCewnd)ThGSk|)79ny?OJh`v;CoHpL{rU%vD<}f zhVfMrZ?!Pfnx{yv5pD`^&B49S1=!k#r|xsZcJhg4-nj2_T4V21OjqB6da1{}afewx z&M7A0Zx6aeVV<JGkhx2ag(qB?_nSoI9?ugzGrW$GC%zsPC-vFq9x`a2v6DnI(vF!YxSWFzx!GrL z64Un|!qd;*#C5kH6est?zkd&KI`TS?!g_BdzzG%I3Z`2gsLYsVGo3;iAE*hN zm9?NI<}A}e?817hDuwNN|8@hNcCnX7-jaWNFWx54-uef*=UjId1jIJ|xS)Z(l5_e9 z@qpCPy58N+Fm;wALwvbi!D2|vPn}6E37ln1J6?8H?K3+#;-NP9rz@fM0WxR%CRAJLB|-Kg|j6*R%^CK4|vLZ(Rr7jho-# z-!|z^bH}ZMN%l+Q&Ti8($63rfMQ0P)(<>LO#T=-OAA}t? zRbqu@LuLdEb<%r4CT3U(n~oOK{;|%$qT|y`vx3Y1`Xl*e)nR*%kUAu3{pRgBj)zj| zRCNJw_gWehD*`DxW%OgnXZst-O7=}XX!0?w=A3ub_N=xJdI^3{71(c|DU+_kF ztC)(vjP140ZO5W!)|0|C{SMAKs#?|U3FSSySk>RjS2WiI~I2MWce{pc3s;h>W&6uosWPG{u@xd!QT ziRS9&i98E}zOFA`j{&}}V-NSZ)U7uaa5vqG?09$#5THHbg|O@E4rX??%G-tkUODjX z3StXFuUpztMse;x@?1#2#k?E<(L$sDb|`=F`6ipqTV^2N^7WJ6hL&O>TR6UnbgMn= z)FA>31H-z|?~`}p?#s%>QUGgI>|MUf&qC&3c_Y6GF?zm=lK_d-e*;eRdl9nDE+khp zR+UDfy*qhdV1KfiE-u$`P>ki;w; z9&{Ug>tmFm42F{u5HBH6%$OC{w6DDvz&{FkNP(YMCC4b_8e}*M({VvNtgvMe&4E=f zC!tWMtj~rUXRU;{R`D???ZN`}QwiGRWd-F^9Ncwc#gQvV zG%tr{6pR@XVFmwPNrO}AXkUyKm0^ivT5-1?)hcMFA&TeJ`WzGET>m+Lw_v(yknaCt zzKKs5{$eT5Gq)LK+LeNaFQ|v|otqoB{66=(>sBZBXjO9Np8=B1?(=s7z~x+>S28G1Q}COimM9(hSLuW z7+3i%j!MeieK4Yo6DpSJ=|$5p#TcpXOOlM482L938?!vMDHl#}VktjUKS+f)vTrp1 zV&!7X{*Wz3v8ZJJ+_hWi>nY2pMXK->GAy~?u4YXO8Z5T;%sW5!i?l(H+~L2Rx^azv!vlRb7^T#PJ$w`Ue`Q2IbMFK?fAPJ{ zM}t#H$&(!PqAcQHK8AsQEP%XvBKJt6vwv2(u z*8sMQIA-hzRnE<+B4#h6v{+USC&Ucln{Q&b<+8iV>;d{)Fq-FT>G`%nl9tei3o-c` zHbG1+IXcg-ZBkdkDVFT*mw2ni&jYl{QdbJU|DYstnn2yxJSP&?tqT16myr` zf~~96AJ9J6yDYgL{W@1qFNNDvm&~rp9q>4} z&?%2KOyLTBI_IEMo!O^)!l_l9IRJn1+AP#tyv~z^I}3YykBcI~38ueQ{*d`T$T}KN*DZ z{#S#Lgn_NG_5Y2jMvcSz+%Ku?d)H5&?ygeuDbz3n$Lbf=eJRGCTmnea^|Z?e1J zE^hMte%mABLpiS<361D=Qaerawx=>uWo2ZArqGWg1PeU=n0Sq)LRH76sPhm$fNg3= zA@HGKSiEJdHn!t4G|y13x-L}CX4ap@T&RQ50&>ebLlMx-9X`Q6Vige(n1KcV<&sVU zH&}!8o+isyF_0nDN6S(oj896{THA86asJIn!JJGuav>HMHw-C^%}PDfLB)`r=D|Gr zzAR1!IjbPYsN}pPZ}ALJeHj z^sP$Xv6bh{XhsZ?FD{JTtR_Ux!V_j>{bXtGR^tn+xZVjh8;JGLj(nJ>rG58q|dEB!sftYnYCe;hnq6gEJAR+oFcLZ1 z=sHb7i*1s$(gKh;z=aq>uS_+voiJ%KoIeXTS&Q9E|0{Zx571atNFps#N@$LzSzmIL zlglENYU02Ul-a~qFzIH_#!W)rt!exd=`JPye1l6<(wsbNDPqMgomhJ_vkT$^9O z2?gkaeuQy+g5bW|M~O|tk0Dv3Q5);J3Z9N&`ZgKg9SLcAcgt(a^wrUnBJ@?>`}B$x z2KnYtbCkR~&y9KW5t~B;AU_dK9%HVH|1mb=_GnTu7z9d*GTol0gvqI$w zQ{Ou$6uV!97x_C;P<5zprMq-QaBYd?*cQsMD;7L!Kb(K>G9t?BCS~4yv#hHJ= z<-F#W^KljGbkuI9cmf(LpZxXUrV}{N6RM2ru81(QULkMT&>vTb6TG&@<{RK>;Oyp%N)hARR>=c@2Fyi8%}W0h zxB~zFZ*!w(3cRKLPlN#%@<07u|9iamfBr7V|4CogsNTqHt77=dZEU1E9+lw+0S6Wi z*l4zLp2QJSU_)sA6(t+l>7-5jtKm6*WfCY>CKnT-$bTx1w1w70@7rgbGJN)ds=H7A zjlm!FQ(#qVJs=@LVlm@2{V(^#C-=oi{_P>}rVfb1pEAI8W4!F@vMa?&zqQe#sWCp6 zVX}#IGC}}?1(0PnBkre*cdNR9La1zmRo(R|#xG2J!CkW;?xy|Lx;od*eO6F8iug>9D-OW?AJb5VeV3C(R9?X9WLR~%+Q)#sks*|li zffu~TSm{w>ca?cxnS2k2Gk!#1;ZExz3M-bu?yGW-%S}q6K=SfL z$uj7_D4QGn?Yc#_fr}0(qlOs67$IxYbT^Hl+&g;`_$CXWSj@wXF5VV|R-8R;snI#i z(#)mG$zIX{RL@vJs?Hd93aB|^oob>lt-%)w4Q;-tRr=HSKwzKq~P)I;{X>@?2D|hNz1lEg$+w!4TH^J@o?rC7GFsGyqlz(P!r@37jP|4xr8 zjuNW$t}f45DOf~r^kAhAI1Z19v{5Y%UMZ?6?_2n%*m3*B^-z%Th{`!UOBX%p#Ui_l z#lg)GJ1ZfsrEAaOHFaR4Qky_|6v|@@(aJ%<>me`?zYzZp5;Ore_HImp*1T|Z>X4C^ z@?u-y-oP0^#^6X(xx54Ys7Q=4aMUOGAvzUPJn{a^4Z}~afj?cwgSjs$nH?;5NwJB` zq`vO94uHv;4&F9#_Z=|gXFOnKJ}7@WJo5qg3PJHT5%oJDe*YxvCvLHg-j#F>|4yTr zMy82ZJYDboFDEwE_fx~4pU-OWKjy9ecPF-tfuV`@{|$<(duXVvpnh+$gMb|TB4#3j zGF4H*_}dmtLPx>?!^Fj`0N!9u#Ak}fz%@0P@jEhRgw1u*cAjj>bf>&Rc99}8-CWIP zfp%-b8j$O&+e4?{dkxF`(lg3HMC5mS-Tt!a{H_t!y9G1^j<6&eOc4pOC1xn~W1GongZ_3sd^xTn^N z!?H~_+AB|_c&<24{o;e3$266%wG;#w?x>xKU)ZhET~fTM(C(}(qTIDI`=f^(a9ln# zf8O$S9J{=ps+z0DN6%;+@rEW}KfZEYagt;jYHifGyrrRi@^GukW!jMrTU%pgp|x=i z>&~^=!k1STQn!wYbD$pN9Lq60&S3LVNKP=_;fazSCl9{-e&OuZiAg7!0X44(XtA}k z@(N?$wY_`g4N!|T6jq+>SWoo?Uf;F21KrGxcx>a0Jg9z1=GNNC;>G=1-^E(U^N3b( z2TJ*4gv@D(&V|m@G_8HZ$Qcs(AY`g#wPkhMWx7xzRCns#J+dy)J=2%W%)}(reD|6) zj-AIY0f+ud*VlOjb1YFUZ8G*0V}@T-;v#x<4VpkG0bZ1lj&yo+rLvFWrFx;b-*X6a zCV}0R{5>rw3t3@MQc*=+U=E|)x%kS)iD#?Wk=%v^Odm4uw2psNCFLpU=3Wy^|m z@f_UA8H)+b9kA6 zJe^+2$&E^S98C?$%9S6S6BL;W6xmutLTV#OD>qHMvRBhkh{%VHHaMaPig6E7pdDM$ zAR!U!X#^=->WsiBEk!>Dhh5v3;_WpVTp}&94oswx*+E-6WMdvgjcnBr8%P^A*n={X zF5QKrk%E^D%cc`4>g{~R=|azuKAWru%|(&#$U_Goy4WKKu0H;D`6$Og8XG@nF-#T= z3i3B+aIVXt`DtVTDuG%T?oLL*fFgqc#dFO%NToa)Hnz;`QQGyB) zS#vpPiqTGRF%ocEk~v6k7u&;1>McsTEeZQ*K+P_Y867))E-O07a8hWih&GZlUiay_ z&!j6H>Ve!}SJb&J#$@vnS&t%DYSe#1F|SCvT=TSApR{n#by)}{FJghAP9o7TRAuJG z&SyuO=^aoC5s{`p%qdbbzPDQcTfaH+^Jgw@x#o*TSh0Hw<$Y6D$E;Ag6OytDkP;OB z9T0}tN>|(Jm`tvtK~htOpXiW?@CbN7cIQ8}OynpIBNstekyvXK>W6fTcp6$~;ar^@ zQBw0+cDdBYsgD;|^Y0}$mSj4RIADw!S zMYxBEt#)2CYTc$o54}^U>+>0r`-lIBiY#IhCYSIs)0;tw|1_gnj8|gDDS~5?5324TW~qUGhc^^r_sKo`iwGQ*ABbYcJKk`9_rv(fXgF>{AEm zMouz{hY!+EJiQ~-&woR)!K>G5MueKwJ;`==a^}2#A3C~zBv@+YP@k)jE3cgZZ_HQ9 zx=$Y;sc5ap$cWtehf)}3k@}w{eRH1P!zS;SaOM7m&fN-I_ORRAsVuf{IKG2xk8FSz zVr1lW;3Oz@l=p#r6WPQsT`XP}PNNXbMY!$yipSX2@jHmL7|ASlv77p=bD_XA1S^H) zx=;y~ew;?T(k}-#(?3wwi{d?_*dE3S**Qcf@z(MQWr^8YMDoO(^rEiuJiMaQdBvoU zCXcdCxZ+m^4pYA$^U83mqkVZ)cZCyeI*`s`#%vd{!PN#ymQCAY3;P?b_#5R48+hHG z9A4!YR2KS+0oL=NoxmBFQZh!9%9qm&m+N!UGZ?PwSbe1l3A>saI>Pn$dy3xA3iHNX z>IOY1$_t65>kZosPjEmdI2%)ha>KU3n!A=Oqn$TAq2Mbt?|uM;<@kY-Cg<2A%?n2B z!^E>psfSWnS!(EZ`*Xo5ME%K9vl^fV9Rnm}as*dZ3~eYg))+9Q!`K!hDD5~*Ymno5 zE5!k}!bqC|N3x<^7bSF@!cgXDxn)sYt+?9}(QuF<^AJbdLKwBeC>qI4=s{t=N+i9F zxfr~`6_8?p&r@=HC?fS{Sf*mxi-9C&Ox?Ju+niD$F)v72d#S{$;v2*L>-|WfR8Pm~ zP^g$M_wk61aXhwm!pQ!Z5#&2;wE^M9C=#1L*86f3uHBw}2tgEylecB1bwng5Xa&K)QN0)LsBi92dm$vWYO>w?zn@P^0 z1f;qU`(&FmIuQG4#MpwHQq$?d%&nNy*2O;FHqYhEYjn!jfr_&@Ywc~0Z=9Em1FRzb zy#7SbZ5)0bwxk|k)|+(GY3~0lvY)v3UAcTZ$1ZWr7>7;dPd2-QqDm($n+CK0M#M#Q zJPfbo%5CbVk}X5e%NWKswGseUTSljFjx}Y+4GiXN3Da?oQS{W6TEM-)(Oi)@FF6mJ zl+#D$kn8eBRWHHbecq5V*CvN9)bs0^2lB9lv_$o`B(voX(vxysBHWo6-!l={=)85~ zDez_6qQp!i@3vk;ui5|MkK+VL>KI4)#ze&yNX7|gp5<4pTZ0k;5?t}OcLRw5IJMPs zD`6>1sYK$QZCrx}ba1z1H{{hQ1;OFZDTw zzhrHu{cgGb(QljO?bBO>ew*GPB9kVp@=1wCTMl(6lTq`^n~88ORD|+}{WCW%c-iN; zE>i^WMi0CF6~lBE;Vr!w!IC?oNHu#(c8PAXMRH0PF=r?kBIGS(u|`*Zi-5;g+b1jr z-LQH@&mSlrXAtg(C+gtgZ^(LB_lHH*0p<58k=hhjCE5Wj8LzP!$iRa5uLSNYmCb-ZiIKwjkc$$9 z$K=EKIWm!eHs#R_W{R^JlK{6vq}ZaE(h?6XaI#$7KoU_r(szP!@iS?nd-;cnZ%5N$ zKqRyML!o7>GKV9z&QISfe$X%vXj6oG=V!WPg>U92FElrP%Eh?fy8O3Zadv0F$LRq?{TBKmWrK0Qr}{4^@K=9cS!o8tHXHJ%!V~ z7v3d2Qp-}AQ;pu7V3f};c-Y4enKxisy9hK;VhjHH5W`e`kEcR*%KTf}6dG=ny(XLO zS4f+-t}-9=0n0fpbWj=1ybqCByU&s>IiHh7Uz#+zPKOaDoUd)IX-QX07!a2H3_oVo ziQ{7u;SdOV6h)nw&4in)A&QBvUQ}s5$PI)5$6O!i_>GRCh9D{@%B+P2!CkrUy5v-o3DLir&i;;i|`AYpw* z=TJmiuRundThN2`SxBpR+%FZ#5=Iw;wKx-JQB~@h3ZA*Yx3f1TybX@#V06h z2l>#%q#0U1vDKI83q5}>Uv=DhBx*s4emD+fiF2p}k)w5JcEM;fp=k0tw7$q%JYjF~ z`{PAFrA87CpwiA%$V*wBmI8SmX z7xH*-8T^BWNdi8L?tuK7zpZ)Gb*f(ewG74kHRR$%hrSO)*fU|d2|^_2)Wn#7g`2{> zb{?vrLxFa6%JPJtK;Xn@V6ll6S~)LlN@gNPo<@XDn>z#Sr$UGVK1cG-f#;}|9>7Xz zE;KwX|7eQOWZ3XVogliKnYzd`;jw|EK`cJOg(5#yxzKXrtXv$ta}Msc<(8kICNYTS z3q*y0i3s#DvJf#8IRT%H8_&%d5NGqK<=0a(bjE#HLCLgPy5{{xEq5FYu+&N?Q3~EF z4z=2%5YYvS3;>qd$4CE0xr?+fleKj&f^=zKD8q`_)1bkUKQHi9s`5q3 zlmw+kTZoD8%Pws&9VN<8LwBEztcgGF8G-EK%snTnHNOyr0!3q9E3Vkr&T2qa_^B z80l;Arh#)Z!pN~5q?C+`9EHq75tg=trql*!A|Bj#2*lg2aKbBd4%{@>5FuP=9IW5j z(k3?I%%qevD2bUIld)I*c|pX^0+(2A&N&<-H}yi`pQPDNijoVSaY73R{scN~(IR5Q z9Br;dr&nIL97-$+JTA$_%qY7VWO8b7&<$L|W(54;^jhyI5Bp-kiTxGm&s|I&cxNvY zd109R1>r%c2Bjh~wf)4%i|yty-F2k7rR8Yf&lk^yYsOH)93h@KMZ2Kxgpd_Vc{>_Z zGNTEV0KIJ6Djo)lw6%t!F+PVDI89*OL1LPv)Z~?J%-!vuz3nwRz>&qc0)aBhBv@Y9 zNGmz=9>$q|k>$81?U&YWNHCFF9*u655wb9aJ&Rm#_%DG$_!7E8CVaS1eUU3Q*9lX~s zT5dduu8b#YBxp#p%g8^8=|~JBv&W<}&D<+9hjjFDSXo*6Y~f&X`O^wyDTKS%5Z@CE z6^Xhuq32RtW8!~d&s+B;>Y*u8_}m^Wm)JS{bq6m=YX)1ZWGR-N}mm zn2xWokmZ}vk(g6Eq${S|lphy4v4YR>Y@C5|zy0;MvjG-fDJkuHll^5OBy7|QH!eL` zoM&}lv>PNY0Sjel@ckOMT$Ma|ETA4yrTwFL;wVqCJ-?xC+Dot|RLYd@mDg%;d^g|m zTcQ%tBSf3EwpjiEbL0)|jUR>WRlTTC5km_=?v}904;9eyAx5#is5j>8<)-$=2{vN( zgv~Qk?JF&_e0mY#Z-n}^t*pA0UrMz3S44jUEzas9MT@?o_RwTL;rvSIx*y4zgmgZP zYi1%_1+S|zcAJ0JltT32VuO{)7kJ6Ba@Op~tOp(6QEhnTPTcf1lnc%HV_a;FmEwRc z_n8eqll7?``KiBfc_b)OCZLhjEn_?bHI~0iVJe9`I{x>!GViz$M_;_gdDD%$~$ffq11Zr2qeZNltXo~_G1EtL4<0H#PVU97WD^`No) z>%5#F4uv4;j{IqNydC+Yb$%-p7OLZfL-T;3yn2(o_3$E~OXAjF8!k`eW!`_6xKm43 zIEH1w&RR{vNVT-#CW$e3W2p<3FVAhA0`AG9EY2`?rgMh^%V+yjoN@4^2LGszSxoWh zVH!vD%$243b}Np!)vzB9YfZrTT3oxy4thOHyEUI!*pf^?eNH&Pn**YXAusdhzyTa> zpQdBZTqDp|@~_@p{n(Ec^vwn_hI3-m+l|FyCCTcedNZ~ZRhw~RBQda-E<{*~!y8$p z(~(HB!1Ujl{)aQ$PFSogS_=_po7jNYA}F%nt;wVg4m+x4{_xfIJ2!&~HexSL)@Gmm4<#p{cFt#N$c}wH zjk_HGnI%YXj+)VUP59xx;rwHHm0Dd zRFk%-0mQC@`0;!2XFlp`A1PXQ;x8Nvo9)T&qA)d6Db$~oP?kBA*;Y+dvBAj)R}@w6p7Z)aam&57_NtR8eUhgcWzPic-#1sr z(8d{F(3Y2u{>@U{xQF;#a;va*2+P*J(fw~LKq|B4!1Q6VC?iJ#*?SX>Dc1BArzZU< zJ%|&IKh|MtI3iZKe#n919p|A@g1%LPepLQ0nB?;?xa5j53oq7a&k-k#Nn9HbP}jop z-A-eJeplkvRmm(1zS88HO&Yuq0yR3%qk592w)B?b0in9T(u`ht&J&iyRDNn{WB8J_ z6~NvJW|fmPaC13UhIoHQ;g+4TgzZjiTqlBfe+AY>kcU&j+0h2=P=C2>g^1`H!A}$-II6z zYEAgniW~;(;rcGCd?WRtY0B99IlO-MF!gX$(~WXuXC&I5Cej`UQ6iCCLo)H#2kFSh z@*AaL@@&^>u1Tz+yk4ZjMGdv&Hdiy=eax2ECT5_X&Y&Z^it z(HdRynGv;6 zp7h9MqFHp!PV%^wI?;}%JW9=vWl!*~L3TWls0~Vo zf|_DF$>_T=ayjc*@NPUfcRG>FTll%U8}7y5f&bN=YzwoVs*d(Tk;lBF ztZ#{3*F2GEX;%UiE!erxlA)!fs-znmg94t=ztu7GLh~4WnXaj; ztI1SWy1rr2*t-5JNALp^3-b??LOp3vcr~O4a@nET3?4tBpUm{@9E?V6VCOHKe*Kg* zkN(T?w+zh#Q?ponPHs0LHB&m)ep+W<;iR=9GTE^~5@vcYCe{-et$$f3(Rtatl>A=C z3ar>YkF;q_|>WucUJSN*X`U!4~ASw54u{wcHaY}J@ZKKAZ8Fd(5q9`6! zP}I^?2#jhM^#YI}O;b(ojxA;OK(XO#n?Lyq%Gg@8RP?=Y8(|vDp+w6Y6>s{wp}8C< zX{xHqQFm|A50&^&JzOvMl zO8T^0@}OL~3WJ1&R!3>BS-xuar${>TSHejJ!i^&&^0DbZgk-&7NtqgC<&(qSp79?m`F8Q&< z;h8+sTQMB64TS~4j&SwV3jy5VkEo}rbom3YQhAS{2~b;hj_bd$z*e!L?~n{4GfB@l ztq^G(wlPaFVz;`GC9lr{1s?!&%k{9Wm!Z1Lv6((psDk6Qny;9&icNwm5VP7I(>%<% zsGzLLJ*TV@K+;2Y zi9+imxZSBrLZ;PPDKe|oQ?qXGfk!pT>R2UpMTL0bfWjIai4x6JE#>I7vXa^giuP1I z;;879u>TN$OH)-(S#$McY3Ag87fE08Ndeqs+$^DnvkYq5i88~(Qca%mx-1z#1AlSAuv zlocq^=D6TwCc!cGqD?6Bv7jUd%Ahw`8i-u@Z*o}8P@#k9KE`|;Nr19Sa)^8R^J`_9 z%FNY{HJa7KmSeU(qe`eCmB(;UCdI1@yE;UH4L4HbnvD1J{bF9^#C%N$? z>Q=ba!D8;vD){D9zHCD)S5sqKDGcOK6@og;c$DFSxOL#4wv=Cv(3{7>aE|q=a$_m& z+_Q}wu!C6k>KN68L>OR26<1ox&?E|#qO3~F6q%`Aaw5*`o5-Qv%f#X-1n8b0RY_e{ zYBSV?5om_tf0|%RLg{SaSm3N({Gso_t>O@L`~eAJU~%e@jbir~bT1$N$YlL*$zBL+ z`j=y9tq?~D<*3%lq?!g{$YNs-t_5d-hCg=hd#S^Z=P_$DHr4EU?DL2^EPFIY8v(Mo zWME9dz@b7BD3%sD9b78q$^*y5M^hCMNba9q-YkSnhhy06sEMR~BR>ot$lvi6SvE=i zeF#4ibglL^e7E`J41pqwnkR2@03GK>u8_o>)7nToi{pTkeYmUVDkR9^yq>_7Ii+r~ zbi=>n2{$cAx$ypqB@y|jE`$qSvs;)XD~^FbfVCySw>ji@6@gcbz;g$3=;TpP1VPnE z1-XY}Ncz$;kwRCtMT7>fh!$Zr6)mS3_}sq0c-0NJLHLwUIFJ5ZwS%=XAwqPG;5Z@6 z1E-NO0!%3NZf=Qd>)q~XLZYsbQuv{PWIYU-iE+(-NU=}wsb!^J$1J|ZI>rAkoa0>i3? zoFa}pr~!R`DgyAGm|&uo5=&7}^{}sRFwVJ?Rqbx(t$WUQW(6OjMjAPmKB{Ao$;J(l;x!}QPx2YBC{ga~*V+Dl;6ph61Uit>mgGoy=mfdVamMlnjin z>dc~fVlkt#luX4NRnl-He?-X4esUJdjaRH@4{|N5hT7A)g4J5^Fs!DZ7Tb$x7L?kU zc!zulEU>(MT!aSvV0q>+@+QC*F~EwxAmFyI&i28$->o=JR}i$5PZ?mEcc01;*sDFw zSN!9>ykmrPOB3~Lb3vrRmNYsHrX+T1%CTvt!8!5X%rgO`U69NAh^2eU{2l_2LMX$?CWnjdsNr z5b4Q3)OjAdq6)YtZ$!(~bXxpS#m?p8$OL@~DLGusBcyHNfck!xWm(yBhDCi8Y~6%h zdmOEx*ev|lmQp*R^89fv*Q&Bo@UV>8Iaf+wq$aUps-ep=un{_Szx)7XDEOyf3}4j7 za#P*uTCKpD)s{QDTp$n^^!zO0NWHd5s=GwgX*!@_lqU@-lbvh)4~nr`Vhl57umDFH z;_N?zM(cGKlG$4FMfcn?rY5byHGhJX~Y`Bk<<`b1aZyqx_ePRCbq#7 zt&4{sNh$Y=bduj#GNBVCrXT}EWDsc1N`TQrKZxjfvUJ!O?sZ*|7}nE|#O%J*f}4#= zcmmBOQ6dk@m^Ys;8jRlo1{1DRg&nx;u|aDJat5`&nD&GH3;O zsEYJ?9#|mVBu!DBGe4hEWc-9*AwM3Z^b<w1*Gi-Et{&RFx$vSOPQI zGP|BLCp*}bgq%pq(?!6&DA>IVDtx6RN*BQ6JxxQKGVen!k7$P={yPFt%`o4l5RjH9 z7@Al#&AJ{x3=uJGj`IICdgYdL$Ln|*`T*pA%Pg#gu5s$6OiLiG%5N#D8jtw4ypo`y=sL2>h?U0#g<=%JY-_Y^6#g zUlk2eCsp>IV+*Wed3d`SK$hG_i^O)epVBA8ykQ&+cZ@qIBKjH)arqs76j!3|+k0&* z|6fcO3Z&tn4x@&u@7wUClgGx%<1SR;K*|(0HnwApY3?Lwz>uetBRTQNot0itE0w;) z$&IV6TtX5(S}TQkS49lN@R!T!_qrMYOykNJ5}JYBLN>w!WUAAq`@;#0_DM{AP{)liRHc3`;fPs zZ&0#5d!hgE;6J_gzAQ~j)g7Uf6;p`hPBnt+5`H!_k3Phicjoa=@-j(TRY3g_b!7VO zfDbNG3ya$>2YOOI5B!k{>xA7mtlm^O)mhPrE!pv|u;RY4G*!KZ;d|OIa8y<2N-QHB z#Mzv~Clo^InRLPV3(RoS;Tsdjupl$|k&uDy>`%R3$R4C#O+YzyIw2hs2rK*Bk6}=B z`FRw4Tr;nz>LFvO<$R*G(Hr;!mrp>HW3+(vJ3aoJQ`MDyA>9>dJuD}*Z5z$~s@#(r zI!YKWdaIBl;vNYI3a)NZLDbs*f=<8AUpX@!(=dUf#x~AhBs;nBw0EyI;QbvpE1nE`MN%9#F0> z4nN)i2X~bVUAwhAjKg(nWj{jru%PszZ z9?wb&y7|cXBt1m-OPJ6XG@G&YRGvSqOU(0{j1fudiyfba^|+Wjv?tojJ=4-mnb;-E zY^JFb=txeuSr9x<`s5{I@;0$*F(Gg;@tWPJLRll7G`z&H`xQ&|_te0Fl z%uDq0W>YkUz2$_xwFj#b!pmjjZ~gXL4_$eo`$;5tM-75A?~AdxuS2{?QxIm2|A&hb z*gJcMR^-3d_Q|a^{{pf9r=!Z^{wKef`^YzH!cU|(O~=}8Xq103VR$`O3S{S7+MPQ# zXi3E*i{Tf7R8;{70e2W9wEwM0TC#)4`sXB3{vDkARN(EsjD^|uJ?Z4`lO76{`&F`f zBq7u3+R|o%dChI+QNIXid=aW>6P#6$-KcWda9Z<&Id6ruh01S|ZIiT$RWV%OFu;$^H+x!;@Z#U2$ z@qI6qIWLypF_50F0@!9NCOvE}@KJA+?GDsg_v$k9z$cmD*8TOrp7KaH*?*Ttnu1h! z5V;~={t}E`Tk$5<^pHOy&F-u;!Do{nUs%s%8P@dW zE!Xue+xb7?Sfs!Q4(xLdd*WO5z&CBcw&iB5bQ4#*YOZ`pSnt?Wd80(5%`&bqk9o$n zoR4YF>Re=)^NvM5Id#?(#+?4@z&X6Q$BERTkis_-GOD7(Xedn9GyMSH;ok9#(0)+#C~!}+AI(0bv)N=*VWE6 zBx|2GZON}=!B*ZfQmGP0t>o4>F8!Aka2UYXR zB5{P?@8Qh_^N*PzYBm^$58CVwx*%O2gzr}){{W;h!Rvs|$9_+=HraCvKp%?hjkomh zR-Z@jf8tKbGwb6JoHnHWY4jSbKB)Nkq(8=aB7~t6=-i9QXgKIweCQ<#w+EkP!0Zze z!nu!6=A>Pp8ijKgOXM`nS~+ezl1=9HB8YoRB+g=LV28a$<TlJ#0aX2(-BxR6vB5 z0&dH2sxpBdal~mV>_wS$Dd?#YWWg~mJj1Pzw4~X@#q5rO*TAJ8L|xYM8xGD5FYN9= z?pi9d3maFN?v;G5M7r19@2yCcCH4me>z&tpbwm94`=0;bkvkI%!IbF9X&KK9(N%zYVgCdYA`l@Hp-Y0(z( zYzIHMxY9p`z}*~w7_8+`7G7g;f2rs0E5^&yHALs3nA(kzUPNmcTL+X<7wX^(-hQ_p zxeF@k4eNa5_RFzb-!W;-2Pbo)C)`~m(+^dzZrKXSB7u^0t7@6klEKYOjg3=(8N}vz ztKtrv+(|UGXVsqrn|l>AN$?^>%X9RGc0Sf7jIl?A=8~5)pr^pyY4uNnyXo%=^j;hx zLQNRqr7{%6~s+{{G{-Ip&;`3CI~zxbhsCHVC4?#M>L(q z1I5>F5?}xK3c}Z7<_SC?Sj|k_fYx~s%g+y!0OBwGGMFYmnDlhZi8_ibu^>{*zHf1S zh?~y8(S4>!)trU*#NMKw9pDYp4XEA?9$`k`{%Q=-ry|6Hs0sgK z4;|@vyoD0urN!8-VY}CnO^T{O2_aTVsmrI!fHh7I%pWa2-GuDavNShJ|%Au`x>& zKhzg#s95RIM<&beERL7PEFaMvWcGN)k&9-KPzgz5*w+`GVR-tY>js?53ysD>wzrs%2IbyR8#)gVhqH47BYXrR-E??7AcqEcNod2XgSG zA^B{c#uom<5y0!23&z%laMV*1dSdP(KPqKR%qMcG5N5k(7dGLJOV9)Vg4+jviUZ-t zzn@YX>r(+dF5$M1xu75KT|_9r?{*PZ2V8wpPgJlc@>0)5;c!KFh&@2M8o-eSV2Vr& zHYgJ|VWzNRly$2HeLQu(@yOm)TBlj4sYr<^|He=l{*_U8_I*yNHR*Xnj-|>rcnhUX zYc`Y`#B$^J4`9=d7dmze>R^q{U!U@bvKzCZ27K&TiaKj=aC6+)K1XeZZ^ho+m?HCB z?d>VD)h3=yC)V%lVs#DMCJb!RRJsxA$xrh1QNK{o zPdAOhpJ3BZ5S@{qKvl|NCwK6I7XIQ>C%I&pTDXI@?$%8#9F8dnildIcx&9jN5l%4Ljg|Il4 zzXwGrFnoGvW9C%AuRx{Ot`eo z9I5c+ih#Yb-xFens<`5_BFC})?qVy=e-%KQDu~+9zlas5zC)0d58aD()rL!_Tq(>K zMKk1tX6cm?;TDVIhqpi%&WE;~*@94Tj?IiRsGOSWWq?QNg3nSfzaVxcVbwq-{;(bG zN0dvh;09s}b=o16qC&1>NP~=0N3+%sa!C+%ZjN-{Z9|8WNNBMnTqGbcV&dUL!tX#E zzOHqM^1#`EB>Fr6E%dT;qDuibD;x_%D}mfaqAR6zFlc0e7o{Mj=TC=um!OnV^W9P& zDO9LzV0p8MG$2ha)if^4pk~nv46XsGS=l~eZ&68>M+LQF-9&Clz{FvT;VLlJ$fbAg zmrQPak1yKt4{Z2~Z!IEYj{_5q;$)tN2@nYaB<%&nK#kPW49C!gN|-`6tI`FSm}35d z2*L~}ozlyz{VgX`918g1KSWo|AdS&CvwGGxXpSmN8YZniH;?z6 zHmkknT9Kn==it+Pe!xyKZzcsGFRYMnoX{qppR)zB1)vI9t z#k!5y`G>o!_rd-PcY|FEuPqb%U_>G@GHAJku5$Ex5h^rgZ+BcW4GaE5=!4#PpxKcYaL>V<;!|DHnCGGZ*Wi5Q5%3^s zvEwXE>DbZl;pd>yVQ$Oy3T&OG3^%{4bPTcTyJ4{X)f~`)ie{@bP}{7rZ(a<`G*Ff0 z;B$BNaU(l%{rqYvsTWi^3%M*yNPqWT9-Ms-IJq7Qmx(o3PV8(wu%y!FEYu^p5oyrIfA zw!D?I5X+PpvxiZkU#+of{M1yv$+{Y1)QbZMD?XZi*Uu9t(}*2r2?S=pnM}aF4HLo0 z)&rw#onQ^Mj0*NL6-IbXg7tMXq|bBj1rTO<7gR+K!3n!I0h<5@-&m2!>ePsMdJZVd zQL5zZ9oxF29M{`QeJ>|!(v`3?w;Fin$qg_AosG>o0JaB`ZB&@W=-|)rC^|xwH>7TI zQGBg8tZ=fuLE=5{w1Thy$RuOC?i=E@f^R79B)tVRPMCVK{vPWQd^6y;P$wTGd9v83 z3LS$g;BQt}@jOyNm+M-G`sLOO8lH=@-1%C~C7TmIX4_f%rt|dr)k4gG7Ik9)C+2#1 zi2XX%FwW4V^i^SVJ{&;N)ad7D-y1v?+UOB^KQx@}H)uNRmt3%7p#(jT8v z;{u)?w|+<#e9e=Ejnx^gtr0dH4_B=|=28!ucU=v`QI+|zSc-0os)GRu5B_+FGm@bJ z6wWM1jLI<0AoWuEdy}isZ#XbkL!cZxQ4z|7U>>N0QoxQ4@J%6HApYfd1!ATRkZ@O zgXx$7rWtuu+5D?*6jrDLA%JYJTJ!m(2f3kchPGxyIo2m{+bezo+jN0YY8_&&hhJ*^ z@~Ro%^n;y>O#G*LQm(i|j>NS17qN#eeWpZW2j+tx$q z#|H-ndBTH*Ujm60dkWzOk(Pm;pjyV62u|8%pq1B7gcZwIhJ>m9%9*ZITOk^Gs^glF zjoQcCP{7P7$Zl*rODt*~F6buW$uS5r6|aA27spfBE;oa%2(*Th$-;u};uq!E`NyQ5 zk7xiBY@&fA~r8Fg4O4nhhiku-2~8o2-gKx8C=ErZ=RYY&dj}1xFh_avKMFYE1|4QX-&x@`tBw(8 z91l6TKt-#U4WhVUp;R{gb9xeLm!l0_u0*$5Hfq zZuF|E3*=WWtyr!5)^X~> zmr4`r|whOlD2-BEq#tTd8C6kb@PV_9L5V3`t;B7cX zlRd6OPGBJVgy77f1xh(bT#~LEpn8JUsOyD%sjwH!d57aE>;>;ye%WJu5_Z@7g3wdu4|Q9f{fGPn z{VMnc6mGPwTGxc6EwodT`&nCx<2qDm@ zljZ6JIudXG%7txWnU5-=?|ge__Xl>Vf9J2Xa0xRC<_GT|;Ntnk5_|txqM%-5?P+g; z)SX9|W!BVb+GW-S`zTWjyBf7gX?2-G=VUH2D0C?_G3}7OeOfBUe|)TM&0nq|)KZd7 ztXk{^Jq}QtRyPt1F%{w z)r&?3VJlE@RyBK~v}k&Z8~xl`g#gzWAe#yfjqvlMN_+XXLxdEzyw7wW=pPV(c)+vu zaf-@&)%AHkMCni7?U_DQzxODmt;8P3d33S11V|8DBz7Szl?mZ3MS1E&!??P#~>gp#oNa zAc}9wuBZ?FOK)~bgkb+HY+1Qc$+}hbWZCGA%=F2REe?3e6OIXDhj`%!dCd;=#Dy4c zaE#A88Ahgp>rKENTy6$6afKi}Q`Nu{{UCR(on5GUa=58*%pLY(cL5fROX{X9VeQjCRwYzQt4Bx7m? zM(jNnPOK2X^kQ^guFUUJgB5PU8edHn2ynHW6U-Ug2_NH&d8*$94H_1}`xuJU{ItxQ z;xlRxWm^xzJ;JnM;ckeASU089lRIg+Ear$aC3nj%?NwZ1XXCD0RzS8=7btWkbsjV~ zzw*Q=+y)?Sxd{4e-E>82zIoEJ{_J7f>Z6x+#b0;yxTu|@{jfFL;EEVyGlUnloVNXO)GmVc6gg6!x~r(urX_)=YK7Sn@fok->*{0 zhRp18_SKlYS(y;fl3sk&wCz0W?Q)~cKrOrkm+^#Gyl}KE-vQ zT-??^gLxPC*mB>)&MiYJ;|4$!Y)kI?*ce|LZ~FYKx$g3#t=d5t<3ryoi`JGr+SfmG z8S*gV%_7s#$PM06I=AcuGFM!>t6!E-R4H^sG|7c+3QpFjXJ=fyKDOr9PFAZbks$23 zI+x*5<5D!o6^~?id3Y84L@A^~^MuDkZO25)-|d#cN z!CgTYU(go>J);&|i7USPz!o*zBLM3cg1y2q-U26E{0LT^VQ5M9ZJ=lG7u3z=h;&@o zYSgw>`1QzsU34g)%1ySQv;r^dg=MNDa|U+MISpfj#kzyv0OTM@N4q_OOBs~7^gzf0 z?sP!%x{-NaOFeF-oG*|VL|k)^vG=Hx)C!!!z~l%g*9ryJ3O3dX({Dp`nlEhb$lB-H zT$Q%v11)KEXW4$%w%9epw4(TFNCC@N1>vP;magLM%ZPB8>=j3kEjRIS1w@n`>;#kP z5ZbW!fTnjM6FsKRX~BoJaz(Oihg?Jj!ti2&on7a*oRZ4lzUuAxQS`zJT^Q$Mm4VS9(BMdgV3{#d z7L8dtzN=Z_mt7rbE+QYOpx}8w;_le%CP1s@P3Gb{J@1n!IHK>C89IY3tQBh+6IId-q4 z$FNvpa`j|F~hL`ICS3(Ta&@X zaNta%8YRKh8RIo@z-Hmw)*%^OM=`nQO7RwW*7ty(UJLw2X(x=hDvPrZ1o=A=6LFh`|1#|1186-5Hq7FKz?%>2 zr}Lq>oDUmF@+9jnHy`+Tmw1(#4W)l#yzR*a=07vu3+9Urx#*zLnl+Yu7g-VI&vRKM zZ_4%LyC{-3#C(Tg)5x1(deXG%_C?GpMX%?4$5AUpZ-nXz*RAqer}bpiu9`L0zU$D< zn|(etz0AKh7Jqbl4%HjjnWtV)^JM)H)F1X;OlwT)?CyL;Yge?I$*-$O5at?q+oI;_ zamE-rfx_emdcQRyJQx#%$|I*)L!L`Zi;o*FButnUhdlDvtq-I}xY`Lj>t5nQ`%N`- zXTz{Tm0}~rTo;r;Mu)&TwW&jyXUtLUH{BwqgVur8)1zxa{Mg(#jrj^Fl|?^1ILHvj%U%@Re#B4(y zEsZ)QL$cA~y0d4glag+GYlGvOgLV>d!0p}VpDzUH>SO7xZ5Y_UY2n!H)oRzp6{=30 z>qoB;Mn+~6?)`_80y8GlNNq8xB{4*!pV^Mcr6d;1o$?ATy}Iy~0n%cyjDQvL-p!&= zW9?0a7!|t0%13fEOn91kjX#rvhXHIZW&bZtv+Bbi>!d`oKxow_v=p+zNp(Yj750rc zu2f$R4ta(+L}DMU-?>}^BND-D=GV|jP{1Svfx2)lWZ@je9Ks^VF`=OZjS~+ArMCQV zH|e9#>@;;0ylx$U2^MVUXaIq1cCRx5!Q?Zzrg?1r>MUa-01P&>i`7 zbI$!w*oWM^@+Q#!$@3KzHlpvbM`k(C2=0sE&zU@P+c%8#9DI}O`$KioH<9$RzLC$9 z`WnF(1a@WLK;skpy2uyr;l#^2%@?p*x!*Y2DSiXfm+|40{MyWW*(KZ8pH{IScI`61 zft3sW#+i5Zz!xk6sKGs<0rSU<0nor%lx7feNIvC+vcfkg!Fl#Vk3NCq8vlwfudnzwlUjy zoH3kwxm!$gyLQoA%+5cb=S_OPOMTFeZwZULa*bSn`(2CcMNQ(g7}DqkCeCgyH6Jd;Z{r78$CxAJuVZ(*L=Px2K6}dUm3x805JuBS#@YGns1%9u_W^)!g@ubZ8c+c^1*&EwUzykopAi zEQj1LAeEZ~PFoFuH#e|q4$>foOpJz`TRw%@j***NJ;ga3dTJWjiqkg^=*auYL_0Qv zie$(q?^7F%esyW3nEqXdd@pPiPH>9R-J91gU_bI(H`4F#LHw(_gmJOc`aqjAf zeJJXHd4%|yhuyf6sp<${gpZ3^BM@iD{WkP?MpJ>?1 z|Co%asu@ZLf<|T5ET;pvM@4U=8L{CoLv9llLBykt+_xzL;G;orGZdlpF->oC6yf?X z(1+^8NEc(MRyNAbAjM#<&1sBq>uJ_CCZ%GeHDeqxwQY3%s$os7&x}I3W;v2-8&{jS zVXHU9j4odS9$-DkJK}l`w&&t9*qP8{us5hjXRTk1(q7UYust_8a(`@TjPx?8HRVNf zuldEWZZTMS1UICPVO_YB8~5P)y#jk`}ZW+z&)(< zImW02NIg^Z$x396ToU?npE0*b+aj+@xCGzLw1QLu-SOA@yWUxN+aj@3M}|uwM@V>G z9l6!2#M8vFeXI?xg_d{VgZ`e~);4WLIXFnei4k2J>xGrLTmwo%%?Gq4S(1Jwt6KeR z2!3Wj_sa11tS$S$F`P?fubE)Ia*4O`OF;@c(r#e!iLf-tjHPAy8l%Xhw4;$)NuCDF zSf1AZF0wi%M)h=_j44LtI#J2B)1=no_@tZ@7pN0k}Y-MLPqLcP7d-fS{Cc7b{W z?3)MKM%t~QHf`ik3#08j;5+9@IW#&E6Stf>4cEmiWXoiFEqYOH1wHiI0a=a&`))5< zBt^u066or&O)wzsgqxi%gvJN%wJ=I~($810Idn<%dl5Z5U1|u1R1Rs0-q~ttBhfW| zLN@1ho%Jo9_K@|Wi4RCvRt*KmTPM97;O=!$1osZzP5F2OM{;@rTy$%80WxWya zdtpyrA?TMXbh)lzPMX~*mXKMwK|%52Uv=pcRRVu1W~r5FZZI*C?v$V}J-jD_r0J>5 zzJT1~$iYriFw9suWH1U=GB4<C#aX1@zGJ zdW!-DEk^XhMcPf3h;gqIg%lDeTT*I2GF_sX{|v|Z6}{=PfVdpxzzcR~@3D4ey0umr zU!Ai9EjPHjMC*R3<#Ubd&&c5cq3eF~V9xAX{`)~PjZxDyX8Gx-3c{anDRlI024Y>1 zd-T&uBo|`)M4oqk^Q1H?d^2B(9;Ti;*$ciCG{g&@L2o?N{45X(Qdd63X7N>tzURzj zxtssQp4`6)YS||^dH8Y>5D&$_JmL>G6E|%#P9E76Z(LWbS{MH|`>>(-Bv}=Cd4GN@ z0N}nl*EeBx*|Ai|=Lbnp`TcBvUHc`#n2$8NplmbI3`sZxw@~srCek$B+{`6Z&$J1K zZD~IVGj8O}^*?5>ey1k#EyL$f*<%lfzIAlhtgnChoB+A(;DcGm2CqvvP~tO&w@n$4 zF=l9sGYK?X(`NI>7%QXGhPcxW)m0f|ommH(xtsyAOyo^xW_;94W%W>hu(}4j{cD)Y znplrTZGt$nmNP;w4V%JQ)AgR)HT&VxM=s+z<8T?J*Eh{DKgJ&$W~Y&!Q#1#B8R*UW zvQl)It2!2|_iAQDyOTPC%QB_k03MXGmQ#hnzq1Zq`NoYpDRyLaa0m$Bx%;#mu_E;& zHv-?y<3*ao^C0k#h>pS%!=Fg+sZ%F}LqhHwmY`OM;zmD0ZtZ*nrNRx*+TF{9;QS9Y z=hQ1@kn&TS4(&9AYWhqva+-o$pPnxX{jNp1UpjKQtxoh>{@AZ~__*4YB&)o!Ci_)*=TKL7WRK4!!{|mrwP`~m0bypoGDL#}s2Vo(^%1ctAZ`{DktrY| z>j>K1kfEAA^z;?Z>+hNSRt16x=odWxB?4q(>FfpKkF>q00GyXF7tsR#h{ckAtWAYK zGk-tGjMTkn^~jx4dZFT3_G=fZ&v1&wm|+-ZaXy_FNi@QrffwN%aAhP;nd2!G`zu_3 zm4+4H*jNS9bUN7;S*s$XD;l%R$~N~DBe1$Au*%kGUHzX|R*!@x zG60N00$!6Fw|Y0Lph9ho;;`!1{#pVUS3c(Ndc*X?yzV7`kTO+GBpo5g4$%p9mcE3A zOuJ&|S%wLf0%5Ao$a|OiadDH@=AA4E9~w8wQ0deM-=K$Fv3ui9jEO|GP2)yeiaYxj zQLw9AEQFuL%jb2IwppSsA2#QRgs<>7(PxD(EWu|F6|4&Gnp$pP-H3;;P9VUI4K|%u zca4zDeXAveRRQe>Eer!a_@NnWgG>)%9fKVp<{9k)>m9I%X;lTg+EA$nISs=dQ0f`2 z_5mGe_AEEy8V~p_TRZ=mW3l$xEfDyOEW@x4)aUeU(X;7wjG{P z(ag!Z$^+;U9eo4P1FD)Go?*(dwN1nuSl5bo(zZeGj2-)~;l;Gude3p+8rlPJ>-J~- z6&t^h>;t6N!e=zEweR0vhM$>U);^QIO!4*I<5eB@zCGF*dynWhTzhNZS+7j~+lT3& z+ks=oT+{FIu}uCu=$X7XoCl6NhM%dbY5muuz3HB`VSJwyn?{<$w ze3qYM`^=b6UGM{ZZjPVHg4+YQl%J!yWQVX?5l$J>zk3QB+=I!vv1n5KVagMZ%_{de zh*G^nCDX1VR3Wd~7vXA?IE<45Q)au+uDYmg7M~LEujbrm_9)IWMJk}G6 zyN@mEyl+nPKLcm^adK3{ilR1l5^Bk~JF9qcc$Z+r>0QE$+A^OCG;SV$)FbIb2!*t@sh9IEnH#{mm{g}|Ffpgz4JsIx?9%;O zv?r+Yqq3cAG)E}$tr>=v-BJ5ekVJ^xmT?J6q_}iVTTrt!*F5%EX*){TC%y+O33_eE z$(y11GsePzi|#+g$os2iY@AMsg_5seGK`K;{DLYX{N=zY9 zbV5R^krGpM!a{0PAyIfjLuynpQCbp@^nfzsA6LYg$f|yNF8zIyh;QTgNtAP>L|LRW z7VRZa!{`~vBxdq-u?bqhi;xsup$S`zo2cU&VW5qDlA zv`A}mqbbovkxj?An!>YDLQSzv%DCEMGHWtTu?Zq+RD~xbsWfFraN_SHI?*&)M=poh zkh9D=#JfYNTZd9#jVf8MM9H&dT%Y)UHBxLf;?YylH%P)BrG-7>(M-`RilR^T&>s11 zQ?yg6xC*i{!6B@?SGm}2qLB4ok$Q3^(m6-s2JMWONSA&A)~N>j=s@pnjRZgI$T-T| zCME9Uut<)^FiYd|i|EH6!@f%bALUHA@oZ@S2=T2^xcd@?*EQl%G2t4`vPS}2q*J`u zOY}Wm0w4X1kMRVV=%-lXCF;>%A`j_IFoBQu*i)={HZt)<#8V>qmrwOjcnE4bSTrrY z5D6-0A@NA5E6R`ysZI)Gg!--wfz$j0{2z9diXi)HB@Y0=j?e$+ef~e#QT8S-Hm3iJ z9hIV{tBkDvOW6kk85A0X($K9GYzT!AvF<~uA~K4B1re=B`Dl=gH6&b*>B?Fn?Uu+#`G zN!>N@=h~bnYbi&`X=hSB(tE@KX0=h+0A?Pxr;ZY^F0Ns?DvAEr^2qS!G?K$Qy z`7JYmW0R^|fic2U_KpK@3^SxsY`R#HJe7KN?;YKUdNFF94AP@>Y?f=t)Qj#)pA^Sc zLlEl-m~DVzd9+|R0$IQ@F?Xf469*G?ho(p&vEMw$TV#MTCXzTC8LrJi_V8tf!Pq^L ze^rA;I_>DPSVHF!w&rxf)5dv_e2kaeVH?hJF+yfi=SX_*Ars~ZhmP_nbVf)|M|CRP z=dNpZN_}+=5xN89R_CNBoW_h(*1yrF=cmeZ;xd#?dmdY4AP0in&l;M$(3(tEu3Ca> z*;RhR_5`HvFx2FoN2r3_uQA|fbJcm}n_{*6MJ9N@ne%8W4VM*Y;X{xq$I_~Z^g-b20)K&Trdp-`dNei zL=JRn*JX8z=|{J%vO0W$^u`}+9EW7V3eIgbx8vS){$r*mC>ZSqaKZA_lnM}644Xu+ zBBg1!yV69YY_&0YHiEj%Hrz(9qEtFztEqr!<33Tj#_2mlaUZKVSd=1gl7GtL=@QOY zq+Os6qNs&}{pb>*ChmW&AY`n~aa+W)Ud&80a$+%*EuNq^l#4Y0iExKhFB1s+Y9J3m zBjHBY5E7L6E(L4O`lJAxlAlkFAAxJsL&Ah-HpX^*S)-2JPf**u*TPM%w`ZSGp;GdQCRcD1wk zwzl}CoXf|PjBJiwE~$0gSOw!JM9(p&@}MUN;iKf@g`y91vbur#WnPR!QK!ry1e<+> zuq-R#2Dwpo{F-n4fsaYHDlWUkIqKI$c*qyQ!=yPjV|Gao@gaR4T1VnK*kol;rIMdK z9W7<5MA8(wNdmoJkiO&UjnDGnBkLU(ev{?%daQk0voAc+wvu~OWY%jRal^gu_1X89 zu0cuv+s)3{$cJBfRW8AQs7R&Rz>j#|CEUC|u>T{ZM)D|x+kw#~Kcx+P^;m6&VaW&g zwz%sQzm7V~M?FfKe*MC~lNj8u9I_~V&7t_tf6412BidI6QAorNU7`LefIL8up}hJ; z|4;QR-n=@f`UUpmLH&Q#@Bbh_{CD;HpX7%UHA@F=b=2?Lmi52VG~B94SD+UPLL#X0L|3f37=DvTw24`(NwXQo($LQ` z6O?Ns-`(&Xz5h~miN#RMXXQ6L?p-A)fm})o(?lms zK^8+ktCQ{!lU>NtC{-Mx?SqH|tdKA~r*UY&ajf^KQ%= z8d9c8?#e+c(z<9`EG)>P+08AuLQ^*{SsSLL?ntCx7Z0UYS1DSY>O9?_2m{X9+Frq zPlqzOc$5P+ZL~P51G;q^>ki@M6GwqAB%`hM0ilQ5W&b{xhooT5Q+(7|c(&=v`H1XN z1hi6pVV3jxk3uDTcBAbS2<{AQ>KXE$?<}{C6RelH?FTgHF7t8vm0G_qrC5bt{yQI z|I1u0t{CM-X@0d8%x$^no6(31f9q-4WT@JJc5__*nCo_8#W;qorLvdG6qYB1biLu6 zGz_t5PH>{dQJ3+;P-Ln^$B)T6_M+*I5+}(JH#OFHD7D)YB6MEeKiYcm0aiPE0{;&Z z^GT9yGEFDcfcY_)H$SM-uJmelYL5kmi_@!|h1CvydbQG$Ay3X!TnSYC+^or$L!YYn z3RrGMjGY|i(7ZVz4j*4zO7^}&)w)MBt-M8SLG8?&D`m&vv3wpoX2<05NUU$*l(DFe z;a#ezj>)rWf%dy*6w4Pjx8RWp2*R<4d1vXK$S&HA#=xiB6cBsg8oizp51P(E`t!kp z|Cy+j&LE54^}}co-OHQtEXs$z{1g3^zu>-XEy{5v$IKRUzcu27&Sn(E&rZe&9jDiu z+!3VA0ld*Y6^|E-bSD_sKhcCeB+*r92bl7z5Jh3ZPD*gYe)Jc;+BQcdFM4n%t+DHf z`9Z(;4+zZ1=4p~|rgyeHoBGj%-W(;2Vb}tw%~R2yT~Qmec@~6kus@BF35(wq(s%$F z8sp&)jq4}lj$B-<~ zx})!$-lJvm?iTCJHvQciIA0J3dt`j@_q$hk-30_AE=cf_^U{Lkj>iXa`s!yQH~$=d1|2R4uLZs&NS`<&z$d|;Of-(g4;|p8 z!QHQnFC4~9dmSCio|LH2vCEU?Djk_srDkb#Z;6O@L|ml(XO~UG?&i`V-?&w#alJ_M zf|=?SV9Gn?L|?xZ_q)FFkD#QMC3ch@y(Ve05>Cx($==2$>gD4P(a{M#1dsT_h;))p znjVuN#?pyBT=48{RcW`lmOMx?VMv4<6lmNLvAK*3hGQMp|BJDAj_xID+680Vc1~>j zL?^aw+qP}%#MX&z+qV6SolM^O?pky2%$;whSMTm!jla5fb?vJC)KkoL`92x4+LK1! zuMuItEVf#4=bZ?35&__{9w~h}4fGr2?+N64VdDBmWqvK?jLdVu5ID)Yh4qE@9jp02j^Bo!cB-rUE1n7JuBzz5S2^^?&QVYFAl?~{LqWPiH+`7$0`i0B9l$ww9 zELyLem8XnJoLO`u1*+=~KrU61rc05)e?tDp3y>RFhw5DTT`e>4m$MqVqXn86*)jn%-LAq+(_yXSQ?qM=d+?PhfOtuZdl^ z)UbI&a-Ial8njWP!3!^qK{WK^WiS)_q7%v==0zveKRRV6hNYSmAHDvkH4 z2N`W(F$X+KP71VV>*E2MYA5nGYu?Pm_{#$FRgX`#=a&+@O}pIXhpACim6^>jqBJI+ zs@#31%)^izccvD|Z`G_SStx({S0`4cK3=xFbNkxoR#-ITo*%1%qq9%fsk|xspxUNG# zNX)RLaSWXiT|?q&ZuU0t31TAsLlX*bdE57N7Y#)sX5seO#`KQpQd;= zDODtoA==ELWfZqo?(j`Jlu8Ekex}dkqmnCkRVJg77acKd!8}tbXvAXP)jznTwVI7X zie_MO`=XsyM?K#nq~SZxKh@hD@zvef0x=u29SJ%aFNy59&-BS_5qL&B&hAF742wHN zH-C{@t22BnSXVz+(A8u$NW*QZ*Jh4qGEW+fihC|ePqT5U8_|*gDl9OSXcscjPErwO zPL;6okulbv!@OK!udv}~F>qiv2DO%A|6>eIAB{`p1apgIz!0=}9x z8Z{j-4VKWm{$&hY1gA2zCc~(Xo2HGeRTwo5MJEZhByb5&V!I9+ZnvW|nN?&sBILSc zRA}^ON=DJ8xWJM~n&$4TW3E0y$Jtqz#*%c{l{I&=`_p9}%4$}Vxb+!DKzs-emiK}N zMUnc*(DO70fmX&a?Onf4Ef9N9WIm}8>Yq(^SJcNGgO z?jJgUOk6<9?kKNDOXbmfPD3ky#%8h19sVS5IBT9wlI_4CG(SjN&O*x;> z2%6C7GsN^m2oMaei-$}PQ9?SXD4vOIn3_a~Y#2bMgq#{s3`K+08mOhe7McP|wJ?j| z)J}Vh5Zk0();#gT04=eJFi1)?2k9U`8*tD^O6i6eU{*erj8Qq;oRQkcpR8u{mutif zq$^nq!5Q;CZJe$ut={ji)g|HSz^cIP#F~I!>cE%%6QVQ6JESN0JLDVGliUXSGT+Yw zx<6M5-yz%cKBF-5vW6v}5FZkLE(8CZiQaiq@ic^gB!eG``ll(}b!$XinS1Ez5gu$D zG4d_9MDfR}d2{(6cnpQv2SzuN7P=QiH`dDaKdjp(aKzBWQZyKK)XEu*{LUM9-AIUW z!4X)bO~|Ha9=L-`m_N_h)NPo$qfOYiZx(mpgk(P8`5Cu3JaR>xReL5UJlPD2c*~K= z!}Rgn#;FWwR0%p{ii#IQc;3_u7ndm8H?#Aj#lWLbdc#=)olYJ=jwdj_d(lnlk0}NC zYZPh`(-J7{n$DH0E3cZD7e0jbcDqJwN=OjcHh~cLdrI-w#^c@9C)Ra%o2zEt7dT1GY!mTEZvPX`Z)46O7Cr`=0f(Gf0En)> zpqiYU-ZRq5W5-Q^GUFL6xLD$>9?B1xpAFOOa&ha5?@LZ){`bth)4?gWXLn!gd}{?k z(`yv`@KU#DTc7$#r5bMZW4C8l-|B892X1ssw`WJ+>S3iD(^L6ZcVGT=cRq|;Oh82U z5P(es?;7{~x>}_jvLo$jak9JnD}OrQmN#H2x1WNvgmq<0%_-C?Z5g6d_T_Jg;y2*8 z8r{N{lJFT}92Ma;U9opBfw&5jE3;vSP0>G^l%TMBVdY6G_8~g)(s3Wka;>Bur3(XJz z1Ab@+%unj}dY2FF2KuXc|Hl7Wv6G`!;nvwcY2Oq8H^kD{fHng$qri*FD|gGk`x=Sq z|E$tU=b*Srtf8-;e#>oJ6|s-g|VpsEd)hUT^VZy%;r z#(}3*_!b2a5Qzvvh<6}ts2y{gUw?ZVeLIikMx_f;GxSoo`lem;VsGEFkG@Na_U!M4 z{v7a*UJFK8?heFT<_^V+cc2m(2SQl=f(59rzvY*`z1>Ac%Y)*VxU~d~)xUCy-j?q! zpyeU(3*Tn!HW*&)U9Ng^iru#D-W%T5L=1WP(r!1^yEB9jc^S}dN1^G#b&D|NiKPwi zchO?p9M!wmh7U!t0`bKp`D?F$uRQj-2-|OAX>)+DSQ$EK<3=5{tK+SV?6(qQjKtJC zbWpL|a*PaLrLMMa;o!Dx7~Xq(S?Au!dsDp754i<#&~!s~ir#kZz8k)O`3@oa6Dofo zy%-F|`4g&qV7)jD$@%9gf1tfs49)oG#T=!a7f~jAEo?YagL6DMDic; z=kCgn`1m7|rmAiK+-*VYf%g@=?bv-r>p}ebeH*qLhvoVoi&x^zR=T_Zbk!hSs@dzs)k5nZV`TyB1 zyh5`>%q?)Ic846|2`r`BhORni6vF>gfk1kRspx`b&=X%V0QDZCjQ?kqq3E2k-yw%DyV0A%Pr@L8!-z&+m( zb+dpIT$ZM}7kj9Bx5^OL+kanlH+!FVRpBxaHWD)^DpE>c&H-CnVH3huZ4+3x;N>?! zFcJ}>k~_LWU&T#>3o3D5Vnx=e zbzMag)N8JCj)+xn1${x2z6#`Ah`!1*(m8>m3!)B;stbY+f~pI=4s^HV?b5DG94v9xl@d~3b9l6x(b0)1VB#Y4aiQlJIf2fkXG+A zbvI(wIlNGpV8u4kB5u+D()A4VGbx}rqf+M&EROPH>(9fEfIJJKdzd$G>ty~zUmNpN z_eP?vQ-vbU?bqA#Gdu1F?(WEo8)lH-nkZLPdA2`Qb|BRzwo(gjiLIP8_8f&{Ys@R& zeKkfh8=zqF)j%EyYhL$Eq&Tb}#3Kt9nml2itgK-TsrmRZE))nq0fYM_HzLdMtvGNkv87ARH< z!uMK33CN)OHdrr;s1EGn%%Mop`+<;z1Q3x4-rP@*h^2#OfdX!2r$TVj6ec>BhS{0u zccTQW7TYM9(~Wnldd)c`-a$(_?K5R38$RPe z1~TYXL=pzTW4nI%frr-)rL=z>-SHa^wxU}Mm;Jp1(t-0_1CK?%8ze0crPP+Y$E%Q$ zSZA5$wKDwQPdr-0GlsGQ=-M%cNOQo74s?-5So=%x0m&OpY+w+T^BqgqV^kFY0cR8I zur!Z}e|$l5diu>$Tew?{JQ57M1?s&v%{P3&)g%`tE6&M-A}z}ij4tJWGKWHcD?)30 zjwiei@t%50aGvMx*-U>T7Q5AtN~Vd2iP=xw!P+JRrDtAHYPJ)0+YltA;#hleFT^|# zxw&*!w$4O5KSD;!?w^s~izgXs)=?{117vRcMjn7BH_*pp&}F3P*_);2b=Z=VA_{ca zEv-VW?r!v1H0x;3rdOgpb>J}0gaYspdwt34kfx;8__AkF zYgrv6|HRZA`_p@_nv#=IW3uR6{_nlEmJ@=(i>rcdMFJ|mL5beGtczWW~&D=RE z1%@6k7nKfq3Wi_N2kA(3df_6;Y>+wXM0f}b{7v4P3IMFTFCOQGVG2;I>tOSdQ2=Xo7 zD=_zV;!}zY>+c0DIQ3H*_*tme@~rnCE7fpbb|CqayqNiK*@o%<|5&O2Ig2r+ zs^NsDg2l&y{SPh&3C zP#tnD6l=z^eq{z{KKa#xW`W8ST#Kuf#Db(t<^X9)KDKLK@kXW!3^5Xw`DUe=^G1=3(ai)@ZC$}Qi0NKJM`D$mDK_nVV~{CrXkIblL_ZNaLM4d}I=2(L|a`+B`ZpTEKc zB&#f}f1G9rw90;dUYRPy=~;fSL;m@4l#di#3m}h$XjmBtZVO`rQk)yvh8qV>Yo9)@ z=B!6Xfb?4vU-2p*XwpsQNC6eoH068d-3Vy113qGsWw_C!%KEW-aOK& zF5aiaP2ueE4S5Hn-pjce%UJBVJ)D_I#CQiYT-a1{dbbY+UV>j()Mc*Y2{VN!XPWx| zUah~=*il|k-4&OgsIHHMGTJ?!CU`xDE7Y`Na~^I}e@4i*Fk*fFI7Y@9>7IOLfR|AU1_Bb{GWwa# zy%FH`@t=>QvbPQ)cHLliAB;)*&#J{}_{O#&o+5L$|2zq0_aHud5i#6pO^cq}{r!dW ziA7`hvB4W)GcsKFS|Tui9ox7wIl$5XNFk9ZN-diRfeSxc5K%q2vitUQD~dQq|`K zX8(#1JOsT>ZcRv#BbW|G1%1))i+IStE?y})Hr~hNJkd@tnNmHk@k-rfpJM4b6`^@i z11MB22=m;-g${EVyvGH_!y`GV7{qqpa@bQsr1b$oL6%Npd1cy+w{+ej!DSVDMw`+{-ro^|dES$Ru8@kqesL3s6|ula|E zmvhLx@V5?uQgRunG_4s(V$?-qd5Ht>KuLy-Ui~0yM?fNts1QkZ_IcBLmuyeV$_=u! zXqWYf!9&Z)c`=QE-Y@I1xQ}Qq-*Fu+wiG*Syh5(Tj<$M#rt1f2(|0O#C)mfAXbr#E zrkBK5A5lK!B933o_R|9?K)IwZM^))cmChF$4VYHhIaph=RPJy%V|RdZR8tw2SseE z%Yxnp8eE9PT&9gJUcO@WU!h29AZn{ZZ=O;E@&p+I=@9!&BdV7ZhO7 zdOvQj+MHs;oYRNxH6!=kVD{Z%cJ*<0BPRgb8WD$;8Ys(x$$H%}vM1b<;M9JleWcKV z+{mmC$MT#5DO+>&GL?;eWE`)bj%U2NOERFn%}U*Dxcb;xqfG6hMa9HOCrD$AML%q* zp!676jNt)-Nr8o7RA063RZ?b(y=!HEwshY9nC^kZ@ENo8w2)B z7XPxQTdYAYN=_+h+BCe_Y|sWH*4%?mVBQp4HH5S*1M++=nae@Tq(Pof$3wTuyX+7# zS+R;PzS%019ly;HYXisJ4Qp65Oi#YFGSt+(x#V=6?@{7gk6D{8b6IDIdND8e5q-c} zqe;??z;7!4?gRCiB~;{;H6k#qdn5n)yi~0P#g(_GZ7TjDr&)>%F9^CXD z>ynSlDyqz6-!UFdbTwQw#9Ry|h~%oftNi=|`|NOBn1x(G@lYF_I_i25p@)a>V}+pb=`px-#P z;AZ3pZUlww6zXO(7z?~DPW=_qTZy^n*>8|hZm1K~>WD}DuswY-uyGH6-3<=4*t*v# zT(ND%->_)Z-VWGZwOGoI9r1W!)PnZRF={z&+WaZ*A)$y^s}F54djr2sfiu8)8{Dh0 zFA+e#4+|oPEC>wpnTH7b_#V;15Ea5K=jCc|Ln5un}K)# z`N-~_LG2*|aNkCO*df`$(4zK`0VW1lz`F&rQD4yavp{+zDTFZNzm)+Om@&?;1-Hxg zn>ew~SG{^03fov+ZU6)o?t)V-vNDd|_7BkO(Mq4o@w9i!(Uo_~@rg11r^6#Fy6wgoG$49sG6~e_hfwSoE0;l(^>3u@)A8h&D&qQD#oVQA>Mgz@Z z$4Fr?hoT_dOYRS{TV5o|wSY`Q#6Z4P%<(Nw+TloDcVDvf!B<@CM%YlEH|9q6z6ths zsi<2TIf>M~{`oA*K$wE-xxjx0IG(%eMhfW9eD^0Q_WwTrrTT9MxUzwvwF#N1t+S)Y z|KMXyoU&c`jTSOPMsQLpf!kqY{w7TI!<(fHg3^&{OC7H$N((jan;4CPIg*M;VY3rJ z0Yvs21j8RphlmV=7YH$=l?CA!3YBC8`upU|HrGzqih!^0JHjr4Eu;0DVCgc?Du|JKJ0)WXnXm!v*>XJ=OlXr|Zg${U@8d1pSogn(_#BHmk~*k7|yDuiE4Cx@Mwe>yuZaBJMF|Lqe`t@}`h=uNo^%oo!P zRi1Gf{)-V=E-CF-cVq>%oJIAsmOhLC0DLD=P8@X-;SEH&s2L{68lBZ#*9i& z<@tP?Hy?>d(~B=6^D)R+8iy=ygV~eR--2uq=Nv(JTU?*_>EHtp<)z{aUi$f+|Jt>f zB|FW1LsWF7{rW!Irh8(j$dJ|=eJ46i+I}|-yXne6A>3G#HsU_&}qxH z70DGtRVRvCRexO0@)Sd9tA9<%Yp9x)lu@Ht8&l@AMj*>l#;-JJFIF)w9FmdsRag^| z^|UeZi;~#5>o4b{?&8B#p6dw?bFMvjSCXcB8ON#;^FIb^g>Sl2-fb z$DBc)^1u3Q#7Qg^&)(088NR$dK%9L9!gP^dwujH66h27Z6G;IZIEWwO+xB{qpmFa) zy1^E_?0s00KE5+LV*|NfIVRHPhhO4&1SWI=lEXw&At-X4P zHI9=75r-&E?2nTt3G;k=WLdSVUn$zRO=m41n`1~+X-_FPMhC`8!`N`8>QdJK z0?&64<*SjN!-N)K2Jc&P0z|XfOewt-!Y0_`K%2gquS~HbTZugqp7n`C%s-tsB+8&I=w-mcd@J^sBWlaKIPV57x#Qr(1<~LQ@57Rgw zzyV=4+nQPEK0d7+86p zK;8`F^OoC9n_0(+_e@t(X;14jFi<-6v@oUlg?aX-s+(yQe?6Tl2iN)DVdC0P-Ce+*QO*T^2cmLE2%Bu||8k_RDmbq%$t8JvzIw0YKkw*y51NxwB zt$2l0eViH_oOHx^kuQE@X>8>UjBf+8AcZtFB}V*xd_ruR=BW7>28SRN`^c z+*w}F2ygx+Yhz`lY7A#aXh98H{>p<=-}JCK?0vG-)I`pWia+Nn6%r{;RGqbYNAB7< zvS_uIIvI~PWi9g9?~6^3pWe^Fv^LlTRns3~1)mOt z`uSII(4RQ|02Fu#;!ud74+)5o=yJBn`CELb>WXpuoy|mi{>V>iZoMYfyKNBXi>q7_spu3 zjyOBpDf%H(7pLL#1O#fW4!$Z&gGM|#z@`0?xjm6j;MFa(?OW?8OZCgcB1yoN)sM^Z zVQxraR+>BqG1c{xHJ?`J0=j;{xQ71sDWyVT3zPt zz@=I_W<+pt?C}d)UZxsV#q`Vd)PFpqo0gLxXDF*G&gJgiJ=J9t%I=d^7L$qu!&FA({+b%jPPl=G1yu}C2njr)hyeg_oerxgY#qe@RW|5*sDrp#EDw~t?I^> zQ?vbxnkm#38=AOuS@djw@uv-kM+5HgX>Q}u@nP574?|6xQ0QZ~ZHpv*LIIs_adsBeQ_UtOFgR9go`leSCTUGy*@{4N zpsm*zvuY&Q?I|dfG(~?r1%!<{)KJ^FL2Hj&X@@e>$`B(&zG73|J=pc4^L|Q3`gh$_ zW@Z`1%pH5+$LP={Gh*1*#x+q;9=Pvn`L@E4pv_)vth-I#V0MkbW5p_W=5Gl{>4NSY zi`I%VYI)^EllDhWW$_ptX139l%j{o1j^HB__^RXENFQx0V?2&%sk9P>I%Y>uvo*+- zq9d4V`Y1IPa# zw#es7Q5=L2KJ_N<2&HrgKO*#bh?>hFMlmx-F0p1a zyn|v5X0LkyLl~rAXbToWr%YQpab`FUfW9!p0)Y9{R|AGj`nl55Bu|>!?Ps-bHZt#8 zR_!axano?l{}-u0p-d0TL8(@H;k!=#g~F!lk84x^q_vsQ5&f9nqr>&!0glPxsJp(mscO8 z#S=j;7f2;}@TSe0Vr0YxJ%r-~GqAr=ybLFpd^93hn7Z;&kdUC%%+se2b+i$SON56` zq=Cl^6%;r=uvbhScw9Z>!+J6TPI3wt>0ZH$YWB1zqw1LUJS}eN>z(5qjw`gcCiFxh z@C1O(znI!SO{*L4g3&%4xM#0&j8lqe$hV%Rk5l#=s{WB6G+}(Z-!(>Q$JIj%U=UU^L%71ec-pgxQWZ!8 z#(SG4M{%^pIs)5Ot*wd;5mxTGt+3#jMljT}s=8}6d?r%@%+!6Fif-IX(F~m$bBCAf z52^(#G*t=pOcJ0C32FRvf|)Rq7za+_usaygp7oRR0HZ4}J?N@!=@?Drp)=`kbUq1J zS@j`iN^>mj-`2Qg@(I{mv@9coG1&Il%?BFN#f5fN3fd%%#^RjVo)E??1DPYvr(ll2 zq1yg7M>hWg8@c$6g0JEX?Rl<=vLnqFbZ?S<$Ygz3nQSF-;BljkIXlMXZnAdJ?T&?^ zd1vBIoQU^Rqhs#&Ll$G3TShd;RLMs~_2x)a8HRI25Zn^NDApC<9~GZA?{Aqwdty)i z;dC?o9ySY(R#P+x-Z@Od)u2b5_^Z0@%i<3GR+GRuv{iIIxd*xty!mIEh zkb3@0Q4Lp&9fB7{`yKbPOj5l>k2Zy5#alEX4!w;MoxHVdsg+T<>}4FLj2XawX}V|2 zv8xAQYVkGRMI`$-zyr5AVG;^?!wPs;wdx7zs9xTJLXDMT5AB$8kGCNF=YO?0ujnZ6 z@$Z2g>Ibic*r)eF)n8TXJzYSp+!^=NcKqdbcP8P`vq@oC8uWRbQTYdj2}_*m45rVU zRpxAx3yw$H3KpTp<6#JgMzF4VWx;7o%yKLwhxmt-{A9bN%I$mg>gIFv8Ovn};693j zpI)*L57K{wqrC8E^IdTe+(zx&|zyzT5xKHc{A_^c&#x?9;4lMJ$ z&^L&(t=2`;=U06a)V_1jteA4QH8^+pk8_uo!%u!dz7XHPxOr^9EoIBBedS4?kb>F` z-Z)$s9laMPg=HUE+grLigp{l z-TkG!3zi@t3&lAVCA?>kwr3_o{)2#~g#QQOHkfg*1h_E{Nm>B0VWxvp5uQ=1BGAgV zzP_SoNa@7*>~reeo1&6#4(*~&Y>3B7oa!^1PvmkPBVeB`dZx)msHU-(O~GHGD}H zigdV>SduEoXik)=k!`{A7o+eO8#tFiGf4G`5-k{F83<^Q$^=g1Doc^E9`Nwf7cU_h z&PB&~-%T~d7?Gm1mxjzpgo2uwgX3Zl33wBR_%T$|P3{-R?83@YL}aP(-oTbR`JP}Y z!EMZQjg5^>%g#+B+>80=4+jI{*cGJpidA<}N}@5ucaHKJ8n|brB4_6SOrcq z%8xqK42ISDeJk!<>DKvXSs#eU54 zYV#6R2^^N)6;Wh+fIDbNq`Tn=ic3CYFFb^Ym$bF1Ww0{IcTmh;56SfV%}>cn&5jz4 z+-~^G-}7BspRA=Z$`a9P&u#C7$>(SHsGw~)dUa$<*K8Kr$8fA~+K#%}67>HU%Kyox*tigU#(Pnaw})E}jR<$dGQcJ5DEfMI}#5G-AO zr~-hbqwzuu#OH`9BX)_?@vs8{EC9T~a^x$dWYOy&iwi{ISK@SEe~E~Ovv1^JMBo}? zhi-WORt$&%m%$ge@UAV9FHZ-AE__Smu%vvD-_7dg$+VMo>h@h?4Uoul;ta4ugF?50 z=`Ee=QvE{J`Pzl0F4AcdsMG^WRP?T66$rh`kW|cCwjqBwKSGH@_ zT6K;HsyD4XU$^*qm90G^?AvMwA1vTaj(iusNlg)F4!%zZAUtIG{Iq-s>iAoS-g>cP zXXuvIxxV1pj>#Sq4x*xD@^rI;A3Lp82Olzh&!O9s08NH0Q9V{?TD2+FWUS5u6y<^v z=FwykQhk9Xk>N%p!NAMx0;if~vD#t%Oznyz?H zNd1Uyb9h#g_$EwVO%C%Kvdi;O70osZ9aaIQ3xOR9G3{-5((L(#PCHlOWgmO=*Oev2 zJKUS8-%n7j7Q{Ii^Oh4`TY9uid|thTRL5K^Yy9C{b^hx+6oi{%y3Vk#yEctE?G}}r zveRF}lmUbZC!bgt1HS6;kou^bGEeQ0=etDK0m8G;5}c5pyDjztoe~7!nEwD`07h?w zxWBjrqJ+Hu5WJyl05XhTB~F;!VIg-2sXdMCKUZe}schgGBlJ%HB>;-8f7-1TfO>hL zcJ#V-PXl1J3Eb0nzvt#o^+8Y@xZ;4=DcJNv2#`G&0P-6Q0$>n?w%nQl5cp+Hj-3Dm z-4K2QD|)L?v3Av}u^xa`0??+HB7``lLOJ=(M%=rrTm<{6+{yW90RE zrx4trt}jE01a4{1Evl;NR$R};I^y_caX+WCc1IrOwX|wVw=dw2Y-t=@O%7M(2AgIJyGbO6<}s4j;1Ju&$;0gLt}DeI6>d>pa=ToE$mvL5=H( z8MCv!X^(}Vj|IUq8DZ}Yupd|Sk=XU23a~;k9H$!`z_cR^_h9fqR}R!$p-1{`#It}# z27t4HAjcdC`y&nqkvRiJ0VEg$S?VBu!x8|lSiu0L;)~0|NmdLJ01~(p$5GK*BmQcY z*eEnX{xT8skDin{r*G)f@&3X;2FeDBlXCNfGQArj!2-~UkGAQCC*J18D{GFPluF?bUGa_Lc;O$7Q4%oH~O^TPMYV_P%F9LHBD&HcJ*SvpM> zp)L{+W$T?5n(3W~OY-v&TR$9+PRb*ddC2<^dK+KDrs#oUC?C6!V8!$~`6Cpzo0fg6 zySw;Q83gFi<%hUO;*Ie(;CpchBI6p*G)~fU+x@v2&e#Ucdi_@>9H5ODa##u}7}W4R)jD_H&`~>^$+r&$C+qZV`fN;i?l} zEx`?aHn#$3`ZaKAOBl6M48FfWGm>((knat+03W*xRLgsXRi>TcJ>hi%@wyYk4M!1$ z^WcL7Uxs|y!qzE4Gj5USsT$9k6Le;}KC(4d_P`h3 zYElkd?2>I>A-T6tjX0Cn?FQ)#&GCk;K+I7qX`GiMHl~%lZ&xEY23b1>9X1;~=FR>| zJgkcadzkCk5;|aw)Nkj!v$sIvAnOUv9YyP)LxQRHCK%AiQ$Hr_`FDU-P(eF0$ zAzIhOa)Vq-Tv1#r&m&FQY4~J1PyF1Fqg+^uuC%Sug2}Gp2W~k_ExcP;}s^;2}5+(uMLD1*ut# z=e+12q5T$R5;36$=ywfK`mSsS!SQfn9X50~e-NicThlqeVzP-}o)r*Ur#9UB9W4Hq ztoWrv6PfRhkPe>SGotAE-@X)(l1SHSr2kcC96O-H@Q=$-p4Sl%yyZ_O0u>1L_I2nWUC zDBK@MDvA+{4k3#a#3eaBRJE)<<6(yJA+v=UMW^dX}ukGQY#RRdAKhs(HSi5BZ0!OgSgz2wujEht_FCz7X#@o*iR(e&X%o|pN~{8wAS?P z#z>-lf7O((IUMm>-1qi^-L5#bqjct&Bhv3^S#aUcjP)iz%0=m*?+EQhh zYEZ*7t>v85bx^kywOm4$UZ&i6e?5;?MfWx<4}1_2;@7s+!=}&;9$0(EQGq##G7ui>mZc9wtDq)62Sc-7k0x|g~(+Z1_Hs^mB5)(nU%m3`G~c= z(EoFmX-?;hT~5j1-Od_2Xz`60F;k+`SIEq#^dPZ~9EvMKZJ!k!HA21HwT*K5vPdOi z)k3}#<2CXEM8GxHIL|uT=4`zbi6G0o#1}k*Fv)-j3sY_e$<4%~)!A7JUO^L?XmxXC z$bI$EgrT*=Es(~oS6uir&=Nl8{@09gK8R-5MPV-t2 zoUqzha#s|nvj>8*Sg|wDcS;dYaqTREzo5I?$T#}4C)3(5W`rI!$FQ9#<%YW%qv%Pu zNX`6aFR{$B;e|Y*7IH$%uM+R&%ZJpP`hBSYSx=j#j)DB1!s-}U7b<#bS7f_gLYJ?= z_mbsEXJ~0{%2(Eq@tB<9f*`oA3uV93P*{}l`1t{Z-fnWtT+x?O4^7+IExWE!+l25~ zCgK|QCquKniS$l+p9>I98c&x?LEZzTG!$iD;Ert?rDhQDf?kDI+xO?zYUIhLUevO_ zdYgoeTs(bh{v5}6;B5Y!o7O*KUK~#XC3u;3&dS2(DWBKmdv0&8C~)%F57u#L1L6K; z4exO6c&1@0kHW(MmkqU`ssEEmk@eo!qc-bJU^Y-5QIl^Q8DbSyZh*Va<99sApmSM%K7qCC7yoyuQM1XAB7o4gG5>A7xYbtwrBSUb9hT| zllXVv-F0E@Kk(6XF-qi*ro8i}0)TZ9o48YgR{LF_J$#Tq4HQMIko+Oc0NM&v z`ZW$XtWF`mt(uvXdg;G#Ty}{qnE5-+axGb)pQw3?w$<2`c9(1oy49hF9Sh56_NEG0 zTaBUERl?es8;y@{Fw@ccO2YRndnjqZrK8(u5$i{sqa0Bdamd+OpI?d>X-V4WCguYl zuvH-c)BR-^W1QyU$#$1L$}^vpC#N5p!^R&je#r4#E`?Y-0RiEV+V@j@;u$)mR4t;c z-dGW?Jfv-jeY1=9BjUi%K;Y$*bCsUYe9mnX&jo{75-g#&3^{%^iBwH6!4*TSD@~9C z9ToYjiqHk^38G_71TAfvze`Oh%^jAz+AEf;SDLUb?aO^}r|+^Wo*lo*b=KPXgvkAi zH%QrOxeW&jNMSG>qUOj*56p_b+sF3f`a1qi!uK1Tf$M@?x&5qz?g4KNA6J=x%~i%A z!JgnVSeU}NgZM{o&fH~9lDF^>&?@BT=^nNM~zUM0yBk4 zu$L0X9~2^I&)Ve=Gibz=z`}?c77>yA%^(f+Ri6NZBCOKpM4p0EvP=r`Oy`DCI-Y}i zag)WSN|&aA)4gwbin=tQByIq|2oK?`=CVqbD90gBu267n6$38g7afmsP5CfTJTxndfvq~sQ7tn6)!LNL zC_WK}XM-g@7npo1%qT_UuhfW)ppDWX8om}9Ue0?S%5q6^#M&YL3ZZQfq+AQmsdQx9 zYW(-3I^}l_`o#0oh6nfZM#-I-mpU5%aFQazcaXwKArp`^RNvQn*$DT^wiAm2Ox;g&E4>}{Z< z=PQT`=*Yv$0^8Qh>%_I7RW#vg7gPr%ZQ!fsmis+z@U7=nk;S#4vzp*%3v7aI)_=9e zcQY(m?pe~obTo0dN>zsC%|8ce!sX6p7D-G)d|gQe@Orok^IX-1nWT2Wei(x3%GpU= zHTUJFDWID~6;kV^^O64Phhko}mwbwwaA$@=2v;ykHw#U1{XN>AZK|#Gp9s0(M>JaehI5@Q|E`sny0W$Lgmu(Ltn9BXH^GiqfQkF#30zn zZH)W~2jtM8{M%5BtIzSgu1FQQXo5KY4y|ecQyP@azwr7yDa1%$5jzJY7j4{ik%`Os zV}EKJRfGW8Hw+GP?M?Zcv@{L|tyCV$5+-Dg$?znvfYeUNg=?@{S4_C&Cna89prodb zJrS5L7HG$Z;i44wN*WdLgAi(!KDbaxl|KJ6r1LNl0CRa>q51#O*;~L>y*&TJbayvO zgLHRDcXue@p}V`gySr05C8WDUB$Skv2I>FteoMUS?>^5;oEOJ^Pwec>es=fFY@jq( zQ!SGk@#NoG;BM`X>7}HEo#netn-cu0vBs1_K%C9*yF;|?21C!~XXf1e`kIoH=mo4b+|=`BQiDpfefif- zCIneBpP$z(pI?}j`+0BVc}lr%vYbXe3M$+Z^qW2g8XUr>lo19hOAa=)`e^C;)3qXL zJ96lhxv$=~eu$DItE=qN?&HSvw!$oytb#>@aZ%4Ajr51du%Zn*gyXRCjqh`tZh>y> zCadw)((8_eG{~=rhj^sbbp4>q8g2Ut59>yfv7}_})l_IGE$J0sHFp0_ZI?imc0ZkZ zjkMEwh)KZ$9(U||V@kiRw#}I-F<&!;3D-3 zjA2*xn9`H_PV^O2_Ra|EIc7>>c38`e6hU!SoZ4r}=2JXJx+8!5Z@|+@)1_==R);&fYNZTF%9xE#2w_p^hHs7TZJxeddO?H%s5`133t%2d zxkGE=x$5uxY2JUU>YP5(v2zOwHk**X#f<3aO06e1Ns`oFX<^L$fRTG`=8Z`4l9ZvG z5EsYsE8u=N^J@;rLJ@~X=F#o-MD_HMO$gE2ClPNg$8jEl^rTmPuLT~jeexPq?wPwJ zrr@rM8$a4^8ZXQuOcj!}K5rau(D=fEu>#mbBGH~IY)drRlfRLH-nB?~Xq4qnmq4zQ zfESMaRAS|ApIHE3zlgB8MDU*E14XuZ{DkUeOM~nelB8U1y86&BL(D@h3X(=bXydiR zdejqteMPI`6{+16Yf;Oc;gD`#X%EdZ(>lEcSKC|)-}={Ad0%sbu3Iz8tK#Cd+3j3Z z4~PB+eJoGpxK26bFnd?F$w%Vv*Sluz`$nUH2_sRJ{&c;I;Fs6CR9tNU|GqD(3Gbnz zp87c6&XNiZt^+3QOBTrQ^M*O*R0=+z-ZcrY6na?E|fb{nq)523>Vsb#vY9hHsDKd@c4YsRa+0$)8i*JM25p zGLIZP2FA{;y&g;@TPUtbEWdKG9ToD31N>%?U4YB z;Gn;X$uFlihFo{h1^ zmo4Xuy$mANV60pbvj#6gRJ4e>_|0uT^#NP%jvy^)8k&iONKSm221!yW)KX#Yu3ykg zfG*=pQ_bY)w>flrc9UwvmSow%tczdrW$V+7)o)6sb;?hsw!Ty#MZAGBG+pp~mZlF0 z1D7NxkY{1!h}$wqzlapIG1V(*q)>}_s)&AgtQ3+4Ii|iHfR<`PgBk^|mA=gz!a5Vm zZ&t|8;PRQ;L=Wo4#gxM<8ukpym%dw0T8<|++Ki1 zvy(;lrTR(e_s$do6u~H@9uxe{;)h&VGi;`Ef(;@tS!O~SON5x#A$vSpa`L15~h_8fX`-&0dZ2)mOl z-7l%LFVv6yi{Y@0W?g6tib0x&6YPg^DcWco!*dk)D@KF)7!mY(3nR2+5rQR@Xe3c| z8a?_KnSqkKwoK28*CI`c?^&Q5gb2pw2==g}X;pV5EYV)I_#}my=Y8m{0W8 zdgiOFVQF1EGEmZ+GgM)NI~u-`^o}410%?&v@kNxO%t8romn%oNEV_s8gq;s);|Q%g z_!6wbYoz=)*!`}t9ihTAQ@I+t%>2v=Ci_S7;YZq+-;YHfpnnPbZJokBB}7VL-JXiuC&~i7AJbidyvUN}hh+z16!RYVSsE@n`!+ zT%vC{X+TVREcRG$X~avDA%{dWL51n#?=%7qJl(tw@rF(nKPgB6u{U3w6!D#q5*U-me0Lm*w@z9sMOP3Grbn$@(@5D1-;*_ zd%tW+lT2gqJjd=3uA0H;zd>y8zj3t>f`1SB;w$gHmJM>kmRT)PmbEr>_j*) ztu&f1lSV?ML=WT0UKm;{m)6-p2h{|HcUb%sGFA(QSRh{r>sq_ZOIX{KSht|)PNM$N zCY4U8$Q%uorJC7lk@9jVh30KTm@fgIDS8%*E3FDtoy`U7a&m;om>)HniK80mJG9-{ zHGM|R$a+L^W&MwM8v5$zuA%`OhC)h;R6elXCk}G6%;j~J0QcutLj6g{v)0IP(79x( zxNnGUFVlRblh@{6s8iy5zz#BgAwnpI-rxNAwXCCg?{Zs89)Xfs_m130pkE^5daOpI z-tkQdwj^`JU@rmLZTHDr8=C%VEgfD*R47#jUcwq*5uCKXWo#+3ywcX8g55)jsDhik z$Ql=nv1BnM5}Le1)?g1Zk+e?mDV_Ic7RMg%gAnyVnkS(cAle3@Zb2CuyRbp@4E&SV zty2-doP}H3z^=ku9^1WtVR0<#1AI5xXR(mog?v@De$j8lCz>7atZst4c2JYHf^xjckaEhZIXkdrND$dM zSQk<3F#6;RM4SyMtZgkRtOz+zTMIdqGfos}S_3M^ccR#dsV?aG~1u zx0%@WFYnm(-*F1U9#*Uyk$z(A;K+fdDA_{LwIAkg7wmAA$$F!7q4X{xhS~uH9wP>o z?fNU#5j&E0Q`kA>#jkyfbIa?FGmTf_ zUll0bpjq4i77kz3LyEev@xiZnBFg@G;^HAj%1_1}ZBz2~;aO&48b;Vhp#P>W7~~lWn07 zTkTsW2;@;MRbJLnCkRLp$G>t_94NWBtmbhafUI7mECb691ouiZ3wJx#7U+X*Lpx;S zT(l-1>ifdJWhYniE&db-IkP@v53jHHlu~jZX$Re5c!5LAs)t2RuE*GI(bSFCeUNuI z+xk7nV;IjvP~_FtIqsHicEg~x;wB=6_2`hWdgODVR3t#Z^$k}Kzg)D76! z{tZQ-X2r<>!8BybJ9W&Mth^%{*=6vQnF%SAQ|PfR`8t~6Y$uU2X?~HwuDH{<3*((& z!u*@-le11(qoeqU1QH0}G?SPqF#8p>VDFP#QkUS`Z?PwKRozkxX=v#MXlC}|kNgXv<7*@GozE!);4XuqK_+V2H%)m zPCTRJ4b>?`SQrM{y)Z&lbN#4`%MUAz_t!HodCW^K;SuE5iSV$l6vaGzqC#$W zDNdeq)AP&ZuS2X*5asdoitossFtw^j%m}Y*x2IFAp1|I{c z`01q5xMVF~c*S5PFkp;y?R-^4L|tvLGjd^exBZ%il8l4dzX)5iScXPR$dM;Tege5j zpH*aZf^dlODo^eNbCEPlRKY#Ut@jKAb=yRtO~Up>mYYJ)?`tPyd#8wV_|YVEuk)_* zx_eSbH0kxdMC(_kW|vZ`cW0Kdpd0Yue`3Ob4$~D6z;!xj962qt zj=&w9I&3-nTjhgi6cXtX!(v{AxCJfA*WSlR_ph4ay$n+}ye8PDB;alc@69z5a>wd9J zn3_%>^$$jn%&BcLz|XrewxoP9$DSksemI(ymDEMeOjJz^Uct8nd}f(uHbs^dvUjdX zM<`#$%vaq{ihG8X9xA$^($As%xh{3(>~IOegrgRhZZ4#0dSic_4LRp0q$8>rRIgg3 za#hEa^hOCyHk>yIvT;La;bGfFrck-WcYIKa5u}xJ&cvx3^&?%kn5&*Fv~`e4AK^wU zd;yKVS7v6DZ%55@DPF*T(84OK*9=)Yj$afhGT1$8((AJzFVgz}XH{ zE66!C;C6@U5^M))7;4&Z7R&@`7z_9tw3(SIT_So4^tGo#V=PfV(fi>X3o<@2QXg`m zk5X}aF#R@xdjlryy_%t7-gBqigOm0nEoP5+w z`7ma?>xK3U$Fzub3q?y5!I#-V*n&T%SX7<+3R=v0BRvXV1dhezMV9yo>m#`0@CeVboOg258&#{u0Ss z2sZO*rSePDbfQ*TK99L|`r>;Rdi7}eFO~uwW~z7DK}6T|^Si21mc9n6XX*|o0KUgZQOf1e43g*f!)+t=sZ%$?vyDq{vV{Y%fJQRhvuwEBy z6VYz4R*t-^#l+w!>CX0iJ5F*^{{tw}cy25<}J)%d!8 z=KNER_}V2#i@O{aF67-t_rwqhrPq`O)(M(g7DnvX84|boo6BUrtEh~T7M_Wn{)K4N z{*W@J;fXv*hf|1JACgWVq7lC3m#msVJTq6~jKgqoI+`+2x& zrx*3bUO81>a<>*}*uD_nJVSm&hp*K%$5e?IWadoMWYb0Exu+-oy+Tz!@jl{tnHSr= z4)RphIhK%Hxy?RLpu9D9`1!bn270_cBFj`&_o5Gu%KSU1d6ADVuONnrpBHbuxyF{f zea3YWzcmpv&&($XU0lKF;8(+StY)98u=`%ivXEu)46HMsY@OklhZL=}V0z10t@WP% z?OXaamqX_@ZV1=)ei4$$*4`5Qt36EdOE}i&cJV}g`(Adt{;cxRvAM(~okYbno*`uy zWF#LL1JCzMW#y;n`Goa718&aociiq_R`NbeH0gUKd&R4aHlVZ+ORcew1v)fq^)lMm z;Z=lf5!osb_|@K>v!f2A1=-XqBS>B$E5=I;|P92%v9*h-TWMr?tnYaH_g90CUKH&&aV%t6tq>gZm}Rc?8pfc{xb@Ex|Uo>>Fu^ z6f^$3u|OMtHV2=w66`Nw)BJb^&$&gciy<~UV!x3#`MA3r>(E3Er|jI}f8QOZ_{dRX)LEpjTUpvHss^<2}su;Q)(Lq6i!nwRRtCh0W!qT9M2f{P# zg7pkblXzu~O^23fGVyNq{&XeZ-g9ts@0EOm6s;ujUrlgGz{P#F!x`l@!|(w55a5_; zAK$1%O`PUF&hK8;KC!re@NhH1@foZ{SWC!sIAp#kC~BwY%f^jDWfD<2qI}f8ZW^w= zkm4#|9^srBx)!t65_P*y^$U&It5Vc~W$4y-;IY21wX69SP}On*4rD+ezbc4}SGzwW z*oA|G2UWieT-4GATd4#e%M9k$3Y?<=*=0i6;kS_hfg!D{V7AxPQj$~-r*stdt?W$2 zx%MBQnyR;=p)v`f+%Q7N?s)j3pjrj?uBz;Vzd?m*eYEB;y$kiWU_{(!YZSo zt&+kgCwrtlObju{I$3$Ku@n(RYMZHP!@UiWXUbhk%VpiF8vt%aE-@1-g_y~C099EJfb>;b;tv(q_4F?=!g#bi4=PYH+@4K&lmRK zSMZMImdQb`raT|waz$Q7n}zcFc7^JDE=Lr{(a*k{@U+{{~cu_Q_#izNx=A&>4pXx~cT;xt!|MVV|jb58N;GACe40(5+4!F7Vj@b2Y3N z=nfei>4}H-EO|q{xN{=Pio95=TCz$p0Ngl$Ri*kAaFZQLhD58o(a}? zB2ax!-Qb~?%-Q}`Bh*mAO>XQ#(*7P7Lq`7^lU2lM(q-Kjw||Z_?QQBxxcJw4R6u3Zf4jw9=(rtLu3=y z=@8VZl2YkQ#MrZCD@O~J%E7#=3>A>pif9j*^y^}&+&ZGpCV}uCvYPRu${MvquPU=j znme$KTw18Mi=;qciXpchns@AMx4r`5v91Q`-?Dt_+7;P7V|2`OO zPa3hu7extnznks}qowu|MUu=YHWU^?i;LkDBy$B=lSmtpkFW=%B(~Kg4!$#L4?BD< zW4Dn7POu80s-lPy1C~Lf1PQs!X|w%4QNV{Ka2Gdnp42t*1E^{xYiHJ1Yd1GH=ZmFI zfjbxw$~7e4`yei2rc;i3zlOXm(!EmyvUiOI)QgT|v*&(az+OJH5pFLOlPcpUmSj=K zW<`gY5D310~_u2oubKM>O1x<%Z@C-9IRUke<<` zW{b=12l1^&w`EVtl8ID^e&)+B{-semM9}5$n7Fy3tBs*`kESp2eQ&do_QN{;xH{?K&X{RU})gOn}phtqDQXu1|6e`Vzt_b$7 zaJQK?HgTp@%PPW0T@Fw?rJ^H6Swb$C!$mGTwFWcKnANWdV9t_#kz)N)iR`=}9xW@e z9l0y(*(6%DQLO{7)M_y*iU<(F-okPOH}q3|JR=7c^Ezg8W%MX$S-p*Q?~5`+-=T+x|1Fr-JAi z;?U-~%`!Ox*D04o6sxr~P_&tED`!Uy3T<)+m`&h}+nO1PoJMN(wID8SW&_O1!qW;< z!OjMK?jRHne97@>_w5&HuquggScY#h1o^^dDSN({k_|XLXIIvn9>uHr0+I!q)iaL; z>O9pto_RaTrJoXRX1d*&_U=U!N2^3Rq~#C|-p%D3t(7M3rSjjFf4Af{HCPzOUX@g)T2sF3r>&>qYu{X<*l2^> z#zE8$;#q}rQ(L$<=hxU9$#u6Yf#pc2syDhZRd$W0^-?lFC&{i%vmgz>9XHH=eeSUq@O;xX z;lkg!s(A;8e~zhp5~SZO@#wv}gV`7ctrz%#^Ndyu{&OVL!D-rRw1wcnGy4PM-&qX*fF+LP_3AhzPqkSg{AM#1QSQ-yaU% zhub2nkXTr8Y)?<&M5Wn4e*E-JRL6Hba0f1TWNk7r66(A&@G$ts&+h9|MVVGmBa(v*MAP!Fws9sYaG4@h3Jw1uyU_yABBld)HyN*6kPNEZK=6yD zz$@V>%&hkqGWMUS&vVUNpgaBHeLr0=>`S0NC+0ZP0Aq2w!zKDGw;^n80Rh&ukJqB_m;cp#0k8YtvGX(hh(-` zC?zVSEs}E#l_(6RdtQw1wJ?Z_gy^^5kG4xBD7syUM1SW~EPmK$OhIB2) zFBmjFqr%fy5Z3Z0?C}CFhA$9Bd-QAvADO zlY30j=I`%0iu3k-h$3NX_5nrBi0J*E{L)bl`T~XE*I3lk=eU%_v9PC1sn4@x$~Tqc zD6>%`Jw%5$&1sE2COeAPD5T>1D{1|f?*@&*uK0>WzKKRrbJ z`ihCYsj-#5gQFdQ0UY=M0{uO$HwZ8h(O>QN4;*{VGg>Qu2?ItYcY!wyKK^h^;+LuzJ1})w9&Yg3-Qnu z5E5N7^#-TTG(wfxK!F+twzr9iUlCnF_?cg52n0GSD*C?j_@N;Z`U>&(C;Rauckj-l zPtlLuZUpZM$Z*RH-4^io7fYbfwdd%o(2g$pN%dtV$sFgFl9n3Eq$K9(UL#ya4rBG< ztJYUHiI!--HYmY)uGLeBGkn^}onWE_!1V*eheC4EN4%}7sUc0qDt48wijJFi^;E3Hg3GtxJgbVNQ%M- z4@fr}a{Oe(ROvEyP<^4_$h;T9CrA+ytQMcbA57kaQ@I8#`%n|K#CD-gz1(X|vVy96 zfwHQlY>PfnbYc>(Ypb8B4<=q!6&JDFoErYQdlgp}3u}3p(G*^QweMVye6gclUbk>Z zswz#v5tm-WG9{EI$nN=OG|h>NA#G_Oj?ud^F?A-~xnmpqI;v8B(dhDJ{GMs=UH8`s zV>w~2OiN<*DLssOy?B#UfmY-tVJ?7CmW&BXJUHHLis+X?6F%5qlq^+;e2rc=;cHmR zIeugxPnDpBXJB6K%k1$n5Y&&z>OQbqTKSM;r^N`$(~c4N?f4{*WB}M4c+K( zsrutd^{ra7Nk_mt#RxUha`vDau=OUwFe1g)yns*UC}sJRWU_OR80Y#{PyIA~sizHg-M8 z(&bl;$VWwPR<1?$K~R!3Y)WCkRo!${G=c^ZSzEdZHH(*xoxg26P5_G}@$^eb(Oa2I zCsYB~-ERXdSSoofN=6KpmNf-LD(NJOn2Eh!p&bWg!=179@g}dpUhr|ul!hLYxGgOQ zKYv$_oO|#rGc0GO4uRrCK87)QbNjhmF>Da)us(FJ6|E+q`@yA$f=3c>d4}yOol1L6 z($KDbAR%a`!vsZpjDN_m%2C2pZI>xeeYR6KFOxRegfn54H4 z^*Ueu_SrCOg$j}yskRfBJEMRyq@|gL*{oBN8$^As1IB_^qUM|n zHo_DuR)|Jd(oN&Cb12G#HHkqh1}!^mwtnQ+TMf#C+D+tO$dKoP)xJY%&VYIfrRkO4 zdh$GuGZ5OA2ziafX%+bZjnAy`K`riqdC!W;bt!_!lrr{y3q#1qAotTIj|-yn3?CD6 zXQ&>QVupIjgzW5n`{vjrz-0Ua+v_DKyp*fA>|Db6-pg)wP(G=nfgsc()}yx=F9LY* zAKCGt!sX(@@lm&1s4uLxto~W)*UVLdq2Kh^V&qSbAS6` z!jK1&!N-VcW?%OUQ*xY3tXax_MjQn+7F0% zUHTt`cYDo?F-RJ;%JU0|YmIB!A{(eR`i%Uan~@r4Qu6{*5n#sz;c>2`=-u49qoYqkc&>zbXyf4kM%tgBhE&vG+|C zP@n6rIeW0yX|==bX#>t^Bj7F2z24x_*OJlKSbj@Ae9vHk{kFROTOwan`GMr!cvjKJ z)i(#EH;5>x9B9R37^0Bus2i1=!Yoc0CtPXl{2akHK}K@(|I`?xq8J3ygl#FWShxdPJI{%Y+YbAvDAwzZqvTEyjV_`dzpYv3 z`3FPAldQ0N1m}HR^{Uk6Gs0Nv1}nhI9N&nf4^sr+7ieBXo4TZr2sW??j4}ihx^bom zb(t0`ywzDDqu^_NwPjb$nrs0)%GDK=9hvV@dI}%qWuFZ(VtipCvqN^a>w8BIl|-0h z^p+VR$*IY1XG;~QU?o}leJwOWYSM4jud>WHY7C0j&N&C-ix}O{A2GHlI_W(4ZFJ|GJ zEcMlJv1Y1fI^NE{(UsLlI*UvB;>1)|@U>SowYfLuyn=C%F)M0^BoI(Bxa_Rl9MOc? zdz^K^nOFV#69!HXyes1vhRJGN_~u$p)M6(nM1DqVN?O```vgkqd_8F} zOh4rymfk(#poadI)soVZd_}c8%FvI5=Z&W@u`kZO7q54KeVSW`uX&nVuCJ)<9A0x? zDK>t}{459lB2q)eo+Rl*=BdB?r^4YF?T)*(&^fXpOe< z4*Yo%Tp^HJ8hv!nnupRBQb<0(H4;@$=-F|;!!){zI*v-?@9hp0euHFt=;~w89Em@6 zuf7Vv-PMh^dKKtSw<0*9 zsWWpHk$@p(?e@V*b7E?gamG+W;#iB52U#cgS8mNEI)DAww{(yZ+_;qKMk4P8SKv~{ zS>`oI1OXKL+@(Vu8Sxp_y5Q--y*Z?14*L=5wx@aBCOgdK4bOW<;$8lckh%=bbf z!~F4IQVC`HzIGaZeioIxH27K`1QWzC-yhAw9!AIl){w6oGr$rq9|K@8XYhJPGgid( zc40Z9@*c0`72UM5dK4V$vfSDHXhdcjir0;yIXBz}aAQ2;vn**JqcsjLW-L>yq++t~ z;GPTkTvq5qLE_MPq5aEAzNv-ELYWH`A$FH%{?u*)mgE+YWVzyYERE?X-z?C%)hyAB z+|@&+4p|jcapVd=1&tR=Sm8WReRq5IUfeQFBJ}YT^wt>tQf6MVd`!x^Zen=hly6X` zZsH}n6U7=gOTsw+0f)kVCc3d0-G`d~cqcujs(mec=4LZ4c{|c0F2K8v6-L6#GPU9X z{@ofMvd1Q0dc~V1DaTb_5%;8Q!&vmB5wc|IZIaJhAM0sfR5>^}8dsAO4S-Ie36F|; zyZPCb2WKSkWuy}s#}%FoDsbzQpM@%<=f*mW%PxS+O^e~*LzI$)gfzqJB!)CzW<9^y zv|Ovpl5(}a<{53Wf@lWo{(@%KkA}mMy(wVjP%`+cit%J&=*+)#v$81yAubcMT3=deFd7%?7 zd_6f5EM8AO5@8ki&|LUwyqSTC9Ii)DN8HI?okBzUOn(qU{*+6VtgGz;&p260!HjpPm8$PbR`Cv;Jbq17XPpY`0Jp#hx4-~i z+v?&+J z2@W?5xooqcnD?f%7F?mH6h|xC9a!+YSn&GDmu_1N#oBiX-R36_gxV|fe1Qa~-0iXz z&Mxo_dAP|Qz>M+bqD^PQ=9{Z;rU=?bwzu-QTo#JCR%dusN?a|7#=11eeG}6@Q=UT4 z-l4L2d!UcCr|s9Tn;HA_uAex5N|1LNq$g_Q@!4#N0itf-s*wH~)Wtz+O z=f-X?GcI#{P-lIE*#|Db^ijb6K77Kgp>bF~({HJ?NU_C7>wQ-u%dYw>kCum32U9NS z#0;nt(dI!{j6rYIOU!qaowj$6a?JuT7sbe}Lc&+3JCrJO+5k+yqf*ID&=Ei0R|S=i z2HKC2+;=^f_aDTk%2tR^NapX6zi%(FS8dQz0e8P85x%=&P%sRTe-#s3KpobpSgUS! z0xs!%f!`P)z)C((nyjdb5S^r)7=x^koTQkjvI@Pd*u(JOQNDNj&jtq2*Z5{ zn_5`d{|(|=uD$#Mn2??n0|Z3$2LKR|ir+%$8=C7I161vSKkUgwT^#K64IQlQ$o~3+ zEWp9U`puJ-^3M`eT37381C^5i)GOB$k{H0ljHeR+hwCRKg)Q{$?Io>@tp7*`kt58; z0~o3^ftyj1Psq#z_52UW{3Mx>m6f%FzJsZ?)gQ^~+bo2)0aX_bB+c-I>=4>NA`3j{ zakOyw*Iw7ps?$i}GwTM9Rs=c-h{zN0b>R5@f0yYmP5x1wF5|``Y~Y^XHXjHG)f0+X zg#U!%ujP0(9bj+^R8TX}1iqKj`d{-0TEst)L)p~f-zQf;tLS|)H?Im1`T#iWV*kSh zzWN9h;U7T%H0(dBh>Ypi;tUdNWqhCZut2N+{xtc&mMip+X#Urte~nKeS! z=h&y}0tH&ZKafet&iLQO)PGi2I4z2x3DBCxUi{4Z3#oqu{a8( z2>fRlp9a86(|=pYe@lV>sV~sG{?PXS*7tiE(VzR@&w9IY0VnM_&{Lv0W0?NYu>}RLhZuMu_A0G&RW_H-I??D*o*_I$dK**mkYv=h(R0jt; zQv*i_0GTr27xCmfqj?1=lP5sg^MD@U`4dPO-#Z)Xe;buk3ke2@RX7XC2b>sS(~%YL9PpL&$KzTEY15&vXCdpo_ecR3KRK&zXLQ_S4nSpCP|<1axwLwWp$r18a}{efs?GI?vBYKOWD1KQj4W zi~m26et9wIXZRn_YMB3r4Sa + +Windows equivalent of package-usages.sh: print the distinct methods of dependency + that the project's OWN compiled classes call. Scans every moduleClasses +entry in /project.yaml (class dirs or jars) and keeps only call sites +whose owner is in , deduped. When the modules carry a `packages:` list, +only classes under those roots are scanned; otherwise moduleClasses is already +project-only. The separate `dependencies:` list is never touched. +#> +param( + [Parameter(Mandatory)][string]$ModelDir, + [Parameter(Mandatory)][string]$Package +) + +$pp = $Package -replace '\.','/' +$yaml = Get-Content (Join-Path $ModelDir 'project.yaml') + +# read a YAML block list — the "- item" lines under : +function Get-YamlList([string]$key) { + $f = $false + foreach ($l in $yaml) { + if ($l -match "^\s*$key:\s*$") { $f = $true; continue } + if ($f) { + if ($l -match '^\s*-\s+(.+?)\s*$' -and $l -notmatch ':') { $Matches[1] } + elseif ($l -match ':') { $f = $false } + } + } +} + +$roots = (Get-YamlList 'packages' | ForEach-Object { [regex]::Escape(($_ -replace '\.','/')) }) -join '|' + +$out = foreach ($e in (Get-YamlList 'moduleClasses')) { + $p = Join-Path $ModelDir $e + if (Test-Path -LiteralPath $p -PathType Container) { + $base = (Resolve-Path -LiteralPath $p).Path + $names = Get-ChildItem -LiteralPath $p -Recurse -Filter *.class | + ForEach-Object { ($_.FullName.Substring($base.Length).TrimStart('\','/') -replace '\.class$','') -replace '[\\/]','.' } + } else { + $names = & jar tf $p | Where-Object { $_ -match '\.class$' } | + ForEach-Object { ($_ -replace '\.class$','') -replace '/','.' } + } + if ($roots) { $names = $names | Where-Object { ($_ -replace '\.','/') -match "^($roots)/" } } + if ($names) { + $argfile = New-TemporaryFile # pass class names via @argfile to dodge command-line length limits + $names | Set-Content -LiteralPath $argfile + & javap -c -p -classpath $p "@$argfile" 2>$null + Remove-Item -LiteralPath $argfile + } +} + +$out | + Select-String -Pattern ("// (Interface)?Method " + [regex]::Escape($pp) + "/\S+") -AllMatches | + ForEach-Object { $_.Matches } | ForEach-Object { $_.Value } | + Sort-Object -Unique diff --git a/skills/discover-attack-surface/scripts/package-usages.sh b/skills/discover-attack-surface/scripts/package-usages.sh new file mode 100755 index 000000000..6c8f4adbd --- /dev/null +++ b/skills/discover-attack-surface/scripts/package-usages.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# package-usages.sh +# +# Print the distinct methods of dependency that the project's OWN +# compiled classes call. Scans every moduleClasses entry in /project.yaml +# (class dirs or jars) and keeps only call sites whose owner is in , +# deduped. A model's moduleClasses can mix project + dependency jars, so when the +# modules carry a `packages:` list, only classes under those roots are scanned; +# when there's no `packages:` list, moduleClasses is already project-only. The +# separate `dependencies:` list is never touched. +MODEL=$1; PKG=$2 +[ -n "$MODEL" ] && [ -n "$PKG" ] || { echo "usage: package-usages.sh " >&2; exit 2; } +pp=${PKG//.//} + +# read a YAML block list — the "- item" lines under : +ylist(){ awk -v k="$1" '$0~"^[[:space:]]*"k":[[:space:]]*$"{f=1;next} f&&/^[[:space:]]*-[[:space:]]/&&$0!~/:/{sub(/^[^-]*-[[:space:]]*/,"");print;next} f&&/:/{f=0}' "$MODEL/project.yaml"; } + +roots=$(ylist packages | tr . / | paste -sd'|' -) # project roots; empty ⇒ scan all moduleClasses +ylist moduleClasses | while IFS= read -r e; do + p="$MODEL/$e" + { if [ -d "$p" ]; then (cd "$p" && find . -name '*.class' | sed 's#^\./##'); else jar tf "$p" | grep '\.class$'; fi; } \ + | { [ -n "$roots" ] && grep -E "^($roots)/" || cat; } \ + | sed 's#\.class$##; s#/#.#g' | xargs -r javap -c -p -classpath "$p" 2>/dev/null +done | grep -oE '// (Interface)?Method '"$pp"'/[^ ]+' | sort -u From 627382b1252c13f6f96df22c2dac3a2c92f2c4d1 Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Wed, 10 Jun 2026 19:39:50 +0300 Subject: [PATCH 50/54] feat: add npm installation to skill --- skills/appsec-agent/SKILL.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/skills/appsec-agent/SKILL.md b/skills/appsec-agent/SKILL.md index d6007093a..312742473 100644 --- a/skills/appsec-agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -19,9 +19,16 @@ Keep every artifact under one `.opentaint/` directory at the project root — mo Before anything else, confirm `opentaint` is on PATH (`command -v opentaint` / `opentaint --version`). If it's missing, don't proceed silently — tell the user and ask to install it, offering the command for their platform; run an install only on explicit confirmation: -- macOS, or any platform with Homebrew — `brew install --cask seqra/tap/opentaint` -- Linux without Homebrew — `curl -fsSL https://opentaint.org/install.sh | bash` -- Windows (PowerShell) — `irm https://opentaint.org/install.ps1 | iex` +macOS / Linux — try in order: + +1. Homebrew — `brew install --cask seqra/tap/opentaint` +2. npm — `npm install -g @seqra/opentaint` +3. shell script — `curl -fsSL https://opentaint.org/install.sh | bash` + +Windows — try in order: + +1. npm — `npm install -g @seqra/opentaint` +2. PowerShell script — `irm https://opentaint.org/install.ps1 | iex` After installing, run `opentaint health` to confirm the autobuilder/analyzer/rules/runtime resolve. From 7fd712711993d23fb53b9025b02dbab99c734515 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 22:59:12 +0200 Subject: [PATCH 51/54] refactor: strip added comments from skills branch code Remove the comments introduced on this branch (Go, helper scripts, Makefile, release CI workflow). Preserves go: directives, shebangs, Python docstrings, and all pre-existing comments. --- .github/workflows/release-cli.yaml | 7 ---- Makefile | 3 -- cli/cmd/analyzer_inputs.go | 6 ---- cli/cmd/artifacts.go | 3 -- cli/cmd/health.go | 15 --------- cli/cmd/health_test.go | 7 ---- cli/cmd/java_runners.go | 4 --- cli/cmd/pull.go | 2 -- cli/cmd/root.go | 7 ---- cli/cmd/scan.go | 23 ------------- cli/cmd/test.go | 7 ---- cli/cmd/test_rule_reachability.go | 8 ----- cli/cmd/test_rule_reachability_test.go | 2 -- cli/cmd/test_rule_run.go | 32 ++++++------------- cli/internal/analyzer/testresult.go | 7 ---- cli/internal/globals/artifacts.go | 4 +-- cli/internal/rules/refs.go | 16 ---------- cli/internal/rules/refs_test.go | 2 -- cli/internal/testapprox/testapprox.go | 9 ------ cli/internal/testproject/testproject.go | 8 ----- cli/internal/testproject/testproject_test.go | 1 - cli/internal/testrule/testrule.go | 10 ------ cli/internal/testutil/testutil.go | 14 -------- cli/internal/utils/bundled_path_test.go | 6 ---- cli/internal/utils/copy_file.go | 2 -- cli/internal/utils/display_version.go | 20 ------------ cli/internal/utils/display_version_test.go | 1 - cli/internal/utils/ensure_rules.go | 4 --- .../utils/opentaint_command_builder.go | 3 -- cli/internal/utils/opentaint_home.go | 29 ----------------- cli/internal/utils/tier.go | 3 -- cli/internal/utils/write_files.go | 3 -- .../appsec-agent/scripts/sarif-to-findings.py | 7 +--- .../scripts/package-usages.ps1 | 13 +------- .../scripts/package-usages.sh | 12 +------ 35 files changed, 14 insertions(+), 286 deletions(-) diff --git a/.github/workflows/release-cli.yaml b/.github/workflows/release-cli.yaml index 0f0d27428..8fe97c06e 100644 --- a/.github/workflows/release-cli.yaml +++ b/.github/workflows/release-cli.yaml @@ -169,9 +169,6 @@ jobs: mkdir -p lib/rules tar -xzf /tmp/opentaint-rules.tar.gz -C lib/rules - # Mark this lib/ as an unmodified official release for these exact - # pinned versions; the CLI shows pinned versions (not "custom") only - # when this marker matches its embedded versions.yaml. cp internal/globals/versions.yaml lib/release-versions.yaml echo "Bundled artifacts:" @@ -188,10 +185,6 @@ jobs: if: ${{ steps.release_version.outputs.status == 'succeeded' }} run: | set -euo pipefail - # Build the jar, embed it into the Go binary (go:embed), and bundle - # it into lib/ so `opentaint test ... init` works from every install - # channel (archives, brew, npm, and the Docker image built from this - # same workspace). (cd core && ./gradlew :opentaint-sast-test-util:jar) (cd cli && go generate ./...) cp core/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar cli/lib/ diff --git a/Makefile b/Makefile index e823e8235..c1664478c 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,6 @@ INSTALLED_DEV_BINARY := $(BINDIR)/$(CLI_DEV_BINARY_NAME) all: core cli -# One Gradle invocation: configuration runs once and the jar tasks parallelize. core: cd $(CORE_DIR) && $(GRADLEW) $(ANALYZER_TASK) $(AUTOBUILDER_TASK) $(TEST_UTIL_TASK) @@ -42,7 +41,6 @@ core/autobuilder: core/opentaint-sast-test-util: cd $(CORE_DIR) && $(GRADLEW) $(TEST_UTIL_TASK) -# go generate embeds the test-util jar, so the cli build needs it built first. cli: core/opentaint-sast-test-util $(MAKE) -C $(CLI_DIR) build @@ -65,7 +63,6 @@ install: core 'exec "$$BIN_DIR/$(CLI_BINARY_NAME)" --experimental --analyzer-jar "$$LIB_DIR/$(notdir $(ANALYZER_JAR))" --autobuilder-jar "$$LIB_DIR/$(notdir $(AUTOBUILDER_JAR))" "$$@"' \ > $(INSTALLED_DEV_BINARY) chmod 0755 $(INSTALLED_DEV_BINARY) - # Pull any assets the local build doesn't produce (e.g. the Java runtime). $(INSTALLED_CLI_BINARY) pull clean: diff --git a/cli/cmd/analyzer_inputs.go b/cli/cmd/analyzer_inputs.go index 6328a6b62..3290a995f 100644 --- a/cli/cmd/analyzer_inputs.go +++ b/cli/cmd/analyzer_inputs.go @@ -4,10 +4,6 @@ import ( "github.com/seqra/opentaint/internal/utils/log" ) -// addDataflowApproximations resolves each --dataflow-approximations entry, -// auto-compiling a Java source directory into class files when needed, and -// registers the result on the builder. Shared by `scan` and the `test * run` -// commands so the two stay in lockstep. func addDataflowApproximations(b *AnalyzerBuilder, paths []string, analyzerJarPath, projectModelDir string) { for _, approxPath := range paths { absApproxPath := log.AbsPathOrExit(approxPath, "dataflow-approximations") @@ -19,8 +15,6 @@ func addDataflowApproximations(b *AnalyzerBuilder, paths []string, analyzerJarPa } } -// addPassthroughApproximations resolves each --passthrough-approximations entry -// to an absolute path and registers it on the builder. func addPassthroughApproximations(b *AnalyzerBuilder, paths []string) { for _, passthrough := range paths { b.AddPassthroughApproximations(log.AbsPathOrExit(passthrough, "passthrough-approximations")) diff --git a/cli/cmd/artifacts.go b/cli/cmd/artifacts.go index 08e773777..e033a90a4 100644 --- a/cli/cmd/artifacts.go +++ b/cli/cmd/artifacts.go @@ -9,9 +9,6 @@ import ( "github.com/seqra/opentaint/internal/utils" ) -// ensureArtifactJar resolves an artifact's jar path and downloads the release -// asset when missing. An explicit Override on the def short-circuits both. It -// is the single provisioning path for every jar-backed artifact. func ensureArtifactJar(def globals.ArtifactDef) (string, error) { path, err := utils.ResolveJarPath(def) if err != nil { diff --git a/cli/cmd/health.go b/cli/cmd/health.go index 35b0ec848..0954bbbf4 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -18,7 +18,6 @@ var ( healthRuntime bool ) -// healthComponent is one resolved dependency in the health report. type healthComponent struct { name string version string @@ -26,7 +25,6 @@ type healthComponent struct { present bool } -// healthCmd represents the health command. var healthCmd = &cobra.Command{ Use: "health", Short: "Show resolved dependency paths", @@ -53,7 +51,6 @@ func init() { } func runHealth() error { - // No flags shows every component, in fixed order. var requested []string if healthAutobuilder { requested = append(requested, "autobuilder") @@ -76,7 +73,6 @@ func runHealth() error { components = append(components, resolveHealthComponent(key)) } - // A single flag prints just the bare path, for scripting. if len(requested) == 1 { c := components[0] if c.path != "" { @@ -114,8 +110,6 @@ func runHealth() error { return nil } -// resolveHealthComponent resolves a component's path and presence. Only the -// rules are fetched on demand; the rest are reported as-is. func resolveHealthComponent(key string) healthComponent { switch key { case "autobuilder", "analyzer": @@ -129,7 +123,6 @@ func resolveHealthComponent(key string) healthComponent { } } -// resolveJarComponent resolves a jar-backed artifact (autobuilder/analyzer). func resolveJarComponent(kind string) healthComponent { def := globals.ArtifactByKind(kind) path, err := utils.ResolveJarPath(def) @@ -137,12 +130,8 @@ func resolveJarComponent(kind string) healthComponent { return healthComponent{def.Name, version, path, err == nil && utils.PathExists(path)} } -// resolveRulesComponent resolves the built-in rules directory, downloading it -// on demand so `health --rules` replaces `dev rules-path`. func resolveRulesComponent() healthComponent { c := healthComponent{name: "Rules", version: utils.ArtifactVersion(globals.ArtifactByKind("rules"))} - // EnsureRulesPath returns the expected path even on failure, so the report - // can still show where the rules belong, flagged as missing. path, err := utils.EnsureRulesPath(out) c.path = path if err != nil { @@ -153,10 +142,6 @@ func resolveRulesComponent() healthComponent { return c } -// resolveRuntimeComponent reports the managed JRE the analyzer actually runs -// on. The analyzer's runner never consults a system Java (it pins the managed -// Adoptium JRE), so neither does health; when no managed JRE exists yet, the -// reported path is where the analyzer will download one on first use. func resolveRuntimeComponent() healthComponent { c := healthComponent{ name: "Runtime", diff --git a/cli/cmd/health_test.go b/cli/cmd/health_test.go index ab5e558fa..fbc1ab5b6 100644 --- a/cli/cmd/health_test.go +++ b/cli/cmd/health_test.go @@ -18,9 +18,6 @@ func TestResolveHealthComponentUsesAnalyzerJarOverride(t *testing.T) { if c.path != globals.Config.Analyzer.JarPath { t.Fatalf("health analyzer path = %q, want override %q", c.path, globals.Config.Analyzer.JarPath) } - // A jar-path override is a custom build, so health reports the version as - // "custom" — bare, since the path is already shown on its own node (no - // duplication), unlike scan's single-line "custom ()". if c.version != "custom" { t.Fatalf("health analyzer version = %q, want %q", c.version, "custom") } @@ -38,8 +35,6 @@ func TestResolveHealthComponentUsesAutobuilderJarOverride(t *testing.T) { } func TestResolveRuntimeComponentIgnoresSystemJava(t *testing.T) { - // An empty HOME means no managed JRE can exist; the analyzer would download - // its own JRE, so health must NOT report a system Java as the runtime. t.Setenv("HOME", t.TempDir()) c := resolveHealthComponent("runtime") @@ -58,8 +53,6 @@ func TestResolveRuntimeComponentFindsManagedJRE(t *testing.T) { if err := os.WriteFile(filepath.Join(jreBin, "java"), []byte("#!/bin/sh\n"), 0o755); err != nil { t.Fatal(err) } - // Without the .versions marker the install tier is stale-filtered, matching - // the analyzer's own policy — write it so the JRE counts as current. if err := utils.WriteInstallVersionMarker(); err != nil { t.Fatal(err) } diff --git a/cli/cmd/java_runners.go b/cli/cmd/java_runners.go index 7d8b8d1ff..560e342e8 100644 --- a/cli/cmd/java_runners.go +++ b/cli/cmd/java_runners.go @@ -5,8 +5,6 @@ import ( "github.com/seqra/opentaint/internal/utils/java" ) -// newAnalyzerJavaRunner returns the runner policy for the analyzer JVM: the -// managed Adoptium JRE pinned to the default Java version, never system Java. func newAnalyzerJavaRunner() java.JavaRunner { return java.NewJavaRunner(). WithSkipVerify(globals.Config.SkipVerify). @@ -15,8 +13,6 @@ func newAnalyzerJavaRunner() java.JavaRunner { TrySpecificVersion(globals.DefaultJavaVersion) } -// newAutobuilderJavaRunner returns the runner policy for project compilation: -// system Java first, then the user-configured version. func newAutobuilderJavaRunner() java.JavaRunner { return java.NewJavaRunner(). WithSkipVerify(globals.Config.SkipVerify). diff --git a/cli/cmd/pull.go b/cli/cmd/pull.go index 230500a08..ff95db860 100644 --- a/cli/cmd/pull.go +++ b/cli/cmd/pull.go @@ -77,8 +77,6 @@ func downloadArtifact(spec globals.ArtifactDef, installNextToBinary, installCurr node := out.GroupItem(fmt.Sprintf("%s %s", spec.Name, spec.Version)) if spec.Override != "" { - // Pull still fetches the release artifact (offline prep stays valid if - // the override is later removed), but scans will use the override. node.Child(fmt.Sprintf("Config override active: scans use %s", spec.Override)) } diff --git a/cli/cmd/root.go b/cli/cmd/root.go index a2812bc26..214028ec8 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -155,10 +155,6 @@ func init() { _ = viper.BindPFlag("autobuilder.jar_path", rootCmd.PersistentFlags().Lookup("autobuilder-jar")) } -// initConfig reads the config file and ENV variables. It runs from the root -// PersistentPreRunE so it can bind shared viper keys to the EXECUTING -// command's flag instances — explicit flags must beat config/env for every -// command that registers scan flags, not just `scan` itself. func initConfig(cmd *cobra.Command) { bindScanFlags(cmd) @@ -175,9 +171,6 @@ func initConfig(cmd *cobra.Command) { _ = viper.Unmarshal(&globals.Config) } -// bindScanFlags points the scan.* viper keys at cmd's flag instances when cmd -// registers them. Binding at execution time (not init time) means the command -// the user actually invoked owns flag precedence — see addScanFlags. func bindScanFlags(cmd *cobra.Command) { for key, name := range map[string]string{ "scan.timeout": "timeout", diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 99936325c..cab1cece6 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -23,10 +23,6 @@ import ( "github.com/seqra/opentaint/internal/utils/log" ) -// ScanConfig holds the per-invocation inputs for a scan. The scan command -// populates it from its flags; sibling commands such as `test rule reachability` -// construct it directly with preset overrides instead of mutating shared -// package state. type ScanConfig struct { UserProjectPath string ProjectModelPath string @@ -47,8 +43,6 @@ type ScanConfig struct { ExpandRuleRefs bool } -// scanFlags is the ScanConfig bound to the scan command's flags. Read it only -// from a command's Run; pass an explicit ScanConfig everywhere else. var scanFlags ScanConfig type RulesetType struct { @@ -61,8 +55,6 @@ const ( dryRunRuleLoadTraceFileName = "opentaint-rule-load-trace.dry-run.json" ) -// scanPlan holds the resolved compilation/cache plan for a scan invocation, -// derived from a ScanConfig and the on-disk model cache. type scanPlan struct { absProjectModel string // absolute path to the project model (always the cache dir when projectCachePath is set) projectCachePath string // cache dir for this project (empty for explicit model / dry-run) @@ -70,7 +62,6 @@ type scanPlan struct { cacheLock *utils.FileLock } -// title names the scan flavor for the info tree header. func (p scanPlan) title() string { if p.needsCompilation { return "OpenTaint Compile and Scan" @@ -96,8 +87,6 @@ Use --project-model to scan a pre-compiled project model instead of compiling fr }, } -// prepareScanConfig validates the source-path-vs-model invariants shared by -// every scan entry point and resolves the project path argument into cfg. func prepareScanConfig(cfg ScanConfig, args []string) ScanConfig { if len(args) > 0 && cfg.ProjectModelPath != "" { out.Error("Cannot use both a source path argument and --project-model flag") @@ -122,18 +111,10 @@ func init() { addRuleIDFlag(scanCmd) } -// addRuleIDFlag registers the --rule-id flag. Split out from addScanFlags so -// that `test rule reachability` can omit it (it takes the rule ID -// positionally and supports only one rule at a time). func addRuleIDFlag(cmd *cobra.Command) { cmd.Flags().StringArrayVar(&scanFlags.RuleID, "rule-id", nil, "Filter active rules by ID (repeatable)") } -// addScanFlags registers the flags shared by `scan` and `test rule -// reachability`. The matching scan.* viper keys are bound to the executing -// command's flag instances at startup (bindScanFlags in root.go), so explicit -// flags keep precedence over config/env regardless of which command the user -// invoked. func addScanFlags(cmd *cobra.Command) { cmd.Flags().DurationVarP(&globals.Config.Scan.Timeout, "timeout", "t", 900*time.Second, "Timeout for analysis") @@ -158,8 +139,6 @@ func addScanFlags(cmd *cobra.Command) { } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. -// All scan command suggestions should use this as the base; every ScanConfig field that -// changes scan semantics must be represented here or suggestions will silently drop it. func currentScanBuilder(cfg ScanConfig, sourcePath string) *utils.OpentaintCommandBuilder { b := utils.NewScanCommand(sourcePath). WithOutput(cfg.SarifReportPath). @@ -176,8 +155,6 @@ func currentScanBuilder(cfg ScanConfig, sourcePath string) *utils.OpentaintComma return b } -// isDefaultSeverity reports whether sev is exactly the flag default, in which -// case suggestions omit the flag entirely. func isDefaultSeverity(sev []string) bool { return len(sev) == 2 && sev[0] == "warning" && sev[1] == "error" } diff --git a/cli/cmd/test.go b/cli/cmd/test.go index 2c517563f..7fe8bffd5 100644 --- a/cli/cmd/test.go +++ b/cli/cmd/test.go @@ -6,20 +6,17 @@ import ( "github.com/spf13/cobra" ) -// testCmd groups the rule/approximation authoring, testing, and debugging tools (experimental). var testCmd = &cobra.Command{ Use: "test", Short: "Create and run rule and approximation tests (experimental)", Long: `Experimental tools for creating test projects, running annotated rule and approximation tests, and debugging rule reachability.`, } -// testRuleCmd groups the rule-authoring subcommands (init/run/reachability). var testRuleCmd = &cobra.Command{ Use: "rule", Short: "Create, run, and debug detection-rule tests", } -// testApproximationCmd groups the approximation-authoring subcommands (init/run). var testApproximationCmd = &cobra.Command{ Use: "approximation", Short: "Create and run dataflow-approximation tests", @@ -31,8 +28,6 @@ func init() { testCmd.AddCommand(testApproximationCmd) } -// testExitCodesHelp documents the exit codes shared by `test rule run` and -// `test approximation run`. Codes 252-255 mirror internal/analyzer. func testExitCodesHelp(passedLine string) string { return `Exit codes: 0 ` + passedLine + ` @@ -44,8 +39,6 @@ func testExitCodesHelp(passedLine string) string { 255 Project configuration error` } -// addTestRunFlags registers the flags shared by `test rule run` and -// `test approximation run`. func addTestRunFlags(cmd *cobra.Command, outputDir *string, timeout *time.Duration, maxMemory *string, dataflow *[]string) { cmd.Flags().StringVarP(outputDir, "output", "o", "", "Directory for test-result.json and test-results.sarif") cmd.Flags().DurationVar(timeout, "timeout", 600*time.Second, "Analysis timeout") diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index 72365e888..ecb0bf323 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -17,10 +17,6 @@ Referenced library source and sink rules are collected and analyzed automaticall Annotations: map[string]string{"PrintConfig": "true"}, Args: cobra.RangeArgs(1, 2), Run: func(cmd *cobra.Command, args []string) { - // `reachability` is `scan` with a forced preset. It shares the scan - // flags (so scanFlags carries the parsed --ruleset, --output, ... ) but - // builds an explicit ScanConfig with the reachability overrides applied - // instead of mutating shared state, then runs the same scan pipeline. if reachabilityEntryPoint != "" { out.Warn("on Spring projects this method is added to the auto-discovered entry points, not used to restrict them") } @@ -29,10 +25,6 @@ Referenced library source and sink rules are collected and analyzed automaticall }, } -// reachabilityScanConfig returns the scan config for a `test rule reachability` -// run: the base scan flags with the reachability-specific presets applied -// (single rule, fact-reachability SARIF, rule-ref expansion, optional -// entry-point restriction). func reachabilityScanConfig(base ScanConfig, ruleID, entryPoint string) ScanConfig { base.RuleID = []string{ruleID} base.DebugFactReachabilitySarif = true diff --git a/cli/cmd/test_rule_reachability_test.go b/cli/cmd/test_rule_reachability_test.go index 0ba10d8d9..280a7dced 100644 --- a/cli/cmd/test_rule_reachability_test.go +++ b/cli/cmd/test_rule_reachability_test.go @@ -31,7 +31,6 @@ func TestReachabilityScanConfigAppliesPresets(t *testing.T) { t.Errorf("entry points = %q, want com.example.A#m", cfg.DebugRunAnalysisOnSelectedEntryPoints) } - // Base scan flags must be preserved, not clobbered by the preset. if len(cfg.Ruleset) != 1 || cfg.Ruleset[0] != "builtin" { t.Errorf("Ruleset = %v, want base [builtin]", cfg.Ruleset) } @@ -48,7 +47,6 @@ func TestReachabilityScanConfigOmitsEmptyEntryPoint(t *testing.T) { } func TestReachabilityExplicitFlagsSurviveConfig(t *testing.T) { - // Snapshot and restore the shared state this test mutates. origTimeout := globals.Config.Scan.Timeout origMaxMemory := globals.Config.Scan.MaxMemory t.Cleanup(func() { diff --git a/cli/cmd/test_rule_run.go b/cli/cmd/test_rule_run.go index b2809c18d..028c77db5 100644 --- a/cli/cmd/test_rule_run.go +++ b/cli/cmd/test_rule_run.go @@ -46,21 +46,16 @@ var testRuleRunCmd = &cobra.Command{ }, } -// testProjectOptions holds the inputs shared by `test rule run` and `test approximation run`. type testProjectOptions struct { - label string - tempDir string - rulesets []string - outputDir string - timeout time.Duration - maxMemory string - ruleIDs []string - dataflowApprox []string - passthroughApprox []string - // includeBuiltinRules loads the builtin ruleset alongside opts.rulesets. - // Rule tests need it (test joins may ref builtin lib rules); approximation - // tests run only against the self-contained harness rule, so skipping it - // keeps them download-free. + label string + tempDir string + rulesets []string + outputDir string + timeout time.Duration + maxMemory string + ruleIDs []string + dataflowApprox []string + passthroughApprox []string includeBuiltinRules bool } @@ -75,13 +70,11 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { out.Fatalf("Cannot access project model %s: %s", nativeProjectPath, err) } - // Validate max-memory maxMemory, err := utils.ParseMemoryValue(opts.maxMemory) if err != nil { out.Fatalf("Invalid --max-memory value: %s", err) } - // Resolve output directory outputDir := opts.outputDir if outputDir == "" { tmpDir, err := os.MkdirTemp("", opts.tempDir) @@ -89,8 +82,6 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { out.Fatalf("Failed to create temp dir: %s", err) } outputDir = tmpDir - // Note: temp dir is NOT cleaned up so results remain accessible to the agent. - // The agent should always specify -o to control the output location. } else { outputDir = log.AbsPathOrExit(outputDir, "output") if err := os.MkdirAll(outputDir, 0o755); err != nil { @@ -122,13 +113,11 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { builder.SetMaxMemory(maxMemory) } - // Add user rulesets for _, rs := range opts.rulesets { absPath := log.AbsPathOrExit(rs, "ruleset") builder.AddRuleSet(absPath) } - // Add rule ID filters for _, ruleID := range opts.ruleIDs { builder.AddRuleID(ruleID) } @@ -139,7 +128,6 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { } builder.SetJarPath(analyzerJarPath) - // Auto-compile .java sources in a --dataflow-approximations dir, as `scan` does. addDataflowApproximations(builder, opts.dataflowApprox, analyzerJarPath, projectPath) addPassthroughApproximations(builder, opts.passthroughApprox) @@ -157,7 +145,6 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { out.Error(analyzerFail.Message) } - // Always print output paths so the agent can inspect partial results resultPath := filepath.Join(outputDir, "test-result.json") fmt.Printf("Results directory: %s\n", outputDir) fmt.Printf("Test results: %s\n", resultPath) @@ -166,7 +153,6 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { os.Exit(analyzerFail.ExitCode) } - // The analyzer exits 0 even when samples fail; the verdict is in test-result.json. tr, err := analyzer.LoadTestResult(resultPath) if err != nil { out.Fatalf("%s produced no readable test-result.json: %s", opts.label, err) diff --git a/cli/internal/analyzer/testresult.go b/cli/internal/analyzer/testresult.go index 26a9f3762..d9501666b 100644 --- a/cli/internal/analyzer/testresult.go +++ b/cli/internal/analyzer/testresult.go @@ -6,15 +6,11 @@ import ( "os" ) -// TestSampleInfo identifies one annotated sample in a rule-test run, as -// serialized by the analyzer's TestProjectAnalyzer into test-result.json. type TestSampleInfo struct { ClassName string `json:"className"` MethodName string `json:"methodName"` } -// TestResult mirrors the analyzer's test-result.json. The analyzer process -// exits 0 even when samples fail; the verdict lives only in this file. type TestResult struct { Success []TestSampleInfo `json:"success"` FalseNegative []TestSampleInfo `json:"falseNegative"` @@ -23,13 +19,10 @@ type TestResult struct { Disabled []TestSampleInfo `json:"disabled"` } -// Failed counts the samples that keep a run from passing: missed positives, -// false positives, and samples skipped because their rule never loaded. func (tr *TestResult) Failed() int { return len(tr.FalseNegative) + len(tr.FalsePositive) + len(tr.Skipped) } -// LoadTestResult reads a test-result.json produced by the analyzer's rule-test mode. func LoadTestResult(path string) (*TestResult, error) { data, err := os.ReadFile(path) if err != nil { diff --git a/cli/internal/globals/artifacts.go b/cli/internal/globals/artifacts.go index fb33d4781..95487b32a 100644 --- a/cli/internal/globals/artifacts.go +++ b/cli/internal/globals/artifacts.go @@ -12,8 +12,8 @@ type ArtifactDef struct { CacheSuffix string // cache filename suffix (".jar", "") BindVersion string // compile-time bind version Version string // user-configured version - Override string // explicit jar path override; bypasses version resolution and download - Unpack bool // unpack tar.gz; also implies dir-based cache entry + Override string + Unpack bool // unpack tar.gz; also implies dir-based cache entry } // CacheName returns the cache filename/dirname for this artifact version. diff --git a/cli/internal/rules/refs.go b/cli/internal/rules/refs.go index 4acc200d8..f407f3a70 100644 --- a/cli/internal/rules/refs.go +++ b/cli/internal/rules/refs.go @@ -8,8 +8,6 @@ import ( "gopkg.in/yaml.v2" ) -// ruleFile is the minimal shape parsed from a ruleset YAML: each rule's id and -// the rules it pulls in via join.refs. type ruleFile struct { Rules []struct { ID string `yaml:"id"` @@ -21,12 +19,6 @@ type ruleFile struct { } `yaml:"rules"` } -// ExpandRuleIDs returns ruleIDs together with every rule transitively -// referenced through join.refs, resolved against the given ruleset roots. A -// full rule id is ":"; a ref is the same path -// with '#' instead of ':'. Originals come first, the rest in BFS order; -// duplicates are removed and ids that can't be resolved on disk pass through -// unchanged. func ExpandRuleIDs(ruleIDs []string, rulesetRoots []string) []string { seen := make(map[string]bool, len(ruleIDs)) var result []string @@ -50,8 +42,6 @@ func ExpandRuleIDs(ruleIDs []string, rulesetRoots []string) []string { return result } -// refsOf returns the full ids referenced by the rule named id via join.refs, -// or nil when the rule's file or entry can't be found. func refsOf(id string, rulesetRoots []string) []string { relPath, shortID, ok := splitRuleID(id) if !ok { @@ -76,7 +66,6 @@ func refsOf(id string, rulesetRoots []string) []string { return nil } -// splitRuleID splits "java/security/x.yaml:short" into "java/security/x.yaml" and "short". func splitRuleID(id string) (relPath, shortID string, ok bool) { idx := strings.LastIndex(id, ":") if idx < 0 { @@ -85,10 +74,6 @@ func splitRuleID(id string) (relPath, shortID string, ok bool) { return id[:idx], id[idx+1:], true } -// refToRuleID converts a join ref to a full rule id. A cross-file ref is -// "path.yaml#short"; a fragment-less ref names a rule in the referencing file -// itself, so it is qualified with that file's path — mirroring the analyzer's -// resolveRefRuleId, which resolves a bare ref to ":". func refToRuleID(ref, currentRelPath string) string { idx := strings.LastIndex(ref, "#") if idx < 0 { @@ -97,7 +82,6 @@ func refToRuleID(ref, currentRelPath string) string { return ref[:idx] + ":" + ref[idx+1:] } -// loadRuleFile finds relPath under one of the roots and parses it. func loadRuleFile(relPath string, rulesetRoots []string) (ruleFile, bool) { for _, root := range rulesetRoots { data, err := os.ReadFile(filepath.Join(root, filepath.FromSlash(relPath))) diff --git a/cli/internal/rules/refs_test.go b/cli/internal/rules/refs_test.go index 109a70693..ebf11b07f 100644 --- a/cli/internal/rules/refs_test.go +++ b/cli/internal/rules/refs_test.go @@ -6,7 +6,6 @@ import ( "testing" ) -// writeRule writes a ruleset YAML at root/relPath, creating parent dirs. func writeRule(t *testing.T, root, relPath, content string) { t.Helper() full := filepath.Join(root, filepath.FromSlash(relPath)) @@ -71,7 +70,6 @@ func TestExpandRuleIDs_UnresolvedPassesThrough(t *testing.T) { func TestExpandRuleIDs_MultipleRoots(t *testing.T) { builtin := t.TempDir() custom := t.TempDir() - // Custom security rule refs a builtin lib rule — the agent's main case. writeRule(t, custom, "java/security/my.yaml", "rules:\n - id: my\n join:\n refs:\n - rule: java/lib/generic/src.yaml#src\n") writeRule(t, builtin, "java/lib/generic/src.yaml", "rules:\n - id: src\n") diff --git a/cli/internal/testapprox/testapprox.go b/cli/internal/testapprox/testapprox.go index 36c3e0170..3a686c11c 100644 --- a/cli/internal/testapprox/testapprox.go +++ b/cli/internal/testapprox/testapprox.go @@ -1,5 +1,3 @@ -// Package testapprox bundles the fixed source->sink rule the `opentaint dev test-approximations` -// harness applies, and the Taint source/sink helper scaffolded into an approximation test project. package testapprox import ( @@ -11,8 +9,6 @@ import ( "github.com/seqra/opentaint/internal/utils" ) -// fixedRuleFileName is the rule's path relative to the ruleset root, and the value -// samples reference in @PositiveRuleSample/@NegativeRuleSample. const fixedRuleFileName = "approximation-rule.yaml" //go:embed example/approximation-rule.yaml @@ -21,8 +17,6 @@ var fixedRule []byte //go:embed example/src/main/java/test/Taint.java var taintJava []byte -// WriteFixedRule writes the fixed harness rule into dir and returns its path. Used by -// test-approximations to apply the rule automatically from a throwaway ruleset directory. func WriteFixedRule(dir string) (string, error) { path := filepath.Join(dir, fixedRuleFileName) if err := os.WriteFile(path, fixedRule, 0o644); err != nil { @@ -31,9 +25,6 @@ func WriteFixedRule(dir string) (string, error) { return path, nil } -// Scaffold writes the fixed rule (for reference — test-approximations applies its own bundled copy) -// and the Taint source/sink helper. Samples are the agent's to write; the approximation under test -// lives in its own unit folder (.opentaint/approximations/), never inside this test project. func Scaffold(projectDir string) error { return utils.WriteFiles(map[string][]byte{ filepath.Join(projectDir, fixedRuleFileName): fixedRule, diff --git a/cli/internal/testproject/testproject.go b/cli/internal/testproject/testproject.go index bddaf3605..df84b47c4 100644 --- a/cli/internal/testproject/testproject.go +++ b/cli/internal/testproject/testproject.go @@ -1,7 +1,3 @@ -// Package testproject scaffolds the Gradle layout shared by rule and -// approximation test projects: the build files and the test-util jar under -// libs/. The per-flavor payloads (samples, marker rules) stay in -// internal/testrule and internal/testapprox. package testproject import ( @@ -13,10 +9,6 @@ import ( "github.com/seqra/opentaint/internal/utils" ) -// Bootstrap creates the shared Gradle project layout in outputDir: the -// build/settings files referencing the test-util jar, and the jar itself -// copied from testUtilJarSrc into libs/. Parent directories are created as -// needed by the underlying write helpers. func Bootstrap(outputDir, projectName string, dependencies []string, testUtilJarSrc string) error { if err := utils.CopyFile(testUtilJarSrc, filepath.Join(outputDir, "libs", testutil.JarName)); err != nil { return fmt.Errorf("copy test-util JAR: %w", err) diff --git a/cli/internal/testproject/testproject_test.go b/cli/internal/testproject/testproject_test.go index 8eaa93eeb..74da2f086 100644 --- a/cli/internal/testproject/testproject_test.go +++ b/cli/internal/testproject/testproject_test.go @@ -24,7 +24,6 @@ func TestBootstrapWritesGradleLayoutAndJar(t *testing.T) { if err != nil { t.Fatal(err) } - // The gradle dependency and the copied jar must share one filename constant. if !strings.Contains(string(build), "libs/"+testutil.JarName) { t.Errorf("build.gradle.kts must reference libs/%s, got:\n%s", testutil.JarName, build) } diff --git a/cli/internal/testrule/testrule.go b/cli/internal/testrule/testrule.go index 03eb96c1d..e72405c6a 100644 --- a/cli/internal/testrule/testrule.go +++ b/cli/internal/testrule/testrule.go @@ -1,7 +1,3 @@ -// Package testrule bundles the generic Taint source/sink lib rules and the Taint -// helper scaffolded into a rule test project, so a package's source/sink lib rules can -// be exercised against a fixed, type-agnostic counterpart (the generic marker), the way -// testapprox bundles the fixed approximation rule. package testrule import ( @@ -20,18 +16,12 @@ var genericSource []byte //go:embed example/rules/java/lib/test/generic-sink.yaml var genericSink []byte -// Marker locations, relative to the test project root. The marker lib rules and the -// test join an agent writes alongside them live only under the test-rules dir — never in -// .opentaint/rules — so they never reach the main project scan. The rule paths double -// as the values an agent refs from a test join (relative to the test-rules root). const ( markersDir = "test-rules" genericSourceRule = "java/lib/test/generic-source.yaml" genericSinkRule = "java/lib/test/generic-sink.yaml" ) -// Scaffold writes the Taint helper into the project sources and the generic -// source/sink marker lib rules into the project's test-rules ruleset. func Scaffold(projectDir string) error { return utils.WriteFiles(map[string][]byte{ filepath.Join(projectDir, "src", "main", "java", "test", "Taint.java"): taintJava, diff --git a/cli/internal/testutil/testutil.go b/cli/internal/testutil/testutil.go index b32200ef8..8e81c5bef 100644 --- a/cli/internal/testutil/testutil.go +++ b/cli/internal/testutil/testutil.go @@ -1,5 +1,3 @@ -// Package testutil embeds the opentaint-sast-test-util.jar and extracts it -// on demand to ~/.opentaint/test-util/ when no bundled copy is available. package testutil import ( @@ -20,14 +18,8 @@ import ( //go:embed jar/* var jarFiles embed.FS -// JarName is the filename of the test-util JAR. const JarName = "opentaint-sast-test-util.jar" -// ResolveJar locates the opentaint-sast-test-util.jar, checking, in order: -// 1. Bundled next to the binary: /lib/ -// 2. Managed install: ~/.opentaint/install/lib/ -// 3. Dev build: /core/opentaint-sast-test-util/build/libs/ -// 4. The copy embedded in this binary, extracted on demand. func ResolveJar() (string, error) { if libPath := utils.GetBundledLibPath(); libPath != "" { candidate := filepath.Join(libPath, JarName) @@ -43,8 +35,6 @@ func ResolveJar() (string, error) { } } - // Dev build: walk up from the exe dir (typically cli/bin/opentaint, so the - // repo root is a few levels up) to find core/.../build/libs/. if exe, err := os.Executable(); err == nil { exe, _ = filepath.EvalSymlinks(exe) dir := filepath.Dir(exe) @@ -72,10 +62,6 @@ func contentHash(jarData []byte) string { return hex.EncodeToString(h[:]) } -// extractJar extracts the embedded test-util JAR to ~/.opentaint/test-util/ -// and returns the path to the extracted JAR. Uses a SHA-256 content hash -// marker for staleness detection so the extracted copy is refreshed when the -// binary is rebuilt with a newer JAR. func extractJar() (string, error) { jarData, err := embeddedJarData() if err != nil { diff --git a/cli/internal/utils/bundled_path_test.go b/cli/internal/utils/bundled_path_test.go index c02f4693f..39bbf0d57 100644 --- a/cli/internal/utils/bundled_path_test.go +++ b/cli/internal/utils/bundled_path_test.go @@ -6,8 +6,6 @@ import ( "testing" ) -// FHS layout: `make install` puts the binary in /bin and the artifacts -// in /lib, so lib is a sibling of the binary's directory. func TestResolveBundledDir_FHSLayout(t *testing.T) { prefix := t.TempDir() binDir := filepath.Join(prefix, "bin") @@ -24,8 +22,6 @@ func TestResolveBundledDir_FHSLayout(t *testing.T) { } } -// Flat layout: the managed install (~/.opentaint/install/) keeps the binary, -// lib/ and jre/ in the same directory. func TestResolveBundledDir_FlatLayout(t *testing.T) { dir := t.TempDir() libDir := filepath.Join(dir, "lib") @@ -38,8 +34,6 @@ func TestResolveBundledDir_FlatLayout(t *testing.T) { } } -// When neither layout has the directory, fall back to the flat path so callers -// keep a stable default probe/download target (preserves prior behavior). func TestResolveBundledDir_NoneFallsBackToFlat(t *testing.T) { binDir := filepath.Join(t.TempDir(), "bin") if err := os.MkdirAll(binDir, 0o755); err != nil { diff --git a/cli/internal/utils/copy_file.go b/cli/internal/utils/copy_file.go index 2aaefcae2..b4758edb4 100644 --- a/cli/internal/utils/copy_file.go +++ b/cli/internal/utils/copy_file.go @@ -6,8 +6,6 @@ import ( "os" ) -// CopyFile copies the file at src to dst, creating parent directories as -// needed. dst is truncated if it already exists. func CopyFile(src, dst string) error { in, err := os.Open(src) if err != nil { diff --git a/cli/internal/utils/display_version.go b/cli/internal/utils/display_version.go index bb92accb7..a92dfa7d1 100644 --- a/cli/internal/utils/display_version.go +++ b/cli/internal/utils/display_version.go @@ -7,25 +7,15 @@ import ( "github.com/seqra/opentaint/internal/globals" ) -// ArtifactDisplayVersion renders an artifact's full display label, keeping the -// "/" version prefix. Used for the SARIF tool version, where the prefix -// is part of the identifier. func ArtifactDisplayVersion(def globals.ArtifactDef) string { tier, path, bundledRelease := artifactResolution(def) return displayVersion(def.Version, def.Override, tier, path, bundledRelease) } -// ArtifactVersionWithPath is the version with the redundant "/" prefix -// stripped, for a single-line display that has no separate path field (e.g. -// scan's "Analyzer:" node). A custom build keeps its jar path — "custom ()" -// — since that line is the only place the path appears. func ArtifactVersionWithPath(def globals.ArtifactDef) string { return strings.TrimPrefix(ArtifactDisplayVersion(def), def.Kind()+"/") } -// ArtifactVersion is the version for a display that shows the resolved path on -// its own line (e.g. health's tree). A managed release yields the bare version; -// a custom build collapses to "custom", so the path isn't repeated. func ArtifactVersion(def globals.ArtifactDef) string { tier, _, bundledRelease := artifactResolution(def) if isCustomArtifact(def.Version, def.Override, tier, bundledRelease) { @@ -34,10 +24,6 @@ func ArtifactVersion(def globals.ArtifactDef) string { return strings.TrimPrefix(def.Version, def.Kind()+"/") } -// artifactResolution resolves the artifact's tier and path, unless the def -// carries an explicit jar override (in which case neither is needed). The -// release-marker check only runs for bundled resolutions, where it decides -// between "official release" and "local build". func artifactResolution(def globals.ArtifactDef) (tier, path string, bundledRelease bool) { if def.Override == "" { tier, path, _ = resolveArtifactTier(def) @@ -48,9 +34,6 @@ func artifactResolution(def globals.ArtifactDef) (tier, path string, bundledRele return tier, path, bundledRelease } -// isCustomArtifact reports whether the artifact is a custom build — an explicit -// jar override, an unpinned version, or a bundled build next to the binary -// without the release pipeline's version marker — rather than a managed release. func isCustomArtifact(version, overridePath, resolvedTier string, bundledRelease bool) bool { if overridePath != "" || version == "" { return true @@ -58,9 +41,6 @@ func isCustomArtifact(version, overridePath, resolvedTier string, bundledRelease return resolvedTier == TierBundled && !bundledRelease } -// displayVersion renders an artifact's label: a custom build as "custom ()" -// (the override path if set, otherwise the resolved path), and a managed release -// as its version string. func displayVersion(version, overridePath, resolvedTier, resolvedPath string, bundledRelease bool) string { if isCustomArtifact(version, overridePath, resolvedTier, bundledRelease) { path := overridePath diff --git a/cli/internal/utils/display_version_test.go b/cli/internal/utils/display_version_test.go index 6a30bb71a..7df12c528 100644 --- a/cli/internal/utils/display_version_test.go +++ b/cli/internal/utils/display_version_test.go @@ -82,7 +82,6 @@ func TestArtifactDisplayVersionOverride(t *testing.T) { } func TestArtifactVersionShortVariants(t *testing.T) { - // Override cases never resolve tiers, so they are host-independent. custom := globals.ArtifactByKind("analyzer").WithVersion("analyzer/2026.05.27.68ab20a") custom.Override = "/home/dev/analyzer.jar" diff --git a/cli/internal/utils/ensure_rules.go b/cli/internal/utils/ensure_rules.go index 33609eb07..0b8806edd 100644 --- a/cli/internal/utils/ensure_rules.go +++ b/cli/internal/utils/ensure_rules.go @@ -5,10 +5,6 @@ import ( "github.com/seqra/opentaint/internal/output" ) -// EnsureRulesPath returns the on-disk path to the built-in rules for the -// configured version, downloading and unpacking them if they are not already -// present. The path is returned even when the download fails, so callers can -// still report where the rules were expected. func EnsureRulesPath(printer *output.Printer) (string, error) { path, err := GetRulesPath(globals.Config.Rules.Version) if err != nil { diff --git a/cli/internal/utils/opentaint_command_builder.go b/cli/internal/utils/opentaint_command_builder.go index 1efff7a58..1356a557b 100644 --- a/cli/internal/utils/opentaint_command_builder.go +++ b/cli/internal/utils/opentaint_command_builder.go @@ -181,7 +181,6 @@ func (cb *OpentaintCommandBuilder) WithRuleID(ruleIDs []string) *OpentaintComman return cb } -// WithPassthroughApproximations adds repeatable --passthrough-approximations paths. func (cb *OpentaintCommandBuilder) WithPassthroughApproximations(paths []string) *OpentaintCommandBuilder { for _, p := range paths { if p != "" { @@ -191,7 +190,6 @@ func (cb *OpentaintCommandBuilder) WithPassthroughApproximations(paths []string) return cb } -// WithDataflowApproximations adds repeatable --dataflow-approximations paths. func (cb *OpentaintCommandBuilder) WithDataflowApproximations(paths []string) *OpentaintCommandBuilder { for _, p := range paths { if p != "" { @@ -201,7 +199,6 @@ func (cb *OpentaintCommandBuilder) WithDataflowApproximations(paths []string) *O return cb } -// WithTrackExternalMethods sets the track-external-methods flag. func (cb *OpentaintCommandBuilder) WithTrackExternalMethods(enabled bool) *OpentaintCommandBuilder { if enabled { cb.boolFlags["track-external-methods"] = true diff --git a/cli/internal/utils/opentaint_home.go b/cli/internal/utils/opentaint_home.go index 0bad48c86..0c5e6bb53 100644 --- a/cli/internal/utils/opentaint_home.go +++ b/cli/internal/utils/opentaint_home.go @@ -30,7 +30,6 @@ func GetOpenTaintHome() (string, error) { return path, nil } -// PathExists reports whether a path exists on disk. func PathExists(p string) bool { _, err := os.Stat(p) return err == nil @@ -50,17 +49,6 @@ func exeDir() string { return filepath.Dir(exe) } -// resolveBundledDir locates a bundled artifact directory (e.g. "lib" or "jre") -// relative to the binary, supporting both supported install layouts: -// -// - flat: / — the managed install (~/.opentaint/install/) keeps -// the binary, lib/ and jre/ in the same directory. -// - FHS: /../ — `make install` puts the binary in /bin -// and artifacts in /lib, so the directory is a sibling of bin/. -// -// The first layout whose directory exists wins. When neither exists it falls -// back to the flat path so callers keep a stable default probe/download target. -// Returns empty string if exeDir is empty (executable path undeterminable). func resolveBundledDir(exeDir, name string) string { if exeDir == "" { return "" @@ -115,14 +103,8 @@ func GetInstallJREPath() string { return "" } -// BundledReleaseMarkerName is the manifest the release pipeline writes next to -// the bundled jars; its presence (matching the embedded versions.yaml) marks -// the bundled lib dir as an unmodified official release rather than a local build. const BundledReleaseMarkerName = "release-versions.yaml" -// IsBundledRelease reports whether the bundled lib dir next to the binary was -// produced by the release pipeline for exactly the embedded bind versions. -// A `make install` dev layout has no marker and reads as a custom build. func IsBundledRelease() bool { lib := GetBundledLibPath() if lib == "" { @@ -197,13 +179,6 @@ func ReconcileInstallMarker() { _ = WriteInstallVersionMarker() } -// resolveArtifactTier resolves both the storage tier and path for an artifact by -// checking tiers in order: -// 1. Bundled path (next to binary) — only if version matches bindVersion -// 2. Install path (~/.opentaint/install/lib/) — only if version matches bindVersion -// 3. Cache path (~/.opentaint/) -// -// When no tier exists yet, it returns the last tier as the default download target. func resolveArtifactTier(def globals.ArtifactDef) (string, string, error) { tiers, err := ArtifactTiers(def) if err != nil { @@ -216,15 +191,11 @@ func resolveArtifactTier(def globals.ArtifactDef) (string, string, error) { return last.Name, last.Path, nil } -// resolveArtifactPath resolves the path for an artifact. See resolveArtifactTier. func resolveArtifactPath(def globals.ArtifactDef) (string, error) { _, path, err := resolveArtifactTier(def) return path, err } -// ResolveJarPath resolves an artifact's jar path, honoring an explicit override -// (which bypasses version-based resolution) and otherwise falling back to the -// versioned artifact path. func ResolveJarPath(def globals.ArtifactDef) (string, error) { if def.Override != "" { return def.Override, nil diff --git a/cli/internal/utils/tier.go b/cli/internal/utils/tier.go index df5120961..e709cdc05 100644 --- a/cli/internal/utils/tier.go +++ b/cli/internal/utils/tier.go @@ -116,9 +116,6 @@ func JRETiers(javaVersion int, cacheDir string) []Tier { return tiers } -// FindCurrentManagedJRE returns the first non-stale managed JRE tier (bundled -// or install) containing a java binary — the same probe the analyzer's Java -// runner uses, so health and the runner can't drift. Nil when none exists. func FindCurrentManagedJRE() *Tier { return FindExistingJRE(CurrentTiers(ManagedJRETiers(), IsInstallCurrent())) } diff --git a/cli/internal/utils/write_files.go b/cli/internal/utils/write_files.go index 48b8cff52..7c7f87926 100644 --- a/cli/internal/utils/write_files.go +++ b/cli/internal/utils/write_files.go @@ -5,9 +5,6 @@ import ( "os" ) -// WriteFiles writes each path->content entry to disk, creating parent -// directories as needed. It is the shared primitive behind the test-project -// scaffolders (see internal/testrule and internal/testapprox). func WriteFiles(files map[string][]byte) error { for path, content := range files { if err := EnsureParentDir(path); err != nil { diff --git a/skills/appsec-agent/scripts/sarif-to-findings.py b/skills/appsec-agent/scripts/sarif-to-findings.py index 6ef073f85..ba461185d 100644 --- a/skills/appsec-agent/scripts/sarif-to-findings.py +++ b/skills/appsec-agent/scripts/sarif-to-findings.py @@ -43,9 +43,6 @@ def docker_name(seed, taken): return name -# Prefer a stable, named fingerprint kind. vulnerabilitySourceSinkHash is more stable -# than vulnerabilityWithTraceHash — it keys on the source+sink and survives changes to -# the intermediate trace path. Fall back to any fingerprint value, then a content hash. _FP_PREFERENCE = ("vulnerabilitySourceSinkHash", "vulnerabilityWithTraceHash") @@ -73,7 +70,6 @@ def scan_results(sarif): """rule_id -> {hash: message}""" out = {} for run in sarif.get("runs") or []: - # An aborted run may carry an explicit "results": null (SARIF allows it). for res in run.get("results") or []: rid = res.get("ruleId") or "unknown" msg = (res.get("message", {}) or {}).get("text", "").strip() @@ -146,7 +142,7 @@ def main(): out = Path(args.out) out.mkdir(parents=True, exist_ok=True) - existing = {} # rule_id -> [(path, hashes)] + existing = {} taken = set() for p in sorted(glob.glob(str(out / "*.yaml"))): name, rid, hashes = parse_existing(Path(p).read_text(encoding="utf-8")) @@ -172,7 +168,6 @@ def main(): if not new: unchanged += 1 continue - # add new hashes to the first finding file for this rule; reset verdict path, hashes = files[0] merged = sorted(set(hashes) | set(new)) text = path.read_text(encoding="utf-8") diff --git a/skills/discover-attack-surface/scripts/package-usages.ps1 b/skills/discover-attack-surface/scripts/package-usages.ps1 index 6b886963a..174c95783 100644 --- a/skills/discover-attack-surface/scripts/package-usages.ps1 +++ b/skills/discover-attack-surface/scripts/package-usages.ps1 @@ -1,13 +1,3 @@ -<# -package-usages.ps1 - -Windows equivalent of package-usages.sh: print the distinct methods of dependency - that the project's OWN compiled classes call. Scans every moduleClasses -entry in /project.yaml (class dirs or jars) and keeps only call sites -whose owner is in , deduped. When the modules carry a `packages:` list, -only classes under those roots are scanned; otherwise moduleClasses is already -project-only. The separate `dependencies:` list is never touched. -#> param( [Parameter(Mandatory)][string]$ModelDir, [Parameter(Mandatory)][string]$Package @@ -16,7 +6,6 @@ param( $pp = $Package -replace '\.','/' $yaml = Get-Content (Join-Path $ModelDir 'project.yaml') -# read a YAML block list — the "- item" lines under : function Get-YamlList([string]$key) { $f = $false foreach ($l in $yaml) { @@ -42,7 +31,7 @@ $out = foreach ($e in (Get-YamlList 'moduleClasses')) { } if ($roots) { $names = $names | Where-Object { ($_ -replace '\.','/') -match "^($roots)/" } } if ($names) { - $argfile = New-TemporaryFile # pass class names via @argfile to dodge command-line length limits + $argfile = New-TemporaryFile $names | Set-Content -LiteralPath $argfile & javap -c -p -classpath $p "@$argfile" 2>$null Remove-Item -LiteralPath $argfile diff --git a/skills/discover-attack-surface/scripts/package-usages.sh b/skills/discover-attack-surface/scripts/package-usages.sh index 6c8f4adbd..d4c2bbf80 100755 --- a/skills/discover-attack-surface/scripts/package-usages.sh +++ b/skills/discover-attack-surface/scripts/package-usages.sh @@ -1,21 +1,11 @@ #!/usr/bin/env bash -# package-usages.sh -# -# Print the distinct methods of dependency that the project's OWN -# compiled classes call. Scans every moduleClasses entry in /project.yaml -# (class dirs or jars) and keeps only call sites whose owner is in , -# deduped. A model's moduleClasses can mix project + dependency jars, so when the -# modules carry a `packages:` list, only classes under those roots are scanned; -# when there's no `packages:` list, moduleClasses is already project-only. The -# separate `dependencies:` list is never touched. MODEL=$1; PKG=$2 [ -n "$MODEL" ] && [ -n "$PKG" ] || { echo "usage: package-usages.sh " >&2; exit 2; } pp=${PKG//.//} -# read a YAML block list — the "- item" lines under : ylist(){ awk -v k="$1" '$0~"^[[:space:]]*"k":[[:space:]]*$"{f=1;next} f&&/^[[:space:]]*-[[:space:]]/&&$0!~/:/{sub(/^[^-]*-[[:space:]]*/,"");print;next} f&&/:/{f=0}' "$MODEL/project.yaml"; } -roots=$(ylist packages | tr . / | paste -sd'|' -) # project roots; empty ⇒ scan all moduleClasses +roots=$(ylist packages | tr . / | paste -sd'|' -) ylist moduleClasses | while IFS= read -r e; do p="$MODEL/$e" { if [ -d "$p" ]; then (cd "$p" && find . -name '*.class' | sed 's#^\./##'); else jar tf "$p" | grep '\.class$'; fi; } \ From b6cb1d7ae68ae6f173bc5af265cb5baaf4f86a43 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 11 Jun 2026 00:41:35 +0200 Subject: [PATCH 52/54] refactor: unify version marker name to .versions across tiers Use a single VersionMarkerName constant for both the bundled tier (next to the binary) and the install tier, renaming the bundled marker from release-versions.yaml to .versions to match the install tier. Add an explicit lib/.versions entry to the goreleaser archives so the dotfile is reliably bundled regardless of how lib/** treats hidden files. --- .github/workflows/release-cli.yaml | 2 +- cli/.goreleaser.yaml | 4 ++++ cli/internal/utils/opentaint_home.go | 14 +++++++++----- cli/internal/utils/opentaint_home_test.go | 10 +++++----- cli/internal/utils/updater.go | 2 +- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/.github/workflows/release-cli.yaml b/.github/workflows/release-cli.yaml index 8fe97c06e..e2208b763 100644 --- a/.github/workflows/release-cli.yaml +++ b/.github/workflows/release-cli.yaml @@ -169,7 +169,7 @@ jobs: mkdir -p lib/rules tar -xzf /tmp/opentaint-rules.tar.gz -C lib/rules - cp internal/globals/versions.yaml lib/release-versions.yaml + cp internal/globals/versions.yaml lib/.versions echo "Bundled artifacts:" ls -la lib/ diff --git a/cli/.goreleaser.yaml b/cli/.goreleaser.yaml index c2f5167a8..45714aa75 100644 --- a/cli/.goreleaser.yaml +++ b/cli/.goreleaser.yaml @@ -33,6 +33,8 @@ archives: files: - src: lib/** dst: lib + - src: lib/.versions + dst: lib - id: full formats: [tar.gz] @@ -43,6 +45,8 @@ archives: files: - src: lib/** dst: lib + - src: lib/.versions + dst: lib checksum: disable: true diff --git a/cli/internal/utils/opentaint_home.go b/cli/internal/utils/opentaint_home.go index 0c5e6bb53..c397e10ce 100644 --- a/cli/internal/utils/opentaint_home.go +++ b/cli/internal/utils/opentaint_home.go @@ -103,14 +103,18 @@ func GetInstallJREPath() string { return "" } -const BundledReleaseMarkerName = "release-versions.yaml" +// VersionMarkerName is the byte-for-byte copy of the embedded versions.yaml +// dropped alongside an artifact tier so a later run can detect whether that +// tier matches the current bind version. Used both next to the binary (bundled +// tier) and in ~/.opentaint/install/ (install tier). +const VersionMarkerName = ".versions" func IsBundledRelease() bool { lib := GetBundledLibPath() if lib == "" { return false } - data, err := os.ReadFile(filepath.Join(lib, BundledReleaseMarkerName)) + data, err := os.ReadFile(filepath.Join(lib, VersionMarkerName)) if err != nil { return false } @@ -124,7 +128,7 @@ func IsInstallCurrent() bool { if installDir == "" { return false } - data, err := os.ReadFile(filepath.Join(installDir, ".versions")) + data, err := os.ReadFile(filepath.Join(installDir, VersionMarkerName)) if err != nil { return false } @@ -141,7 +145,7 @@ func WriteInstallVersionMarker() error { if err := os.MkdirAll(installDir, 0o755); err != nil { return err } - return os.WriteFile(filepath.Join(installDir, ".versions"), globals.GetVersionsYAML(), 0o644) + return os.WriteFile(filepath.Join(installDir, VersionMarkerName), globals.GetVersionsYAML(), 0o644) } // CleanInstallDir removes the install-tier lib and jre directories along with @@ -151,7 +155,7 @@ func CleanInstallDir() error { if installDir == "" { return nil } - for _, sub := range []string{"lib", "jre", ".versions"} { + for _, sub := range []string{"lib", "jre", VersionMarkerName} { if err := os.RemoveAll(filepath.Join(installDir, sub)); err != nil { return err } diff --git a/cli/internal/utils/opentaint_home_test.go b/cli/internal/utils/opentaint_home_test.go index 962e33bc7..5ca353560 100644 --- a/cli/internal/utils/opentaint_home_test.go +++ b/cli/internal/utils/opentaint_home_test.go @@ -37,7 +37,7 @@ func TestIsInstallCurrent_StaleMarker(t *testing.T) { t.Fatal(err) } // Write a marker with different content - if err := os.WriteFile(filepath.Join(installDir, ".versions"), []byte("old-content"), 0o644); err != nil { + if err := os.WriteFile(filepath.Join(installDir, VersionMarkerName), []byte("old-content"), 0o644); err != nil { t.Fatal(err) } @@ -57,7 +57,7 @@ func TestCleanInstallDir(t *testing.T) { // Create install dirs with content createTestFile(t, filepath.Join(libDir, "artifact.jar"), 100) createTestFile(t, filepath.Join(jreDir, "bin", "java"), 50) - if err := os.WriteFile(filepath.Join(installDir, ".versions"), []byte("marker"), 0o644); err != nil { + if err := os.WriteFile(filepath.Join(installDir, VersionMarkerName), []byte("marker"), 0o644); err != nil { t.Fatal(err) } @@ -65,8 +65,8 @@ func TestCleanInstallDir(t *testing.T) { t.Fatalf("CleanInstallDir() error = %v", err) } - // Verify lib, jre, and .versions are removed - for _, p := range []string{libDir, jreDir, filepath.Join(installDir, ".versions")} { + // Verify lib, jre, and the version marker are removed + for _, p := range []string{libDir, jreDir, filepath.Join(installDir, VersionMarkerName)} { if _, err := os.Stat(p); !os.IsNotExist(err) { t.Errorf("expected %s to be removed", p) } @@ -106,7 +106,7 @@ func TestWriteInstallVersionMarker_Content(t *testing.T) { t.Fatalf("WriteInstallVersionMarker() error = %v", err) } - markerPath := filepath.Join(home, ".opentaint", "install", ".versions") + markerPath := filepath.Join(home, ".opentaint", "install", VersionMarkerName) data, err := os.ReadFile(markerPath) if err != nil { t.Fatalf("failed to read marker: %v", err) diff --git a/cli/internal/utils/updater.go b/cli/internal/utils/updater.go index f9584ff16..235e36d23 100644 --- a/cli/internal/utils/updater.go +++ b/cli/internal/utils/updater.go @@ -215,7 +215,7 @@ func SelfUpdate(archivePath, installDir string) error { // (called from PersistentPreRunE) handles it on the new binary's first run. if !libBundled || !jreBundled { if dir := GetInstallDir(); dir != "" { - _ = os.Remove(filepath.Join(dir, ".versions")) + _ = os.Remove(filepath.Join(dir, VersionMarkerName)) } } From 5786392a5faffdcfadec7233675c798e605a9167 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 11 Jun 2026 00:43:12 +0200 Subject: [PATCH 53/54] docs: drop experimental label from test command --- cli/cmd/test.go | 4 ++-- docs/usage.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/cmd/test.go b/cli/cmd/test.go index 7fe8bffd5..409240606 100644 --- a/cli/cmd/test.go +++ b/cli/cmd/test.go @@ -8,8 +8,8 @@ import ( var testCmd = &cobra.Command{ Use: "test", - Short: "Create and run rule and approximation tests (experimental)", - Long: `Experimental tools for creating test projects, running annotated rule and approximation tests, and debugging rule reachability.`, + Short: "Create and run rule and approximation tests", + Long: `Tools for creating test projects, running annotated rule and approximation tests, and debugging rule reachability.`, } var testRuleCmd = &cobra.Command{ diff --git a/docs/usage.md b/docs/usage.md index c20f807fe..84f9359ed 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -140,7 +140,7 @@ With no flags, `health` shows the autobuilder, analyzer, built-in rules, and Jav ### opentaint test -The `test` command group is experimental tooling for rule and approximation development. +The `test` command group is tooling for rule and approximation development. #### Rule tests From 8c92dfb618d4acfd98090af7839a3aefbb9c8121 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 11 Jun 2026 01:07:40 +0200 Subject: [PATCH 54/54] fix: drop fragile literal lib/.versions goreleaser entry The explicit `- src: lib/.versions` archive entries added in b6cb1d7ae were redundant and fragile: goreleaser's fileglob already bundles the dotfile via `lib/**`, while a literal pattern with no wildcard hard-fails the entire release when the file is absent ("globbing failed for pattern lib/.versions"). Remove both entries and guard marker creation directly in the workflow so a genuinely missing .versions fails loudly at the point it should be written rather than silently shipping or dying in goreleaser. --- .github/workflows/release-cli.yaml | 4 ++++ cli/.goreleaser.yaml | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release-cli.yaml b/.github/workflows/release-cli.yaml index e2208b763..9f5104c35 100644 --- a/.github/workflows/release-cli.yaml +++ b/.github/workflows/release-cli.yaml @@ -170,6 +170,10 @@ jobs: tar -xzf /tmp/opentaint-rules.tar.gz -C lib/rules cp internal/globals/versions.yaml lib/.versions + if [ ! -f lib/.versions ]; then + echo "::error::lib/.versions was not created; the bundled release would ship without a version marker" >&2 + exit 1 + fi echo "Bundled artifacts:" ls -la lib/ diff --git a/cli/.goreleaser.yaml b/cli/.goreleaser.yaml index 45714aa75..c2f5167a8 100644 --- a/cli/.goreleaser.yaml +++ b/cli/.goreleaser.yaml @@ -33,8 +33,6 @@ archives: files: - src: lib/** dst: lib - - src: lib/.versions - dst: lib - id: full formats: [tar.gz] @@ -45,8 +43,6 @@ archives: files: - src: lib/** dst: lib - - src: lib/.versions - dst: lib checksum: disable: true

7wZLEe1+B1^QDTlbe@fz*2c;A8NUy??qj`zRDkuk!BYu;jP9e z%feT@nyHA?lmCqr8huiU+F2K&L9M2RQ6RL}4{1lO0(Y&5h0;;(KEO~`y1qxPk~k)K z!@VNeyfTFiP>jT96A^s}XE||5A0}fGb`J&L;pk9584 znn(&^(#IES~%8aKp; z*sY$5KG}EU&nMS}wQP&1S!Ty_su)q0l|PgV*d)2R3YNVVmM1L!rxs|~)}c*__CraO zVJ+45WA%MnnTSHAE9d*P@yHDH-ru_AiTdKP5->VBf;KKI)H(ElN%s{ z@YX%b6no^aGod$HUP&T-6?rR$Hut!gBvnT&nRc+JRRem|lJh#-VAacM7^UF`AJ3#W zN~-!MtvJUP*-|RRci`EO-OFZMv~@$g7vNqpu>+;q(AlhT@l`l-<)en3Tv^mg!Yov} z`?OqP*h`#jl+b#evMtbQ@6s=;Be9EDztXskWJES6jwGB77&|qar9+K4Oxfda8$P3! z=!YB$jR*A}3Ro}NEppjkuac56=dVQpGnZH(H*9^>Rkd1bx$07L{v0jhEv4A$d27nD zQcC*QR1DoMwWj{YF8ZF7Q_t$F?g}^l>R$}R%*7vqD?>M4B$ic)F(}iLRnh;uT~_EJxQJAk3^8>0 zo)@@ey0!q2f@7*g$381I-CU)qM0aJ@U6k!&jE zZy`O@R&ht3qL!tRD)eloJh1Px0C>Wt0J;-?P*$Ellhmk^3E2arLyN^>yiLTGm6m{fo)uSl62 zQYa$lh4y)+=EKY1Imkb89W?kftmUeV=V?_@vbzW|qTc?yjGy%AEi}a%WA>CqE+Q<* zo+|v3gt*dOo8~V=`OJ~ACLv8js}A}Iv@hzw(m!hpMZ*RjT=#`+OQZw$%szhfmPAhd zc7EWNm=0HMoNQz(|0IYIhoQj0PdvFcmMTIY0!*Z-%!_&U_fpzMFVdHwP+uJkstXh# zY+pF+j$+lz*l-iEL#Ew(*u$&q#czzGeN?mg79sPO^JK`0B zsIoFy8|J^7lA)Nyx-DB$n6)GA(1Y|O(_CwS7s|ww%_JgyFhp=8afEUe#d_@0SH7dL z)>>*R)Bmk%s;NpASQaOw3MQmF{5K}exD4AvT=lp?4J~H6Tw3`#y*B`OH~VE{95Q6Q z-T@PHhmo--*B%_g4aLeKrB_=1P2u*$!Kc@hOv*RVNQnOts@sKom}gsihqoqIE?qAq z)SVA`3r^EE4Vqdu6`F+7fp&Q5YFcO5(dM%1ka3Gi^?NO2lkx$A+$|$sOR7AL{HnMT zzB004u|N;3Z_C7E3w5BgR1?jdplF0qIrntRg1n$`L|o?p0!={rmiEpv@BP5!Td~ld zNM-F{yof!Uk*jpN?Hyu!K3!}#h{=|l+#WsFfZeInJEx;nG~APOxgDdk(-XTs!0#2v z%#vgr_t+S{*Hd2d8krRG8orIEPP9wXB+B$+xrAsmP~Fi}bL5G_B9Is=q%rJyzGUEh z9jHTB8m4+I(L%q>+`i$ft-H?7hGghIVw&du#)@wWZp*bSGUV8lc{oHR$$5^y_guqN z$E>?(;Luj7S;cm+;I`0>xMR>!8%p`dGjknN(+Lr~PZ#unFU)goi0avRaz-b-<_(JB z9pTv63_9Pzz2Hexv*FEqGTLrb6mR?>4F`0Jol!!h8yIi>KmnH)T4ArB$kh^z7QGN} z5Ox^NY`ld$d4r^J5j@NKMM>gvWXD#oN05cya67hNIgMQ8@kfr=pti+QGN3uIE+m!Z z7ZN|}!N59tD_i-L71~*Ps7U2Sl-1P@j zSR0$OXF<%;QK7cnW05adR+r_$2`IhXi?067gIIpXL`4JxwwSd}@gkT)ZTC7pDS`6- zo1RD4OsLJ^!|j@joxGw;M0_^gpqA8`ubk2coYXCT+@3G#;QQ?4J9^<3kj@U?0OK{{ zkXMM}E?%$gmg2@Co=D;|UKy0%2v6=QYj)SWo6D^`K`JoN;2B72HG;{PW85Jy-F>6@ zQTy)m8?SjNIdgp^*euM1=O`rGBB*cH{Gf(T`Ly$0xN{BvG*+QFA1Qn~`ie~Hs$I{eWr3vP#(;qZ@5%i<9KipA3JLKAjdvfnWakHFq7PLAI|VD`er zWW}#y_0;Oud#PEmcf7!)otqaHWvC>_>oB|zK#~i~?!XM?@JFB6N6rM?8QX!E;t{*5R zFR+<~!!&eSy?)OIyGVyKXrL3)hB82)?MAxt&mXp9^lo(;#N(=1)^cX;0Tl5!dWdMSpXuXz} zmSs}!AI5hqLJB0N+y_wCi@mH5QP!#&(+noQ+f)Y2_G_23*_5Ba=N(*+EiH>lqBe`F zsf5NCk0MiRvpG8Mm8Z6JVv9D40s^h#vn|=a0qj1HPj)(dGFv@m`JMsE4aZhhnV?#y z{~tv_>=LOncedeOAi$^rl)Y>I)BG(pjFs0t8iKhrpiNL|F_Fj%bH|2F|Wz~Ugq7x>q&IMDxeuu1>_gRN%iZ25n( znL_n63uPVCry1H8tHJx>*z&OsASx&nRR@=C^1xMS8J&90dh~mTAIQF}tyQ(r> zE=wvoZRiH5!%Yc`QW=j%-pzn9JZ2I9y#5;+SbtDh*zP_?*yD{(FO4m-BI-NHC^oVPkJLVeHE=`S1h8&VF782|VnE~@0w(H}sHYnIoFGn)z zh^Q71OE3%Pn#zI$U}tgVz_q98_59M>1dMd^eB_9jp$PPl4{!tajg}5VA~b6vB_b(L ztujR-BOqH3(TKn&6%s7JV+aM(US>yP!2Li^h=l+2T-BN1$_`1Mr`GClcN=M34Gb2< zy2`Cv{a8b_a5>DiP94{!&+N80fq6pDTs-ePK4iN#6#3kV?ah?H!ORsPeNPG?`HZ+; zZI-55#i_-$2LHZLTIQJ70}_!j@k`jWBMK_;7a9KuvGbD7oJ*8C?egn!(jvOpUgry% z^bEtmk+h^QREpkEuoxvHY2`htkky&M z=8896Vo^=%tVm3X*|C5Ox%~sMokI#{N= zuQE&w0j>wR^{BMhPW4<+i1Qkwc{^wDFAIIa<*IVlQaxBkm2w!9Tx(508OMq_p3K-C z7&0wg+!xL zx*^Uk8WnQ@HA@3}g}zZ)WX=vU#Sc?v$1I{GDZKZ;roW*>lmPJ)shX+N_$w)k);$#b0+t3r`@@H3ZF(TRrLSj}d z3*@KQaC?6NylSbZC<6}GM73_L46m|(aHc_*?O0!TzZG@jH`KC-$#Oc{x zmZh?=Cx`S4>C8H;vb-bf-mPkp;E%SLCMqp4L^Ls@m=|^h*fXy*OH!rB1gOiT$u(U^ zks!~Mo9#bIFEy2nz>wNK=e&UCj(Lb7-E^re&n&Zc{8dkVc9-%VwVz{Uc3GxJ*f86C zZmsqSl}@-r`jP$_Bir8%=9}NCqG_*EElRE$Kvitei*=WeR{xXEU>>!80gC@1qB2Fb zryNVmA!RdEwETd2R(JJCzshvv5z}jsPfub>KX&1-9PdNUYKr$V6Pix_=uTT0(;RV^ zA8FShpNvFW5qm13$TttF_QD2x$t!k;h|-!BUp`eh?cQH#Uz}~Xq@-q_xvko`WAOqU zJRn)!M(-W7o;@VRa$Dx8}laAi=p;?q*rEc*FJ2I!t4~)d$JZHH$ zLokMYrVx2KdmvKWBFfJe^2caqB$*iFM( zV?x@iiRN&=Ugp(nKe_iJb`@+4-ay#jiMU_TF7dA5T{v7g&>6MSbK`mJbaD{&L=mz` zCHNHt%(@6;i+RtuHDG4QtEf)wSVhn`5MC3w@e8mSF zH}{Kf&#QdFGU(#1Vqx?MUogT}62mDQl1Cc$6@ihQ9x$UcT+hf*N63i#RNe|%XRW2< zrxV#MV_Q1=!AO?|y%O=OXYJ$56E>>j$?Xjowh~YQyzykvpL6a+5@LLBQY1?v4iR}H z1&g&SB%=?gqnM`5>!hi`bBh75U`VfINSYMH*Gkc!ZF?^lR7k6VWKyCI^Lc_O(g8Ig zHJkCN8`JgLAQ7~LiJ7Yac)42v!|7d9^au({EOAZI?l5Ck*pW&_{QEyCaExx70 zzoKNnGf_Vwg3q(#285<9!UhQOKo}MwZc*_c~dVtu~UPVW!b9z1iYeMJ!`^RV!YaJ+&5n|X)er}uf% z+2b6)E~)?eHKIRQFXrfiuF2&n0$-BNWODraYy&PV5)06xZb3rGRXwDYxLa4{4hOM7 z&JS{`A@+-ndz3u25Wm4oY((m)*u#h&PTZG>3b7wNq38B#Y%{t+^6Dh79Ds82$~ckz zHy^)3Q)H*fL@QRVza+;myYPpSSwV5qrocTp1Xqn}^0EC-V;Txs9H}^T=3a&<-_a#q zuA7v5FfLk*gF01eyR>UNjQ#HWLDUYtF^#OB%J=~?lP4gB_t~HMm1z@aBB%{(&_q@@ zu@XNXu^ZZ>n45tfC4j~Pi9uB-t1?^=Z&BG!{G=F0H2|@#wCT>K|NBSA0s(oM?{_2h zW5FBxj~2ZDAZJl`vUK@hIm@6nl#j~dqQG(2GjseXi3yGYWF5mOsUd}+V1jC%4S|r! z?ASlqJ_F`J78G_guv%%lMz>j`@|`)O%1RBX9O1+@=p`+!N(*=XPc3b0U23(;rn^k% z>r4)Yc&VNJ^XGRzfmHWx-Kp1X=V`9zjr+a&Y~C->Uy(={L>U+n98uwwdN@@Ic97Nh zXwnUlX{)%>r1TLIal0)Lh`Ih4%1PqLu_lp6n2=yqVkFCer4U4L7d>{q)JO@nX7YZK z^FAqr1HuG1mQfHr&_FZd6&8#seV5a?RrRUtt_pY{Yq&mznLFiAXLN;Zf%a%`u)m8d zxK`4H9!nwnVUo1r|B_c0>bRFWNE_O?WJiu%kF=7r7=w>O&d&S825`kFI9kc${s9xe zAXlFEIex|wgvE74<-XlaXjmsWyk)>+qtRHG7Be9$kRFTnO~S2WjT04e8fC}?Dc9d0 z%)Ra&q+xc#P1LDH{gk3QIN7>M*VgK|arIX;aH6bwV8^0eJT9|?&m!eUZR~xGO6M9!I zOyujdjo>OL4F_8ybR8!NEyQLWh`BhDB-_w1B`>G`GTLJ`XYx@+x?R!8P3ogYH;cxN zI*90=pS>+u+_u?tlFj*{hnySV0#a*}Tx%^-Y#VrC4kD%CpNvZ5WT9W~rkd1+dbsdxh~R0aB5*%3(LeW=SnWn`5>8?{ zRNx1|H*Hk>jD`R>FoR>{U`Q6OSTO&*iX<;Jov~c*r)a&HcR(I4N)RYCG*#41;qm2G$^|Y*WME=`N`+f!vd4DF311Mdz zK1pz@Tq5!@X*$%+LQuBRfKg}KGz;R_oxJ-622InKW!KMrgdbRLds65ymQQ1Ni5HM z6TEs)lXA(`RmDwD@o59S@{msif*k;F=&{)I-Pc890K3nseaL)QTRUBDzsh(7Fx@9l z{>p_T-eUl`_f(QI4*H3%W>HeTs@#aBq(@D!+BOpB)~8cx-V?ZoKKJ0E*6St}g4@1? z*Cpse_ScC)x(_L8m+x<=`#MngOaiDeN0qh$XGFpq1+$5CS9mJ%3Y$*(onA{=T8ni$ z{t4Sto2L4xy=P?(A`TicyjIBQG;!To@Ka8&Rg4I9Lm||Z=^u_;k;h>2T^8}M>$iS;2X75eRR4o^(RHwHT|HQi&w zTwIUumJ-Go#`TrZ!HK<zeqr$|x?@j$w;g*>OPDCtIqr znWmBP;r)9xRpgjq_b0FHFK3r1Pdge+RxN*80Jo1izP-+sw`bJ2C3=Mu_E@6xIx}C8ps9-+M&6?*Z3w9H@Nbjx*E`x^w}-u2Q}vFw%kD zH@QoWb@SJpArp?EU%)x+totN1+&n(qP3lZ<+pc;n>FE=}8(rE&lS9Z!s+?bD>AKCs z`!{0_%TSF!hTawktFJ|lsfa!$*<}7Oz^^PZf)IPzyj$B1aeRok!8uGCVd_j|vJxvS z&y0cwD!M{GuizY%?zjzM)ecB^iL!0FNXHZwA&If*JH2uQSo2VKWjw_1!nWUYCcBx? zp#Rq*b>XB%?j)v*_^b^t!uv6DDp1CqT?In*zH_JyJsth)NO!@TxkPJUe57MZ7Jf_! z`|W3i&mnC1@3%VI=C&kZLtPVRjiRMScKBf=8+OwJ6ECf@U-?I?m=h66QRBJ9U^e=h6CPeV>vMric< z$nQ$mL9z`;sJ{><=bBc!wBSYA+96f#of5?b9mp1bMi|u|d9L-GC6c$Cr7->EIu+$| zw6!!bz#_jh8Bb_QER>3VrggjhS`nG3T- znhSV-K+S<$Rwm7onF4|;58xIK0n=D&&#!1m!*DIJukkSJ4O_R`IMum8cT3iz!7Z>T z6hz2#@#-rrVAbXVodLq^QF00m3&`sp)}0OfIL)QII8?5rX_getx8&q!5^c8WaJeR- z=MyY89boP?G5+F)4q}P9rZnamuYaSm#kvgkk11yhAL%6oUtY0TV(TRbzO#=_E>@0j z-J;3#cjl-pY5F^DKjPMA8E7tx@s@{pJP+hQpjWY@Om4YwuJ@>lZ^|JtvI38o|80f2 z*WYi7eC+MIPUgNj07_?U0d5{*TsXb>9m^8tOTH%9P@WbgfdaYR@+X0mk8AU-o&{dg z%*f3P>TrvdQ@MR_vd`oS;UOrMlM9U6!eQl*Wjy%w;lpuEU1nr2Qz~6`>)P>A+e6ni zurV&5mePytd@m))oo>NSH^V+Jx*OFp=I4+Tk@V+vr6ld%w78r#cnFM}f$Z>uhOifo zPG(qv$0KL)43(nLgPSN10T6Egq=K=rH^^NG@M!JCd=A$H@7%mBhpw? z2=-z)k9a4U_AHzFi(k8N;%9q?2$)0*z(*sERH)l453kKn z-+qfnHKu3qsTvPv@84heF(5kaxt_*x#j~orP8W$d$pAQLp%?YhtoB^i#I)SkUcAMgNk?1dA5U?pDl>4x*%?RE zoaM?jeLHnFPcOo{Z`g+RBKql=q@P~3DKfv5rxK^3`PcLZ1#0EKQ{f9uLgkOQGFemO z%tu@APwk4gd%EkWw@FpSksC{6itqAQ0jU2p(@)$8O3h)-!E#x6$V5XPwoNOp_}n}i zKYo%S@<=~Btv29oGFi@pFHwp&qP8?Mf%s-Zn}%<1Wt$6`HS&VJqXPva*_-e_R0YjL z{Oov|+y5_$fX3xp#jeEu=2%C%>hL7~h-`GQ6}$OmTO(M?Qth8FiW^bG___6U>r$B- z<6E4(9r5mRCd=g2%`qgx{&QReqF*#C^%>f@N_!z0dZqKHR9`_oO^^AzAS>f$vD1}? z6MW`1+-ZW&L(T(ZV%&5@sr|nIc&S$F(3)pdFjRd}qE&5jkr>LNn<{7QsSWd~iKGpf z?vq0lvOJDM8Y~@NVnI|&;<6KOzou$nUBdIA6wJhMKmJ+u42ci}uYam?UK*lM@SgE9 zC@!N2Zq;XG)_b*ZXUK!EVVDOfAcF>TqCs`OuZPh?M+3%mKGml|Aytq@C745@fb|#l zP4E)cC83zHedJ=EXLp?ilZfhD4oIAT)ujw*m{KD4#MAu3bP#=z9pWPlt1{7&DC;airqL6Mo7>U zml61YUk3qjlx8>E7(xhM)9#(mLBwD0z^n8L)5Krl4BIj;-TDIFr{?yb7a|QWs~FOU z_1S(|5TA$6fBfJV?jd>&UUBobL(YyFKXI$FNgg_wDU+vj{M1g&3;oo3C0}t1m!p=Z znY=zVKKbRlV?vx@XjHELv1hRH1sQ~|+o9+pC*%L)8q3H)3$aI4wFJRtZRkpzLz zGw9u$G#L)U*bRpc{@&QX$2zXpfI9=jl>+1Gf*;w#TOz7mM^FOEb5WiHpX6X0tOYCj z`&~(+EMOwf9iWZ4flx2goNxsc(~2RxZy2H;T`~UP8m6{0LqC&;)@NEHt2odaG$~{3 z{kPDz<}X=0ujBwm61r|}VHuvV?e8C1BeKX9Y|7=#0(h)J4>vfzF~dVbry5VgMTiFu zDF~N@p*Z1jn74t|KYZJotmR693+uor@y6ln-R5_LUwrIi$-x1*-R2cq-45*#&skr5 zk9O?OfgnC=W7foeE5^48AStGotW8knF2YJ&O|K|mT}*mz*|j;yqOOb%Gl!HCg; z=(q?RbIuU*LG2F67em;kse`1>Aes|w1qcW-*C!S+55x(T)`efKu6Q=F?+p-D#Yt4! z9KXloD*0;?AM_yCcf>mtD%T9eRdC7hYBSK3oHH^hYLTri)EoxX5aX5AY{aoHEqeD zUL0_Ij%Q(alTgahW^mN|es}bdZ7_x$GJ1%@aL9&JnOJ9|Un#y+0eZ=My_J{jF>I@Y zgazIa`t}PJKHoF4`pb?Tv?f1SKNv;dJJVEj70UGBIvHU>YGeCER~oNP;merAewm)X z-vn(uQ|yKgIZ>n%r<*2jZ-8|sGN`M~?w1DMurm2uWtU&4obe<~RtKnG(6@;|^D z-~tT)+7QwMMXAH;u-`;JrBc5z@s`{E7iaGnoJsU|i-r@M6WgBHwrx&q+qP}nwv&l% zJ#i*{;$)(e|G96~J*VzD?}yv9yQ{mZ`%~||cCX!Q{gz?=_ZiG0S%N^|DeIqTSG+0n zT0HyU(_jo+2i{(x*B;b*P)2k4>FbaJuKTDZ>>odx{0$f!4zT7k%IDESzJI>b(VS&;n}E;-$9j&z`B6LlbyV zatr`N90(hq3p!{HJy;uJNJ;#HN8C{h2T^YY@dfEWkHQy@d^Eq#egM>lf~YU`Sk(lz z7U=QAsxg+eXDZ|HV1JfoeOIm&7VUE>WK|@Xj3KD>>z}9%5WC{o1U$CLl2V6*zN1b& zm?r(TOZrRT)V0He`aFQ%gOzsRi#+;fM9L4Mb1OuB!Ac%VdcjV)Xj6Qs!*$5+p`x?F z{?k`>(gd<5}atPwPADN+v+$!~Y6G8e`EU>f8T6c^z1J7!JH%IRRusue!_c^K~UInOsaEf7!L)bnOkbX_z|65~0EJ~g) zBK1N>ECY-I!UyPad>{i#0xf0KUYG32RYpPBRE|h$v@_nYXNu`Yxb^14$@TipR#Xpt zcFQ1L&L1`y8d|FGr!?ehH{)L=<~19h9}HrwlGO(5+|i}mvo)nX(bVg*q~*iG;|u!7 zHogC`6?2E<|D}4^%L^`r)c4znFIKP3n(U$Cn=)HITUD<#?o|6W zWDgG^L8#?>#RS3;LFhfz#G^NO=at86m6g|ZxI)}OW!83Y`B%Z&zh1-%1BaRHoz}3&{9Gz_? zzy4%*_5e6#%TVqjD0cxJRS)sD@wUFQUJuq6{4$zCzg@T;;qJoGf1{jt7@)fddh~&F z1e1P{&K)JNulznGpRW2I*>a%JwaVbUn&(pMJNh@tTA^&s%tJS7y1d1!Cn;#JSjkT_ znedN*bkv0TL?7m#HNOmnL~T!9p~(r4Av^zgJ(ondmx%g3yYaW@i85!m$Tfhsi*Sz^ z)?)kMnEP4EEAH{#&7ziD)NIo;QN+!X&^u_%Fp?4KXK+$pydj03Z2HQQqD1%_Y@kCZ za)UbdJPlok`q9AS5cpOImK!V~FVUT0eZxNS0{Et=0w*0TKuQT(A(G^!k}41@lu#eD z7qC{u@vZ~`s4rsdp_kszFCz33^`}DxpJ)7GOvnr!9zl$aF48CsKMZ5csTVor*af{v z-wbvdDgBPb44WRg(4ibN=*5h=76F5ewvzI$FfwC|s?S&kKxD=cpq(A(8G-a@??CRS zE;}?arr>8V+kYOl;Uzf%D2(X(2rVDLI5PLqFbr~xemf8729QRCjiSVg$~EbtM00`B zvE8Hb$2dmJW7r+59W{Mc zIT^n52P3v(3?k=n>!Y?}*G92o+()otkw(&E;z#k#5<8ag7~C?|4j7uXeO7o4d}eaU zuh~zdc20yF$8{;L+3L}sGh+|@oNHgYJX>GFI<+p@?9q|3+N12V3{a3V4bYIXy+_-> zor5U%VheVk@_0;NM2K1TQR6euBgFRx2j4cb8y(LBnCfy)<4071h942-HHin|s}Q%O z7bLeWu92K(^q6y+w4?F0tp{2cR<~#u;1uwIav~#jiOee;+2yKXQ}2RM zmF4;d)Hx{#A|bz1!v0HAm4Dz8*haA#_k|0RjAZ+Ic`0Z7dMGB0ihXd-@XtHQH+gU7 zv1l3mc)w@q&jYN|ZgWov5Z=v6ygT!*+hch+y@1#T&x8ZJr)c31LE0dzE50>{$Dm!H z+w*aIr10U3mca4?4z@i|nfF*xFD$Pp6-AQW;D8sN2dtZ6Oc*CL> z?6N5TA(vam3{w6;nQxP#s1~sF#}KTDV6u_m5O6?!IOHm9P3>%bzhsFA$aI~sagA8X zE_A*Tw^yDGv19gHPvSE4-i8MXO$au32mW%@1Dl%4I~xA)U3zk!hIaGTqLh2-^oH+) zJ^#RG6H8-ZppnK^lQ97tJw&dK{s}gN^+|SMnR$KN`yayjiB?`STXjy zI1B{5zypVuQ0D@aI1VVb^+RZ+IQ2BsY;dQze}+AL0JjNI4a<-2YYQX#Cak)FQ(!q+ zQ@%0NRzZQxxu~V*j~({eC3n`5Ll;t|A%lu&%(foEwnN4<@f;_%74;`%=wCU_1f|T`|1x|QC+k8!qK%_XeDMm zD=fBVe4t@F&Zge$@j2-18-FmBVV&@jLpsXVr($+4%+G`ho+e;Wz`G>EuIU%fBvDxZ z$N9-3H4d4VJLsm=Qcz^5I{3$JVZ4Hy>fe$h2qard2E495N&{j9v4XlAcAE!hcsMOM z7w_~+xf`=FmL|sAS&~JS+cCdg>K>#gueQ8|b7Mts_c(4-(Y^xteIa-h_W{|Qhv_@rIs3+OQ zhJ{=|(y~pAO1TK?SSSUjb098eCLDq*`6bvE%~FuB9E0X^K#n4NKP!kHG0OJ+sL^ZxT;a3 z@Aecemau92BNr#6bX<(OGy3X_J2Rbi$$!<0Q#rJ-vX;!ul$sN&657;${rJyMRQsPg zc@KCXAdsT}oxY3m|E%w_w{vzevU3sjv@^1`H2E*ZqS}rMt{VE+P6Cor`ooC0}d$iFiam<*KV=aM!}z zV(R|(cZ0~rI`*YOkB-r4$rT%MXL8j`KHyAjm~~qQUL|rz7tHRd1;2ogYeq3$QohV^}U#XE0YNSq23=eAbS`0RU#~%6bA?Ocl7BTsesO?@F@XZv- zAE7RUFfn$~R;0NSR^VGg>*gU(XLi#|XJxfbf29q6rR4V>V`rgd{#L{x3FDd)Fg7eH z@V1AXMOa!m?U+x7mo1MFoqfr;p>k$~XD*@I3H96|pdc|KNm8$LoTb7ZktuPUZNTS{ zn@e`y5*b|#Ie4D*nm{oFZ}L$YrS(v&;8K9>(10+(R(4Q|@R**gQ<6s5YU>lO6?#mU zh2TV2RbAZ1h1DNtV!oHXx-5}qMMQT?gBAJcxt=bs)v7aw?+v;)4GUJY7<2JV++tQF zK;Lq5O*enV`x{TjlKESFROb&(M-a?DoRChCCR``8E*knCMRMISc|cQhP5PvdQ{4V< zMIv{Nx+v;xWV*^^FK{_XBPS1y^*p2Xn?F3~89~=i@GRSYeGL9_*#rDMy51bh?9$C7 zc{kJ@!DiAKeJvw$=WT%?6y2`>Je) z6AgxYutsl=c8fK3nn*oK)3vl(s5(&DBm@(Ql+QK!lu{W`=QFf_)o1e}n+hz7k{(Hps_`0&-mec?v#Axo{B- zg1+SD2c1JuRkd(G3u^Dznre-sO4awyzQNfZb^tdezu1# z#>@~Gx2v7eZ;VDJleq~Z&$+sQ;L_uqZwFio& zYa~2J#E+v27Nc{jzWs`@1a>lVRY_U8r47wZGNSQGT1F}vn?pQ#iP<`u6%^k6E@vo@ z$J~#{#kN@=eKEAg?=M+%uPP#MaPDRVhrYIGq2luS{EgI$R!uX68-8QV z8$g=s5X;EivEK>&2T;$?MwZ{d#jU2q7*Z#fDNIJ3^Rj9ep66K%%4{M-Pt^w6 z$$ElS%yElwwQ*K>v?pO@>F=3WG2amdsan#xA5r7Ve--I+f5K1?qms*@EAW}>bTRMB zObq>W7hl#@x=#*GQ4K#tiR7DRW@S}uf;JoS{w%mvM+ ztW0@@Boo;ZBlHt*%F0tG+*Uvf(US`^IVrONRl1by0qXbge2s%VJvyr-t5IW#re-x%cgDp1CMiK*$sH{KkF^X6^%E-0;kP6jtBdT~-W zy8CV`UKK%}YEx+?L?KZQE1LT4o)hK zGZe+8kCPdBicWwV_f6P%U`${Od(Mwp$-+Xq#hES($Q9LWvkhM zL#~|I*VK-a&P3rN|5aV6NtcJ1`cy@&#cJcLjJp8GJsk=^+_JK`swX^g{?n9;z1+)L zN{>Bm+6cfh;(mM^(*i@kkjCk2Am z_vk|3%*M6T+vatuCS(KN%#(d1@-4B5nL2eAepDqR zOP~znlDIkGr(Wbz;Ik-sn(n;*0CzFY6s?7RY>u?#L`9Bxjk)sh$93Ui#(a&`j`(tU zRPjb;GbE)AJqu}GOam6ZwYFpw^9wp`_^)E^FPRE^ED`E+MSW zYi>Y5`PKo+TJOAA-PSqD+Nk4cv_}{2_Z_(3ci1vG?YJ5KS-Nsays&fn-rW)}+>Eu3 z(O0#czRO&mZ2A&7KjITOf8-j9&F>vEukBqiuOWF(KfZ0W!TvLf@AMlnsbl=UPE7fX zC=fVfUL(BAxS0BWfDe#R(7$Fr5{1o%` zXfFdTozfp)2Bf>sH@=e(B#*nEFQF7jMDrsD)_wRpMbF}2i^IpUvkvho9NZ@S+6#Zk z39A)Plj6N&iQOM4CvkHZ^66e@)N^iC=o$t!oxcUueUF$4t-OcYk)tNRV&~FEdVIYR zyVuKcyJF|-tgH!P67~}J*7hO(fwyhu&BJv#r`#`R{3$5ifT8#1RLw|WedTcm5OU(va{C?`WUxP&bWkukWO`L* zz$bhqe61$>C!bDP8I-R{D51R3(dVl)Gd1+k%I8$sMG zBgHPjGOQ(@$18}o0M`N8rkXm*sfj<}wj9|cYXz$}7lFiHSeBCBoV!Z3bDI+Hv*0NL zTHxDO77ImPl;(x!>Pl*EqUiBRPfWIPxgsyQ@a+ub438>DAFz$PnND1ujga3wqzyS+ z9=AQZUS682xtmTGY0e|0(1&F?lQ>UY#@sI1kr5`1HNm~Pc#{ZvBeOs^)uvq9-A7eX z+c{YYt~r^g$ZN44qcf<;?;x{N-j@ZEWjpmVY<{3ay2;^SNLT0E4I`S$krmn*qRt!~ zgi)%}mg;FN$JC4sKPh+RzB*``zV8d_vw0(m>CCy7(;vmBc=SoGiv6IS#!ouE*DL~eWfg{^<=geLiyx*_ zd`C|=@U@Vg8HQOiAr~|qH_^3iP44Lt$?!TXr=+{5dKe+9mmxHLY$+!u z4H{d)>yxvULl#!rT=L%@!;G^OvT;L(^@`Kl#hb1g6X*GJ$n|H5&I3Y?;%&0{ZPxLq zAvlYmqL}m{Dzh|92W3S)bp3fDSk^`azc)&M28rKQmfgm0aSGH6K}~CpN;vBg{&3i7 zTX2WlNZTzSj;lA~1m_8~2J(#4?njF(AY1%84vKAxrS_iNKO!ma-oC!Or}XFXdJKgl zGhh56PgHf4IUIkelI3AAMH(WtO-q60Q5=*+t6^h7ETlv{l#H4iQ}pz}+Jh#L>W+4O zqw?pplld+e(K~BAZ|*X}5{@I@NKdFwK38m3igOLcVvjUOBf(S(*APhWw}f4&SXPxu zv@OB2<`1P-QxY8!sOmGE(7hy|N8&*ApbUgQtUd)07APIa2*L)}gVQ4pAo9o^zzWiV z+y&XCa^UF@0n7m81G@)xAa-HIX+*f^LR5CaAW*c?m`h#mz1IDi&{AG%K(0RMs$C^4uN$Odfvy?qSRzyKr$8U;E7 zqyiBds2vD=atAOwh#C+WV0|EbQ~@S{L$H42cap&J0Y=OhazHxlI}30MvX9^$HqZdZ zFLj{)#~u;TdY~TT#TBpy_x=Nj3+adUG6U8R`K}(g_rtGy;0B~$1>hdIM-TKK;DGf3 zd)E)#gYxSiAOzVX140cFqP`3R;-TM_fa4IKoG&E<`5^wv0EIvSdf*IT4*FdScnZ-6 z=~F$B59KceU<~Rf1G2uP43rNVV7{;ee+Kr0eO3*$2hYL2vjNv2KYzSrgYAKSs*An* z=m2;I-PJ~(cfj_*`!oP=1J7{pATL6JgkS=6KvIANq#yiy$ACgWKKctaa0yHR+)o{F zIhM*Bpzv4m+76c1Ea*Npr<5Yidzxb9jU-ai~Ytuk3i29m&i2g``k8e?L_oCY#OA z)S$TK7i3-TS!b??=URPV78H`kGVXKywOq|r9zSQBDWyb~m3Vtz(b;C%Ih60qe(J0- z>$DsH(n!Kwluy=-WLOM7CD8@=vg&EK+ww9I1~4KOjZ5{fNOohE=<#^)EY>33@FaM# zEYb(Mbfa31t~yg5w1mD?jDq8Z!q6+?y0s@)DQ_X77KnL;)V~RNr4S6OP{fw*dX~WG zhI0^R#+##Q%{@1t`2UA)Ahd8W5y69i5aIrJbc68!MmJncJzV~aY$)s4qY9z(i&!d+ zqy;^R+xDvJFgZSUjzLtC$*nFz|AkX%9&V7;Gy8K{lb(N^9EtP^;-7Lm%~>WG`IjQw z{={dmbN2P|ZCnAw19A?=h87=+rt4_h=R`?x$#GtJ5y)5u5+eATMZSQvo6a zA+3Km)C5trj7?uO4Ro%JVKJcOC2v?msvo!eR3Sjq%vH>V{pn62P8CUTlp>fITj7_D z5Uz{%=TQfuO|y(G{e+K$l+A)=Y^f!89vgqvyy6%42?Wi)+Jv4gg^6q{7b)*P^^&nM zdRfIR6LG9ZP@p^J%C#iC`aJ{RWixdt%kt`8G51N>z^DJZvz0i68`emh)s*o|7ZNg! zcrsiSWNcEUBw+*dE}JTD%{}nPrn^(XRPehe1J>;n&cBJdFK%r1$e!q)wgbr6+g~?mvLt&n|$8~tvWRR*-2~SP)`xVzZHH; zK@z|YXipDeB71SL*Ucbvpm*%+9X;E?$*M5%s}_4)$Mr_!#z$6O0S9I`%~DLe*w8-t z5fhRi#uo-vz=z$SZMay+VZ+glyV1&i);LQIPmM<(>@(i(^AkdRcEhp5)`c7ID(|(I z4~RG7^T$VFH`==+UPQPDEOM=B=Pf>B-##&vM4gK@Z$KCA8VrD%*ZCTuT>BIJKYa^T z4wf3}+xsc`|Hu33>f~f<=OXT8WNZ5Wbg)omTk9o7Y(9nT7Hjofz1Ckk&qeCCwA!N_ z4aG#k!i6uUEDQEhJuwz4Du9uY)umETPy*ROFF9j?_eZZPhN~D!&4Tt$%AZQk|y$xs|5?8 z{ZFa0$%6cB&zp|GdCYnf@>Y-FP?16k+0Jf@ve!1qznQN}rz#lL6*^P^3Zsc+2}%+2 z;T+#Awo(clO&M>y``JlS1wS(^RuXi4Heywh+7I^8i`Qn9lyyNlqWg->u} z62`k6W^3#=%^p!Ygk5c(uy4whgL(tbsz5N;o)HRt<}K@5?u^?H6~&$KV%tk#itDlilzV16Q1Bwd|9@I=7Q;U&=0QO~BEbJU^!Z+12C;WCXS6rAGBt5= zH#KH7a<*j@`)&STJ;POTN`6QXoliswqKb&B=w(}~SRJd5)ef2hTNJgG-))K8Sn7(* zw}PjC0+QKrAL>QvAa`;|RL|vP+j%a>+e?6dZx{5^Ft(5+IFI_O&!I@dD4Vy3Bb$RS z5!8h0=@p!V@A`QBai;y~a>>_Us$U6}h?Qf{dV8^m*lue&Us)f|J$B3%7p^iA zD5ZFdR2P3plf_`0xm{#wSj$adAwy685*G1r{LZkE6o}~{)Y^`wHtZs{Kci{UGjKWZ z)`2kJeu|)OEgj~IhN`QQ_gwC$SnBn=P}$TchoLx7QP3GPQhY5PRglM9Q&^3 z%htD7oYp~`qTobTLQ#(T&Gtv?DDq4U{xiZ4j`_mezTcn3_b6AC0f#^bfrkEeRDlS6 z@Bi=ie|C3^TbkPZ{@>i)xoR>hxDv>p;;7J=fkzmCjL|qEPDvIb9=|cpu>*`L6lmNS z(mP$LU`{vfRg+@tMu~FOSEj z!2xCXycxYP$$!6BmyW!qm=I3cgBi1;YJbIusnk0~)j}bUP(u<_9G%+y#E8PP)kUHL z8qG)B4Ma0EzKkwXry(}JSQx<$Fm0-)(WH7PF_K>yyM|}a4khz4@`RcGyX6)!7;Qvo z<}}ar-1*u4he=R%N$ynbHM1p$--|-`$wfP(u{PgZktUw7D-CAB-U;T z-q|q=2ONCz!ePTWX=lX}s3;2BfMeLcTZ%RUWyn66NV*9dKg)x}Rs}RsVD!+FG6*R) zhEyLNdOI=2y|0=O#2R+Xp;&Q<8s3geg@m1A%RA@oMfaL7{}3~ThyMfdh8OIxFiQq@H<{hx0jKA zBU0kpOmLt+Bqt$I`HL?Z&9FzfvbYomb=OB7y=7CI6_&L1>8q|=$o)H0f|IPBd>V_i z81f<#TCG)Xd5k#Z*SN&IS@bPW(P^d03aoTjdM>aeqG*&7{bm+FJT0Eb+1lcPDu0v8 z)e_g%ELC>|1LcDeeg&Zs9grDjLhu|1eaIW?Z~nUV%~@Suv+Ag+G`GrFUM|ELV^ga( z5>*Meo@afuK>*M4stW^vl4xCV)m;P>p!6pd@;}?1}x(lL8 zhr}F9ERX7~=kJ7~JI2F4GF;1fPO@yriKr#I9Z%sXY)XbJ=)&mlfM@Kf>%$&y{qfL3 z?y_?aYlVNmqBw4}DJ`&8@oxnZws}fcsTrGDd`MNpaq}Wi&$!KZ;6p?2_s%f3Lif3^ zuNx^;^I7*(4&0a5tVObgNvNho)k zAaMUgt)I`wgMEn(2rRv(rwK#q86q9)vhvFzCe;Jrj}5ln-l6`3ZWHI@kTKuiF8{tE z?f;H$|7U~f|3tU{)gU^fZf%42pVAfXSH3Rqf7;};)*Y|NTCDByl9dqwvubihh}YX+5v5r+LOh+S#6b`4-+E7rP5-ybbbNILqXEtnSUr4NP&Vx z$k3vYRWH}Mx6_-u9y%e&0&X{%Jg2w0&i&3OeeWmkeQ<`9eH0BF4Dt`_#pAI(-sQ}j z#XKr}P_o5L?7FOa#n{2f7Tpojvo5l0W%8m0mD3jEjvpxu4x?7hy>&XQ5;Ap-w(|}q z6=)V(BGh3Wf+1O8>6}X{#cul*{Mt-so`$U}h86QNzI^!{c&CCw`3?}G>O@9=Rm4|u ze#yhJ#{HDXU^k{!*1k^s?=iH z0ig}XX0fIi|AX4LQgW3=mD%QSuCNlA%PGFe+B9dVQF?aXGwm>%cMAQq(?9pteeq>@dU6mz@rEk;z~O zzUV{wEP2!2^v7IYq4?*P1Q*I!jdF(XN{@4W&>4)1{V@+ywI8Zlh7;`VIp*E8=pH4? za>AtkE)Bft2P~OKpiW(0&m#`MoTsr6^cG*Qt>|h62ioTgtx2C#S+|3pDH2Bm!ps;$ zw;+uDD4mXCpmnQgTAOIYmo{KQOID3`a%%!!>C>Hh!w)M4&{p1_ZG~Ry)95Lu*lUb} zp`JhJvZv-HVjKfIE`k9KdT=D+*%HGJ9jzLZTsd}59lCL`ECsf+*${V^ zP<$>wHJG1tOy1#3d7Q!0kFP3|eM++!tYf!=PxQjPCKzRX{XL&Y9CM1t*N{lQBNf`J zPLos7ssbv>loDAl((xc6ANg{tGae_Kz8GD}fXOcryuahp=~q}D?_5mt$m8fGOiP*9 z3dX?jQ*$n}-D2_brF=bEW6{FAtNpeaPGX8gSaGg{sG)9lN*1+OnSEj?X99wg-4;>@ zn^G%uG3AH(IyfSKZetN=zg%gLuMeU;Z@tr5bJ7-50zpnOe9ezn$N8$mB{A$mbCB0H za*F`_HYAMpIN)K6T2*`L8j>Z#W#T3yb(blpcy zUOlC?wxhcYTw{QiV|kpqOvY@H1?n9x;v3?@R~m6>o5`(Euv+k)VX>{&;{j&|7L+u- zbd0GBPb`?o;(eJ8(n##S>G)Xv30n!ZdWdcNcog@;0L!24sb%7^MgBAL2=<4T*~~Y2kkDFm zraL{f828j2RWr7G_JPV+f=yLiH0p`ZgNQSnA9u=IC!4)KIiq8*n5WxxCnJitTti?$x=iH z!!r3yIEK&vN2h|c73a6w16Pa`|Ji;cJJrYmiEVS?o&STwQU)}B6e>c8C)ursoLNWkq zVel*8(d#4U(hq;_FrDtCeto3htkOT2_#LzT(EaG-#V4H z9*@fZ5agAIU`*gU(x+kKbAsR%t1pn_wg_1hne8Pn!ca-{7ELL7O%%Q#*|CQhM{1s; zFj94$tH>`hAcepr+1YDp&vRwcHs(F#z@Jcmn68v6d=m>OmS5rhF)Q`E$BO?aMV2a7 zj$A}^8p|~qSR3W~q{5OMf{vEeJ;Pitna+Flq{LM}=znH;m6WLYR45P-Yt;X4mdF35 zGyh)>`oAns)bhgkq>kSH+Pn7Kn>qyD-xm=^6%rgtcMw5O$bbOjm5d+*g+RZPBp^=c z8954-5k0CX5bNPQ#+1@A;iExe)AmVpKeRQ3CI z|Kj=Hz7O374;zGp{9qs=K%S81YU?Q|wK*MZ>R9^7X*7rL!R_x7z?8-(Imllq>Yd(rf9yeP@ZA@h3uC^#;7`M;ReU~zfbefp2=g=ZDF@pyouS)uUQe4tM;O1cevJv96)+#dmK`5i5|lha z#7@#?umqws{bUY3F%BWou^MTiE~G9zP;}F`P`~cz97Pim+qPxDr~8wc!lSNWLj0mY zFv#~iLUbotFGYZ(ynu+2!yEPx4Vk+QV2)|M&|`PB9=YMAgpbTUnka?_2m2Vqzc|u? zOl(1tyOlFhth;fe?)&b4%8 zjwOdFU@_i!x(Iy>L7KzG`hz>)5Z4q7(`+tBhe=K#`NQ+hKd#jcPtmg{Lxj!P&#szLh!4 zAIYh>v+#Bk?F18@Tb?C7oslS!I>e2Y)~hQ#YwF>xj6q+WrJ<*Vm#6XbXYSfl0=6$`W~~eyJ9@0xBxx(AP1&)!sL9fB>LW^R zTLMzs5Idty;8iVRc9Ev7Sj%?M7D`txFBhEUAj4(~StlQPG4kTuY3nibzaX~~^YaD% zb#U+A2HdBOm@%<)X=_?5a717L4~p{ELT2`0#8>Dd-V^bvIQaXt z%<8LW_i57;C)~C1l3Li*g^vRVZU>5=)Zj4qb5ix34rX+Udb$|w$y|N(h>wH7RxgU5 z7;3&c0UY8&)v&4QQqip~GIew7W-WfLl@Vdcu)al5IC7FD+3Qr2gRM9GkG5gJGHw*M z09ZWB;kdE@V=fyL(nkji*Tm%jI1@9C8 zX6h{bMD|cIuO?Sii#avRzvKHHJrCy8%W5rI3v_zObu(ojRBz!7*cZX408hrtkUbRz zQ!;G=9Y8RaC+$^;z`Uh~#@qp#3cCT-aveuMy#Mbz4P>wGCo5Ca5?8!lu8i%f8jWa$hazP4S*N)3<3M*QjQy-E$rgB{fiDYfb6D*`Zy^8N9S5vpqgD43 z6ycqlKQ8~>U^WnIH!VMB$d3D;|9R*dpt+3(TX^gIcc<1iqc1RY=85n((~~$QLmqx* z3@}UkAhI{x()0fObncXNxyFrY;sN*IUWpLRwpej2vxjwgcQ%jV47-?aNTS=&#pn*LL&`+g!|Kxxd(%X5B{x8q>d|u`+rkHXG`Y@K8E$a`x7?iI3D9Ej@7FJ~m zXG+!3baqOJT%Egc#TL4zl%~>T)+6pMid-e3W`#JUtNnFIyU?vaS(4OhixaYNR-ANe zTI-2-@jc12;>uFgZt?^28E3%}J9jGol!Xv~*48h=9cY@b>1`zrP`I!VtC_FiXDM;t z{M}BH;+p)cd`&ezn@RN>4R?s^-mzjIzGkshK|!xa$MJXJd$~$9)9d~^7@YUgzPz|2 zl`#Kh^r>ef(v!9CAI9ovu`0St1q%4J)v$C&iV)<>y|~FOp@n6Bc;hTA#sL>?UDl?O z0yU>I_tPj(Zt=w=tMd!Yb=vG*ZnkM8#>ag{-+}|C^Qe1gm(5YJt?Alkp2OwMw|+c$ zGD!_(FC#K_16bPM`5VizHsa0Ra1#6 zn`^q?xk$~nV_f__BWApDq8SECI;XJQCVzW~k?>@qN>pfVv4}wK0ORnW#mlriPB=9;jL+KD=4x_=i zNM)SNeq}?Vn_{G-PKjao6lo@wCNBQNF8no7sX#yUbG76KuF;_riLrb+#;h;M%+3)8 z-=2?iZMf3@j(S%{lN)K4)HLTwXf)XLk`t@Z_cWPbJEHmGoTSyJV0&}(TpJF{qcMnq zAtUOP>6XL^PM#dT=FW(bH(h51dYA$O1CeH${u-k$_904r+-qfJZCQ0~jKs7zO?3qG zfu^>us26#`v8_&{RUY>{5%JesidKUEA1Xp}O?z73MkLD3>vmPiuHridsL$KG_;~*W zfA~+7chc7wz1^o6d}%xGtzc{44spVY7c}BWxz6G@o5JJ{89CeEj$*F9xp6RUi8>4L zw%|zC;7H{lWg=>Iy>m90d5aFYORUwdIh43dTxEyaowmN43FS{eH#vP)7p<&F8zb;t zbM(|V37WLXIF!}ItfngaUSdGcRNbfNA)9?XaK)?^0d9oyW4v}db_G&pvzrYY;4zH|NSJE~SP^CC}Tvr}jOHsbHEUq(L z@ErIkwM}Nh+N|5m^ryINjVdGb-iI?4wFNnP_IJ+m3MTtTn=jF{=QK8aTsIgLqnN(N z{4wz!x5PdvIqGPg^`!i~!r4h)gp}m14S!-6{rpkW9OqO@LMAaO73N<9l8w|lM+DDY z%SKuAzv9xk6Dy;vhcQxHzFgGzWADrK>)UFJM`wS}U#f5DJ!PUTCcinK$@Am_taKWw z%ZnR;o=rbzrl>-kw*=^Q@qAyv`p!o84hcQ1(^D5HN*+b8M%q8BFD&Qs({JN1OI#|I zF=`+Q`>e)a*ZvHy4`L~fJnZLRN$nLRXkx~NT4KrG?Hd5;%Y3%{k}PldbItc$RW=RM zd;_2MyI(ZY_W5RE&rNDVe_5`C`wt`aq`dh2nI6x3DhnZ8Duw<_vs~#)F*K)Jjx!dh zG)}XLRh}N&BMzmxS(#{&o2%wx!ye@ah0Qi5-VF-pM_QFYWML?3(CW-X|dfB z`V?7lDp#JC3A9Vxzh0syp&yE-u=*eM;dctbUqEhY{d~jYAl#h>b7ACN2KAtyx&d_% z`Nu)v5bn-{=4jhHkd!FP!;q9{Ig4hWs#rdVgM4}pj6rj*gS{}t4&)0spAi1 zJefxd9Dy8gl)(djFw^#o(Q!}7m=~#zUAocLdel_a=OhM=U4KhdJ-GVfb2VyXIAPzd~Ilv4W zVe!TTl(BTh9Y!GMC_Abf&!=5hM6F~O%*WOpg%feC} z5c~;BFlEF7@ze)f5$AO9@-hcopbTe-HHRO&L>d^V5C>i1Z8#xtJNovKw+9}_L>dw= zwjvm4{54%9JzbR|qF3rLEM{d}COU|mbTX;O@Rpv1DK~HT0i#^p($cJIOt1Z^j{#8_pct3MNHXKP35(& z02=G9cE3Jx%6sb|_u82BdU}tJ_}8g%KFDJ~J!3zc#J}FhTF|wGtG%Ss*aq{<=24-} z$Whrv${@)iGdaB($!b#99S&%53y&!tzn5K#oaj@JRS`K~=;dcj(Ngu~-x#p^Y_FRgkIin17xXXE*n`3)Y;Da(03jcuA_9`#ku1^#s^{XkZ75a>PD#>Fd0KH8j$PC zd7jyGe!MySz#EwY@aMoF9UQx;-biO<9|`o*U5@Eg* zvcJJ_z=%~mQ|jb9l=8O+SYALYp;OXyfcaTPNJ|Msln!1COV2GcsJ+m@1>+XDe$~m1 zt1}!%&Eq_#7hM6ElfcUkJ_04bZRAig)fd>du#yL@%a|?tN}G!67HF>pIMP6_1Il%n zV+WG7gFwmwzG5VlA@wedz+kWg+nxM3% z0x_%Azt*xc*oh4P2VdtDoC(lw>j@{eZQHhOV`AI3ZQJ(5Hon-l?PP+Jz4y5|r)t+< z)mMGj7jIX+tJZoJEN^rVaP7?~otuz&=h9Pt5l*S$kw$TL5m1tYc;?f=BWw1n28*`8 z^9r|Q0vJn^oLo{{#*m=Zt8AlpAC*J(H6kWt04QFnB+UvKk4TbO5mK{C2jWTcEK!>5 zm?J5|B>9`j));@577S}xqJH0~g+33EH#hI^Jffp7EG9_2u$ev-9&q(tyD^2C0ih4t z>VchpEK9q-ok5#dVC!LYN~I4VyuP$uO>ay+!dF)sd?eN4E!XKJSC-j&f;D4X525Jv)7i~c60~% zf*7J~R`_TuV}kQZu)8x+$pBhFo%s<;4MCO}H#lx!gx+%stXBEd;iAthw`OYjB3Rhn zhj@1p$$@AhN=2B10D~k~@;D`!4HJ1#?Fsl+ISeF_ups1>}&EYgKA0Zr1W8`65fe8>hgS#=zr>#r@ ztG(=`L_->52>0->gn=lGU4-9eNB|m5M`}(tM<9dR13}VO(^4g$ClHR0UyNmH=Y%-h zjjILO$5=pG*6do=Zd%qz#aO%qTN0trF(_@Tr;y6g+YAe*ASPn^R=(%!iID*h- zcIA=uT~hSu7VT*Up?a?}9MJT|&FgxvqSFI6BDN-$3kO;fqcmp9y1;o{O*;aOjf&#M4HhHC z^$s8X&4V0jv}KFXPS^ybqz?0V5)zU)t!bN57TM$yp%XS%$((<%oX44^#uyCNMIQdd zD~g3?;O|_qDC%S*uA@jW8Y0AwLV8o~kGhH1hh+m2<)wSyFt&P8Zc?%^=e2bBbfJG}QZrNR+@4fVF;rl+Cc>F7@TXWKE4&A{fGcIn|0T4Z z(}OU-X1@P|7#@qlu$kIMIMDpCKovc#G?dyA845;sVOROg5Tq zNZoi$PhxYC$KOK_-motHPQi)56)~ku9xWy~%23)Tj^r##HH{yRknj&?$0KNczAFs3 zg}kFGeErYCRMb}6j)`6r-b!6Q^HK%p0BR|JE4D(M3(w1SM32a*n!6YECHSBuk405d zIqt7pRt)%_zQbC0bwyu#9V0#MxSvK<#ztw^lhxRN-<+;Oarta%UF`7YxhO`#wgE&h z3c8it-V5JiS|=qBbXrQYq)gyaps0%NE<)Ktyth1qCeV#Lt1eq^HD-KhZI&(v{cJ+9 z7as(_fw}as7w7VOMcDCR-zl%-8sfIVUEF5*<=NptKOb~o0U&sNoX2Cyd-aHsG76VeUv*mGJYC_{SaMPsmI#qE) ze~`tPRT%O=pq~TD^H0kk&A^iNGGVy8DS@!`k{Y%3uw?dx|0dRPxlSkj+2Ml(SX*J` z!xuQJy_d}=7_>%a`v#WQ7$Tjy%ve8=1tI|mOj=(L`57bHn9c9U>0QBSb?ugu*urC6 z#R!&%)u*}UdSMGk1n=wCHcdJpptKNg>y~6pb3`^fSzA+U)r?18uI`+X^M=+qwxdJfG2|MxN3hnU|I#j6dmEa6(5t1(bwQbqI2c;J~q*{H2Yfw9$I9h6#YlIiIDzx1c zFR-@O#c6pNtxJ|$I$9D^o(O7vvq`a2^1-B}5T<`Z^jd;S0|s5OX}=7xT5=HqbHAY; zDe8D9Hh3zLM^WwPzR%p42vCfbga8b z9HY}jYws%4t-HudvFf4>4`>-D-L-WLy2zzsUPMFg*cC0jDezeN(Bv`m!iz@A_FG5H z_C*hz?i%k(+_zq(x-34FKbBs#yp6qyzihq9d)U87eOY?aaAWX|3-5|9Ewbag!6pa? z;s-;LGla&i4aVb737|vd<&blZXc8!-MbJ6*Urj!90jP|YDb)3`GqwpMCBvLO# zBR5Hvj`SRAc_=(5J4Nw5@=mGo(N4w>3iG@*v+4O!Ebj9R{Xf)T68OUlCJ+Wy0x@bo z41>?fm{jB)3a|+1l!%8VQs_s@Gxm{O+SBs~3=64{lT<^-lv502JCvqW85<;Mky9)~ z!Yh{wwQA5=&&3={Iy4t6YKTf*BRpNgG^%~+Qp>%Dlw5i>YT=@)77-hScxh|ZW+GG0 z!5m^dMXQzU!mXCH4P!j@t5x&DUCyQNke_PWKYX_~aI zSx<&>?JAed>XDzvS`J|z-5h_mNv>J!P~kH+h6ayq@50;Vd2H}d!~tS=(H^}Vrg%v7 z>F`nL0P-RHDg7vQz`;K2gU4ORTjOif8~I)8Td4bnw|bY6FU2-1KjKY>e#8r)ZaD0? zcUT-PJdRe`+mJL|zD91J`GV%I-R1A=gcRa;y=Ro4*5~kwTIOpStH#I11j3Fs@h~K{ zav-Z*A*-NNl|tt7(R}0a1Zb6xXHCewnd)ThGSk|)79ny?OJh`v;CoHpL{rU%vD<}f zhVfMrZ?!Pfnx{yv5pD`^&B49S1=!k#r|xsZcJhg4-nj2_T4V21OjqB6da1{}afewx z&M7A0Zx6aeVV<JGkhx2ag(qB?_nSoI9?ugzGrW$GC%zsPC-vFq9x`a2v6DnI(vF!YxSWFzx!GrL z64Un|!qd;*#C5kH6est?zkd&KI`TS?!g_BdzzG%I3Z`2gsLYsVGo3;iAE*hN zm9?NI<}A}e?817hDuwNN|8@hNcCnX7-jaWNFWx54-uef*=UjId1jIJ|xS)Z(l5_e9 z@qpCPy58N+Fm;wALwvbi!D2|vPn}6E37ln1J6?8H?K3+#;-NP9rz@fM0WxR%CRAJLB|-Kg|j6*R%^CK4|vLZ(Rr7jho-# z-!|z^bH}ZMN%l+Q&Ti8($63rfMQ0P)(<>LO#T=-OAA}t? zRbqu@LuLdEb<%r4CT3U(n~oOK{;|%$qT|y`vx3Y1`Xl*e)nR*%kUAu3{pRgBj)zj| zRCNJw_gWehD*`DxW%OgnXZst-O7=}XX!0?w=A3ub_N=xJdI^3{71(c|DU+_kF ztC)(vjP140ZO5W!)|0|C{SMAKs#?|U3FSSySk>RjS2WiI~I2MWce{pc3s;h>W&6uosWPG{u@xd!QT ziRS9&i98E}zOFA`j{&}}V-NSZ)U7uaa5vqG?09$#5THHbg|O@E4rX??%G-tkUODjX z3StXFuUpztMse;x@?1#2#k?E<(L$sDb|`=F`6ipqTV^2N^7WJ6hL&O>TR6UnbgMn= z)FA>31H-z|?~`}p?#s%>QUGgI>|MUf&qC&3c_Y6GF?zm=lK_d-e*;eRdl9nDE+khp zR+UDfy*qhdV1KfiE-u$`P>ki;w; z9&{Ug>tmFm42F{u5HBH6%$OC{w6DDvz&{FkNP(YMCC4b_8e}*M({VvNtgvMe&4E=f zC!tWMtj~rUXRU;{R`D???ZN`}QwiGRWd-F^9Ncwc#gQvV zG%tr{6pR@XVFmwPNrO}AXkUyKm0^ivT5-1?)hcMFA&TeJ`WzGET>m+Lw_v(yknaCt zzKKs5{$eT5Gq)LK+LeNaFQ|v|otqoB{66=(>sBZBXjO9Np8=B1?(=s7z~x+>S28G1Q}COimM9(hSLuW z7+3i%j!MeieK4Yo6DpSJ=|$5p#TcpXOOlM482L938?!vMDHl#}VktjUKS+f)vTrp1 zV&!7X{*Wz3v8ZJJ+_hWi>nY2pMXK->GAy~?u4YXO8Z5T;%sW5!i?l(H+~L2Rx^azv!vlRb7^T#PJ$w`Ue`Q2IbMFK?fAPJ{ zM}t#H$&(!PqAcQHK8AsQEP%XvBKJt6vwv2(u z*8sMQIA-hzRnE<+B4#h6v{+USC&Ucln{Q&b<+8iV>;d{)Fq-FT>G`%nl9tei3o-c` zHbG1+IXcg-ZBkdkDVFT*mw2ni&jYl{QdbJU|DYstnn2yxJSP&?tqT16myr` zf~~96AJ9J6yDYgL{W@1qFNNDvm&~rp9q>4} z&?%2KOyLTBI_IEMo!O^)!l_l9IRJn1+AP#tyv~z^I}3YykBcI~38ueQ{*d`T$T}KN*DZ z{#S#Lgn_NG_5Y2jMvcSz+%Ku?d)H5&?ygeuDbz3n$Lbf=eJRGCTmnea^|Z?e1J zE^hMte%mABLpiS<361D=Qaerawx=>uWo2ZArqGWg1PeU=n0Sq)LRH76sPhm$fNg3= zA@HGKSiEJdHn!t4G|y13x-L}CX4ap@T&RQ50&>ebLlMx-9X`Q6Vige(n1KcV<&sVU zH&}!8o+isyF_0nDN6S(oj896{THA86asJIn!JJGuav>HMHw-C^%}PDfLB)`r=D|Gr zzAR1!IjbPYsN}pPZ}ALJeHj z^sP$Xv6bh{XhsZ?FD{JTtR_Ux!V_j>{bXtGR^tn+xZVjh8;JGLj(nJ>rG58q|dEB!sftYnYCe;hnq6gEJAR+oFcLZ1 z=sHb7i*1s$(gKh;z=aq>uS_+voiJ%KoIeXTS&Q9E|0{Zx571atNFps#N@$LzSzmIL zlglENYU02Ul-a~qFzIH_#!W)rt!exd=`JPye1l6<(wsbNDPqMgomhJ_vkT$^9O z2?gkaeuQy+g5bW|M~O|tk0Dv3Q5);J3Z9N&`ZgKg9SLcAcgt(a^wrUnBJ@?>`}B$x z2KnYtbCkR~&y9KW5t~B;AU_dK9%HVH|1mb=_GnTu7z9d*GTol0gvqI$w zQ{Ou$6uV!97x_C;P<5zprMq-QaBYd?*cQsMD;7L!Kb(K>G9t?BCS~4yv#hHJ= z<-F#W^KljGbkuI9cmf(LpZxXUrV}{N6RM2ru81(QULkMT&>vTb6TG&@<{RK>;Oyp%N)hARR>=c@2Fyi8%}W0h zxB~zFZ*!w(3cRKLPlN#%@<07u|9iamfBr7V|4CogsNTqHt77=dZEU1E9+lw+0S6Wi z*l4zLp2QJSU_)sA6(t+l>7-5jtKm6*WfCY>CKnT-$bTx1w1w70@7rgbGJN)ds=H7A zjlm!FQ(#qVJs=@LVlm@2{V(^#C-=oi{_P>}rVfb1pEAI8W4!F@vMa?&zqQe#sWCp6 zVX}#IGC}}?1(0PnBkre*cdNR9La1zmRo(R|#xG2J!CkW;?xy|Lx;od*eO6F8iug>9D-OW?AJb5VeV3C(R9?X9WLR~%+Q)#sks*|li zffu~TSm{w>ca?cxnS2k2Gk!#1;ZExz3M-bu?yGW-%S}q6K=SfL z$uj7_D4QGn?Yc#_fr}0(qlOs67$IxYbT^Hl+&g;`_$CXWSj@wXF5VV|R-8R;snI#i z(#)mG$zIX{RL@vJs?Hd93aB|^oob>lt-%)w4Q;-tRr=HSKwzKq~P)I;{X>@?2D|hNz1lEg$+w!4TH^J@o?rC7GFsGyqlz(P!r@37jP|4xr8 zjuNW$t}f45DOf~r^kAhAI1Z19v{5Y%UMZ?6?_2n%*m3*B^-z%Th{`!UOBX%p#Ui_l z#lg)GJ1ZfsrEAaOHFaR4Qky_|6v|@@(aJ%<>me`?zYzZp5;Ore_HImp*1T|Z>X4C^ z@?u-y-oP0^#^6X(xx54Ys7Q=4aMUOGAvzUPJn{a^4Z}~afj?cwgSjs$nH?;5NwJB` zq`vO94uHv;4&F9#_Z=|gXFOnKJ}7@WJo5qg3PJHT5%oJDe*YxvCvLHg-j#F>|4yTr zMy82ZJYDboFDEwE_fx~4pU-OWKjy9ecPF-tfuV`@{|$<(duXVvpnh+$gMb|TB4#3j zGF4H*_}dmtLPx>?!^Fj`0N!9u#Ak}fz%@0P@jEhRgw1u*cAjj>bf>&Rc99}8-CWIP zfp%-b8j$O&+e4?{dkxF`(lg3HMC5mS-Tt!a{H_t!y9G1^j<6&eOc4pOC1xn~W1GongZ_3sd^xTn^N z!?H~_+AB|_c&<24{o;e3$266%wG;#w?x>xKU)ZhET~fTM(C(}(qTIDI`=f^(a9ln# zf8O$S9J{=ps+z0DN6%;+@rEW}KfZEYagt;jYHifGyrrRi@^GukW!jMrTU%pgp|x=i z>&~^=!k1STQn!wYbD$pN9Lq60&S3LVNKP=_;fazSCl9{-e&OuZiAg7!0X44(XtA}k z@(N?$wY_`g4N!|T6jq+>SWoo?Uf;F21KrGxcx>a0Jg9z1=GNNC;>G=1-^E(U^N3b( z2TJ*4gv@D(&V|m@G_8HZ$Qcs(AY`g#wPkhMWx7xzRCns#J+dy)J=2%W%)}(reD|6) zj-AIY0f+ud*VlOjb1YFUZ8G*0V}@T-;v#x<4VpkG0bZ1lj&yo+rLvFWrFx;b-*X6a zCV}0R{5>rw3t3@MQc*=+U=E|)x%kS)iD#?Wk=%v^Odm4uw2psNCFLpU=3Wy^|m z@f_UA8H)+b9kA6 zJe^+2$&E^S98C?$%9S6S6BL;W6xmutLTV#OD>qHMvRBhkh{%VHHaMaPig6E7pdDM$ zAR!U!X#^=->WsiBEk!>Dhh5v3;_WpVTp}&94oswx*+E-6WMdvgjcnBr8%P^A*n={X zF5QKrk%E^D%cc`4>g{~R=|azuKAWru%|(&#$U_Goy4WKKu0H;D`6$Og8XG@nF-#T= z3i3B+aIVXt`DtVTDuG%T?oLL*fFgqc#dFO%NToa)Hnz;`QQGyB) zS#vpPiqTGRF%ocEk~v6k7u&;1>McsTEeZQ*K+P_Y867))E-O07a8hWih&GZlUiay_ z&!j6H>Ve!}SJb&J#$@vnS&t%DYSe#1F|SCvT=TSApR{n#by)}{FJghAP9o7TRAuJG z&SyuO=^aoC5s{`p%qdbbzPDQcTfaH+^Jgw@x#o*TSh0Hw<$Y6D$E;Ag6OytDkP;OB z9T0}tN>|(Jm`tvtK~htOpXiW?@CbN7cIQ8}OynpIBNstekyvXK>W6fTcp6$~;ar^@ zQBw0+cDdBYsgD;|^Y0}$mSj4RIADw!S zMYxBEt#)2CYTc$o54}^U>+>0r`-lIBiY#IhCYSIs)0;tw|1_gnj8|gDDS~5?5324TW~qUGhc^^r_sKo`iwGQ*ABbYcJKk`9_rv(fXgF>{AEm zMouz{hY!+EJiQ~-&woR)!K>G5MueKwJ;`==a^}2#A3C~zBv@+YP@k)jE3cgZZ_HQ9 zx=$Y;sc5ap$cWtehf)}3k@}w{eRH1P!zS;SaOM7m&fN-I_ORRAsVuf{IKG2xk8FSz zVr1lW;3Oz@l=p#r6WPQsT`XP}PNNXbMY!$yipSX2@jHmL7|ASlv77p=bD_XA1S^H) zx=;y~ew;?T(k}-#(?3wwi{d?_*dE3S**Qcf@z(MQWr^8YMDoO(^rEiuJiMaQdBvoU zCXcdCxZ+m^4pYA$^U83mqkVZ)cZCyeI*`s`#%vd{!PN#ymQCAY3;P?b_#5R48+hHG z9A4!YR2KS+0oL=NoxmBFQZh!9%9qm&m+N!UGZ?PwSbe1l3A>saI>Pn$dy3xA3iHNX z>IOY1$_t65>kZosPjEmdI2%)ha>KU3n!A=Oqn$TAq2Mbt?|uM;<@kY-Cg<2A%?n2B z!^E>psfSWnS!(EZ`*Xo5ME%K9vl^fV9Rnm}as*dZ3~eYg))+9Q!`K!hDD5~*Ymno5 zE5!k}!bqC|N3x<^7bSF@!cgXDxn)sYt+?9}(QuF<^AJbdLKwBeC>qI4=s{t=N+i9F zxfr~`6_8?p&r@=HC?fS{Sf*mxi-9C&Ox?Ju+niD$F)v72d#S{$;v2*L>-|WfR8Pm~ zP^g$M_wk61aXhwm!pQ!Z5#&2;wE^M9C=#1L*86f3uHBw}2tgEylecB1bwng5Xa&K)QN0)LsBi92dm$vWYO>w?zn@P^0 z1f;qU`(&FmIuQG4#MpwHQq$?d%&nNy*2O;FHqYhEYjn!jfr_&@Ywc~0Z=9Em1FRzb zy#7SbZ5)0bwxk|k)|+(GY3~0lvY)v3UAcTZ$1ZWr7>7;dPd2-QqDm($n+CK0M#M#Q zJPfbo%5CbVk}X5e%NWKswGseUTSljFjx}Y+4GiXN3Da?oQS{W6TEM-)(Oi)@FF6mJ zl+#D$kn8eBRWHHbecq5V*CvN9)bs0^2lB9lv_$o`B(voX(vxysBHWo6-!l={=)85~ zDez_6qQp!i@3vk;ui5|MkK+VL>KI4)#ze&yNX7|gp5<4pTZ0k;5?t}OcLRw5IJMPs zD`6>1sYK$QZCrx}ba1z1H{{hQ1;OFZDTw zzhrHu{cgGb(QljO?bBO>ew*GPB9kVp@=1wCTMl(6lTq`^n~88ORD|+}{WCW%c-iN; zE>i^WMi0CF6~lBE;Vr!w!IC?oNHu#(c8PAXMRH0PF=r?kBIGS(u|`*Zi-5;g+b1jr z-LQH@&mSlrXAtg(C+gtgZ^(LB_lHH*0p<58k=hhjCE5Wj8LzP!$iRa5uLSNYmCb-ZiIKwjkc$$9 z$K=EKIWm!eHs#R_W{R^JlK{6vq}ZaE(h?6XaI#$7KoU_r(szP!@iS?nd-;cnZ%5N$ zKqRyML!o7>GKV9z&QISfe$X%vXj6oG=V!WPg>U92FElrP%Eh?fy8O3Zadv0F$LRq?{TBKmWrK0Qr}{4^@K=9cS!o8tHXHJ%!V~ z7v3d2Qp-}AQ;pu7V3f};c-Y4enKxisy9hK;VhjHH5W`e`kEcR*%KTf}6dG=ny(XLO zS4f+-t}-9=0n0fpbWj=1ybqCByU&s>IiHh7Uz#+zPKOaDoUd)IX-QX07!a2H3_oVo ziQ{7u;SdOV6h)nw&4in)A&QBvUQ}s5$PI)5$6O!i_>GRCh9D{@%B+P2!CkrUy5v-o3DLir&i;;i|`AYpw* z=TJmiuRundThN2`SxBpR+%FZ#5=Iw;wKx-JQB~@h3ZA*Yx3f1TybX@#V06h z2l>#%q#0U1vDKI83q5}>Uv=DhBx*s4emD+fiF2p}k)w5JcEM;fp=k0tw7$q%JYjF~ z`{PAFrA87CpwiA%$V*wBmI8SmX z7xH*-8T^BWNdi8L?tuK7zpZ)Gb*f(ewG74kHRR$%hrSO)*fU|d2|^_2)Wn#7g`2{> zb{?vrLxFa6%JPJtK;Xn@V6ll6S~)LlN@gNPo<@XDn>z#Sr$UGVK1cG-f#;}|9>7Xz zE;KwX|7eQOWZ3XVogliKnYzd`;jw|EK`cJOg(5#yxzKXrtXv$ta}Msc<(8kICNYTS z3q*y0i3s#DvJf#8IRT%H8_&%d5NGqK<=0a(bjE#HLCLgPy5{{xEq5FYu+&N?Q3~EF z4z=2%5YYvS3;>qd$4CE0xr?+fleKj&f^=zKD8q`_)1bkUKQHi9s`5q3 zlmw+kTZoD8%Pws&9VN<8LwBEztcgGF8G-EK%snTnHNOyr0!3q9E3Vkr&T2qa_^B z80l;Arh#)Z!pN~5q?C+`9EHq75tg=trql*!A|Bj#2*lg2aKbBd4%{@>5FuP=9IW5j z(k3?I%%qevD2bUIld)I*c|pX^0+(2A&N&<-H}yi`pQPDNijoVSaY73R{scN~(IR5Q z9Br;dr&nIL97-$+JTA$_%qY7VWO8b7&<$L|W(54;^jhyI5Bp-kiTxGm&s|I&cxNvY zd109R1>r%c2Bjh~wf)4%i|yty-F2k7rR8Yf&lk^yYsOH)93h@KMZ2Kxgpd_Vc{>_Z zGNTEV0KIJ6Djo)lw6%t!F+PVDI89*OL1LPv)Z~?J%-!vuz3nwRz>&qc0)aBhBv@Y9 zNGmz=9>$q|k>$81?U&YWNHCFF9*u655wb9aJ&Rm#_%DG$_!7E8CVaS1eUU3Q*9lX~s zT5dduu8b#YBxp#p%g8^8=|~JBv&W<}&D<+9hjjFDSXo*6Y~f&X`O^wyDTKS%5Z@CE z6^Xhuq32RtW8!~d&s+B;>Y*u8_}m^Wm)JS{bq6m=YX)1ZWGR-N}mm zn2xWokmZ}vk(g6Eq${S|lphy4v4YR>Y@C5|zy0;MvjG-fDJkuHll^5OBy7|QH!eL` zoM&}lv>PNY0Sjel@ckOMT$Ma|ETA4yrTwFL;wVqCJ-?xC+Dot|RLYd@mDg%;d^g|m zTcQ%tBSf3EwpjiEbL0)|jUR>WRlTTC5km_=?v}904;9eyAx5#is5j>8<)-$=2{vN( zgv~Qk?JF&_e0mY#Z-n}^t*pA0UrMz3S44jUEzas9MT@?o_RwTL;rvSIx*y4zgmgZP zYi1%_1+S|zcAJ0JltT32VuO{)7kJ6Ba@Op~tOp(6QEhnTPTcf1lnc%HV_a;FmEwRc z_n8eqll7?``KiBfc_b)OCZLhjEn_?bHI~0iVJe9`I{x>!GViz$M_;_gdDD%$~$ffq11Zr2qeZNltXo~_G1EtL4<0H#PVU97WD^`No) z>%5#F4uv4;j{IqNydC+Yb$%-p7OLZfL-T;3yn2(o_3$E~OXAjF8!k`eW!`_6xKm43 zIEH1w&RR{vNVT-#CW$e3W2p<3FVAhA0`AG9EY2`?rgMh^%V+yjoN@4^2LGszSxoWh zVH!vD%$243b}Np!)vzB9YfZrTT3oxy4thOHyEUI!*pf^?eNH&Pn**YXAusdhzyTa> zpQdBZTqDp|@~_@p{n(Ec^vwn_hI3-m+l|FyCCTcedNZ~ZRhw~RBQda-E<{*~!y8$p z(~(HB!1Ujl{)aQ$PFSogS_=_po7jNYA}F%nt;wVg4m+x4{_xfIJ2!&~HexSL)@Gmm4<#p{cFt#N$c}wH zjk_HGnI%YXj+)VUP59xx;rwHHm0Dd zRFk%-0mQC@`0;!2XFlp`A1PXQ;x8Nvo9)T&qA)d6Db$~oP?kBA*;Y+dvBAj)R}@w6p7Z)aam&57_NtR8eUhgcWzPic-#1sr z(8d{F(3Y2u{>@U{xQF;#a;va*2+P*J(fw~LKq|B4!1Q6VC?iJ#*?SX>Dc1BArzZU< zJ%|&IKh|MtI3iZKe#n919p|A@g1%LPepLQ0nB?;?xa5j53oq7a&k-k#Nn9HbP}jop z-A-eJeplkvRmm(1zS88HO&Yuq0yR3%qk592w)B?b0in9T(u`ht&J&iyRDNn{WB8J_ z6~NvJW|fmPaC13UhIoHQ;g+4TgzZjiTqlBfe+AY>kcU&j+0h2=P=C2>g^1`H!A}$-II6z zYEAgniW~;(;rcGCd?WRtY0B99IlO-MF!gX$(~WXuXC&I5Cej`UQ6iCCLo)H#2kFSh z@*AaL@@&^>u1Tz+yk4ZjMGdv&Hdiy=eax2ECT5_X&Y&Z^it z(HdRynGv;6 zp7h9MqFHp!PV%^wI?;}%JW9=vWl!*~L3TWls0~Vo zf|_DF$>_T=ayjc*@NPUfcRG>FTll%U8}7y5f&bN=YzwoVs*d(Tk;lBF ztZ#{3*F2GEX;%UiE!erxlA)!fs-znmg94t=ztu7GLh~4WnXaj; ztI1SWy1rr2*t-5JNALp^3-b??LOp3vcr~O4a@nET3?4tBpUm{@9E?V6VCOHKe*Kg* zkN(T?w+zh#Q?ponPHs0LHB&m)ep+W<;iR=9GTE^~5@vcYCe{-et$$f3(Rtatl>A=C z3ar>YkF;q_|>WucUJSN*X`U!4~ASw54u{wcHaY}J@ZKKAZ8Fd(5q9`6! zP}I^?2#jhM^#YI}O;b(ojxA;OK(XO#n?Lyq%Gg@8RP?=Y8(|vDp+w6Y6>s{wp}8C< zX{xHqQFm|A50&^&JzOvMl zO8T^0@}OL~3WJ1&R!3>BS-xuar${>TSHejJ!i^&&^0DbZgk-&7NtqgC<&(qSp79?m`F8Q&< z;h8+sTQMB64TS~4j&SwV3jy5VkEo}rbom3YQhAS{2~b;hj_bd$z*e!L?~n{4GfB@l ztq^G(wlPaFVz;`GC9lr{1s?!&%k{9Wm!Z1Lv6((psDk6Qny;9&icNwm5VP7I(>%<% zsGzLLJ*TV@K+;2Y zi9+imxZSBrLZ;PPDKe|oQ?qXGfk!pT>R2UpMTL0bfWjIai4x6JE#>I7vXa^giuP1I z;;879u>TN$OH)-(S#$McY3Ag87fE08Ndeqs+$^DnvkYq5i88~(Qca%mx-1z#1AlSAuv zlocq^=D6TwCc!cGqD?6Bv7jUd%Ahw`8i-u@Z*o}8P@#k9KE`|;Nr19Sa)^8R^J`_9 z%FNY{HJa7KmSeU(qe`eCmB(;UCdI1@yE;UH4L4HbnvD1J{bF9^#C%N$? z>Q=ba!D8;vD){D9zHCD)S5sqKDGcOK6@og;c$DFSxOL#4wv=Cv(3{7>aE|q=a$_m& z+_Q}wu!C6k>KN68L>OR26<1ox&?E|#qO3~F6q%`Aaw5*`o5-Qv%f#X-1n8b0RY_e{ zYBSV?5om_tf0|%RLg{SaSm3N({Gso_t>O@L`~eAJU~%e@jbir~bT1$N$YlL*$zBL+ z`j=y9tq?~D<*3%lq?!g{$YNs-t_5d-hCg=hd#S^Z=P_$DHr4EU?DL2^EPFIY8v(Mo zWME9dz@b7BD3%sD9b78q$^*y5M^hCMNba9q-YkSnhhy06sEMR~BR>ot$lvi6SvE=i zeF#4ibglL^e7E`J41pqwnkR2@03GK>u8_o>)7nToi{pTkeYmUVDkR9^yq>_7Ii+r~ zbi=>n2{$cAx$ypqB@y|jE`$qSvs;)XD~^FbfVCySw>ji@6@gcbz;g$3=;TpP1VPnE z1-XY}Ncz$;kwRCtMT7>fh!$Zr6)mS3_}sq0c-0NJLHLwUIFJ5ZwS%=XAwqPG;5Z@6 z1E-NO0!%3NZf=Qd>)q~XLZYsbQuv{PWIYU-iE+(-NU=}wsb!^J$1J|ZI>rAkoa0>i3? zoFa}pr~!R`DgyAGm|&uo5=&7}^{}sRFwVJ?Rqbx(t$WUQW(6OjMjAPmKB{Ao$;J(l;x!}QPx2YBC{ga~*V+Dl;6ph61Uit>mgGoy=mfdVamMlnjin z>dc~fVlkt#luX4NRnl-He?-X4esUJdjaRH@4{|N5hT7A)g4J5^Fs!DZ7Tb$x7L?kU zc!zulEU>(MT!aSvV0q>+@+QC*F~EwxAmFyI&i28$->o=JR}i$5PZ?mEcc01;*sDFw zSN!9>ykmrPOB3~Lb3vrRmNYsHrX+T1%CTvt!8!5X%rgO`U69NAh^2eU{2l_2LMX$?CWnjdsNr z5b4Q3)OjAdq6)YtZ$!(~bXxpS#m?p8$OL@~DLGusBcyHNfck!xWm(yBhDCi8Y~6%h zdmOEx*ev|lmQp*R^89fv*Q&Bo@UV>8Iaf+wq$aUps-ep=un{_Szx)7XDEOyf3}4j7 za#P*uTCKpD)s{QDTp$n^^!zO0NWHd5s=GwgX*!@_lqU@-lbvh)4~nr`Vhl57umDFH z;_N?zM(cGKlG$4FMfcn?rY5byHGhJX~Y`Bk<<`b1aZyqx_ePRCbq#7 zt&4{sNh$Y=bduj#GNBVCrXT}EWDsc1N`TQrKZxjfvUJ!O?sZ*|7}nE|#O%J*f}4#= zcmmBOQ6dk@m^Ys;8jRlo1{1DRg&nx;u|aDJat5`&nD&GH3;O zsEYJ?9#|mVBu!DBGe4hEWc-9*AwM3Z^b<w1*Gi-Et{&RFx$vSOPQI zGP|BLCp*}bgq%pq(?!6&DA>IVDtx6RN*BQ6JxxQKGVen!k7$P={yPFt%`o4l5RjH9 z7@Al#&AJ{x3=uJGj`IICdgYdL$Ln|*`T*pA%Pg#gu5s$6OiLiG%5N#D8jtw4ypo`y=sL2>h?U0#g<=%JY-_Y^6#g zUlk2eCsp>IV+*Wed3d`SK$hG_i^O)epVBA8ykQ&+cZ@qIBKjH)arqs76j!3|+k0&* z|6fcO3Z&tn4x@&u@7wUClgGx%<1SR;K*|(0HnwApY3?Lwz>uetBRTQNot0itE0w;) z$&IV6TtX5(S}TQkS49lN@R!T!_qrMYOykNJ5}JYBLN>w!WUAAq`@;#0_DM{AP{)liRHc3`;fPs zZ&0#5d!hgE;6J_gzAQ~j)g7Uf6;p`hPBnt+5`H!_k3Phicjoa=@-j(TRY3g_b!7VO zfDbNG3ya$>2YOOI5B!k{>xA7mtlm^O)mhPrE!pv|u;RY4G*!KZ;d|OIa8y<2N-QHB z#Mzv~Clo^InRLPV3(RoS;Tsdjupl$|k&uDy>`%R3$R4C#O+YzyIw2hs2rK*Bk6}=B z`FRw4Tr;nz>LFvO<$R*G(Hr;!mrp>HW3+(vJ3aoJQ`MDyA>9>dJuD}*Z5z$~s@#(r zI!YKWdaIBl;vNYI3a)NZLDbs*f=<8AUpX@!(=dUf#x~AhBs;nBw0EyI;QbvpE1nE`MN%9#F0> z4nN)i2X~bVUAwhAjKg(nWj{jru%PszZ z9?wb&y7|cXBt1m-OPJ6XG@G&YRGvSqOU(0{j1fudiyfba^|+Wjv?tojJ=4-mnb;-E zY^JFb=txeuSr9x<`s5{I@;0$*F(Gg;@tWPJLRll7G`z&H`xQ&|_te0Fl z%uDq0W>YkUz2$_xwFj#b!pmjjZ~gXL4_$eo`$;5tM-75A?~AdxuS2{?QxIm2|A&hb z*gJcMR^-3d_Q|a^{{pf9r=!Z^{wKef`^YzH!cU|(O~=}8Xq103VR$`O3S{S7+MPQ# zXi3E*i{Tf7R8;{70e2W9wEwM0TC#)4`sXB3{vDkARN(EsjD^|uJ?Z4`lO76{`&F`f zBq7u3+R|o%dChI+QNIXid=aW>6P#6$-KcWda9Z<&Id6ruh01S|ZIiT$RWV%OFu;$^H+x!;@Z#U2$ z@qI6qIWLypF_50F0@!9NCOvE}@KJA+?GDsg_v$k9z$cmD*8TOrp7KaH*?*Ttnu1h! z5V;~={t}E`Tk$5<^pHOy&F-u;!Do{nUs%s%8P@dW zE!Xue+xb7?Sfs!Q4(xLdd*WO5z&CBcw&iB5bQ4#*YOZ`pSnt?Wd80(5%`&bqk9o$n zoR4YF>Re=)^NvM5Id#?(#+?4@z&X6Q$BERTkis_-GOD7(Xedn9GyMSH;ok9#(0)+#C~!}+AI(0bv)N=*VWE6 zBx|2GZON}=!B*ZfQmGP0t>o4>F8!Aka2UYXR zB5{P?@8Qh_^N*PzYBm^$58CVwx*%O2gzr}){{W;h!Rvs|$9_+=HraCvKp%?hjkomh zR-Z@jf8tKbGwb6JoHnHWY4jSbKB)Nkq(8=aB7~t6=-i9QXgKIweCQ<#w+EkP!0Zze z!nu!6=A>Pp8ijKgOXM`nS~+ezl1=9HB8YoRB+g=LV28a$<TlJ#0aX2(-BxR6vB5 z0&dH2sxpBdal~mV>_wS$Dd?#YWWg~mJj1Pzw4~X@#q5rO*TAJ8L|xYM8xGD5FYN9= z?pi9d3maFN?v;G5M7r19@2yCcCH4me>z&tpbwm94`=0;bkvkI%!IbF9X&KK9(N%zYVgCdYA`l@Hp-Y0(z( zYzIHMxY9p`z}*~w7_8+`7G7g;f2rs0E5^&yHALs3nA(kzUPNmcTL+X<7wX^(-hQ_p zxeF@k4eNa5_RFzb-!W;-2Pbo)C)`~m(+^dzZrKXSB7u^0t7@6klEKYOjg3=(8N}vz ztKtrv+(|UGXVsqrn|l>AN$?^>%X9RGc0Sf7jIl?A=8~5)pr^pyY4uNnyXo%=^j;hx zLQNRqr7{%6~s+{{G{-Ip&;`3CI~zxbhsCHVC4?#M>L(q z1I5>F5?}xK3c}Z7<_SC?Sj|k_fYx~s%g+y!0OBwGGMFYmnDlhZi8_ibu^>{*zHf1S zh?~y8(S4>!)trU*#NMKw9pDYp4XEA?9$`k`{%Q=-ry|6Hs0sgK z4;|@vyoD0urN!8-VY}CnO^T{O2_aTVsmrI!fHh7I%pWa2-GuDavNShJ|%Au`x>& zKhzg#s95RIM<&beERL7PEFaMvWcGN)k&9-KPzgz5*w+`GVR-tY>js?53ysD>wzrs%2IbyR8#)gVhqH47BYXrR-E??7AcqEcNod2XgSG zA^B{c#uom<5y0!23&z%laMV*1dSdP(KPqKR%qMcG5N5k(7dGLJOV9)Vg4+jviUZ-t zzn@YX>r(+dF5$M1xu75KT|_9r?{*PZ2V8wpPgJlc@>0)5;c!KFh&@2M8o-eSV2Vr& zHYgJ|VWzNRly$2HeLQu(@yOm)TBlj4sYr<^|He=l{*_U8_I*yNHR*Xnj-|>rcnhUX zYc`Y`#B$^J4`9=d7dmze>R^q{U!U@bvKzCZ27K&TiaKj=aC6+)K1XeZZ^ho+m?HCB z?d>VD)h3=yC)V%lVs#DMCJb!RRJsxA$xrh1QNK{o zPdAOhpJ3BZ5S@{qKvl|NCwK6I7XIQ>C%I&pTDXI@?$%8#9F8dnildIcx&9jN5l%4Ljg|Il4 zzXwGrFnoGvW9C%AuRx{Ot`eo z9I5c+ih#Yb-xFens<`5_BFC})?qVy=e-%KQDu~+9zlas5zC)0d58aD()rL!_Tq(>K zMKk1tX6cm?;TDVIhqpi%&WE;~*@94Tj?IiRsGOSWWq?QNg3nSfzaVxcVbwq-{;(bG zN0dvh;09s}b=o16qC&1>NP~=0N3+%sa!C+%ZjN-{Z9|8WNNBMnTqGbcV&dUL!tX#E zzOHqM^1#`EB>Fr6E%dT;qDuibD;x_%D}mfaqAR6zFlc0e7o{Mj=TC=um!OnV^W9P& zDO9LzV0p8MG$2ha)if^4pk~nv46XsGS=l~eZ&68>M+LQF-9&Clz{FvT;VLlJ$fbAg zmrQPak1yKt4{Z2~Z!IEYj{_5q;$)tN2@nYaB<%&nK#kPW49C!gN|-`6tI`FSm}35d z2*L~}ozlyz{VgX`918g1KSWo|AdS&CvwGGxXpSmN8YZniH;?z6 zHmkknT9Kn==it+Pe!xyKZzcsGFRYMnoX{qppR)zB1)vI9t z#k!5y`G>o!_rd-PcY|FEuPqb%U_>G@GHAJku5$Ex5h^rgZ+BcW4GaE5=!4#PpxKcYaL>V<;!|DHnCGGZ*Wi5Q5%3^s zvEwXE>DbZl;pd>yVQ$Oy3T&OG3^%{4bPTcTyJ4{X)f~`)ie{@bP}{7rZ(a<`G*Ff0 z;B$BNaU(l%{rqYvsTWi^3%M*yNPqWT9-Ms-IJq7Qmx(o3PV8(wu%y!FEYu^p5oyrIfA zw!D?I5X+PpvxiZkU#+of{M1yv$+{Y1)QbZMD?XZi*Uu9t(}*2r2?S=pnM}aF4HLo0 z)&rw#onQ^Mj0*NL6-IbXg7tMXq|bBj1rTO<7gR+K!3n!I0h<5@-&m2!>ePsMdJZVd zQL5zZ9oxF29M{`QeJ>|!(v`3?w;Fin$qg_AosG>o0JaB`ZB&@W=-|)rC^|xwH>7TI zQGBg8tZ=fuLE=5{w1Thy$RuOC?i=E@f^R79B)tVRPMCVK{vPWQd^6y;P$wTGd9v83 z3LS$g;BQt}@jOyNm+M-G`sLOO8lH=@-1%C~C7TmIX4_f%rt|dr)k4gG7Ik9)C+2#1 zi2XX%FwW4V^i^SVJ{&;N)ad7D-y1v?+UOB^KQx@}H)uNRmt3%7p#(jT8v z;{u)?w|+<#e9e=Ejnx^gtr0dH4_B=|=28!ucU=v`QI+|zSc-0os)GRu5B_+FGm@bJ z6wWM1jLI<0AoWuEdy}isZ#XbkL!cZxQ4z|7U>>N0QoxQ4@J%6HApYfd1!ATRkZ@O zgXx$7rWtuu+5D?*6jrDLA%JYJTJ!m(2f3kchPGxyIo2m{+bezo+jN0YY8_&&hhJ*^ z@~Ro%^n;y>O#G*LQm(i|j>NS17qN#eeWpZW2j+tx$q z#|H-ndBTH*Ujm60dkWzOk(Pm;pjyV62u|8%pq1B7gcZwIhJ>m9%9*ZITOk^Gs^glF zjoQcCP{7P7$Zl*rODt*~F6buW$uS5r6|aA27spfBE;oa%2(*Th$-;u};uq!E`NyQ5 zk7xiBY@&fA~r8Fg4O4nhhiku-2~8o2-gKx8C=ErZ=RYY&dj}1xFh_avKMFYE1|4QX-&x@`tBw(8 z91l6TKt-#U4WhVUp;R{gb9xeLm!l0_u0*$5Hfq zZuF|E3*=WWtyr!5)^X~> zmr4`r|whOlD2-BEq#tTd8C6kb@PV_9L5V3`t;B7cX zlRd6OPGBJVgy77f1xh(bT#~LEpn8JUsOyD%sjwH!d57aE>;>;ye%WJu5_Z@7g3wdu4|Q9f{fGPn z{VMnc6mGPwTGxc6EwodT`&nCx<2qDm@ zljZ6JIudXG%7txWnU5-=?|ge__Xl>Vf9J2Xa0xRC<_GT|;Ntnk5_|txqM%-5?P+g; z)SX9|W!BVb+GW-S`zTWjyBf7gX?2-G=VUH2D0C?_G3}7OeOfBUe|)TM&0nq|)KZd7 ztXk{^Jq}QtRyPt1F%{w z)r&?3VJlE@RyBK~v}k&Z8~xl`g#gzWAe#yfjqvlMN_+XXLxdEzyw7wW=pPV(c)+vu zaf-@&)%AHkMCni7?U_DQzxODmt;8P3d33S11V|8DBz7Szl?mZ3MS1E&!??P#~>gp#oNa zAc}9wuBZ?FOK)~bgkb+HY+1Qc$+}hbWZCGA%=F2REe?3e6OIXDhj`%!dCd;=#Dy4c zaE#A88Ahgp>rKENTy6$6afKi}Q`Nu{{UCR(on5GUa=58*%pLY(cL5fROX{X9VeQjCRwYzQt4Bx7m? zM(jNnPOK2X^kQ^guFUUJgB5PU8edHn2ynHW6U-Ug2_NH&d8*$94H_1}`xuJU{ItxQ z;xlRxWm^xzJ;JnM;ckeASU089lRIg+Ear$aC3nj%?NwZ1XXCD0RzS8=7btWkbsjV~ zzw*Q=+y)?Sxd{4e-E>82zIoEJ{_J7f>Z6x+#b0;yxTu|@{jfFL;EEVyGlUnloVNXO)GmVc6gg6!x~r(urX_)=YK7Sn@fok->*{0 zhRp18_SKlYS(y;fl3sk&wCz0W?Q)~cKrOrkm+^#Gyl}KE-vQ zT-??^gLxPC*mB>)&MiYJ;|4$!Y)kI?*ce|LZ~FYKx$g3#t=d5t<3ryoi`JGr+SfmG z8S*gV%_7s#$PM06I=AcuGFM!>t6!E-R4H^sG|7c+3QpFjXJ=fyKDOr9PFAZbks$23 zI+x*5<5D!o6^~?id3Y84L@A^~^MuDkZO25)-|d#cN z!CgTYU(go>J);&|i7USPz!o*zBLM3cg1y2q-U26E{0LT^VQ5M9ZJ=lG7u3z=h;&@o zYSgw>`1QzsU34g)%1ySQv;r^dg=MNDa|U+MISpfj#kzyv0OTM@N4q_OOBs~7^gzf0 z?sP!%x{-NaOFeF-oG*|VL|k)^vG=Hx)C!!!z~l%g*9ryJ3O3dX({Dp`nlEhb$lB-H zT$Q%v11)KEXW4$%w%9epw4(TFNCC@N1>vP;magLM%ZPB8>=j3kEjRIS1w@n`>;#kP z5ZbW!fTnjM6FsKRX~BoJaz(Oihg?Jj!ti2&on7a*oRZ4lzUuAxQS`zJT^Q$Mm4VS9(BMdgV3{#d z7L8dtzN=Z_mt7rbE+QYOpx}8w;_le%CP1s@P3Gb{J@1n!IHK>C89IY3tQBh+6IId-q4 z$FNvpa`j|F~hL`ICS3(Ta&@X zaNta%8YRKh8RIo@z-Hmw)*%^OM=`nQO7RwW*7ty(UJLw2X(x=hDvPrZ1o=A=6LFh`|1#|1186-5Hq7FKz?%>2 zr}Lq>oDUmF@+9jnHy`+Tmw1(#4W)l#yzR*a=07vu3+9Urx#*zLnl+Yu7g-VI&vRKM zZ_4%LyC{-3#C(Tg)5x1(deXG%_C?GpMX%?4$5AUpZ-nXz*RAqer}bpiu9`L0zU$D< zn|(etz0AKh7Jqbl4%HjjnWtV)^JM)H)F1X;OlwT)?CyL;Yge?I$*-$O5at?q+oI;_ zamE-rfx_emdcQRyJQx#%$|I*)L!L`Zi;o*FButnUhdlDvtq-I}xY`Lj>t5nQ`%N`- zXTz{Tm0}~rTo;r;Mu)&TwW&jyXUtLUH{BwqgVur8)1zxa{Mg(#jrj^Fl|?^1ILHvj%U%@Re#B4(y zEsZ)QL$cA~y0d4glag+GYlGvOgLV>d!0p}VpDzUH>SO7xZ5Y_UY2n!H)oRzp6{=30 z>qoB;Mn+~6?)`_80y8GlNNq8xB{4*!pV^Mcr6d;1o$?ATy}Iy~0n%cyjDQvL-p!&= zW9?0a7!|t0%13fEOn91kjX#rvhXHIZW&bZtv+Bbi>!d`oKxow_v=p+zNp(Yj750rc zu2f$R4ta(+L}DMU-?>}^BND-D=GV|jP{1Svfx2)lWZ@je9Ks^VF`=OZjS~+ArMCQV zH|e9#>@;;0ylx$U2^MVUXaIq1cCRx5!Q?Zzrg?1r>MUa-01P&>i`7 zbI$!w*oWM^@+Q#!$@3KzHlpvbM`k(C2=0sE&zU@P+c%8#9DI}O`$KioH<9$RzLC$9 z`WnF(1a@WLK;skpy2uyr;l#^2%@?p*x!*Y2DSiXfm+|40{MyWW*(KZ8pH{IScI`61 zft3sW#+i5Zz!xk6sKGs<0rSU<0nor%lx7feNIvC+vcfkg!Fl#Vk3NCq8vlwfudnzwlUjy zoH3kwxm!$gyLQoA%+5cb=S_OPOMTFeZwZULa*bSn`(2CcMNQ(g7}DqkCeCgyH6Jd;Z{r78$CxAJuVZ(*L=Px2K6}dUm3x805JuBS#@YGns1%9u_W^)!g@ubZ8c+c^1*&EwUzykopAi zEQj1LAeEZ~PFoFuH#e|q4$>foOpJz`TRw%@j***NJ;ga3dTJWjiqkg^=*auYL_0Qv zie$(q?^7F%esyW3nEqXdd@pPiPH>9R-J91gU_bI(H`4F#LHw(_gmJOc`aqjAf zeJJXHd4%|yhuyf6sp<${gpZ3^BM@iD{WkP?MpJ>?1 z|Co%asu@ZLf<|T5ET;pvM@4U=8L{CoLv9llLBykt+_xzL;G;orGZdlpF->oC6yf?X z(1+^8NEc(MRyNAbAjM#<&1sBq>uJ_CCZ%GeHDeqxwQY3%s$os7&x}I3W;v2-8&{jS zVXHU9j4odS9$-DkJK}l`w&&t9*qP8{us5hjXRTk1(q7UYust_8a(`@TjPx?8HRVNf zuldEWZZTMS1UICPVO_YB8~5P)y#jk`}ZW+z&)(< zImW02NIg^Z$x396ToU?npE0*b+aj+@xCGzLw1QLu-SOA@yWUxN+aj@3M}|uwM@V>G z9l6!2#M8vFeXI?xg_d{VgZ`e~);4WLIXFnei4k2J>xGrLTmwo%%?Gq4S(1Jwt6KeR z2!3Wj_sa11tS$S$F`P?fubE)Ia*4O`OF;@c(r#e!iLf-tjHPAy8l%Xhw4;$)NuCDF zSf1AZF0wi%M)h=_j44LtI#J2B)1=no_@tZ@7pN0k}Y-MLPqLcP7d-fS{Cc7b{W z?3)MKM%t~QHf`ik3#08j;5+9@IW#&E6Stf>4cEmiWXoiFEqYOH1wHiI0a=a&`))5< zBt^u066or&O)wzsgqxi%gvJN%wJ=I~($810Idn<%dl5Z5U1|u1R1Rs0-q~ttBhfW| zLN@1ho%Jo9_K@|Wi4RCvRt*KmTPM97;O=!$1osZzP5F2OM{;@rTy$%80WxWya zdtpyrA?TMXbh)lzPMX~*mXKMwK|%52Uv=pcRRVu1W~r5FZZI*C?v$V}J-jD_r0J>5 zzJT1~$iYriFw9suWH1U=GB4<C#aX1@zGJ zdW!-DEk^XhMcPf3h;gqIg%lDeTT*I2GF_sX{|v|Z6}{=PfVdpxzzcR~@3D4ey0umr zU!Ai9EjPHjMC*R3<#Ubd&&c5cq3eF~V9xAX{`)~PjZxDyX8Gx-3c{anDRlI024Y>1 zd-T&uBo|`)M4oqk^Q1H?d^2B(9;Ti;*$ciCG{g&@L2o?N{45X(Qdd63X7N>tzURzj zxtssQp4`6)YS||^dH8Y>5D&$_JmL>G6E|%#P9E76Z(LWbS{MH|`>>(-Bv}=Cd4GN@ z0N}nl*EeBx*|Ai|=Lbnp`TcBvUHc`#n2$8NplmbI3`sZxw@~srCek$B+{`6Z&$J1K zZD~IVGj8O}^*?5>ey1k#EyL$f*<%lfzIAlhtgnChoB+A(;DcGm2CqvvP~tO&w@n$4 zF=l9sGYK?X(`NI>7%QXGhPcxW)m0f|ommH(xtsyAOyo^xW_;94W%W>hu(}4j{cD)Y znplrTZGt$nmNP;w4V%JQ)AgR)HT&VxM=s+z<8T?J*Eh{DKgJ&$W~Y&!Q#1#B8R*UW zvQl)It2!2|_iAQDyOTPC%QB_k03MXGmQ#hnzq1Zq`NoYpDRyLaa0m$Bx%;#mu_E;& zHv-?y<3*ao^C0k#h>pS%!=Fg+sZ%F}LqhHwmY`OM;zmD0ZtZ*nrNRx*+TF{9;QS9Y z=hQ1@kn&TS4(&9AYWhqva+-o$pPnxX{jNp1UpjKQtxoh>{@AZ~__*4YB&)o!Ci_)*=TKL7WRK4!!{|mrwP`~m0bypoGDL#}s2Vo(^%1ctAZ`{DktrY| z>j>K1kfEAA^z;?Z>+hNSRt16x=odWxB?4q(>FfpKkF>q00GyXF7tsR#h{ckAtWAYK zGk-tGjMTkn^~jx4dZFT3_G=fZ&v1&wm|+-ZaXy_FNi@QrffwN%aAhP;nd2!G`zu_3 zm4+4H*jNS9bUN7;S*s$XD;l%R$~N~DBe1$Au*%kGUHzX|R*!@x zG60N00$!6Fw|Y0Lph9ho;;`!1{#pVUS3c(Ndc*X?yzV7`kTO+GBpo5g4$%p9mcE3A zOuJ&|S%wLf0%5Ao$a|OiadDH@=AA4E9~w8wQ0deM-=K$Fv3ui9jEO|GP2)yeiaYxj zQLw9AEQFuL%jb2IwppSsA2#QRgs<>7(PxD(EWu|F6|4&Gnp$pP-H3;;P9VUI4K|%u zca4zDeXAveRRQe>Eer!a_@NnWgG>)%9fKVp<{9k)>m9I%X;lTg+EA$nISs=dQ0f`2 z_5mGe_AEEy8V~p_TRZ=mW3l$xEfDyOEW@x4)aUeU(X;7wjG{P z(ag!Z$^+;U9eo4P1FD)Go?*(dwN1nuSl5bo(zZeGj2-)~;l;Gude3p+8rlPJ>-J~- z6&t^h>;t6N!e=zEweR0vhM$>U);^QIO!4*I<5eB@zCGF*dynWhTzhNZS+7j~+lT3& z+ks=oT+{FIu}uCu=$X7XoCl6NhM%dbY5muuz3HB`VSJwyn?{<$w ze3qYM`^=b6UGM{ZZjPVHg4+YQl%J!yWQVX?5l$J>zk3QB+=I!vv1n5KVagMZ%_{de zh*G^nCDX1VR3Wd~7vXA?IE<45Q)au+uDYmg7M~LEujbrm_9)IWMJk}G6 zyN@mEyl+nPKLcm^adK3{ilR1l5^Bk~JF9qcc$Z+r>0QE$+A^OCG;SV$)FbIb2!*t@sh9IEnH#{mm{g}|Ffpgz4JsIx?9%;O zv?r+Yqq3cAG)E}$tr>=v-BJ5ekVJ^xmT?J6q_}iVTTrt!*F5%EX*){TC%y+O33_eE z$(y11GsePzi|#+g$os2iY@AMsg_5seGK`K;{DLYX{N=zY9 zbV5R^krGpM!a{0PAyIfjLuynpQCbp@^nfzsA6LYg$f|yNF8zIyh;QTgNtAP>L|LRW z7VRZa!{`~vBxdq-u?bqhi;xsup$S`zo2cU&VW5qDlA zv`A}mqbbovkxj?An!>YDLQSzv%DCEMGHWtTu?Zq+RD~xbsWfFraN_SHI?*&)M=poh zkh9D=#JfYNTZd9#jVf8MM9H&dT%Y)UHBxLf;?YylH%P)BrG-7>(M-`RilR^T&>s11 zQ?yg6xC*i{!6B@?SGm}2qLB4ok$Q3^(m6-s2JMWONSA&A)~N>j=s@pnjRZgI$T-T| zCME9Uut<)^FiYd|i|EH6!@f%bALUHA@oZ@S2=T2^xcd@?*EQl%G2t4`vPS}2q*J`u zOY}Wm0w4X1kMRVV=%-lXCF;>%A`j_IFoBQu*i)={HZt)<#8V>qmrwOjcnE4bSTrrY z5D6-0A@NA5E6R`ysZI)Gg!--wfz$j0{2z9diXi)HB@Y0=j?e$+ef~e#QT8S-Hm3iJ z9hIV{tBkDvOW6kk85A0X($K9GYzT!AvF<~uA~K4B1re=B`Dl=gH6&b*>B?Fn?Uu+#`G zN!>N@=h~bnYbi&`X=hSB(tE@KX0=h+0A?Pxr;ZY^F0Ns?DvAEr^2qS!G?K$Qy z`7JYmW0R^|fic2U_KpK@3^SxsY`R#HJe7KN?;YKUdNFF94AP@>Y?f=t)Qj#)pA^Sc zLlEl-m~DVzd9+|R0$IQ@F?Xf469*G?ho(p&vEMw$TV#MTCXzTC8LrJi_V8tf!Pq^L ze^rA;I_>DPSVHF!w&rxf)5dv_e2kaeVH?hJF+yfi=SX_*Ars~ZhmP_nbVf)|M|CRP z=dNpZN_}+=5xN89R_CNBoW_h(*1yrF=cmeZ;xd#?dmdY4AP0in&l;M$(3(tEu3Ca> z*;RhR_5`HvFx2FoN2r3_uQA|fbJcm}n_{*6MJ9N@ne%8W4VM*Y;X{xq$I_~Z^g-b20)K&Trdp-`dNei zL=JRn*JX8z=|{J%vO0W$^u`}+9EW7V3eIgbx8vS){$r*mC>ZSqaKZA_lnM}644Xu+ zBBg1!yV69YY_&0YHiEj%Hrz(9qEtFztEqr!<33Tj#_2mlaUZKVSd=1gl7GtL=@QOY zq+Os6qNs&}{pb>*ChmW&AY`n~aa+W)Ud&80a$+%*EuNq^l#4Y0iExKhFB1s+Y9J3m zBjHBY5E7L6E(L4O`lJAxlAlkFAAxJsL&Ah-HpX^*S)-2JPf**u*TPM%w`ZSGp;GdQCRcD1wk zwzl}CoXf|PjBJiwE~$0gSOw!JM9(p&@}MUN;iKf@g`y91vbur#WnPR!QK!ry1e<+> zuq-R#2Dwpo{F-n4fsaYHDlWUkIqKI$c*qyQ!=yPjV|Gao@gaR4T1VnK*kol;rIMdK z9W7<5MA8(wNdmoJkiO&UjnDGnBkLU(ev{?%daQk0voAc+wvu~OWY%jRal^gu_1X89 zu0cuv+s)3{$cJBfRW8AQs7R&Rz>j#|CEUC|u>T{ZM)D|x+kw#~Kcx+P^;m6&VaW&g zwz%sQzm7V~M?FfKe*MC~lNj8u9I_~V&7t_tf6412BidI6QAorNU7`LefIL8up}hJ; z|4;QR-n=@f`UUpmLH&Q#@Bbh_{CD;HpX7%UHA@F=b=2?Lmi52VG~B94SD+UPLL#X0L|3f37=DvTw24`(NwXQo($LQ` z6O?Ns-`(&Xz5h~miN#RMXXQ6L?p-A)fm})o(?lms zK^8+ktCQ{!lU>NtC{-Mx?SqH|tdKA~r*UY&ajf^KQ%= z8d9c8?#e+c(z<9`EG)>P+08AuLQ^*{SsSLL?ntCx7Z0UYS1DSY>O9?_2m{X9+Frq zPlqzOc$5P+ZL~P51G;q^>ki@M6GwqAB%`hM0ilQ5W&b{xhooT5Q+(7|c(&=v`H1XN z1hi6pVV3jxk3uDTcBAbS2<{AQ>KXE$?<}{C6RelH?FTgHF7t8vm0G_qrC5bt{yQI z|I1u0t{CM-X@0d8%x$^no6(31f9q-4WT@JJc5__*nCo_8#W;qorLvdG6qYB1biLu6 zGz_t5PH>{dQJ3+;P-Ln^$B)T6_M+*I5+}(JH#OFHD7D)YB6MEeKiYcm0aiPE0{;&Z z^GT9yGEFDcfcY_)H$SM-uJmelYL5kmi_@!|h1CvydbQG$Ay3X!TnSYC+^or$L!YYn z3RrGMjGY|i(7ZVz4j*4zO7^}&)w)MBt-M8SLG8?&D`m&vv3wpoX2<05NUU$*l(DFe z;a#ezj>)rWf%dy*6w4Pjx8RWp2*R<4d1vXK$S&HA#=xiB6cBsg8oizp51P(E`t!kp z|Cy+j&LE54^}}co-OHQtEXs$z{1g3^zu>-XEy{5v$IKRUzcu27&Sn(E&rZe&9jDiu z+!3VA0ld*Y6^|E-bSD_sKhcCeB+*r92bl7z5Jh3ZPD*gYe)Jc;+BQcdFM4n%t+DHf z`9Z(;4+zZ1=4p~|rgyeHoBGj%-W(;2Vb}tw%~R2yT~Qmec@~6kus@BF35(wq(s%$F z8sp&)jq4}lj$B-<~ zx})!$-lJvm?iTCJHvQciIA0J3dt`j@_q$hk-30_AE=cf_^U{Lkj>iXa`s!yQH~$=d1|2R4uLZs&NS`<&z$d|;Of-(g4;|p8 z!QHQnFC4~9dmSCio|LH2vCEU?Djk_srDkb#Z;6O@L|ml(XO~UG?&i`V-?&w#alJ_M zf|=?SV9Gn?L|?xZ_q)FFkD#QMC3ch@y(Ve05>Cx($==2$>gD4P(a{M#1dsT_h;))p znjVuN#?pyBT=48{RcW`lmOMx?VMv4<6lmNLvAK*3hGQMp|BJDAj_xID+680Vc1~>j zL?^aw+qP}%#MX&z+qV6SolM^O?pky2%$;whSMTm!jla5fb?vJC)KkoL`92x4+LK1! zuMuItEVf#4=bZ?35&__{9w~h}4fGr2?+N64VdDBmWqvK?jLdVu5ID)Yh4qE@9jp02j^Bo!cB-rUE1n7JuBzz5S2^^?&QVYFAl?~{LqWPiH+`7$0`i0B9l$ww9 zELyLem8XnJoLO`u1*+=~KrU61rc05)e?tDp3y>RFhw5DTT`e>4m$MqVqXn86*)jn%-LAq+(_yXSQ?qM=d+?PhfOtuZdl^ z)UbI&a-Ial8njWP!3!^qK{WK^WiS)_q7%v==0zveKRRV6hNYSmAHDvkH4 z2N`W(F$X+KP71VV>*E2MYA5nGYu?Pm_{#$FRgX`#=a&+@O}pIXhpACim6^>jqBJI+ zs@#31%)^izccvD|Z`G_SStx({S0`4cK3=xFbNkxoR#-ITo*%1%qq9%fsk|xspxUNG# zNX)RLaSWXiT|?q&ZuU0t31TAsLlX*bdE57N7Y#)sX5seO#`KQpQd;= zDODtoA==ELWfZqo?(j`Jlu8Ekex}dkqmnCkRVJg77acKd!8}tbXvAXP)jznTwVI7X zie_MO`=XsyM?K#nq~SZxKh@hD@zvef0x=u29SJ%aFNy59&-BS_5qL&B&hAF742wHN zH-C{@t22BnSXVz+(A8u$NW*QZ*Jh4qGEW+fihC|ePqT5U8_|*gDl9OSXcscjPErwO zPL;6okulbv!@OK!udv}~F>qiv2DO%A|6>eIAB{`p1apgIz!0=}9x z8Z{j-4VKWm{$&hY1gA2zCc~(Xo2HGeRTwo5MJEZhByb5&V!I9+ZnvW|nN?&sBILSc zRA}^ON=DJ8xWJM~n&$4TW3E0y$Jtqz#*%c{l{I&=`_p9}%4$}Vxb+!DKzs-emiK}N zMUnc*(DO70fmX&a?Onf4Ef9N9WIm}8>Yq(^SJcNGgO z?jJgUOk6<9?kKNDOXbmfPD3ky#%8h19sVS5IBT9wlI_4CG(SjN&O*x;> z2%6C7GsN^m2oMaei-$}PQ9?SXD4vOIn3_a~Y#2bMgq#{s3`K+08mOhe7McP|wJ?j| z)J}Vh5Zk0();#gT04=eJFi1)?2k9U`8*tD^O6i6eU{*erj8Qq;oRQkcpR8u{mutif zq$^nq!5Q;CZJe$ut={ji)g|HSz^cIP#F~I!>cE%%6QVQ6JESN0JLDVGliUXSGT+Yw zx<6M5-yz%cKBF-5vW6v}5FZkLE(8CZiQaiq@ic^gB!eG``ll(}b!$XinS1Ez5gu$D zG4d_9MDfR}d2{(6cnpQv2SzuN7P=QiH`dDaKdjp(aKzBWQZyKK)XEu*{LUM9-AIUW z!4X)bO~|Ha9=L-`m_N_h)NPo$qfOYiZx(mpgk(P8`5Cu3JaR>xReL5UJlPD2c*~K= z!}Rgn#;FWwR0%p{ii#IQc;3_u7ndm8H?#Aj#lWLbdc#=)olYJ=jwdj_d(lnlk0}NC zYZPh`(-J7{n$DH0E3cZD7e0jbcDqJwN=OjcHh~cLdrI-w#^c@9C)Ra%o2zEt7dT1GY!mTEZvPX`Z)46O7Cr`=0f(Gf0En)> zpqiYU-ZRq5W5-Q^GUFL6xLD$>9?B1xpAFOOa&ha5?@LZ){`bth)4?gWXLn!gd}{?k z(`yv`@KU#DTc7$#r5bMZW4C8l-|B892X1ssw`WJ+>S3iD(^L6ZcVGT=cRq|;Oh82U z5P(es?;7{~x>}_jvLo$jak9JnD}OrQmN#H2x1WNvgmq<0%_-C?Z5g6d_T_Jg;y2*8 z8r{N{lJFT}92Ma;U9opBfw&5jE3;vSP0>G^l%TMBVdY6G_8~g)(s3Wka;>Bur3(XJz z1Ab@+%unj}dY2FF2KuXc|Hl7Wv6G`!;nvwcY2Oq8H^kD{fHng$qri*FD|gGk`x=Sq z|E$tU=b*Srtf8-;e#>oJ6|s-g|VpsEd)hUT^VZy%;r z#(}3*_!b2a5Qzvvh<6}ts2y{gUw?ZVeLIikMx_f;GxSoo`lem;VsGEFkG@Na_U!M4 z{v7a*UJFK8?heFT<_^V+cc2m(2SQl=f(59rzvY*`z1>Ac%Y)*VxU~d~)xUCy-j?q! zpyeU(3*Tn!HW*&)U9Ng^iru#D-W%T5L=1WP(r!1^yEB9jc^S}dN1^G#b&D|NiKPwi zchO?p9M!wmh7U!t0`bKp`D?F$uRQj-2-|OAX>)+DSQ$EK<3=5{tK+SV?6(qQjKtJC zbWpL|a*PaLrLMMa;o!Dx7~Xq(S?Au!dsDp754i<#&~!s~ir#kZz8k)O`3@oa6Dofo zy%-F|`4g&qV7)jD$@%9gf1tfs49)oG#T=!a7f~jAEo?YagL6DMDic; z=kCgn`1m7|rmAiK+-*VYf%g@=?bv-r>p}ebeH*qLhvoVoi&x^zR=T_Zbk!hSs@dzs)k5nZV`TyB1 zyh5`>%q?)Ic846|2`r`BhORni6vF>gfk1kRspx`b&=X%V0QDZCjQ?kqq3E2k-yw%DyV0A%Pr@L8!-z&+m( zb+dpIT$ZM}7kj9Bx5^OL+kanlH+!FVRpBxaHWD)^DpE>c&H-CnVH3huZ4+3x;N>?! zFcJ}>k~_LWU&T#>3o3D5Vnx=e zbzMag)N8JCj)+xn1${x2z6#`Ah`!1*(m8>m3!)B;stbY+f~pI=4s^HV?b5DG94v9xl@d~3b9l6x(b0)1VB#Y4aiQlJIf2fkXG+A zbvI(wIlNGpV8u4kB5u+D()A4VGbx}rqf+M&EROPH>(9fEfIJJKdzd$G>ty~zUmNpN z_eP?vQ-vbU?bqA#Gdu1F?(WEo8)lH-nkZLPdA2`Qb|BRzwo(gjiLIP8_8f&{Ys@R& zeKkfh8=zqF)j%EyYhL$Eq&Tb}#3Kt9nml2itgK-TsrmRZE))nq0fYM_HzLdMtvGNkv87ARH< z!uMK33CN)OHdrr;s1EGn%%Mop`+<;z1Q3x4-rP@*h^2#OfdX!2r$TVj6ec>BhS{0u zccTQW7TYM9(~Wnldd)c`-a$(_?K5R38$RPe z1~TYXL=pzTW4nI%frr-)rL=z>-SHa^wxU}Mm;Jp1(t-0_1CK?%8ze0crPP+Y$E%Q$ zSZA5$wKDwQPdr-0GlsGQ=-M%cNOQo74s?-5So=%x0m&OpY+w+T^BqgqV^kFY0cR8I zur!Z}e|$l5diu>$Tew?{JQ57M1?s&v%{P3&)g%`tE6&M-A}z}ij4tJWGKWHcD?)30 zjwiei@t%50aGvMx*-U>T7Q5AtN~Vd2iP=xw!P+JRrDtAHYPJ)0+YltA;#hleFT^|# zxw&*!w$4O5KSD;!?w^s~izgXs)=?{117vRcMjn7BH_*pp&}F3P*_);2b=Z=VA_{ca zEv-VW?r!v1H0x;3rdOgpb>J}0gaYspdwt34kfx;8__AkF zYgrv6|HRZA`_p@_nv#=IW3uR6{_nlEmJ@=(i>rcdMFJ|mL5beGtczWW~&D=RE z1%@6k7nKfq3Wi_N2kA(3df_6;Y>+wXM0f}b{7v4P3IMFTFCOQGVG2;I>tOSdQ2=Xo7 zD=_zV;!}zY>+c0DIQ3H*_*tme@~rnCE7fpbb|CqayqNiK*@o%<|5&O2Ig2r+ zs^NsDg2l&y{SPh&3C zP#tnD6l=z^eq{z{KKa#xW`W8ST#Kuf#Db(t<^X9)KDKLK@kXW!3^5Xw`DUe=^G1=3(ai)@ZC$}Qi0NKJM`D$mDK_nVV~{CrXkIblL_ZNaLM4d}I=2(L|a`+B`ZpTEKc zB&#f}f1G9rw90;dUYRPy=~;fSL;m@4l#di#3m}h$XjmBtZVO`rQk)yvh8qV>Yo9)@ z=B!6Xfb?4vU-2p*XwpsQNC6eoH068d-3Vy113qGsWw_C!%KEW-aOK& zF5aiaP2ueE4S5Hn-pjce%UJBVJ)D_I#CQiYT-a1{dbbY+UV>j()Mc*Y2{VN!XPWx| zUah~=*il|k-4&OgsIHHMGTJ?!CU`xDE7Y`Na~^I}e@4i*Fk*fFI7Y@9>7IOLfR|AU1_Bb{GWwa# zy%FH`@t=>QvbPQ)cHLliAB;)*&#J{}_{O#&o+5L$|2zq0_aHud5i#6pO^cq}{r!dW ziA7`hvB4W)GcsKFS|Tui9ox7wIl$5XNFk9ZN-diRfeSxc5K%q2vitUQD~dQq|`K zX8(#1JOsT>ZcRv#BbW|G1%1))i+IStE?y})Hr~hNJkd@tnNmHk@k-rfpJM4b6`^@i z11MB22=m;-g${EVyvGH_!y`GV7{qqpa@bQsr1b$oL6%Npd1cy+w{+ej!DSVDMw`+{-ro^|dES$Ru8@kqesL3s6|ula|E zmvhLx@V5?uQgRunG_4s(V$?-qd5Ht>KuLy-Ui~0yM?fNts1QkZ_IcBLmuyeV$_=u! zXqWYf!9&Z)c`=QE-Y@I1xQ}Qq-*Fu+wiG*Syh5(Tj<$M#rt1f2(|0O#C)mfAXbr#E zrkBK5A5lK!B933o_R|9?K)IwZM^))cmChF$4VYHhIaph=RPJy%V|RdZR8tw2SseE z%Yxnp8eE9PT&9gJUcO@WU!h29AZn{ZZ=O;E@&p+I=@9!&BdV7ZhO7 zdOvQj+MHs;oYRNxH6!=kVD{Z%cJ*<0BPRgb8WD$;8Ys(x$$H%}vM1b<;M9JleWcKV z+{mmC$MT#5DO+>&GL?;eWE`)bj%U2NOERFn%}U*Dxcb;xqfG6hMa9HOCrD$AML%q* zp!676jNt)-Nr8o7RA063RZ?b(y=!HEwshY9nC^kZ@ENo8w2)B z7XPxQTdYAYN=_+h+BCe_Y|sWH*4%?mVBQp4HH5S*1M++=nae@Tq(Pof$3wTuyX+7# zS+R;PzS%019ly;HYXisJ4Qp65Oi#YFGSt+(x#V=6?@{7gk6D{8b6IDIdND8e5q-c} zqe;??z;7!4?gRCiB~;{;H6k#qdn5n)yi~0P#g(_GZ7TjDr&)>%F9^CXD z>ynSlDyqz6-!UFdbTwQw#9Ry|h~%oftNi=|`|NOBn1x(G@lYF_I_i25p@)a>V}+pb=`px-#P z;AZ3pZUlww6zXO(7z?~DPW=_qTZy^n*>8|hZm1K~>WD}DuswY-uyGH6-3<=4*t*v# zT(ND%->_)Z-VWGZwOGoI9r1W!)PnZRF={z&+WaZ*A)$y^s}F54djr2sfiu8)8{Dh0 zFA+e#4+|oPEC>wpnTH7b_#V;15Ea5K=jCc|Ln5un}K)# z`N-~_LG2*|aNkCO*df`$(4zK`0VW1lz`F&rQD4yavp{+zDTFZNzm)+Om@&?;1-Hxg zn>ew~SG{^03fov+ZU6)o?t)V-vNDd|_7BkO(Mq4o@w9i!(Uo_~@rg11r^6#Fy6wgoG$49sG6~e_hfwSoE0;l(^>3u@)A8h&D&qQD#oVQA>Mgz@Z z$4Fr?hoT_dOYRS{TV5o|wSY`Q#6Z4P%<(Nw+TloDcVDvf!B<@CM%YlEH|9q6z6ths zsi<2TIf>M~{`oA*K$wE-xxjx0IG(%eMhfW9eD^0Q_WwTrrTT9MxUzwvwF#N1t+S)Y z|KMXyoU&c`jTSOPMsQLpf!kqY{w7TI!<(fHg3^&{OC7H$N((jan;4CPIg*M;VY3rJ z0Yvs21j8RphlmV=7YH$=l?CA!3YBC8`upU|HrGzqih!^0JHjr4Eu;0DVCgc?Du|JKJ0)WXnXm!v*>XJ=OlXr|Zg${U@8d1pSogn(_#BHmk~*k7|yDuiE4Cx@Mwe>yuZaBJMF|Lqe`t@}`h=uNo^%oo!P zRi1Gf{)-V=E-CF-cVq>%oJIAsmOhLC0DLD=P8@X-;SEH&s2L{68lBZ#*9i& z<@tP?Hy?>d(~B=6^D)R+8iy=ygV~eR--2uq=Nv(JTU?*_>EHtp<)z{aUi$f+|Jt>f zB|FW1LsWF7{rW!Irh8(j$dJ|=eJ46i+I}|-yXne6A>3G#HsU_&}qxH z70DGtRVRvCRexO0@)Sd9tA9<%Yp9x)lu@Ht8&l@AMj*>l#;-JJFIF)w9FmdsRag^| z^|UeZi;~#5>o4b{?&8B#p6dw?bFMvjSCXcB8ON#;^FIb^g>Sl2-fb z$DBc)^1u3Q#7Qg^&)(088NR$dK%9L9!gP^dwujH66h27Z6G;IZIEWwO+xB{qpmFa) zy1^E_?0s00KE5+LV*|NfIVRHPhhO4&1SWI=lEXw&At-X4P zHI9=75r-&E?2nTt3G;k=WLdSVUn$zRO=m41n`1~+X-_FPMhC`8!`N`8>QdJK z0?&64<*SjN!-N)K2Jc&P0z|XfOewt-!Y0_`K%2gquS~HbTZugqp7n`C%s-tsB+8&I=w-mcd@J^sBWlaKIPV57x#Qr(1<~LQ@57Rgw zzyV=4+nQPEK0d7+86p zK;8`F^OoC9n_0(+_e@t(X;14jFi<-6v@oUlg?aX-s+(yQe?6Tl2iN)DVdC0P-Ce+*QO*T^2cmLE2%Bu||8k_RDmbq%$t8JvzIw0YKkw*y51NxwB zt$2l0eViH_oOHx^kuQE@X>8>UjBf+8AcZtFB}V*xd_ruR=BW7>28SRN`^c z+*w}F2ygx+Yhz`lY7A#aXh98H{>p<=-}JCK?0vG-)I`pWia+Nn6%r{;RGqbYNAB7< zvS_uIIvI~PWi9g9?~6^3pWe^Fv^LlTRns3~1)mOt z`uSII(4RQ|02Fu#;!ud74+)5o=yJBn`CELb>WXpuoy|mi{>V>iZoMYfyKNBXi>q7_spu3 zjyOBpDf%H(7pLL#1O#fW4!$Z&gGM|#z@`0?xjm6j;MFa(?OW?8OZCgcB1yoN)sM^Z zVQxraR+>BqG1c{xHJ?`J0=j;{xQ71sDWyVT3zPt zz@=I_W<+pt?C}d)UZxsV#q`Vd)PFpqo0gLxXDF*G&gJgiJ=J9t%I=d^7L$qu!&FA({+b%jPPl=G1yu}C2njr)hyeg_oerxgY#qe@RW|5*sDrp#EDw~t?I^> zQ?vbxnkm#38=AOuS@djw@uv-kM+5HgX>Q}u@nP574?|6xQ0QZ~ZHpv*LIIs_adsBeQ_UtOFgR9go`leSCTUGy*@{4N zpsm*zvuY&Q?I|dfG(~?r1%!<{)KJ^FL2Hj&X@@e>$`B(&zG73|J=pc4^L|Q3`gh$_ zW@Z`1%pH5+$LP={Gh*1*#x+q;9=Pvn`L@E4pv_)vth-I#V0MkbW5p_W=5Gl{>4NSY zi`I%VYI)^EllDhWW$_ptX139l%j{o1j^HB__^RXENFQx0V?2&%sk9P>I%Y>uvo*+- zq9d4V`Y1IPa# zw#es7Q5=L2KJ_N<2&HrgKO*#bh?>hFMlmx-F0p1a zyn|v5X0LkyLl~rAXbToWr%YQpab`FUfW9!p0)Y9{R|AGj`nl55Bu|>!?Ps-bHZt#8 zR_!axano?l{}-u0p-d0TL8(@H;k!=#g~F!lk84x^q_vsQ5&f9nqr>&!0glPxsJp(mscO8 z#S=j;7f2;}@TSe0Vr0YxJ%r-~GqAr=ybLFpd^93hn7Z;&kdUC%%+se2b+i$SON56` zq=Cl^6%;r=uvbhScw9Z>!+J6TPI3wt>0ZH$YWB1zqw1LUJS}eN>z(5qjw`gcCiFxh z@C1O(znI!SO{*L4g3&%4xM#0&j8lqe$hV%Rk5l#=s{WB6G+}(Z-!(>Q$JIj%U=UU^L%71ec-pgxQWZ!8 z#(SG4M{%^pIs)5Ot*wd;5mxTGt+3#jMljT}s=8}6d?r%@%+!6Fif-IX(F~m$bBCAf z52^(#G*t=pOcJ0C32FRvf|)Rq7za+_usaygp7oRR0HZ4}J?N@!=@?Drp)=`kbUq1J zS@j`iN^>mj-`2Qg@(I{mv@9coG1&Il%?BFN#f5fN3fd%%#^RjVo)E??1DPYvr(ll2 zq1yg7M>hWg8@c$6g0JEX?Rl<=vLnqFbZ?S<$Ygz3nQSF-;BljkIXlMXZnAdJ?T&?^ zd1vBIoQU^Rqhs#&Ll$G3TShd;RLMs~_2x)a8HRI25Zn^NDApC<9~GZA?{Aqwdty)i z;dC?o9ySY(R#P+x-Z@Od)u2b5_^Z0@%i<3GR+GRuv{iIIxd*xty!mIEh zkb3@0Q4Lp&9fB7{`yKbPOj5l>k2Zy5#alEX4!w;MoxHVdsg+T<>}4FLj2XawX}V|2 zv8xAQYVkGRMI`$-zyr5AVG;^?!wPs;wdx7zs9xTJLXDMT5AB$8kGCNF=YO?0ujnZ6 z@$Z2g>Ibic*r)eF)n8TXJzYSp+!^=NcKqdbcP8P`vq@oC8uWRbQTYdj2}_*m45rVU zRpxAx3yw$H3KpTp<6#JgMzF4VWx;7o%yKLwhxmt-{A9bN%I$mg>gIFv8Ovn};693j zpI)*L57K{wqrC8E^IdTe+(zx&|zyzT5xKHc{A_^c&#x?9;4lMJ$ z&^L&(t=2`;=U06a)V_1jteA4QH8^+pk8_uo!%u!dz7XHPxOr^9EoIBBedS4?kb>F` z-Z)$s9laMPg=HUE+grLigp{l z-TkG!3zi@t3&lAVCA?>kwr3_o{)2#~g#QQOHkfg*1h_E{Nm>B0VWxvp5uQ=1BGAgV zzP_SoNa@7*>~reeo1&6#4(*~&Y>3B7oa!^1PvmkPBVeB`dZx)msHU-(O~GHGD}H zigdV>SduEoXik)=k!`{A7o+eO8#tFiGf4G`5-k{F83<^Q$^=g1Doc^E9`Nwf7cU_h z&PB&~-%T~d7?Gm1mxjzpgo2uwgX3Zl33wBR_%T$|P3{-R?83@YL}aP(-oTbR`JP}Y z!EMZQjg5^>%g#+B+>80=4+jI{*cGJpidA<}N}@5ucaHKJ8n|brB4_6SOrcq z%8xqK42ISDeJk!<>DKvXSs#eU54 zYV#6R2^^N)6;Wh+fIDbNq`Tn=ic3CYFFb^Ym$bF1Ww0{IcTmh;56SfV%}>cn&5jz4 z+-~^G-}7BspRA=Z$`a9P&u#C7$>(SHsGw~)dUa$<*K8Kr$8fA~+K#%}67>HU%Kyox*tigU#(Pnaw})E}jR<$dGQcJ5DEfMI}#5G-AO zr~-hbqwzuu#OH`9BX)_?@vs8{EC9T~a^x$dWYOy&iwi{ISK@SEe~E~Ovv1^JMBo}? zhi-WORt$&%m%$ge@UAV9FHZ-AE__Smu%vvD-_7dg$+VMo>h@h?4Uoul;ta4ugF?50 z=`Ee=QvE{J`Pzl0F4AcdsMG^WRP?T66$rh`kW|cCwjqBwKSGH@_ zT6K;HsyD4XU$^*qm90G^?AvMwA1vTaj(iusNlg)F4!%zZAUtIG{Iq-s>iAoS-g>cP zXXuvIxxV1pj>#Sq4x*xD@^rI;A3Lp82Olzh&!O9s08NH0Q9V{?TD2+FWUS5u6y<^v z=FwykQhk9Xk>N%p!NAMx0;if~vD#t%Oznyz?H zNd1Uyb9h#g_$EwVO%C%Kvdi;O70osZ9aaIQ3xOR9G3{-5((L(#PCHlOWgmO=*Oev2 zJKUS8-%n7j7Q{Ii^Oh4`TY9uid|thTRL5K^Yy9C{b^hx+6oi{%y3Vk#yEctE?G}}r zveRF}lmUbZC!bgt1HS6;kou^bGEeQ0=etDK0m8G;5}c5pyDjztoe~7!nEwD`07h?w zxWBjrqJ+Hu5WJyl05XhTB~F;!VIg-2sXdMCKUZe}schgGBlJ%HB>;-8f7-1TfO>hL zcJ#V-PXl1J3Eb0nzvt#o^+8Y@xZ;4=DcJNv2#`G&0P-6Q0$>n?w%nQl5cp+Hj-3Dm z-4K2QD|)L?v3Av}u^xa`0??+HB7``lLOJ=(M%=rrTm<{6+{yW90RE zrx4trt}jE01a4{1Evl;NR$R};I^y_caX+WCc1IrOwX|wVw=dw2Y-t=@O%7M(2AgIJyGbO6<}s4j;1Ju&$;0gLt}DeI6>d>pa=ToE$mvL5=H( z8MCv!X^(}Vj|IUq8DZ}Yupd|Sk=XU23a~;k9H$!`z_cR^_h9fqR}R!$p-1{`#It}# z27t4HAjcdC`y&nqkvRiJ0VEg$S?VBu!x8|lSiu0L;)~0|NmdLJ01~(p$5GK*BmQcY z*eEnX{xT8skDin{r*G)f@&3X;2FeDBlXCNfGQArj!2-~UkGAQCC*J18D{GFPluF?bUGa_Lc;O$7Q4%oH~O^TPMYV_P%F9LHBD&HcJ*SvpM> zp)L{+W$T?5n(3W~OY-v&TR$9+PRb*ddC2<^dK+KDrs#oUC?C6!V8!$~`6Cpzo0fg6 zySw;Q83gFi<%hUO;*Ie(;CpchBI6p*G)~fU+x@v2&e#Ucdi_@>9H5ODa##u}7}W4R)jD_H&`~>^$+r&$C+qZV`fN;i?l} zEx`?aHn#$3`ZaKAOBl6M48FfWGm>((knat+03W*xRLgsXRi>TcJ>hi%@wyYk4M!1$ z^WcL7Uxs|y!qzE4Gj5USsT$9k6Le;}KC(4d_P`h3 zYElkd?2>I>A-T6tjX0Cn?FQ)#&GCk;K+I7qX`GiMHl~%lZ&xEY23b1>9X1;~=FR>| zJgkcadzkCk5;|aw)Nkj!v$sIvAnOUv9YyP)LxQRHCK%AiQ$Hr_`FDU-P(eF0$ zAzIhOa)Vq-Tv1#r&m&FQY4~J1PyF1Fqg+^uuC%Sug2}Gp2W~k_ExcP;}s^;2}5+(uMLD1*ut# z=e+12q5T$R5;36$=ywfK`mSsS!SQfn9X50~e-NicThlqeVzP-}o)r*Ur#9UB9W4Hq ztoWrv6PfRhkPe>SGotAE-@X)(l1SHSr2kcC96O-H@Q=$-p4Sl%yyZ_O0u>1L_I2nWUC zDBK@MDvA+{4k3#a#3eaBRJE)<<6(yJA+v=UMW^dX}ukGQY#RRdAKhs(HSi5BZ0!OgSgz2wujEht_FCz7X#@o*iR(e&X%o|pN~{8wAS?P z#z>-lf7O((IUMm>-1qi^-L5#bqjct&Bhv3^S#aUcjP)iz%0=m*?+EQhh zYEZ*7t>v85bx^kywOm4$UZ&i6e?5;?MfWx<4}1_2;@7s+!=}&;9$0(EQGq##G7ui>mZc9wtDq)62Sc-7k0x|g~(+Z1_Hs^mB5)(nU%m3`G~c= z(EoFmX-?;hT~5j1-Od_2Xz`60F;k+`SIEq#^dPZ~9EvMKZJ!k!HA21HwT*K5vPdOi z)k3}#<2CXEM8GxHIL|uT=4`zbi6G0o#1}k*Fv)-j3sY_e$<4%~)!A7JUO^L?XmxXC z$bI$EgrT*=Es(~oS6uir&=Nl8{@09gK8R-5MPV-t2 zoUqzha#s|nvj>8*Sg|wDcS;dYaqTREzo5I?$T#}4C)3(5W`rI!$FQ9#<%YW%qv%Pu zNX`6aFR{$B;e|Y*7IH$%uM+R&%ZJpP`hBSYSx=j#j)DB1!s-}U7b<#bS7f_gLYJ?= z_mbsEXJ~0{%2(Eq@tB<9f*`oA3uV93P*{}l`1t{Z-fnWtT+x?O4^7+IExWE!+l25~ zCgK|QCquKniS$l+p9>I98c&x?LEZzTG!$iD;Ert?rDhQDf?kDI+xO?zYUIhLUevO_ zdYgoeTs(bh{v5}6;B5Y!o7O*KUK~#XC3u;3&dS2(DWBKmdv0&8C~)%F57u#L1L6K; z4exO6c&1@0kHW(MmkqU`ssEEmk@eo!qc-bJU^Y-5QIl^Q8DbSyZh*Va<99sApmSM%K7qCC7yoyuQM1XAB7o4gG5>A7xYbtwrBSUb9hT| zllXVv-F0E@Kk(6XF-qi*ro8i}0)TZ9o48YgR{LF_J$#Tq4HQMIko+Oc0NM&v z`ZW$XtWF`mt(uvXdg;G#Ty}{qnE5-+axGb)pQw3?w$<2`c9(1oy49hF9Sh56_NEG0 zTaBUERl?es8;y@{Fw@ccO2YRndnjqZrK8(u5$i{sqa0Bdamd+OpI?d>X-V4WCguYl zuvH-c)BR-^W1QyU$#$1L$}^vpC#N5p!^R&je#r4#E`?Y-0RiEV+V@j@;u$)mR4t;c z-dGW?Jfv-jeY1=9BjUi%K;Y$*bCsUYe9mnX&jo{75-g#&3^{%^iBwH6!4*TSD@~9C z9ToYjiqHk^38G_71TAfvze`Oh%^jAz+AEf;SDLUb?aO^}r|+^Wo*lo*b=KPXgvkAi zH%QrOxeW&jNMSG>qUOj*56p_b+sF3f`a1qi!uK1Tf$M@?x&5qz?g4KNA6J=x%~i%A z!JgnVSeU}NgZM{o&fH~9lDF^>&?@BT=^nNM~zUM0yBk4 zu$L0X9~2^I&)Ve=Gibz=z`}?c77>yA%^(f+Ri6NZBCOKpM4p0EvP=r`Oy`DCI-Y}i zag)WSN|&aA)4gwbin=tQByIq|2oK?`=CVqbD90gBu267n6$38g7afmsP5CfTJTxndfvq~sQ7tn6)!LNL zC_WK}XM-g@7npo1%qT_UuhfW)ppDWX8om}9Ue0?S%5q6^#M&YL3ZZQfq+AQmsdQx9 zYW(-3I^}l_`o#0oh6nfZM#-I-mpU5%aFQazcaXwKArp`^RNvQn*$DT^wiAm2Ox;g&E4>}{Z< z=PQT`=*Yv$0^8Qh>%_I7RW#vg7gPr%ZQ!fsmis+z@U7=nk;S#4vzp*%3v7aI)_=9e zcQY(m?pe~obTo0dN>zsC%|8ce!sX6p7D-G)d|gQe@Orok^IX-1nWT2Wei(x3%GpU= zHTUJFDWID~6;kV^^O64Phhko}mwbwwaA$@=2v;ykHw#U1{XN>AZK|#Gp9s0(M>JaehI5@Q|E`sny0W$Lgmu(Ltn9BXH^GiqfQkF#30zn zZH)W~2jtM8{M%5BtIzSgu1FQQXo5KY4y|ecQyP@azwr7yDa1%$5jzJY7j4{ik%`Os zV}EKJRfGW8Hw+GP?M?Zcv@{L|tyCV$5+-Dg$?znvfYeUNg=?@{S4_C&Cna89prodb zJrS5L7HG$Z;i44wN*WdLgAi(!KDbaxl|KJ6r1LNl0CRa>q51#O*;~L>y*&TJbayvO zgLHRDcXue@p}V`gySr05C8WDUB$Skv2I>FteoMUS?>^5;oEOJ^Pwec>es=fFY@jq( zQ!SGk@#NoG;BM`X>7}HEo#netn-cu0vBs1_K%C9*yF;|?21C!~XXf1e`kIoH=mo4b+|=`BQiDpfefif- zCIneBpP$z(pI?}j`+0BVc}lr%vYbXe3M$+Z^qW2g8XUr>lo19hOAa=)`e^C;)3qXL zJ96lhxv$=~eu$DItE=qN?&HSvw!$oytb#>@aZ%4Ajr51du%Zn*gyXRCjqh`tZh>y> zCadw)((8_eG{~=rhj^sbbp4>q8g2Ut59>yfv7}_})l_IGE$J0sHFp0_ZI?imc0ZkZ zjkMEwh)KZ$9(U||V@kiRw#}I-F<&!;3D-3 zjA2*xn9`H_PV^O2_Ra|EIc7>>c38`e6hU!SoZ4r}=2JXJx+8!5Z@|+@)1_==R);&fYNZTF%9xE#2w_p^hHs7TZJxeddO?H%s5`133t%2d zxkGE=x$5uxY2JUU>YP5(v2zOwHk**X#f<3aO06e1Ns`oFX<^L$fRTG`=8Z`4l9ZvG z5EsYsE8u=N^J@;rLJ@~X=F#o-MD_HMO$gE2ClPNg$8jEl^rTmPuLT~jeexPq?wPwJ zrr@rM8$a4^8ZXQuOcj!}K5rau(D=fEu>#mbBGH~IY)drRlfRLH-nB?~Xq4qnmq4zQ zfESMaRAS|ApIHE3zlgB8MDU*E14XuZ{DkUeOM~nelB8U1y86&BL(D@h3X(=bXydiR zdejqteMPI`6{+16Yf;Oc;gD`#X%EdZ(>lEcSKC|)-}={Ad0%sbu3Iz8tK#Cd+3j3Z z4~PB+eJoGpxK26bFnd?F$w%Vv*Sluz`$nUH2_sRJ{&c;I;Fs6CR9tNU|GqD(3Gbnz zp87c6&XNiZt^+3QOBTrQ^M*O*R0=+z-ZcrY6na?E|fb{nq)523>Vsb#vY9hHsDKd@c4YsRa+0$)8i*JM25p zGLIZP2FA{;y&g;@TPUtbEWdKG9ToD31N>%?U4YB z;Gn;X$uFlihFo{h1^ zmo4Xuy$mANV60pbvj#6gRJ4e>_|0uT^#NP%jvy^)8k&iONKSm221!yW)KX#Yu3ykg zfG*=pQ_bY)w>flrc9UwvmSow%tczdrW$V+7)o)6sb;?hsw!Ty#MZAGBG+pp~mZlF0 z1D7NxkY{1!h}$wqzlapIG1V(*q)>}_s)&AgtQ3+4Ii|iHfR<`PgBk^|mA=gz!a5Vm zZ&t|8;PRQ;L=Wo4#gxM<8ukpym%dw0T8<|++Ki1 zvy(;lrTR(e_s$do6u~H@9uxe{;)h&VGi;`Ef(;@tS!O~SON5x#A$vSpa`L15~h_8fX`-&0dZ2)mOl z-7l%LFVv6yi{Y@0W?g6tib0x&6YPg^DcWco!*dk)D@KF)7!mY(3nR2+5rQR@Xe3c| z8a?_KnSqkKwoK28*CI`c?^&Q5gb2pw2==g}X;pV5EYV)I_#}my=Y8m{0W8 zdgiOFVQF1EGEmZ+GgM)NI~u-`^o}410%?&v@kNxO%t8romn%oNEV_s8gq;s);|Q%g z_!6wbYoz=)*!`}t9ihTAQ@I+t%>2v=Ci_S7;YZq+-;YHfpnnPbZJokBB}7VL-JXiuC&~i7AJbidyvUN}hh+z16!RYVSsE@n`!+ zT%vC{X+TVREcRG$X~avDA%{dWL51n#?=%7qJl(tw@rF(nKPgB6u{U3w6!D#q5*U-me0Lm*w@z9sMOP3Grbn$@(@5D1-;*_ zd%tW+lT2gqJjd=3uA0H;zd>y8zj3t>f`1SB;w$gHmJM>kmRT)PmbEr>_j*) ztu&f1lSV?ML=WT0UKm;{m)6-p2h{|HcUb%sGFA(QSRh{r>sq_ZOIX{KSht|)PNM$N zCY4U8$Q%uorJC7lk@9jVh30KTm@fgIDS8%*E3FDtoy`U7a&m;om>)HniK80mJG9-{ zHGM|R$a+L^W&MwM8v5$zuA%`OhC)h;R6elXCk}G6%;j~J0QcutLj6g{v)0IP(79x( zxNnGUFVlRblh@{6s8iy5zz#BgAwnpI-rxNAwXCCg?{Zs89)Xfs_m130pkE^5daOpI z-tkQdwj^`JU@rmLZTHDr8=C%VEgfD*R47#jUcwq*5uCKXWo#+3ywcX8g55)jsDhik z$Ql=nv1BnM5}Le1)?g1Zk+e?mDV_Ic7RMg%gAnyVnkS(cAle3@Zb2CuyRbp@4E&SV zty2-doP}H3z^=ku9^1WtVR0<#1AI5xXR(mog?v@De$j8lCz>7atZst4c2JYHf^xjckaEhZIXkdrND$dM zSQk<3F#6;RM4SyMtZgkRtOz+zTMIdqGfos}S_3M^ccR#dsV?aG~1u zx0%@WFYnm(-*F1U9#*Uyk$z(A;K+fdDA_{LwIAkg7wmAA$$F!7q4X{xhS~uH9wP>o z?fNU#5j&E0Q`kA>#jkyfbIa?FGmTf_ zUll0bpjq4i77kz3LyEev@xiZnBFg@G;^HAj%1_1}ZBz2~;aO&48b;Vhp#P>W7~~lWn07 zTkTsW2;@;MRbJLnCkRLp$G>t_94NWBtmbhafUI7mECb691ouiZ3wJx#7U+X*Lpx;S zT(l-1>ifdJWhYniE&db-IkP@v53jHHlu~jZX$Re5c!5LAs)t2RuE*GI(bSFCeUNuI z+xk7nV;IjvP~_FtIqsHicEg~x;wB=6_2`hWdgODVR3t#Z^$k}Kzg)D76! z{tZQ-X2r<>!8BybJ9W&Mth^%{*=6vQnF%SAQ|PfR`8t~6Y$uU2X?~HwuDH{<3*((& z!u*@-le11(qoeqU1QH0}G?SPqF#8p>VDFP#QkUS`Z?PwKRozkxX=v#MXlC}|kNgXv<7*@GozE!);4XuqK_+V2H%)m zPCTRJ4b>?`SQrM{y)Z&lbN#4`%MUAz_t!HodCW^K;SuE5iSV$l6vaGzqC#$W zDNdeq)AP&ZuS2X*5asdoitossFtw^j%m}Y*x2IFAp1|I{c z`01q5xMVF~c*S5PFkp;y?R-^4L|tvLGjd^exBZ%il8l4dzX)5iScXPR$dM;Tege5j zpH*aZf^dlODo^eNbCEPlRKY#Ut@jKAb=yRtO~Up>mYYJ)?`tPyd#8wV_|YVEuk)_* zx_eSbH0kxdMC(_kW|vZ`cW0Kdpd0Yue`3Ob4$~D6z;!xj962qt zj=&w9I&3-nTjhgi6cXtX!(v{AxCJfA*WSlR_ph4ay$n+}ye8PDB;alc@69z5a>wd9J zn3_%>^$$jn%&BcLz|XrewxoP9$DSksemI(ymDEMeOjJz^Uct8nd}f(uHbs^dvUjdX zM<`#$%vaq{ihG8X9xA$^($As%xh{3(>~IOegrgRhZZ4#0dSic_4LRp0q$8>rRIgg3 za#hEa^hOCyHk>yIvT;La;bGfFrck-WcYIKa5u}xJ&cvx3^&?%kn5&*Fv~`e4AK^wU zd;yKVS7v6DZ%55@DPF*T(84OK*9=)Yj$afhGT1$8((AJzFVgz}XH{ zE66!C;C6@U5^M))7;4&Z7R&@`7z_9tw3(SIT_So4^tGo#V=PfV(fi>X3o<@2QXg`m zk5X}aF#R@xdjlryy_%t7-gBqigOm0nEoP5+w z`7ma?>xK3U$Fzub3q?y5!I#-V*n&T%SX7<+3R=v0BRvXV1dhezMV9yo>m#`0@CeVboOg258&#{u0Ss z2sZO*rSePDbfQ*TK99L|`r>;Rdi7}eFO~uwW~z7DK}6T|^Si21mc9n6XX*|o0KUgZQOf1e43g*f!)+t=sZ%$?vyDq{vV{Y%fJQRhvuwEBy z6VYz4R*t-^#l+w!>CX0iJ5F*^{{tw}cy25<}J)%d!8 z=KNER_}V2#i@O{aF67-t_rwqhrPq`O)(M(g7DnvX84|boo6BUrtEh~T7M_Wn{)K4N z{*W@J;fXv*hf|1JACgWVq7lC3m#msVJTq6~jKgqoI+`+2x& zrx*3bUO81>a<>*}*uD_nJVSm&hp*K%$5e?IWadoMWYb0Exu+-oy+Tz!@jl{tnHSr= z4)RphIhK%Hxy?RLpu9D9`1!bn270_cBFj`&_o5Gu%KSU1d6ADVuONnrpBHbuxyF{f zea3YWzcmpv&&($XU0lKF;8(+StY)98u=`%ivXEu)46HMsY@OklhZL=}V0z10t@WP% z?OXaamqX_@ZV1=)ei4$$*4`5Qt36EdOE}i&cJV}g`(Adt{;cxRvAM(~okYbno*`uy zWF#LL1JCzMW#y;n`Goa718&aociiq_R`NbeH0gUKd&R4aHlVZ+ORcew1v)fq^)lMm z;Z=lf5!osb_|@K>v!f2A1=-XqBS>B$E5=I;|P92%v9*h-TWMr?tnYaH_g90CUKH&&aV%t6tq>gZm}Rc?8pfc{xb@Ex|Uo>>Fu^ z6f^$3u|OMtHV2=w66`Nw)BJb^&$&gciy<~UV!x3#`MA3r>(E3Er|jI}f8QOZ_{dRX)LEpjTUpvHss^<2}su;Q)(Lq6i!nwRRtCh0W!qT9M2f{P# zg7pkblXzu~O^23fGVyNq{&XeZ-g9ts@0EOm6s;ujUrlgGz{P#F!x`l@!|(w55a5_; zAK$1%O`PUF&hK8;KC!re@NhH1@foZ{SWC!sIAp#kC~BwY%f^jDWfD<2qI}f8ZW^w= zkm4#|9^srBx)!t65_P*y^$U&It5Vc~W$4y-;IY21wX69SP}On*4rD+ezbc4}SGzwW z*oA|G2UWieT-4GATd4#e%M9k$3Y?<=*=0i6;kS_hfg!D{V7AxPQj$~-r*stdt?W$2 zx%MBQnyR;=p)v`f+%Q7N?s)j3pjrj?uBz;Vzd?m*eYEB;y$kiWU_{(!YZSo zt&+kgCwrtlObju{I$3$Ku@n(RYMZHP!@UiWXUbhk%VpiF8vt%aE-@1-g_y~C099EJfb>;b;tv(q_4F?=!g#bi4=PYH+@4K&lmRK zSMZMImdQb`raT|waz$Q7n}zcFc7^JDE=Lr{(a*k{@U+{{~cu_Q_#izNx=A&>4pXx~cT;xt!|MVV|jb58N;GACe40(5+4!F7Vj@b2Y3N z=nfei>4}H-EO|q{xN{=Pio95=TCz$p0Ngl$Ri*kAaFZQLhD58o(a}? zB2ax!-Qb~?%-Q}`Bh*mAO>XQ#(*7P7Lq`7^lU2lM(q-Kjw||Z_?QQBxxcJw4R6u3Zf4jw9=(rtLu3=y z=@8VZl2YkQ#MrZCD@O~J%E7#=3>A>pif9j*^y^}&+&ZGpCV}uCvYPRu${MvquPU=j znme$KTw18Mi=;qciXpchns@AMx4r`5v91Q`-?Dt_+7;P7V|2`OO zPa3hu7extnznks}qowu|MUu=YHWU^?i;LkDBy$B=lSmtpkFW=%B(~Kg4!$#L4?BD< zW4Dn7POu80s-lPy1C~Lf1PQs!X|w%4QNV{Ka2Gdnp42t*1E^{xYiHJ1Yd1GH=ZmFI zfjbxw$~7e4`yei2rc;i3zlOXm(!EmyvUiOI)QgT|v*&(az+OJH5pFLOlPcpUmSj=K zW<`gY5D310~_u2oubKM>O1x<%Z@C-9IRUke<<` zW{b=12l1^&w`EVtl8ID^e&)+B{-semM9}5$n7Fy3tBs*`kESp2eQ&do_QN{;xH{?K&X{RU})gOn}phtqDQXu1|6e`Vzt_b$7 zaJQK?HgTp@%PPW0T@Fw?rJ^H6Swb$C!$mGTwFWcKnANWdV9t_#kz)N)iR`=}9xW@e z9l0y(*(6%DQLO{7)M_y*iU<(F-okPOH}q3|JR=7c^Ezg8W%MX$S-p*Q?~5`+-=T+x|1Fr-JAi z;?U-~%`!Ox*D04o6sxr~P_&tED`!Uy3T<)+m`&h}+nO1PoJMN(wID8SW&_O1!qW;< z!OjMK?jRHne97@>_w5&HuquggScY#h1o^^dDSN({k_|XLXIIvn9>uHr0+I!q)iaL; z>O9pto_RaTrJoXRX1d*&_U=U!N2^3Rq~#C|-p%D3t(7M3rSjjFf4Af{HCPzOUX@g)T2sF3r>&>qYu{X<*l2^> z#zE8$;#q}rQ(L$<=hxU9$#u6Yf#pc2syDhZRd$W0^-?lFC&{i%vmgz>9XHH=eeSUq@O;xX z;lkg!s(A;8e~zhp5~SZO@#wv}gV`7ctrz%#^Ndyu{&OVL!D-rRw1wcnGy4PM-&qX*fF+LP_3AhzPqkSg{AM#1QSQ-yaU% zhub2nkXTr8Y)?<&M5Wn4e*E-JRL6Hba0f1TWNk7r66(A&@G$ts&+h9|MVVGmBa(v*MAP!Fws9sYaG4@h3Jw1uyU_yABBld)HyN*6kPNEZK=6yD zz$@V>%&hkqGWMUS&vVUNpgaBHeLr0=>`S0NC+0ZP0Aq2w!zKDGw;^n80Rh&ukJqB_m;cp#0k8YtvGX(hh(-` zC?zVSEs}E#l_(6RdtQw1wJ?Z_gy^^5kG4xBD7syUM1SW~EPmK$OhIB2) zFBmjFqr%fy5Z3Z0?C}CFhA$9Bd-QAvADO zlY30j=I`%0iu3k-h$3NX_5nrBi0J*E{L)bl`T~XE*I3lk=eU%_v9PC1sn4@x$~Tqc zD6>%`Jw%5$&1sE2COeAPD5T>1D{1|f?*@&*uK0>WzKKRrbJ z`ihCYsj-#5gQFdQ0UY=M0{uO$HwZ8h(O>QN4;*{VGg>Qu2?ItYcY!wyKK^h^;+LuzJ1})w9&Yg3-Qnu z5E5N7^#-TTG(wfxK!F+twzr9iUlCnF_?cg52n0GSD*C?j_@N;Z`U>&(C;Rauckj-l zPtlLuZUpZM$Z*RH-4^io7fYbfwdd%o(2g$pN%dtV$sFgFl9n3Eq$K9(UL#ya4rBG< ztJYUHiI!--HYmY)uGLeBGkn^}onWE_!1V*eheC4EN4%}7sUc0qDt48wijJFi^;E3Hg3GtxJgbVNQ%M- z4@fr}a{Oe(ROvEyP<^4_$h;T9CrA+ytQMcbA57kaQ@I8#`%n|K#CD-gz1(X|vVy96 zfwHQlY>PfnbYc>(Ypb8B4<=q!6&JDFoErYQdlgp}3u}3p(G*^QweMVye6gclUbk>Z zswz#v5tm-WG9{EI$nN=OG|h>NA#G_Oj?ud^F?A-~xnmpqI;v8B(dhDJ{GMs=UH8`s zV>w~2OiN<*DLssOy?B#UfmY-tVJ?7CmW&BXJUHHLis+X?6F%5qlq^+;e2rc=;cHmR zIeugxPnDpBXJB6K%k1$n5Y&&z>OQbqTKSM;r^N`$(~c4N?f4{*WB}M4c+K( zsrutd^{ra7Nk_mt#RxUha`vDau=OUwFe1g)yns*UC}sJRWU_OR80Y#{PyIA~sizHg-M8 z(&bl;$VWwPR<1?$K~R!3Y)WCkRo!${G=c^ZSzEdZHH(*xoxg26P5_G}@$^eb(Oa2I zCsYB~-ERXdSSoofN=6KpmNf-LD(NJOn2Eh!p&bWg!=179@g}dpUhr|ul!hLYxGgOQ zKYv$_oO|#rGc0GO4uRrCK87)QbNjhmF>Da)us(FJ6|E+q`@yA$f=3c>d4}yOol1L6 z($KDbAR%a`!vsZpjDN_m%2C2pZI>xeeYR6KFOxRegfn54H4 z^*Ueu_SrCOg$j}yskRfBJEMRyq@|gL*{oBN8$^As1IB_^qUM|n zHo_DuR)|Jd(oN&Cb12G#HHkqh1}!^mwtnQ+TMf#C+D+tO$dKoP)xJY%&VYIfrRkO4 zdh$GuGZ5OA2ziafX%+bZjnAy`K`riqdC!W;bt!_!lrr{y3q#1qAotTIj|-yn3?CD6 zXQ&>QVupIjgzW5n`{vjrz-0Ua+v_DKyp*fA>|Db6-pg)wP(G=nfgsc()}yx=F9LY* zAKCGt!sX(@@lm&1s4uLxto~W)*UVLdq2Kh^V&qSbAS6` z!jK1&!N-VcW?%OUQ*xY3tXax_MjQn+7F0% zUHTt`cYDo?F-RJ;%JU0|YmIB!A{(eR`i%Uan~@r4Qu6{*5n#sz;c>2`=-u49qoYqkc&>zbXyf4kM%tgBhE&vG+|C zP@n6rIeW0yX|==bX#>t^Bj7F2z24x_*OJlKSbj@Ae9vHk{kFROTOwan`GMr!cvjKJ z)i(#EH;5>x9B9R37^0Bus2i1=!Yoc0CtPXl{2akHK}K@(|I`?xq8J3ygl#FWShxdPJI{%Y+YbAvDAwzZqvTEyjV_`dzpYv3 z`3FPAldQ0N1m}HR^{Uk6Gs0Nv1}nhI9N&nf4^sr+7ieBXo4TZr2sW??j4}ihx^bom zb(t0`ywzDDqu^_NwPjb$nrs0)%GDK=9hvV@dI}%qWuFZ(VtipCvqN^a>w8BIl|-0h z^p+VR$*IY1XG;~QU?o}leJwOWYSM4jud>WHY7C0j&N&C-ix}O{A2GHlI_W(4ZFJ|GJ zEcMlJv1Y1fI^NE{(UsLlI*UvB;>1)|@U>SowYfLuyn=C%F)M0^BoI(Bxa_Rl9MOc? zdz^K^nOFV#69!HXyes1vhRJGN_~u$p)M6(nM1DqVN?O```vgkqd_8F} zOh4rymfk(#poadI)soVZd_}c8%FvI5=Z&W@u`kZO7q54KeVSW`uX&nVuCJ)<9A0x? zDK>t}{459lB2q)eo+Rl*=BdB?r^4YF?T)*(&^fXpOe< z4*Yo%Tp^HJ8hv!nnupRBQb<0(H4;@$=-F|;!!){zI*v-?@9hp0euHFt=;~w89Em@6 zuf7Vv-PMh^dKKtSw<0*9 zsWWpHk$@p(?e@V*b7E?gamG+W;#iB52U#cgS8mNEI)DAww{(yZ+_;qKMk4P8SKv~{ zS>`oI1OXKL+@(Vu8Sxp_y5Q--y*Z?14*L=5wx@aBCOgdK4bOW<;$8lckh%=bbf z!~F4IQVC`HzIGaZeioIxH27K`1QWzC-yhAw9!AIl){w6oGr$rq9|K@8XYhJPGgid( zc40Z9@*c0`72UM5dK4V$vfSDHXhdcjir0;yIXBz}aAQ2;vn**JqcsjLW-L>yq++t~ z;GPTkTvq5qLE_MPq5aEAzNv-ELYWH`A$FH%{?u*)mgE+YWVzyYERE?X-z?C%)hyAB z+|@&+4p|jcapVd=1&tR=Sm8WReRq5IUfeQFBJ}YT^wt>tQf6MVd`!x^Zen=hly6X` zZsH}n6U7=gOTsw+0f)kVCc3d0-G`d~cqcujs(mec=4LZ4c{|c0F2K8v6-L6#GPU9X z{@ofMvd1Q0dc~V1DaTb_5%;8Q!&vmB5wc|IZIaJhAM0sfR5>^}8dsAO4S-Ie36F|; zyZPCb2WKSkWuy}s#}%FoDsbzQpM@%<=f*mW%PxS+O^e~*LzI$)gfzqJB!)CzW<9^y zv|Ovpl5(}a<{53Wf@lWo{(@%KkA}mMy(wVjP%`+cit%J&=*+)#v$81yAubcMT3=deFd7%?7 zd_6f5EM8AO5@8ki&|LUwyqSTC9Ii)DN8HI?okBzUOn(qU{*+6VtgGz;&p260!HjpPm8$PbR`Cv;Jbq17XPpY`0Jp#hx4-~i z+v?&+J z2@W?5xooqcnD?f%7F?mH6h|xC9a!+YSn&GDmu_1N#oBiX-R36_gxV|fe1Qa~-0iXz z&Mxo_dAP|Qz>M+bqD^PQ=9{Z;rU=?bwzu-QTo#JCR%dusN?a|7#=11eeG}6@Q=UT4 z-l4L2d!UcCr|s9Tn;HA_uAex5N|1LNq$g_Q@!4#N0itf-s*wH~)Wtz+O z=f-X?GcI#{P-lIE*#|Db^ijb6K77Kgp>bF~({HJ?NU_C7>wQ-u%dYw>kCum32U9NS z#0;nt(dI!{j6rYIOU!qaowj$6a?JuT7sbe}Lc&+3JCrJO+5k+yqf*ID&=Ei0R|S=i z2HKC2+;=^f_aDTk%2tR^NapX6zi%(FS8dQz0e8P85x%=&P%sRTe-#s3KpobpSgUS! z0xs!%f!`P)z)C((nyjdb5S^r)7=x^koTQkjvI@Pd*u(JOQNDNj&jtq2*Z5{ zn_5`d{|(|=uD$#Mn2??n0|Z3$2LKR|ir+%$8=C7I161vSKkUgwT^#K64IQlQ$o~3+ zEWp9U`puJ-^3M`eT37381C^5i)GOB$k{H0ljHeR+hwCRKg)Q{$?Io>@tp7*`kt58; z0~o3^ftyj1Psq#z_52UW{3Mx>m6f%FzJsZ?)gQ^~+bo2)0aX_bB+c-I>=4>NA`3j{ zakOyw*Iw7ps?$i}GwTM9Rs=c-h{zN0b>R5@f0yYmP5x1wF5|``Y~Y^XHXjHG)f0+X zg#U!%ujP0(9bj+^R8TX}1iqKj`d{-0TEst)L)p~f-zQf;tLS|)H?Im1`T#iWV*kSh zzWN9h;U7T%H0(dBh>Ypi;tUdNWqhCZut2N+{xtc&mMip+X#Urte~nKeS! z=h&y}0tH&ZKafet&iLQO)PGi2I4z2x3DBCxUi{4Z3#oqu{a8( z2>fRlp9a86(|=pYe@lV>sV~sG{?PXS*7tiE(VzR@&w9IY0VnM_&{Lv0W0?NYu>}RLhZuMu_A0G&RW_H-I??D*o*_I$dK**mkYv=h(R0jt; zQv*i_0GTr27xCmfqj?1=lP5sg^MD@U`4dPO-#Z)Xe;buk3ke2@RX7XC2b>sS(~%YL9PpL&$KzTEY15&vXCdpo_ecR3KRK&zXLQ_S4nSpCP|<1axwLwWp$r18a}{efs?GI?vBYKOWD1KQj4W zi~m26et9wIXZRn_YMB3r4Sa= 0 else None - - -def target(ref, package): - if ":" not in ref: - return None - name, descriptor = ref.split(":", 1) - name = name.replace('"', "").replace("/", ".").strip() - if "." not in name: - return None - owner, method = name.rsplit(".", 1) - if owner != package and not owner.startswith(package + "."): - return None - return f"{owner}#{method}{descriptor.strip()}" - - -def javap_usages(package, model_dir, deps_dir): - if not shutil.which("javap"): - raise SystemExit("javap not found on PATH; install/use a JDK") - - roots = class_roots(model_dir) - classes = [path for root in roots for path in sorted(root.rglob("*.class"))] - if not classes: - raise SystemExit(f"no .class files found under {model_dir / 'classes'}") - - jars = sorted(str(path.resolve()) for path in deps_dir.glob("*.jar")) if deps_dir.exists() else [] - classpath = os.pathsep.join([str(root) for root in roots] + jars) - sources = source_index(source_root(model_dir)) - - found = {} - cls = src_file = None - calls = [] - dynamic_calls = [] - pending_dynamic = [] - line_table = [] - in_line_table = False - in_bootstrap = False - bootstrap = None - bootstrap_targets = {} - - def add_found(fn, src, line): - found.setdefault(fn, {"function": fn, "source": src, "line": line}) - - def flush(): - nonlocal calls, dynamic_calls, line_table - src = source_for(sources, cls, src_file) - for offset, fn in calls: - add_found(fn, src, line_for(line_table, offset)) - for offset, index in dynamic_calls: - pending_dynamic.append((index, src, line_for(line_table, offset))) - calls, dynamic_calls, line_table = [], [], [] - - def flush_dynamic(): - for index, src, line in pending_dynamic: - for fn in bootstrap_targets.get(index, []): - add_found(fn, src, line) - pending_dynamic.clear() - - def parse(line): - nonlocal cls, src_file, calls, dynamic_calls, line_table, in_line_table - nonlocal in_bootstrap, bootstrap - stripped = line.strip() - - match = CLASSFILE_RE.match(stripped) - if match: - flush() - flush_dynamic() - cls = class_from_path(match.group(1), roots) - src_file = None - bootstrap_targets.clear() - bootstrap = None - in_bootstrap = False - in_line_table = False - return - - match = SOURCE_RE.match(stripped) - if match: - src_file = match.group(1) - return - - if stripped == "BootstrapMethods:": - flush() - in_bootstrap = True - bootstrap = None - return - if in_bootstrap: - match = BOOTSTRAP_INDEX_RE.match(line) - if match: - bootstrap = int(match.group(1)) - match = REF_INVOKE_RE.search(line) - if match and bootstrap is not None: - fn = target(f"{match.group(1)}:{match.group(2)}", package) - if fn: - bootstrap_targets.setdefault(bootstrap, set()).add(fn) - return - - if line.startswith(" ") and not line.startswith(" "): - if "(" in stripped or stripped == "static {};": - flush() - in_line_table = False - return - - if stripped == "LineNumberTable:": - in_line_table = True - return - if in_line_table: - match = LINE_RE.match(stripped) - if match: - line_table.append((int(match.group(2)), int(match.group(1)))) - return - in_line_table = False - - match = CALL_RE.match(line) - if match: - fn = target(match.group(2), package) - if fn: - calls.append((int(match.group(1)), fn)) - return - - match = DYNAMIC_RE.match(line) - if match: - dynamic_calls.append((int(match.group(1)), int(match.group(2)))) - - for i in range(0, len(classes), 100): - batch = [str(path) for path in classes[i:i + 100]] - proc = subprocess.run( - ["javap", "-classpath", classpath, "-verbose", "-p", "-c", "-l", *batch], - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - if proc.returncode: - sys.stderr.write(proc.stderr) - raise SystemExit(proc.returncode) - for line in proc.stdout.splitlines(): - parse(line) - flush() - flush_dynamic() - return [found[key] for key in sorted(found)] - - -def write_yaml(path, functions): - lines = ["functions:"] - if not functions: - lines.append(" []") - for item in functions: - lines.append(f" - function: {q(item['function'])}") - lines.append(f" source: {q(item['source'])}") - lines.append(f" line: {q(item['line'])}") - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text("\n".join(lines) + "\n") - - -def main(): - parser = argparse.ArgumentParser(description="Extract javap method calls for a package prefix.") - parser.add_argument("--package", required=True, help="package prefix, e.g. org.pf4j") - parser.add_argument("--model-dir", default=".opentaint/project") - parser.add_argument("--deps-dir", help="default: /dependencies") - parser.add_argument("--output", required=True, help="YAML output file") - args = parser.parse_args() - - model_dir = Path(args.model_dir) - deps_dir = Path(args.deps_dir) if args.deps_dir else model_dir / "dependencies" - functions = javap_usages(args.package.replace("/", "."), model_dir, deps_dir) - write_yaml(Path(args.output), functions) - print(f"wrote {args.output} ({len(functions)} functions)") - - -if __name__ == "__main__": - main() From e600896b538954d3ceb903a47b8f13036ff76ff0 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:23:04 +0200 Subject: [PATCH 34/54] fix(cli): qualify bare same-file join refs in rule expansion A fragment-less join ref names a rule in the referencing file; the analyzer resolves it to ":" and filters loaded rules by exact full-id match. Passing the bare id through left same-file referenced rules (three exist in the builtin ruleset) silently dropped from reachability runs. --- cli/internal/rules/refs.go | 11 +++++---- cli/internal/rules/refs_test.go | 42 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/cli/internal/rules/refs.go b/cli/internal/rules/refs.go index a676d10aa..4acc200d8 100644 --- a/cli/internal/rules/refs.go +++ b/cli/internal/rules/refs.go @@ -67,7 +67,7 @@ func refsOf(id string, rulesetRoots []string) []string { } var refs []string for _, ref := range r.Join.Refs { - if full := refToRuleID(ref.Rule); full != "" { + if full := refToRuleID(ref.Rule, relPath); full != "" { refs = append(refs, full) } } @@ -85,11 +85,14 @@ func splitRuleID(id string) (relPath, shortID string, ok bool) { return id[:idx], id[idx+1:], true } -// refToRuleID converts a join ref ("path.yaml#short") to a full id ("path.yaml:short"). -func refToRuleID(ref string) string { +// refToRuleID converts a join ref to a full rule id. A cross-file ref is +// "path.yaml#short"; a fragment-less ref names a rule in the referencing file +// itself, so it is qualified with that file's path — mirroring the analyzer's +// resolveRefRuleId, which resolves a bare ref to ":". +func refToRuleID(ref, currentRelPath string) string { idx := strings.LastIndex(ref, "#") if idx < 0 { - return ref + return currentRelPath + ":" + ref } return ref[:idx] + ":" + ref[idx+1:] } diff --git a/cli/internal/rules/refs_test.go b/cli/internal/rules/refs_test.go index b1c65ab6c..109a70693 100644 --- a/cli/internal/rules/refs_test.go +++ b/cli/internal/rules/refs_test.go @@ -90,3 +90,45 @@ func assertEqual(t *testing.T, got, want []string) { } } } + +func TestExpandRuleIDs_BareSameFileRef(t *testing.T) { + root := t.TempDir() + writeRule(t, root, "java/security/deser.yaml", ` +rules: + - id: unsafe-deserialization + mode: join + join: + refs: + - rule: unsafe-object-mapper-sink + as: sink + - id: unsafe-object-mapper-sink + options: {lib: true} +`) + + got := ExpandRuleIDs([]string{"java/security/deser.yaml:unsafe-deserialization"}, []string{root}) + want := []string{ + "java/security/deser.yaml:unsafe-deserialization", + "java/security/deser.yaml:unsafe-object-mapper-sink", + } + assertEqual(t, got, want) +} + +func TestExpandRuleIDs_BareRefTransitive(t *testing.T) { + root := t.TempDir() + writeRule(t, root, "a.yaml", ` +rules: + - id: a + join: + refs: + - rule: helper + - id: helper + join: + refs: + - rule: b.yaml#b +`) + writeRule(t, root, "b.yaml", "rules:\n - id: b\n") + + got := ExpandRuleIDs([]string{"a.yaml:a"}, []string{root}) + want := []string{"a.yaml:a", "a.yaml:helper", "b.yaml:b"} + assertEqual(t, got, want) +} From 33a03d7ce2ebaa9ccd03d4a79ea084b6b3ed325a Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:23:35 +0200 Subject: [PATCH 35/54] feat(cli): parse analyzer test-result.json in internal/analyzer --- cli/internal/analyzer/testresult.go | 43 ++++++++++++++++++++++++ cli/internal/analyzer/testresult_test.go | 39 +++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 cli/internal/analyzer/testresult.go create mode 100644 cli/internal/analyzer/testresult_test.go diff --git a/cli/internal/analyzer/testresult.go b/cli/internal/analyzer/testresult.go new file mode 100644 index 000000000..26a9f3762 --- /dev/null +++ b/cli/internal/analyzer/testresult.go @@ -0,0 +1,43 @@ +package analyzer + +import ( + "encoding/json" + "fmt" + "os" +) + +// TestSampleInfo identifies one annotated sample in a rule-test run, as +// serialized by the analyzer's TestProjectAnalyzer into test-result.json. +type TestSampleInfo struct { + ClassName string `json:"className"` + MethodName string `json:"methodName"` +} + +// TestResult mirrors the analyzer's test-result.json. The analyzer process +// exits 0 even when samples fail; the verdict lives only in this file. +type TestResult struct { + Success []TestSampleInfo `json:"success"` + FalseNegative []TestSampleInfo `json:"falseNegative"` + FalsePositive []TestSampleInfo `json:"falsePositive"` + Skipped []TestSampleInfo `json:"skipped"` + Disabled []TestSampleInfo `json:"disabled"` +} + +// Failed counts the samples that keep a run from passing: missed positives, +// false positives, and samples skipped because their rule never loaded. +func (tr *TestResult) Failed() int { + return len(tr.FalseNegative) + len(tr.FalsePositive) + len(tr.Skipped) +} + +// LoadTestResult reads a test-result.json produced by the analyzer's rule-test mode. +func LoadTestResult(path string) (*TestResult, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var tr TestResult + if err := json.Unmarshal(data, &tr); err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + return &tr, nil +} diff --git a/cli/internal/analyzer/testresult_test.go b/cli/internal/analyzer/testresult_test.go new file mode 100644 index 000000000..edcdd783d --- /dev/null +++ b/cli/internal/analyzer/testresult_test.go @@ -0,0 +1,39 @@ +package analyzer + +import ( + "os" + "path/filepath" + "testing" +) + +func TestLoadTestResult(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test-result.json") + content := `{ + "success": [{"className": "test.Ok", "methodName": "m", "rule": {"ruleId": "r1"}}], + "falseNegative": [{"className": "test.Missed", "methodName": null, "rule": {"ruleId": "r2"}}], + "falsePositive": [], + "skipped": [{"className": "test.NoRule", "methodName": "x", "rule": {"ruleId": "gone"}}], + "disabled": [] +}` + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + + tr, err := LoadTestResult(path) + if err != nil { + t.Fatalf("LoadTestResult: %v", err) + } + if len(tr.Success) != 1 || tr.Success[0].ClassName != "test.Ok" { + t.Errorf("Success = %+v, want one test.Ok entry", tr.Success) + } + if got := tr.Failed(); got != 2 { + t.Errorf("Failed() = %d, want 2 (1 falseNegative + 1 skipped)", got) + } +} + +func TestLoadTestResultMissingFile(t *testing.T) { + if _, err := LoadTestResult(filepath.Join(t.TempDir(), "nope.json")); err == nil { + t.Fatal("expected error for missing file") + } +} From c9f2a18f8c17d53182e71a4ede410df36d5067f3 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:25:08 +0200 Subject: [PATCH 36/54] fix(cli): fail test runs on failing samples, reuse internal/analyzer classifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The analyzer exits 0 even when rule-test samples fail; the verdict lives only in test-result.json, which the CLI never read — so the documented exit-code contract ('0 = all tests passed') was not enforced. Test runs now parse the result file, print a pass/fail summary, and exit 2 on failures. Also: delete cmd/analyzer_exit.go (a verbatim duplicate of internal/analyzer), share the exit-codes help text and common flag registrations between the two run commands, skip the builtin-rules download for approximation tests (the harness rule is self-contained), and surface non-ENOENT project-model stat errors instead of swallowing them. --- cli/cmd/analyzer_exit.go | 65 ---------------------- cli/cmd/java_runners.go | 26 +++++++++ cli/cmd/test.go | 24 ++++++++ cli/cmd/test_approximation_run.go | 14 +---- cli/cmd/test_rule_run.go | 92 +++++++++++++++++-------------- 5 files changed, 104 insertions(+), 117 deletions(-) delete mode 100644 cli/cmd/analyzer_exit.go create mode 100644 cli/cmd/java_runners.go diff --git a/cli/cmd/analyzer_exit.go b/cli/cmd/analyzer_exit.go deleted file mode 100644 index d81095057..000000000 --- a/cli/cmd/analyzer_exit.go +++ /dev/null @@ -1,65 +0,0 @@ -package cmd - -import ( - "fmt" - - "github.com/seqra/opentaint/internal/utils/java" -) - -// Analyzer exit codes as seen by the OS (unsigned byte values). -// These correspond to the Kotlin exitProcess() calls in AbstractAnalyzerRunner: -// -// exitProcess(-1) → 255 (project configuration error) -// exitProcess(-2) → 254 (analysis timeout) -// exitProcess(-3) → 253 (out of memory) -// exitProcess(-4) → 252 (unhandled exception) -const ( - analyzerExitConfigError = 255 - analyzerExitTimeout = 254 - analyzerExitOOM = 253 - analyzerExitException = 252 -) - -// analyzerError holds information about an analyzer failure. -// exitCode is the process exit code to forward to os.Exit. -type analyzerError struct { - exitCode int -} - -// analyzerExitMessage returns a human-readable description for a known -// analyzer exit code, or empty string if the code is not recognized. -func analyzerExitMessage(code int) string { - switch code { - case analyzerExitConfigError: - return "project configuration error" - case analyzerExitTimeout: - return "analysis timed out — try increasing --timeout or --max-memory" - case analyzerExitOOM: - return "out of memory — try increasing --max-memory (e.g. --max-memory 16G)" - case analyzerExitException: - return "unhandled analyzer exception" - default: - return "" - } -} - -// classifyAnalyzerError prints a human-readable description of an analyzer -// failure and returns the *analyzerError carrying its exit code. Returns nil -// when cmdErr is nil. -// -// The message is printed immediately. The caller is responsible for eventually -// calling os.Exit with the returned exit code after performing any post-failure -// work (e.g. printing summaries). -func classifyAnalyzerError(cmdErr *java.JavaCommandError) *analyzerError { - if cmdErr == nil { - return nil - } - - code := cmdErr.ExitCode - formatted := fmt.Sprintf("Analysis failed with exit code %d", code) - if msg := analyzerExitMessage(code); msg != "" { - formatted = fmt.Sprintf("Analysis failed (exit code %d): %s", code, msg) - } - out.Error(formatted) - return &analyzerError{exitCode: code} -} diff --git a/cli/cmd/java_runners.go b/cli/cmd/java_runners.go new file mode 100644 index 000000000..7d8b8d1ff --- /dev/null +++ b/cli/cmd/java_runners.go @@ -0,0 +1,26 @@ +package cmd + +import ( + "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/utils/java" +) + +// newAnalyzerJavaRunner returns the runner policy for the analyzer JVM: the +// managed Adoptium JRE pinned to the default Java version, never system Java. +func newAnalyzerJavaRunner() java.JavaRunner { + return java.NewJavaRunner(). + WithSkipVerify(globals.Config.SkipVerify). + WithDebugOutput(out.DebugStream("Analyzer")). + WithImageType(java.AdoptiumImageJRE). + TrySpecificVersion(globals.DefaultJavaVersion) +} + +// newAutobuilderJavaRunner returns the runner policy for project compilation: +// system Java first, then the user-configured version. +func newAutobuilderJavaRunner() java.JavaRunner { + return java.NewJavaRunner(). + WithSkipVerify(globals.Config.SkipVerify). + WithDebugOutput(out.DebugStream("Autobuilder")). + TrySystem(). + TrySpecificVersion(globals.Config.Java.Version) +} diff --git a/cli/cmd/test.go b/cli/cmd/test.go index 81b9130fc..2c517563f 100644 --- a/cli/cmd/test.go +++ b/cli/cmd/test.go @@ -1,6 +1,8 @@ package cmd import ( + "time" + "github.com/spf13/cobra" ) @@ -28,3 +30,25 @@ func init() { testCmd.AddCommand(testRuleCmd) testCmd.AddCommand(testApproximationCmd) } + +// testExitCodesHelp documents the exit codes shared by `test rule run` and +// `test approximation run`. Codes 252-255 mirror internal/analyzer. +func testExitCodesHelp(passedLine string) string { + return `Exit codes: + 0 ` + passedLine + ` + 1 General failure (configuration or infrastructure error) + 2 One or more tests failed (false negatives/positives or skipped samples) + 252 Unhandled analyzer exception + 253 Out of memory (try increasing --max-memory) + 254 Analysis timed out (try increasing --timeout) + 255 Project configuration error` +} + +// addTestRunFlags registers the flags shared by `test rule run` and +// `test approximation run`. +func addTestRunFlags(cmd *cobra.Command, outputDir *string, timeout *time.Duration, maxMemory *string, dataflow *[]string) { + cmd.Flags().StringVarP(outputDir, "output", "o", "", "Directory for test-result.json and test-results.sarif") + cmd.Flags().DurationVar(timeout, "timeout", 600*time.Second, "Analysis timeout") + cmd.Flags().StringVar(maxMemory, "max-memory", "8G", "Maximum analyzer heap size (e.g., 8G)") + cmd.Flags().StringArrayVar(dataflow, "dataflow-approximations", nil, "Dataflow approximation class directory or Java source directory (repeatable)") +} diff --git a/cli/cmd/test_approximation_run.go b/cli/cmd/test_approximation_run.go index 6d7b40292..926b89d63 100644 --- a/cli/cmd/test_approximation_run.go +++ b/cli/cmd/test_approximation_run.go @@ -23,13 +23,7 @@ var testApproximationRunCmd = &cobra.Command{ A built-in source-to-sink harness rule is applied automatically; positive samples reference it as ` + "`@PositiveRuleSample(value = \"approximation-rule.yaml\", id = \"approximation-rule\")`" + `. -Exit codes: - 0 All approximation tests passed - 1 General failure (configuration or infrastructure error) - 252 Unhandled analyzer exception - 253 Out of memory (try increasing --max-memory) - 254 Analysis timed out (try increasing --timeout) - 255 Project configuration error`, +` + testExitCodesHelp("All approximation tests passed"), Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { ruleDir, err := os.MkdirTemp("", "opentaint-approx-rule-*") @@ -54,9 +48,5 @@ Exit codes: func init() { testApproximationCmd.AddCommand(testApproximationRunCmd) - - testApproximationRunCmd.Flags().StringVarP(&testApproxOutputDir, "output", "o", "", "Directory for test-result.json and test-results.sarif") - testApproximationRunCmd.Flags().DurationVar(&testApproxTimeout, "timeout", 600*time.Second, "Analysis timeout") - testApproximationRunCmd.Flags().StringVar(&testApproxMaxMemory, "max-memory", "8G", "Maximum analyzer heap size (e.g., 8G)") - testApproximationRunCmd.Flags().StringArrayVar(&testApproxDataflow, "dataflow-approximations", nil, "Dataflow approximation class directory or Java source directory (repeatable)") + addTestRunFlags(testApproximationRunCmd, &testApproxOutputDir, &testApproxTimeout, &testApproxMaxMemory, &testApproxDataflow) } diff --git a/cli/cmd/test_rule_run.go b/cli/cmd/test_rule_run.go index d2a03bf82..b2809c18d 100644 --- a/cli/cmd/test_rule_run.go +++ b/cli/cmd/test_rule_run.go @@ -6,9 +6,8 @@ import ( "path/filepath" "time" - "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/analyzer" "github.com/seqra/opentaint/internal/utils" - "github.com/seqra/opentaint/internal/utils/java" "github.com/seqra/opentaint/internal/utils/log" "github.com/spf13/cobra" ) @@ -29,25 +28,20 @@ var testRuleRunCmd = &cobra.Command{ Long: `Run detection rules against samples annotated with @PositiveRuleSample and @NegativeRuleSample in the compiled project model. -Exit codes: - 0 All rule tests passed - 1 General failure (configuration or infrastructure error) - 252 Unhandled analyzer exception - 253 Out of memory (try increasing --max-memory) - 254 Analysis timed out (try increasing --timeout) - 255 Project configuration error`, +` + testExitCodesHelp("All rule tests passed"), Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { runTestProject(args[0], testProjectOptions{ - label: "Rule tests", - tempDir: "opentaint-test-rules-*", - rulesets: testRulesRuleset, - outputDir: testRulesOutputDir, - timeout: testRulesTimeout, - maxMemory: testRulesMaxMemory, - ruleIDs: testRulesRuleID, - dataflowApprox: testRulesDataflow, - passthroughApprox: testRulesPassthrough, + label: "Rule tests", + tempDir: "opentaint-test-rules-*", + rulesets: testRulesRuleset, + outputDir: testRulesOutputDir, + timeout: testRulesTimeout, + maxMemory: testRulesMaxMemory, + ruleIDs: testRulesRuleID, + dataflowApprox: testRulesDataflow, + passthroughApprox: testRulesPassthrough, + includeBuiltinRules: true, }) }, } @@ -63,14 +57,22 @@ type testProjectOptions struct { ruleIDs []string dataflowApprox []string passthroughApprox []string + // includeBuiltinRules loads the builtin ruleset alongside opts.rulesets. + // Rule tests need it (test joins may ref builtin lib rules); approximation + // tests run only against the self-contained harness rule, so skipping it + // keeps them download-free. + includeBuiltinRules bool } func runTestProject(projectModelArg string, opts testProjectOptions) { projectPath := log.AbsPathOrExit(projectModelArg, "project-model") nativeProjectPath := filepath.Join(projectPath, "project.yaml") - if _, err := os.Stat(nativeProjectPath); os.IsNotExist(err) { - out.Fatalf("Project model not found: %s", nativeProjectPath) + if _, err := os.Stat(nativeProjectPath); err != nil { + if os.IsNotExist(err) { + out.Fatalf("Project model not found: %s", nativeProjectPath) + } + out.Fatalf("Cannot access project model %s: %s", nativeProjectPath, err) } // Validate max-memory @@ -96,12 +98,6 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { } } - // Ensure builtin rules are available - rulesPath, err := utils.EnsureRulesPath(out) - if err != nil { - out.Fatalf("Failed to prepare built-in rules: %s", err) - } - timeoutSeconds := int64(opts.timeout / time.Second) if timeoutSeconds <= 0 { timeoutSeconds = 600 @@ -112,9 +108,16 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { SetOutputDir(outputDir). SetSarifFileName("test-results.sarif"). SetIfdsAnalysisTimeout(timeoutSeconds). - AddRuleSet(rulesPath). EnableRunRuleTests() + if opts.includeBuiltinRules { + rulesPath, err := utils.EnsureRulesPath(out) + if err != nil { + out.Fatalf("Failed to prepare built-in rules: %s", err) + } + builder.AddRuleSet(rulesPath) + } + if maxMemory != "" { builder.SetMaxMemory(maxMemory) } @@ -140,27 +143,39 @@ func runTestProject(projectModelArg string, opts testProjectOptions) { addDataflowApproximations(builder, opts.dataflowApprox, analyzerJarPath, projectPath) addPassthroughApproximations(builder, opts.passthroughApprox) - javaRunner := java.NewJavaRunner(). - WithSkipVerify(globals.Config.SkipVerify). - WithDebugOutput(out.DebugStream("Analyzer")). - WithImageType(java.AdoptiumImageJRE). - TrySpecificVersion(globals.DefaultJavaVersion) + javaRunner := newAnalyzerJavaRunner() if _, err := javaRunner.EnsureJava(); err != nil { - out.Fatalf("Failed to resolve Java: %s", err) + out.Fatalf("Failed to resolve Java for analyzer: %s", err) } cmdErr, err := scanProject(builder, javaRunner) if err != nil { out.Fatalf("%s failed: %s", opts.label, err) } - analyzerFail := classifyAnalyzerError(cmdErr) + analyzerFail := analyzer.Classify(cmdErr) + if analyzerFail != nil { + out.Error(analyzerFail.Message) + } // Always print output paths so the agent can inspect partial results + resultPath := filepath.Join(outputDir, "test-result.json") fmt.Printf("Results directory: %s\n", outputDir) - fmt.Printf("Test results: %s\n", filepath.Join(outputDir, "test-result.json")) + fmt.Printf("Test results: %s\n", resultPath) if analyzerFail != nil { - os.Exit(analyzerFail.exitCode) + os.Exit(analyzerFail.ExitCode) + } + + // The analyzer exits 0 even when samples fail; the verdict is in test-result.json. + tr, err := analyzer.LoadTestResult(resultPath) + if err != nil { + out.Fatalf("%s produced no readable test-result.json: %s", opts.label, err) + } + fmt.Printf("Passed: %d, failed: %d (false negatives: %d, false positives: %d, skipped: %d), disabled: %d\n", + len(tr.Success), tr.Failed(), len(tr.FalseNegative), len(tr.FalsePositive), len(tr.Skipped), len(tr.Disabled)) + if tr.Failed() > 0 { + out.Error(fmt.Sprintf("%s failed", opts.label)) + os.Exit(2) } fmt.Printf("%s completed successfully\n", opts.label) @@ -170,10 +185,7 @@ func init() { testRuleCmd.AddCommand(testRuleRunCmd) testRuleRunCmd.Flags().StringArrayVar(&testRulesRuleset, "ruleset", nil, "Ruleset file or directory to test (repeatable)") - testRuleRunCmd.Flags().StringVarP(&testRulesOutputDir, "output", "o", "", "Directory for test-result.json and test-results.sarif") - testRuleRunCmd.Flags().DurationVar(&testRulesTimeout, "timeout", 600*time.Second, "Analysis timeout") - testRuleRunCmd.Flags().StringVar(&testRulesMaxMemory, "max-memory", "8G", "Maximum analyzer heap size (e.g., 8G)") + addTestRunFlags(testRuleRunCmd, &testRulesOutputDir, &testRulesTimeout, &testRulesMaxMemory, &testRulesDataflow) testRuleRunCmd.Flags().StringArrayVar(&testRulesRuleID, "rule-id", nil, "Run only rules with this ID (repeatable)") - testRuleRunCmd.Flags().StringArrayVar(&testRulesDataflow, "dataflow-approximations", nil, "Dataflow approximation class directory or Java source directory (repeatable)") testRuleRunCmd.Flags().StringArrayVar(&testRulesPassthrough, "passthrough-approximations", nil, "Pass-through approximation YAML file or directory (repeatable)") } From 55f5f6d1f81c86b52d0ed47cf5a9d45d359a71fa Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:26:29 +0200 Subject: [PATCH 37/54] fix(cli): keep release versions for marker-verified bundled installs, read override from ArtifactDef MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every official install channel (install.sh, brew, npm, release archives) lays lib/ flat next to the binary and resolves at the bundled tier, so classifying TierBundled as 'custom' regressed the SARIF tool version and the info trees to 'custom ()' for all end users. The release pipeline now stamps lib/release-versions.yaml; a bundled resolution matching the embedded versions.yaml keeps its pinned version, while make-install dev layouts (no marker) still read as custom — the original intent of the tier check. The jarPathOverride parameter on the display helpers duplicated ArtifactDef.Override at every call site; the helpers now read the def. --- cli/cmd/compile.go | 2 +- cli/cmd/health.go | 22 ++++---- cli/cmd/scan.go | 8 +-- cli/internal/utils/display_version.go | 47 +++++++++------- cli/internal/utils/display_version_test.go | 64 ++++++++++------------ cli/internal/utils/opentaint_home.go | 20 +++++++ 6 files changed, 91 insertions(+), 72 deletions(-) diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go index 191eb4ba9..c4fcf8403 100644 --- a/cli/cmd/compile.go +++ b/cli/cmd/compile.go @@ -73,7 +73,7 @@ Arguments: } sb.FieldNode("Project", absProjectRoot). FieldNode("Output project model", absOutputProjectModelPath). - FieldNode("Autobuilder", utils.ArtifactVersionWithPath(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)). + FieldNode("Autobuilder", utils.ArtifactVersionWithPath(globals.ArtifactByKind("autobuilder"))). Render() if DryRunCompile { diff --git a/cli/cmd/health.go b/cli/cmd/health.go index a2e0145d9..74119e840 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -105,16 +105,8 @@ func runHealth() { // rules are fetched on demand; the rest are reported as-is. func resolveHealthComponent(key string) healthComponent { switch key { - case "autobuilder": - def := globals.ArtifactByKind("autobuilder") - path, err := utils.ResolveJarPath(def) - version := utils.ArtifactVersion(def, globals.Config.Autobuilder.JarPath) - return healthComponent{"Autobuilder", version, path, err == nil && utils.PathExists(path)} - case "analyzer": - def := globals.ArtifactByKind("analyzer") - path, err := utils.ResolveJarPath(def) - version := utils.ArtifactVersion(def, globals.Config.Analyzer.JarPath) - return healthComponent{"Analyzer", version, path, err == nil && utils.PathExists(path)} + case "autobuilder", "analyzer": + return resolveJarComponent(key) case "rules": return resolveRulesComponent() case "runtime": @@ -124,10 +116,18 @@ func resolveHealthComponent(key string) healthComponent { } } +// resolveJarComponent resolves a jar-backed artifact (autobuilder/analyzer). +func resolveJarComponent(kind string) healthComponent { + def := globals.ArtifactByKind(kind) + path, err := utils.ResolveJarPath(def) + version := utils.ArtifactVersion(def) + return healthComponent{def.Name, version, path, err == nil && utils.PathExists(path)} +} + // resolveRulesComponent resolves the built-in rules directory, downloading it // on demand so `health --rules` replaces `dev rules-path`. func resolveRulesComponent() healthComponent { - c := healthComponent{name: "Rules", version: utils.ArtifactVersion(globals.ArtifactByKind("rules"), "")} + c := healthComponent{name: "Rules", version: utils.ArtifactVersion(globals.ArtifactByKind("rules"))} // EnsureRulesPath returns the expected path even on failure, so the report // can still show where the rules belong, flagged as missing. path, err := utils.EnsureRulesPath(out) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index df4d4a44d..e73510d40 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -262,7 +262,7 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { sarifReportName := filepath.Base(absSarifReportPath) - localVersion := utils.ArtifactDisplayVersion(globals.ArtifactByKind("analyzer"), globals.Config.Analyzer.JarPath) + localVersion := utils.ArtifactDisplayVersion(globals.ArtifactByKind("analyzer")) localSemanticVersion := version.GetVersion() var sourceRoot string @@ -590,14 +590,14 @@ func printScanInfo(cmd *cobra.Command, plan scanPlan, absSemgrepRuleLoadTracePat if plan.projectCachePath != "" { sb.FieldNode("Project model", plan.absProjectModel) } - sb.FieldNode("Autobuilder", utils.ArtifactVersionWithPath(globals.ArtifactByKind("autobuilder"), globals.Config.Autobuilder.JarPath)) + sb.FieldNode("Autobuilder", utils.ArtifactVersionWithPath(globals.ArtifactByKind("autobuilder"))) } else { sb.FieldNode("Project model", plan.absProjectModel) } - sb.FieldNode("Analyzer", utils.ArtifactVersionWithPath(globals.ArtifactByKind("analyzer"), globals.Config.Analyzer.JarPath)) + sb.FieldNode("Analyzer", utils.ArtifactVersionWithPath(globals.ArtifactByKind("analyzer"))) for _, r := range absRuleSetPaths { if r.Builtin { - sb.FieldNode("Bundled ruleset", utils.ArtifactVersionWithPath(globals.ArtifactByKind("rules"), "")) + sb.FieldNode("Bundled ruleset", utils.ArtifactVersionWithPath(globals.ArtifactByKind("rules"))) } else { sb.FieldNode("User ruleset", r.Path) } diff --git a/cli/internal/utils/display_version.go b/cli/internal/utils/display_version.go index d8815afa3..bb92accb7 100644 --- a/cli/internal/utils/display_version.go +++ b/cli/internal/utils/display_version.go @@ -10,52 +10,59 @@ import ( // ArtifactDisplayVersion renders an artifact's full display label, keeping the // "/" version prefix. Used for the SARIF tool version, where the prefix // is part of the identifier. -func ArtifactDisplayVersion(def globals.ArtifactDef, jarPathOverride string) string { - tier, path := artifactResolution(def, jarPathOverride) - return displayVersion(def.Version, jarPathOverride, tier, path) +func ArtifactDisplayVersion(def globals.ArtifactDef) string { + tier, path, bundledRelease := artifactResolution(def) + return displayVersion(def.Version, def.Override, tier, path, bundledRelease) } // ArtifactVersionWithPath is the version with the redundant "/" prefix // stripped, for a single-line display that has no separate path field (e.g. // scan's "Analyzer:" node). A custom build keeps its jar path — "custom ()" // — since that line is the only place the path appears. -func ArtifactVersionWithPath(def globals.ArtifactDef, jarPathOverride string) string { - return strings.TrimPrefix(ArtifactDisplayVersion(def, jarPathOverride), def.Kind()+"/") +func ArtifactVersionWithPath(def globals.ArtifactDef) string { + return strings.TrimPrefix(ArtifactDisplayVersion(def), def.Kind()+"/") } // ArtifactVersion is the version for a display that shows the resolved path on // its own line (e.g. health's tree). A managed release yields the bare version; // a custom build collapses to "custom", so the path isn't repeated. -func ArtifactVersion(def globals.ArtifactDef, jarPathOverride string) string { - tier, _ := artifactResolution(def, jarPathOverride) - if isCustomArtifact(def.Version, jarPathOverride, tier) { +func ArtifactVersion(def globals.ArtifactDef) string { + tier, _, bundledRelease := artifactResolution(def) + if isCustomArtifact(def.Version, def.Override, tier, bundledRelease) { return "custom" } return strings.TrimPrefix(def.Version, def.Kind()+"/") } -// artifactResolution resolves the artifact's tier and path, unless an explicit -// jar override is set (in which case neither is needed). -func artifactResolution(def globals.ArtifactDef, jarPathOverride string) (tier, path string) { - if jarPathOverride == "" { +// artifactResolution resolves the artifact's tier and path, unless the def +// carries an explicit jar override (in which case neither is needed). The +// release-marker check only runs for bundled resolutions, where it decides +// between "official release" and "local build". +func artifactResolution(def globals.ArtifactDef) (tier, path string, bundledRelease bool) { + if def.Override == "" { tier, path, _ = resolveArtifactTier(def) + if tier == TierBundled { + bundledRelease = IsBundledRelease() + } } - return tier, path + return tier, path, bundledRelease } // isCustomArtifact reports whether the artifact is a custom build — an explicit -// jar override, a bundled build next to the binary (whose nominal version may -// not match its content), or an unpinned version — rather than a managed -// install/cache release. -func isCustomArtifact(version, overridePath, resolvedTier string) bool { - return overridePath != "" || resolvedTier == TierBundled || version == "" +// jar override, an unpinned version, or a bundled build next to the binary +// without the release pipeline's version marker — rather than a managed release. +func isCustomArtifact(version, overridePath, resolvedTier string, bundledRelease bool) bool { + if overridePath != "" || version == "" { + return true + } + return resolvedTier == TierBundled && !bundledRelease } // displayVersion renders an artifact's label: a custom build as "custom ()" // (the override path if set, otherwise the resolved path), and a managed release // as its version string. -func displayVersion(version, overridePath, resolvedTier, resolvedPath string) string { - if isCustomArtifact(version, overridePath, resolvedTier) { +func displayVersion(version, overridePath, resolvedTier, resolvedPath string, bundledRelease bool) string { + if isCustomArtifact(version, overridePath, resolvedTier, bundledRelease) { path := overridePath if path == "" { path = resolvedPath diff --git a/cli/internal/utils/display_version_test.go b/cli/internal/utils/display_version_test.go index 1ea4b8992..6a30bb71a 100644 --- a/cli/internal/utils/display_version_test.go +++ b/cli/internal/utils/display_version_test.go @@ -8,17 +8,17 @@ import ( func TestDisplayVersion(t *testing.T) { tests := []struct { - name string - version string - overridePath string - resolvedTier string - resolvedPath string - want string + name string + version string + overridePath string + resolvedTier string + resolvedPath string + bundledRelease bool + want string }{ { name: "pinned version, no override, managed install tier", version: "analyzer/2026.05.27.68ab20a", - overridePath: "", resolvedTier: TierInstall, resolvedPath: "/opt/opentaint/lib/opentaint-project-analyzer.jar", want: "analyzer/2026.05.27.68ab20a", @@ -34,7 +34,6 @@ func TestDisplayVersion(t *testing.T) { { name: "empty pin falls back to resolved path", version: "", - overridePath: "", resolvedTier: TierCache, resolvedPath: "/opt/opentaint/lib/opentaint-project-analyzer.jar", want: "custom (/opt/opentaint/lib/opentaint-project-analyzer.jar)", @@ -48,56 +47,49 @@ func TestDisplayVersion(t *testing.T) { want: "custom (/home/dev/build/analyzer.jar)", }, { - name: "bundled tier shows custom path even with a pinned version", + name: "bundled tier without release marker is a local build", version: "rules/v0.2.0", - overridePath: "", resolvedTier: TierBundled, resolvedPath: "/opt/opentaint/lib/rules", want: "custom (/opt/opentaint/lib/rules)", }, + { + name: "bundled tier with release marker keeps the pinned version", + version: "analyzer/2026.06.09.fc56601", + resolvedTier: TierBundled, + resolvedPath: "/home/user/.opentaint/install/lib/opentaint-project-analyzer.jar", + bundledRelease: true, + want: "analyzer/2026.06.09.fc56601", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := displayVersion(tt.version, tt.overridePath, tt.resolvedTier, tt.resolvedPath) + got := displayVersion(tt.version, tt.overridePath, tt.resolvedTier, tt.resolvedPath, tt.bundledRelease) if got != tt.want { - t.Errorf("displayVersion(%q, %q, %q, %q) = %q, want %q", - tt.version, tt.overridePath, tt.resolvedTier, tt.resolvedPath, got, tt.want) + t.Errorf("displayVersion(%q, %q, %q, %q, %v) = %q, want %q", + tt.version, tt.overridePath, tt.resolvedTier, tt.resolvedPath, tt.bundledRelease, got, tt.want) } }) } } -func TestArtifactDisplayVersion(t *testing.T) { - analyzer := globals.ArtifactByKind("analyzer") - - override := analyzer.WithVersion("analyzer/2026.05.27.68ab20a") - if got := ArtifactDisplayVersion(override, "/home/dev/analyzer.jar"); got != "custom (/home/dev/analyzer.jar)" { +func TestArtifactDisplayVersionOverride(t *testing.T) { + override := globals.ArtifactByKind("analyzer").WithVersion("analyzer/2026.05.27.68ab20a") + override.Override = "/home/dev/analyzer.jar" + if got := ArtifactDisplayVersion(override); got != "custom (/home/dev/analyzer.jar)" { t.Errorf("override case: got %q, want %q", got, "custom (/home/dev/analyzer.jar)") } - - pinned := analyzer.WithVersion("analyzer/2026.05.27.68ab20a") - if got := ArtifactDisplayVersion(pinned, ""); got != "analyzer/2026.05.27.68ab20a" { - t.Errorf("pinned case: got %q, want %q", got, "analyzer/2026.05.27.68ab20a") - } } func TestArtifactVersionShortVariants(t *testing.T) { - analyzer := globals.ArtifactByKind("analyzer").WithVersion("analyzer/2026.05.27.68ab20a") - - // Pinned release: kind prefix stripped, identical for both helpers. - if got := ArtifactVersionWithPath(analyzer, ""); got != "2026.05.27.68ab20a" { - t.Errorf("WithPath pinned: got %q, want %q", got, "2026.05.27.68ab20a") - } - if got := ArtifactVersion(analyzer, ""); got != "2026.05.27.68ab20a" { - t.Errorf("bare pinned: got %q, want %q", got, "2026.05.27.68ab20a") - } + // Override cases never resolve tiers, so they are host-independent. + custom := globals.ArtifactByKind("analyzer").WithVersion("analyzer/2026.05.27.68ab20a") + custom.Override = "/home/dev/analyzer.jar" - // Custom (jar override): WithPath keeps the path (single-line display), - // bare collapses to "custom" (the path is shown separately, no dup). - if got := ArtifactVersionWithPath(analyzer, "/home/dev/analyzer.jar"); got != "custom (/home/dev/analyzer.jar)" { + if got := ArtifactVersionWithPath(custom); got != "custom (/home/dev/analyzer.jar)" { t.Errorf("WithPath custom: got %q, want %q", got, "custom (/home/dev/analyzer.jar)") } - if got := ArtifactVersion(analyzer, "/home/dev/analyzer.jar"); got != "custom" { + if got := ArtifactVersion(custom); got != "custom" { t.Errorf("bare custom: got %q, want %q", got, "custom") } } diff --git a/cli/internal/utils/opentaint_home.go b/cli/internal/utils/opentaint_home.go index b3cbee5fc..0bad48c86 100644 --- a/cli/internal/utils/opentaint_home.go +++ b/cli/internal/utils/opentaint_home.go @@ -115,6 +115,26 @@ func GetInstallJREPath() string { return "" } +// BundledReleaseMarkerName is the manifest the release pipeline writes next to +// the bundled jars; its presence (matching the embedded versions.yaml) marks +// the bundled lib dir as an unmodified official release rather than a local build. +const BundledReleaseMarkerName = "release-versions.yaml" + +// IsBundledRelease reports whether the bundled lib dir next to the binary was +// produced by the release pipeline for exactly the embedded bind versions. +// A `make install` dev layout has no marker and reads as a custom build. +func IsBundledRelease() bool { + lib := GetBundledLibPath() + if lib == "" { + return false + } + data, err := os.ReadFile(filepath.Join(lib, BundledReleaseMarkerName)) + if err != nil { + return false + } + return bytes.Equal(data, globals.GetVersionsYAML()) +} + // IsInstallCurrent reports whether the install-tier version marker matches // the embedded versions.yaml. Returns false if the marker is missing or differs. func IsInstallCurrent() bool { From d61ffd29d4a1bc0e7c5ee18e0374600b7c89f49d Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:27:50 +0200 Subject: [PATCH 38/54] fix(cli): health exits non-zero on missing components and reports the analyzer's real runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single-component mode printed an empty path and exited 0 on resolution failures, so scripts couldn't detect a missing dependency. The runtime component reported a system Java (or a stale install-tier JRE) that the analyzer runner — which pins the managed Adoptium JRE and never tries system Java — would not use. Both now share the runner's probe via utils.FindCurrentManagedJRE, and the autobuilder/analyzer cases collapse into one resolveJarComponent. --- cli/cmd/health.go | 53 ++++++++++++++++++------------- cli/cmd/health_test.go | 39 +++++++++++++++++++++++ cli/internal/utils/java/runner.go | 3 +- cli/internal/utils/tier.go | 7 ++++ 4 files changed, 78 insertions(+), 24 deletions(-) diff --git a/cli/cmd/health.go b/cli/cmd/health.go index 74119e840..35b0ec848 100644 --- a/cli/cmd/health.go +++ b/cli/cmd/health.go @@ -4,10 +4,10 @@ import ( "fmt" "os" "strconv" + "strings" "github.com/seqra/opentaint/internal/globals" "github.com/seqra/opentaint/internal/utils" - "github.com/seqra/opentaint/internal/utils/java" "github.com/spf13/cobra" ) @@ -35,10 +35,12 @@ built-in rules, and Java runtime. Use --autobuilder, --analyzer, --rules, or --runtime to select components. When exactly one component is selected, only its path is printed. The command does -not download artifacts except built-in rules, which are fetched on demand.`, +not download artifacts except built-in rules, which are fetched on demand. + +The exit code is non-zero when any selected component is missing.`, Args: cobra.NoArgs, - Run: func(cmd *cobra.Command, args []string) { - runHealth() + RunE: func(cmd *cobra.Command, args []string) error { + return runHealth() }, } @@ -50,7 +52,7 @@ func init() { healthCmd.Flags().BoolVar(&healthRuntime, "runtime", false, "Print only the Java runtime path") } -func runHealth() { +func runHealth() error { // No flags shows every component, in fixed order. var requested []string if healthAutobuilder { @@ -77,15 +79,21 @@ func runHealth() { // A single flag prints just the bare path, for scripting. if len(requested) == 1 { c := components[0] - fmt.Println(c.path) + if c.path != "" { + fmt.Println(c.path) + } if !c.present { - fmt.Fprintf(os.Stderr, "%s missing at %s\n", c.name, c.path) + if c.path == "" { + return fmt.Errorf("%s could not be resolved", c.name) + } + return fmt.Errorf("%s missing at %s", c.name, c.path) } - return + return nil } sb := out.Section("OpenTaint Health") th := out.Theme() + var missing []string for _, c := range components { node := out.GroupItem(th.FieldKey.Render(c.name)) if c.version != "" { @@ -94,11 +102,16 @@ func runHealth() { path := c.path if !c.present { path += " " + th.Error.Render("missing") + missing = append(missing, c.name) } node.Child(th.FieldValue.Render(path)) sb.Child(node) } sb.Render() + if len(missing) > 0 { + return fmt.Errorf("missing components: %s", strings.Join(missing, ", ")) + } + return nil } // resolveHealthComponent resolves a component's path and presence. Only the @@ -133,31 +146,27 @@ func resolveRulesComponent() healthComponent { path, err := utils.EnsureRulesPath(out) c.path = path if err != nil { - fmt.Fprintf(os.Stderr, "Error resolving rules: %s\n", err) + fmt.Fprintf(os.Stderr, "Failed to prepare built-in rules: %s\n", err) return c } c.present = utils.PathExists(path) return c } -// resolveRuntimeComponent reports the Java the analyzer runs on, and where it -// comes from: "builtin" is the JRE OpenTaint manages itself (downloaded/bundled -// into its own install), "system" is a Java already on the user's PATH. +// resolveRuntimeComponent reports the managed JRE the analyzer actually runs +// on. The analyzer's runner never consults a system Java (it pins the managed +// Adoptium JRE), so neither does health; when no managed JRE exists yet, the +// reported path is where the analyzer will download one on first use. func resolveRuntimeComponent() healthComponent { - c := healthComponent{name: "Runtime"} - if jre := utils.FindExistingJRE(utils.ManagedJRETiers()); jre != nil { - c.path = utils.JavaBinaryPath(jre.Path) - c.version = "Java " + strconv.Itoa(globals.DefaultJavaVersion) + " (builtin)" - c.present = true - return c + c := healthComponent{ + name: "Runtime", + version: "Java " + strconv.Itoa(globals.DefaultJavaVersion) + " (builtin)", } - if sys := java.DetectSystemJava(); sys != nil { - c.path = sys.Path - c.version = "Java " + sys.FullVersion + " (system)" + if jre := utils.FindCurrentManagedJRE(); jre != nil { + c.path = utils.JavaBinaryPath(jre.Path) c.present = true return c } - c.version = "Java " + strconv.Itoa(globals.DefaultJavaVersion) + " (builtin)" if jre := utils.GetInstallJREPath(); jre != "" { c.path = utils.JavaBinaryPath(jre) } diff --git a/cli/cmd/health_test.go b/cli/cmd/health_test.go index 153c612b1..ab5e558fa 100644 --- a/cli/cmd/health_test.go +++ b/cli/cmd/health_test.go @@ -1,9 +1,12 @@ package cmd import ( + "os" + "path/filepath" "testing" "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/utils" ) func TestResolveHealthComponentUsesAnalyzerJarOverride(t *testing.T) { @@ -33,3 +36,39 @@ func TestResolveHealthComponentUsesAutobuilderJarOverride(t *testing.T) { t.Fatalf("health autobuilder path = %q, want override %q", c.path, globals.Config.Autobuilder.JarPath) } } + +func TestResolveRuntimeComponentIgnoresSystemJava(t *testing.T) { + // An empty HOME means no managed JRE can exist; the analyzer would download + // its own JRE, so health must NOT report a system Java as the runtime. + t.Setenv("HOME", t.TempDir()) + + c := resolveHealthComponent("runtime") + if c.present { + t.Fatalf("runtime present = true with no managed JRE; health must not report a runtime the analyzer won't use (path %q)", c.path) + } +} + +func TestResolveRuntimeComponentFindsManagedJRE(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + jreBin := filepath.Join(home, ".opentaint", "install", "jre", "bin") + if err := os.MkdirAll(jreBin, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(jreBin, "java"), []byte("#!/bin/sh\n"), 0o755); err != nil { + t.Fatal(err) + } + // Without the .versions marker the install tier is stale-filtered, matching + // the analyzer's own policy — write it so the JRE counts as current. + if err := utils.WriteInstallVersionMarker(); err != nil { + t.Fatal(err) + } + + c := resolveHealthComponent("runtime") + if !c.present { + t.Fatalf("runtime present = false, want true with managed JRE at %s", jreBin) + } + if want := filepath.Join(jreBin, "java"); c.path != want { + t.Errorf("runtime path = %q, want %q", c.path, want) + } +} diff --git a/cli/internal/utils/java/runner.go b/cli/internal/utils/java/runner.go index a2878250f..392fd7262 100644 --- a/cli/internal/utils/java/runner.go +++ b/cli/internal/utils/java/runner.go @@ -355,8 +355,7 @@ func NewJavaRunner() JavaRunner { } func (j *javaRunner) findBundledJRE() string { - tiers := utils.CurrentTiers(utils.ManagedJRETiers(), utils.IsInstallCurrent()) - if tier := utils.FindExistingJRE(tiers); tier != nil { + if tier := utils.FindCurrentManagedJRE(); tier != nil { return utils.JavaBinaryPath(tier.Path) } return "" diff --git a/cli/internal/utils/tier.go b/cli/internal/utils/tier.go index e179906f4..df5120961 100644 --- a/cli/internal/utils/tier.go +++ b/cli/internal/utils/tier.go @@ -116,6 +116,13 @@ func JRETiers(javaVersion int, cacheDir string) []Tier { return tiers } +// FindCurrentManagedJRE returns the first non-stale managed JRE tier (bundled +// or install) containing a java binary — the same probe the analyzer's Java +// runner uses, so health and the runner can't drift. Nil when none exists. +func FindCurrentManagedJRE() *Tier { + return FindExistingJRE(CurrentTiers(ManagedJRETiers(), IsInstallCurrent())) +} + // ManagedJRETiers returns the bundled and install JRE tiers (excluding cache). // Used to find a pre-installed JRE without triggering a download. func ManagedJRETiers() []Tier { From 46cf09cddc7da6a988ac37b1073056f7efbaaad6 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:28:41 +0200 Subject: [PATCH 39/54] refactor(cli): single ArtifactDef-driven jar provisioning and shared Java runner policies ensureAnalyzerAvailable/ensureAutobuilderAvailable were near-identical bodies in different command files, reached into across commands; both now delegate to one ensureArtifactJar(def) in artifacts.go driven entirely by the ArtifactDef registry. The analyzer/autobuilder JavaRunner construction chains (copied in scan, compile, and the test runner) collapse into newAnalyzerJavaRunner / newAutobuilderJavaRunner. --- cli/cmd/artifacts.go | 31 +++++++++++++++++++++++++++++++ cli/cmd/compile.go | 25 +------------------------ cli/cmd/scan.go | 31 ++----------------------------- 3 files changed, 34 insertions(+), 53 deletions(-) diff --git a/cli/cmd/artifacts.go b/cli/cmd/artifacts.go index 9d662b0ce..08e773777 100644 --- a/cli/cmd/artifacts.go +++ b/cli/cmd/artifacts.go @@ -4,8 +4,39 @@ import ( "errors" "fmt" "os" + + "github.com/seqra/opentaint/internal/globals" + "github.com/seqra/opentaint/internal/utils" ) +// ensureArtifactJar resolves an artifact's jar path and downloads the release +// asset when missing. An explicit Override on the def short-circuits both. It +// is the single provisioning path for every jar-backed artifact. +func ensureArtifactJar(def globals.ArtifactDef) (string, error) { + path, err := utils.ResolveJarPath(def) + if err != nil { + return "", fmt.Errorf("failed to construct path to the %s: %w", def.Kind(), err) + } + if def.Override != "" { + return path, nil + } + + if err := ensureArtifactAvailable(def.Kind(), def.Version, path, func() error { + return utils.DownloadGithubReleaseAsset(globals.Config.Owner, globals.Config.Repo, def.Version, def.AssetName, path, globals.Config.Github.Token, globals.Config.SkipVerify, out) + }); err != nil { + return "", err + } + return path, nil +} + +func ensureAnalyzerAvailable() (string, error) { + return ensureArtifactJar(globals.ArtifactByKind("analyzer")) +} + +func ensureAutobuilderAvailable() (string, error) { + return ensureArtifactJar(globals.ArtifactByKind("autobuilder")) +} + func ensureArtifactAvailable(name, version, artifactPath string, download func() error) error { if _, err := os.Stat(artifactPath); err == nil { return nil diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go index c4fcf8403..3d33c8d33 100644 --- a/cli/cmd/compile.go +++ b/cli/cmd/compile.go @@ -88,11 +88,7 @@ Arguments: out.Fatalf("Native compile preparation failed: %s", err) } - compileJavaRunner := java.NewJavaRunner(). - WithSkipVerify(globals.Config.SkipVerify). - WithDebugOutput(out.DebugStream("Autobuilder")). - TrySystem(). - TrySpecificVersion(globals.Config.Java.Version) + compileJavaRunner := newAutobuilderJavaRunner() if _, err := compileJavaRunner.EnsureJava(); err != nil { out.Fatalf("Failed to resolve Java for compilation: %s", err) } @@ -119,25 +115,6 @@ func init() { compileCmd.Flags().StringVar(&CompileLogFile, "log-file", "", "Path to the log file (default: /logs/.log)") } -func ensureAutobuilderAvailable() (string, error) { - def := globals.ArtifactByKind("autobuilder") - autobuilderJarPath, err := utils.ResolveJarPath(def) - if err != nil { - return "", fmt.Errorf("failed to construct path to the autobuilder: %w", err) - } - if def.Override != "" { - return autobuilderJarPath, nil - } - - if err = ensureArtifactAvailable("autobuilder", globals.Config.Autobuilder.Version, autobuilderJarPath, func() error { - return utils.DownloadGithubReleaseAsset(globals.Config.Owner, globals.Config.Repo, globals.Config.Autobuilder.Version, globals.AutobuilderAssetName, autobuilderJarPath, globals.Config.Github.Token, globals.Config.SkipVerify, out) - }); err != nil { - return "", err - } - - return autobuilderJarPath, nil -} - func compile(absProjectRoot, absOutputProjectModelPath, autobuilderJarPath string, javaRunner java.JavaRunner) error { if err := validation.ValidateCompileInputs(absProjectRoot, absOutputProjectModelPath); err != nil { return err diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index e73510d40..7c68c10d6 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -320,11 +320,7 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { out.Fatalf("Native compile preparation failed: %s", err) } - compileJavaRunner := java.NewJavaRunner(). - WithSkipVerify(globals.Config.SkipVerify). - WithDebugOutput(out.DebugStream("Autobuilder")). - TrySystem(). - TrySpecificVersion(globals.Config.Java.Version) + compileJavaRunner := newAutobuilderJavaRunner() if _, err := compileJavaRunner.EnsureJava(); err != nil { out.Fatalf("Failed to resolve Java for compilation: %s", err) } @@ -422,11 +418,7 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { // Process --dataflow-approximations: auto-compile .java sources if needed addDataflowApproximations(nativeBuilder, cfg.DataflowApproximations, analyzerJarPath, absProjectModelPath) - analyzerJavaRunner := java.NewJavaRunner(). - WithSkipVerify(globals.Config.SkipVerify). - WithDebugOutput(out.DebugStream("Analyzer")). - WithImageType(java.AdoptiumImageJRE). - TrySpecificVersion(globals.DefaultJavaVersion) + analyzerJavaRunner := newAnalyzerJavaRunner() if _, err := analyzerJavaRunner.EnsureJava(); err != nil { out.Fatalf("Failed to resolve Java for analyzer: %s", err) } @@ -619,25 +611,6 @@ func setupSemgrepRuleLoadTrace() string { return absSemgrepRuleLoadTracePath } -func ensureAnalyzerAvailable() (string, error) { - def := globals.ArtifactByKind("analyzer") - analyzerJarPath, err := utils.ResolveJarPath(def) - if err != nil { - return "", fmt.Errorf("failed to construct path to the analyzer: %w", err) - } - if def.Override != "" { - return analyzerJarPath, nil - } - - if err := ensureArtifactAvailable("analyzer", globals.Config.Analyzer.Version, analyzerJarPath, func() error { - return utils.DownloadGithubReleaseAsset(globals.Config.Owner, globals.Config.Repo, globals.Config.Analyzer.Version, globals.AnalyzerAssetName, analyzerJarPath, globals.Config.Github.Token, globals.Config.SkipVerify, out) - }); err != nil { - return "", err - } - - return analyzerJarPath, nil -} - func scanProject(analyzerBuilder *AnalyzerBuilder, javaRunner java.JavaRunner) (*java.JavaCommandError, error) { analyzerCommand := analyzerBuilder.BuildNativeCommand() From 40de354f75a44a716ccd160c68f0a35b98cb88ce Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:29:59 +0200 Subject: [PATCH 40/54] fix(cli): bind scan config keys to the executing command so explicit flags beat config/env The scan.* viper keys were bound only to scanCmd's flag instances at init time, so when 'test rule reachability' parsed explicit --timeout/--max-memory /--code-flow-limit values, initConfig's viper.Unmarshal silently overwrote them with config-file or OPENTAINT_SCAN_* env values (the bound scan flags had Changed=false). Config loading now runs from the root PersistentPreRunE and rebinds the keys to whichever command is executing. --- cli/cmd/root.go | 28 +++++++++-- cli/cmd/scan.go | 25 +++------- cli/cmd/test_rule_reachability.go | 2 +- cli/cmd/test_rule_reachability_test.go | 67 +++++++++++++++++++++++++- 4 files changed, 98 insertions(+), 24 deletions(-) diff --git a/cli/cmd/root.go b/cli/cmd/root.go index b074efa0e..a2812bc26 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -40,6 +40,8 @@ var rootCmd = &cobra.Command{ SilenceUsage: true, PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + initConfig(cmd) + applyExperimentalFlagVisibility(cmd.Root(), experimentalMode) if err := log.SetUpLogs(); err != nil { @@ -93,7 +95,6 @@ func Execute() { } func init() { - cobra.OnInitialize(initConfig) configureExperimentalFlagVisibility() // Here you will define your flags and configuration settings. @@ -154,8 +155,13 @@ func init() { _ = viper.BindPFlag("autobuilder.jar_path", rootCmd.PersistentFlags().Lookup("autobuilder-jar")) } -// initConfig reads in config file and ENV variables if set. -func initConfig() { +// initConfig reads the config file and ENV variables. It runs from the root +// PersistentPreRunE so it can bind shared viper keys to the EXECUTING +// command's flag instances — explicit flags must beat config/env for every +// command that registers scan flags, not just `scan` itself. +func initConfig(cmd *cobra.Command) { + bindScanFlags(cmd) + if globals.ConfigFile != "" { // Use config file from the flag. viper.SetConfigFile(globals.ConfigFile) @@ -169,6 +175,22 @@ func initConfig() { _ = viper.Unmarshal(&globals.Config) } +// bindScanFlags points the scan.* viper keys at cmd's flag instances when cmd +// registers them. Binding at execution time (not init time) means the command +// the user actually invoked owns flag precedence — see addScanFlags. +func bindScanFlags(cmd *cobra.Command) { + for key, name := range map[string]string{ + "scan.timeout": "timeout", + "scan.ruleset": "ruleset", + "scan.max_memory": "max-memory", + "scan.code_flow_limit": "code-flow-limit", + } { + if f := cmd.Flags().Lookup(name); f != nil { + _ = viper.BindPFlag(key, f) + } + } +} + // hasNestedKey reports whether a dotted key path is present in a viper settings map. // Each path segment must resolve to a non-nil value; intermediate segments must be maps. func hasNestedKey(m map[string]any, parts []string) bool { diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 7c68c10d6..9a6842897 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -15,7 +15,6 @@ import ( "github.com/seqra/opentaint/internal/utils/project" "github.com/spf13/cobra" - "github.com/spf13/viper" "github.com/seqra/opentaint/internal/globals" "github.com/seqra/opentaint/internal/output" @@ -130,7 +129,7 @@ func prepareScanConfig(cfg ScanConfig, args []string) ScanConfig { func init() { rootCmd.AddCommand(scanCmd) - addScanFlags(scanCmd, true) + addScanFlags(scanCmd) addRuleIDFlag(scanCmd) } @@ -142,33 +141,21 @@ func addRuleIDFlag(cmd *cobra.Command) { } // addScanFlags registers the flags shared by `scan` and `test rule -// reachability`. Only the canonical `scan` command binds them to viper config -// keys (bindViper); the reachability alias shares the same scanFlags target but -// must not re-bind the global "scan.*" keys, or the last init() to run would -// silently steal config precedence from the command the user actually invoked. -func addScanFlags(cmd *cobra.Command, bindViper bool) { +// reachability`. The matching scan.* viper keys are bound to the executing +// command's flag instances at startup (bindScanFlags in root.go), so explicit +// flags keep precedence over config/env regardless of which command the user +// invoked. +func addScanFlags(cmd *cobra.Command) { cmd.Flags().DurationVarP(&globals.Config.Scan.Timeout, "timeout", "t", 900*time.Second, "Timeout for analysis") - if bindViper { - _ = viper.BindPFlag("scan.timeout", cmd.Flags().Lookup("timeout")) - } cmd.Flags().StringArrayVar(&scanFlags.Ruleset, "ruleset", []string{"builtin"}, "YAML rules file, directory of YAML rules files ending in .yml or .yaml, or `builtin` to scan with built-in rules") - if bindViper { - _ = viper.BindPFlag("scan.ruleset", cmd.Flags().Lookup("ruleset")) - } cmd.Flags().BoolVar(&scanFlags.SemgrepCompatibilitySarif, "semgrep-compatibility-sarif", true, "Use Semgrep compatible ruleId") cmd.Flags().StringVarP(&scanFlags.SarifReportPath, "output", "o", "", "Path to the SARIF-report output file") cmd.Flags().StringArrayVar(&scanFlags.Severity, "severity", []string{"warning", "error"}, "Report findings only from rules matching the supplied severity level. By default only warning and error rules are run (note, warning, error)") cmd.Flags().StringVar(&globals.Config.Scan.MaxMemory, "max-memory", "8G", "Maximum memory for the analyzer (e.g., 1024m, 8G, 81920k, 83886080)") - if bindViper { - _ = viper.BindPFlag("scan.max_memory", cmd.Flags().Lookup("max-memory")) - } cmd.Flags().Int64Var(&globals.Config.Scan.CodeFlowLimit, "code-flow-limit", 0, "Maximum number of code flows to include in the report (0 = unlimited)") - if bindViper { - _ = viper.BindPFlag("scan.code_flow_limit", cmd.Flags().Lookup("code-flow-limit")) - } cmd.Flags().BoolVar(&scanFlags.DryRun, "dry-run", false, "Validate inputs and show what would run without compiling or scanning") cmd.Flags().BoolVar(&scanFlags.Recompile, "recompile", false, "Force recompilation even if a cached project model exists") cmd.Flags().StringVar(&scanFlags.ProjectModelPath, "project-model", "", "Path to a pre-compiled project model (skips compilation)") diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index f09275607..f8a918695 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -58,7 +58,7 @@ func reachabilityScanConfig(base ScanConfig, ruleID, entryPoint string) ScanConf func init() { testRuleCmd.AddCommand(testRuleReachabilityCmd) - addScanFlags(testRuleReachabilityCmd, false) + addScanFlags(testRuleReachabilityCmd) testRuleReachabilityCmd.Flags().StringVar(&reachabilityEntryPoint, "entry-points", "", "Start from '*' or a fully qualified method such as com.example.Class#method") } diff --git a/cli/cmd/test_rule_reachability_test.go b/cli/cmd/test_rule_reachability_test.go index 4d59ef35d..0ba10d8d9 100644 --- a/cli/cmd/test_rule_reachability_test.go +++ b/cli/cmd/test_rule_reachability_test.go @@ -1,6 +1,14 @@ package cmd -import "testing" +import ( + "os" + "path/filepath" + "testing" + "time" + + "github.com/seqra/opentaint/internal/globals" + "github.com/spf13/viper" +) func TestReachabilityScanConfigAppliesPresets(t *testing.T) { base := ScanConfig{ @@ -38,3 +46,60 @@ func TestReachabilityScanConfigOmitsEmptyEntryPoint(t *testing.T) { t.Errorf("entry points = %q, want empty when no entry point given", cfg.DebugRunAnalysisOnSelectedEntryPoints) } } + +func TestReachabilityExplicitFlagsSurviveConfig(t *testing.T) { + // Snapshot and restore the shared state this test mutates. + origTimeout := globals.Config.Scan.Timeout + origMaxMemory := globals.Config.Scan.MaxMemory + t.Cleanup(func() { + globals.Config.Scan.Timeout = origTimeout + globals.Config.Scan.MaxMemory = origMaxMemory + globals.ConfigFile = "" + viper.Reset() + testRuleReachabilityCmd.Flags().Lookup("timeout").Changed = false + testRuleReachabilityCmd.Flags().Lookup("max-memory").Changed = false + }) + + cfgFile := filepath.Join(t.TempDir(), "config.yaml") + if err := os.WriteFile(cfgFile, []byte("scan:\n timeout: 300s\n max_memory: 4G\n"), 0o644); err != nil { + t.Fatal(err) + } + globals.ConfigFile = cfgFile + + if err := testRuleReachabilityCmd.Flags().Set("timeout", "777s"); err != nil { + t.Fatal(err) + } + if err := testRuleReachabilityCmd.Flags().Set("max-memory", "16G"); err != nil { + t.Fatal(err) + } + + initConfig(testRuleReachabilityCmd) + + if got := globals.Config.Scan.Timeout; got != 777*time.Second { + t.Errorf("Timeout = %v, want 777s (explicit flag must beat config file)", got) + } + if got := globals.Config.Scan.MaxMemory; got != "16G" { + t.Errorf("MaxMemory = %q, want 16G (explicit flag must beat config file)", got) + } +} + +func TestScanConfigFileAppliesWhenFlagUnset(t *testing.T) { + origTimeout := globals.Config.Scan.Timeout + t.Cleanup(func() { + globals.Config.Scan.Timeout = origTimeout + globals.ConfigFile = "" + viper.Reset() + }) + + cfgFile := filepath.Join(t.TempDir(), "config.yaml") + if err := os.WriteFile(cfgFile, []byte("scan:\n timeout: 123s\n"), 0o644); err != nil { + t.Fatal(err) + } + globals.ConfigFile = cfgFile + + initConfig(scanCmd) + + if got := globals.Config.Scan.Timeout; got != 123*time.Second { + t.Errorf("Timeout = %v, want config-file 123s when flag not passed", got) + } +} From 90d9aad4b686d6c161f21031e25f658c078f0430 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:31:30 +0200 Subject: [PATCH 41/54] fix(cli): carry all scan flags into suggestions, simplify scan plan and builtin download currentScanBuilder promised that new flags propagate to every suggestion but dropped --rule-id, --severity, both approximation flags, and --track-external-methods, so the Docker-fallback and --project-model suggestions ran materially different scans. scanPlan.mode fully derived from needsCompilation (ScanMode removed); the builtin-ruleset download loop ran EnsureRulesPath once per builtin entry for nothing. --- cli/cmd/scan.go | 67 ++++++++++--------- .../utils/opentaint_command_builder.go | 28 ++++++++ 2 files changed, 63 insertions(+), 32 deletions(-) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 9a6842897..99936325c 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -56,39 +56,28 @@ type RulesetType struct { Builtin bool } -type ScanMode int - -const ( - Scan ScanMode = iota - CompileAndScan -) - const ( dryRunScanProjectModelPath = "opentaint-scan-dry-run/project-model" dryRunRuleLoadTraceFileName = "opentaint-rule-load-trace.dry-run.json" ) -func (m ScanMode) String() string { - switch m { - case Scan: - return "OpenTaint Scan" - case CompileAndScan: - return "OpenTaint Compile and Scan" - default: - return "Unknown" - } -} - // scanPlan holds the resolved compilation/cache plan for a scan invocation, // derived from a ScanConfig and the on-disk model cache. type scanPlan struct { - mode ScanMode absProjectModel string // absolute path to the project model (always the cache dir when projectCachePath is set) projectCachePath string // cache dir for this project (empty for explicit model / dry-run) needsCompilation bool // true when compilation is needed before scanning cacheLock *utils.FileLock } +// title names the scan flavor for the info tree header. +func (p scanPlan) title() string { + if p.needsCompilation { + return "OpenTaint Compile and Scan" + } + return "OpenTaint Scan" +} + // scanCmd represents the scan command var scanCmd = &cobra.Command{ Use: "scan [source-path]", @@ -169,14 +158,28 @@ func addScanFlags(cmd *cobra.Command) { } // currentScanBuilder returns a builder pre-populated with the user's current scan flags. -// All scan command suggestions should use this as the base to ensure that adding a new -// flag in one place automatically propagates to every suggestion. +// All scan command suggestions should use this as the base; every ScanConfig field that +// changes scan semantics must be represented here or suggestions will silently drop it. func currentScanBuilder(cfg ScanConfig, sourcePath string) *utils.OpentaintCommandBuilder { - return utils.NewScanCommand(sourcePath). + b := utils.NewScanCommand(sourcePath). WithOutput(cfg.SarifReportPath). WithTimeout(globals.Config.Scan.Timeout). WithRuleset(cfg.Ruleset). - WithSemgrepCompatibility(cfg.SemgrepCompatibilitySarif) + WithSemgrepCompatibility(cfg.SemgrepCompatibilitySarif). + WithRuleID(cfg.RuleID). + WithPassthroughApproximations(cfg.PassthroughApproximations). + WithDataflowApproximations(cfg.DataflowApproximations). + WithTrackExternalMethods(cfg.TrackExternalMethods) + if !isDefaultSeverity(cfg.Severity) { + b.WithSeverity(cfg.Severity) + } + return b +} + +// isDefaultSeverity reports whether sev is exactly the flag default, in which +// case suggestions omit the flag entirely. +func isDefaultSeverity(sev []string) bool { + return len(sev) == 2 && sev[0] == "warning" && sev[1] == "error" } // dockerScanSuggestion builds the "try Docker-based scan" fallback hint. @@ -282,7 +285,7 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { } } - maxMemory, err := validation.ValidateScanInputs(absUserProjectRoot, absProjectModelPath, absSarifReportPath, nonBuiltinRulesetPaths, cfg.Severity, globals.Config.Scan.MaxMemory, plan.mode == Scan) + maxMemory, err := validation.ValidateScanInputs(absUserProjectRoot, absProjectModelPath, absSarifReportPath, nonBuiltinRulesetPaths, cfg.Severity, globals.Config.Scan.MaxMemory, !plan.needsCompilation) if err != nil { out.Fatalf("Input validation failed: %s", err) } @@ -292,12 +295,16 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { return } + hasBuiltin := false for _, ruleSetPath := range absRuleSetPaths { - if !ruleSetPath.Builtin { - continue + if ruleSetPath.Builtin { + hasBuiltin = true + break } + } + if hasBuiltin { if _, err := utils.EnsureRulesPath(out); err != nil { - out.Fatalf("Unexpected error occurred while trying to download ruleset: %s", err) + out.Fatalf("Failed to prepare built-in rules: %s", err) } } @@ -482,7 +489,6 @@ func runScan(cmd *cobra.Command, cfg ScanConfig) { func resolveScanPlan(cfg ScanConfig, absUserProjectRoot string) scanPlan { if cfg.ProjectModelPath != "" { return scanPlan{ - mode: Scan, absProjectModel: log.AbsPathOrExit(filepath.Clean(cfg.ProjectModelPath), "project model path"), } } @@ -490,7 +496,6 @@ func resolveScanPlan(cfg ScanConfig, absUserProjectRoot string) scanPlan { if cfg.DryRun { dryRunPath := filepath.Join(os.TempDir(), dryRunScanProjectModelPath) return scanPlan{ - mode: CompileAndScan, absProjectModel: dryRunPath, needsCompilation: true, } @@ -512,7 +517,6 @@ func resolveScanPlan(cfg ScanConfig, absUserProjectRoot string) scanPlan { if utils.IsCachedModelComplete(projectCachePath) { output.LogDebugf("Reusing cached model at: %s", cachedModelPath) return scanPlan{ - mode: Scan, absProjectModel: cachedModelPath, projectCachePath: projectCachePath, cacheLock: sharedLock, @@ -549,7 +553,6 @@ func resolveScanPlan(cfg ScanConfig, absUserProjectRoot string) scanPlan { } return scanPlan{ - mode: CompileAndScan, absProjectModel: cachedModelPath, projectCachePath: projectCachePath, needsCompilation: true, @@ -558,7 +561,7 @@ func resolveScanPlan(cfg ScanConfig, absUserProjectRoot string) scanPlan { } func printScanInfo(cmd *cobra.Command, plan scanPlan, absSemgrepRuleLoadTracePath string, absUserProjectRoot string, absRuleSetPaths []RulesetType) { - sb := out.Section(plan.mode.String()) + sb := out.Section(plan.title()) addConfigFields(cmd, sb) if globals.Config.Output.Debug { sb.FieldNode("Rule load trace", absSemgrepRuleLoadTracePath) diff --git a/cli/internal/utils/opentaint_command_builder.go b/cli/internal/utils/opentaint_command_builder.go index f3b746f30..1efff7a58 100644 --- a/cli/internal/utils/opentaint_command_builder.go +++ b/cli/internal/utils/opentaint_command_builder.go @@ -181,6 +181,34 @@ func (cb *OpentaintCommandBuilder) WithRuleID(ruleIDs []string) *OpentaintComman return cb } +// WithPassthroughApproximations adds repeatable --passthrough-approximations paths. +func (cb *OpentaintCommandBuilder) WithPassthroughApproximations(paths []string) *OpentaintCommandBuilder { + for _, p := range paths { + if p != "" { + cb.arrayFlags["passthrough-approximations"] = append(cb.arrayFlags["passthrough-approximations"], p) + } + } + return cb +} + +// WithDataflowApproximations adds repeatable --dataflow-approximations paths. +func (cb *OpentaintCommandBuilder) WithDataflowApproximations(paths []string) *OpentaintCommandBuilder { + for _, p := range paths { + if p != "" { + cb.arrayFlags["dataflow-approximations"] = append(cb.arrayFlags["dataflow-approximations"], p) + } + } + return cb +} + +// WithTrackExternalMethods sets the track-external-methods flag. +func (cb *OpentaintCommandBuilder) WithTrackExternalMethods(enabled bool) *OpentaintCommandBuilder { + if enabled { + cb.boolFlags["track-external-methods"] = true + } + return cb +} + // WithPartialFingerprint adds repeatable --partial-fingerprint filters. func (cb *OpentaintCommandBuilder) WithPartialFingerprint(fingerprints []string) *OpentaintCommandBuilder { for _, f := range fingerprints { From 4e40fa4bc9d0afbf553d36103b6841134f34dfcc Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:32:50 +0200 Subject: [PATCH 42/54] refactor(cli): extract test-project scaffolding into internal/testproject MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Gradle layout lived in the cobra command file and hardcoded 'libs/opentaint-sast-test-util.jar' in the template while the jar copy used testutil.JarName three hops away — a coherent rename would scaffold projects that fail only at sample-compile time. The layout now lives in one package sharing the JarName constant, writes through utils.WriteFiles/CopyFile (no redundant MkdirAll loop), and the jar resolves once per command instead of once per kind. generate_jar.go reuses utils.CopyFile (gaining close-error checking); package-internal identifiers are unexported. --- cli/cmd/test_init.go | 84 ++++---------------- cli/internal/testapprox/testapprox.go | 8 +- cli/internal/testproject/testproject.go | 57 +++++++++++++ cli/internal/testproject/testproject_test.go | 44 ++++++++++ cli/internal/testrule/testrule.go | 12 +-- cli/internal/testutil/generate_jar.go | 32 +------- cli/internal/testutil/testutil.go | 6 +- 7 files changed, 133 insertions(+), 110 deletions(-) create mode 100644 cli/internal/testproject/testproject.go create mode 100644 cli/internal/testproject/testproject_test.go diff --git a/cli/cmd/test_init.go b/cli/cmd/test_init.go index 0d41beca8..66488d8d8 100644 --- a/cli/cmd/test_init.go +++ b/cli/cmd/test_init.go @@ -2,14 +2,12 @@ package cmd import ( "fmt" - "os" "path/filepath" - "strings" "github.com/seqra/opentaint/internal/testapprox" + "github.com/seqra/opentaint/internal/testproject" "github.com/seqra/opentaint/internal/testrule" "github.com/seqra/opentaint/internal/testutil" - "github.com/seqra/opentaint/internal/utils" "github.com/spf13/cobra" ) @@ -44,9 +42,15 @@ Use --dependency to add compile-only Maven dependencies for the samples.`, } else if initRuleSourcesOnly { kinds = []string{"sources"} } + jarSrc, err := testutil.ResolveJar() + if err != nil { + out.Fatalf("Failed to resolve test-util JAR: %s", err) + } for _, kind := range kinds { dir := filepath.Join(args[0], kind) - bootstrapTestProject(dir, "opentaint-rule-test-"+kind, initRuleProjectDeps) + if err := testproject.Bootstrap(dir, "opentaint-rule-test-"+kind, initRuleProjectDeps, jarSrc); err != nil { + out.Fatalf("Failed to bootstrap test project: %s", err) + } if err := testrule.Scaffold(dir); err != nil { out.Fatalf("Failed to scaffold rule test project: %s", err) } @@ -73,7 +77,13 @@ The approximation under test is supplied separately at test time with Use --dependency to add compile-only Maven dependencies for the samples.`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { - bootstrapTestProject(args[0], "approximation-test-project", initApproxProjectDeps) + jarSrc, err := testutil.ResolveJar() + if err != nil { + out.Fatalf("Failed to resolve test-util JAR: %s", err) + } + if err := testproject.Bootstrap(args[0], "approximation-test-project", initApproxProjectDeps, jarSrc); err != nil { + out.Fatalf("Failed to bootstrap test project: %s", err) + } if err := testapprox.Scaffold(args[0]); err != nil { out.Fatalf("Failed to scaffold approximation project: %s", err) } @@ -94,67 +104,3 @@ func init() { testApproximationInitCmd.Flags().StringArrayVar(&initApproxProjectDeps, "dependency", nil, "Compile-only Maven dependency coordinates for generated samples (repeatable)") } - -// bootstrapTestProject creates the shared Gradle layout (dirs, test-util JAR, build files) -// used by both `test rule init` and `test approximation init`. -func bootstrapTestProject(outputDir, projectName string, dependencies []string) { - dirs := []string{ - filepath.Join(outputDir, "libs"), - filepath.Join(outputDir, "src", "main", "java", "test"), - } - for _, d := range dirs { - if err := os.MkdirAll(d, 0o755); err != nil { - out.Fatalf("Failed to create directory %s: %s", d, err) - } - } - - testUtilJarSrc, err := testutil.ResolveJar() - if err != nil { - out.Fatalf("Failed to resolve test-util JAR: %s", err) - } - testUtilJarDst := filepath.Join(outputDir, "libs", testutil.JarName) - if err := utils.CopyFile(testUtilJarSrc, testUtilJarDst); err != nil { - out.Fatalf("Failed to copy test-util JAR: %s", err) - } - - if err := generateBuildGradle(outputDir, dependencies); err != nil { - out.Fatalf("Failed to generate build.gradle.kts: %s", err) - } - - if err := generateSettingsGradle(outputDir, projectName); err != nil { - out.Fatalf("Failed to generate settings.gradle.kts: %s", err) - } -} - -func generateBuildGradle(outputDir string, dependencies []string) error { - var sb strings.Builder - sb.WriteString(`plugins { - java -} - -java { - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 -} - -repositories { - mavenCentral() -} - -dependencies { - compileOnly(files("libs/opentaint-sast-test-util.jar")) -`) - for _, dep := range dependencies { - sb.WriteString(fmt.Sprintf(" compileOnly(\"%s\")\n", dep)) - } - sb.WriteString("}\n") - - path := filepath.Join(outputDir, "build.gradle.kts") - return os.WriteFile(path, []byte(sb.String()), 0o644) -} - -func generateSettingsGradle(outputDir, projectName string) error { - content := fmt.Sprintf("rootProject.name = %q\n", projectName) - path := filepath.Join(outputDir, "settings.gradle.kts") - return os.WriteFile(path, []byte(content), 0o644) -} diff --git a/cli/internal/testapprox/testapprox.go b/cli/internal/testapprox/testapprox.go index 0bc13c390..36c3e0170 100644 --- a/cli/internal/testapprox/testapprox.go +++ b/cli/internal/testapprox/testapprox.go @@ -11,9 +11,9 @@ import ( "github.com/seqra/opentaint/internal/utils" ) -// FixedRuleFileName is the rule's path relative to the ruleset root, and the value +// fixedRuleFileName is the rule's path relative to the ruleset root, and the value // samples reference in @PositiveRuleSample/@NegativeRuleSample. -const FixedRuleFileName = "approximation-rule.yaml" +const fixedRuleFileName = "approximation-rule.yaml" //go:embed example/approximation-rule.yaml var fixedRule []byte @@ -24,7 +24,7 @@ var taintJava []byte // WriteFixedRule writes the fixed harness rule into dir and returns its path. Used by // test-approximations to apply the rule automatically from a throwaway ruleset directory. func WriteFixedRule(dir string) (string, error) { - path := filepath.Join(dir, FixedRuleFileName) + path := filepath.Join(dir, fixedRuleFileName) if err := os.WriteFile(path, fixedRule, 0o644); err != nil { return "", fmt.Errorf("write fixed approximation rule: %w", err) } @@ -36,7 +36,7 @@ func WriteFixedRule(dir string) (string, error) { // lives in its own unit folder (.opentaint/approximations/), never inside this test project. func Scaffold(projectDir string) error { return utils.WriteFiles(map[string][]byte{ - filepath.Join(projectDir, FixedRuleFileName): fixedRule, + filepath.Join(projectDir, fixedRuleFileName): fixedRule, filepath.Join(projectDir, "src", "main", "java", "test", "Taint.java"): taintJava, }) } diff --git a/cli/internal/testproject/testproject.go b/cli/internal/testproject/testproject.go new file mode 100644 index 000000000..bddaf3605 --- /dev/null +++ b/cli/internal/testproject/testproject.go @@ -0,0 +1,57 @@ +// Package testproject scaffolds the Gradle layout shared by rule and +// approximation test projects: the build files and the test-util jar under +// libs/. The per-flavor payloads (samples, marker rules) stay in +// internal/testrule and internal/testapprox. +package testproject + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/seqra/opentaint/internal/testutil" + "github.com/seqra/opentaint/internal/utils" +) + +// Bootstrap creates the shared Gradle project layout in outputDir: the +// build/settings files referencing the test-util jar, and the jar itself +// copied from testUtilJarSrc into libs/. Parent directories are created as +// needed by the underlying write helpers. +func Bootstrap(outputDir, projectName string, dependencies []string, testUtilJarSrc string) error { + if err := utils.CopyFile(testUtilJarSrc, filepath.Join(outputDir, "libs", testutil.JarName)); err != nil { + return fmt.Errorf("copy test-util JAR: %w", err) + } + return utils.WriteFiles(map[string][]byte{ + filepath.Join(outputDir, "build.gradle.kts"): buildGradle(dependencies), + filepath.Join(outputDir, "settings.gradle.kts"): settingsGradle(projectName), + }) +} + +func buildGradle(dependencies []string) []byte { + var sb strings.Builder + fmt.Fprintf(&sb, `plugins { + java +} + +java { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 +} + +repositories { + mavenCentral() +} + +dependencies { + compileOnly(files("libs/%s")) +`, testutil.JarName) + for _, dep := range dependencies { + fmt.Fprintf(&sb, " compileOnly(%q)\n", dep) + } + sb.WriteString("}\n") + return []byte(sb.String()) +} + +func settingsGradle(projectName string) []byte { + return fmt.Appendf(nil, "rootProject.name = %q\n", projectName) +} diff --git a/cli/internal/testproject/testproject_test.go b/cli/internal/testproject/testproject_test.go new file mode 100644 index 000000000..8eaa93eeb --- /dev/null +++ b/cli/internal/testproject/testproject_test.go @@ -0,0 +1,44 @@ +package testproject + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/seqra/opentaint/internal/testutil" +) + +func TestBootstrapWritesGradleLayoutAndJar(t *testing.T) { + dir := t.TempDir() + jarSrc := filepath.Join(t.TempDir(), testutil.JarName) + if err := os.WriteFile(jarSrc, []byte("fake-jar"), 0o644); err != nil { + t.Fatal(err) + } + + if err := Bootstrap(dir, "my-test-project", []string{"com.foo:bar:1.0"}, jarSrc); err != nil { + t.Fatalf("Bootstrap: %v", err) + } + + build, err := os.ReadFile(filepath.Join(dir, "build.gradle.kts")) + if err != nil { + t.Fatal(err) + } + // The gradle dependency and the copied jar must share one filename constant. + if !strings.Contains(string(build), "libs/"+testutil.JarName) { + t.Errorf("build.gradle.kts must reference libs/%s, got:\n%s", testutil.JarName, build) + } + if !strings.Contains(string(build), `compileOnly("com.foo:bar:1.0")`) { + t.Errorf("build.gradle.kts missing dependency, got:\n%s", build) + } + if _, err := os.Stat(filepath.Join(dir, "libs", testutil.JarName)); err != nil { + t.Errorf("jar not copied to libs/: %v", err) + } + settings, err := os.ReadFile(filepath.Join(dir, "settings.gradle.kts")) + if err != nil { + t.Fatal(err) + } + if want := `rootProject.name = "my-test-project"`; !strings.Contains(string(settings), want) { + t.Errorf("settings.gradle.kts missing %q, got:\n%s", want, settings) + } +} diff --git a/cli/internal/testrule/testrule.go b/cli/internal/testrule/testrule.go index bb3fbd52e..03eb96c1d 100644 --- a/cli/internal/testrule/testrule.go +++ b/cli/internal/testrule/testrule.go @@ -21,13 +21,13 @@ var genericSource []byte var genericSink []byte // Marker locations, relative to the test project root. The marker lib rules and the -// test join an agent writes alongside them live only under MarkersDir — never in +// test join an agent writes alongside them live only under the test-rules dir — never in // .opentaint/rules — so they never reach the main project scan. The rule paths double // as the values an agent refs from a test join (relative to the test-rules root). const ( - MarkersDir = "test-rules" - GenericSourceRule = "java/lib/test/generic-source.yaml" - GenericSinkRule = "java/lib/test/generic-sink.yaml" + markersDir = "test-rules" + genericSourceRule = "java/lib/test/generic-source.yaml" + genericSinkRule = "java/lib/test/generic-sink.yaml" ) // Scaffold writes the Taint helper into the project sources and the generic @@ -35,7 +35,7 @@ const ( func Scaffold(projectDir string) error { return utils.WriteFiles(map[string][]byte{ filepath.Join(projectDir, "src", "main", "java", "test", "Taint.java"): taintJava, - filepath.Join(projectDir, MarkersDir, filepath.FromSlash(GenericSourceRule)): genericSource, - filepath.Join(projectDir, MarkersDir, filepath.FromSlash(GenericSinkRule)): genericSink, + filepath.Join(projectDir, markersDir, filepath.FromSlash(genericSourceRule)): genericSource, + filepath.Join(projectDir, markersDir, filepath.FromSlash(genericSinkRule)): genericSink, }) } diff --git a/cli/internal/testutil/generate_jar.go b/cli/internal/testutil/generate_jar.go index 4bf69196f..0a4fcb36b 100644 --- a/cli/internal/testutil/generate_jar.go +++ b/cli/internal/testutil/generate_jar.go @@ -4,9 +4,10 @@ package main import ( "fmt" - "io" "os" "path/filepath" + + "github.com/seqra/opentaint/internal/utils" ) const ( @@ -16,33 +17,8 @@ const ( ) func main() { - if err := copyJar(); err != nil { - fmt.Fprintf(os.Stderr, "generate test-util jar: %v\n", err) + if err := utils.CopyFile(sourceJar, filepath.Join(outputDir, jarName)); err != nil { + fmt.Fprintf(os.Stderr, "generate test-util jar: %v; build it with 'cd ../../../core && ./gradlew :opentaint-sast-test-util:jar'\n", err) os.Exit(1) } } - -func copyJar() error { - if err := os.MkdirAll(outputDir, 0o755); err != nil { - return fmt.Errorf("create %s: %w", outputDir, err) - } - - in, err := os.Open(sourceJar) - if err != nil { - return fmt.Errorf("open %s: %w; build it with 'cd ../../../core && ./gradlew :opentaint-sast-test-util:jar'", sourceJar, err) - } - defer in.Close() - - outPath := filepath.Join(outputDir, jarName) - out, err := os.Create(outPath) - if err != nil { - return fmt.Errorf("create %s: %w", outPath, err) - } - defer out.Close() - - if _, err := io.Copy(out, in); err != nil { - return fmt.Errorf("copy %s to %s: %w", sourceJar, outPath, err) - } - - return nil -} diff --git a/cli/internal/testutil/testutil.go b/cli/internal/testutil/testutil.go index 88d20d9f4..b32200ef8 100644 --- a/cli/internal/testutil/testutil.go +++ b/cli/internal/testutil/testutil.go @@ -57,7 +57,7 @@ func ResolveJar() (string, error) { } } - if extracted, err := ExtractJar(); err == nil { + if extracted, err := extractJar(); err == nil { return extracted, nil } @@ -72,11 +72,11 @@ func contentHash(jarData []byte) string { return hex.EncodeToString(h[:]) } -// ExtractJar extracts the embedded test-util JAR to ~/.opentaint/test-util/ +// extractJar extracts the embedded test-util JAR to ~/.opentaint/test-util/ // and returns the path to the extracted JAR. Uses a SHA-256 content hash // marker for staleness detection so the extracted copy is refreshed when the // binary is rebuilt with a newer JAR. -func ExtractJar() (string, error) { +func extractJar() (string, error) { jarData, err := embeddedJarData() if err != nil { return "", err From 5bc1fe8a40e2e546c8624f209b29de80cb0a553a Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:33:31 +0200 Subject: [PATCH 43/54] fix(build): correct make dependency graph, install paths, and dev wrapper portability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bare 'make cli' (and parallel 'make -j all') died on fresh checkouts: go generate hard-requires the test-util jar but the cli target had no core dependency - a relative PREFIX split the install: root resolved BINDIR against the repo root while cli/Makefile's abspath resolved it against cli/ — BINDIR is now passed absolutized - the opentaint-dev wrapper depended on non-POSIX realpath; it now falls back to plain $0 resolution - 'make install' built the CLI twice (cli prerequisite + go install); cli install now reuses the build output, which also makes BINARY_NAME effective - the core target ran three Gradle invocations; one invocation configures once and parallelizes the jar tasks --- Makefile | 14 +++++++++----- cli/Makefile | 6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 3c4eb4c52..b01386286 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,9 @@ INSTALLED_DEV_BINARY := $(BINDIR)/$(CLI_DEV_BINARY_NAME) all: core cli -core: projectAnalyzerJar core/autobuilder core/opentaint-sast-test-util +# One Gradle invocation: configuration runs once and the jar tasks parallelize. +core: + cd $(CORE_DIR) && $(GRADLEW) $(ANALYZER_TASK) $(AUTOBUILDER_TASK) $(TEST_UTIL_TASK) projectAnalyzerJar: cd $(CORE_DIR) && $(GRADLEW) $(ANALYZER_TASK) @@ -40,12 +42,13 @@ core/autobuilder: core/opentaint-sast-test-util: cd $(CORE_DIR) && $(GRADLEW) $(TEST_UTIL_TASK) -cli: +# go generate embeds the test-util jar, so the cli build needs it built first. +cli: core/opentaint-sast-test-util $(MAKE) -C $(CLI_DIR) build -install: core cli +install: core mkdir -p $(BINDIR) $(LIBDIR) - $(MAKE) -C $(CLI_DIR) install PREFIX=$(PREFIX) BINDIR=$(BINDIR) + $(MAKE) -C $(CLI_DIR) install PREFIX=$(PREFIX) BINDIR=$(abspath $(BINDIR)) $(INSTALL) -m 0644 $(ANALYZER_JAR) $(INSTALLED_ANALYZER_JAR) $(INSTALL) -m 0644 $(AUTOBUILDER_JAR) $(INSTALLED_AUTOBUILDER_JAR) $(INSTALL) -m 0644 $(TEST_UTIL_JAR) $(LIBDIR)/$(notdir $(TEST_UTIL_JAR)) @@ -55,7 +58,8 @@ install: core cli printf '%s\n' \ '#!/bin/sh' \ 'set -eu' \ - 'BIN_DIR=$$(CDPATH= cd -- "$$(dirname -- "$$(realpath "$$0")")" && pwd)' \ + 'if command -v realpath >/dev/null 2>&1; then SELF=$$(realpath "$$0"); else SELF=$$0; fi' \ + 'BIN_DIR=$$(CDPATH= cd -- "$$(dirname -- "$$SELF")" && pwd -P)' \ 'PREFIX_DIR=$$(CDPATH= cd -- "$$BIN_DIR/.." && pwd)' \ 'LIB_DIR="$$PREFIX_DIR/lib"' \ 'exec "$$BIN_DIR/$(CLI_BINARY_NAME)" --experimental --analyzer-jar "$$LIB_DIR/$(notdir $(ANALYZER_JAR))" --autobuilder-jar "$$LIB_DIR/$(notdir $(AUTOBUILDER_JAR))" "$$@"' \ diff --git a/cli/Makefile b/cli/Makefile index 64c9562b7..fca2d5733 100644 --- a/cli/Makefile +++ b/cli/Makefile @@ -19,9 +19,9 @@ build: generate mkdir -p $(BUILD_DIR) $(GO) build -o $(BINARY_PATH) . -install: generate - mkdir -p $(BINDIR) - GOBIN=$(INSTALL_GOBIN) $(GO) install . +install: build + mkdir -p $(INSTALL_GOBIN) + install -m 0755 $(BINARY_PATH) $(INSTALL_GOBIN)/$(BINARY_NAME) clean: rm -f $(BINARY_PATH) From 0c5d8b3edbf948701336e843ca64e6546e881212 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:34:01 +0200 Subject: [PATCH 44/54] fix(ci): ship test-util jar and release-version marker in CLI releases Released binaries could never resolve opentaint-sast-test-util.jar: the lib-assembly step shipped only analyzer/autobuilder/rules and goreleaser ran without go generate, so the go:embed held only the README placeholder and 'opentaint test rule|approximation init' hard-failed for every non-source install. The jar is now built and embedded before goreleaser (the Docker build uses the same workspace, so its context picks the generated jar up too) and bundled into lib/. lib/release-versions.yaml marks the assembled lib dir as an unmodified official release so bundled-tier installs keep their pinned version labels. --- .github/workflows/release-cli.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.github/workflows/release-cli.yaml b/.github/workflows/release-cli.yaml index afed35662..0f0d27428 100644 --- a/.github/workflows/release-cli.yaml +++ b/.github/workflows/release-cli.yaml @@ -169,8 +169,32 @@ jobs: mkdir -p lib/rules tar -xzf /tmp/opentaint-rules.tar.gz -C lib/rules + # Mark this lib/ as an unmodified official release for these exact + # pinned versions; the CLI shows pinned versions (not "custom") only + # when this marker matches its embedded versions.yaml. + cp internal/globals/versions.yaml lib/release-versions.yaml + echo "Bundled artifacts:" ls -la lib/ + - + name: Set up JDK for test-util jar + if: ${{ steps.release_version.outputs.status == 'succeeded' }} + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: '21' + - + name: Build and embed test-util jar + if: ${{ steps.release_version.outputs.status == 'succeeded' }} + run: | + set -euo pipefail + # Build the jar, embed it into the Go binary (go:embed), and bundle + # it into lib/ so `opentaint test ... init` works from every install + # channel (archives, brew, npm, and the Docker image built from this + # same workspace). + (cd core && ./gradlew :opentaint-sast-test-util:jar) + (cd cli && go generate ./...) + cp core/opentaint-sast-test-util/build/libs/opentaint-sast-test-util.jar cli/lib/ - name: Run GoReleaser if: ${{ steps.release_version.outputs.status == 'succeeded' }} From 3e5a1c03d49946a9416955f7fb59573688d0e947 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:34:49 +0200 Subject: [PATCH 45/54] fix(skills): harden sarif-to-findings against block-style hashes, null results, and locale encoding A finding file whose sarif_hashes list was rewritten in YAML block style made HASHES_RE.sub a silent no-op: merged hashes were never persisted, so every rescan counted the same hashes as new and clobbered the analyst's verdict back to pending, forever. Both styles now parse and rewrite, and a missing key is prepended rather than dropped. Also: 'results': null (aborted runs) no longer raises TypeError, and all file I/O pins encoding=utf-8. --- .../appsec-agent/scripts/sarif-to-findings.py | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/skills/appsec-agent/scripts/sarif-to-findings.py b/skills/appsec-agent/scripts/sarif-to-findings.py index c73abf57e..6ef073f85 100644 --- a/skills/appsec-agent/scripts/sarif-to-findings.py +++ b/skills/appsec-agent/scripts/sarif-to-findings.py @@ -72,8 +72,9 @@ def result_hash(res): def scan_results(sarif): """rule_id -> {hash: message}""" out = {} - for run in sarif.get("runs", []): - for res in run.get("results", []): + for run in sarif.get("runs") or []: + # An aborted run may carry an explicit "results": null (SARIF allows it). + for res in run.get("results") or []: rid = res.get("ruleId") or "unknown" msg = (res.get("message", {}) or {}).get("text", "").strip() out.setdefault(rid, {})[result_hash(res)] = msg @@ -83,16 +84,38 @@ def scan_results(sarif): NAME_RE = re.compile(r'^finding_name:\s*(.+?)\s*$', re.M) RULE_RE = re.compile(r'^rule_id:\s*(.+?)\s*$', re.M) HASHES_RE = re.compile(r'^sarif_hashes:\s*\[(.*)\]\s*$', re.M) +HASHES_BLOCK_RE = re.compile(r'^sarif_hashes:\s*\n((?:[ \t]+-[^\n]*\n?)+)', re.M) + + +def parse_hashes(text): + """Hashes from either flow style ([a, b]) or block style (- a / - b).""" + m = HASHES_RE.search(text) + if m: + return [h.strip() for h in m.group(1).split(",") if h.strip()] + m = HASHES_BLOCK_RE.search(text) + if m: + return [ln.strip().lstrip("-").strip() + for ln in m.group(1).splitlines() if ln.strip().lstrip("-").strip()] + return [] + + +def replace_hashes(text, merged): + """Rewrite the sarif_hashes entry (either style) as a flow list; if the key + is missing entirely, prepend it so merged hashes are never silently lost.""" + line = "sarif_hashes: " + fmt_list(merged) + if HASHES_RE.search(text): + return HASHES_RE.sub(lambda m: line, text, count=1) + if HASHES_BLOCK_RE.search(text): + return HASHES_BLOCK_RE.sub(line + "\n", text, count=1) + return line + "\n" + text def parse_existing(text): name = NAME_RE.search(text) rid = RULE_RE.search(text) - hm = HASHES_RE.search(text) - hashes = [h.strip() for h in hm.group(1).split(",") if h.strip()] if hm else [] return (name.group(1) if name else None, rid.group(1) if rid else None, - hashes) + parse_hashes(text)) def fmt_list(hashes): @@ -118,7 +141,7 @@ def main(): help="findings dir (default: .opentaint/tracking/findings)") args = ap.parse_args() - by_rule = scan_results(json.loads(Path(args.sarif).read_text())) + by_rule = scan_results(json.loads(Path(args.sarif).read_text(encoding="utf-8"))) out = Path(args.out) out.mkdir(parents=True, exist_ok=True) @@ -126,7 +149,7 @@ def main(): existing = {} # rule_id -> [(path, hashes)] taken = set() for p in sorted(glob.glob(str(out / "*.yaml"))): - name, rid, hashes = parse_existing(Path(p).read_text()) + name, rid, hashes = parse_existing(Path(p).read_text(encoding="utf-8")) if name: taken.add(name) if rid: @@ -141,7 +164,7 @@ def main(): taken.add(name) notes = "\n".join(sorted({m for m in hashmap.values() if m})) (out / f"{name}.yaml").write_text( - new_file_text(name, rid, sorted(scanned), notes)) + new_file_text(name, rid, sorted(scanned), notes), encoding="utf-8") created += 1 continue already = set().union(*(set(h) for _, h in files)) @@ -152,10 +175,10 @@ def main(): # add new hashes to the first finding file for this rule; reset verdict path, hashes = files[0] merged = sorted(set(hashes) | set(new)) - text = path.read_text() - text = HASHES_RE.sub(lambda m: "sarif_hashes: " + fmt_list(merged), text, count=1) + text = path.read_text(encoding="utf-8") + text = replace_hashes(text, merged) text = re.sub(r'^verdict:\s*.+$', "verdict: pending", text, count=1, flags=re.M) - path.write_text(text) + path.write_text(text, encoding="utf-8") updated += 1 print(f"findings: {created} created, {updated} updated, {unchanged} unchanged " From 7768b1b24b78a0698e0b8a67f7c9aa0badb1808a Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:35:18 +0200 Subject: [PATCH 46/54] docs: fix rule-test invocations to include marker and builtin rulesets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'test rule run' example omitted the test project's test-rules marker ruleset, so the documented flow reported every positive sample as a false negative; the reachability example replaced the builtin default (pflag array semantics), silently breaking ref expansion into builtin lib rules — the debug-rule skill already showed the correct two-ruleset form. The reachability help example also lacked the 'java/' ruleset-root prefix real rule ids carry. --- cli/cmd/test_rule_reachability.go | 2 +- docs/usage.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index f8a918695..a3eeb03e7 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -13,7 +13,7 @@ var testRuleReachabilityCmd = &cobra.Command{ fact-reachability details. Use this to debug why a rule does or does not fire. Arguments: - rule-id - Full rule ID, e.g. security/sqli.yaml:sql-injection + rule-id - Full rule ID, e.g. java/security/sqli.yaml:sql-injection source-path - Path to the project sources (default: current directory) Referenced library source and sink rules are collected and analyzed automatically. diff --git a/docs/usage.md b/docs/usage.md index 92e540232..c20f807fe 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -147,8 +147,8 @@ The `test` command group is experimental tooling for rule and approximation deve ```bash opentaint test rule init .opentaint/test-projects/my-rule opentaint compile .opentaint/test-projects/my-rule/sinks -o .opentaint/test-compiled/my-rule/sinks -opentaint test rule run .opentaint/test-compiled/my-rule/sinks --ruleset .opentaint/rules -opentaint test rule reachability java/security/my-rule.yaml:my-rule --project-model .opentaint/test-compiled/my-rule/sinks --ruleset .opentaint/rules +opentaint test rule run .opentaint/test-compiled/my-rule/sinks --ruleset .opentaint/rules --ruleset .opentaint/test-projects/my-rule/sinks/test-rules +opentaint test rule reachability java/security/my-rule.yaml:my-rule --project-model .opentaint/test-compiled/my-rule/sinks --ruleset builtin --ruleset .opentaint/rules ``` | Command | Description | From 6e5a24eaa93cfd9d842ba7690f20fb98a896f6b1 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:36:08 +0200 Subject: [PATCH 47/54] fix(skills): align approximation guidance, tracking schema, and script paths across skills - the create-pass-through load-check told subagents to run a full scan on the main project model, contradicting appsec-agent's no-scan rule for subagents; it is now explicitly standalone-only - appsec-agent's blanket 'approximation over a built-in errors at load' applied only to dataflow approximations and contradicted its own passThrough merge rule three lines up - analyze-external-methods' tracking templates omitted the artifact field that appsec-agent's single-source-of-truth schema declares - triage.md's verbatim command mixed a skill-dir-relative script path with project-relative arguments; no cwd satisfied both --- skills/analyze-external-methods/SKILL.md | 2 ++ skills/appsec-agent/SKILL.md | 2 +- skills/appsec-agent/references/triage.md | 2 +- skills/create-pass-through-approximation/SKILL.md | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/skills/analyze-external-methods/SKILL.md b/skills/analyze-external-methods/SKILL.md index b73aa2bca..6fbb3da9c 100644 --- a/skills/analyze-external-methods/SKILL.md +++ b/skills/analyze-external-methods/SKILL.md @@ -49,6 +49,7 @@ Create one file per (package, kind); fill only the discovery-stage fields. The t ```yaml # -passthrough.yaml — simple copies, no test project package: com.foo +artifact: null stages: description: done written: pending @@ -62,6 +63,7 @@ methods: ```yaml # -dataflow.yaml — lambda/callback/async, tested on a test project package: com.foo +artifact: null dependencies: # exact GAV the test project needs, from the build files - com.foo:foo-core:1.2.3 stages: diff --git a/skills/appsec-agent/SKILL.md b/skills/appsec-agent/SKILL.md index 422819b5d..816062ec4 100644 --- a/skills/appsec-agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -258,5 +258,5 @@ methods: # engine asks to approximate these, but they carry no ta - `--passthrough-approximations` merges with built-ins at the rule level; a provided rule overrides a built-in only when it matches one already there — it does not replace the built-in set - both approximation dir flags walk the tree recursively, so the final scan points at the parent dirs and applies every unit - `--rule-id` drops every rule not named, including library `refs` — list them all when restricting -- a custom approximation targeting a class that already has a built-in one errors at load +- a custom DATAFLOW approximation targeting a class that already has a built-in dataflow approximation errors at load (one class, one approximation); passThrough configs never error this way — they merge at the rule level (see above) - a custom dataflow approximation overrides a passThrough for the same method — the passThrough→dataflow fallback when a passThrough won't converge; remove that method's passThrough config when re-planning it as dataflow, before the dataflow one is tested or scanned, to avoid override issues diff --git a/skills/appsec-agent/references/triage.md b/skills/appsec-agent/references/triage.md index c1bab0c03..e1226e67d 100644 --- a/skills/appsec-agent/references/triage.md +++ b/skills/appsec-agent/references/triage.md @@ -4,7 +4,7 @@ The scan must be stable first. ## Generate finding files -Run this skill's bundled `scripts/sarif-to-findings.py` over `.opentaint/results/report.sarif` (`python3 scripts/sarif-to-findings.py .opentaint/results/report.sarif -o .opentaint/tracking/findings`). It writes one `tracking/findings/.yaml` per rule and is idempotent — a rescan adds new result hashes and resets changed findings to `pending`. This is a deterministic script with no context cost, so run it yourself, not via a subagent. +Run this skill's bundled `scripts/sarif-to-findings.py` over `.opentaint/results/report.sarif` (`python3 /scripts/sarif-to-findings.py .opentaint/results/report.sarif -o .opentaint/tracking/findings` — the script lives in the skill directory, not the project; the project-relative paths are arguments). It writes one `tracking/findings/.yaml` per rule and is idempotent — a rescan adds new result hashes and resets changed findings to `pending`. This is a deterministic script with no context cost, so run it yourself, not via a subagent. ## Classify — never in main diff --git a/skills/create-pass-through-approximation/SKILL.md b/skills/create-pass-through-approximation/SKILL.md index df501fb1c..1a81f90d8 100644 --- a/skills/create-pass-through-approximation/SKILL.md +++ b/skills/create-pass-through-approximation/SKILL.md @@ -147,7 +147,7 @@ passThrough: ### 2. Optional — dry-run the config for load errors -There's no dedicated load-check command, but if a compiled `` is present you can catch YAML load/parse errors before the main scan by running a quick scan with the config applied (won't verify propagation — there's no matching flow — only that the config loads): +There's no dedicated load-check command. ONLY when invoked standalone — never under the appsec-agent orchestrator, whose subagents must not run `opentaint scan` (the orchestrator's scan phase verifies the config instead): if a compiled `` is present you can catch YAML load/parse errors early by running a quick scan with the config applied (won't verify propagation — there's no matching flow — only that the config loads): ```bash opentaint scan --project-model \ From 74574386ea95f96e05e99e8c4022285b43579741 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Wed, 10 Jun 2026 10:36:49 +0200 Subject: [PATCH 48/54] fix(cli): surface jar-path overrides in pull summary --- cli/cmd/pull.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cli/cmd/pull.go b/cli/cmd/pull.go index e67c03251..230500a08 100644 --- a/cli/cmd/pull.go +++ b/cli/cmd/pull.go @@ -76,6 +76,12 @@ When bundled artifacts are present (from a release archive), they will be used d func downloadArtifact(spec globals.ArtifactDef, installNextToBinary, installCurrent bool) (*tree.Tree, error) { node := out.GroupItem(fmt.Sprintf("%s %s", spec.Name, spec.Version)) + if spec.Override != "" { + // Pull still fetches the release artifact (offline prep stays valid if + // the override is later removed), but scans will use the override. + node.Child(fmt.Sprintf("Config override active: scans use %s", spec.Override)) + } + tiers, err := utils.ArtifactTiers(spec) if err != nil { return node, err From 6d27bfb61bcd9cf30ffac20be4338e33ef7d8dfc Mon Sep 17 00:00:00 2001 From: Gr-i-niy Date: Wed, 10 Jun 2026 19:16:13 +0300 Subject: [PATCH 49/54] feat: improve searching package usages, fix small issues --- Makefile | 2 + cli/cmd/test_rule_reachability.go | 23 ++------ skills/appsec-agent/SKILL.md | 6 +- .../appsec-agent/references/discover-rules.md | 2 +- skills/debug-rule/SKILL.md | 2 +- skills/discover-attack-surface/SKILL.md | 6 +- .../scripts/package-usages.jar | Bin 155088 -> 0 bytes .../scripts/package-usages.ps1 | 55 ++++++++++++++++++ .../scripts/package-usages.sh | 24 ++++++++ 9 files changed, 95 insertions(+), 25 deletions(-) delete mode 100644 skills/discover-attack-surface/scripts/package-usages.jar create mode 100644 skills/discover-attack-surface/scripts/package-usages.ps1 create mode 100755 skills/discover-attack-surface/scripts/package-usages.sh diff --git a/Makefile b/Makefile index b01386286..e823e8235 100644 --- a/Makefile +++ b/Makefile @@ -65,6 +65,8 @@ install: core 'exec "$$BIN_DIR/$(CLI_BINARY_NAME)" --experimental --analyzer-jar "$$LIB_DIR/$(notdir $(ANALYZER_JAR))" --autobuilder-jar "$$LIB_DIR/$(notdir $(AUTOBUILDER_JAR))" "$$@"' \ > $(INSTALLED_DEV_BINARY) chmod 0755 $(INSTALLED_DEV_BINARY) + # Pull any assets the local build doesn't produce (e.g. the Java runtime). + $(INSTALLED_CLI_BINARY) pull clean: $(MAKE) -C $(CLI_DIR) clean diff --git a/cli/cmd/test_rule_reachability.go b/cli/cmd/test_rule_reachability.go index a3eeb03e7..72365e888 100644 --- a/cli/cmd/test_rule_reachability.go +++ b/cli/cmd/test_rule_reachability.go @@ -9,24 +9,11 @@ var reachabilityEntryPoint string var testRuleReachabilityCmd = &cobra.Command{ Use: "reachability [source-path]", Short: "Trace why a rule can or cannot reach its facts", - Long: `Scan a project with one rule and write a sibling SARIF report with -fact-reachability details. Use this to debug why a rule does or does not fire. + Long: `Scan a project with one rule and write a sibling fact-reachability SARIF +report (debug-ifds-fact-reachability.sarif) next to the main one. Use this to +debug why a rule does or does not fire. -Arguments: - rule-id - Full rule ID, e.g. java/security/sqli.yaml:sql-injection - source-path - Path to the project sources (default: current directory) - -Referenced library source and sink rules are collected and analyzed automatically. - -The fact-reachability report is written next to the main SARIF as debug-ifds-fact-reachability.sarif. - -Use --entry-points to start analysis from a specific method while tracing reachability. -The value is '*' for all methods or a fully qualified method such as com.example.Class#method. -For non-Spring projects this restricts the entry-point set. For Spring projects it adds to -the auto-discovered entry points because Spring entry points cannot be narrowed. - -Use --project-model to scan a pre-compiled project model instead of compiling from sources. -`, +Referenced library source and sink rules are collected and analyzed automatically.`, Annotations: map[string]string{"PrintConfig": "true"}, Args: cobra.RangeArgs(1, 2), Run: func(cmd *cobra.Command, args []string) { @@ -60,5 +47,5 @@ func init() { testRuleCmd.AddCommand(testRuleReachabilityCmd) addScanFlags(testRuleReachabilityCmd) testRuleReachabilityCmd.Flags().StringVar(&reachabilityEntryPoint, "entry-points", "", - "Start from '*' or a fully qualified method such as com.example.Class#method") + "Start analysis from a fully qualified method such as com.example.Class#method") } diff --git a/skills/appsec-agent/SKILL.md b/skills/appsec-agent/SKILL.md index 816062ec4..d6007093a 100644 --- a/skills/appsec-agent/SKILL.md +++ b/skills/appsec-agent/SKILL.md @@ -84,12 +84,12 @@ Orchestration practices: Two limits apply to every fan-out — a global one against rate-limiting, and a tighter one against memory: -- Global cap of 7 — never dispatch more than 7 subagents at once, of any kind. Bursting more reliably trips transient rate-limiting (a fan-out of 20 left half the agents rate-limited mid-run). It binds light and heavy agents alike +- Global cap of 7 — never dispatch more than 7 subagents at once, of any kind. Bursting more reliably trips transient rate-limiting. It binds light and heavy agents alike. Treat 7 as a starting ceiling: each time a subagent comes back rate-limited, drop the cap by 1 for the rest of the run - RAM-heavy agents each spawn a heavy `opentaint` JVM, so they take a tighter memory bound on top of the global cap. The heavy set is exactly `build-project`, `run-scan`, `create-rule`, `create-dataflow-approximation`, and sometimes `debug-rule` (when it traces a real scan). Compute the bound at run start and never dispatch more than this many heavy subagents at once: - cores — `nproc` (Linux) / `sysctl -n hw.ncpu` (macOS) - free memory in GB — `free -g` (Linux, the `available` column) / `sysctl -n hw.memsize` ÷ 1024³ (macOS) - `cap_heavy = max(1, min(cores, floor(free_GB / 2), 7))` — budget ~2 GB per concurrent JVM -- Every other agent is not RAM-bound — discover-attack-surface (reads the built model plus dependency jars for signatures/metadata), create-test-project (compiles once), triage-dependencies, analyze-external-methods, analyze-findings, create-pass-through-approximation, assemble-lib-rules, generate-poc. They're held only by the global cap of 7 +- Every other agent is not RAM-bound — discover-attack-surface, create-test-project (compiles once), triage-dependencies, analyze-external-methods, analyze-findings, create-pass-through-approximation, assemble-lib-rules, generate-poc. They're held only by the global cap of 7 It's machine state, not run state — recompute on resume, don't track it. PoC is already sequential. @@ -239,7 +239,7 @@ methods: # engine asks to approximate these, but they carry no ta pass-through/.yaml # passThrough approximation configs dataflow// # code-based (dataflow) approximation sources, per unit test-projects// # per-unit test project sources; a rule unit holds sinks/ and sources/ sub-projects, each with a test-rules/ (the generic markers + that side's test join — test-only, never loaded by the main scan) - test-compiled// # per-unit compiled test model (a rule unit: sinks/ and sources/ models) + test-compiled// # per-unit compiled test model (a rule unit: sinks/ and sources/ models); delete once the unit's tests pass — large and unused after test-results// # per-unit test outputs results/ report.sarif diff --git a/skills/appsec-agent/references/discover-rules.md b/skills/appsec-agent/references/discover-rules.md index a707bf920..1a0824c83 100644 --- a/skills/appsec-agent/references/discover-rules.md +++ b/skills/appsec-agent/references/discover-rules.md @@ -6,7 +6,7 @@ Delegate triage-dependencies. Inputs: ``, model-dir `.opentaint/pr ## Discover attack surface -Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml` (capped per SKILL.md § Resource limits). Inputs each: ``, deps-dir `.opentaint/project/dependencies`, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. Each agent first scopes the package to functions/classes used by the project, saving `java -jar scripts/package-usages.jar` output to `tracking/usage/.yaml`, then reviews source/config for indirect reachability. It settles built-in coverage for that used scope (full ⇒ no unit, just `coverage.yaml` done; partial ⇒ expand only the missing used methods; none ⇒ plan used members from scratch). It writes the package's project-used rule plan `tracking/rules/lib/.yaml` (new vs expand; sinks tagged by vuln class), writing no rule and running no test, then flips its `coverage.yaml` entry to `done`. Returns the sources/sinks planned. +Fan out discover-attack-surface in parallel, one agent per `pending` package in `coverage.yaml` (capped per SKILL.md § Resource limits). Inputs each: ``, deps-dir `.opentaint/project/dependencies`, model-dir `.opentaint/project`, tracking-dir `.opentaint/tracking`. Each agent first scopes the package to functions/classes used by the project, running discover-attack-surface's bundled `scripts/package-usages.sh` and saving the package's method usages to `tracking/usage/.yaml`, then reviews source/config for indirect reachability. It settles built-in coverage for that used scope (full ⇒ no unit, just `coverage.yaml` done; partial ⇒ expand only the missing used methods; none ⇒ plan used members from scratch). It writes the package's project-used rule plan `tracking/rules/lib/.yaml` (new vs expand; sinks tagged by vuln class), writing no rule and running no test, then flips its `coverage.yaml` entry to `done`. Returns the sources/sinks planned. Then a quick area cross-check over project-used boundaries only: across network, persistence, environment, serialization, rendering, naming, execution, messaging — is every boundary the project reaches through a dependency either covered by built-ins or now carrying a lib unit? If a reachable boundary has a relevant dependency but produced no unit and no clear reason, dispatch a depth pass for it. Set `phases.discover: done` once every `coverage.yaml` entry is `done`. diff --git a/skills/debug-rule/SKILL.md b/skills/debug-rule/SKILL.md index c938e35f5..565ecfae6 100644 --- a/skills/debug-rule/SKILL.md +++ b/skills/debug-rule/SKILL.md @@ -45,7 +45,7 @@ When the thing under debug is an approximation (or the flow depends on one), app ### 3. Isolate an entry point (optional) -When the run misses the flow and you suspect the entry method is never reached, force analysis onto it with the same `reachability` command plus `--entry-points` — `*` for all methods, or a method FQN: +When the run misses the flow and you suspect the entry method is never reached, force analysis onto it with the same `reachability` command plus `--entry-points` set to a method FQN: ```bash opentaint test rule reachability \ diff --git a/skills/discover-attack-surface/SKILL.md b/skills/discover-attack-surface/SKILL.md index 7d7085f8b..9fcaecbae 100644 --- a/skills/discover-attack-surface/SKILL.md +++ b/skills/discover-attack-surface/SKILL.md @@ -32,7 +32,9 @@ Before planning anything, see what the built-ins already match for this package' ### 2. Scope project-used sources and sinks -Find the package's jar in `` only to confirm the dependency identity and inspect signatures/docs for members already in scope (match the artifact from the dependency GAV; `unzip -l | grep ` confirms it owns the package). Run `java -jar scripts/package-usages.jar --package --model-dir --output /usage/.yaml` first to get the minimal bytecode-derived list of package functions and classes the project statically references; create `usage/` if needed. It parses project `.class` files, including lambda bodies, method references, fluent chains, signatures, annotations, class literals, casts, and dependency-hierarchy owner resolution. The tool can still miss APIs reached only through reflection, dynamic proxies, framework/container dispatch, config strings, generated code absent from the model, or runtime polymorphic targets not named in bytecode. Treat the output as the main used-in-project scope, then inspect app source, dependency API/source, and framework configuration only to classify those used members and to add indirectly reached members that the bytecode list cannot show. Do not enumerate the whole package API. Never read the analyzer jar — only dependency jars +Find the package's jar in `` only to confirm the dependency identity and inspect signatures/docs for members already in scope (match the artifact from the dependency GAV; `unzip -l | grep ` confirms it owns the package). To get the bytecode-derived list of package methods the project statically references, run this skill's bundled `scripts/package-usages.sh ` (Windows: `scripts/package-usages.ps1`; the scripts live in the skill directory, not the project) and save its output to `/usage/.yaml` (create `usage/` if needed). It reads `moduleClasses`/`packages:` from `project.yaml` and disassembles the project's **own** compiled classes only — a model's `moduleClasses` can mix project + dependency jars/dirs, so when the modules carry a `packages:` list only classes under those roots are scanned, otherwise `moduleClasses` is already project-only — then prints the deduped `// Method`/`// InterfaceMethod` call sites whose owner is in ``. + +This catches only bytecode invocations, so it misses members reached through annotations, class literals, casts, reflection, dynamic proxies, framework/container dispatch, config strings, or generated code absent from the model. Treat the output as the main used-in-project scope, then inspect app source, dependency API/source, and framework configuration only to classify those used members and to add indirectly reached members the bytecode list cannot show. Do not enumerate the whole package API. Never disassemble the analyzer jar — only the project's own classes - **sources** — the exact place untrusted data first enters from a boundary (network, persistence, serialization, messaging, execution): a method that *returns* attacker-controlled data — HTTP/RPC request data, a message-broker payload. NOT a method that merely passes data it was handed along — that's a propagator the engine already handles, not a source. General, not class-tagged - **sinks** — dangerous operations (query construction, command/file/path ops, deserialization, template/EL, LDAP/JNDI, reflection); tag each with its vuln class (`ssrf`, `sqli`, `path-traversal`, …) @@ -46,7 +48,7 @@ Write `/rules/lib/.yaml` — only the project-used ## Output - A `/rules/lib/.yaml` rule plan for project-used members only (or, for `full` coverage, none — just the coverage note) -- A `/usage/.yaml` package usage snapshot from `package-usages.jar` +- A `/usage/.yaml` package usage snapshot from `package-usages.sh` - The package's `coverage.yaml` entry set `status: done` with a one-line `notes` - A brief summary to the caller: the sources and sinks planned (one line each, marked `new` / `expand`). The unit holds the detail — don't paste it back diff --git a/skills/discover-attack-surface/scripts/package-usages.jar b/skills/discover-attack-surface/scripts/package-usages.jar deleted file mode 100644 index ba75c1965538280297b5a4ecfcc72dce4afd9a49..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 155088 zcmb5V1F$g7k|um?yvMd}+wZY$+qP}nwr$(CZTp=6yR#D;JNw5>+^y)Yh_33asE*Fe zdNS)NIZ0p;WB>>V2!J(&2`zyCqaXu-07#1{3DAhkh|)<5$cT%IC@RrPi~dXi0Kksb zLk;5p0hrUi!@B`bBE3FZF@lE3FW!!ILkZ97XA1uD*50|}cKC>6ek`FZuM6zgY099| zFVri^1&-Ao5B#mPQtY22-EB}F_z_s z)VAMbSdio_NLKNclLUvT+OKPK`Dd2?|J-u_?c5>%>HXhx2ma?paEO0>8PLC~f69L^ z2=Y&_t%E7u|BDRVf6Ca}8QVDNo7*`3KS`tf|D+u)&8@5)|DOgC`R@ji(>Ju#H#Jsv z{HJmx7IAZO&^L6lbs+wa25Dm_Gg~7Sb4T-knrIEJ^c@|G)qY*El~I1TZcI#Fn5+kj z!mR|M!vzw?TO-sKKr{>&nk*NK5?IrK28>)Yq#C(8Z)6fwD0GXMDdl=7lTeomP``l4 ziK`vp#C-3;e+gqpiFRI1TwD^j_rN?ivR{vTKHs-KZ@O>0e-8P4-;w*e-yD%Q|0w)f zkqd>`*JI^d&7D?{vD}E+)K#lnJQw`6XKL4*Q741`QaodKTA9uiXUn{e10JPz<;~1! zfo6qAAiU|)K3kJK!`h9gApoT$piQ01sZY3RVA-@qW&BQTH)1tL5ex%jSQ1jK(;kGo zBHq%W=#HQAZZYfxhuEJHUTvy-Mup|XbSE`0SwxLIJ2aglY2>r!bo^Gd*>O1u3DQao zF`-Z$qdiGffoUbI$rBNxvLV`1&`7k?vXmiLNSS&nU~$iwo65~AJ4tN8 ziJVgtkrLsW?L2jW+3@E=@e&!-J|k}Wwe>|bP(QDQWE#B4TtT{@j%0ur5@Pc}55*Y{ zNX$+)NG8&5zB)suG795;qFSrmuWZYlYfS!&UuHtdlE4aWHREqFWMMqdQhk6cxNW{E zQL1!Bv@~gA>#|<|zyPDy8z2BH)D~-}8m*DO({Me5*PM;K|3mp?@tOb?GX)ll80yAo zd%EVc-)U=xjvCY1d%x9yVB0IhP7WoU9$6miI%l$Msm?P2C*V(_MB zAUq5llzsK&XiQEiQwMC?sNUMwpLm(gmaVjbxVDIiJLC!>Kf+5W25Fdmm<&3YxfL-C zC38PwRU$q-*ae+XIx5=l+D@nxk-G;P;g$lwXwGCC*~5p9x(YCG zm(r%xE(c>D;0Zh$`8i_YN*lh8h%|6BBb>a2&SqNOWF)IqasgG7?)simUrwxwQ?7ob zG(f@rKwWzi6XrUH=rV?4SPDizZxmBAY2*BBBM9Ng^n?axGtL7Gh`V^E$Fu~*R|pe$8I-Y0YrF(I9v6D^RIO66N;fe2ZCSi+^t z!fYXeTKonmnZ;dG8)LV5>VLoUH%4G16{#9?k|i-5Obp<3J)moBxa5e%cPa(cT3hrP z690rpA7zVZJz#ApXprL~cc!Z5>Y_r`6EV2k)6nrb5W(}ES7)}ip1xP=&3b@FyNl{KDv_q2AgHvxn}=8&`S*`cnCY8p96RMs>{m2q zHQRl$@)hlVgew^N>}`i2h6;0EM1wP$TC^EYWF)n;T7Rt9Fj;zYoGmji*fY8Z*5&U5 zX2;50CxQV6XEsfs84Nz=y&+x!!u10Efb3ls{5@u;Y+G`(@AN9K8rlAV^T+C{9i3sg z4foXdr}3{Iq$b)->1h7Ed(#5aSqT|QC~(e*+Jb&cc>PjDKHehPs{6wxF4O$=@lDcI zPE_b`Oz7c$|6lHhb6%1rS!;#NB+I`;MA8@Af%9%pwrTBAZB29^s#A9ppq25^T*6=Lq)uI%Si){EoEp)l zkqEg$KskXzX^TL2)4-a)q<}wZ-czxL6IcH!3(o8vvGr;~wk@wK&>leVkVZV%BU4L5 z5c(N|HR9ch^INS;+@X-{djY7^_6Rq+8MoZK~ zhH!h&oJ8=m0)OiA1=pJjcc&HvgsPwO3%v8H&OSw`Wz^q{uiH=93mN&EJ`A1^yl+l! zcrVabfKG<%61KUGjfDGBC`VG;B1@0o=W^e>J$x(O_GNx{)9&9$auy~hiI?2oN#kcd z7~0dh$zF+NZ2dwxkb4*Z<+5iePBIlf@2}i+r-rxErU7I33-*fE=8N><3-~X1^e)p~ z`3w#KK!p51!6S};4`&Gd3rdLFnArXoY*DRjt%#+J>~nKsEUpGc>@C?J0Lm32w1&-5v97@@rRV;RFY$>T;E#=XB~ial|f!}B%E zGv_hOarXPMmaG@JEk+v$Q)s|G(%jTiHSI^$fNHwi!K2xL6LcT+4`2ZVL6s2OTrkAo zvCQQ=;%isev3k!TV~{D$=EEs^!1$C~57sul(-S&-piT%=r&n%iWskCiDb1|uT$Xi+ zngjRdhdyb5&ydp)Y}PPGh&f6#p;fMCOYX}{mBP5>?(qpOU4$p@w|gO!=*W%>rd)v$ z)t0plRBV1NPf!dr%=c(y{Lu9koos#V_ z@qHZDJH{Hh16Dfdi3$^i0~dzuN9{>k%1U`)!v)F%WvT&f0>$z9%7i@PK{bcQackv! z?!|tYsEJg@J4{k^+NAfKG*_#{&&!lvp^Q_O8ti;88i5pV+s?Y^EyI)3i47Xuic}%4QD2 z9#)azQYVUG+ThC;>AV7t(u)j(euZO8ijkx!i4m%SM>L$-)qBXnq)Zx=N!O_%*tHqbX(RTHho<_0@ij|RmAU4P!;1Ju4Smo`ymUqHd zXcCL;s>3T~s#(&eVf$+V5$@3ImHp+eVQ#u)hZMRnQzFC70p;wly-4yq9y zawHrsCuN?^q>UDns7hu!L1qP^xuZFJ>BK~>@%oM;O9+@o312h@cRc2u(?7)GY+%(} z#MUT)3x2|ssO*CD&*<|)PsKjN@6kkB+(Ui-4xX_;h6~D{aQfGpPiFA;@}Ic3iyiWt z0Il@03H!iXHMT!V+|CJu<>Nvy#fbSrQALRRe1G!k;pdYf_(rIZWkw*Fhu;7Rt`Rz# zfnm(~D$paFy)PJwTKu>uZ|J={h3tz+!y(9Ngf!&L6O!T*`^tBKO=#WlIH4Rzkw!VuB;b=3CKeVePVoX!~^lD zI-2{%P?brUBUY3&#a`qc!p_-yNy!NrrCTnUheO2W1S(~m!wND8&qDBcBhDk#;2ewy zU?r5I&-qwV&g7U^5zYp-l|2+@(e?>|HHa50S`cCGyHvh$=B_~_TA_*Xe>?%SP7fX}} zINU?6w4g@3N}|wdfl!nQh(866&&lW&Z(F;dE#)5c&J9T(&{%xsk$w#S8gesjKM{&- zl5X>Onw)UorgJboe!phI1I!pOK;Ufv-G$D_zzq}`*OpW#Fl+0l*=(q@L4lHaOCGHZ zS9D$84PvDGTx}4ZYP^KGXt797qSWZ8Plg!QWzOx_V9boM>r-z^bV&hZ2l?W#5c@BgbpmDW@ z5))@2bPQ>$8`9<5Qa#TkCR`2S3JNQ|6=oLH5eS`)@&g3w!|3Dfn!vav+QO(_wicnSmuj@?wT=PYmICIj^`dzAh?WnPEmxM3Wzj;30M1#>l6k z{O4c+UAdS=v<=U;+C#G-b`>q6&iX#6(mvrbtCZL>&B4B~jqsYwptvu`Y-J*RprH=- z`I_U~A`_uRGogLDWV$W~^t?v>U2_0c>zB<(GZqnfVij4s6;4AshkjS-OgPZMmN_`z z;p@SGRbksHkHm6tW*kF^&SzU8raaG!o!a4gMk5BR4}1UaSJLlK4wJOG=V!QSn@Q}e zwQJI+@~}Zxs|cDdIxjEt`x*!#g0pU2Y4&fI=09#rvvt{e;5O%|J~hvX`3*DQ=z@fB z_IVHKD{26K4y$QV(`lYd_>4!Up*MEZ9baajIgbYPVIO8wAeXcP@4nbMCO4vGCLh_F z{28({*Fg3Gl>8Z?^BpJrrF&pUb*;9~8na9BxmXrr&J<<=4VFg@SJxkIpPSas$=KK~ z{yz1|4b$ipgw}Hp9Ok7`i&_3ql$lm#S}~hAul6k87BiVTwk)pzMG0AT?ZKRC5cSXw zWtDpcQ??!vqp~5vct!K=hQf6C6|>yb_uiNO*HpTq9*R$8f0Ubd_b2V4%uz}?!s8PE zyq5rn65zW+y>AC9Zxy&nU*k2m-}yUd7}f{A?iXP5(dvC|!r|fjtJ}|??_Zz=mMy~0 z3m5<(`=9L2e{b@1|DMS!7&|&!IsIoEL_zyshRxHc-C?CpQLUCAfyYlsGxR(iim3A! zf*6arz&p3b5J$~qe5MMLZxUQ^Ads(b7vMuS%C$srp-1g-aw|67bEe((=2cg97ocmz z0~VJO@C$;ejFB4qk&b(&4Rgd6Y?P1%X4$&DXkEX|%ISl^1Oco}Bu!{&Zah*yD;LtZ zO^I}cjB(QGE2Wgoi=5Yt^XY>te=^E$K(<&SdMcSTW9U9f)Wn{!`CNVL9l{wJvj}5V zW3KV;FB}S+F#);<63ON`@)o#g){Bb=j3riLRHA+9?B4AOUU)^$UlO>NEhDnJ6+7LScOg3YjV0sr7Q8(%eBT- z)O7t9IkU!GOQb!>!HeB}i~`_d;d;-{)a7{ypen|$GzHx0HO#Jp>pL@D9_qDjV4Ao{ zOruLf1|qIN()ZVKs%wYDYFE1cj!ZO&4o$SS*JK40>V@?n_&UqCD=>mjfA<$8c=BH> zr4UqHpc$-rjaftwHNxIw;q}guZSBGECW92n!s{+Ex2r3t98Q_o4RwMT)|^Cbp-;Ad zDTcn?T4Wa>06^?N%JJVThVZ|q82{H=@E_pzzbpgQs+O8qs|Y^e#MNpm3~_>0Z;0T5 zG)945khFlIAkaw2^d+(Z66qDr+M z(&w9Yp(+AH9RvkwF?z0#xvNI4H{GNxm`-qT*|t5K0VpvGD%MF40NSnUOGSucSUX$+w;?D&`n!Jc0}}Oo;H9{@F-Kms)+h#sm(i z3T3|H515NX@{~R!QL$aGs4yUNp;)WdhsPzrE(?gFSY%qc`glMZuMX*~*jJ{;3pd@ zh=n_lfi%x3$pm&(R==Av;+Ae&_A$q?M1IN@TFE|3-|L563Kuk&klWceRC5#|UPlpg z<^Vfihb|Ljq9Sd`z5l~8NWr?wcH017QwU$<4J3Eo4k5J-U&5n*Y)wVxb;gmi;mCti z6>&e^6Rt0%oLvW>ITgmXf5!E65Cm2I2LrZa?13k<@vCV`d-j>#!RrAGbzpBue{5ub zO;roj>kpqgSLBZjU^Hx2bVAFlyY=TNRRQ+6ws4q_h?DdNU; z&&l-2WcTn4$A(Hu$u4+2RBU9PR3_oWFtrAM)}VI$qv3}Aw`gVKTNtq_lWlOSG*jpr zZqo`rI*82@?i}9gRFX=l8U)PZ+aHRV?T(Hfe62~W#>lMt&Et<}qVzf4{t+oZhsA8; zNsbwa<>w2;ru!}!zG-@Bhtfu`IqR)!FGkJ&rn!uz?qI0B%8-IEre}4-n9pvsG@`4H zMH3iZ18f8_`uw=AQL7DlN6{N*ImpH>V;mbNaXQ)^7ZPZdT9NfxFMEw&yu~h{25B+G z+Z{dX)2IeKI+i4kdPA)EooW8}X`Q7{M$ut1=#to!3n3iaQuLMXJwC(*ggj1CS8Psv@h;WHc+)_i5z`Mq(z! z!FX|6j$(&A5s5n850#;&Jn_Prwz6}sM)D)FbQW>D0Nw`h z_X+PbSGR3j%VAd1mlr2xuj*Epkn)1vW0iBpw(3wb?GYGLlKj?SpA>3!P(sFjB5=kK zi)XY*Yj133&%Ps!Ixsiyw4R?%FFkklFMOrDf8{&4084F9QuY6N?S*O$%3B>8cqN`1 zMCs%5K~=u{m7QSpX0V|-KAw2Q?2R(^@a+?$&9>e-`ix)wei|sydbHnA9GIXG&j^?GC721}T<_+)9&h-~pfk)rdp7e3zbiJl;>)Wo+8;=`~ z-Sr#j-M78F)Nr$&kAzHQ4j2cGBoW$@tw%2|Z({*nGf(yJmB%j{2GZ35e)2z>Lr3J) z77mHx>tO9P69kdwChQH})LZ-T!(RS!Rn!}(5MYJImEJVgJ=Z_Y@?XNG%Aze|=?$D- z@nyN@^7|nhJv^9(@%(0^v%OvkXKA|bIYW-!sljGKeiKYI)OLFP?7jK~nF*3cl|Yt$`5u{gHzVk4Nc<Evh$wo_z24X*3j_KLmHl|p;D#9k6sXcDP}=)$ zRYbUsi%uE(b5fB|!ij-BgHS@0s4$0wpBD7_szT84(!`U6KBQKDt~p%%L(2W;G(E3+ChN_iFfRDp)r6p#BV|Lf+pQ zNluF$lu{Y9%-*WR*pFyZv8c#0wlr&~@XbR&`*KZ10Jby&y37}`sM7Gpi|0x;779p|hM60D*}`JMXzwhIEA16oXqe8ZBU2P% z7a_g9Djle|YJ}+AtqG_rqDsY$!1Y_zK3N$zZ# z$#IYnov|&!HG7E6d#vXz6Y}{7qsr`PV89soQF9R;T8N~_H4GL2WP>Y$d`G_N%?Qt( z6Imi&qvI601PY=CmlKT-`v~<742#M$6#t1+v^fL&kJ7G<2< zL@e4wa-IWCiAsLcgX&cTrJ7v%Lx5P9a&iyiT z82Ksug4*O^!FG%nWS%Pq<`FgwYQ$Xo+|?2eMqqqvgS|&$dTkVFig?`cH#txN;KjZd zzq`msbES2S0?Yi0CcX&nux=KDZ4Wck;^^eF`d&KdI*n55hGxFR3(a}dioAx?9)S$S zzYn{+U_JpX7WGfeBZq+*cxODtiNG(K0t9LI=wT^-*DV?MaegD?S44}b511SgsGK*P zA$(-%P1Bh)a zXG9R8dWYvADB7|yP1ADkWWij}`gyJtoSwP!1D2sOEWdAjo2Qu4+vzr)Q5ir^sjZE) ziHjjTO(*OZ)X=FvIm^2(;#n+T*etN+GdFG+ka}Ys&1~R8hPt{;DC{<$83T559l(DW z_-EUNE__9Z?3Uu;he*aH!Jv@B5mwZqdHo=CTg=7Me1yxf`IPz7&#Se}$Koqf7mM>5 zA21SpouYQj^v=PR5uI5AqNS8vx-G-$7R~$mY~fj)pQ3y=o7PcqJdu7cRoSDRo-0UW@VfCdK^GZ%GA+a>^i%@ph|;^9Y{iP8HFo0u?qU|1!&aaJ(-a2Zm*R}XIpV=bXMp;$qZptZlyDJ+HPF#eDG7SCJozZ-dbpYk7 z{N8+6N8eYowv?j6X_@bN%16gN*+%}P?w~HFWN~!Vm6-g&aCmT~@raG-H^@SR3rd^8 zj@fFe-NM8cG>M%DM}3yaNm@pP5&H&nT);c?w{#I6=rroUy0cfiI-(w1S>4z`zdhpz zK|<-I=gp2;f(aUC$HhX;%%X`OeYe-8=j@{(P4C&+%QE6Fo9!Y!c#WhmZB2V%j|H)C zPcpMoRt`x#LU@EkJq?${UcIq=R1FrQw=YO7>n95}fIwusH;WvlR{%GabL_YhI!y6b zu|bM$T_ch)7RKM$+r@?dAS`eu%?(r5$BCnpJ{3omJxR2~hfGv?BUDB#SqW+hF7#fT zO=5tZsZ78uS}Z;HY8X~D+P818n<#yzvAtuqYqKRiqtMh>gh(7@qRtr)-T;t93dkGi zNmJ3F+~Q8|9(!p+UqXAi1%^DMX?8uMgcw=rM$v@&%|ftNQgll#xw-)jgkyXOjN|d$w8v*H z7~DVkN9IgPLdZSU*D{?BDwLRuh5zqQSZ1YrdtFnBtTI@Y!jjRQL`nPL-*JV86@O9m z%38k=y-3c;OLrETb4&AkS3k!^D9g}1Du(6)uym^0J+MeD42~!!KjU;0Sh8p7c=`Bg z>8_bQM{1iCtnhs;<04YYG`RCNcVKV5yOE|TnrN;#H19aEhz{ZJ7PYIxYHy6O1R0$7 znAxgvtP}`Kjo?pyPnG=;l7Wls@#v-`Dk22k(ls?UTcS{LXlc%xBa#Q5&EgN)>YrB0fg zq&X8K>MWT0ni!SO5dGmHA!=UXkV#dT2=fd7VkNB^PwJkCS}-c+)6X>CA{OcQUjLxy>df>M zZihR_gZ`SvH;;QgxHca5-uZ*fp$rUvGiOh+szswCxtV|f*i+1}^ZjjnD1(M~R2?$C zj_m`kH<`E9M{r_0jDftQPR9q{Hm)8^ergutGREV2uh=tPZgl3i9-ntR?JjSsD^#T$ zkw84D5=?W))FEI2>vi0{P?1(*dBKVeVpwfdzJXjMt|dUyCJFA$enT>qxVjL$#y9&P zZj&bbkn*T?OXc(^VlnG2a!XzUOI`}s;6V;`2kS)mF(^f;o0L7&7iAQNBl z9E8MP>e;mMS^u!;`}0QGZ5pR;z5B>7l=0Gmco)1gjdwg9?B~h;cR&vY_^!{4vHRj4 zBWnl_(%N1|s0S>5_?UMZZWE~E-Fh>4Usc_F;|_lMch4|gi;Ij4RTZ{n<|=hf$j^sw zCB?&wiwaEh4}vMH0(0r(49u`2Wp>NX;u0Ha=fQ@F3X4QvAYez8=ZqtCBh2;6)e;?F zIL(M1W0vJ+Nommo{Q{=@7t*GR8jtMp-hnKMB}yzz%1midpm^=(t_}-hW#dE4L|<@8 zO~p{q%ZiH2)MEmz@4z7s)$}j2C+gj;R&K#h?C+ed?^+N3-5B3h;Ds?yXfll+cL2H{ zP|FM}DY&oQCVvXJ?!ly1FD3}QxgT6969>BpMR%h+JgFam(4Wi3*^G$3os9w5OiEN* zN@~v5wz29Y2lNT^a_ZxY)IBl$k8Oh#y(vLXG@IMG`%QafgF9hy<0SKPt6(!9CBtcT z=VZ$jRwdh-ifSt>VRW`5ZbkJx4oq`~<-ca8TripC4kYmXAZfzMp-mFUOf+~ZI&4uz zlSN`uB&uK5`)YD7Sg{_Ag*sKFV?GdVbRTJ-->GFF5?H{SLOp8SK2qY0lr^$gNIfBAQiqG#4(0J?6Q^o7k)3;-FoS6K8ha(^O{ zbMrN8CXP2ztzPeX4gJx6j?gOfN8-xqG#U(v@5}Swh`ikVZB%bl^i+GK&@EIvzSMFr z;w9IYjA;nLzx@GW&gyRWk4sv$XCkubk5V(CK1p|f*qI9$LbBuiNZNASUaRAcbZ!3d zQu`(?!FDX3+$KRow_}AwixTcxyozib#muxzQ#BABz;Em#8@{xPc!OuwBz!Z;OGT}X zLar4>VK$ww{hXa&{UtQpu=5L7GAo-#GvRESVz1_d#aNl{TAbLgvtO&U2e?`mOsO1+ z+ufxM)#0G$oOQd{kD{oE_rj+s%;lSk@}Z|~0D6Jq7@uv!OfdC*>*rGlA(cA+a%28c zeQ^<>O*Oq4th4S~D>h`<1Oe4`=;2-T<0l=e^E5e}zD2vzOr29mrV}8(y(tspw;n6ME>BL=HZJx5ENPgns8e2;~NqW zf&1i&`%ZX{!KhoWEtmOoKqv!tLoEaZaENqcCNQMuJ7Bj?XV=T+m9G+x*aBS2@XkWg zTKRk)D=DD+xuUY$L{I};It^ICgth;yX8qc(ymS;WC_&WhFog5)c6XtSjKGHsy4 zM|(a?TIcBeV`i1CD~SDeDH)nd%?898(vekuQ8f#DJAnlW~@FhugPx`dQIsQ-+@-_*oJwvwx&W?qZ`Ba zwZQ$|WCZiIa~b$TK#0evJQT&@m%^JIHpnGp^UJsylPYhlReBNbde&Ul_fy5=C@0Mb zPp>`19{Jw7jb_K*tNEHuo-iwR)!f$ot6e0}v(l?S&#w`P`rM&XZ0Qu{nH*8^8Qq{t zpwRl_`WA6A!K&@?Hy3uWGa22Dc>GI!+kXc)c9Y^;EZTt@Z*>~XNoq-;Wu$HM|)Ff36J;=nYqgj_bO9S z2F}99+7&eqKQtUdlr21swbjhPWyF#tTeZfj*eWvPy(bhLeJZyTrF6-n8Q65;Fmvw8 ze!D-anMXIPv_(b+t47o2 zVcsdDtA7bJ$7cuu}O;_xdrR7 zKX|5DSB8+`JZmm$rQ+NPh*rh2))KaZ!uz8{Jnczt1ZL&p6Q=s@qETQ54AhGP4n_y1 zE!c44uPq@bbZ@tyc}4D-_8nt(=i`!B7RZ*eu23c@Z7r|hX9oPjcC$`q`|DL<)xr0J zgD*T!Tg4H*cgooxo{GChT>$&NKR7CSj|S5}9Sb0zzS7H*HoEPCAbuLSClRG1LG=y#0&9X;Htb4EX z`BJcC7j&5ck4r|S;_AdLGD3!r*Kr3Ew}pIjPjL@>A}L4Pc-ArJG`T~3c>fl=*Sbdu zT!)_N!eKvTKV%um$~f%S?09y1@Q{d^W75CyDxzhpg|^O*w`Mt*mXEH(qVj2W>Bdcq zaKme8_a{F%$7Nx1NypKaqZm~rC)HY*M^zM#SKzj(OY=IjYFA;NF*b+Xs9K&gEDM7# zb9yLe3)z;Lc!*V>-dh06DxW^UG>1nnndFpSw8>b{vu}#p;L37X0LCQ=+47Ids2=xS zR4PF?CvFzm|{XjrvW#numj#eF19}&3B?l z-^SOXIc9g9N_*yk*;V&1NfIr1_9>3U-ov&0?dQ_hO+8$*o##UuxsN3OOiJ2TyNgz$IyxyeXpWYU&#eT? zcm-YK??+tnJ2YsN4UC$_UuaI=xT9O9BL3*B6vne)4c$6o4}qBYq`m6&4%0ZhpxhI> zr0fmC8A9x_3)-FrU)M-3sU-P~bTmCqaefRN%#u_(z25iI1ZFa;BsUaPSCOE}ual$i z#*f{UOaytF8}!Nn0Ns`(;;vapz|C0`F+?gn%jrI9(S+{s$h~8aO<2*Zh5}s!f;Cix zMOB%7Xv@RSsWO!m=88B|@qj?|O3KLtrnn|Ajc0e`Y9a!m4b8_n9kdZG`zvyejZ&Bv zu_$6erd{bWU#dj5T|t`{I`@lhuvk>g6DlLSwo2F`RH)`P2mKIwmT^iqrrS9Pp1vo} zb5~_Xxeb0D_$)m>meUTybS-Zxgh^OfoMc`22K~6(%{>VW{M~qB|#vW zd#q*R(CY;<%N<8sy44@WbrJZ9K8NU>LQqXa7YImKDqjzSSop#Kvz*r7w;TE8p7Eh# zQs25J&!ZV!en`1VSzFrr0u%p0qeDX*+_XFNrSkHe!Wo#@g~3~Oy9}Wop=(QmNr}j+ zLcp3?ygFE!ML@h!1HAJhfMl~UW7*^E#jGRNv>Z0ik(j5JwZgG^-c8+F0{_%F8gYds zIty9nDm{~BK}TkA5;W(IE5Z-Bh5^)p8b4M z_fbM~BGnZ3?ig1UD(RC2z0t$%QNRX+7Pr%bOEPw0Y9Yuv9i|DVn4y?LaS`4Ts?FSpi}QXb<+FkiWcQj~p=}C}w`{8^LzrbaCi+!jAkuvo$`sZSfVAz{(ez5I1-yCJ096Dp^a#ZF zDq_pEZQ0EEQ}^nc$&_f5XiMShF3DX3>}3hR>yq|Ligt1Fz26t;8!1|5yU6~G!>R6_ zT2gik=vwR7b32Imjep!tx&Nr~N3|`8)mv+U#TSU|62bN8oZO&$BWDbKr4k4IR4S5w z#nNPr$+ZmNLuN6`L5hR)gyfBuguo8B%>jJZyfpud(=7l!RLrASRFrqOd-s!jH?nAK zbNcl*HFtyPi=S!fb)kv|{5@Zpq+PM&z3~z|l2m*%@ag(MeEaK+`xO1u(Bm~zK4>z5 zhV?QlSP$^@WEL{KoP> z)%tRA8E$=LY2indb~ejMy5=1|N)rC%^r>k!=QYk@(o{=mZtG~9Y~ivjnsGR0(-rU%DvNAz{6Fuqz?-2rCGq!Rr8V$&}jwRf0P8teVgta>Y1uT{5#{N ze0R=t$#y^6M@($#_Pp!^*jx5H+^6bC>Q})x-|zxf<$`8L<#&mk9R*QMYPO6O1;oa5 zk&3g#DNJd}YDhHb?}=_$EC@W)x$7EJmY+9ZG_xab9+JrWJ0P!U^i|7m<_B&2A63Gf zD}HQqT@EWi1rRDjf=lWhU6^psevtXl!labzn56FIP4-8wBZXUYOzWg*5vP>WV{Vmc z5oD9mdQuLp5ym+H3SG%n5|_vbxC0LOh-c^QxY#;NJB&lQ&k~!*H5TEl!Y&y){9~)@ z6WiNYxT2C&sC)M1`~@44lk=|(W?1zj^z7WgL(%gJ5WrDHHfmLAH>E@`hKWS-*+MH( zbAk1kg%E|G*hQSt~52lLiL zkdGjuSX%p|((E5f6kv&~#aw=(ok0ilUw@=~WHKEoI|Ii6BnJr@@>8v(_l@a4{17C< z9MHzb2~_W2Vp#{XEy<2`&}W7N!?UsO_@Sd~-EL%Yt2IcxK3PS=qC*dfK#!SPRe!XB+ zOLmL7=M?mY`5H*i!q9w}6aR7&z}qJ3t9C)aoDeXp1!&-)EUuq9R>)p3X%&Hd9a4m< zmPx*$+7vAAZ_+BNmhps5NDChYSRl?Ep6zyjywZMq z#?G|01~8FItLBzM-oY&{qup`i%+f7@==h)daA)3m0nKXOc_onIYo^uY3_LQ*Ir=@s z(L~9~9=3ze+Hlvdd=Wd#;V6iWJ^sfr#(vY55;SZ8fK#6TX|s{$-`Z>>Ry21q{$D$e z*~;>YNUF#`H;4&wHN;>rV1S?mA+UceHK~-%Ej6Gp1)vS8z10)4>rB?CuV`JoE@#hV z&kVU1qu{d7-?s3U&pBLc8CK##Oip$@Z^k)xU2ix}cfK!g5BUJSBJdRE=#%)RM(GcL z(oIhs8?VxA3cI?0rz(Ph?loI1LoI=XhKIWmh#3<2B}T#LKx^E53a^@}*_f@_sKKQ- z?E@Zw8&{aIRto(}^n6C)XapgW$;rtjS8pqe6X zLl*-(YUe4KBR$2ZpkP+9nIIaJ{3kh2>hbCA%~-1=PFJSx>Tz)6kb_FFz<^vo#oQQ| zKP!l9PB$B5xC8BH9TD{ygOm!_wueAz)>#i-I(UqGH zMsU|^yLAA#7MEDQxNpdqAYskm{8^R|XEYEQ_h1vmrLEEh^kf%t^Tx5(oZ{z9v?h~< z6DIi0(G@jWU${xX!#Pwz{<-T!zS7rX*M7@;zQH4n@7k9ESvYuyJK4*7(V3=q;P|u> z)6f}c7m^wV*&b*Ydg_U-xRr=;MA<`t#iRRR?9SD%2&Q+(0Y~vOOXJZZJHavmjHk;f zpO5q$O;CJJgI-4vy_gkoL;=e*>X>x~^IZ+=P}HX_8j=Xl*6@L?Vd)NX+A*pyny4gu zFf3)TjMbmvn)^Wm7p+;G+6>Ju`-+v&>0@RaplE0wio(7tBZ>WhfWm%?fx>=iP~o8M zaHut6R%r>;9ZK&Bo;Vlc-81}LSDeVm$=+nnWbA^}h9XyagTYqg9erKWHmixYlmo*B z*#NUud1B?RJz%{UCp5zoSW$zs#0p>YELbJZER_&Gcw_-Bf*_Pm{D64t9OUC~vP%kv z#KQK>5V0;7y!z=#n%qfo*z;i=4sDHleq6FDwrgn0>m6A02A`y@;;(w=I^UMbhz zVuKva@3P*Vm!AUsnEei0JwgrD6(V%$T>9Kv7tf6*Rw7lI5v|Dim#{f`uQ(SOgY z{13E8Vut^u$tqhaB8emOpaCHrkTdYA%B!cFY$Y`x95V{Z zFqiMOevfRL5KC5h2mX>zY2|7xuTR@Bj*yUkoxZ;Dl-X7F{eC%z_S>!J4giGF7+XUh zpx57l=T!F9^pIfBsR+@GxeQrGncqOfc{d#sQq%G%2bx##kz`p^ zT<##+b_p6?vgOPvJb_48Fm$j60mZJjTN+`&CY!pxW&PzmA=D>LHndRBB0kEsC|MkM z^dd<*^yJ7IFo!l|o!O(_AHek<$nn}Qn+}axCEcQA$qmPmcMe`Huc_#}=WJ%)!5N)J zo!NW?6%izvE`}fz>9qbL8uZw6y`88-Vc#?{r}baSQJ1b!e@m`Qs)dCekwSq@CjeEbfHvnKlL{wU$NhLYi~OU(?)cy7Q>S~ zgvXkHVtnn`=xQ{o+n}!Y$!fUHwjS7H&9SZ8Rm=|Hx}+{P*nme;>yC zPl%v6X@kUo%p+6c&d`+4kH5cNLV_U?3W^x1vaM86h74N7{~~iCvn|6tzb0^?`v!@M zfC$a&N4NhD9cBrxu%K|iHpTP0f9QCt)%*MMg5xI!Au)!Z8G336#}V>C;M+f#R!Q!) zz=iBR7`K@ukwFt;Rk%)5;AenKf_J*dR7GNrOe&NQlR6Rf@pq&iF8ARFA$G`78c%yS zrhTQ;f=~Yr84n_4c;%i3Cs6{$T5ZQ24?3l(OiU=grF==0?oDk-Bm_|#f5CePEyG9# zT~u^LU((`mM)yU&A0_0AKP}KGb^_Ih;u4}!U<(I>!bX&0yet=gotwroGKP9KAxpk_ zksXJR%7OYbsK+GLq$&Ffl1iI$9v9Yw(0jzQK2u}i)IPOz;e#(Aha+z3%;Qqm2V}f@ zDxn+uhgj)%an#oI0nM=SJB4$c{5lfn+#^fA%O;i{c|$OBG@R9a%UJEhu@vT~MuE^7 zR56=sR94bWL^Ku82}RfW^WrU{(T7Y*}v2mHNYgR*5HfR|MNw{TQS` zx74k%^op@}c^@8@%1wi)4?B;O+D`x;o(`NOm`LtQ^+5JqQl?)rDIArOzqHAroE1il z;6)YwQ|Sjcm1Jl5K6JZtCo<(|rI+Gzgo?C-8nKQ&4ZU`pTHGI_kcXN3;PO4-xKj79 zfM~1nRK}EL7N=1rt3;{pb&4T3{a5?%dI-Y9IISBjLp3TAWJMG3Ia1cBEh~faF`zd( zYUpD|@$(f?Rac!u$nBir_dO)7Ir@Oen72vj4X#HMhYYuPrEK2e`dEz+W-xW{;EH@n?)3m8-={^(h5nk69<9&SWQW}S0ct; z{+ZH2EqLRKDj2MDG$`J^3ng~b_X*4w7!TVrX*V^407|4p&RJ$Qpxsd;ns$$d2KSfRDy|4Iu8xfcKqDki zsYUT`EKskpWNu0OM2cOMbqTK?*o{nfi$FVCE~R>Ck<3%1>}?az1*YwyyAM9phu=;s zO>&zdT%CP*&G*^iPbbQLB&OJH{)BR9&Fc5b#7eh61(;3Ta7{9x1tg+ZtF)g(4N?dD zhnC|LSBW1Xu1KXS`QcFKof4!z@+Kkh;$0MZu0TdvG1pSoL^F&{-i9Q-5QkyB%66Dm zYi$`XGz6z>iY_G-e$}oF$lYbywl8CJdEQwjLPwBY=KnQD#PP89WV9PoHewOeIAA;? znzl*!1FvC|4irpRGN;*W0>NhGFrAPTDxpjH9B*))&Zv6B(hoJwN^*u8W&e9kBoMa) zz3i}FPO;JJPk(C_^Kyt|;zddM3{_Hbb;f~$!2Q$%wt8DFr_!bu+88x!VTNffMdh@i zR2wa8L?vH>043?vQw7$9`KO)p*#aVBs!R&1nGdFL=iFsNt!Zq zqOm4}2OW$wa)M!VUZ5wp*Y8Uq$CX(m9?cxxmZ)f_k6vD$xQ+;&ZK3&VBRo5})7O>7 zhd#Kxd=6#iGZfc2iS5vu*MNIBitq^H&*y7FqrFA<2GJX=g{`3hs|W|Z`D^(HzFg91 ziDO~0d`Dce0tbp$s0Ggxn=lsA*E7Q~rh9g@UT#_jZh*q=4;3L9=h`;N&cgJ5EWRL= zxLjv=<=kRf5jKfp`GEO{`Jxcn88L~c`F`JI)iw#t9`Gq7H#GhE^Zk898v}6Hlf~Gb z>;iBW_DBhT=5Tfa8$-LkH5AC=Sny)6C?f!xFo2;r>K^inZe4N3s;XPk5s1j?QG z6w$vTaxug?%#P89!EE*doqEHu1m zm47~#hWbzIAZyzR%!CAWeYsv`jY>0?mR@(h`!GFiICkA%u5%=a>2kc||Gq!GHbBl> z5$q9#C-J|d$|}zaPDQ%5P+zwHvz~^UNF6p*aBAYeca|Q7D(F87YL1T+#bPbqcBNU& z2Msm&hdcma;2#y1N?amE?1_o^VZ(hXynhN83p!+_6->}K{A8R& zlWCV2ex7^i*rfU7W?SSm4| zUdV6DgI(6nMv!x~tcAh2aZWtO9SKt(rB~FkN77oWSe(`+Ee07Vm%$F8P!h0LdK6Yi zdsVNZ_cbs!5Lys{jM}}okta?Zo7yJB+=>%?oS8Sr6(}zU=1UPq z?vj$ZP=;i+t3}%c=qqPX+g92!atn;6+;(eBw~h8YTf#W>GmzFP)!As1qI(ij*pw_p zopSDa!Gcgbrib2cf5z(v&I^>?Z7?a_VXBX{=FGKC4vF0|t9-K^L+xZRpW%u z)cI3NJ4eUf63QVp=CUrzmRx@PNUC(^D4)%te)h4wmHnbGXX!Vku`@ORx4)Q$^;6kT zvSdtzn6%CeD)=6ops%K=>m9yE4N0z1C&_)^Nc61=nSm#3FTGweVI{0g;xwj4VKpE9 zeFz57H{3>bT|q6ogaTjFfP)4%h#nrxdHCz{PPRkwAf+ToZfXOyV{pHh!s${>y6Cfqm{_w1uTQ|7e44(wT99i6>Bj01Up>J^)8=iwd1bKs9PgujIe>Q% zUmQHV@Ccf1*0#Yvv+Nq`nl*%xDO8U^-K%at=@2PI7`OVnN@Kw?BMZOug$t(~pB zt0oA5g`UYuX-*^4?o~iy7WR4$j}xL z;xqAxjYkFUm0WDxJ?mIEDfgT66}$RX=dXVXTeN&r43@9 zd$Jz*r)9at>kh$^9DaQjt{hlul5L6rsVLzRyei{07M+5zGpNya6KTJ&im7A|jg!-g zES2rdFiodJX|1XFdUx>sv9Z*QJi=5Jo+ilFYi%vmQFo`XOzsL7T=$Bx1$LX(o@6NJ zh%t}aj5BG`x5>CdY1H+)@BF}~r=$yu2TC)@tzkOu&>HHru0?BMwPrW?nq+Hk1F*C; z3(46RmuMc|tm@2U6y2jLT#b(aXi${jVODUnKRS_Zu9lBS=195KKt1Eb{Cq0ILEeB`wU@N!Y{o%V zwkKv)H7*C*4bw$RHI(k0#znPsD{m|v8zZLxbtaCcvX5ns)s2VFXILbda1Wz1Q-L7^UQ$5Ug(ypN+o(S9wz$&#jEWmbNXu{T=K zpFpCV)?R0dZrE5GgEP;2HslKL+pP0{(>gYCI~%tyNI0`rM3XhVpU8ktx5)3e`V!h} za*Iu;n<=EW(l~y^rX@SCqJRdR17-&}t3>2zz>+p!@3D)E%QODFqcj&D@9QaHA9@aSnwO!5vfi6SqO1YP91SffWQq;i#s zZ5~HKXrAeqSIw=n%lT9bJ}(z#Glm!FtYZw(U;~_21yh_!Qlpr>GjJx$X?g>fXTbJw z^LOozwa1*|_ia%uLZziHbC3i#SKm?YrV(_~(Hm4Wa=hWvUBeF^MN%l2{Swa|)bPjL zu!;*{hsh*Tc`Z^0EMB5i{mfN9Y+l$rhW$$*k#Tqwo(7O44o!Wg?$pu5BOZa{j4@I< zRG0NfQ%=^5nkX<2i*CtfV9aJ_S$LGJA%+XHDZI)dAFa3cMwO98ovLH(msY^j#0+Ki z#8Q}ipR+wb)2N?=t{vEv7-6j2tURN))eh`y72@-g)y-=<3n(J?mjlOJ9PI!X^EV z9VhF*OMG8Js0#Y=62Jun}nk+F2M#I117`0m*d z*%mBs#QfTJ*rP(g6RAgvz)G6SYOANTgW z_|ogRr86Zw5%!cTL_E4qW@jqj_GB=1^dZgUK3j$e&Qa4m^*p2Ta%%?qv#M<|iU%@D z_C+MFlxeVMoEC5#dN zBTzn%FU3JQuR9>!Ea{7T(|5@bQMhWiMcn1i+i>bF<5@TE{?oKeX55qRn2Efd-3Lkx zt^oZfFPkW=AfcE(fHQOs2pkS>JS0Skun~-&N7401J>er5J)5E{crk8|zxV+!HG5z> zP{}d5ww@`U=OwhvyrKBXGJ5C%3OnW8T&u}bfCtbA9(1s(G?8r*bxS_=z~kG2ra`J z1nI+Bva8bsn(KH112V`*O~7hvD?hH%&aTl+D10>ec_Cu_p3V<9s%?4eX2 zPZViS0VnHXgoC0pr{H&Nfi%qBfk7M&D5r4s#DcBnorXe3q;(AC`M%(4C<=9ZVDxt2>o$W6L@TXfqHgyF z@TUUWPq9^<=kPY#TCV79Ktj*@2;DX)*nPtxH>ZSW1{PW$@K?%<2i9L5&7EBg@=x@V z=fQStP#*evSldM&ww?-0=q1(A(1yMrLdJL0n66k?y%{$y+T&(D=6~X3Av-`=x56>p zu*ybPG*+VSA?kszg^{ZIHMQj&w|?VxWU$)^r%wai#FkK&2nny_PLV&TJ9M|G*f1tH z586XyMlwoe;+`<5kY8@Dr~7<=%jUfT1VE#U?fWJ)`&DlX&H~;y{Hq;AJ8ee3+v{r~28%c*4VUFKdhDS<<3}68V{3aH z0qB%^>Ra@oVCKeDlr|n%zv6eH(Wezd*zKQnwZ3nTHHj2gTPU%o{c zrk_|izix|KQURfK;|<_}-EuSB9A0mYe^R$2q{BQT!#rCg?$u9>crswQ&egawT>Ijd z+q1FmKq)_?MTwX5z}6Ewj|_Vmxr_`)F*rMMi4B@QK)ewDDeEzD06X>{aBuPoFF-5^ zR76g~?N^>|liQ=vL-2$s;jJ&(>pqVJT!yDO)qwS`d6y;hvx8#yj!SW>%K!*U?_L2@2oGj1F( z(TM2srpPK%Ep8W{5+R*gg%q}cH(PpHtd>3sic!a?OsRriA(7Ig$pgz#XOS1TQozc* z(@RnYfSQMw!bFiIk@z^LRud`Vy0B=*Nsg~0l|i3o&vbPT`x2J{(Y036vp9!l;vJp% zWcq{A*^qwb z#wv(Zx^I!Jp1{4$UL;ZF-YcctkTTcBzd*MmvaPjs*bKq4S~Ci4o}t?J+R>7W_2IJe z*6IJOBl*Cyc4hA|2ABGoGg5lPqH${3vBN_2ct$8VO<974N1Zc39g0@4@l8m(dc_3(KyQ9(f!lNJ-*>(5^ZXw)~DiQ^@2m77=7}Zc~@I+AKCJOOJxSG!%I7U&PGx}8R9J4sTuQ`b8 z@{v#FI-O1+pR)XM*Bq#ji=%49F14h(GczO-R3rttDlrK|{Qcz*M5VS`Dk|E`bsKfLjga54e>`(>h9#J5~H#@FWNrEFcR zMM{GO)S`cEB6lpH{Lnx$P*m{yE`$X@`N*-FMLaKUx|FtR>Ft-hQ$R`BrT1IhJJ)}X z86x}dUu+;>;uTdgSf^J;U{hLwv3>iA;y<*0r8>89=~(l<uAt24$ zlmZB7RJB0>(>z5z!ueN<=cm&Ym4=P^noTN{DPS1F0c@hLf&+j9`Hbci6}*($limYc z$*-@|1u8f56U5C=7;FiuP}^||@nmFq0~^kJJqvdv7YPf-I({V*Duanj)^lUA!dY8i zNY0kX6su`R1XP#076lEeVflq*MwK$8K#5Hnw1Lo0M$X6i0u+;@GpYjm=?O7R${vFN zXd13^&y55TXIxe!4_2^kAqoIWn{pyh-Lv*;LP`bhJ#Nh0yl|@8VyWi^?L`0hfvC3g zCz6~-;X0o4wzN(Q956kJb5ohGLoXB&?6_j1A2J}@QBehaR&&~$co~>XX9t*mL72YG zscV$lqtvFE;WTLU8do{C(oP93Aw(bJdu$f1fHVJ$E zE-mAn-h3wsad{t1}8p z()z|+y%m&S?S^a`AQi6O82%cPhSiKN;)a46mT+d&uI3m`Kol33JMepYnh-}f9${=d zQDsHWN)_Q(kSC66ilDYbTF|M83FI{tmBG zH$*?&l6q^pL2@bV`D|qA*9)qpb&}|PoJ4!;lmLeh$U+6Sh9Zp_&@B=>0U$p2@zr`W zCO0yhxE7S_KM4=Mf+)?rBBG0Pmx$N>Zx{kP4@OF5onoFw*5qneiO;|P(%y{u^eXm$ z3L)YRK`gbhs6lERe@JU42~D%^dAl4ReUOA0Bj8?Ygahboe*+G8~gem@bDi)t^&>tiO|D7U00O zQ)PeLxv@?U_o5vQI7~WnMu#AqNQiJm)z>Gy#H=jVxLk%B+4%aj@({*iEN8J=pHr42 zwJE-gXKzhs^oJIM%DKcG2W7gOTI8Tixeek+Tvt#Y9R_pO(ME|aBptUS>h&+zpSfB- zeo!Imn(Z_o>cp~#A!$6eN1Sb|b>N9}hkDkxq(m}>-x7sY@Ay&U;_nxS-!g^0J>Dgp zT^=Jooi-OQtkaQnRc!kt^;X|v0d*%mR+X*u?}#JJb1(pDRddZ#HBVIE?$034~d zvvpf1#Y+)`5$0()p)I%WO~?QErLNk+4h9TMEkS8jq;hxL*zTXfhxjbyIV?ES_>M3H z@Z->xjvaFUQH6OI$!#y4#C*n@p1qH7_^TRNn?)gUqnOUGx_K{QvbYR%?Toj)(cTKX z1;Z;a#Zw4-OPhDV^(HIyzyrE}G;humB(3*FVT?CRnRC$Hro{gaMR6unf^r-C;tBr|28b4{g?R&?f<|&-6xQZ&bjWGMW+!%zhKtZ9hQvmc$eC)l@M@t;rSfDb_#U9O6jE>gKT{ z?T#kzwkF7Z*;%<)1LmZ|ozwGF(tnd1j|z9n>ScFR^A*Aw$2CiS`t=c`(mZf(V$5$D zZOrh13u|d!{E$%DzkGu62~lMoC*=5eiG3?D6u+w#C~yiHNNcqf|N;|od<`gG`KsKu^aX}s)1 z6DAX64Am!33O!dH>6o>juuM?OSdkx8E2s65v{ZA^@X=T^av0nBN(?3PWY1IX355!q z?j~`^UlJ=b&Q4zV8)%Wgj(S*4o1hBsnA!3WcamVYN09=@2Be=nM;_LJqtB6S=oQRnBODr;d~ zt92F|{P8FyDfyQuR%+J#2}cY=IHZomqPoI#mXd2;LLWAHJ987JK-TxwWMrlt2SmVcAqhSW4bczS(U;fv|)nbVc(t+Dp-xldDM=vLRx zi@rDoHf4Ex^+tdo;-9#CAR$vXk#FRa8R-8SqH+HvL=T z!3Y1*!`Gg;@)JAP2>ws7`M?jC&)3%BxS$T@y;66D(}m|XA1Cu7&n#7|k_fZd3I1!w z;QryGe5UPG`rdzHdW%QvqJ_TK^AC%k|G;Yf`0Sq#4A6T|;DIREQt z#P5l|lL(#dzeXE40{*XNp!na;@PClaj#AcgTo*?ETngJ4R@ql?O;r8EZXt=y7N#{; zN!;K(U+8>B8l)}Rg0i}0zc3OXcOVZMjQ69T?>7S+J+!SsFeeTtkS+I30s2%GvrCQ4 zFkO3jfn(QortS1<_t(n@+>aCmefH~?!$>8!frwADj#!2cNm8rzXxrx^qwCb{Q!X3m zNP~kodwpZ-ep)luLQiOKXe68|sJ@t8!ST%oxJ>Wt*5MDHVyEF6Db%N;`{;VT@*(;j zyQ`Nf253fT;7@38(M4l@j^I#xe&!e4JH5hZl6lq-E9j5XjSJX*YszJ%gNx{Jc0@?R z8S<5FCY~_@l+r^P`OKl+u+hcZoW>kLdb9V+_(zK8a4 zY9J=ibPUj*T~=#{Cd0ItpARI#_G4_@kXwn}rMXI%O&Kinl=f}q_mD*l0%%PD&qm8d zdv9IIc*{|N4rxL3MapI===)OH71<(qX9W?+`d{pmc1>E6!#Kx*``rDx*T8%jLy9_O z@^QT+yPg_@#6^K4YQlp$hzTCuMfhRM7!Ok!M7`YGHQr~*Y+;ox-^Mi>)pd6*V>(JB z*6M%}0VJt|=9+H|R97?;YLCdSx-dAI&@|A&3VO?)- zVvCy7WZ42C?XK1$m||lVV}Rq5GoAWvirLuFaADB!|x7!Hq^OMbEHNmQziw?baCC!>9K0tXb(=2jxYB zZ)6ZLTR$|1g{@{w^_WpFg`C2T0(FC9PR=E;sZ0z;hA z!8gEVe}^XgHbJCD@c$6h_inH)jIonk)-Bl&uga=2(p{cfU?{Niz)5IxX@b>R)~zto zUY=S+@C`nw`b4hA-HqZKy#pe-N470~Bx}je_Vl-=Dy6txGDs4)HTrqZ%==^ybiISh z8rP4i(PUCLiXUZKhhGgF?#WV(>YwW+^}}iV zBNwxnV7`=)Yq?A;@yj(X2&-j#>JFsVO9EX_BH0Ok(RenU z8q=bK6pNpzizVE|kX^8Av!-PB>xQMqdjxhJyqmPoHuPrQ$RvstLM$V&fPerA2x+cj z-nCFgHCG3yJ&Sr9Vf=|$_e1^IC3$dQBWPvyMsfdV6JZl5ul4e|*sss9DL%aRBR6pD zUNQ%3^cUz^AI#te(JzpHU{^mDd_6e6ajOsi2^;-iGuppo8vj#9`;UC2=J3s#`ghN> zNi|Qe#Kkn9si(<_lt>^5KL}`v7N}8TAO{9XWO}p$AtQLC(BJKo#A(Dv2^gIBARzV# z5vrc`4UdRci<%^z=1Dq;?ao$>?Gf_Ui)HJ3jm}o%O^sV6oz_%unfKm~1Zl+g(^s~> zFEU+@?$e&xPoLR%KF^!OKdFa9enA=DW}DPTQ}t&Zsq99ZVj*yNTxQdKjCDj_tt+%lg2n?OKu-(^J>CnOFCrvGBhKE`lZQ zo6P&PTRxKC=4U6sZ-Q(QqB#2HSa(yhZV}Aph#QeGcyYr273979X}6#(O$#Hk7Y4Jr z8CE0VBH)Ly3Jx6JG~CZcSnDSY7y?ApZLn%8LvUestxa$rbMhfZrCNdX9VU!)I2*b< zw00ZTM|<)2DBxDCTnHd^*s#lA_(6&CGEdy$!@W2#uLH47#TF>~&4t;p(uWv61Qqa@ zR)gKsli)crYy-t$Td1E1{DeF_k%lYMC?FBm|0`M|rDn#B&1i9JMT*od@j6=9CEsAC z5rGjS?CFre&Q^bNn9NzWF@OtCo&g`>XYvaqud3=*o!KBb|A@5yO}owIMLiNl01n6^ z2+Pdi5C7KRpY?y(P>X#hi$O?POTf$uH1{~f2s5sVwp8Z_+xx<%+>E&>PQr}nkXwux zWkBmMny)rwpIAM=To<|7-Ud^G+WT+q8WO@y@DG$ED)Zc8BKtrNS9?s!CDJdy^*L){ zog+_fL6~OvnGza%(ybvSsr2XGFzw^8(Dn>}_BYJT*A;nZ_c9{5n*NYTOZ%D0W$b~h z9zW>KsLDzn4B4YhRY!|Lj_uvoV{#zQkWwwbma#FaSu4T0YDy@@lzoTq*5C zDSpPi5)QDj?nQa%iUbE2owSonrPWB@tfu+eclM2m8PhjMOJ%`L@`D|N4|55s{+lZ` zwg#cgM+2lwy@UoU0`maw##teOd7uhM*fJNT>m{x4@v7`?OOAV{j8dKx@9dDyt9&g0 zy}lW`w;y>4-8g2)(xZGRJ7VxE903NjvKt&$4jf*;zR@G=N8xlB99fd3pv$l{hpQd1 zX>-xn_-n0+6Ug(xwcM}sZryMP=}yab2lNF7^SSW7ip}o+w6uf*^)gOXoi7iCH)>j( z%1iamoY@YFhay1CNRQfBk~9j5aW#fcoK!DS8WVa$63K+#A{3@XQCD?~o*-r2KIDLL z1Qmb3p=a)ndWsLBfNr05^n{A}a&BNwfB!Z3HJN)uNGmN57p}{aW6rzoXy&f5NCm{= z>FxG677cq(zMh$<7l0dgE$uIjC#7b*Zmf1m^T$Z`u}HoISe%e2OLq)wZOWjBr_FV1 z`=6?WjHpo%&!nOrpZecA*~M*{j+h`3UU(I>KM zeTrrxsk6+(F7nTc0;RX+dMi!;%sp(Z(LqO*x&P9eP_-UP9Qu+bp_uNbpt2~^12H+==c)av^hgC#x=FfdJgzpt?+eNq?{shCpt-`s zWx@CHH9t}@kkD$igr@9eB2yiF zqG3vLk|OZ2_NT_8DWVJO!Y{NGFh?NvwEUIiwX2g}JQ}nQq>nbEkJ8#%8`Y|}ltFOtg2$Xo>xU>Pro;R1Ci1OWGS;Fl>wgnb; zxaGa-LQRv+2*@DG=t6G6s6yd%Oo=onmUu<>mVL25Q9M&fifB-{{Tnnfh8B)JdK~^L zMC@wE)t3zBTJO>Piv=a_#6fmtdoEBf7h()uxv|eQv`xH+C)`_P3*uX4aUM-k!du-# zU0*iYcDL<)UEM<(dyYO}k1BV*Gsh2LL%Fvc-qG_n$W%1k((Z6~RlbEyum%XhC{>N9 zPsmj$>T(f~EU)0{iNUJm6>XRrm}AAFbOfqo6dAR>eP>?39oIzh?rb>%qoV&BSU=zi zj^Y>HD-GpLtLkWd?+`EwKU%d@W3g1^az!p18(~fnY-}+Gyc%`PY;7@mx1;c#z&BA9 zfWkb$6hM(5n%0Dlz-nfQ zI7G8m0uu_tS*){F2=79O-G5;ewb?>RtMryDMw&oyQ?&drL#A)t8kMSD+;w!1z32Kn ztCAK)H5IGlx~y`fptCr63jqY(=U}svKFe|wwicw_m8ji?rlPmrHsVMJ?bCg&bYPig z>t0$zy%ezV+fk_9SdKf*V@GOyxZUtCo5r{86ktA50H5C5XBnbyIgNMTB%p4SWA*nX zDWAcEr`GwGU5UH5j=}4DjvKCX?FhYs1Ga?sF6Qv<;n8c>h;0DJjaJv%-ZK8nBw;s~ z*lk92rX#Q`m8r-RW4jLkSmEPE=SnWbA# z0-pjvk8PvsyR6l))|%|QZhO$S)@`&4+}7gNu;&_)n;utq!rJ+JR~TGF^R3t=@#nzA zquFLi_sRG(50>xZ@~*815bpxrAVpS?-gf^rjNrU2Vq2FU`vXwlpZI+!>7@3T2zrvU z#1eF^TW9pdTGZ_x+^Wk#?5iWkOGw~bnTx|U`#G2$Ep0C0Hn3ZWlf-;<2JQjMz-Wf z#)iR4?RB}r?Li|(g>#|N_~F@Z^`PK_>s@S87M&8hwthLlGUg34hQ>>;vEs^6LvLFbAp?mCEsVs&%omJd8Cj=-CD zV5}?a8h?yYAdf)*`a{>8-i@$?Y@ixR72Y8HweF)OxZnunR_o0&(@FQolJ13BvjZ_Y z+P02eMk++s2iV{@pE4pRj6B@N(va4X2&>8(kC)D%Kx))FD?zSmVx`5#pkYnBP--cv z#61#@a!za= z?{gWjkve~{4Edrj^1%7bHxs?l_G~Orl%a~qZZ1->)KvNQ!i6cZ$UP}uS~wRss*0A+S=u#gqIPDV zijyx$QO+P2qodMr0v)AFl#`rJi6=pEtQKl?P&n44P>WJpXz`lrS3hu!-g#uqnO}Nd z^M>7Z9OJR<8scy|TP#CdOs@ra6jY9#$0FsF4^hC;Rn$%6%qBpK-juU)(59H_NqJ6q zSYqw5n(w)=58^yz3S<&o6+T}1t9b<_w_`zR{7eq@tkvEaCV#_R3VoaH3Ad|d|1C8D z&pEg}OV?G(>`+T0GuEc?zGfp|z}}0hyswA%DP?MYM(e*#9(9nXrLiFqF8iKLgtn|Z zcx}DjEo-^@yfi-tk~DTzUg+;5r-Y{b@@Kb_1E2$cBQ>t&_(Du!3>r7otU8y^R_7+j zEfT^bSQ|5E5hH(LluD32M14yn`gb1r3xgVQdd@vZ==XO0z&*QG$8XGA!mJ&Tg|Hmc zhr`Fxt`6PSK>33GCZYwZl$pg7lf{(rS%}4y<6S_95fqm(RD0cwUGm>1sqPz2$?N`n z$!a5!s07|yWT;ViZZFxHKT!8=iLTID9)GAWS5*ytLv%lAgFIqzKX^^U6DB~f=c4fn z`O)ejHPT&;AhsGm9RGw%l+1d%=|R*XlBqahPGAowe%#%FdFj91rYtf6xs4DosBt}5 z7B5T8L{N23p|H?a_a95tEKv*h4EJp3DEC{>e7g7nKwZ; zFKiF>hT>fLjBvxsAN4CU!0dXZC|dO8r5*! zN@(vj?2DPBT}=S*UrQ*vf+l!Z3$wi+(>x_fRE@U9gAoi$rJEv+YUGVvwGddBjpHwp z2?F~1SbENk!guN&oB&Qr4G@$Ca*ON`>-&*eh(}r$JARP#fHz;Ic+M}de;iuN`2SrKkK}*fM00X-urPFXG9gwn`FDG&C?zR5Bn9M88!b-x5dV$}`K@W*!r!)e zoC3{H8_lDM@~bHCqzW0mDJ`ql&-`_i+&8r zb5Lz25^6|fg=yCbg0N$BxaiaInVFOOlk`mP5c9XCgv&+iM#K!##*m5)4`HZpNI0Bq zn2}ZpZ_WLP#D@@@)TPJIwz$}h`GD}Mw+jBs8xhBF{6$w zB@CrGj-MM9vE@RB9AK$MdYKDe#F~FTM~H?-t}BH=$lYPg)1IEC$t>+I%O4BmSQ5!9 zXV;rOu;qsydA%QgUlnLS5CHE{`tAI6P)BMzw#R*1{(3AKo>;tX}*p z8Y$F^h|HvPr|~UxM};>`2{*IH0+TApGItCLt-^vb#-hwXvW$bk#L;WSaE&=FJTQOE zsI|4BB1Y=8f<$Uud=)}M?YV0QdB2SEytcv^-HVl*f~MH)AVN}A6fbE~!)hyFf%Eu8As`q*WSY;t6?8Q*W&-{v6RZ-W9Ga@O_sw5w6pKYMd+caD$-jiEVs)CC&W=mqW(PRlfvDI~^KD_w zuJQ&Ny@q>}o0hW1Sq*UL*x!D^y@iYZg_&%)GTy!<#xWn(@0v)Qa%tOCjn@_|(AjAZ z9Qal(;v@6D{Q*d>bdpn;lb|}iahdC-XNhn3R~unCP1h8`Ph4B~!-HRydtRY)s@(>! ze^ZYiOmwfrtJq=D7IWf^eCG!OCJ|A@*l=fO{*Z<_u3H^J@y+*&92 z@1yeHxom1yu1L!$n`+f@fz;MU6bOAku_dinckUR&WU!5Z!W7qAz^!{E#?^7hf4K{b z8uc*$oI_mekk~G~Oz|PdOXrREKb1`Pl779iRLxk#n`>N?lX>c7!iAt;vRpd5t2>_R ze9Ek!=6%cN`uyV=H{N<~HhsX*tc^0SO{6eWhd9E-}WXp|4c}W;x7L^zdn6UaYX8w$ARZEBn0~bdgrkNgBBJat%&cJ~# z1jeY)P@sgr!pm`zXe5Q4QOgpn5<2SOrK_*fbr$hjp1``9VrELI=u0@B5^sQdp;4Ea zvsl^tl7?|Qz{WvbL#n4W9#ExFa**tJDLEKC9|}zgzRVR6Lxcw*+p+6tu8Wp*yZ_44XdJSw3m8NsagYg zdTf({+i#eJYn23nLsk_&4lAfwS};)%BXMrmZDmoxO_ndE{_&lHacb9&qgtx*;>((_ zg@($V7?wTMk>NpKEl_M1yjJ?W3AZ0u^l)HyUDMen&k)NIGg?YUGr234E=L!BWPz2Z zIP1wIC2ed>=m|MH+7M9>apq3ygWVV-=>-u?=E6=FSLo2ab;Kq$D(|PgyWnbszq=W4 z9Nl!*5v#2)i5j;Y>UuH}Su;JG3N6l3v81RT7}bM(rwptFv^KwjgHyaprSfu0wPj_1 z36{zvD9pE`R*xO9Nv3J@a`EAZa;@_`ui-SeS4?@}Z))ZIhB!4{5t5ElU0Oy2QT%42 z?=ByUje3${+Uzs|9Z#))+=jo!G^Ps!-jLdXRjvNn-wu<&+svHR`}9!N>NtAZI2fKB zHp(O*l1_e+Au1_U4aYV1VXVYIrK+O(xws=qa%mkxDJma2Eak^!!Q(B2RIkpf9mjyZ z1CIUe4eQlCJ0s6dB)onpHw|6v>KP!I`XZcutEq+aUktLNo1TY1;}+v;&f2_ywiEwu z@43&HgZ1#>1w2w|=XCYNS~8Z0L=-V_^0AqySH)+jXkjc@=6ku4DTTO=ggXwo)hZqg z=+0BGx5=#5B-d70U?y8q)HTd%yKjwQfrXORFrHhzzy$OM)YlUjfFs$b@+^?KdF-49BE4S5X7<}kmSlB%u{_2 zH@PK*Y@y_^nmzYMJr0O5~!Q>7kwS0n{u45}ajCF6FzWB1Ivr8(K9UkqL8~ zMtZx>E9gi%zq**N?-1b@pE|tCN_+ zU~a;qz(ZLB2LseeSPi09w29&ar3&Dz=nI4q^|#|^yrDIFDzF|9J0;e%212{HmyvEB zfhE$mkF^F$Ehh>WDpz__xiB2jm}dh@R&myou-XNP0qHX)q=1%){=6 z^F;C~{y)0DDM%Bb%d)y`+qP}nwr$r}wr$(CZFkwWx@=c({}a0pJ24ZHk$E#RAM@en zx#ynKWyw!8io_4W?XaJ@o6s*Y{S>BhfUvNW3Ek{VYwSl{$QGu2pgK`aL*sEM8c)uF z*?VxA@0T-QHL+#7AR71Alt5e*S4cLdSW1?VF3vcdGfZN%Wa^NLqAqC>1oMCOlJhiPYs>MA0 z?1RAA<2rrbD9It$6%vNp7#2k66pHneT6^Zxkxf|KZwM0Y>n-9xH6W$A^ypibNQezHE+n)+-m+AdQv9Ekn6;ab#FYu<q3(85OHa8Wtb@>sU_ zrna7WBB)|xJ+b2dGq7P5yq+6#?Tf)izEe~5fWncy}&LNyfp4wmU$6U z(osKIk_&8CFkkOe5-*j_@AsL}x9I*Cr)#!TLt}`kgD$4jWp>Bnx^->WVz1a;z~>V@ z0GyW!an^LaI5RPM)$*r>4O#A(yf}Bs(M1KgYRhCR%Yl?z0qrBV+S;7D} zG41JS!(ycmv!8|~LnDA*UtB5`6C@z~-*rR}Qp*=G)+TM-8>NtABI=gn6?doV?QXHB;$Ql|NGoXPwSanz)rKR3&@P)LJdccYRm_CQ+qz z6I+mFvts33lz1z#U8aF10^U^`Uhj8YD*jB2mc#=Err8p(V6-_!g#A^r+Y2FBiXSvE zvL30KFls`x_Pr7UDN#GxpM`8LCt{1l%f-nNtc&tH!Bib1WNLYM?iZo63|G@;DEulo zUe&*1R;lDx-0AJ)aJWW1mDB(ft2l-R)buQYsBvJ-fr5diL|_aG*U^4K zfSQxP1igisSzdRu)<$bsNBXF}*;1rxE!mn6CqKh9HlZ)l%48q)_C91oSMh_TBeM) z#-xr7u+RL{2qpYua&~9|Da(`xXlC;rZ{G6v;G5266nhHQ-! z6hS{e9X)|Yz0B^{=Hd=9wV)M*@4O%)4Xw_rZk!s7ujV2c?}L-N=Sb9R-PlfQ zxC7S#U7b6#1SJ)WuL^VZSzD7w^P`tQ@v`WNfE&>FE7=XD{l;vX9`v_A0ahbJNa_rY;!2jV2L< z>YJFSPkfYKIS=-cH~Q@X)_sT7a@b5p-QHcZl?$CvEEhL5Q#XZ!mnu>*j2`~iJ7=1j zhxC1%RfQ!A2{RP(iF#+*oE0sI54yV;n+Mxs$WiwC#ej+1TK zW^Eq05zX2%RY#;y(aEEIs_CdpHeO@mnfmcb@548n+Q1~QPUZEs5QYPDYbFW$((UMv zSg(rFj(zCPt@^sJeVE_1sbxh1kpUC*o*uJmR{CEey{&aXAi?5Ed`2Asi{7o$c0Qw? zfLUkn+|w8A=en;qz}iNn^^6~B=~~3}54Gx z?CC4)DMHP;fO4FGQV{~>LjFeW-y!*TjpI9xobkPTs;}U)_k6N)_tjLpe}Jr-SkHOR z2KI3-OLw5J+G;ea4UhY#@%1Un`(~e_KNyeB5L)-{z@m*MevM^k)Jt}#I03yRX2kiy zfv267KF7m|s|td_$cf^F|G$=t}-z& z{@G^*Xq0^PROx2?G4B)$5_waGk9pzJZ|QNEx;TuEpM=GGFaX-8Dc?2aX2|#s{lr#X ztx2X&{yl4ScxfCH9q!UFv$INf1jHY?dek9CIA|nhII0i>@Mv#YlTIg@nW@wL+yy=s zee1ir$~7NSgaamDSz$jUqe}k9YwBi-&4wzL&40$*8Oe2INM**qXEn@zLyr5DDJXZT zI((3l^}bBc96t)+>t5_dP7|kVKo;n#fI5d#em~JevLXPet}f#s_PX zEuS_*Sjov0BNZ2~#T#29{3?(9rCN9iQwsrK9n2H=?+^*}yILQq>HVb4iIM)NX?auG`l3z7?T33qkN!%e*Bc&gEG%_3*iHDM6D^8J$|I+Gl#wkeNkr5_5{G=S zH$C=x1Kdg)jtBHr+jhaZ7iS&7;ao#XF0Z2F=?2k+OMpp}$p@{N!>yHdfjD0sp*>9N zO56ipd&X_gUXK4!)e8Sz@u6nXxV$|k0J z`N+2EA+{+@&QVq=JsnF%X>XS13+n8{L)mljFUWs@s&#)?U#x$cuG;^In*TdcMe;u; zTK^^gsBC6r`v2%o%e@djXrmt-Ke8J;&E79hwnKrWq{|74prn?R(ol$9q^_wU$AUpb zky1cP3~lS)7%;?XE_AQ@&(eZQSNqSKX!YOP4epKYJ#)Wr@?4PN&wGFT{{HyA62!YCi15mI}IU|48?jNAuGwYK~ku|f!RoJ_|I6ev55JIwhfL=nJU}z^0@Yg ztb;ADwOTSbb>Q|l#maz)VCxdf{B(Khj6So`>WDIMNsQ4-qk0&rJT0DpdsBfTu)b6# ztHpNAqO;H;s81=3d3-h(Q)q-wFuarQdkmQw%d`bFH*?rPj1x>+Og@j5@%0_gO-^oVy%pf6N;YPDMx84~6cj5f<74&OZL*3!D{^LV)7TbtI;+Sw zc1ShTkRjDLINI9P*xF}p^gNdOYTs3uW1{HgsG7T*x@!%%FwNT~C~NGhwKdfMl~*lZ zz8Ob%`REOnbawW(wib2`wvFJV3JF8A6(#`Dk_p_jj!h#{@g?>j?Qvv9X!Ljj#`u7+ z*QTalkFuCRV&ICHk^$|jt423@7tjjONa!^bUPDGL6XvX{jM!5;>ht z*;{I?t%p8FtRGCHAJUAHAJo-rEp2CY@NDb{g&%nt!FYPADydc#MGO@Ocg?Q#dQ_Gv zQObr`l{Q^bEp6}pP-an~ERgP6*f$eFnV}b1XF`Sz<0PZMIew8x{bbX05y_1nC;a9x zWBtR>+&h@X)UmPYXsAwcR^y&Ey22U6i3 zlRdWzZ&-Zqr@F#k&YZ{ih=i3O!i_ssk<^yefnH=YMxRD#q6&o zB6o$Evz8lC*J8oFjHOp@RMJ*eYSWv@qTJ1zA;@9j@drCRg^jh=T1>pLDz$iIJC>B# zyfoq?zroJAZz2S=tK6c&a^UcuJO_v(DvwJF*1ZriH>g85T{i46k)31q^;A>ESB(vz zZ$20TWmS6Z$~hcB&DngpjU!#Kcbf&L8N;nP%Ycw$Gqm9tS8@2RS!pWGWx0f7ckNCT zc>bVi^S+t3x<19#m^P+1v(o59XRiZ()xAuA&2nJeF%l5HuZ(eGu3kIR7Php)lvJX& z%+66)sjW@0WXM;PTcfKlVq&w)jGBjyp!?75maDW1N5j}E4!kkc(SRftxaU_L8r@z5 z@hv%4e;XTnU1z8Db&XnWm)(#|!x`bHJ8e+`tW|D(yqQnu%&Gyl^ay8ThUA_J6To9fMC?RnR)s} zTOnaNH5zQ(4`o@BmGIz;*%W3Rl}$p_IX@h#mdC{uza=Ioz20-zO1TE9*I|$%eT1TY zvT$>8HJe&+1e2^(H#lS*(?^0(JaArM9wxi14DygX%~&28H`G$6m6b;HoNV$i3euB- zx?T@)UnRvh&Tmi;;5^jfmz3OIA2pSljoF*w@usj<%}3@OAv)k-h!Qqf$y}hS%_{S? z#e!%GNSEQT)DY!bnFeR>yy8+WM`j*+Pct~{94k~>P{-fsF1VFt)Swq(C(WeCMkvKrY!WtCm@U40 zOor__NNV}_>+$hpbgf2*?Y@O$b>f)QjrbSSHK?h>IG2I!GKK*#YRoR#%l zWNe6|`6$VK$W0T5&MMMMKWZ{u6QB$>f$vj8=Blt!f1&iXshx8SiKcmw)sP_ggo9x4 zc&VxR%3z?`Z9Nr((=s1GyeR=)DXfQ^!NE0}Z^$u$P8%pvO5=>Ej_Nl(M?-KoP_vlO z)v93YX!C)}IoEq+A?b9Ys^x_j#3N?n+30JnMX40{tSnlGV=0A*#dq~rxv9DQ7Gb@* z;i5EiqcJfSb=-`#5e%tL5;-8^HdZ}5hrYUym+DzxE@`|b;t#uqP@38dTiw*O(Q6F{ zT?lLJHZiuns5&VfZyCc5(SJ#3Sn{+~hxtvrHGWKjH8SK!mPt7;?>mL9US8?Q0h&_> z^-|^RjvLF>_-D+##-4q+3p(6T++>CtmwHjxcBwjT?2pp)wZ@c`zcL}W4Ul^2FAF(o zHD^B(3nZ+HVH;CK`O@z>omg0*Wi-4PMOKt?yIb)ud}-_)QBER}NycDY!FiFFDPr3V zqIrSXDTO&BP%3m>L@MSH%XO1d$ket#icofOM)7ItTMe;tSIJ%`Xz&5O)(JIAg0U}(Lh&A)gQ4>Gczas{|f||aW z-atSY^Jx)58W0o5@i&fXXJxiNBdutUjhVP~kNJmEqf49_Cy{l~X%^PgoxCrgl4)C$ zRl7Hmp{aT`zwx6OQ6*z&QDPPZVj!g&lstg5=3?QPj4p8>!plk#len9NAt#mMv^Xm1 z)eTINQrTMvoyu)H*rk%?#1R%rwzM__M$i-17pP+vOv^u!jkf;4kml*ZEOje8yX8pk z5+*wtWf4NFk%f5}*OYd3)=6ba{Di5|sHmc35ylOWN7m_K4wi8GG^drGzngxLF39Z- z%jJ*33L-f_lt#*87SRVo*_XktG2GS9ScNNJgDMP#sfKm&_B4a5_snfg8V0O8qX z?vFpVWE^UHbvZAIv$|BRX`dO5P07EKFw0VNW6>&~>{^d_!?BZ)k4~f5uf6x>ym@yd zimAoyx}pj7XiafP%#4dVMI1I&hzU%V^~N#9b0i@`>c52kLu3nCG_B>^c$(**!Vt(>4L$KXXs1{!cV!%LARpX z60*2~*ghNS%0*A^CG-JM9Fsot#G}Q^TX_Wqgjtca>FkI1@Nmh#CrH+F#cLLlcV8G+ zbqv0{)wb5rmM_jLY5wuJ4dc7FwcBqc$SLx9Q7J;ZX}&}|`&$NE{uHYNSMGb0lCd#M@VR;u#=OWNc!W^=UzNCD!<#ER|`FoHL72*+1 zO*t-vRU9H`)qcNg#v(0ViB&i)85gWUvcm|YPxflHj9hi%E!;!!`4@rc*pFd4s2RVLp`p}X<+AP_dKc3ML%LUe~VQ48jft&0Nupm&C6c#=y@ z++FuxovWp)&y@>F8p?ZCC_~R5q9K7t!Tugl*G@Bvr$b%n#8EHmt6S^y6jrr%0epN- zo|QnUmnPeDU;85@ExMMtyM%vN7l$O-*VI+zYpm!}0CuQH;=}7ie>G)O^O=?Rc7ED1 zINO!T73oPtH*@B86op?pqMQFbb%m|Vc6RL}YD7M#W@VA?@Fxf0uFadkYD=wg!-(tY zp7`1td{f*tWBz*J^y;a2$!|bEsXed#U19A^4FEojlX;j=PJ#d@hvIT;3qPl7Lt7;0 zrX$l=AN$VH6y)9F4WEKfhp2}?Qv(5LkRowU;p6x%Epez?JH+-Z8{ zQr*C{QGan2#H-l~k zE>MDQlW&|qU?tpO#)^M*#ulBuviM0j1)|9}clAv3&z8?0>ms$u6}-926R!?w{Pws9 z`ZD4-$5D603{amx(rL_%lQzN3%WUc=9+-W_Og+(7+IOTZ@s?w=v2;l0V%AAMy6jx~ z183wOFxM85ZO~CVS~|5d>K(A3=sd*V!efCr7+VT9tWr+3@GBBy#K_Aom6 z9nk<3*Wv{~{kn4RbFH1KQ3MO|Vs`vQj01yzo^qyv2VoZD$qO6TWO6q9hGz?F46q52EOBaVE%BC-sp3F4j?3G*)VN(Md;R zLOq$iYPkc;<}s{TpIlj^pRI)GX%cZHvS#|TXBAraHlHoC>{LuZca^$V@6k-{G%){B zxvI5ISFzZlZCp^^%e;A*u(q}x)pb*Q_O{v;rzkK?{*y*jxUzA4couM@(K%AIqd zj3lG7#RdSx7&stTeME{{O$DL zecHrTXBjDpY;J5! zMo%+I^TazK7E;>MSlY#mi>4-}*^}_876K=;pi~?i9_&KpskX$*mv)I)eGZVT>!wzs z@$CjQhGyW|LuFmuo6kBbsI=MiXtdcxvx#L4svH7q7xz478d}7;4nOs0s~cMw36AMZpbJ5H_LP zRyFERy^@SDPCUwt{B8~+B?H*?h7wpcaVvjMD{D&BB*U7(gX4^2bLF)ZDn`>(qt#3R zEu0|hRwQIdR5G?qnk*>ZyHqEU9GI+a+C5|o?{J7nL@szH4Q#e_hq}@@*f^IaYH-** z@So<39Tz91nxz-sqb4r`jc(jLdAR`(I!qS!m6P&KP!!$h!c>6D$;}DQg(ejx<)t~Y z*xWeZtbnILD}?&;xu{moDPu_noC(fswC-KFb>>U_T zXy}>PY{=8A)H6to?oyU@g$$@1D4yny-7onVqsASiQG~rz^jwXk)0Jv&mSf&Xy`m1T zwa{?)!Z9;GqIP1=w-dDHLpQc)sU$vI{)HmfmZ)dZV6#E)G=V$1%GYZKG$|n`${dcPOAA9THNwFw~t?Ym@)3cd97J zRX)c#k;I5vos-V2EyMB4<}?I_(#}S+tJtmTnkS*#TPHl1t6 z2%)_l0x-B>d@91)7OyEy9HM3^J7_3cHO8{^d)G|0&JWQGX*-zJHm0_1oMOu!o_DZn z&MVhtux*(-Z1Vk}$4&k2Tq@7L!pjbzw|iZ&%F0j#p0+&-Y*jyiOhlW&l=;^MWPcm0 zV1b=|%-JaMfi2?y7_qn6^=TrJPV^52ZYY_%m*~+#+5ODu*Blt{Y*_n6daoRI&im>S z{z#d|qN4waY`>*F4h&s8&X88t2THLgzPKKL=vp8+{b!6^^!E@e zjkKASXZ3()8#cnvdn$xa>yymAQ zok-rVgaV!!lVcpqqruhn1p${@i9((yAZee6Yb!kep2HK{ z9%pnRdhyqh<3qbEURiKWP5!5(*WG!E-A`cw&)eI17}F19Q$`@@K*d21T=uv_JqB81 zv90!j0v^!|SF|5&=0oy3!JN7g#j;#P@L@zH5?H(dFqFx<2_ucbD^Uke=;)?*bj;n< zExm$4^U2zI`SktM#2G?nS3>oasd-4)$o6rF0mW|64<^cW1i>^6`@mr6Xm*jvH>})d zd@l@hh?OY-CoAU#_{28&5k^4bS=FOwZ27o&N85K?zz-iw;Y$9 zbKO+EI-T50Z3;gu{TX{Ng+`?CjkoD&z8=l^VNo93a3!JM^?|ApqY7=xKf!jDwCuFQ zi}nG!)0C@+38j%&L~37NVg$Bd80iPO@2joDIShE;q4zNC!Vd2t9FV+W@`ozy!+h-o z_wNm+-;wx$^$)C3e(fa?ehD`1V=UtNk#U72^A;uSijs(lQVd53i6b&#iTRk~LM1sP z6QPj?(~_j2wgFOpG?Wsw6?0%|)!c>igZ_l{xrk_{?LoP@yCn38RzQNS&ju2oLo4_^T)I`FO-Xki16oLWTx+!Go=fq5#Mi*mIiW%P@?mhtCcaiY~%D5VqK!*$TnX`Zwp z5r!mWtit;! zAjrTi?Rb)15c5J%Jtzs`*q-{Kdjb4gB+7Aph?2D_Zw)*W_~>BQUSPC zt7Nr=HVS_`VgAzHJFZV(!0)tR7O+w#1REEQ)zl-z)FD1)Ywx3;UGma42wbq&Az$Py zhO|XsW><(da5rA~Q2-N+GU)`OgCP%cNYb&SA`dTXmqf$>`Mj`DN38^>4Gk|qr)a^b zkVF!U(iADflu3F4DYs;tUUSg(FJe}6@&G?P0_P&jb|QW+;$ROt6N^>=^_gw2sr6_Xt1QikxA;!0Ct@D}e63QQ|Q zk>_S15ITWMFNIU;q6wpQVfiM>6b0!+iJh@fO*~k&7;ME{b@nA#4h)b{NG!>FLfRtFL(-LS*6lI_-$QUckt%P3*Bse@x@lB*Vg|gU zX*uWQvj*T8dLkX=P0#JLA`HK~W;WkIs}tjaooCtCW0!t*knvAIIEKTO@|v;ukP*VzC~8bJv(O|A9nvxFsI_7Ar*YWY9D*#D z8+tGqRMOfjTV`k+K~WlvlT+R#BlA;0HJuXB!Zn>T&?GdSz-ihfrO2r!jYoGhol?-; zDaiO7t$mbo^eoM2*ov6B)MvpT7<^E3UQ>>vAZv!&NBOpzR_$Urx?$ zZyMpYnj53LoLo4)A*fMU6gDIwfXNS6DfJvxF-Qfe4LW&qv4||;$g#-`$ong`8V30f z@rcAEW=x7iNG27^15TmqD|bpindpRp$RXEvPGMmw>cha2o2Y~c5j?}^5hUsjd$YCe z1XlY7$LjJGlryABYN(NeFv!Rx zLq#N_-j_cmzd2*#68Nd8YB^EGB?Y89k-gC2V@{)@#L%QSrHrKXSs6A>Sg>$sXfBA( zQ%5)KxbW@`V4^|MX%B*fvEiTE)6kzqeU6cS?A^9 zo0vt_qKdYs8fq-fd9d(iHh+03u~z5e8fRV>_RKS1dz~6eCbB!!rFxTDTJnIK1*+c| zdQW(DJvGR*hx#c<=_|2PY_epn$&+giJ~LL$LvI#gs!}KF!LRy*Jkhl{p%hj-7B3Iv7=;wm?p z;+9mgBmFjLC$W&dj=gn|d1gUHZjH1d&o^u=6yeqaY%c-u@vnx7V9Qxv;P1&|#)zu` zBvXnkmiR%~2Lx6Fe2K@Cd_{@8fR$q@&FBM^iAP7~4Y2Yg|3R!{GtTTD=(EJN1K|gU zj*uR(^knya=?8jUk=^jvW08(LA6Sk=g#+^ku5IzYkp9H_1NjF!Te7`D`3DeQ8NU#X zNrgM`cQk%NygRt6Jb38{X(}is@h&BKFO^VSK{e(mYg69m1T0kwv}Hn>3XB;@6gnUR zEs5`AS!_a#dBV=9un9R<*_#3+XNDOeF-*ow6f+ejd81C#JNd%LeS)+}X9JD29 z0Hxl5^o1~cFun1KE0d0FydjepX?1C;Et%ew`T{k8R&VU*pLa|0h0+zR_8ffy%?4Ua zqFs^PmH6gw`@-5Q`OTrWd6RX%Cl7$YH_DdOS|h~;lr08d!VkM|kiTI5vB7-#Tl=EWglL9>mUMBr||zQ9h}~ zPLfA!aW{IB7nTbhKWsPnHIy%+u>GVfp)N;Qo}|3^ z*M>o)6pCt~&=j(jM-`)RrowDO-7}{dwpXY|0UM_XS4d7M5gs~?NGX%`z+{xrEc(%} za%yZY2ods}LPux%vd6&h0zff>yC_#=;fAMF`P-35^-#IK< zv(KY-C;Nq$Hbh-sv46O*XUI(l#DAc&7RWUn`TiotUeL^kcSiP0j8vEp&V1oHgN*@# zx8AZ%`NE3sIDWW0kw-YbZjS2{svF$B2h@LnJF%@creHZkbV)v>tpaqLgq{c#@2Oz7 z9!_oz6*MUwz9FE8)n|}aYtY3A`{dV~vh0ux$f^tF6+??yHd$LK|1_4IIGIFp{Mmu} zimPY>iK)<_?AD>}!v{y#);(sWi~BQXk!eR>HkfLu(5fVn)z1Y6m}=Xrzltd-v8X*n<`oxi+=wIl?X%?5CXHJH1skBvdkF zR5FxA$3zE*-{dF~G+NX_q9KKb)k%~G6VCw#QD?-qr`SpY0~ZYpYzx>*Cr(y;+Dgp3 znl6w;vxPCP!m`gwQY_LwV2{-?Qi3{AEwI8wPmR#Tve(v%ge^xdv@FTxMZN7H`C~_c zK#{@>JueGX7BuBLpZN>vRE!WYT+xRy;r)ORJ%EW8(nJef)(~CROetfuYuINMBu-E< zsu9=Hywef(5{{yY-x2x}#-jPhk%}h3W&@}r6>Z2s3u$`>e5ltJl6MF`AlH_XcLqKr zeFM@1;^0b!9l6gCVh|s|=rbo>5FAU)jq?we!9*T)Q~*#?nI26LvM0tEmL3zbN8(PE zB~;eLs5Vp{OR~qRHeQs3%@DfTl3N)!Pc+)HbLHZR&ko&z)dcB|o+iQB{_2dRBjsoo zJBDw|u(b$Jme3^g3{p!h-%x&FP#4)XW_n=K5$89Vp5&-Wb8X_D;d>p=6p9tSNzj4F8T z0gX>TU}SX%-R=QNUN~n$vga3T_$m*{=7UUAFoYsnJUos_sLVK$nk)r&;U!y4Bw-p~ z0#1{JJM{o}37LGAkAr269ZRral7NIOIymN1qJIYmFW_2%5P}gJ1~Y)=+L!s1DJW%j zjqIh2FO)gyDuaZQR1qwF7aeGT=AKDqtPuj8${X2qNSK#OgoLG(c&&QDHP zT<|;)MW(2q(Dx2eEv5?Z6&D}SIA&YWI40+WlH<98v;AxQ5OxJe-N$nXw97GJuN&a^C5Z@@S9He+AzBa zx}0PGBS;=v0aqRJU-%XjrP*RYn6pbdiW4q1#2}{}!--4zwMaS|BT_G1^AjF{2yG17 z&Nq%YtCoL8!6bD*j;uX8sB1%wyFco@?%cK#x-~jAR{&sHYcP?OM9&nNfUBq;BN}$7w z?jiJ6DQd8TEL3At=1?nTzAA5Aqw3Q(vDg7l{U_qw%rKs>1GXy^-Yf_IGp$1!@OTPf z)83Er>>^>Gfm{uO!s16JW3J~Tf-4AO@*)}Ck+#Ihhv{F*o=?w)0R!k!8y}4ryAi{h zWcCFCSTW}5!$eom8Ncg7hS${^GW6ginrsT=>_S$qxH+Nd{$ZKVj!fOZ>P$I?Vz0Z&K3Qxn+6R)a)IH+9@r>^1J#t%z?c2b1E!>AKuJA*K>>xv5hZL@U0bdCn8X+JF zcw-XXDGWaCMn$9LkUw|`b0YtPvFh=)eA)^`-&M(+j zru9kcFXURofaIf%?vmKv@cB3X&IsR#`8TG{L|@^b-~1%kS1=)?bCbhY-c7;ZvOkl4 zM03pslhZZ9J)=LBg6t{KmB^;a zsQ$pFB9LW97^Wl(lB|S=M9Rbi9JE5}bWNla;;|LG@_i_hS3tEI!lj_y4WQi-X!bB$ zidc;}=?h^;@$M-T+d9Vub=hYbk{phVsV8DrvukbGcqM-T#)d~jn