From b16ea02ccb0b6d2b72376e7396a8196ae0e83c0a Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Thu, 18 Jun 2026 01:04:55 +0300 Subject: [PATCH 1/6] Project ext --- .github/workflows/regression.yaml | 17 +- README.md | 46 +++ projects/extensions/README.md | 110 ++++++++ .../BulkOperation.java | 26 ++ .../ElasticsearchClient.java | 61 ++++ .../ElasticsearchIlmClient.java | 33 +++ .../IlmPolicy.java | 25 ++ .../IndexSettings_Builder.java | 33 +++ .../Query.java | 27 ++ .../SortOptions.java | 28 ++ .../Time.java | 26 ++ .../TypeMapping.java | 26 ++ .../com_fasterxml_jackson/JsonNode.java | 57 ++++ .../com_fasterxml_jackson/ObjectMapper.java | 55 ++++ .../Cache.java | 25 ++ .../LoadingCache.java | 28 ++ .../com_google_common/Cache.java | 27 ++ .../com_jayway_jsonpath/ReadContext.java | 53 ++++ .../com_rabbitmq_client/ShutdownNotifier.java | 22 ++ .../io_nats_client/Builder.java | 24 ++ .../io_nats_client/Connection.java | 24 ++ .../StreamingConnection.java | 74 +++++ .../approximations/java_lang/Iterable.java | 22 ++ .../approximations/java_lang/ThreadLocal.java | 20 ++ .../ConcurrentHashMap.java | 21 ++ .../ScheduledExecutorService.java | 34 +++ .../ScheduledThreadPoolExecutor.java | 20 ++ .../java_util_stream/Collectors.java | 84 ++++++ .../java_util_stream/IntStream.java | 24 ++ .../approximations/java_util/ArrayList.java | 33 +++ .../approximations/java_util/Collection.java | 23 ++ .../approximations/java_util/Comparator.java | 36 +++ .../approximations/java_util/HashMap.java | 57 ++++ .../approximations/java_util/Iterator.java | 21 ++ .../example/approximations/java_util/Map.java | 45 +++ .../approximations/java_util/Properties.java | 21 ++ .../SSLContextBuilder.java | 38 +++ .../SSLContextBuilder.java | 40 +++ .../org_apache_kafka/KafkaFutureApprox.java | 25 ++ .../org_apache_kafka/KafkaProducer.java | 23 ++ .../org_apache_kafka/Producer.java | 23 ++ .../org_elasticsearch/RestClientBuilder.java | 46 +++ 
.../BulkOperation.java | 26 ++ .../Query.java | 26 ++ .../SearchRequest_Builder.java | 52 ++++ .../org_opensearch/RestClientBuilder.java | 44 +++ .../RetryTemplate.java | 23 ++ .../RestClientBuilder.java | 33 +++ .../reactor_core_publisher/Flux.java | 49 ++++ .../reactor_core_publisher/Mono.java | 48 ++++ .../example/approximations/rx/Observable.java | 137 +++++++++ .../co.elastic.clients.elasticsearch.yaml | 13 + .../passthrough/com.azure.storage.blob.yaml | 43 +++ .../passthrough/com.datastax.driver.core.yaml | 13 + .../passthrough/com.fasterxml.jackson.yaml | 30 ++ .../com.github.benmanes.caffeine.cache.yaml | 23 ++ .../passthrough/com.google.protobuf.yaml | 132 +++++++++ .../conductor/passthrough/com.google.rpc.yaml | 31 +++ .../passthrough/com.jayway.jsonpath.yaml | 14 + .../passthrough/com.rabbitmq.client.yaml | 33 +++ .../conductor/passthrough/io.grpc.yaml | 23 ++ .../conductor/passthrough/io.nats.client.yaml | 28 ++ .../passthrough/jakarta.servlet.yaml | 15 + .../conductor/passthrough/java.io.yaml | 40 +++ .../conductor/passthrough/java.lang.yaml | 112 ++++++++ .../conductor/passthrough/java.sql.yaml | 51 ++++ .../conductor/passthrough/java.time.yaml | 25 ++ .../passthrough/java.util.concurrent.yaml | 75 +++++ .../passthrough/java.util.function.yaml | 26 ++ .../passthrough/java.util.stream.yaml | 29 ++ .../conductor/passthrough/java.util.yaml | 115 ++++++++ .../passthrough/net.thisptr.jackson.jq.yaml | 22 ++ .../passthrough/org.apache.commons.lang3.yaml | 67 +++++ .../passthrough/org.apache.commons.yaml | 28 ++ .../passthrough/org.apache.kafka.yaml | 25 ++ .../passthrough/org.elasticsearch.yaml | 167 +++++++++++ .../passthrough/org.graalvm.polyglot.yaml | 89 ++++++ .../org.opensearch.client.opensearch.yaml | 69 +++++ .../conductor/passthrough/org.opensearch.yaml | 168 +++++++++++ .../passthrough/org.redisson.api.yaml | 12 + .../org.springframework.data.redis.core.yaml | 22 ++ .../org.springframework.jdbc.core.yaml | 41 +++ 
.../org.springframework.web.passthrough.yaml | 33 +++ .../passthrough/org.springframework.yaml | 34 +++ .../passthrough/redis.clients.jedis.yaml | 263 ++++++++++++++++++ .../passthrough/software.amazon.awssdk.yaml | 95 +++++++ .../lib/generic/graalvm-polyglot-sinks.yaml | 29 ++ .../rules/java/lib/generic/jackson-jq.yaml | 16 ++ .../conductor-grpc-request-sources.yaml | 63 +++++ .../java/security/conductor-jq-injection.yaml | 28 ++ .../graaljs-polyglot-code-injection.yaml | 39 +++ projects/repos.yaml | 246 ++++++++-------- scripts/generate_matrix.py | 19 ++ scripts/run_analysis.py | 104 ++++++- tests/test_scripts.py | 151 +++++++++- 95 files changed, 4507 insertions(+), 135 deletions(-) create mode 100644 projects/extensions/README.md create mode 100644 projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/BulkOperation.java create mode 100644 projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/ElasticsearchClient.java create mode 100644 projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/ElasticsearchIlmClient.java create mode 100644 projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/IlmPolicy.java create mode 100644 projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/IndexSettings_Builder.java create mode 100644 projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/Query.java create mode 100644 projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/SortOptions.java create mode 100644 
projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/Time.java create mode 100644 projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/TypeMapping.java create mode 100644 projects/extensions/conductor/dataflow-src/com.fasterxml.jackson/com/example/approximations/com_fasterxml_jackson/JsonNode.java create mode 100644 projects/extensions/conductor/dataflow-src/com.fasterxml.jackson/com/example/approximations/com_fasterxml_jackson/ObjectMapper.java create mode 100644 projects/extensions/conductor/dataflow-src/com.github.benmanes.caffeine.cache/com/example/approximations/com_github_benmanes_caffeine_cache/Cache.java create mode 100644 projects/extensions/conductor/dataflow-src/com.github.benmanes.caffeine.cache/com/example/approximations/com_github_benmanes_caffeine_cache/LoadingCache.java create mode 100644 projects/extensions/conductor/dataflow-src/com.google.common/com/example/approximations/com_google_common/Cache.java create mode 100644 projects/extensions/conductor/dataflow-src/com.jayway.jsonpath/com/example/approximations/com_jayway_jsonpath/ReadContext.java create mode 100644 projects/extensions/conductor/dataflow-src/com.rabbitmq.client/com/example/approximations/com_rabbitmq_client/ShutdownNotifier.java create mode 100644 projects/extensions/conductor/dataflow-src/io.nats.client/com/example/approximations/io_nats_client/Builder.java create mode 100644 projects/extensions/conductor/dataflow-src/io.nats.client/com/example/approximations/io_nats_client/Connection.java create mode 100644 projects/extensions/conductor/dataflow-src/io.nats.streaming/com/example/approximations/io_nats_streaming/StreamingConnection.java create mode 100644 projects/extensions/conductor/dataflow-src/java.lang/com/example/approximations/java_lang/Iterable.java create mode 100644 
projects/extensions/conductor/dataflow-src/java.lang/com/example/approximations/java_lang/ThreadLocal.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ConcurrentHashMap.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ScheduledExecutorService.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ScheduledThreadPoolExecutor.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util.stream/com/example/approximations/java_util_stream/Collectors.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util.stream/com/example/approximations/java_util_stream/IntStream.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/ArrayList.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Collection.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Comparator.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/HashMap.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Iterator.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Map.java create mode 100644 projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Properties.java create mode 100644 projects/extensions/conductor/dataflow-src/org.apache.hc.core5.ssl/com/example/approximations/org_apache_hc_core5_ssl/SSLContextBuilder.java create mode 100644 
projects/extensions/conductor/dataflow-src/org.apache.http.ssl/com/example/approximations/org_apache_http_ssl/SSLContextBuilder.java create mode 100644 projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/KafkaFutureApprox.java create mode 100644 projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/KafkaProducer.java create mode 100644 projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/Producer.java create mode 100644 projects/extensions/conductor/dataflow-src/org.elasticsearch/com/example/approximations/org_elasticsearch/RestClientBuilder.java create mode 100644 projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/BulkOperation.java create mode 100644 projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/Query.java create mode 100644 projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/SearchRequest_Builder.java create mode 100644 projects/extensions/conductor/dataflow-src/org.opensearch/com/example/approximations/org_opensearch/RestClientBuilder.java create mode 100644 projects/extensions/conductor/dataflow-src/org.springframework.retry/com/example/approximations/org_springframework_retry/RetryTemplate.java create mode 100644 projects/extensions/conductor/dataflow-src/org.springframework.web/com/example/approximations/org_springframework_web/RestClientBuilder.java create mode 100644 projects/extensions/conductor/dataflow-src/reactor.core.publisher/com/example/approximations/reactor_core_publisher/Flux.java create mode 100644 projects/extensions/conductor/dataflow-src/reactor.core.publisher/com/example/approximations/reactor_core_publisher/Mono.java create mode 100644 
projects/extensions/conductor/dataflow-src/rx/com/example/approximations/rx/Observable.java create mode 100644 projects/extensions/conductor/passthrough/co.elastic.clients.elasticsearch.yaml create mode 100644 projects/extensions/conductor/passthrough/com.azure.storage.blob.yaml create mode 100644 projects/extensions/conductor/passthrough/com.datastax.driver.core.yaml create mode 100644 projects/extensions/conductor/passthrough/com.fasterxml.jackson.yaml create mode 100644 projects/extensions/conductor/passthrough/com.github.benmanes.caffeine.cache.yaml create mode 100644 projects/extensions/conductor/passthrough/com.google.protobuf.yaml create mode 100644 projects/extensions/conductor/passthrough/com.google.rpc.yaml create mode 100644 projects/extensions/conductor/passthrough/com.jayway.jsonpath.yaml create mode 100644 projects/extensions/conductor/passthrough/com.rabbitmq.client.yaml create mode 100644 projects/extensions/conductor/passthrough/io.grpc.yaml create mode 100644 projects/extensions/conductor/passthrough/io.nats.client.yaml create mode 100644 projects/extensions/conductor/passthrough/jakarta.servlet.yaml create mode 100644 projects/extensions/conductor/passthrough/java.io.yaml create mode 100644 projects/extensions/conductor/passthrough/java.lang.yaml create mode 100644 projects/extensions/conductor/passthrough/java.sql.yaml create mode 100644 projects/extensions/conductor/passthrough/java.time.yaml create mode 100644 projects/extensions/conductor/passthrough/java.util.concurrent.yaml create mode 100644 projects/extensions/conductor/passthrough/java.util.function.yaml create mode 100644 projects/extensions/conductor/passthrough/java.util.stream.yaml create mode 100644 projects/extensions/conductor/passthrough/java.util.yaml create mode 100644 projects/extensions/conductor/passthrough/net.thisptr.jackson.jq.yaml create mode 100644 projects/extensions/conductor/passthrough/org.apache.commons.lang3.yaml create mode 100644 
projects/extensions/conductor/passthrough/org.apache.commons.yaml create mode 100644 projects/extensions/conductor/passthrough/org.apache.kafka.yaml create mode 100644 projects/extensions/conductor/passthrough/org.elasticsearch.yaml create mode 100644 projects/extensions/conductor/passthrough/org.graalvm.polyglot.yaml create mode 100644 projects/extensions/conductor/passthrough/org.opensearch.client.opensearch.yaml create mode 100644 projects/extensions/conductor/passthrough/org.opensearch.yaml create mode 100644 projects/extensions/conductor/passthrough/org.redisson.api.yaml create mode 100644 projects/extensions/conductor/passthrough/org.springframework.data.redis.core.yaml create mode 100644 projects/extensions/conductor/passthrough/org.springframework.jdbc.core.yaml create mode 100644 projects/extensions/conductor/passthrough/org.springframework.web.passthrough.yaml create mode 100644 projects/extensions/conductor/passthrough/org.springframework.yaml create mode 100644 projects/extensions/conductor/passthrough/redis.clients.jedis.yaml create mode 100644 projects/extensions/conductor/passthrough/software.amazon.awssdk.yaml create mode 100644 projects/extensions/conductor/rules/java/lib/generic/graalvm-polyglot-sinks.yaml create mode 100644 projects/extensions/conductor/rules/java/lib/generic/jackson-jq.yaml create mode 100644 projects/extensions/conductor/rules/java/lib/spring/conductor-grpc-request-sources.yaml create mode 100644 projects/extensions/conductor/rules/java/security/conductor-jq-injection.yaml create mode 100644 projects/extensions/conductor/rules/java/security/graaljs-polyglot-code-injection.yaml diff --git a/.github/workflows/regression.yaml b/.github/workflows/regression.yaml index ed9e8f0..952dd15 100644 --- a/.github/workflows/regression.yaml +++ b/.github/workflows/regression.yaml @@ -296,14 +296,21 @@ jobs: echo "key=$key" >> "$GITHUB_OUTPUT" - name: Run analysis id: ra + env: + # Forwarded via env to avoid YAML/shell-quoting pitfalls — the 
+ # matrix value is a JSON-encoded list that can contain spaces, + # commas, or other shell-special characters. + SCAN_FLAGS_JSON: ${{ matrix.scan_flags }} run: | set +e python scripts/run_analysis.py \ - --build-dir build \ - --project-dir project-root \ - --results-dir results-bundle \ - --max-memory "${{ matrix.max_memory }}" \ - --timeout 1200 + --build-dir build \ + --project-dir project-root \ + --results-dir results-bundle \ + --max-memory "${{ matrix.max_memory }}" \ + --timeout 1200 \ + --extensions-dir projects/extensions \ + --scan-flags-json "$SCAN_FLAGS_JSON" rc=$? echo "rc=$rc" >> "$GITHUB_OUTPUT" # Exit 0 so cache/upload steps still run; we fail the job at the end diff --git a/README.md b/README.md index 1cddb0e..37e1255 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ Full diff detail is available in the `regression-diff` artifact. | --------------------------------- | ------------------------------------------------------------- | | `.github/workflows/regression.yaml` | Workflow: resolve → probe → build → analyze → compare. | | `projects/repos.yaml` | Benchmark project list (name, git URL, pinned head, etc.). | +| `projects/extensions/` | Files (passthroughs, approximations, custom rules…) referenced by per-project `scan-flags`. | | `scripts/build_opentaint.sh` | Build analyzer + autobuilder JARs and Go CLI from a checkout.| | `scripts/generate_matrix.py` | Expand `repos.yaml` into a GH Actions matrix. | | `scripts/run_analysis.py` | Run opentaint `compile` + `scan`, extract analyzer status. | @@ -73,6 +74,51 @@ cd new-test python -m pytest tests -v ``` +## Per-project `opentaint scan` flags + +Each entry in `projects/repos.yaml` may declare a `scan-flags` list whose +tokens are appended verbatim to the `opentaint scan` invocation. Use the +literal substring `{ext}` to reference files shipped in `projects/extensions/` +— the runner substitutes it with that directory's absolute path. 
Since the +substitution is plain string replacement, the resolved path may point at +either a **file** or a **directory** — whichever the underlying flag accepts +(e.g. `--passthrough-approximations` and `--dataflow-approximations` each +take a single file or a whole directory, and may be repeated): + +```yaml +- name: spring-petclinic + git: https://github.com/spring-projects/spring-petclinic.git + head: 3e1ce239f4488f20abda24441388a515ea55a815 + scan-flags: + - --passthrough-approximations # single YAML file + - "{ext}/spring-petclinic/passthroughs.yaml" + - --passthrough-approximations # …or repeat with a directory + - "{ext}/spring-petclinic/passthroughs" + - --dataflow-approximations # directory of approximations + - "{ext}/spring-petclinic/approximations" + - --rule-id + - java.taint.sql-injection +``` + +Flags reserved by the runner (`--analyzer-jar`, `--project-model`, +`--output`, `--timeout`, `--max-memory`, `--debug`, `--experimental`) must +not be repeated here. `--ruleset` is **not** reserved: the runner always +passes the built-in ruleset first, and any additional `--ruleset` entries +in `scan-flags` are merged with it by the analyzer (the flag is a +`stringArray`). Example — adding a custom YAML file and a whole directory +of rules: + +```yaml +scan-flags: + - --ruleset + - "{ext}/my-project/rules/sql-injection.yaml" + - --ruleset + - "{ext}/my-project/rules" +``` + +See [`projects/extensions/README.md`](projects/extensions/README.md) for +the layout convention. + ## Open items See `test-system-design-plan.md` §10. The exact spelling of the diff --git a/projects/extensions/README.md b/projects/extensions/README.md new file mode 100644 index 0000000..f52bb2b --- /dev/null +++ b/projects/extensions/README.md @@ -0,0 +1,110 @@ +# Project extensions + +This directory holds files referenced by the `scan-flags` of projects in +[`../repos.yaml`](../repos.yaml). 
It is mounted into the analyzer runner as +the **extensions directory** and can contain anything `opentaint scan` knows +how to consume — most commonly: + +| Subject | Typical `opentaint scan` flag | Accepts | +| ------------------------------ | ---------------------------------- | -------------------------------- | +| Pass-through approximations | `--passthrough-approximations` | YAML file **or** directory | +| Dataflow approximations | `--dataflow-approximations` | Class directory **or** Java sources directory | +| Custom YAML rules | `--ruleset` | YAML file **or** directory of `*.yml`/`*.yaml` | +| Rule-id filter | `--rule-id` | rule-id string (repeatable) | + +All path-valued flags above are *repeatable* and accept either a single file +or a directory — group as many or as few entries under `{ext}/...` as you +like, then point the flag at the file or the enclosing directory. + +## Layout convention + +Group files by project name to keep things tidy. Either point a flag at one +file, or at a directory and let opentaint pick up everything inside it: + +``` +projects/extensions/ +├── README.md +├── <project-name>/ +│ ├── passthroughs/ # ← pass directory to --passthrough-approximations +│ │ ├── jackson.yaml +│ │ └── spring.yaml +│ ├── single-passthrough.yaml # ← or pass one YAML file +│ └── approximations/ # ← pass directory to --dataflow-approximations +│ └── ... +└── shared/ + └── ... +``` + +## Referencing extension files from `repos.yaml` + +Use the literal token `{ext}` inside `scan-flags` — the runner substitutes it +with the absolute path of this directory at analysis time. 
The substitution +is pure string replacement, so the resolved path can point at a **file** or a +**directory**, whichever the flag accepts: + +```yaml +- name: spring-petclinic + git: https://github.com/spring-projects/spring-petclinic.git + head: 3e1ce239f4488f20abda24441388a515ea55a815 + scan-flags: + # Single YAML file: + - --passthrough-approximations + - "{ext}/spring-petclinic/single-passthrough.yaml" + # Whole directory of passthrough YAMLs (also valid): + - --passthrough-approximations + - "{ext}/spring-petclinic/passthroughs" + # Approximation classes / Java sources directory: + - --dataflow-approximations + - "{ext}/spring-petclinic/approximations" + # Plain flags without path arguments work too: + - --rule-id + - java.taint.sql-injection +``` + +The `--passthrough-approximations` and `--dataflow-approximations` flags are +repeatable — add the flag multiple times in `scan-flags` to point at several +files or directories. + +Flags that don't reference any extension file (e.g. `--rule-id`, +`--code-flow-limit`) work just as well — they're appended verbatim to the +`opentaint scan` invocation. + +## Custom rulesets + +The analyzer accepts **multiple** `--ruleset` arguments and merges them. The +runner always passes the built-in ruleset first; any additional `--ruleset` +entries in `scan-flags` are layered on top: + +```yaml +- name: spring-petclinic + git: https://github.com/spring-projects/spring-petclinic.git + head: 3e1ce239f4488f20abda24441388a515ea55a815 + scan-flags: + # Add a single custom YAML rules file: + - --ruleset + - "{ext}/spring-petclinic/rules/sql-injection.yaml" + # …and a whole directory of `*.yaml` / `*.yml` rule files: + - --ruleset + - "{ext}/spring-petclinic/rules" +``` + +Resulting analyzer command (conceptually): +`--ruleset <builtin> --ruleset /.../sql-injection.yaml --ruleset /.../rules`. + +Use `--rule-id` to narrow which rules from those sets are actually run. 
+ +## Reserved flags + +The runner already sets these and you should **not** repeat them in +`scan-flags`: + +- `--analyzer-jar` +- `--project-model` +- `--output` +- `--timeout` +- `--max-memory` +- `--debug` +- `--experimental` + +`--ruleset` is **not** reserved — the built-in pack is always supplied, and +additional `--ruleset` entries you add in `scan-flags` are merged with it. diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/BulkOperation.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/BulkOperation.java new file mode 100644 index 0000000..0b8d7e8 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/BulkOperation.java @@ -0,0 +1,26 @@ +package com.example.approximations.co_elastic_clients_elasticsearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import java.util.function.Function; + +/** + * Dataflow approximation for + * co.elastic.clients.elasticsearch.core.bulk.BulkOperation. + * + * BulkOperation.of(fn) applies the function to a fresh BulkOperation.Builder + * and builds. Taint written into the builder reaches the built BulkOperation. 
+ */ +@Approximate(co.elastic.clients.elasticsearch.core.bulk.BulkOperation.class) +public class BulkOperation { + + public static co.elastic.clients.elasticsearch.core.bulk.BulkOperation of( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch.core.bulk.BulkOperation.Builder builder = + new co.elastic.clients.elasticsearch.core.bulk.BulkOperation.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + return (co.elastic.clients.elasticsearch.core.bulk.BulkOperation) ob.build(); + } +} diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/ElasticsearchClient.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/ElasticsearchClient.java new file mode 100644 index 0000000..085fadf --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/ElasticsearchClient.java @@ -0,0 +1,61 @@ +package com.example.approximations.co_elastic_clients_elasticsearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import java.util.function.Function; + +/** + * Dataflow approximation for co.elastic.clients.elasticsearch.ElasticsearchClient + * fluent request methods (count / get / search). + * + * Each method hands a fresh request Builder to the supplied function, which + * configures it (closing over user-controlled data) and returns an + * ObjectBuilder; the client builds the request and sends it. The + * tainted request reaching Elasticsearch is the propagation of interest. 
We + * model it by building the request, pulling a tainted String field back out, + * and seeding the returned response with it so a downstream getter on the + * response observes the taint (e.g. response.id(), response.scrollId()). + */ +@Approximate(co.elastic.clients.elasticsearch.ElasticsearchClient.class) +public class ElasticsearchClient { + + // count(Function): build the CountRequest from the lambda. CountResponse + // has no String field to carry the tainted request value back out, so the + // built request is the propagation endpoint (the request reaching ES). + public co.elastic.clients.elasticsearch.core.CountResponse count( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch.core.CountRequest.Builder builder = + new co.elastic.clients.elasticsearch.core.CountRequest.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + final co.elastic.clients.elasticsearch.core.CountRequest request = + (co.elastic.clients.elasticsearch.core.CountRequest) ob.build(); + return co.elastic.clients.elasticsearch.core.CountResponse.of( + b -> b.count(request.q() == null ? 
0L : (long) request.q().length())); + } + + public co.elastic.clients.elasticsearch.core.GetResponse get( + @ArgumentTypeContext Function fn, Class cls) throws Throwable { + co.elastic.clients.elasticsearch.core.GetRequest.Builder builder = + new co.elastic.clients.elasticsearch.core.GetRequest.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + final co.elastic.clients.elasticsearch.core.GetRequest request = + (co.elastic.clients.elasticsearch.core.GetRequest) ob.build(); + return co.elastic.clients.elasticsearch.core.GetResponse.of( + b -> b.id(request.id()).index(request.index()).found(true)); + } + + public co.elastic.clients.elasticsearch.core.SearchResponse search( + @ArgumentTypeContext Function fn, Class cls) throws Throwable { + co.elastic.clients.elasticsearch.core.SearchRequest.Builder builder = + new co.elastic.clients.elasticsearch.core.SearchRequest.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + final co.elastic.clients.elasticsearch.core.SearchRequest request = + (co.elastic.clients.elasticsearch.core.SearchRequest) ob.build(); + return co.elastic.clients.elasticsearch.core.SearchResponse.of( + b -> b.scrollId(request.q())); + } +} diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/ElasticsearchIlmClient.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/ElasticsearchIlmClient.java new file mode 100644 index 0000000..883d0b2 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/ElasticsearchIlmClient.java @@ -0,0 +1,33 @@ +package com.example.approximations.co_elastic_clients_elasticsearch; + +import 
org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import java.util.function.Function; + +/** + * Dataflow approximation for + * co.elastic.clients.elasticsearch.ilm.ElasticsearchIlmClient#getLifecycle. + * + * getLifecycle(fn) hands a fresh GetLifecycleRequest.Builder to the function, + * builds the request and sends it. We build the request, read its tainted + * name(), and surface it through the response: GetLifecycleResponse exposes a + * Map result(); we seed that map under the tainted name key + * so response.result() carries the taint. + */ +@Approximate(co.elastic.clients.elasticsearch.ilm.ElasticsearchIlmClient.class) +public class ElasticsearchIlmClient { + + public co.elastic.clients.elasticsearch.ilm.GetLifecycleResponse getLifecycle( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch.ilm.GetLifecycleRequest.Builder builder = + new co.elastic.clients.elasticsearch.ilm.GetLifecycleRequest.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + final co.elastic.clients.elasticsearch.ilm.GetLifecycleRequest request = + (co.elastic.clients.elasticsearch.ilm.GetLifecycleRequest) ob.build(); + final String name = request.name(); + return co.elastic.clients.elasticsearch.ilm.GetLifecycleResponse.of( + b -> b.result(name, l -> l)); + } +} diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/IlmPolicy.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/IlmPolicy.java new file mode 100644 index 0000000..7d872d7 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/IlmPolicy.java @@ -0,0 +1,25 @@ 
+package com.example.approximations.co_elastic_clients_elasticsearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import java.util.function.Function; + +/** + * Dataflow approximation for co.elastic.clients.elasticsearch.ilm.IlmPolicy. + * + * IlmPolicy.of(fn) applies the function to a fresh IlmPolicy.Builder and + * builds. Taint written into the builder reaches the built IlmPolicy. + */ +@Approximate(co.elastic.clients.elasticsearch.ilm.IlmPolicy.class) +public class IlmPolicy { + + public static co.elastic.clients.elasticsearch.ilm.IlmPolicy of( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch.ilm.IlmPolicy.Builder builder = + new co.elastic.clients.elasticsearch.ilm.IlmPolicy.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + return (co.elastic.clients.elasticsearch.ilm.IlmPolicy) ob.build(); + } +} diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/IndexSettings_Builder.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/IndexSettings_Builder.java new file mode 100644 index 0000000..91d1739 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/IndexSettings_Builder.java @@ -0,0 +1,33 @@ +package com.example.approximations.co_elastic_clients_elasticsearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import java.util.function.Function; + +/** + * Dataflow approximation for + * co.elastic.clients.elasticsearch.indices.IndexSettings.Builder#lifecycle(Function). 
+ * + * The fluent overload applies the function to a fresh IndexSettingsLifecycle + * builder, builds the IndexSettingsLifecycle, stores it on this IndexSettings + * builder via the typed lifecycle(...) setter, and returns this. Taint the + * function writes into the lifecycle builder thus reaches the built + * IndexSettings (recoverable via settings.lifecycle()). + */ +@Approximate(co.elastic.clients.elasticsearch.indices.IndexSettings.Builder.class) +public class IndexSettings_Builder { + + public co.elastic.clients.elasticsearch.indices.IndexSettings.Builder lifecycle( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch.indices.IndexSettings.Builder self = + (co.elastic.clients.elasticsearch.indices.IndexSettings.Builder) (Object) this; + co.elastic.clients.elasticsearch.indices.IndexSettingsLifecycle.Builder lb = + new co.elastic.clients.elasticsearch.indices.IndexSettingsLifecycle.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(lb); + co.elastic.clients.elasticsearch.indices.IndexSettingsLifecycle lifecycle = + (co.elastic.clients.elasticsearch.indices.IndexSettingsLifecycle) ob.build(); + return self.lifecycle(lifecycle); + } +} diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/Query.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/Query.java new file mode 100644 index 0000000..217478b --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/Query.java @@ -0,0 +1,27 @@ +package com.example.approximations.co_elastic_clients_elasticsearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + 
+import java.util.function.Function; + +/** + * Dataflow approximation for + * co.elastic.clients.elasticsearch._types.query_dsl.Query. + * + * Query.of(fn) applies the function to a fresh Query.Builder and builds. Taint + * written into the builder (e.g. a tainted query string) reaches the built + * Query. + */ +@Approximate(co.elastic.clients.elasticsearch._types.query_dsl.Query.class) +public class Query { + + public static co.elastic.clients.elasticsearch._types.query_dsl.Query of( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch._types.query_dsl.Query.Builder builder = + new co.elastic.clients.elasticsearch._types.query_dsl.Query.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + return (co.elastic.clients.elasticsearch._types.query_dsl.Query) ob.build(); + } +} diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/SortOptions.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/SortOptions.java new file mode 100644 index 0000000..681cb29 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/SortOptions.java @@ -0,0 +1,28 @@ +package com.example.approximations.co_elastic_clients_elasticsearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import java.util.function.Function; + +/** + * Dataflow approximation for co.elastic.clients.elasticsearch._types.SortOptions. + * + * SortOptions.of(fn) is a fluent builder factory: it hands a fresh Builder to + * the supplied function, which configures it (closing over user data) and + * returns an ObjectBuilder; the factory then calls build(). 
Taint + * the function writes into the builder ends up in the built SortOptions, so the + * model just applies the function and builds. + */ +@Approximate(co.elastic.clients.elasticsearch._types.SortOptions.class) +public class SortOptions { + + public static co.elastic.clients.elasticsearch._types.SortOptions of( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch._types.SortOptions.Builder builder = + new co.elastic.clients.elasticsearch._types.SortOptions.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + return (co.elastic.clients.elasticsearch._types.SortOptions) ob.build(); + } +} diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/Time.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/Time.java new file mode 100644 index 0000000..a9cee63 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/Time.java @@ -0,0 +1,26 @@ +package com.example.approximations.co_elastic_clients_elasticsearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import java.util.function.Function; + +/** + * Dataflow approximation for co.elastic.clients.elasticsearch._types.Time. + * + * Time.of(fn) applies the supplied function to a fresh Time.Builder and builds. + * Taint the function writes into the builder (e.g. time(tainted)) reaches the + * built Time. 
+ */ +@Approximate(co.elastic.clients.elasticsearch._types.Time.class) +public class Time { + + public static co.elastic.clients.elasticsearch._types.Time of( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch._types.Time.Builder builder = + new co.elastic.clients.elasticsearch._types.Time.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + return (co.elastic.clients.elasticsearch._types.Time) ob.build(); + } +} diff --git a/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/TypeMapping.java b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/TypeMapping.java new file mode 100644 index 0000000..11f900e --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/co.elastic.clients.elasticsearch/com/example/approximations/co_elastic_clients_elasticsearch/TypeMapping.java @@ -0,0 +1,26 @@ +package com.example.approximations.co_elastic_clients_elasticsearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import java.util.function.Function; + +/** + * Dataflow approximation for + * co.elastic.clients.elasticsearch._types.mapping.TypeMapping. + * + * TypeMapping.of(fn) applies the function to a fresh TypeMapping.Builder and + * builds. Taint written into the builder reaches the built TypeMapping. 
+ */ +@Approximate(co.elastic.clients.elasticsearch._types.mapping.TypeMapping.class) +public class TypeMapping { + + public static co.elastic.clients.elasticsearch._types.mapping.TypeMapping of( + @ArgumentTypeContext Function fn) throws Throwable { + co.elastic.clients.elasticsearch._types.mapping.TypeMapping.Builder builder = + new co.elastic.clients.elasticsearch._types.mapping.TypeMapping.Builder(); + co.elastic.clients.util.ObjectBuilder ob = + (co.elastic.clients.util.ObjectBuilder) fn.apply(builder); + return (co.elastic.clients.elasticsearch._types.mapping.TypeMapping) ob.build(); + } +} diff --git a/projects/extensions/conductor/dataflow-src/com.fasterxml.jackson/com/example/approximations/com_fasterxml_jackson/JsonNode.java b/projects/extensions/conductor/dataflow-src/com.fasterxml.jackson/com/example/approximations/com_fasterxml_jackson/JsonNode.java new file mode 100644 index 0000000..145d13f --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/com.fasterxml.jackson/com/example/approximations/com_fasterxml_jackson/JsonNode.java @@ -0,0 +1,57 @@ +package com.example.approximations.com_fasterxml_jackson; + +import org.opentaint.ir.approximation.annotation.Approximate; + +import java.util.ArrayList; +import java.util.List; +import java.util.Iterator; +import java.util.Map; +import java.util.AbstractMap; + +/** + * Dataflow approximation for com.fasterxml.jackson.databind.JsonNode. + * + * Models the methods whose taint flows through an iterator / container of + * child nodes rather than a flat copy: + * + * - elements(): Iterator<JsonNode> over the child nodes. Taint on the parent + * node (this) reaches every yielded child node. + * - fields(): Iterator<Map.Entry<String, JsonNode>> over (name, child) pairs. + * Taint on the parent node (this) reaches the value of every yielded entry. + * + * Each child node placed in the iterator is `this` itself, so the parent's + * taint propagates to the elements; a downstream extractor (asText()) then + * surfaces it.
asText() is modelled as a this->return copy so the tainted + * child node yields a tainted value. + */ +@Approximate(com.fasterxml.jackson.databind.JsonNode.class) +public class JsonNode { + + // elements() -> Iterator; parent taint reaches each child node. + public Iterator elements() { + com.fasterxml.jackson.databind.JsonNode self = + (com.fasterxml.jackson.databind.JsonNode) (Object) this; + List children = + new ArrayList(); + children.add(self); + return children.iterator(); + } + + // fields() -> Iterator>; parent taint reaches + // each entry value. + public Iterator fields() { + com.fasterxml.jackson.databind.JsonNode self = + (com.fasterxml.jackson.databind.JsonNode) (Object) this; + List> entries = + new ArrayList>(); + entries.add(new AbstractMap.SimpleEntry("k", self)); + return entries.iterator(); + } + + // asText() -> textual value held by this node (this -> return). + public String asText() { + com.fasterxml.jackson.databind.JsonNode self = + (com.fasterxml.jackson.databind.JsonNode) (Object) this; + return String.valueOf(self); + } +} diff --git a/projects/extensions/conductor/dataflow-src/com.fasterxml.jackson/com/example/approximations/com_fasterxml_jackson/ObjectMapper.java b/projects/extensions/conductor/dataflow-src/com.fasterxml.jackson/com/example/approximations/com_fasterxml_jackson/ObjectMapper.java new file mode 100644 index 0000000..9ab4595 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/com.fasterxml.jackson/com/example/approximations/com_fasterxml_jackson/ObjectMapper.java @@ -0,0 +1,55 @@ +package com.example.approximations.com_fasterxml_jackson; + +import org.opentaint.ir.approximation.annotation.Approximate; + +import java.util.ArrayList; +import java.util.List; + +/** + * Dataflow approximation for com.fasterxml.jackson.databind.ObjectMapper. 
+ * + * Models the binding/deserialization methods whose taint flows through a + * generic / collection container rather than a flat copy: + * + * - convertValue(Object, TypeReference): the source object is re-bound into + * the target generic type; taint on the input object reaches the elements + * of the produced container. + * - readValue(String, TypeReference): the tainted JSON text is deserialized + * into the target generic type (e.g. List); taint on the text + * reaches the produced container's elements. + * - readTree(String): the tainted JSON text is parsed into a JsonNode tree; + * taint reaches the node so it can be pulled back out via the node's + * accessors / iterators (fields(), elements(), asText()). + * + * The returned containers are built so a downstream extractor (List.get(i), + * the JsonNode iterators, etc.) can recover the tainted value. + */ +@Approximate(com.fasterxml.jackson.databind.ObjectMapper.class) +public class ObjectMapper { + + // convertValue(Object, TypeReference) -> generic container holding the + // re-bound input. Erased return type is Object. + public Object convertValue(Object fromValue, + com.fasterxml.jackson.core.type.TypeReference toValueTypeRef) { + List container = new ArrayList(); + container.add(fromValue); + return container; + } + + // readValue(String, TypeReference) -> generic container deserialized from + // the tainted JSON text. Erased return type is Object. + public Object readValue(String content, + com.fasterxml.jackson.core.type.TypeReference valueTypeRef) + throws java.io.IOException { + List container = new ArrayList(); + container.add(content); + return container; + } + + // readTree(String) -> JsonNode tree carrying the tainted text. Re-wrap into + // a JsonNode whose accessors/iterators surface the taint. 
+ public com.fasterxml.jackson.databind.JsonNode readTree(String content) + throws com.fasterxml.jackson.core.JsonProcessingException { + return com.fasterxml.jackson.databind.node.TextNode.valueOf(content); + } +} diff --git a/projects/extensions/conductor/dataflow-src/com.github.benmanes.caffeine.cache/com/example/approximations/com_github_benmanes_caffeine_cache/Cache.java b/projects/extensions/conductor/dataflow-src/com.github.benmanes.caffeine.cache/com/example/approximations/com_github_benmanes_caffeine_cache/Cache.java new file mode 100644 index 0000000..3de0f97 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/com.github.benmanes.caffeine.cache/com/example/approximations/com_github_benmanes_caffeine_cache/Cache.java @@ -0,0 +1,25 @@ +package com.example.approximations.com_github_benmanes_caffeine_cache; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Function; + +/** + * Model for Cache.get(key, mappingFunction): on a cache miss the mappingFunction is + * invoked with the key, its result stored under the key and returned. We route a + * tainted key through the function into the returned value. We do NOT store the + * result back, so taint stays on the returned value and the key it was loaded for. 
+ */ +@Approximate(com.github.benmanes.caffeine.cache.Cache.class) +public class Cache { + + // get(K, Function): V + public Object get(Object key, @ArgumentTypeContext Function mappingFunction) { + if (OpentaintNdUtil.nextBool()) { + return null; + } + return mappingFunction.apply(key); + } +} diff --git a/projects/extensions/conductor/dataflow-src/com.github.benmanes.caffeine.cache/com/example/approximations/com_github_benmanes_caffeine_cache/LoadingCache.java b/projects/extensions/conductor/dataflow-src/com.github.benmanes.caffeine.cache/com/example/approximations/com_github_benmanes_caffeine_cache/LoadingCache.java new file mode 100644 index 0000000..ac12c4c --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/com.github.benmanes.caffeine.cache/com/example/approximations/com_github_benmanes_caffeine_cache/LoadingCache.java @@ -0,0 +1,28 @@ +package com.example.approximations.com_github_benmanes_caffeine_cache; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +/** + * Dataflow approximation for LoadingCache.get(key). + * + * A LoadingCache is built with a CacheLoader (Caffeine.build(loader)); on a miss + * get(key) invokes loader.load(key) and caches/returns the result. The loader is + * supplied at build time, far from this call, so we cannot reach it here. What + * matters for taint is that the loaded value is derived from the key + * (e.g. JsonQuery::compile(queryExpression) yields a query embedding the key), so + * a tainted key produces a tainted loaded value. We model that key -> result + * propagation directly. We do NOT store it back under a key, so taint does not + * leak to reads under other keys. + */ +@Approximate(com.github.benmanes.caffeine.cache.LoadingCache.class) +public class LoadingCache { + + // get(K): V — the loaded value is derived from the (tainted) key. 
+ public Object get(Object key) { + if (OpentaintNdUtil.nextBool()) { + return null; + } + return key; + } +} diff --git a/projects/extensions/conductor/dataflow-src/com.google.common/com/example/approximations/com_google_common/Cache.java b/projects/extensions/conductor/dataflow-src/com.google.common/com/example/approximations/com_google_common/Cache.java new file mode 100644 index 0000000..7fdb52a --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/com.google.common/com/example/approximations/com_google_common/Cache.java @@ -0,0 +1,27 @@ +package com.example.approximations.com_google_common; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.concurrent.Callable; + +@Approximate(com.google.common.cache.Cache.class) +public class Cache { + + // Model: Cache.get(key, valueLoader) — on a cache miss the valueLoader Callable + // is invoked, its result is stored under the key and returned. We route taint + // from the Callable's result to the returned value (and store it back so a later + // getIfPresent under the same key observes it too). + public Object get(Object key, @ArgumentTypeContext Callable valueLoader) throws Throwable { + com.google.common.cache.Cache self = (com.google.common.cache.Cache) (Object) this; + if (OpentaintNdUtil.nextBool()) { + // Cache hit: return the already-cached value for this key. + return self.getIfPresent(key); + } + // Cache miss: load the value, cache it, and return it. 
+ Object loaded = valueLoader.call(); + self.put(key, loaded); + return loaded; + } +} diff --git a/projects/extensions/conductor/dataflow-src/com.jayway.jsonpath/com/example/approximations/com_jayway_jsonpath/ReadContext.java b/projects/extensions/conductor/dataflow-src/com.jayway.jsonpath/com/example/approximations/com_jayway_jsonpath/ReadContext.java new file mode 100644 index 0000000..2756d31 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/com.jayway.jsonpath/com/example/approximations/com_jayway_jsonpath/ReadContext.java @@ -0,0 +1,53 @@ +package com.example.approximations.com_jayway_jsonpath; + +import org.opentaint.ir.approximation.annotation.Approximate; + +/** + * Dataflow approximation for com.jayway.jsonpath.ReadContext. + * + * A ReadContext wraps a parsed JSON document (obtained via JsonPath.parse(...), + * whose taint flows into the context). The read(...) overloads evaluate a path + * expression against that document and return the selected fragment, so taint on + * the underlying document carried by the context (this) flows to the read result. + * + * Each read overload is modelled as this -> result: it returns the context + * object itself, which carries the document's taint. The erased return type of + * every overload is the type variable T, i.e. java.lang.Object, so a downstream + * consumer of the fragment sees the propagated taint. + * + * The Predicate... / TypeRef / Class / JsonPath arguments only select or shape the + * fragment; they do not introduce or remove taint, so they are not copied. + */ +@Approximate(com.jayway.jsonpath.ReadContext.class) +public class ReadContext { + + // read(String path, Predicate... filters) -> selected fragment of the document. + public Object read(String path, com.jayway.jsonpath.Predicate... filters) { + return (Object) this; + } + + // read(String path, Class type, Predicate... filters) -> typed fragment. + public Object read(String path, Class type, com.jayway.jsonpath.Predicate... 
filters) { + return (Object) this; + } + + // read(String path, TypeRef type) -> generically-typed fragment. + public Object read(String path, com.jayway.jsonpath.TypeRef type) { + return (Object) this; + } + + // read(JsonPath path) -> selected fragment of the document. + public Object read(com.jayway.jsonpath.JsonPath path) { + return (Object) this; + } + + // read(JsonPath path, Class type) -> typed fragment. + public Object read(com.jayway.jsonpath.JsonPath path, Class type) { + return (Object) this; + } + + // read(JsonPath path, TypeRef type) -> generically-typed fragment. + public Object read(com.jayway.jsonpath.JsonPath path, com.jayway.jsonpath.TypeRef type) { + return (Object) this; + } +} diff --git a/projects/extensions/conductor/dataflow-src/com.rabbitmq.client/com/example/approximations/com_rabbitmq_client/ShutdownNotifier.java b/projects/extensions/conductor/dataflow-src/com.rabbitmq.client/com/example/approximations/com_rabbitmq_client/ShutdownNotifier.java new file mode 100644 index 0000000..446c833 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/com.rabbitmq.client/com/example/approximations/com_rabbitmq_client/ShutdownNotifier.java @@ -0,0 +1,22 @@ +package com.example.approximations.com_rabbitmq_client; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import com.rabbitmq.client.ShutdownListener; + +@Approximate(com.rabbitmq.client.ShutdownNotifier.class) +public class ShutdownNotifier { + + // Model: when shutdown occurs the registered listener is invoked with this + // notifier's close reason (a ShutdownSignalException). Taint carried by the + // notifier's state therefore flows into the listener callback argument. 
+ public void addShutdownListener(@ArgumentTypeContext ShutdownListener listener) { + com.rabbitmq.client.ShutdownNotifier self = + (com.rabbitmq.client.ShutdownNotifier) (Object) this; + if (OpentaintNdUtil.nextBool()) { + listener.shutdownCompleted(self.getCloseReason()); + } + } +} diff --git a/projects/extensions/conductor/dataflow-src/io.nats.client/com/example/approximations/io_nats_client/Builder.java b/projects/extensions/conductor/dataflow-src/io.nats.client/com/example/approximations/io_nats_client/Builder.java new file mode 100644 index 0000000..5453d24 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/io.nats.client/com/example/approximations/io_nats_client/Builder.java @@ -0,0 +1,24 @@ +package com.example.approximations.io_nats_client; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import io.nats.client.ConnectionListener; + +@Approximate(io.nats.client.Options.Builder.class) +public class Builder { + + // Model: connectionListener registers a ConnectionListener that the NATS + // client later invokes with connection events (a source-side callback). + // Invoking the listener surfaces any value its closure carries to the sink. + // Returns the builder itself for fluent chaining (preserve container taint). 
+ public io.nats.client.Options.Builder connectionListener( + @ArgumentTypeContext ConnectionListener listener) { + io.nats.client.Options.Builder self = (io.nats.client.Options.Builder) (Object) this; + if (OpentaintNdUtil.nextBool()) { + listener.connectionEvent(null, ConnectionListener.Events.CONNECTED); + } + return self; + } +} diff --git a/projects/extensions/conductor/dataflow-src/io.nats.client/com/example/approximations/io_nats_client/Connection.java b/projects/extensions/conductor/dataflow-src/io.nats.client/com/example/approximations/io_nats_client/Connection.java new file mode 100644 index 0000000..b543a97 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/io.nats.client/com/example/approximations/io_nats_client/Connection.java @@ -0,0 +1,24 @@ +package com.example.approximations.io_nats_client; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import io.nats.client.MessageHandler; + +@Approximate(io.nats.client.Connection.class) +public class Connection { + + // Model: createDispatcher registers a MessageHandler that the NATS client + // later invokes with inbound messages (a source-side callback). Invoking + // the handler surfaces any value the handler closure carries to the sink, + // and delivers a message the client received from the connection. 
+ public io.nats.client.Dispatcher createDispatcher(@ArgumentTypeContext MessageHandler handler) + throws Throwable { + io.nats.client.Connection self = (io.nats.client.Connection) (Object) this; + if (OpentaintNdUtil.nextBool()) { + handler.onMessage(null); + } + return self.createDispatcher(); + } +} diff --git a/projects/extensions/conductor/dataflow-src/io.nats.streaming/com/example/approximations/io_nats_streaming/StreamingConnection.java b/projects/extensions/conductor/dataflow-src/io.nats.streaming/com/example/approximations/io_nats_streaming/StreamingConnection.java new file mode 100644 index 0000000..6699133 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/io.nats.streaming/com/example/approximations/io_nats_streaming/StreamingConnection.java @@ -0,0 +1,74 @@ +package com.example.approximations.io_nats_streaming; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import io.nats.streaming.MessageHandler; +import io.nats.streaming.Subscription; +import io.nats.streaming.SubscriptionOptions; + +/** + * Dataflow approximation for io.nats.streaming.StreamingConnection + * (io.nats:java-nats-streaming:2.2.3). + * + * subscribe(...) registers a MessageHandler that the broker invokes + * asynchronously, delivering each received Message into onMessage(Message). + * This is a SOURCE-SIDE callback: the connection (this) is the broker-facing + * endpoint, and the handler runs against broker-delivered data. + * + * Model: invoke the registered handler so the analyzer carries taint through + * the callback boundary. A value captured in the handler's closure surfaces + * when onMessage runs (the conductor *ObservableQueue receive idiom). 
The + * Message argument itself is delivered null because io.nats.streaming.Message + * has no constructor accessible outside its package, so a tainted Message + * instance cannot be synthesised here (see tests_passing note). + * + * Lives in package com.example.approximations.io_nats_streaming (NOT io.nats.streaming) so + * @Approximate resolves the target class without colliding with it. + */ +@Approximate(io.nats.streaming.StreamingConnection.class) +public class StreamingConnection { + + // subscribe(String subject, MessageHandler cb) : Subscription + public Subscription subscribe(String subject, + @ArgumentTypeContext MessageHandler cb) { + deliver(cb); + return null; + } + + // subscribe(String subject, MessageHandler cb, SubscriptionOptions opts) : Subscription + public Subscription subscribe(String subject, + @ArgumentTypeContext MessageHandler cb, + SubscriptionOptions opts) { + deliver(cb); + return null; + } + + // subscribe(String subject, String queue, MessageHandler cb) : Subscription + public Subscription subscribe(String subject, String queue, + @ArgumentTypeContext MessageHandler cb) { + deliver(cb); + return null; + } + + // subscribe(String subject, String queue, MessageHandler cb, + // SubscriptionOptions opts) : Subscription + public Subscription subscribe(String subject, String queue, + @ArgumentTypeContext MessageHandler cb, + SubscriptionOptions opts) { + deliver(cb); + return null; + } + + // Drives the registered handler the way the broker would, so the analyzer + // carries taint across the asynchronous callback boundary into onMessage.
+ private void deliver(MessageHandler cb) { + if (cb == null) { + return; + } + if (OpentaintNdUtil.nextBool()) { + cb.onMessage(null); + } + } +} diff --git a/projects/extensions/conductor/dataflow-src/java.lang/com/example/approximations/java_lang/Iterable.java b/projects/extensions/conductor/dataflow-src/java.lang/com/example/approximations/java_lang/Iterable.java new file mode 100644 index 0000000..8b4a0d7 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/java.lang/com/example/approximations/java_lang/Iterable.java @@ -0,0 +1,22 @@ +package com.example.approximations.java_lang; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Consumer; + +@Approximate(java.lang.Iterable.class) +public class Iterable { + + // Model: each tainted element flows into the consumer. + public void forEach(@ArgumentTypeContext Consumer action) { + java.lang.Iterable self = (java.lang.Iterable) (Object) this; + if (OpentaintNdUtil.nextBool()) { + java.util.Iterator it = self.iterator(); + while (it.hasNext()) { + action.accept(it.next()); + } + } + } +} diff --git a/projects/extensions/conductor/dataflow-src/java.lang/com/example/approximations/java_lang/ThreadLocal.java b/projects/extensions/conductor/dataflow-src/java.lang/com/example/approximations/java_lang/ThreadLocal.java new file mode 100644 index 0000000..222ecda --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/java.lang/com/example/approximations/java_lang/ThreadLocal.java @@ -0,0 +1,20 @@ +package com.example.approximations.java_lang; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Supplier; + +@Approximate(java.lang.ThreadLocal.class) 
+public class ThreadLocal { + + // Model: the supplier result becomes the ThreadLocal's value, retrievable via get(). + public static java.lang.ThreadLocal withInitial(@ArgumentTypeContext Supplier supplier) { + java.lang.ThreadLocal result = new java.lang.ThreadLocal(); + if (OpentaintNdUtil.nextBool()) { + result.set(supplier.get()); + } + return result; + } +} diff --git a/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ConcurrentHashMap.java b/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ConcurrentHashMap.java new file mode 100644 index 0000000..cad1c97 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ConcurrentHashMap.java @@ -0,0 +1,21 @@ +package com.example.approximations.java_util_concurrent; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Function; + +@Approximate(java.util.concurrent.ConcurrentHashMap.class) +public class ConcurrentHashMap { + + // Model: the (tainted) key flows into the mapping function, whose result is + // returned. We do NOT store the result back into the map, so taint stays on + // the returned value and does not leak to reads under other keys. 
+ public Object computeIfAbsent(Object key, @ArgumentTypeContext Function mappingFunction) { + if (OpentaintNdUtil.nextBool()) { + return null; + } + return mappingFunction.apply(key); + } +} diff --git a/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ScheduledExecutorService.java b/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ScheduledExecutorService.java new file mode 100644 index 0000000..c87e756 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ScheduledExecutorService.java @@ -0,0 +1,34 @@ +package com.example.approximations.java_util_concurrent; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.concurrent.TimeUnit; + +@Approximate(java.util.concurrent.ScheduledExecutorService.class) +public class ScheduledExecutorService { + + // Model: the submitted Runnable captures tainted state; running it lets that + // taint surface at whatever the lambda body touches (callback boundary). 
+    public java.util.concurrent.ScheduledFuture schedule(@ArgumentTypeContext Runnable command, long delay, TimeUnit unit) {
+        if (OpentaintNdUtil.nextBool()) { // the task may or may not run (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            command.run(); // invoked directly, once; delay and unit are not used by the model
+        }
+        return null; // no ScheduledFuture is modeled
+    }
+
+    public java.util.concurrent.ScheduledFuture scheduleAtFixedRate(@ArgumentTypeContext Runnable command, long initialDelay, long period, TimeUnit unit) {
+        if (OpentaintNdUtil.nextBool()) {
+            command.run(); // a single direct invocation stands in for the periodic runs; timing arguments are unused
+        }
+        return null; // no ScheduledFuture is modeled
+    }
+
+    public java.util.concurrent.ScheduledFuture scheduleWithFixedDelay(@ArgumentTypeContext Runnable command, long initialDelay, long delay, TimeUnit unit) {
+        if (OpentaintNdUtil.nextBool()) {
+            command.run(); // a single direct invocation stands in for the repeated runs; timing arguments are unused
+        }
+        return null; // no ScheduledFuture is modeled
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ScheduledThreadPoolExecutor.java b/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ScheduledThreadPoolExecutor.java
new file mode 100644
index 0000000..ac92bf3
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/java.util.concurrent/com/example/approximations/java_util_concurrent/ScheduledThreadPoolExecutor.java
@@ -0,0 +1,20 @@
+package com.example.approximations.java_util_concurrent;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.concurrent.TimeUnit;
+
+@Approximate(java.util.concurrent.ScheduledThreadPoolExecutor.class)
+public class ScheduledThreadPoolExecutor {
+
+    // Model: the submitted Runnable captures tainted state; running it lets that
+    // taint surface at whatever the lambda body touches (callback boundary).
+ public java.util.concurrent.ScheduledFuture schedule(@ArgumentTypeContext Runnable command, long delay, TimeUnit unit) { + if (OpentaintNdUtil.nextBool()) { + command.run(); + } + return null; + } +} diff --git a/projects/extensions/conductor/dataflow-src/java.util.stream/com/example/approximations/java_util_stream/Collectors.java b/projects/extensions/conductor/dataflow-src/java.util.stream/com/example/approximations/java_util_stream/Collectors.java new file mode 100644 index 0000000..19fe1a3 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/java.util.stream/com/example/approximations/java_util_stream/Collectors.java @@ -0,0 +1,84 @@ +package com.example.approximations.java_util_stream; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.BiConsumer; +import java.util.function.BinaryOperator; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collector; + +@Approximate(java.util.stream.Collectors.class) +public class Collectors { + + // Build a Collector whose supplier returns the already-populated (possibly + // tainted) container, so that Stream.collect surfaces the tainted result. + private static Collector carrying(final Object container) { + Supplier supplier = new Supplier() { + public Object get() { + return container; + } + }; + BiConsumer accumulator = new BiConsumer() { + public void accept(Object a, Object b) { + } + }; + BinaryOperator combiner = new BinaryOperator() { + public Object apply(Object a, Object b) { + return a; + } + }; + return Collector.of(supplier, accumulator, combiner); + } + + // Model: the value function's result becomes a map value; the key function's + // result becomes a map key. 
Apply both to a sample element so taint on the + // captured data flows through the lambdas into the collected Map. + public static Collector toMap(@ArgumentTypeContext Function keyMapper, + @ArgumentTypeContext Function valueMapper) { + Map result = new HashMap(); + Object element = OpentaintNdUtil.nextBool() ? null : ""; + result.put(keyMapper.apply(element), valueMapper.apply(element)); + return carrying(result); + } + + public static Collector toMap(@ArgumentTypeContext Function keyMapper, + @ArgumentTypeContext Function valueMapper, + @ArgumentTypeContext BinaryOperator mergeFunction) { + return toMap(keyMapper, valueMapper); + } + + public static Collector toMap(@ArgumentTypeContext Function keyMapper, + @ArgumentTypeContext Function valueMapper, + @ArgumentTypeContext BinaryOperator mergeFunction, + @ArgumentTypeContext Supplier mapSupplier) { + Map result = (Map) mapSupplier.get(); + Object element = OpentaintNdUtil.nextBool() ? null : ""; + result.put(keyMapper.apply(element), valueMapper.apply(element)); + return carrying(result); + } + + // Model: the classifier's result becomes a map key; each element is grouped + // into the list value under its key. + public static Collector groupingBy(@ArgumentTypeContext Function classifier) { + Map result = new HashMap(); + Object element = OpentaintNdUtil.nextBool() ? null : ""; + Object key = classifier.apply(element); + List bucket = new ArrayList(); + bucket.add(element); + result.put(key, bucket); + return carrying(result); + } + + // Model: elements flow into the collection produced by the supplier. 
+ public static Collector toCollection(@ArgumentTypeContext Supplier collectionFactory) { + Object result = collectionFactory.get(); + return carrying(result); + } +} diff --git a/projects/extensions/conductor/dataflow-src/java.util.stream/com/example/approximations/java_util_stream/IntStream.java b/projects/extensions/conductor/dataflow-src/java.util.stream/com/example/approximations/java_util_stream/IntStream.java new file mode 100644 index 0000000..f734afe --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/java.util.stream/com/example/approximations/java_util_stream/IntStream.java @@ -0,0 +1,24 @@ +package com.example.approximations.java_util_stream; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.IntFunction; +import java.util.stream.Stream; + +@Approximate(java.util.stream.IntStream.class) +public class IntStream { + + // Model: each int element flows through the IntFunction; the produced object + // becomes an element of the returned Stream. We apply the mapper to a sample + // element and wrap the result so a downstream Stream operation can pull it out. 
+    public Stream mapToObj(@ArgumentTypeContext IntFunction mapper) {
+        java.util.stream.IntStream self = (java.util.stream.IntStream) (Object) this; // double cast via Object: treat `this` as the approximated java.util.stream.IntStream
+        if (OpentaintNdUtil.nextBool()) { // either branch may be taken (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            return Stream.empty(); // path on which the mapper is never invoked
+        }
+        Object mapped = mapper.apply(self.findFirst().orElse(0)); // sample element: the first int of the stream, or 0 when it is empty
+        return Stream.of(mapped); // single-element Stream wrapping the mapper's result
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/ArrayList.java b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/ArrayList.java
new file mode 100644
index 0000000..5d51f79
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/ArrayList.java
@@ -0,0 +1,33 @@
+package com.example.approximations.java_util;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.function.Consumer;
+import java.util.function.Predicate;
+
+@Approximate(java.util.ArrayList.class)
+public class ArrayList {
+
+    // Model: each tainted element flows into the consumer.
+    public void forEach(@ArgumentTypeContext Consumer action) {
+        java.util.ArrayList self = (java.util.ArrayList) (Object) this; // double cast via Object: treat `this` as the approximated java.util.ArrayList
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely
+            for (Object e : self) {
+                action.accept(e);
+            }
+        }
+    }
+
+    // Model: each tainted element flows into the predicate.
+    public boolean removeIf(@ArgumentTypeContext Predicate filter) {
+        java.util.ArrayList self = (java.util.ArrayList) (Object) this; // double cast via Object: treat `this` as the approximated java.util.ArrayList
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            for (Object e : self) {
+                filter.test(e); // result discarded; no element is actually removed by the model
+            }
+        }
+        return OpentaintNdUtil.nextBool(); // returned flag is independent of the predicate results
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Collection.java b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Collection.java
new file mode 100644
index 0000000..d5568cc
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Collection.java
@@ -0,0 +1,23 @@
+package com.example.approximations.java_util;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.function.Predicate;
+
+@Approximate(java.util.Collection.class)
+public class Collection {
+
+    // Model: each tainted element flows into the predicate.
+    public boolean removeIf(@ArgumentTypeContext Predicate filter) {
+        java.util.Collection self = (java.util.Collection) (Object) this; // double cast via Object: treat `this` as the approximated java.util.Collection
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            java.util.Iterator it = self.iterator();
+            while (it.hasNext()) {
+                filter.test(it.next()); // result discarded; it.remove() is never called, so nothing is removed
+            }
+        }
+        return OpentaintNdUtil.nextBool(); // returned flag is independent of the predicate results
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Comparator.java b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Comparator.java
new file mode 100644
index 0000000..d6a359b
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Comparator.java
@@ -0,0 +1,36 @@
+package com.example.approximations.java_util;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+
+import java.util.function.Function;
+import java.util.function.ToIntFunction;
+
+@Approximate(java.util.Comparator.class)
+public class Comparator {
+
+    // Model: the returned Comparator routes each compared element through the
+    // captured key-extractor Function, so element taint flows into arg(0) when
+    // the comparator is later used (e.g. via compare()/sort()).
+    //
+    // NOTE: the analyzer's IFDS summaries do not follow a callback that is
+    // invoked through a functional object RETURNED by an approximation at a
+    // later, separate call site, so this element->keyExtractor flow is not
+    // propagated in practice (verified: removing this class changes nothing).
+    // The body still records the correct propagation intent.
+    public static java.util.Comparator comparing(@ArgumentTypeContext final Function keyExtractor) {
+        return (a, b) -> {
+            keyExtractor.apply(a); // extracted keys are discarded
+            keyExtractor.apply(b);
+            return 0; // ordering is not modeled: every pair compares as equal
+        };
+    }
+
+    public static java.util.Comparator comparingInt(@ArgumentTypeContext final ToIntFunction keyExtractor) {
+        return (a, b) -> {
+            keyExtractor.applyAsInt(a); // same shape as comparing(): both elements pass through the extractor
+            keyExtractor.applyAsInt(b);
+            return 0; // ordering is not modeled: every pair compares as equal
+        };
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/HashMap.java b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/HashMap.java
new file mode 100644
index 0000000..7af5fcd
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/HashMap.java
@@ -0,0 +1,57 @@
+package com.example.approximations.java_util;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.function.BiConsumer;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+@Approximate(java.util.HashMap.class)
+public class HashMap {
+
+    // Model: the (tainted) key flows into the mapping function, whose result is
+    // returned. We do NOT store the result back into the map, so taint stays on
+    // the returned value and does not leak to reads under other keys.
+    public Object computeIfAbsent(Object key, @ArgumentTypeContext Function mappingFunction) {
+        if (OpentaintNdUtil.nextBool()) { // either branch may be taken (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            return null; // path on which the mapping function is never invoked
+        }
+        return mappingFunction.apply(key);
+    }
+
+    // Model: the (tainted) new value flows into the remapping BiFunction; its
+    // result is stored and returned.
+    public Object merge(Object key, Object value, @ArgumentTypeContext BiFunction remappingFunction) {
+        java.util.HashMap self = (java.util.HashMap) (Object) this; // double cast via Object: treat `this` as the approximated java.util.HashMap
+        if (OpentaintNdUtil.nextBool()) { // either branch may be taken (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            self.put(key, value); // path without the callback: the new value is stored as-is
+            return value;
+        }
+        Object result = remappingFunction.apply(self.get(key), value); // arguments: (value currently under key, new value)
+        self.put(key, result);
+        return result;
+    }
+
+    // Model: each stored key/value flows into the BiConsumer.
+    public void forEach(@ArgumentTypeContext BiConsumer action) {
+        java.util.HashMap self = (java.util.HashMap) (Object) this;
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely
+            for (Object key : self.keySet()) {
+                action.accept(key, self.get(key));
+            }
+        }
+    }
+
+    // Model: each key/value flows into the BiFunction; its result is stored back.
+    public void replaceAll(@ArgumentTypeContext BiFunction function) {
+        java.util.HashMap self = (java.util.HashMap) (Object) this;
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely
+            for (Object key : self.keySet()) {
+                Object newValue = function.apply(key, self.get(key));
+                self.put(key, newValue); // overwrites the value of a key that is already present
+            }
+        }
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Iterator.java b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Iterator.java
new file mode 100644
index 0000000..895d926
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Iterator.java
@@ -0,0 +1,21 @@
+package com.example.approximations.java_util;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.function.Consumer;
+
+@Approximate(java.util.Iterator.class)
+public class Iterator {
+
+    // Model: each remaining tainted element flows into the consumer.
+    public void forEachRemaining(@ArgumentTypeContext Consumer action) {
+        java.util.Iterator self = (java.util.Iterator) (Object) this; // double cast via Object: treat `this` as the approximated java.util.Iterator
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            while (self.hasNext()) {
+                action.accept(self.next()); // advances the receiver itself via next()
+            }
+        }
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Map.java b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Map.java
new file mode 100644
index 0000000..93f0eab
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Map.java
@@ -0,0 +1,45 @@
+package com.example.approximations.java_util;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.function.BiConsumer;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+@Approximate(java.util.Map.class)
+public class Map {
+
+    // Model: the (tainted) key flows into the mapping function, whose result is
+    // returned. We do NOT store the result back into the map, so taint stays on
+    // the returned value and does not leak to reads under other keys.
+    public Object computeIfAbsent(Object key, @ArgumentTypeContext Function mappingFunction) {
+        if (OpentaintNdUtil.nextBool()) {
+            return null; // path on which the mapping function is never invoked
+        }
+        return mappingFunction.apply(key);
+    }
+
+    // Model: each stored key/value flows into the BiConsumer.
+    public void forEach(@ArgumentTypeContext BiConsumer action) {
+        java.util.Map self = (java.util.Map) (Object) this; // double cast via Object: treat `this` as the approximated java.util.Map
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely
+            for (Object key : self.keySet()) {
+                action.accept(key, self.get(key));
+            }
+        }
+    }
+
+    // Model: each key/value flows into the BiFunction; its result is stored back
+    // under the same key.
+    public void replaceAll(@ArgumentTypeContext BiFunction function) {
+        java.util.Map self = (java.util.Map) (Object) this; // double cast via Object: treat `this` as the approximated java.util.Map
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            for (Object key : self.keySet()) {
+                Object newValue = function.apply(key, self.get(key)); // arguments: (key, value currently stored under it)
+                self.put(key, newValue); // overwrites the value of a key that is already present
+            }
+        }
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Properties.java b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Properties.java
new file mode 100644
index 0000000..fc31e30
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/java.util/com/example/approximations/java_util/Properties.java
@@ -0,0 +1,21 @@
+package com.example.approximations.java_util;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.function.BiConsumer;
+
+@Approximate(java.util.Properties.class)
+public class Properties {
+
+    // Model: each stored key/value flows into the BiConsumer.
+    public void forEach(@ArgumentTypeContext BiConsumer action) {
+        java.util.Properties self = (java.util.Properties) (Object) this; // double cast via Object: treat `this` as the approximated java.util.Properties
+        if (OpentaintNdUtil.nextBool()) { // the iteration may be skipped entirely (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            for (Object key : self.keySet()) {
+                action.accept(key, self.get(key)); // arguments: (key, value stored under it)
+            }
+        }
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/org.apache.hc.core5.ssl/com/example/approximations/org_apache_hc_core5_ssl/SSLContextBuilder.java b/projects/extensions/conductor/dataflow-src/org.apache.hc.core5.ssl/com/example/approximations/org_apache_hc_core5_ssl/SSLContextBuilder.java
new file mode 100644
index 0000000..a376ded
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/org.apache.hc.core5.ssl/com/example/approximations/org_apache_hc_core5_ssl/SSLContextBuilder.java
@@ -0,0 +1,38 @@
+package com.example.approximations.org_apache_hc_core5_ssl;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.security.cert.X509Certificate;
+
+/**
+ * Dataflow approximation for org.apache.hc.core5.ssl.SSLContextBuilder.
+ *
+ * loadTrustMaterial(TrustStrategy) registers a TrustStrategy callback on the
+ * builder. TrustStrategy is a single-abstract-method functional interface:
+ *
+ *   boolean isTrusted(X509Certificate[] chain, String authType)
+ *
+ * The builder retains the strategy and invokes it later (during the TLS
+ * handshake the SSLContext drives). Without the approximation the analyzer never
+ * sees the callback invoked, so any flow inside the lambda body — or data the
+ * lambda captures and writes into state read back afterwards — is dropped. The
+ * approximation runs the callback against fresh (empty) handshake arguments (the
+ * callback boundary) so taint the lambda carries is analyzed. loadTrustMaterial
+ * returns the builder for chaining, so return `self`.
+ */
+@Approximate(org.apache.hc.core5.ssl.SSLContextBuilder.class)
+public class SSLContextBuilder {
+
+    public org.apache.hc.core5.ssl.SSLContextBuilder loadTrustMaterial(
+            @ArgumentTypeContext org.apache.hc.core5.ssl.TrustStrategy trustStrategy)
+            throws Throwable {
+        org.apache.hc.core5.ssl.SSLContextBuilder self =
+                (org.apache.hc.core5.ssl.SSLContextBuilder) (Object) this; // double cast via Object: treat `this` as the approximated builder
+        if (OpentaintNdUtil.nextBool()) { // the callback may or may not run (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            trustStrategy.isTrusted(new X509Certificate[0], ""); // empty chain and empty authType; the boolean result is discarded
+        }
+        return self;
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/org.apache.http.ssl/com/example/approximations/org_apache_http_ssl/SSLContextBuilder.java b/projects/extensions/conductor/dataflow-src/org.apache.http.ssl/com/example/approximations/org_apache_http_ssl/SSLContextBuilder.java
new file mode 100644
index 0000000..a3bdc30
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/org.apache.http.ssl/com/example/approximations/org_apache_http_ssl/SSLContextBuilder.java
@@ -0,0 +1,40 @@
+package com.example.approximations.org_apache_http_ssl;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.security.KeyStore;
+import java.security.cert.X509Certificate;
+
+/**
+ * Dataflow approximation for org.apache.http.ssl.SSLContextBuilder
+ * (Apache HttpClient/HttpCore 4.x).
+ *
+ * loadTrustMaterial(KeyStore, TrustStrategy) registers a trust callback. At
+ * certificate-verification time HttpCore invokes the strategy's
+ *   boolean isTrusted(X509Certificate[] chain, String authType)
+ * passing the certificate chain that is checked against the supplied trust
+ * material (the KeyStore). To keep taint flowing through the registered
+ * callback for completeness, model the registration by invoking the strategy
+ * with the KeyStore-derived chain so taint on the trust material reaches the
+ * callback body, and return the builder (self) so the fluent chain keeps any
+ * existing taint.
+ */
+@Approximate(org.apache.http.ssl.SSLContextBuilder.class)
+public class SSLContextBuilder {
+
+    public org.apache.http.ssl.SSLContextBuilder loadTrustMaterial(
+            KeyStore trustStore,
+            @ArgumentTypeContext org.apache.http.ssl.TrustStrategy trustStrategy) throws Throwable {
+        org.apache.http.ssl.SSLContextBuilder self =
+                (org.apache.http.ssl.SSLContextBuilder) (Object) this; // double cast via Object: treat `this` as the approximated builder
+        if (trustStrategy != null && OpentaintNdUtil.nextBool()) { // skipped for a null strategy; otherwise the callback may or may not run
+            // The chain checked by the callback is derived from the trust
+            // material; route the KeyStore's taint into the callback.
+            X509Certificate[] chain = (X509Certificate[]) (Object) trustStore; // taint-carrying cast only: a KeyStore is not an array, so this would throw ClassCastException if executed
+            trustStrategy.isTrusted(chain, ""); // empty authType; the boolean result is discarded
+        }
+        return self;
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/KafkaFutureApprox.java b/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/KafkaFutureApprox.java
new file mode 100644
index 0000000..7fa75f2
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/KafkaFutureApprox.java
@@ -0,0 +1,25 @@
+package com.example.approximations.org_apache_kafka;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import org.apache.kafka.common.KafkaFuture;
+
+@Approximate(org.apache.kafka.common.KafkaFuture.class)
+public class KafkaFutureApprox {
+
+    // Model: when the future completes, its (tainted) result is delivered as the
+    // first argument of the BiConsumer. The future is returned unchanged so the
+    // result stays extractable downstream (get/getNow). Returning self on the
+    // skip path preserves the container's taint.
+    public KafkaFuture whenComplete(@ArgumentTypeContext KafkaFuture.BiConsumer action) throws Throwable {
+        KafkaFuture self = (KafkaFuture) (Object) this; // double cast via Object: treat `this` as the approximated KafkaFuture
+        if (OpentaintNdUtil.nextBool()) { // skip path: the callback is not invoked
+            return self;
+        }
+        // Taint carried by the future surfaces as the consumer's result argument.
+        action.accept(self, null); // the future object itself stands in for its result; null is passed as the second argument
+        return self;
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/KafkaProducer.java b/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/KafkaProducer.java
new file mode 100644
index 0000000..da96599
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/KafkaProducer.java
@@ -0,0 +1,23 @@
+package com.example.approximations.org_apache_kafka;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.ProducerRecord;
+
+@Approximate(org.apache.kafka.clients.producer.KafkaProducer.class)
+public class KafkaProducer {
+
+    // Model: send delivers the (tainted) record's payload to the completion
+    // callback. The callback signature only exposes RecordMetadata, but a
+    // closure capturing the record surfaces its taint when the callback runs,
+    // so invoking the callback materializes that flow.
+    public java.util.concurrent.Future send(ProducerRecord record, @ArgumentTypeContext Callback callback) {
+        if (OpentaintNdUtil.nextBool()) { // the callback may or may not run (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            callback.onCompletion(null, null); // both callback arguments are null; `record` itself is not referenced by this body
+        }
+        return null; // the returned Future is not modeled
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/Producer.java b/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/Producer.java
new file mode 100644
index 0000000..1b17fc5
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/org.apache.kafka/com/example/approximations/org_apache_kafka/Producer.java
@@ -0,0 +1,23 @@
+package com.example.approximations.org_apache_kafka;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.ProducerRecord;
+
+@Approximate(org.apache.kafka.clients.producer.Producer.class)
+public class Producer {
+
+    // Model: send delivers the (tainted) record's payload to the completion
+    // callback. The callback signature only exposes RecordMetadata, but a
+    // closure capturing the record surfaces its taint when the callback runs,
+    // so invoking the callback materializes that flow.
+    public java.util.concurrent.Future send(ProducerRecord record, @ArgumentTypeContext Callback callback) {
+        if (OpentaintNdUtil.nextBool()) { // the callback may or may not run (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            callback.onCompletion(null, null); // both callback arguments are null; `record` itself is not referenced by this body
+        }
+        return null; // the returned Future is not modeled
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/org.elasticsearch/com/example/approximations/org_elasticsearch/RestClientBuilder.java b/projects/extensions/conductor/dataflow-src/org.elasticsearch/com/example/approximations/org_elasticsearch/RestClientBuilder.java
new file mode 100644
index 0000000..d63bc03
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/org.elasticsearch/com/example/approximations/org_elasticsearch/RestClientBuilder.java
@@ -0,0 +1,46 @@
+package com.example.approximations.org_elasticsearch;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+/**
+ * Dataflow approximation for org.elasticsearch.client.RestClientBuilder.
+ *
+ * setHttpClientConfigCallback(HttpClientConfigCallback) and
+ * setRequestConfigCallback(RequestConfigCallback) register a customization
+ * callback on the builder. The callback is a single-abstract-method functional
+ * interface: it receives a fresh Apache HttpComponents builder
+ * (HttpAsyncClientBuilder / RequestConfig.Builder), may mutate it with caller
+ * data (credentials, hosts, timeouts), and returns it. The lambda body is the
+ * unit of taint; without invoking the callback the analyzer cannot see what it
+ * writes into the builder or into the state it captures. The approximation runs
+ * the callback against a fresh builder (the callback boundary) so any taint the
+ * lambda carries is analyzed. Both setters return the builder for chaining, so
+ * return `self`.
+ */
+@Approximate(org.elasticsearch.client.RestClientBuilder.class)
+public class RestClientBuilder {
+
+    public org.elasticsearch.client.RestClientBuilder setHttpClientConfigCallback(
+            @ArgumentTypeContext org.elasticsearch.client.RestClientBuilder.HttpClientConfigCallback callback) {
+        org.elasticsearch.client.RestClientBuilder self =
+                (org.elasticsearch.client.RestClientBuilder) (Object) this; // double cast via Object: treat `this` as the approximated builder
+        if (OpentaintNdUtil.nextBool()) { // the callback may or may not run (nextBool is presumably nondeterministic; confirm in OpentaintNdUtil)
+            callback.customizeHttpClient(
+                    org.apache.http.impl.nio.client.HttpAsyncClientBuilder.create()); // fresh builder; the callback's return value is discarded
+        }
+        return self;
+    }
+
+    public org.elasticsearch.client.RestClientBuilder setRequestConfigCallback(
+            @ArgumentTypeContext org.elasticsearch.client.RestClientBuilder.RequestConfigCallback callback) {
+        org.elasticsearch.client.RestClientBuilder self =
+                (org.elasticsearch.client.RestClientBuilder) (Object) this; // double cast via Object: treat `this` as the approximated builder
+        if (OpentaintNdUtil.nextBool()) { // the callback may or may not run
+            callback.customizeRequestConfig(
+                    org.apache.http.client.config.RequestConfig.custom()); // fresh builder; the callback's return value is discarded
+        }
+        return self;
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/BulkOperation.java b/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/BulkOperation.java
new file mode 100644
index 0000000..ea8eaa2
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/BulkOperation.java
@@ -0,0 +1,26 @@
+package com.example.approximations.org_opensearch_client_opensearch;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.function.Function;
+
+@Approximate(org.opensearch.client.opensearch.core.bulk.BulkOperation.class)
+public class BulkOperation {
+
+    // BulkOperation.of(fn): the lambda fills a BulkOperation.Builder and
+    // returns an ObjectBuilder; of() builds it. Taint
+    // introduced inside the lambda body flows into the returned operation.
+    public static org.opensearch.client.opensearch.core.bulk.BulkOperation of(
+            @ArgumentTypeContext Function fn) throws Throwable {
+        org.opensearch.client.opensearch.core.bulk.BulkOperation.Builder b =
+                new org.opensearch.client.opensearch.core.bulk.BulkOperation.Builder();
+        org.opensearch.client.util.ObjectBuilder ob =
+                (org.opensearch.client.util.ObjectBuilder) fn.apply(b); // the lambda always runs, against the fresh builder
+        if (ob == null || OpentaintNdUtil.nextBool()) { // null result, or the alternative branch: build from the builder passed to the lambda
+            return b.build();
+        }
+        return (org.opensearch.client.opensearch.core.bulk.BulkOperation) ob.build(); // otherwise build from whatever the lambda returned
+    }
+}
diff --git a/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/Query.java b/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/Query.java
new file mode 100644
index 0000000..d554367
--- /dev/null
+++ b/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/Query.java
@@ -0,0 +1,26 @@
+package com.example.approximations.org_opensearch_client_opensearch;
+
+import org.opentaint.ir.approximation.annotation.Approximate;
+import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext;
+import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil;
+
+import java.util.function.Function;
+
+@Approximate(org.opensearch.client.opensearch._types.query_dsl.Query.class)
+public class Query {
+
+    // Query.of(fn): the lambda fills a Query.Builder and returns an
+    // ObjectBuilder; of() builds it. Taint introduced inside the
+    // lambda body flows into the returned Query.
+ public static org.opensearch.client.opensearch._types.query_dsl.Query of( + @ArgumentTypeContext Function fn) throws Throwable { + org.opensearch.client.opensearch._types.query_dsl.Query.Builder b = + new org.opensearch.client.opensearch._types.query_dsl.Query.Builder(); + org.opensearch.client.util.ObjectBuilder ob = + (org.opensearch.client.util.ObjectBuilder) fn.apply(b); + if (ob == null || OpentaintNdUtil.nextBool()) { + return b.build(); + } + return (org.opensearch.client.opensearch._types.query_dsl.Query) ob.build(); + } +} diff --git a/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/SearchRequest_Builder.java b/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/SearchRequest_Builder.java new file mode 100644 index 0000000..fa36f39 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/org.opensearch.client.opensearch/com/example/approximations/org_opensearch_client_opensearch/SearchRequest_Builder.java @@ -0,0 +1,52 @@ +package com.example.approximations.org_opensearch_client_opensearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Function; + +@Approximate(org.opensearch.client.opensearch.core.SearchRequest.Builder.class) +public class SearchRequest_Builder { + + // SearchRequest.Builder.sort(fn): the lambda fills a SortOptions.Builder + // and returns an ObjectBuilder. Build it and feed it into + // the concrete sort(SortOptions,...) setter so the tainted SortOptions + // is stored on this builder (and hence in the built SearchRequest). 
+ public final org.opensearch.client.opensearch.core.SearchRequest.Builder sort( + @ArgumentTypeContext Function fn) throws Throwable { + org.opensearch.client.opensearch.core.SearchRequest.Builder self = + (org.opensearch.client.opensearch.core.SearchRequest.Builder) (Object) this; + org.opensearch.client.opensearch._types.SortOptions.Builder b = + new org.opensearch.client.opensearch._types.SortOptions.Builder(); + org.opensearch.client.util.ObjectBuilder ob = + (org.opensearch.client.util.ObjectBuilder) fn.apply(b); + org.opensearch.client.opensearch._types.SortOptions opt; + if (ob == null || OpentaintNdUtil.nextBool()) { + opt = b.build(); + } else { + opt = (org.opensearch.client.opensearch._types.SortOptions) ob.build(); + } + return self.sort(opt); + } + + // SearchRequest.Builder.source(fn): the lambda fills a SourceConfig.Builder + // and returns an ObjectBuilder. Build it and feed it into the + // concrete source(SourceConfig) setter so taint is stored on this builder. + public final org.opensearch.client.opensearch.core.SearchRequest.Builder source( + @ArgumentTypeContext Function fn) throws Throwable { + org.opensearch.client.opensearch.core.SearchRequest.Builder self = + (org.opensearch.client.opensearch.core.SearchRequest.Builder) (Object) this; + org.opensearch.client.opensearch.core.search.SourceConfig.Builder b = + new org.opensearch.client.opensearch.core.search.SourceConfig.Builder(); + org.opensearch.client.util.ObjectBuilder ob = + (org.opensearch.client.util.ObjectBuilder) fn.apply(b); + org.opensearch.client.opensearch.core.search.SourceConfig cfg; + if (ob == null || OpentaintNdUtil.nextBool()) { + cfg = b.build(); + } else { + cfg = (org.opensearch.client.opensearch.core.search.SourceConfig) ob.build(); + } + return self.source(cfg); + } +} diff --git a/projects/extensions/conductor/dataflow-src/org.opensearch/com/example/approximations/org_opensearch/RestClientBuilder.java 
b/projects/extensions/conductor/dataflow-src/org.opensearch/com/example/approximations/org_opensearch/RestClientBuilder.java new file mode 100644 index 0000000..1b4132c --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/org.opensearch/com/example/approximations/org_opensearch/RestClientBuilder.java @@ -0,0 +1,44 @@ +package com.example.approximations.org_opensearch; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +/** + * Dataflow approximation for org.opensearch.client.RestClientBuilder. + * + * setHttpClientConfigCallback(HttpClientConfigCallback) and + * setRequestConfigCallback(RequestConfigCallback) each store a single-method + * callback that OpenSearch later invokes with an Apache HttpComponents builder + * so the caller can customize the client / request config. The callback is the + * unit of taint (dropped factPosition arg(0)): whatever the lambda body writes + * into the builder it receives — or into state it captures — is the propagated + * effect. Without invoking the callback the analyzer cannot see that flow, so + * the approximation runs each callback against a fresh builder (callback + * boundary). Both setters return the builder for chaining, so return `self`. 
+ */ +@Approximate(org.opensearch.client.RestClientBuilder.class) +public class RestClientBuilder { + + public org.opensearch.client.RestClientBuilder setHttpClientConfigCallback( + @ArgumentTypeContext org.opensearch.client.RestClientBuilder.HttpClientConfigCallback callback) { + org.opensearch.client.RestClientBuilder self = + (org.opensearch.client.RestClientBuilder) (Object) this; + if (OpentaintNdUtil.nextBool()) { + callback.customizeHttpClient( + org.apache.http.impl.nio.client.HttpAsyncClientBuilder.create()); + } + return self; + } + + public org.opensearch.client.RestClientBuilder setRequestConfigCallback( + @ArgumentTypeContext org.opensearch.client.RestClientBuilder.RequestConfigCallback callback) { + org.opensearch.client.RestClientBuilder self = + (org.opensearch.client.RestClientBuilder) (Object) this; + if (OpentaintNdUtil.nextBool()) { + callback.customizeRequestConfig( + org.apache.http.client.config.RequestConfig.custom()); + } + return self; + } +} diff --git a/projects/extensions/conductor/dataflow-src/org.springframework.retry/com/example/approximations/org_springframework_retry/RetryTemplate.java b/projects/extensions/conductor/dataflow-src/org.springframework.retry/com/example/approximations/org_springframework_retry/RetryTemplate.java new file mode 100644 index 0000000..7ffcddf --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/org.springframework.retry/com/example/approximations/org_springframework_retry/RetryTemplate.java @@ -0,0 +1,23 @@ +package com.example.approximations.org_springframework_retry; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; + +import org.springframework.retry.RetryCallback; + +/** + * Dataflow approximation for org.springframework.retry.support.RetryTemplate. + * + * execute(RetryCallback) invokes the callback's doWithRetry(RetryContext) and + * returns its result. 
Taint carried by the callback (its captured closure + * state) reaches the returned value through doWithRetry's return. Model that + * directly: invoke the callback and return what it produces. + */ +@Approximate(org.springframework.retry.support.RetryTemplate.class) +public class RetryTemplate { + + // execute(RetryCallback): callback result -> returned value. + public Object execute(@ArgumentTypeContext RetryCallback callback) throws Throwable { + return callback.doWithRetry(null); + } +} diff --git a/projects/extensions/conductor/dataflow-src/org.springframework.web/com/example/approximations/org_springframework_web/RestClientBuilder.java b/projects/extensions/conductor/dataflow-src/org.springframework.web/com/example/approximations/org_springframework_web/RestClientBuilder.java new file mode 100644 index 0000000..cde2911 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/org.springframework.web/com/example/approximations/org_springframework_web/RestClientBuilder.java @@ -0,0 +1,33 @@ +package com.example.approximations.org_springframework_web; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Consumer; + +/** + * Dataflow approximation for org.springframework.web.client.RestClient$Builder. + * + * defaultHeaders(Consumer) hands the builder's HttpHeaders to the + * supplied consumer so the caller can populate the default request headers. The + * consumer is the unit of taint: whatever the lambda body writes (often a + * tainted header value) is the propagated effect. Without invoking the consumer + * the analyzer cannot see that flow, so the approximation runs it against a + * fresh HttpHeaders (callback boundary), letting any taint the lambda carries — + * into the headers argument or into state it captures — be analyzed. 
The method + * returns the builder for chaining, so return `self`. + */ +@Approximate(org.springframework.web.client.RestClient.Builder.class) +public class RestClientBuilder { + + public org.springframework.web.client.RestClient.Builder defaultHeaders( + @ArgumentTypeContext Consumer headersConsumer) { + org.springframework.web.client.RestClient.Builder self = + (org.springframework.web.client.RestClient.Builder) (Object) this; + if (OpentaintNdUtil.nextBool()) { + headersConsumer.accept(new org.springframework.http.HttpHeaders()); + } + return self; + } +} diff --git a/projects/extensions/conductor/dataflow-src/reactor.core.publisher/com/example/approximations/reactor_core_publisher/Flux.java b/projects/extensions/conductor/dataflow-src/reactor.core.publisher/com/example/approximations/reactor_core_publisher/Flux.java new file mode 100644 index 0000000..6d144cc --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/reactor.core.publisher/com/example/approximations/reactor_core_publisher/Flux.java @@ -0,0 +1,49 @@ +package com.example.approximations.reactor_core_publisher; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Function; +import java.util.function.Predicate; + +/** + * Dataflow approximation for reactor.core.publisher.Flux. + * + * Reactor's Flux carries a stream of emitted elements; each element is the unit + * of taint. Operators that run the emitted value through a user function or + * predicate re-emit the (possibly transformed / filtered) value, so taint on an + * upstream element reaches the downstream Flux's elements. + * + * - map(Function): apply the mapper to an emitted element, re-emit the + * mapper's result. this-element -> fn -> result-element. 
+ * - filter(Predicate): run the predicate over an emitted element and re-emit + * the same element when it passes. The predicate does not transform the + * value, so the upstream element flows straight through to the downstream + * Flux. (The predicate is still invoked so any flow into the predicate is + * modelled.) + * + * Wrapper-returning shape: declare the concrete Flux return type, return `self` + * on the nd branch (never null), and extract (blockFirst) -> apply -> re-wrap + * (just) so a downstream blockFirst()/etc. surfaces the tainted element. + */ +@Approximate(reactor.core.publisher.Flux.class) +public class Flux { + + // map(Function): upstream element -> mapper -> downstream element. + public reactor.core.publisher.Flux map(@ArgumentTypeContext Function fn) throws Throwable { + reactor.core.publisher.Flux self = (reactor.core.publisher.Flux) (Object) this; + if (OpentaintNdUtil.nextBool()) return self; + Object up = self.blockFirst(); + return reactor.core.publisher.Flux.just(fn.apply(up)); + } + + // filter(Predicate): element flows through unchanged when it passes. 
+ public reactor.core.publisher.Flux filter(@ArgumentTypeContext Predicate predicate) throws Throwable { + reactor.core.publisher.Flux self = (reactor.core.publisher.Flux) (Object) this; + if (OpentaintNdUtil.nextBool()) return self; + Object up = self.blockFirst(); + predicate.test(up); + return reactor.core.publisher.Flux.just(up); + } +} diff --git a/projects/extensions/conductor/dataflow-src/reactor.core.publisher/com/example/approximations/reactor_core_publisher/Mono.java b/projects/extensions/conductor/dataflow-src/reactor.core.publisher/com/example/approximations/reactor_core_publisher/Mono.java new file mode 100644 index 0000000..5855ff5 --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/reactor.core.publisher/com/example/approximations/reactor_core_publisher/Mono.java @@ -0,0 +1,48 @@ +package com.example.approximations.reactor_core_publisher; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +import java.util.function.Function; +import java.util.function.Supplier; + +/** + * Dataflow approximation for reactor.core.publisher.Mono. + * + * Reactor's Mono carries (at most) one emitted element. The element is the + * unit of taint: operators that run the emitted value through a user + * function/supplier re-emit the (possibly transformed) value, so taint on the + * upstream element reaches the downstream Mono's element. + * + * - map(Function): apply the mapper to the upstream element, re-emit the + * mapper's result. this-element -> fn -> result-element. + * - defer(Supplier): the supplier is invoked at subscription time and + * returns the Mono whose element is emitted. supplier-result-element -> + * result-element. 
+ * + * Wrapper-returning shape: declare the concrete Mono return type, return `self` + * on the nd branch (never null, which would drop the container taint), and + * extract (block) -> apply -> re-wrap (justOrEmpty) so a downstream block()/etc. can + * pull the tainted element back out. + */ +@Approximate(reactor.core.publisher.Mono.class) +public class Mono { + + // map(Function): upstream element -> mapper -> downstream element. + public reactor.core.publisher.Mono map(@ArgumentTypeContext Function fn) throws Throwable { + reactor.core.publisher.Mono self = (reactor.core.publisher.Mono) (Object) this; + if (OpentaintNdUtil.nextBool()) return self; + Object up = self.block(); + return reactor.core.publisher.Mono.justOrEmpty(fn.apply(up)); + } + + // defer(Supplier): supplier returns the Mono whose element is emitted. + public static reactor.core.publisher.Mono defer(@ArgumentTypeContext Supplier supplier) throws Throwable { + Object supplied = supplier.get(); + if (supplied instanceof reactor.core.publisher.Mono) { + return (reactor.core.publisher.Mono) supplied; + } + return reactor.core.publisher.Mono.justOrEmpty(supplied); + } +} diff --git a/projects/extensions/conductor/dataflow-src/rx/com/example/approximations/rx/Observable.java b/projects/extensions/conductor/dataflow-src/rx/com/example/approximations/rx/Observable.java new file mode 100644 index 0000000..57e182e --- /dev/null +++ b/projects/extensions/conductor/dataflow-src/rx/com/example/approximations/rx/Observable.java @@ -0,0 +1,137 @@ +package com.example.approximations.rx; + +import org.opentaint.ir.approximation.annotation.Approximate; +import org.opentaint.jvm.dataflow.approximations.ArgumentTypeContext; +import org.opentaint.jvm.dataflow.approximations.OpentaintNdUtil; + +/** + * Dataflow approximation for rx.Observable (RxJava 1.2.2).
+ * + * Models the reactive callback chain by which a tainted emission travels from + * the producer side (the OnSubscribe handed to create) to the consumer side + * (the Action1 onNext handed to subscribe): + * + * create(OnSubscribe f) -> Observable (arg(0) -> return): the OnSubscribe + * producer carries the tainted emission; the returned Observable is modeled + * to carry it. We both drive the producer with a capturing subscriber and + * fall back to treating the producer reference itself as the Observable + * identity, so whichever way the taint sits it reaches the return. + * + * subscribe(Action1 onNext) / subscribe(Action1 onNext, Action1 onError) + * (this -> arg(0), and arg(1) on the error path): the Observable (this) + * drives its producer with a forwarding subscriber and/or forwards its own + * value directly into the consumer's onNext, so the emission reaches the + * subscriber callback. + * + * Approximation class lives in package com.example.approximations.rx (NOT + * the target's own package rx) — naming it with the target FQN breaks Approximations.methodsOf. + * + * KNOWN LIMITATION (tests_passing: blocked): when the tainted value is produced + * INSIDE the OnSubscribe callback and emitted via subscriber.onNext(...) — the + * idiomatic RxJava / conductor *ObservableQueue shape — the analyzer does not + * carry that emission across the create -> subscribe boundary into the consumer + * Action1. Each hop verifies in isolation (this->onNext; arg(0)->return for a + * directly-tainted producer reference), but the end-to-end producer-emission + * flow is dropped engine-side (the "callback-through-returned-object" + * limitation noted in the skill). + */ +@Approximate(rx.Observable.class) +public class Observable { + + // A Subscriber whose onNext forwards the producer's emission straight into + // the consumer's Action1 (collapses producer -> consumer into one hop).
+ private static final class ForwardingSubscriber extends rx.Subscriber { + private final rx.functions.Action1 onNext; + + ForwardingSubscriber(rx.functions.Action1 onNext) { + this.onNext = onNext; + } + + @Override + public void onNext(Object t) { + if (onNext != null) { + onNext.call(t); + } + } + + @Override + public void onError(Throwable e) { + } + + @Override + public void onCompleted() { + } + } + + // A Subscriber that captures the single emission produced by an OnSubscribe. + private static final class CapturingSubscriber extends rx.Subscriber { + Object captured; + + @Override + public void onNext(Object t) { + this.captured = t; + } + + @Override + public void onError(Throwable e) { + } + + @Override + public void onCompleted() { + } + } + + // create(OnSubscribe f) -> Observable; arg(0) (the producer) -> return. + // Drive the producer here (where its concrete type is known via + // @ArgumentTypeContext) with a capturing subscriber, then return the + // captured tainted emission as the Observable identity so it carries the + // producer's emission. subscribe(this) then forwards it to the consumer. + public static rx.Observable create(@ArgumentTypeContext rx.Observable.OnSubscribe f) { + CapturingSubscriber sub = new CapturingSubscriber(); + if (f != null) { + f.call(sub); + } + if (OpentaintNdUtil.nextBool()) { + return (rx.Observable) sub.captured; + } + return (rx.Observable) (Object) f; + } + + // subscribe(Action1 onNext) -> Subscription; this (the Observable produced + // by create) IS the OnSubscribe; driving it emits the tainted value into a + // subscriber that forwards straight to the Action1 (this -> arg(0)). + public rx.Subscription subscribe(@ArgumentTypeContext rx.functions.Action1 onNext) { + if (OpentaintNdUtil.nextBool()) { + deliver(this, onNext); + } + return null; + } + + // subscribe(Action1 onNext, Action1 onError) -> Subscription; this -> arg(0). 
+ public rx.Subscription subscribe(@ArgumentTypeContext rx.functions.Action1 onNext, + @ArgumentTypeContext rx.functions.Action1 onError) { + if (OpentaintNdUtil.nextBool()) { + deliver(this, onNext); + } else if (onError != null) { + onError.call(new RuntimeException()); + } + return null; + } + + // Drives the OnSubscribe held by the Observable (this) with a forwarding + // subscriber, so the producer's tainted emission reaches the consumer. + // Also forwards the Observable's own taint (this -> arg) directly, so the + // emission reaches the consumer even when the producer.call hop is opaque. + private static void deliver(Object self, rx.functions.Action1 onNext) { + if (onNext == null) { + return; + } + if (OpentaintNdUtil.nextBool()) { + onNext.call(self); + } else { + rx.Observable.OnSubscribe producer = (rx.Observable.OnSubscribe) self; + ForwardingSubscriber sub = new ForwardingSubscriber(onNext); + producer.call(sub); + } + } +} diff --git a/projects/extensions/conductor/passthrough/co.elastic.clients.elasticsearch.yaml b/projects/extensions/conductor/passthrough/co.elastic.clients.elasticsearch.yaml new file mode 100644 index 0000000..a0dfc1f --- /dev/null +++ b/projects/extensions/conductor/passthrough/co.elastic.clients.elasticsearch.yaml @@ -0,0 +1,13 @@ +# Pass-through approximations for the Elasticsearch java-client. +# +# Models the dropped (not-yet-approximated) co.elastic.clients.elasticsearch +# methods listed in the tracking unit co_elastic_clients_elasticsearch-passthrough.yaml. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # BulkIngester.add(operation) -> the tainted BulkOperation enters the ingester + # (returns void, so taint lands on the receiver). Semantics: arg -> receiver. 
+ - function: co.elastic.clients.elasticsearch._helpers.bulk.BulkIngester#add + copy: + - from: arg(0) + to: this diff --git a/projects/extensions/conductor/passthrough/com.azure.storage.blob.yaml b/projects/extensions/conductor/passthrough/com.azure.storage.blob.yaml new file mode 100644 index 0000000..a8479e9 --- /dev/null +++ b/projects/extensions/conductor/passthrough/com.azure.storage.blob.yaml @@ -0,0 +1,43 @@ +# Pass-through approximations for com.azure.storage.blob client methods. +# +# Models the dropped (not-yet-approximated) Azure blob client methods listed in +# the tracking unit com_azure_storage_blob-passthrough.yaml. Each is a simple +# copy from the tainted blob name / payload argument into the produced value +# (or back onto the receiver/blob). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # BlobContainerClient.getBlobClient(blobName) -> BlobClient for the tainted + # blob name; the name flows into the returned client and is held by the + # container receiver. + - function: com.azure.storage.blob.BlobContainerClient#getBlobClient + copy: + - from: arg(0) + to: result + - from: arg(0) + to: this + + # BlobClientBase.download(outputStream) -> blob contents (tainted blob) are + # written into the supplied output stream argument. + - function: com.azure.storage.blob.specialized.BlobClientBase#download + copy: + - from: this + to: arg(0) + + # BlockBlobClient.commitBlockList(blockIds) -> the tainted block list is + # committed into the blob; carried onto the receiver and the response. + - function: com.azure.storage.blob.specialized.BlockBlobClient#commitBlockList + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # BlockBlobClient.uploadWithResponse(payload, ...) -> the tainted payload + # stream is uploaded into the blob; carried onto the receiver and the response. 
+ - function: com.azure.storage.blob.specialized.BlockBlobClient#uploadWithResponse + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/com.datastax.driver.core.yaml b/projects/extensions/conductor/passthrough/com.datastax.driver.core.yaml new file mode 100644 index 0000000..5356891 --- /dev/null +++ b/projects/extensions/conductor/passthrough/com.datastax.driver.core.yaml @@ -0,0 +1,13 @@ +# Pass-through approximations for the DataStax Cassandra driver. +# +# Models the dropped (not-yet-approximated) method listed in the tracking unit +# com_datastax_driver_core-passthrough.yaml. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # PreparedStatement.bind(values...) -> BoundStatement carrying the tainted + # bind values into the CQL execution (injection carrier). arg -> return. + - function: com.datastax.driver.core.PreparedStatement#bind + copy: + - from: arg(*) + to: result diff --git a/projects/extensions/conductor/passthrough/com.fasterxml.jackson.yaml b/projects/extensions/conductor/passthrough/com.fasterxml.jackson.yaml new file mode 100644 index 0000000..80e1bb5 --- /dev/null +++ b/projects/extensions/conductor/passthrough/com.fasterxml.jackson.yaml @@ -0,0 +1,30 @@ +# Pass-through approximations for com.fasterxml.jackson.databind. +# +# Models the dropped (not-yet-approximated) Jackson methods listed in the +# tracking unit com_fasterxml_jackson_databind-passthrough.yaml. Each copies +# taint from the JSON data being read into the produced value: +# - ObjectMapper.readValue(byte[], TypeReference): the tainted source bytes +# being deserialized -> the returned DTO (arg(0) -> result). +# - JsonNode.asBoolean(boolean) / JsonNode.asLong(): the receiver JsonNode +# (which holds the parsed value) -> the returned primitive (this -> result). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). 
+passThrough: + # ObjectMapper.readValue(byte[], TypeReference) -> deserialized object + # (dropped per dropped-external-methods.yaml) + - function: com.fasterxml.jackson.databind.ObjectMapper#readValue + copy: + - from: arg(0) + to: result + + # JsonNode.asBoolean(boolean defaultValue) -> primitive read from the node + - function: com.fasterxml.jackson.databind.JsonNode#asBoolean + copy: + - from: this + to: result + + # JsonNode.asLong() -> primitive read from the node + - function: com.fasterxml.jackson.databind.JsonNode#asLong + copy: + - from: this + to: result diff --git a/projects/extensions/conductor/passthrough/com.github.benmanes.caffeine.cache.yaml b/projects/extensions/conductor/passthrough/com.github.benmanes.caffeine.cache.yaml new file mode 100644 index 0000000..e00767e --- /dev/null +++ b/projects/extensions/conductor/passthrough/com.github.benmanes.caffeine.cache.yaml @@ -0,0 +1,23 @@ +# Pass-through approximations for Caffeine Cache accessor methods. +# +# Models the dropped (not-yet-approximated) Caffeine methods listed in the +# tracking unit com_github_benmanes_caffeine_cache-passthrough.yaml. +# +# The Caffeine (Loading)Cache holds tainted values: a value put into the cache +# is handed back on a later get/getIfPresent. We model the read accessors as +# this->result so a value stored in the cache (the receiver) flows out of the +# read. This puts the cache on the jq-injection and (indirectly) GraalVM flows. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). 
+passThrough: + # Cache.getIfPresent(key) -> the cached (tainted) value stored in this cache + - function: com.github.benmanes.caffeine.cache.Cache#getIfPresent + copy: + - from: this + to: result + + # LoadingCache.get(key) -> the loaded/cached (tainted) value of this cache + - function: com.github.benmanes.caffeine.cache.LoadingCache#get + copy: + - from: this + to: result diff --git a/projects/extensions/conductor/passthrough/com.google.protobuf.yaml b/projects/extensions/conductor/passthrough/com.google.protobuf.yaml new file mode 100644 index 0000000..da344d5 --- /dev/null +++ b/projects/extensions/conductor/passthrough/com.google.protobuf.yaml @@ -0,0 +1,132 @@ +# OpenTaint passThrough approximations — com.google.protobuf +# +# Proto getters/builders on gRPC request flows. Each is a simple copy of taint: +# - getters/unwrap (toStringUtf8): tainted receiver message -> returned value +# - builder setters/merges/adds: tainted argument -> receiver (and -> result, +# since builders return `this`); receiver -> result +# - constructors: tainted argument -> the constructed object (this) +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). 
+passThrough: + # --- unwrap: tainted ByteString receiver -> returned String --- + - function: com.google.protobuf.ByteString#toStringUtf8 + copy: + - from: this + to: result + + # --- builder setters/merges/adds: tainted arg carried into the builder --- + - function: com.google.protobuf.AbstractMessage$Builder#newUninitializedMessageException + copy: + - from: this + to: result + + - function: com.google.protobuf.AbstractMessageLite$Builder#addAll + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + - function: com.google.protobuf.Any$Builder#mergeFrom + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + - function: com.google.protobuf.GeneratedMessageV3$Builder#mergeUnknownFields + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + - function: com.google.protobuf.RepeatedFieldBuilderV3#addAllMessages + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + - function: com.google.protobuf.RepeatedFieldBuilderV3#addMessage + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + - function: com.google.protobuf.RepeatedFieldBuilderV3#build + copy: + - from: this + to: result + + - function: com.google.protobuf.SingleFieldBuilderV3#mergeFrom + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + - function: com.google.protobuf.SingleFieldBuilderV3#setMessage + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + - function: com.google.protobuf.Struct$Builder#putFields + copy: + - from: arg(1) + to: this + - from: arg(1) + to: result + - from: this + to: result + + - function: com.google.protobuf.Value$Builder#mergeFrom + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + - function: 
com.google.protobuf.Value$Builder#setStringValue + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # --- constructors: tainted source argument -> the constructed object --- + - function: com.google.protobuf.GeneratedMessageV3#<init> + copy: + - from: arg(0) + to: this + + - function: com.google.protobuf.RepeatedFieldBuilderV3#<init> + copy: + - from: arg(0) + to: this + + - function: com.google.protobuf.SingleFieldBuilderV3#<init> + copy: + - from: arg(0) + to: this diff --git a/projects/extensions/conductor/passthrough/com.google.rpc.yaml b/projects/extensions/conductor/passthrough/com.google.rpc.yaml new file mode 100644 index 0000000..4e8fe39 --- /dev/null +++ b/projects/extensions/conductor/passthrough/com.google.rpc.yaml @@ -0,0 +1,31 @@ +# Pass-through approximations for com.google.rpc proto builders. +# +# Models the dropped (not-yet-approximated) com.google.rpc methods listed in +# the tracking unit com_google_rpc-passthrough.yaml. These are gRPC DebugInfo +# builder methods that carry tainted strings into the status detail; the +# tainted argument flows onto the receiver (builder state) and the returned +# builder (chaining). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # DebugInfo.Builder.setDetail(String) -> tainted detail enters builder and + # is returned for chaining. + - function: com.google.rpc.DebugInfo$Builder#setDetail + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # DebugInfo.Builder.addAllStackEntries(Iterable) -> tainted stack + # entries enter the builder, which is returned for chaining.
+ - function: com.google.rpc.DebugInfo$Builder#addAllStackEntries + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result diff --git a/projects/extensions/conductor/passthrough/com.jayway.jsonpath.yaml b/projects/extensions/conductor/passthrough/com.jayway.jsonpath.yaml new file mode 100644 index 0000000..faa51f7 --- /dev/null +++ b/projects/extensions/conductor/passthrough/com.jayway.jsonpath.yaml @@ -0,0 +1,14 @@ +# Pass-through approximations for com.jayway.jsonpath methods. +# +# Models the dropped (not-yet-approximated) JsonPath method listed in the +# tracking unit com_jayway_jsonpath-passthrough.yaml. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # JsonPath.parse(json[, configuration]) -> DocumentContext over the tainted + # JSON document. The parsed source (arg(0)) carries its taint into the + # returned context, which later .read(path) calls evaluate over. + - function: com.jayway.jsonpath.JsonPath#parse + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/com.rabbitmq.client.yaml b/projects/extensions/conductor/passthrough/com.rabbitmq.client.yaml new file mode 100644 index 0000000..97c0e88 --- /dev/null +++ b/projects/extensions/conductor/passthrough/com.rabbitmq.client.yaml @@ -0,0 +1,33 @@ +# Pass-through approximations for com.rabbitmq.client methods. +# +# Models the dropped (not-yet-approximated) RabbitMQ methods listed in the +# tracking unit com_rabbitmq_client-passthrough.yaml. These carry tainted +# names/args onto the receiver (channel/connection state) and into the +# returned declaration/connection object. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # Channel.exchangeDeclare(exchange, type, ...) -> tainted exchange name stays + # on the channel and flows into the returned DeclareOk. 
+ - function: com.rabbitmq.client.Channel#exchangeDeclare + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # Channel.queueDeclare(queue, ...) -> tainted queue name stays on the channel + # and flows into the returned DeclareOk. + - function: com.rabbitmq.client.Channel#queueDeclare + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # ConnectionFactory.newConnection(...) -> tainted connection args flow into + # the returned Connection. + - function: com.rabbitmq.client.ConnectionFactory#newConnection + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/io.grpc.yaml b/projects/extensions/conductor/passthrough/io.grpc.yaml new file mode 100644 index 0000000..7b0ac2b --- /dev/null +++ b/projects/extensions/conductor/passthrough/io.grpc.yaml @@ -0,0 +1,23 @@ +# Pass-through approximations for io.grpc carrier methods. +# +# Models the dropped (not-yet-approximated) io.grpc methods listed in the +# tracking unit io_grpc-passthrough.yaml. gRPC Status.withDescription / +# StreamObserver.onNext carry tainted strings into the response (arg -> receiver/result). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # Status.withDescription(description) -> new Status carrying the tainted + # description; receiver also retains the taint. + - function: io.grpc.Status#withDescription + copy: + - from: arg(0) + to: result + - from: arg(0) + to: this + + # StreamObserver.onNext(value) -> the tainted value flows into the observer + # (the response carrier). 
+ - function: io.grpc.stub.StreamObserver#onNext + copy: + - from: arg(0) + to: this diff --git a/projects/extensions/conductor/passthrough/io.nats.client.yaml b/projects/extensions/conductor/passthrough/io.nats.client.yaml new file mode 100644 index 0000000..ef3fa5e --- /dev/null +++ b/projects/extensions/conductor/passthrough/io.nats.client.yaml @@ -0,0 +1,28 @@ +# Pass-through approximations for io.nats.client methods. +# +# Models the dropped (not-yet-approximated) io.nats.client methods listed in the +# tracking unit io_nats_client-passthrough.yaml. NATS publish/connect carry a +# tainted subject/payload/url; taint flows from the arguments onto the +# receiver/return per the tracking semantics ("arg->receiver/return"). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # JetStream.publish(subject, body) -> tainted subject/payload published; + # both args carry taint onto the receiver and into the returned PublishAck. + - function: io.nats.client.JetStream#publish + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: arg(1) + to: this + - from: arg(1) + to: result + + # Nats.connect(url) -> Connection established to the tainted url; + # the url argument carries taint into the returned Connection. + - function: io.nats.client.Nats#connect + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/jakarta.servlet.yaml b/projects/extensions/conductor/passthrough/jakarta.servlet.yaml new file mode 100644 index 0000000..4b2c31a --- /dev/null +++ b/projects/extensions/conductor/passthrough/jakarta.servlet.yaml @@ -0,0 +1,15 @@ +# Pass-through approximations for jakarta.servlet methods. +# +# Models the dropped (not-yet-approximated) jakarta.servlet methods listed in +# the tracking unit jakarta_servlet-passthrough.yaml. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). 
+passThrough: + # RequestDispatcher.forward(request, response) -> the tainted request is + # forwarded onward; its taint reaches the response and stays on the request. + - function: jakarta.servlet.RequestDispatcher#forward + copy: + - from: arg(0) + to: arg(0) + - from: arg(0) + to: arg(1) diff --git a/projects/extensions/conductor/passthrough/java.io.yaml b/projects/extensions/conductor/passthrough/java.io.yaml new file mode 100644 index 0000000..65ff6e9 --- /dev/null +++ b/projects/extensions/conductor/passthrough/java.io.yaml @@ -0,0 +1,40 @@ +# Pass-through approximations for java.io stream/reader/writer methods. +# +# Models the dropped (not-yet-approximated) java.io methods listed in the +# tracking unit java_io-passthrough.yaml. Each carries tainted bytes/strings +# from the wrapped source argument onto the stream receiver (and back out on +# read), matching the per-method semantics in that tracking file +# (arg -> receiver / return). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +# A rule that matches a built-in simply overrides it; it does not error at load. +passThrough: + # new BufferedInputStream(in) -> the wrapped (tainted) source stream feeds this + - function: java.io.BufferedInputStream# + copy: + - from: arg(0) + to: this + + # new FileInputStream(file/path/fd) -> bytes read from the tainted source land on this + - function: java.io.FileInputStream# + copy: + - from: arg(0) + to: this + + # new FileOutputStream(file/path/fd) -> the tainted target descriptor backs this stream + - function: java.io.FileOutputStream# + copy: + - from: arg(0) + to: this + + # PrintStream.printf(format, args...) 
-> formatted (tainted) output written into this stream + - function: java.io.PrintStream#printf + copy: + - from: arg(*) + to: this + + # PrintStream.println(x) -> the tainted value is written into this stream + - function: java.io.PrintStream#println + copy: + - from: arg(0) + to: this diff --git a/projects/extensions/conductor/passthrough/java.lang.yaml b/projects/extensions/conductor/passthrough/java.lang.yaml new file mode 100644 index 0000000..21858a1 --- /dev/null +++ b/projects/extensions/conductor/passthrough/java.lang.yaml @@ -0,0 +1,112 @@ +# Pass-through approximations for java.lang String/boxing/number methods. +# +# Models the dropped (not-yet-approximated) java.lang methods listed in the +# tracking unit java_lang-passthrough.yaml. Each is a simple copy from the +# receiver/argument into the produced value (or back onto the receiver). +# +# OVERRIDE mode: in this build a collision with a built-in passThrough silently +# OVERRIDES (no load error), so all 16 modeled methods are kept here. 
+passThrough: + # Boolean.booleanValue() -> primitive of the tainted box + - function: java.lang.Boolean#booleanValue + copy: + - from: this + to: result + + # Boolean.parseBoolean(s) -> boolean parsed from the tainted string + - function: java.lang.Boolean#parseBoolean + copy: + - from: arg(0) + to: result + + # Boolean.valueOf(x) -> box/parse of the tainted argument + - function: java.lang.Boolean#valueOf + copy: + - from: arg(0) + to: result + + # Double.valueOf(x) -> box/parse of the tainted argument + - function: java.lang.Double#valueOf + copy: + - from: arg(0) + to: result + + # Enum.name() -> name string of the tainted enum constant + - function: java.lang.Enum#name + copy: + - from: this + to: result + + # Integer.parseInt(s) -> int parsed from the tainted string + - function: java.lang.Integer#parseInt + copy: + - from: arg(0) + to: result + + # Integer.valueOf(x) -> box/parse of the tainted argument + - function: java.lang.Integer#valueOf + copy: + - from: arg(0) + to: result + + # Long.parseLong(s) -> long parsed from the tainted string + - function: java.lang.Long#parseLong + copy: + - from: arg(0) + to: result + + # Long.valueOf(x) -> box/parse of the tainted argument + - function: java.lang.Long#valueOf + copy: + - from: arg(0) + to: result + + # Math.max(a, b) -> one of the tainted operands + - function: java.lang.Math#max + copy: + - from: arg(0) + to: result + - from: arg(1) + to: result + + # Math.min(a, b) -> one of the tainted operands + - function: java.lang.Math#min + copy: + - from: arg(0) + to: result + - from: arg(1) + to: result + + # Math.pow(a, b) -> derived from the tainted operands + - function: java.lang.Math#pow + copy: + - from: arg(0) + to: result + - from: arg(1) + to: result + + # Object.() -> receiver constructed; preserve taint on the new object + - function: java.lang.Object# + copy: + - from: this + to: result + + # String.formatted(args) -> formatted string from the tainted format + args + - function: 
java.lang.String#formatted + copy: + - from: this + to: result + - from: arg(*) + to: result + + # String.join(delimiter, elements) -> joined string of the tainted parts + - function: java.lang.String#join + copy: + - from: arg(*) + to: result + + # String.valueOf(x) -> string rendering of the tainted argument + - function: java.lang.String#valueOf + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/java.sql.yaml b/projects/extensions/conductor/passthrough/java.sql.yaml new file mode 100644 index 0000000..ae84fa0 --- /dev/null +++ b/projects/extensions/conductor/passthrough/java.sql.yaml @@ -0,0 +1,51 @@ +# OpenTaint passThrough approximations — java.sql (JDBC SQL-injection carriers) +# +# Models the dropped java.sql methods listed in the tracking unit +# java_sql-passthrough.yaml. These sit on SQL-injection carrier paths: tainted +# values move into the SQL statement / connection objects that ultimately reach +# execute(). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +# A provided rule overrides a built-in only if it matches one. +passThrough: + # Connection.prepareStatement(sql[, ...]) -> the SQL string carries into the + # produced PreparedStatement and back onto the connection. + - function: java.sql.Connection#prepareStatement + copy: + - from: arg(0) + to: result + - from: arg(0) + to: this + + # Connection.createStatement([...]) -> Statement is a carrier rooted in the + # tainted connection. + - function: java.sql.Connection#createStatement + copy: + - from: this + to: result + + # PreparedStatement.setString(parameterIndex, x) -> the bound value taints the + # statement (carries into the eventual execute()). + - function: java.sql.PreparedStatement#setString + copy: + - from: arg(1) + to: this + + # PreparedStatement.setInt(parameterIndex, x) -> bound value taints statement. 
+ - function: java.sql.PreparedStatement#setInt + copy: + - from: arg(1) + to: this + + # PreparedStatement.setBinaryStream(parameterIndex, x[, length]) -> bound value + # taints statement. + - function: java.sql.PreparedStatement#setBinaryStream + copy: + - from: arg(1) + to: this + + # Wrapper.unwrap(iface) -> unwrapped delegate carries the tainted wrapper. + - function: java.sql.Wrapper#unwrap + copy: + - from: this + to: result diff --git a/projects/extensions/conductor/passthrough/java.time.yaml b/projects/extensions/conductor/passthrough/java.time.yaml new file mode 100644 index 0000000..bd15e40 --- /dev/null +++ b/projects/extensions/conductor/passthrough/java.time.yaml @@ -0,0 +1,25 @@ +# Pass-through approximations for java.time parse/factory methods. +# +# Models the dropped (not-yet-approximated) java.time methods listed in the +# tracking unit java_time-passthrough.yaml. Each carries tainted numeric/string +# input through to the produced temporal/zone value (arg -> result). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). 
+passThrough: + # Duration.ofMillis(millis) -> Duration built from the tainted numeric value + - function: java.time.Duration#ofMillis + copy: + - from: arg(0) + to: result + + # ZoneId.of(zoneId) -> ZoneId parsed from the tainted string + - function: java.time.ZoneId#of + copy: + - from: arg(0) + to: result + + # DateTimeFormatter.parse(text) -> TemporalAccessor parsed from the tainted text + - function: java.time.format.DateTimeFormatter#parse + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/java.util.concurrent.yaml b/projects/extensions/conductor/passthrough/java.util.concurrent.yaml new file mode 100644 index 0000000..6f3b7b0 --- /dev/null +++ b/projects/extensions/conductor/passthrough/java.util.concurrent.yaml @@ -0,0 +1,75 @@ +# OpenTaint passThrough approximations — java.util.concurrent +# +# Models the dropped (not-yet-approximated) java.util.concurrent methods listed +# in the tracking unit java_util_concurrent-passthrough.yaml. Concurrent +# containers, futures, and executor factories: taint flows from the +# receiver/argument into the produced value (or back onto the container). +# +# This unit matters: Future.get and the ConcurrentHashMap accessors sit on the +# ScriptEvaluator RCE flow — a tainted value placed into a concurrent map / +# computed in a future must carry its taint back out on read. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +# A provided rule that collides with a BUILT-IN passThrough silently overrides +# it (no load error); all targets are kept unless the config fails to load. 
+passThrough: + # ConcurrentHashMap.containsKey(key) -> boolean derived from the tainted map + - function: java.util.concurrent.ConcurrentHashMap#containsKey + copy: + - from: this + to: result + + # ConcurrentHashMap.remove(key) -> the removed (tainted) value + - function: java.util.concurrent.ConcurrentHashMap#remove + copy: + - from: this + to: result + + # ConcurrentHashMap.values() -> values view backed by the tainted map + - function: java.util.concurrent.ConcurrentHashMap#values + copy: + - from: this + to: result + + # ConcurrentSkipListSet.remove(o) -> boolean membership of the tainted set + - function: java.util.concurrent.ConcurrentSkipListSet#remove + copy: + - from: this + to: result + + # Future.get() -> the computed (tainted) value carried out of the future + - function: java.util.concurrent.Future#get + copy: + - from: this + to: result + + # Executors.newFixedThreadPool(...) -> executor; any tainted arg (e.g. the + # ThreadFactory) flows into the produced executor + - function: java.util.concurrent.Executors#newFixedThreadPool + copy: + - from: arg(*) + to: result + + # Executors.newScheduledThreadPool(...) -> executor; tainted args flow out + - function: java.util.concurrent.Executors#newScheduledThreadPool + copy: + - from: arg(*) + to: result + + # Executors.newSingleThreadScheduledExecutor(...) -> executor; tainted args flow out + - function: java.util.concurrent.Executors#newSingleThreadScheduledExecutor + copy: + - from: arg(*) + to: result + + # new ScheduledThreadPoolExecutor(...) -> executor; tainted args flow onto it + - function: java.util.concurrent.ScheduledThreadPoolExecutor# + copy: + - from: arg(*) + to: this + + # new ThreadPoolExecutor(...) 
-> executor; tainted args (queue, factory) flow onto it + - function: java.util.concurrent.ThreadPoolExecutor# + copy: + - from: arg(*) + to: this diff --git a/projects/extensions/conductor/passthrough/java.util.function.yaml b/projects/extensions/conductor/passthrough/java.util.function.yaml new file mode 100644 index 0000000..5e0b31e --- /dev/null +++ b/projects/extensions/conductor/passthrough/java.util.function.yaml @@ -0,0 +1,26 @@ +# Pass-through approximations for java.util.function functional interfaces. +# +# Models the dropped (not-yet-approximated) functional-interface invocation +# points listed in the tracking unit java_util_function-passthrough.yaml. +# Each models the copy at the lambda-body boundary (arg->return; arg->receiver for Consumer). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # Consumer.accept(value) -> void; the tainted argument is handed to the + # consumer body / captured by the receiver. + - function: java.util.function.Consumer#accept + copy: + - from: arg(0) + to: this + + # Function.apply(value) -> result derived from the tainted argument. + - function: java.util.function.Function#apply + copy: + - from: arg(0) + to: result + + # Predicate.test(value) -> boolean derived from the tainted argument. + - function: java.util.function.Predicate#test + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/java.util.stream.yaml b/projects/extensions/conductor/passthrough/java.util.stream.yaml new file mode 100644 index 0000000..4e1200b --- /dev/null +++ b/projects/extensions/conductor/passthrough/java.util.stream.yaml @@ -0,0 +1,29 @@ +# Pass-through approximations for non-lambda java.util.stream plumbing. +# +# Models the dropped (not-yet-approximated) java.util.stream methods listed in +# the tracking unit java_util_stream-passthrough.yaml.
These are the non-lambda +# stream operations that carry elements through unchanged (lambda-based ops are +# modeled in the dataflow unit). Each is a simple copy from the receiver into +# the produced value. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +# Methods that collide with a BUILT-IN passThrough are dropped from this file +# (see the task report) because a colliding target errors at config load. +passThrough: + # BaseStream.iterator() -> Iterator over the tainted stream's elements + - function: java.util.stream.BaseStream#iterator + copy: + - from: this + to: result + + # IntStream.of(values...) -> stream backed by the tainted int values + - function: java.util.stream.IntStream#of + copy: + - from: arg(0) + to: result + + # Stream.limit(n) -> truncated stream still carrying the tainted elements + - function: java.util.stream.Stream#limit + copy: + - from: this + to: result diff --git a/projects/extensions/conductor/passthrough/java.util.yaml b/projects/extensions/conductor/passthrough/java.util.yaml new file mode 100644 index 0000000..fe04e9a --- /dev/null +++ b/projects/extensions/conductor/passthrough/java.util.yaml @@ -0,0 +1,115 @@ +# Pass-through approximations for java.util container/accessor methods. +# +# Models the dropped (not-yet-approximated) java.util methods listed in the +# tracking unit java_util-passthrough.yaml. Each is a simple copy from the +# receiver/argument into the produced value (or back onto the container). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +# Methods that collide with a BUILT-IN passThrough are dropped from this file +# (see the task report) because a colliding target errors at config load. 
+passThrough: + # Arrays.toString(array) -> string of the tainted array elements + - function: java.util.Arrays#toString + copy: + - from: arg(0) + to: result + + # Collection.stream() -> stream backed by the tainted collection + - function: java.util.Collection#stream + copy: + - from: this + to: result + + # Comparator.compare(a, b) -> int derived from the compared (tainted) values + - function: java.util.Comparator#compare + copy: + - from: arg(0) + to: result + - from: arg(1) + to: result + + # Collections.sort(list) -> in-place reorder; the list argument keeps its taint + - function: java.util.Collections#sort + copy: + - from: arg(0) + to: arg(0) + + # List.sort(comparator) -> in-place reorder; receiver keeps its taint + - function: java.util.List#sort + copy: + - from: this + to: this + + # ArrayList.sort(comparator) -> in-place reorder; receiver keeps its taint + - function: java.util.ArrayList#sort + copy: + - from: this + to: this + + # List.remove(index) -> removed (tainted) element + - function: java.util.List#remove + copy: + - from: this + to: result + + # HashSet.remove(o) -> boolean membership of the tainted set + - function: java.util.HashSet#remove + copy: + - from: this + to: result + + # Set.remove(o) -> boolean membership of the tainted set + - function: java.util.Set#remove + copy: + - from: this + to: result + + # Map.entrySet() -> entry set view backed by the tainted map + - function: java.util.Map#entrySet + copy: + - from: this + to: result + + # Map.getOrDefault(key, default) -> stored value or the supplied default + - function: java.util.Map#getOrDefault + copy: + - from: this + to: result + - from: arg(1) + to: result + + # PriorityQueue.add(e) -> the tainted element enters the queue + - function: java.util.PriorityQueue#add + copy: + - from: arg(0) + to: this + + # PriorityQueue.poll() -> head (tainted) element of the queue + - function: java.util.PriorityQueue#poll + copy: + - from: this + to: result + + # HashMap.putAll(m)
-> contents of the tainted source map enter this map + - function: java.util.HashMap#putAll + copy: + - from: arg(0) + to: this + + # Map.putAll(m) -> contents of the tainted source map enter this map + - function: java.util.Map#putAll + copy: + - from: arg(0) + to: this + + # UUID.fromString(s) -> UUID parsed from the tainted string + - function: java.util.UUID#fromString + copy: + - from: arg(0) + to: result + + # UUID.nameUUIDFromBytes(bytes) -> UUID derived from the tainted bytes + - function: java.util.UUID#nameUUIDFromBytes + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/net.thisptr.jackson.jq.yaml b/projects/extensions/conductor/passthrough/net.thisptr.jackson.jq.yaml new file mode 100644 index 0000000..7887cd3 --- /dev/null +++ b/projects/extensions/conductor/passthrough/net.thisptr.jackson.jq.yaml @@ -0,0 +1,22 @@ +# Pass-through approximation for jackson-jq (net.thisptr.jackson.jq). +# +# Models the dropped jackson-jq method tracked in +# net_thisptr_jackson_jq-passthrough.yaml. jackson-jq is the jq carrier on the +# jq-injection flow: a compiled JsonQuery runs a jq expression over an input +# JsonNode and returns the matching nodes. +# +# JsonQuery#apply(...) takes the input JsonNode/List and returns a +# List of results derived from it; the tainted input data therefore +# reaches the returned results. The overloads place the data at arg(0) +# (apply(JsonNode) / apply(List)) or at arg(1) (apply(Scope, JsonNode) / +# apply(Scope, List)), so both argument positions are copied to result. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # JsonQuery.apply(...) 
-> result nodes carry the taint of the input node(s) + - function: net.thisptr.jackson.jq.JsonQuery#apply + copy: + - from: arg(0) + to: result + - from: arg(1) + to: result diff --git a/projects/extensions/conductor/passthrough/org.apache.commons.lang3.yaml b/projects/extensions/conductor/passthrough/org.apache.commons.lang3.yaml new file mode 100644 index 0000000..f5c3943 --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.apache.commons.lang3.yaml @@ -0,0 +1,67 @@ +# OpenTaint passThrough approximations — org.apache.commons.lang3.StringUtils +# +# Pure string transforms: taint flows from the input String argument to the +# returned String, unchanged. Modeled arg(0) -> result. +# +# --passthrough-approximations is OVERRIDE mode and additive per method: a +# provided rule that matches a built-in (e.g. StringUtils#upperCase) simply +# overrides it, it does not error at load. Verified: no target in this file +# errored on load. (Built-ins also cover defaultIfBlank/removeEnd/trimToNull, +# which are not part of this set.) 
+passThrough: + - function: org.apache.commons.lang3.StringUtils#trim + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#strip + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#substring + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#substringBefore + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#substringAfter + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#upperCase + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#lowerCase + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#capitalize + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#replace + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#normalizeSpace + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#defaultString + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#join + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#trimToEmpty + copy: + - from: arg(0) + to: result + - function: org.apache.commons.lang3.StringUtils#stripToNull + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/org.apache.commons.yaml b/projects/extensions/conductor/passthrough/org.apache.commons.yaml new file mode 100644 index 0000000..428208c --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.apache.commons.yaml @@ -0,0 +1,28 @@ +# OpenTaint passThrough approximations — Apache Commons (io / codec / lang3.time) +# +# Simple copy propagation for tainted input flowing through commons helpers. +# OVERRIDE mode, additive per method: a provided rule that matches a built-in +# overrides it; it does not error at load. 
+# +# Per-method semantics from org_apache_commons-passthrough.yaml tracking unit. +passThrough: + # DigestUtils.sha256Hex(data) -> hex digest string of the tainted input. + # static; data is byte[] | String | InputStream depending on overload. + - function: org.apache.commons.codec.digest.DigestUtils#sha256Hex + copy: + - from: arg(0) + to: result + + # IOUtils.copy(input, output) -> bytes/chars read from the tainted input + # source are written into the output sink. static; arg(0) -> arg(1). + - function: org.apache.commons.io.IOUtils#copy + copy: + - from: arg(0) + to: arg(1) + + # DateUtils.parseDate(str, ...) -> Date parsed from the tainted string. + # static; arg(0) is the input String. arg(0) -> result. + - function: org.apache.commons.lang3.time.DateUtils#parseDate + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/org.apache.kafka.yaml b/projects/extensions/conductor/passthrough/org.apache.kafka.yaml new file mode 100644 index 0000000..ccda147 --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.apache.kafka.yaml @@ -0,0 +1,25 @@ +# Pass-through approximations for Apache Kafka client carrier types. +# +# Models the dropped (not-yet-approximated) org.apache.kafka methods listed in +# the tracking unit org_apache_kafka-passthrough.yaml. Both targets are +# constructors of broker-bound carrier objects: a tainted topic/key/value (or +# header name/bytes) passed to the constructor is carried into the receiver +# object, which is then handed to the producer (the sink). +# +# Semantics: arg -> receiver (this). ProducerRecord has several overloads with +# different key/value positions, so every constructor argument is copied onto +# the receiver via arg(*) to cover all of them. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # ProducerRecord(...) 
-> tainted topic/key/value carried into the record + - function: org.apache.kafka.clients.producer.ProducerRecord# + copy: + - from: arg(*) + to: this + + # RecordHeader(key, value) -> tainted header name/bytes carried into the header + - function: org.apache.kafka.common.header.internals.RecordHeader# + copy: + - from: arg(*) + to: this diff --git a/projects/extensions/conductor/passthrough/org.elasticsearch.yaml b/projects/extensions/conductor/passthrough/org.elasticsearch.yaml new file mode 100644 index 0000000..d05e935 --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.elasticsearch.yaml @@ -0,0 +1,167 @@ +# OpenTaint passThrough approximations — org.elasticsearch query/index builders. +# +# These methods carry tainted field names, query terms, doc bodies, and ids +# into Elasticsearch query/index requests (injection carriers). Modeled as: +# - fluent setters (build*): arg -> this, arg -> result, this -> result +# - static query factories (QueryBuilders.*): arg(s) -> result +# - constructors (): arg(s) -> this (the new instance is `this`) +# +# --passthrough-approximations is OVERRIDE mode and additive per method: a +# provided rule that collides with a built-in simply overrides it; it does not +# error at load. Verified to load with no rule-parsing issues. 
+passThrough: + # BulkRequest.setRefreshPolicy(policy) -> fluent; receiver carries the value + - function: org.elasticsearch.action.bulk.BulkRequest#setRefreshPolicy + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # new DeleteRequest(index[, id]) -> ids/index land on the new request + - function: org.elasticsearch.action.delete.DeleteRequest# + copy: + - from: arg(*) + to: this + + # new GetRequest(index[, id]) -> ids/index land on the new request + - function: org.elasticsearch.action.get.GetRequest# + copy: + - from: arg(*) + to: this + + # IndexRequest.id(id) -> fluent; receiver carries the tainted id + - function: org.elasticsearch.action.index.IndexRequest#id + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # IndexRequest.source(...) -> fluent; receiver carries the tainted doc body + - function: org.elasticsearch.action.index.IndexRequest#source + copy: + - from: arg(*) + to: this + - from: arg(*) + to: result + - from: this + to: result + + # new SearchRequest(indices...) 
-> tainted indices land on the new request + - function: org.elasticsearch.action.search.SearchRequest# + copy: + - from: arg(*) + to: this + + # new UpdateRequest(index, id) -> tainted index/id land on the new request + - function: org.elasticsearch.action.update.UpdateRequest# + copy: + - from: arg(*) + to: this + + # BoolQueryBuilder.must(query) -> fluent; nested tainted query enters the bool + - function: org.elasticsearch.index.query.BoolQueryBuilder#must + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # BoolQueryBuilder.should(query) -> fluent; nested tainted query enters the bool + - function: org.elasticsearch.index.query.BoolQueryBuilder#should + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # QueryBuilders.existsQuery(name) -> query built from the tainted field name + - function: org.elasticsearch.index.query.QueryBuilders#existsQuery + copy: + - from: arg(*) + to: result + + # QueryBuilders.prefixQuery(name, prefix) -> query from tainted name/prefix + - function: org.elasticsearch.index.query.QueryBuilders#prefixQuery + copy: + - from: arg(*) + to: result + + # QueryBuilders.queryStringQuery(queryString) -> query from tainted string + - function: org.elasticsearch.index.query.QueryBuilders#queryStringQuery + copy: + - from: arg(*) + to: result + + # QueryBuilders.rangeQuery(name) -> query from the tainted field name + - function: org.elasticsearch.index.query.QueryBuilders#rangeQuery + copy: + - from: arg(*) + to: result + + # QueryBuilders.termsQuery(name, values...) 
-> query from tainted name/values + - function: org.elasticsearch.index.query.QueryBuilders#termsQuery + copy: + - from: arg(*) + to: result + + # RangeQueryBuilder.from(value) -> fluent; tainted bound enters the query + - function: org.elasticsearch.index.query.RangeQueryBuilder#from + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # RangeQueryBuilder.to(value) -> fluent; tainted bound enters the query + - function: org.elasticsearch.index.query.RangeQueryBuilder#to + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # SearchSourceBuilder.from(offset) -> fluent; tainted value enters the source + - function: org.elasticsearch.search.builder.SearchSourceBuilder#from + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # SearchSourceBuilder.size(size) -> fluent; tainted value enters the source + - function: org.elasticsearch.search.builder.SearchSourceBuilder#size + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # new FieldSortBuilder(field) -> tainted field name lands on the sort builder + - function: org.elasticsearch.search.sort.FieldSortBuilder# + copy: + - from: arg(*) + to: this + + # SortOrder.valueOf(name) -> enum parsed from the tainted string + - function: org.elasticsearch.search.sort.SortOrder#valueOf + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/org.graalvm.polyglot.yaml b/projects/extensions/conductor/passthrough/org.graalvm.polyglot.yaml new file mode 100644 index 0000000..ed9cca0 --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.graalvm.polyglot.yaml @@ -0,0 +1,89 @@ +# OpenTaint passThrough approximations — org.graalvm.polyglot (GraalJS interop) +# +# Models taint copies on the JS-eval flow so a tainted script string propagates +# into the Source handed to Context.eval, and so eval results carry taint 
back. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +# Methods that collide with a BUILT-IN passThrough are dropped from this file +# (see the task report) because a colliding target errors at config load. +passThrough: + # Source.newBuilder(language, CharSequence source, name) -> Source$Builder + # The script text (arg(1)) flows into the builder. + - function: org.graalvm.polyglot.Source#newBuilder + copy: + - from: arg(1) + to: result + + # Source.create(language, CharSequence source) -> Source + - function: org.graalvm.polyglot.Source#create + copy: + - from: arg(1) + to: result + + # Source.Builder.buildLiteral() -> Source : tainted builder yields tainted Source + - function: org.graalvm.polyglot.Source$Builder#buildLiteral + copy: + - from: this + to: result + + # Source.Builder.build() -> Source : tainted builder yields tainted Source + - function: org.graalvm.polyglot.Source$Builder#build + copy: + - from: this + to: result + + # Source.Builder.cached(boolean) -> Source$Builder : fluent, returns this + - function: org.graalvm.polyglot.Source$Builder#cached + copy: + - from: this + to: result + + # Context.eval(...) 
-> Value : tainted Source/script yields a tainted result Value + - function: org.graalvm.polyglot.Context#eval + copy: + - from: arg(0) + to: result + - from: arg(1) + to: result + + # Value.as(Class) -> unwrapped tainted result + - function: org.graalvm.polyglot.Value#as + copy: + - from: this + to: result + + # Value.asString() -> tainted string of the result + - function: org.graalvm.polyglot.Value#asString + copy: + - from: this + to: result + + # Value.asBoolean() -> tainted result + - function: org.graalvm.polyglot.Value#asBoolean + copy: + - from: this + to: result + + # Value.asInt() -> tainted result + - function: org.graalvm.polyglot.Value#asInt + copy: + - from: this + to: result + + # Value.asLong() -> tainted result + - function: org.graalvm.polyglot.Value#asLong + copy: + - from: this + to: result + + # Value.asDouble() -> tainted result + - function: org.graalvm.polyglot.Value#asDouble + copy: + - from: this + to: result + + # Value.putMember(identifier, value) -> tainted member value enters the receiver + - function: org.graalvm.polyglot.Value#putMember + copy: + - from: arg(1) + to: this diff --git a/projects/extensions/conductor/passthrough/org.opensearch.client.opensearch.yaml b/projects/extensions/conductor/passthrough/org.opensearch.client.opensearch.yaml new file mode 100644 index 0000000..d0599c8 --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.opensearch.client.opensearch.yaml @@ -0,0 +1,69 @@ +# Pass-through approximations for OpenSearch java-client request builders. +# +# Models the dropped (not-yet-approximated) org.opensearch.client.opensearch +# methods listed in org_opensearch_client_opensearch-passthrough.yaml. Each is a +# fluent request-builder setter: the tainted argument (id / operations / index / +# document) flows onto the receiver builder and back through the fluent return so +# subsequent .build() carries the taint into the constructed request. 
+# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # BulkRequest.Builder.operations(ops) -> bulk operations carry tainted docs/ids + - function: org.opensearch.client.opensearch.core.BulkRequest$Builder#operations + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # DeleteRequest.Builder.id(id) -> tainted document id into the request + - function: org.opensearch.client.opensearch.core.DeleteRequest$Builder#id + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # GetRequest.Builder.id(id) -> tainted document id into the request + - function: org.opensearch.client.opensearch.core.GetRequest$Builder#id + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # IndexRequest.Builder.id(id) -> tainted document id into the request + - function: org.opensearch.client.opensearch.core.IndexRequest$Builder#id + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # SearchRequest.Builder.index(index) -> tainted index name into the request + - function: org.opensearch.client.opensearch.core.SearchRequest$Builder#index + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # UpdateRequest.Builder.id(id) -> tainted document id into the request + - function: org.opensearch.client.opensearch.core.UpdateRequest$Builder#id + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result diff --git a/projects/extensions/conductor/passthrough/org.opensearch.yaml b/projects/extensions/conductor/passthrough/org.opensearch.yaml new file mode 100644 index 0000000..315de6c --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.opensearch.yaml @@ -0,0 +1,168 @@ +# Pass-through approximations for org.opensearch query / index / request builders. 
+# +# Models the dropped (not-yet-approximated) org.opensearch methods listed in the +# tracking unit org_opensearch-passthrough.yaml. These mirror the Elasticsearch +# builders: tainted terms/fields/values are carried into the constructed query, +# request, or builder object (and returned from fluent setters / static factories). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # --- Action request constructors: tainted index/id/source enters the request --- + + # new DeleteRequest(index, id) -> request carries the tainted index/id + - function: org.opensearch.action.delete.DeleteRequest# + copy: + - from: arg(*) + to: this + + # new GetRequest(index, id) -> request carries the tainted index/id + - function: org.opensearch.action.get.GetRequest# + copy: + - from: arg(*) + to: this + + # new SearchRequest(indices...) -> request carries the tainted indices + - function: org.opensearch.action.search.SearchRequest# + copy: + - from: arg(*) + to: this + + # new UpdateRequest(index, id) -> request carries the tainted index/id + - function: org.opensearch.action.update.UpdateRequest# + copy: + - from: arg(*) + to: this + + # --- IndexRequest fluent setters: arg into receiver and returned builder --- + + # IndexRequest.id(id) -> tainted id stored, returns this + - function: org.opensearch.action.index.IndexRequest#id + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # IndexRequest.source(...) 
-> tainted document source stored, returns this + - function: org.opensearch.action.index.IndexRequest#source + copy: + - from: arg(*) + to: this + - from: arg(*) + to: result + - from: this + to: result + + # --- BoolQueryBuilder clause adders: arg into receiver and returned builder --- + + # BoolQueryBuilder.must(query) -> tainted clause added, returns this + - function: org.opensearch.index.query.BoolQueryBuilder#must + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # BoolQueryBuilder.should(query) -> tainted clause added, returns this + - function: org.opensearch.index.query.BoolQueryBuilder#should + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # --- QueryBuilders static factories: tainted field/value into the new query --- + + # QueryBuilders.existsQuery(field) -> query over the tainted field + - function: org.opensearch.index.query.QueryBuilders#existsQuery + copy: + - from: arg(*) + to: result + + # QueryBuilders.prefixQuery(field, prefix) -> query over the tainted field/prefix + - function: org.opensearch.index.query.QueryBuilders#prefixQuery + copy: + - from: arg(*) + to: result + + # QueryBuilders.queryStringQuery(queryString) -> query over the tainted string + - function: org.opensearch.index.query.QueryBuilders#queryStringQuery + copy: + - from: arg(*) + to: result + + # QueryBuilders.rangeQuery(field) -> range query over the tainted field + - function: org.opensearch.index.query.QueryBuilders#rangeQuery + copy: + - from: arg(*) + to: result + + # QueryBuilders.termsQuery(field, values...) 
-> query over the tainted field/values + - function: org.opensearch.index.query.QueryBuilders#termsQuery + copy: + - from: arg(*) + to: result + + # --- RangeQueryBuilder bound setters: arg into receiver and returned builder --- + + # RangeQueryBuilder.from(value) -> tainted lower bound stored, returns this + - function: org.opensearch.index.query.RangeQueryBuilder#from + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # RangeQueryBuilder.to(value) -> tainted upper bound stored, returns this + - function: org.opensearch.index.query.RangeQueryBuilder#to + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # --- SearchSourceBuilder paging setters: arg into receiver and returned builder --- + + # SearchSourceBuilder.from(offset) -> tainted offset stored, returns this + - function: org.opensearch.search.builder.SearchSourceBuilder#from + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # SearchSourceBuilder.size(size) -> tainted size stored, returns this + - function: org.opensearch.search.builder.SearchSourceBuilder#size + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result + + # --- FieldSortBuilder: tainted sort field name enters the builder --- + + # new FieldSortBuilder(field) -> builder carries the tainted field name + - function: org.opensearch.search.sort.FieldSortBuilder# + copy: + - from: arg(*) + to: this + + # SortOrder.valueOf(name) -> enum parsed from the tainted name + - function: org.opensearch.search.sort.SortOrder#valueOf + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/org.redisson.api.yaml b/projects/extensions/conductor/passthrough/org.redisson.api.yaml new file mode 100644 index 0000000..fb0cde1 --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.redisson.api.yaml @@ -0,0 +1,12 @@ +# Pass-through approximations 
for org.redisson.api methods. +# +# Models the dropped (not-yet-approximated) Redisson methods listed in the +# tracking unit org_redisson_api-passthrough.yaml. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # RedissonClient.getLock(name) -> RLock carrying the tainted lock name + - function: org.redisson.api.RedissonClient#getLock + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/org.springframework.data.redis.core.yaml b/projects/extensions/conductor/passthrough/org.springframework.data.redis.core.yaml new file mode 100644 index 0000000..7518f9c --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.springframework.data.redis.core.yaml @@ -0,0 +1,22 @@ +# Pass-through approximations for Spring Data Redis SetOperations methods. +# +# Models the dropped (not-yet-approximated) methods listed in the tracking +# unit org_springframework_data_redis_core-passthrough.yaml. The tainted key +# (and membership value) supplied as arguments are carried into Redis through +# the receiver, so taint flows arg -> receiver (this). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). 
+passThrough: + # SetOperations.isMember(key, value) -> tainted key/value carried into Redis + - function: org.springframework.data.redis.core.SetOperations#isMember + copy: + - from: arg(0) + to: this + - from: arg(1) + to: this + + # SetOperations.size(key) -> tainted key carried into Redis + - function: org.springframework.data.redis.core.SetOperations#size + copy: + - from: arg(0) + to: this diff --git a/projects/extensions/conductor/passthrough/org.springframework.jdbc.core.yaml b/projects/extensions/conductor/passthrough/org.springframework.jdbc.core.yaml new file mode 100644 index 0000000..dae4253 --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.springframework.jdbc.core.yaml @@ -0,0 +1,41 @@ +# Pass-through approximations for Spring JdbcTemplate query/update methods. +# +# Models the dropped org.springframework.jdbc.core methods from the tracking +# unit org_springframework_jdbc_core-passthrough.yaml. These methods take a +# tainted SQL string (and parameter args) and execute it against the DB; the +# returned rows carry that taint back out. This is the SQL-injection carrier +# path: tainted SQL/args flow arg -> result. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # JdbcTemplate.query(sql, ...) -> result rows derived from the tainted SQL/args + - function: org.springframework.jdbc.core.JdbcTemplate#query + copy: + - from: arg(0) + to: result + - from: arg(*) + to: result + + # JdbcTemplate.queryForList(sql, ...) -> list of rows from the tainted SQL/args + - function: org.springframework.jdbc.core.JdbcTemplate#queryForList + copy: + - from: arg(0) + to: result + - from: arg(*) + to: result + + # JdbcTemplate.queryForObject(sql, ...) -> single value from the tainted SQL/args + - function: org.springframework.jdbc.core.JdbcTemplate#queryForObject + copy: + - from: arg(0) + to: result + - from: arg(*) + to: result + + # JdbcTemplate.update(sql, ...) 
-> affected-row count from the tainted SQL/args + - function: org.springframework.jdbc.core.JdbcTemplate#update + copy: + - from: arg(0) + to: result + - from: arg(*) + to: result diff --git a/projects/extensions/conductor/passthrough/org.springframework.web.passthrough.yaml b/projects/extensions/conductor/passthrough/org.springframework.web.passthrough.yaml new file mode 100644 index 0000000..aa81b4a --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.springframework.web.passthrough.yaml @@ -0,0 +1,33 @@ +# Pass-through approximations for Spring web helper methods on SSRF carrier paths. +# +# Models the dropped (not-yet-approximated) org.springframework.web methods listed +# in the tracking unit org_springframework_web-passthrough.yaml. Each is a simple +# copy of taint from an argument/receiver into the produced value (or back onto the +# builder), keeping a tainted URL/entity flowing into the outbound HTTP call and the +# tainted response flowing back out. +# +# File name is org.springframework.web.passthrough.yaml to avoid colliding with the +# existing org.springframework.yaml. +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # RestTemplate.exchange(...) carries the tainted URL/entity into the outbound + # HTTP call (SSRF carrier) and returns the tainted ResponseEntity. Across all + # overloads the request inputs are the arguments and the response is the result, + # so any tainted argument flows to the result. + - function: org.springframework.web.client.RestTemplate#exchange + copy: + - from: arg(*) + to: result + + # ContentNegotiationConfigurer.defaultContentType(MediaType...) is a builder + # method: the tainted argument is stored on the configurer and the configurer is + # returned for chaining. 
+ - function: org.springframework.web.servlet.config.annotation.ContentNegotiationConfigurer#defaultContentType + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + - from: this + to: result diff --git a/projects/extensions/conductor/passthrough/org.springframework.yaml b/projects/extensions/conductor/passthrough/org.springframework.yaml new file mode 100644 index 0000000..013d830 --- /dev/null +++ b/projects/extensions/conductor/passthrough/org.springframework.yaml @@ -0,0 +1,34 @@ +# OpenTaint passThrough approximations — org.springframework (misc helpers) +# +# Spring helper methods on HTTP / scheduling / bean-copy flows. Each is a simple +# copy of taint from the tainted input to where the value flows out: +# - BeanUtils.copyProperties: tainted source object -> target object +# - HttpMethod.valueOf / MediaType.valueOf: tainted String -> parsed value +# - CronExpression.parse: tainted cron String -> parsed CronExpression +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # BeanUtils.copyProperties(source, target): copies source's properties into target. + # Tainted source object carries taint into the target object (arg(1)). + - function: org.springframework.beans.BeanUtils#copyProperties + copy: + - from: arg(0) + to: arg(1) + + # HttpMethod.valueOf(String): tainted method string -> returned HttpMethod. + - function: org.springframework.http.HttpMethod#valueOf + copy: + - from: arg(0) + to: result + + # MediaType.valueOf(String): tainted media-type string -> returned MediaType. + - function: org.springframework.http.MediaType#valueOf + copy: + - from: arg(0) + to: result + + # CronExpression.parse(String): tainted cron expression -> returned CronExpression. 
+ - function: org.springframework.scheduling.support.CronExpression#parse + copy: + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/passthrough/redis.clients.jedis.yaml b/projects/extensions/conductor/passthrough/redis.clients.jedis.yaml new file mode 100644 index 0000000..5f54b51 --- /dev/null +++ b/projects/extensions/conductor/passthrough/redis.clients.jedis.yaml @@ -0,0 +1,263 @@ +# OpenTaint passThrough approximations — redis.clients.jedis +# +# Models the dropped Jedis read/write operations from the tracking unit +# redis_clients_jedis-passthrough.yaml. +# +# Semantics (per signatures in jedis-6.0.0 / 3.6.0): +# Writes (set/hset/sadd/rpush/zadd): tainted key/value args flow INTO the +# store (the receiver). Modeled arg(*) -> this. These ops return a +# status String ("OK") or a long count, never the written value, so +# no copy to result. +# Reads (get/hget/lrange/zrange/zrangeByScore): return the tainted value +# held by the store. Modeled this -> result. +# Scans (hscan/sscan): return a ScanResult backed by the store. this -> +# result, and the ScanResult ctor carries its result-list arg out. +# Mutators(del/expire/hdel/ltrim/srem/zrem/zremrangeByScore): tainted key/ +# member args flow into the op against the receiver. arg(*) -> this. +# +# Function matchers are name-based (cover all overloads). copy: arg(*) covers +# the value arg regardless of its position across overloads. +# +# OVERRIDE mode, additive per method: a rule that collides with a built-in +# simply overrides it; it does not error at load. 
+passThrough: + # ---------------- Jedis ---------------- + - function: redis.clients.jedis.Jedis#del + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#expire + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#hdel + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#hget + copy: + - from: this + to: result + - function: redis.clients.jedis.Jedis#hscan + copy: + - from: this + to: result + - function: redis.clients.jedis.Jedis#hset + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#lrange + copy: + - from: this + to: result + - function: redis.clients.jedis.Jedis#ltrim + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#rpush + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#sadd + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#set + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#srem + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#sscan + copy: + - from: this + to: result + - function: redis.clients.jedis.Jedis#zadd + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#zrange + copy: + - from: this + to: result + - function: redis.clients.jedis.Jedis#zrangeByScore + copy: + - from: this + to: result + - function: redis.clients.jedis.Jedis#zrem + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.Jedis#zremrangeByScore + copy: + - from: arg(*) + to: this + + # ---------------- JedisCluster ---------------- + - function: redis.clients.jedis.JedisCluster#del + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#expire + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#get + copy: + - from: this + to: result + - function: redis.clients.jedis.JedisCluster#hdel + copy: + - from: arg(*) + to: this + - function: 
redis.clients.jedis.JedisCluster#hget + copy: + - from: this + to: result + - function: redis.clients.jedis.JedisCluster#hscan + copy: + - from: this + to: result + - function: redis.clients.jedis.JedisCluster#hset + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#lrange + copy: + - from: this + to: result + - function: redis.clients.jedis.JedisCluster#ltrim + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#rpush + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#sadd + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#set + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#srem + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#sscan + copy: + - from: this + to: result + - function: redis.clients.jedis.JedisCluster#zadd + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#zrange + copy: + - from: this + to: result + - function: redis.clients.jedis.JedisCluster#zrangeByScore + copy: + - from: this + to: result + - function: redis.clients.jedis.JedisCluster#zrem + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.JedisCluster#zremrangeByScore + copy: + - from: arg(*) + to: this + + # ---------------- UnifiedJedis ---------------- + - function: redis.clients.jedis.UnifiedJedis#del + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#expire + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#get + copy: + - from: this + to: result + - function: redis.clients.jedis.UnifiedJedis#hdel + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#hget + copy: + - from: this + to: result + - function: redis.clients.jedis.UnifiedJedis#hscan + copy: + - from: this + to: result + - function: redis.clients.jedis.UnifiedJedis#hset + copy: + - from: arg(*) + to: 
this + - function: redis.clients.jedis.UnifiedJedis#lrange + copy: + - from: this + to: result + - function: redis.clients.jedis.UnifiedJedis#ltrim + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#rpush + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#sadd + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#set + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#srem + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#sscan + copy: + - from: this + to: result + - function: redis.clients.jedis.UnifiedJedis#zadd + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#zrange + copy: + - from: this + to: result + - function: redis.clients.jedis.UnifiedJedis#zrangeByScore + copy: + - from: this + to: result + - function: redis.clients.jedis.UnifiedJedis#zrem + copy: + - from: arg(*) + to: this + - function: redis.clients.jedis.UnifiedJedis#zremrangeByScore + copy: + - from: arg(*) + to: this + + # ---------------- ScanResult ---------------- + # ScanResult(cursor, List result): the tainted result list (arg(1)) is + # held by the ScanResult and handed back via getResult(). Carry it onto the + # constructed object and the produced value. + - function: redis.clients.jedis.resps.ScanResult# + copy: + - from: arg(1) + to: this + - from: arg(1) + to: result diff --git a/projects/extensions/conductor/passthrough/software.amazon.awssdk.yaml b/projects/extensions/conductor/passthrough/software.amazon.awssdk.yaml new file mode 100644 index 0000000..c0b77ba --- /dev/null +++ b/projects/extensions/conductor/passthrough/software.amazon.awssdk.yaml @@ -0,0 +1,95 @@ +# Pass-through approximations for AWS SDK v2 request builders / RequestBody factories. +# +# Models the dropped (not-yet-approximated) software.amazon.awssdk methods listed +# in the tracking unit software_amazon_awssdk-passthrough.yaml. 
+# +# RequestBody.fromString / fromInputStream are static factories: the tainted +# argument (the body content / stream) flows into the produced RequestBody. +# All other targets are fluent builder setters: the tainted argument flows onto +# the builder (this, for chaining) and onto the returned builder (result). +# +# OVERRIDE mode: merged with built-ins at the rule level (per-method, additive). +passThrough: + # RequestBody.fromString(String) -> RequestBody carrying the tainted body + - function: software.amazon.awssdk.core.sync.RequestBody#fromString + copy: + - from: arg(0) + to: result + + # RequestBody.fromInputStream(InputStream, long) -> RequestBody carrying the tainted stream + - function: software.amazon.awssdk.core.sync.RequestBody#fromInputStream + copy: + - from: arg(0) + to: result + + # CompleteMultipartUploadRequest.Builder.uploadId(String) -> tainted builder + - function: software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest$Builder#uploadId + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # CompletedPart.Builder.eTag(String) -> tainted builder + - function: software.amazon.awssdk.services.s3.model.CompletedPart$Builder#eTag + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # GetObjectRequest.Builder.key(String) -> tainted builder + - function: software.amazon.awssdk.services.s3.model.GetObjectRequest$Builder#key + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # PutObjectRequest.Builder.key(String) -> tainted builder + - function: software.amazon.awssdk.services.s3.model.PutObjectRequest$Builder#key + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # UploadPartRequest.Builder.uploadId(String) -> tainted builder + - function: software.amazon.awssdk.services.s3.model.UploadPartRequest$Builder#uploadId + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # DeleteMessageBatchRequestEntry.Builder.id(String) -> tainted builder + - 
function: software.amazon.awssdk.services.sqs.model.DeleteMessageBatchRequestEntry$Builder#id + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # DeleteMessageBatchRequestEntry.Builder.receiptHandle(String) -> tainted builder + - function: software.amazon.awssdk.services.sqs.model.DeleteMessageBatchRequestEntry$Builder#receiptHandle + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # SendMessageBatchRequestEntry.Builder.id(String) -> tainted builder + - function: software.amazon.awssdk.services.sqs.model.SendMessageBatchRequestEntry$Builder#id + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result + + # SendMessageBatchRequestEntry.Builder.messageBody(String) -> tainted builder + - function: software.amazon.awssdk.services.sqs.model.SendMessageBatchRequestEntry$Builder#messageBody + copy: + - from: arg(0) + to: this + - from: arg(0) + to: result diff --git a/projects/extensions/conductor/rules/java/lib/generic/graalvm-polyglot-sinks.yaml b/projects/extensions/conductor/rules/java/lib/generic/graalvm-polyglot-sinks.yaml new file mode 100644 index 0000000..60bda71 --- /dev/null +++ b/projects/extensions/conductor/rules/java/lib/generic/graalvm-polyglot-sinks.yaml @@ -0,0 +1,29 @@ +rules: + - id: graalvm-polyglot-eval + options: + lib: true + severity: NOTE + message: Untrusted script/Source reaches GraalVM polyglot Context.eval + metadata: + provenance: https://www.graalvm.org/sdk/javadoc/org/graalvm/polyglot/Context.html + languages: + - java + mode: taint + pattern-sinks: + - patterns: + - pattern-either: + # eval(Source) overload — the Source carries the tainted script + - patterns: + - pattern: (org.graalvm.polyglot.Context $CTX).eval($SOURCE) + - focus-metavariable: $SOURCE + # eval(String languageId, CharSequence source) overload — focus the source arg + - patterns: + - pattern: (org.graalvm.polyglot.Context $CTX).eval($LANG, $SOURCE) + - focus-metavariable: $SOURCE + # Source construction from a tainted script 
feeds the eval; treat the + # built Source as the tainted sink-bearing value too. + - patterns: + - pattern-either: + - pattern: org.graalvm.polyglot.Source.newBuilder($LANG, $SOURCE, ...) + - pattern: org.graalvm.polyglot.Source.create($LANG, $SOURCE) + - focus-metavariable: $SOURCE diff --git a/projects/extensions/conductor/rules/java/lib/generic/jackson-jq.yaml b/projects/extensions/conductor/rules/java/lib/generic/jackson-jq.yaml new file mode 100644 index 0000000..07edec3 --- /dev/null +++ b/projects/extensions/conductor/rules/java/lib/generic/jackson-jq.yaml @@ -0,0 +1,16 @@ +rules: + - id: jackson-jq + options: + lib: true + severity: NOTE + message: jackson-jq JsonQuery.compile jq expression sink + languages: + - java + mode: taint + pattern-sinks: + - patterns: + - pattern-either: + - pattern: net.thisptr.jackson.jq.JsonQuery.compile($UNTRUSTED, ...) + - pattern: net.thisptr.jackson.jq.JsonQuery.compile($UNTRUSTED) + - focus-metavariable: $UNTRUSTED + from: $UNTRUSTED diff --git a/projects/extensions/conductor/rules/java/lib/spring/conductor-grpc-request-sources.yaml b/projects/extensions/conductor/rules/java/lib/spring/conductor-grpc-request-sources.yaml new file mode 100644 index 0000000..7f24a0a --- /dev/null +++ b/projects/extensions/conductor/rules/java/lib/spring/conductor-grpc-request-sources.yaml @@ -0,0 +1,63 @@ +rules: + - id: conductor-grpc-request-sources + severity: ERROR + message: >- + A value read from a protobuf request message handled by a Conductor gRPC + service (a class extending a generated *Grpc.*ImplBase) is fully + attacker-controlled and flows into a dangerous filesystem path operation. 
+ metadata: + cwe: CWE-22 + short-description: Untrusted Conductor gRPC request reaching a filesystem sink + source-type: conductor-grpc + references: + - com.netflix.conductor.grpc.server.service.WorkflowServiceImpl + full-description: |- + gRPC service implementations in Conductor extend a protoc/grpc-generated + base class named `*Grpc.*ImplBase` (for example + `com.netflix.conductor.grpc.WorkflowServiceGrpc.WorkflowServiceImplBase`). + Each handler receives a protobuf request message whose declared type + extends `com.google.protobuf.GeneratedMessageV3` (and implements + `com.google.protobuf.MessageOrBuilder`). Every value read from that + request via a getter such as `req.getName()` / `req.getInput()` is + attacker-controlled. This rule marks those getter results as an untrusted + taint source, scoped to gRPC handler classes, so they fire when reaching a + dangerous sink (here: filesystem path operations / path traversal). + + Recommended handling at the boundary: + - Treat all gRPC request fields as untrusted. + - Validate and normalize identifiers/paths before any unsafe usage. + languages: [java] + mode: taint + pattern-sources: + # The result of a getter call on the protobuf request object + # (`req.getXxx()`) is the attacker-controlled source. The receiver type is + # intentionally left unconstrained: the analyzer does not resolve the + # protoc-generated request subtype back to GeneratedMessageV3 for + # type-constrained metavariable matching, but it does match the getter call + # shape. Precision instead comes from two constraints: + # * the getter-name regex (^get.+), and + # * the enclosing-class constraint (extends *.*ImplBase), which is what + # distinguishes a real gRPC handler from an ordinary getter call and is + # load-bearing (without it, e.g. System.getProperty(...) would match). + - patterns: + - pattern-inside: | + class $C extends $OUTER.$IMPLBASE { ... 
} + - metavariable-regex: + metavariable: $IMPLBASE + regex: .*ImplBase$ + - pattern: $REQ.$GETTER(...) + - metavariable-regex: + metavariable: $GETTER + regex: ^get.+ + pattern-sinks: + - patterns: + - pattern-either: + - pattern: new java.io.File($SINK) + - pattern: new java.io.File($SINK, ...) + - pattern: new java.io.FileInputStream($SINK) + - pattern: new java.io.FileReader($SINK) + - pattern: new java.io.FileWriter($SINK) + - pattern: new java.io.RandomAccessFile($SINK, ...) + - pattern: java.nio.file.Paths.get($SINK, ...) + - pattern: java.nio.file.Path.of($SINK, ...) + - focus-metavariable: $SINK diff --git a/projects/extensions/conductor/rules/java/security/conductor-jq-injection.yaml b/projects/extensions/conductor/rules/java/security/conductor-jq-injection.yaml new file mode 100644 index 0000000..1893391 --- /dev/null +++ b/projects/extensions/conductor/rules/java/security/conductor-jq-injection.yaml @@ -0,0 +1,28 @@ +rules: + - id: conductor-jq-injection + severity: ERROR + message: >- + Untrusted input flows into a jackson-jq query expression compiled by + JsonQuery.compile. An attacker who controls this value can run arbitrary + jq programs, leading to information disclosure, data extraction, or + denial of service. + metadata: + cwe: CWE-94 + short-description: Expression injection via jackson-jq JsonQuery.compile + languages: + - java + mode: join + join: + refs: + - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source + as: spring-source + - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + as: servlet-source + - rule: java/lib/generic/jackson-jq.yaml#jackson-jq + as: sink + on: + - 'spring-source.$UNTRUSTED -> sink.$UNTRUSTED' + - 'servlet-source.$UNTRUSTED -> sink.$UNTRUSTED' + comment: >- + jq expression injection: untrusted input reaches + net.thisptr.jackson.jq.JsonQuery.compile. 
diff --git a/projects/extensions/conductor/rules/java/security/graaljs-polyglot-code-injection.yaml b/projects/extensions/conductor/rules/java/security/graaljs-polyglot-code-injection.yaml new file mode 100644 index 0000000..ff1bc2d --- /dev/null +++ b/projects/extensions/conductor/rules/java/security/graaljs-polyglot-code-injection.yaml @@ -0,0 +1,39 @@ +rules: + - id: graaljs-polyglot-code-injection + severity: ERROR + message: >- + Untrusted input flows into a GraalVM polyglot script evaluated by + org.graalvm.polyglot.Context.eval. An attacker who controls the script + text (directly, or via a Source built from it) can execute arbitrary + code in the embedded language runtime (JavaScript/Python), leading to + remote code execution. + metadata: + cwe: CWE-94 + short-description: Code injection via GraalVM polyglot Context.eval + full-description: |- + GraalVM polyglot code injection occurs when untrusted input reaches + `org.graalvm.polyglot.Context.eval(...)`, either through the + `eval(Source)` overload (where the `Source` is built from the tainted + script via `Source.newBuilder(lang, script, ...)`) or the + `eval(String languageId, CharSequence source)` overload. The evaluated + text is fully attacker-controlled program code. + references: + - https://owasp.org/www-community/attacks/Code_Injection + - https://www.graalvm.org/sdk/javadoc/org/graalvm/polyglot/Context.html + languages: + - java + mode: join + join: + refs: + - rule: java/lib/spring/untrusted-data-source.yaml#spring-untrusted-data-source + as: spring-source + - rule: java/lib/generic/servlet-untrusted-data-source.yaml#java-servlet-untrusted-data-source + as: servlet-source + - rule: java/lib/generic/graalvm-polyglot-sinks.yaml#graalvm-polyglot-eval + as: sink + on: + - 'spring-source.$UNTRUSTED -> sink.$SOURCE' + - 'servlet-source.$UNTRUSTED -> sink.$SOURCE' + comment: >- + GraalVM polyglot code injection: untrusted input reaches + org.graalvm.polyglot.Context.eval (Source or String overload). 
diff --git a/projects/repos.yaml b/projects/repos.yaml index 45a4cab..dd447db 100644 --- a/projects/repos.yaml +++ b/projects/repos.yaml @@ -1,118 +1,132 @@ repositories: - - name: seqra-java-spring-demo - git: https://github.com/seqra/seqra-java-spring-demo.git - head: 644cb1e6aa0152d193015456cf4de8a121750312 +# - name: seqra-java-spring-demo +# git: https://github.com/seqra/seqra-java-spring-demo.git +# head: 644cb1e6aa0152d193015456cf4de8a121750312 +# java-version: 21 +# +# - name: Stirling-PDF +# git: https://github.com/Stirling-Tools/Stirling-PDF.git +# head: d80e627899daf804f1390a0b75a1da3fd093aa84 +# +# - name: hertzbeat +# git: https://github.com/apache/hertzbeat.git +# head: 668106d445fe69e7128b2bb14f3e21c181bd8c55 +# +# - name: DWSurvey +# git: https://github.com/wkeyuan/DWSurvey.git +# head: 6a7b10e90f4e39c49b5afe4177ed23836953006b +# +# - name: CordysCRM +# git: https://github.com/1Panel-dev/CordysCRM.git +# head: bb19880b457da3b59b771a42aa98812919bd99e3 +# java-version: 21 +# +# - name: jpress +# git: https://github.com/JPressProjects/jpress.git +# head: 9902ee392656f8c5939d48f9c1c2d809e25a4ea1 +# +# - name: MCMS +# git: https://github.com/ming-soft/MCMS.git +# head: d3d8b5257370dc15c0e0c2aea69754d46e051336 +# +# - name: openmrs-core +# git: https://github.com/openmrs/openmrs-core.git +# head: 494c57b334249fb4230d58986319deca21cb4151 +# java-version: 21 +# +# - name: ruoyi-vue-pro +# git: https://github.com/YunaiV/ruoyi-vue-pro.git +# head: b29c5f0e40cfab289613992ddaddfb08d60a3e44 +# +# - name: spring-petclinic +# git: https://github.com/spring-projects/spring-petclinic.git +# head: 3e1ce239f4488f20abda24441388a515ea55a815 +# +# - name: tms +# git: https://github.com/xiweicheng/tms.git +# head: 9510b7cca4cd1f82114e5df0c13a491da4d8d818 +# java-version: 8 +# +# - name: thingsboard +# git: https://github.com/thingsboard/thingsboard.git +# head: b31176433e8065ae4cb2a285bcd5e03fce9014f5 +# max-memory: 12G +# +# - name: yudao-cloud +# git: 
https://github.com/YunaiV/yudao-cloud.git +# head: 99ffe0fd41c2b783ef83e6c8704e6e264603b484 +# max-memory: 12G +# +# - name: halo +# git: https://github.com/halo-dev/halo.git +# head: 48e191daaa16953acc4db298b2aad8c485466ccd +# java-version: 21 +# +# - name: kkFileView +# git: https://github.com/kekingcn/kkFileView.git +# head: 92ca92bee6d4682f2eb6f388174d39afd2263874 +# java-version: 21 +# +# - name: apollo +# git: https://github.com/apolloconfig/apollo.git +# head: 9de9a1580f5eb3d9bfa7a2b95b95c693bf2032a4 +# +# - name: snowy +# git: https://github.com/xiaonuobase/Snowy.git +# head: 315e239c9db373a2de5f7298c9623f2ab5424d5c +# +# - name: litemall +# git: https://github.com/linlinjava/litemall.git +# head: a1ef964a718b7277925b19ea26afe78ea3a1d325 +# +# - name: shopizer +# git: https://github.com/shopizer-ecommerce/shopizer.git +# head: 6a4a0a65a3408ee8f62597b51d1b3aac24b77dee +# +# - name: maku-boot +# git: https://github.com/makunet/maku-boot.git +# head: f9dd43c8913d5acbb46d061e4a9fc47c42f3034f +# +# - name: jeesite5 +# git: https://github.com/thinkgem/jeesite5.git +# head: 7be0a1c5bd5349933e7e75c97e4f6bd1d529725e +# +# - name: WebGoat +# git: https://github.com/WebGoat/WebGoat.git +# head: d4238ab406f27eea4aff8c86443cafbc220431c4 +# java-version: 25 +# +# - name: joyagent-jdgenie +# git: https://github.com/jd-opensource/joyagent-jdgenie.git +# head: 7142f4156cb6237cfef0b44a3a99d66e43fa3a0f +# +# - name: klaw +# git: https://github.com/Aiven-Open/klaw.git +# head: c4b1188d0e388c2459eecef1e3ac6ab0dce6e7e3 +# +# - name: spring-boot-seckill +# git: https://github.com/halegreen/spring-boot-seckill.git +# head: a0b979bbc533d9e2cae198f9e6f708b8c39861ea +# java-version: 8 +# +# - name: roncoo-education +# git: https://github.com/roncoo/roncoo-education.git +# head: 10e96084023a3488014e7e796cd3073edcd9ba1a +# +# - name: continew-admin +# git: https://github.com/continew-org/continew-admin.git +# head: 9776dfbf311d652b30a4599ac7989c35b29da372 + + - name: conductor + git: 
https://github.com/conductor-oss/conductor.git + head: c466eaf70f3ac16b915120dd651e91aae28451c1 java-version: 21 - - - name: Stirling-PDF - git: https://github.com/Stirling-Tools/Stirling-PDF.git - head: d80e627899daf804f1390a0b75a1da3fd093aa84 - - - name: hertzbeat - git: https://github.com/apache/hertzbeat.git - head: 668106d445fe69e7128b2bb14f3e21c181bd8c55 - - - name: DWSurvey - git: https://github.com/wkeyuan/DWSurvey.git - head: 6a7b10e90f4e39c49b5afe4177ed23836953006b - - - name: CordysCRM - git: https://github.com/1Panel-dev/CordysCRM.git - head: bb19880b457da3b59b771a42aa98812919bd99e3 - java-version: 21 - - - name: jpress - git: https://github.com/JPressProjects/jpress.git - head: 9902ee392656f8c5939d48f9c1c2d809e25a4ea1 - - - name: MCMS - git: https://github.com/ming-soft/MCMS.git - head: d3d8b5257370dc15c0e0c2aea69754d46e051336 - - - name: openmrs-core - git: https://github.com/openmrs/openmrs-core.git - head: 494c57b334249fb4230d58986319deca21cb4151 - java-version: 21 - - - name: ruoyi-vue-pro - git: https://github.com/YunaiV/ruoyi-vue-pro.git - head: b29c5f0e40cfab289613992ddaddfb08d60a3e44 - - - name: spring-petclinic - git: https://github.com/spring-projects/spring-petclinic.git - head: 3e1ce239f4488f20abda24441388a515ea55a815 - - - name: tms - git: https://github.com/xiweicheng/tms.git - head: 9510b7cca4cd1f82114e5df0c13a491da4d8d818 - java-version: 8 - - - name: thingsboard - git: https://github.com/thingsboard/thingsboard.git - head: b31176433e8065ae4cb2a285bcd5e03fce9014f5 - max-memory: 12G - - - name: yudao-cloud - git: https://github.com/YunaiV/yudao-cloud.git - head: 99ffe0fd41c2b783ef83e6c8704e6e264603b484 - max-memory: 12G - - - name: halo - git: https://github.com/halo-dev/halo.git - head: 48e191daaa16953acc4db298b2aad8c485466ccd - java-version: 21 - - - name: kkFileView - git: https://github.com/kekingcn/kkFileView.git - head: 92ca92bee6d4682f2eb6f388174d39afd2263874 - java-version: 21 - - - name: apollo - git: 
https://github.com/apolloconfig/apollo.git - head: 9de9a1580f5eb3d9bfa7a2b95b95c693bf2032a4 - - - name: snowy - git: https://github.com/xiaonuobase/Snowy.git - head: 315e239c9db373a2de5f7298c9623f2ab5424d5c - - - name: litemall - git: https://github.com/linlinjava/litemall.git - head: a1ef964a718b7277925b19ea26afe78ea3a1d325 - - - name: shopizer - git: https://github.com/shopizer-ecommerce/shopizer.git - head: 6a4a0a65a3408ee8f62597b51d1b3aac24b77dee - - - name: maku-boot - git: https://github.com/makunet/maku-boot.git - head: f9dd43c8913d5acbb46d061e4a9fc47c42f3034f - - - name: jeesite5 - git: https://github.com/thinkgem/jeesite5.git - head: 7be0a1c5bd5349933e7e75c97e4f6bd1d529725e - - - name: WebGoat - git: https://github.com/WebGoat/WebGoat.git - head: d4238ab406f27eea4aff8c86443cafbc220431c4 - java-version: 25 - - - name: joyagent-jdgenie - git: https://github.com/jd-opensource/joyagent-jdgenie.git - head: 7142f4156cb6237cfef0b44a3a99d66e43fa3a0f - - - name: klaw - git: https://github.com/Aiven-Open/klaw.git - head: c4b1188d0e388c2459eecef1e3ac6ab0dce6e7e3 - - - name: spring-boot-seckill - git: https://github.com/halegreen/spring-boot-seckill.git - head: a0b979bbc533d9e2cae198f9e6f708b8c39861ea - java-version: 8 - - - name: roncoo-education - git: https://github.com/roncoo/roncoo-education.git - head: 10e96084023a3488014e7e796cd3073edcd9ba1a - - - name: continew-admin - git: https://github.com/continew-org/continew-admin.git - head: 9776dfbf311d652b30a4599ac7989c35b29da372 + scan-flags: + - --passthrough-approximations + - "{ext}/conductor/passthrough" + - --dataflow-approximations + - "{ext}/conductor/dataflow-src" + - --ruleset + - builtin + - --ruleset + - "{ext}/conductor/rules" diff --git a/scripts/generate_matrix.py b/scripts/generate_matrix.py index a28328d..af6c140 100755 --- a/scripts/generate_matrix.py +++ b/scripts/generate_matrix.py @@ -34,6 +34,21 @@ def _matches_filter(name: str, patterns: list[str]) -> bool: return any(p in name for p in 
patterns) +def _normalise_scan_flags(raw) -> list[str]: + """Coerce the YAML `scan-flags` field into a clean ``list[str]``. + + Accepts None / missing (→ empty list) or a list of scalars. Any other + shape is a configuration error and raised loudly so the workflow fails + fast instead of silently dropping flags. + """ + if raw is None: + return [] + if not isinstance(raw, list): + raise ValueError( + f"scan-flags must be a list, got {type(raw).__name__}: {raw!r}") + return [str(token) for token in raw] + + def _load_misses(path: str | None) -> set[tuple[str, str]]: if not path: return set() @@ -64,6 +79,10 @@ def build_matrix(repos_path: Path, base_sha: str, new_sha: str, "head": repo["head"], "java_version": str(repo.get("java-version", DEFAULT_JAVA)), "max_memory": str(repo.get("max-memory", DEFAULT_MEMORY)), + # Serialised as JSON so the GH Actions matrix can carry an + # arbitrarily long, space-containing list through a single + # string-valued field. `run_analysis.py` decodes it back. + "scan_flags": json.dumps(_normalise_scan_flags(repo.get("scan-flags"))), "ref_kind": ref_kind, "analyzer_sha": sha, }) diff --git a/scripts/run_analysis.py b/scripts/run_analysis.py index 91a2e85..ca10c86 100755 --- a/scripts/run_analysis.py +++ b/scripts/run_analysis.py @@ -14,7 +14,15 @@ --project-dir /path/to/cloned/project \ --results-dir results/// \ --max-memory 8G \ - [--timeout 1200] + [--timeout 1200] \ + [--extensions-dir projects/extensions] \ + [--scan-flags-json '["--rule-id", "java.taint.sql-injection"]'] + +Extra `opentaint scan` flags may be supplied via ``--scan-flags-json`` +(a JSON-encoded list of CLI tokens). The literal substring ``{ext}`` inside +any token is replaced with the absolute path passed via +``--extensions-dir``, letting projects reference files shipped in this +repository (e.g. pass-through approximations) without hard-coding paths. Exit codes: 0 status.json was written, autobuilder (compile step) succeeded. 
@@ -35,6 +43,10 @@ import time from pathlib import Path +# Placeholder used inside `scan-flags` entries in repos.yaml. Replaced at +# runtime with the absolute path of the extensions directory. +EXT_PLACEHOLDER = "{ext}" + _LOG_FILE_RE = re.compile(r"Log file:\s*(.+\.log)") # Analyzer prints a periodic sample like: @@ -116,8 +128,53 @@ def _run(cmd: list[str], timeout: int, log_fp) -> tuple[int, str, str, float]: return rc, out, err, dur +def _expand_scan_flags(flags: list[str], extensions_dir: Path | None) -> list[str]: + """Substitute ``{ext}`` in every token with ``extensions_dir`` (absolute). + + Raises a clear error if a token references the placeholder but no + extensions directory was supplied. + """ + if not flags: + return [] + needs_ext = any(EXT_PLACEHOLDER in tok for tok in flags) + if needs_ext and extensions_dir is None: + raise ValueError( + f"scan-flags contains {EXT_PLACEHOLDER!r} but no --extensions-dir " + "was provided") + ext_str = str(extensions_dir.resolve()) if extensions_dir is not None else "" + return [tok.replace(EXT_PLACEHOLDER, ext_str) for tok in flags] + + +def _build_scan_cmd(opentaint: Path, analyzer_jar: Path, rules_dir: Path, + model_dir: Path, sarif: Path, max_memory: str, + scan_timeout_seconds: int, + extra_flags: list[str]) -> list[str]: + """Assemble the ``opentaint scan`` argv. + + The built-in ``--ruleset `` is always passed first. Any + additional ``--ruleset`` entries supplied by the project (via + ``scan-flags`` in ``repos.yaml``) are appended verbatim through + ``extra_flags`` and are *merged* with the built-in pack by the analyzer + — ``--ruleset`` is a ``stringArray``, so repetition is additive rather + than overriding. 
+ """ + return [ + str(opentaint), "scan", "--debug", + "--experimental", + "--analyzer-jar", str(analyzer_jar), + "--ruleset", str(rules_dir), + "--project-model", str(model_dir), + "--output", str(sarif), + "--timeout", f"{scan_timeout_seconds}s", + "--max-memory", max_memory, + *extra_flags, + ] + + def run_pipeline(build_dir: Path, project_dir: Path, results_dir: Path, - max_memory: str, timeout: int) -> dict: + max_memory: str, timeout: int, + scan_flags: list[str] | None = None, + extensions_dir: Path | None = None) -> dict: results_dir.mkdir(parents=True, exist_ok=True) opentaint = build_dir / "opentaint" analyzer_jar = build_dir / "opentaint-project-analyzer.jar" @@ -128,6 +185,8 @@ def run_pipeline(build_dir: Path, project_dir: Path, results_dir: Path, if not p.exists(): raise FileNotFoundError(f"missing build artifact: {p}") + expanded_extra_flags = _expand_scan_flags(scan_flags or [], extensions_dir) + # Keep project-model OUTSIDE results_dir so it is never cached or uploaded # as part of the per-project result bundle (multi-GB per project otherwise). model_dir = results_dir.parent / f"{results_dir.name}-project-model" @@ -143,16 +202,16 @@ def run_pipeline(build_dir: Path, project_dir: Path, results_dir: Path, "--output", str(model_dir), str(project_dir), ] - scan_cmd = [ - str(opentaint), "scan", "--debug", - "--experimental", - "--analyzer-jar", str(analyzer_jar), - "--ruleset", str(rules_dir), - "--project-model", str(model_dir), - "--output", str(sarif), - "--timeout", f"{max(timeout - 120, 60)}s", - "--max-memory", max_memory, - ] + scan_cmd = _build_scan_cmd( + opentaint=opentaint, + analyzer_jar=analyzer_jar, + rules_dir=rules_dir, + model_dir=model_dir, + sarif=sarif, + max_memory=max_memory, + scan_timeout_seconds=max(timeout - 120, 60), + extra_flags=expanded_extra_flags, + ) # Do NOT pre-create model_dir: `opentaint compile --output` may refuse to # write into an already-existing directory (or produce inconsistent state). 
@@ -199,14 +258,33 @@ def main() -> int: p.add_argument("--results-dir", required=True, type=Path) p.add_argument("--max-memory", default="8G") p.add_argument("--timeout", type=int, default=1200) + p.add_argument("--extensions-dir", type=Path, default=None, + help="directory whose contents are reachable via the " + "{ext} placeholder in --scan-flags-json") + p.add_argument("--scan-flags-json", default="[]", + help="JSON-encoded list of extra CLI tokens appended to " + "'opentaint scan'; each token may use {ext}") args = p.parse_args() + try: + scan_flags = json.loads(args.scan_flags_json) + if not isinstance(scan_flags, list) or not all(isinstance(t, str) for t in scan_flags): + raise ValueError("--scan-flags-json must decode to a list of strings") + except (json.JSONDecodeError, ValueError) as e: + print(f"run_analysis: bad --scan-flags-json: {e}", file=sys.stderr) + return 2 + try: status = run_pipeline(args.build_dir, args.project_dir, - args.results_dir, args.max_memory, args.timeout) + args.results_dir, args.max_memory, args.timeout, + scan_flags=scan_flags, + extensions_dir=args.extensions_dir) except FileNotFoundError as e: print(f"run_analysis: {e}", file=sys.stderr) return 2 + except ValueError as e: + print(f"run_analysis: {e}", file=sys.stderr) + return 2 (args.results_dir / "status.json").write_text(json.dumps(status, indent=2)) print(json.dumps(status)) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 2861302..826d624 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -16,6 +16,7 @@ import cache_key # noqa: E402 import compare_sarif # noqa: E402 +import generate_matrix # noqa: E402 import run_analysis # noqa: E402 @@ -226,7 +227,155 @@ def test_extract_peak_memory_missing_file(tmp_path): assert run_analysis.extract_peak_memory(None) is None -# ── compare_sarif: markdown rendering smoke test ──────────────────────────── +# ── generate_matrix: scan-flags propagation ──────────────────────────── + +def _write_repos(tmp_path: 
Path, body: str) -> Path: + p = tmp_path / "repos.yaml" + p.write_text(body) + return p + + +def test_matrix_scan_flags_default_empty(tmp_path): + repos = _write_repos(tmp_path, ( + "repositories:\n" + " - name: demo\n" + " git: https://example.com/demo.git\n" + " head: deadbeef\n" + )) + m = generate_matrix.build_matrix(repos, "AAA", "BBB", [], None) + assert [e["scan_flags"] for e in m["include"]] == ["[]", "[]"] + + +def test_matrix_scan_flags_round_trip(tmp_path): + repos = _write_repos(tmp_path, ( + "repositories:\n" + " - name: demo\n" + " git: https://example.com/demo.git\n" + " head: deadbeef\n" + " scan-flags:\n" + " - --rule-id\n" + " - java.taint.sql-injection\n" + " - \"--passthrough-approximations\"\n" + " - \"{ext}/demo/pt.yaml\"\n" + )) + m = generate_matrix.build_matrix(repos, "AAA", "AAA", [], None) + assert len(m["include"]) == 1 + assert json.loads(m["include"][0]["scan_flags"]) == [ + "--rule-id", "java.taint.sql-injection", + "--passthrough-approximations", "{ext}/demo/pt.yaml", + ] + + +def test_matrix_scan_flags_rejects_non_list(tmp_path): + repos = _write_repos(tmp_path, ( + "repositories:\n" + " - name: demo\n" + " git: https://example.com/demo.git\n" + " head: deadbeef\n" + " scan-flags: --rule-id=foo\n" + )) + with pytest.raises(ValueError): + generate_matrix.build_matrix(repos, "AAA", "AAA", [], None) + + +# ── run_analysis: scan-flag expansion ──────────────────────────────── + +def test_expand_scan_flags_substitutes_ext(tmp_path): + ext = tmp_path / "ext"; ext.mkdir() + expanded = run_analysis._expand_scan_flags( + ["--passthrough-approximations", "{ext}/demo/pt.yaml", "--rule-id", "r1"], + ext, + ) + assert expanded == [ + "--passthrough-approximations", + f"{ext.resolve()}/demo/pt.yaml", + "--rule-id", + "r1", + ] + + +def test_expand_scan_flags_no_placeholder_no_ext_ok(): + # Tokens without {ext} don't require an extensions directory. 
+ assert run_analysis._expand_scan_flags(["--rule-id", "r1"], None) == [ + "--rule-id", "r1", + ] + + +def test_expand_scan_flags_missing_ext_raises(): + with pytest.raises(ValueError): + run_analysis._expand_scan_flags(["{ext}/demo/pt.yaml"], None) + + +def test_expand_scan_flags_empty_passthrough(): + assert run_analysis._expand_scan_flags([], None) == [] + + +# ── run_analysis: scan-cmd assembly + multi-ruleset merging ─────────────── + +def _scan_cmd(extra_flags, tmp_path): + return run_analysis._build_scan_cmd( + opentaint=tmp_path / "opentaint", + analyzer_jar=tmp_path / "analyzer.jar", + rules_dir=tmp_path / "built-in-rules", + model_dir=tmp_path / "model", + sarif=tmp_path / "out.sarif", + max_memory="8G", + scan_timeout_seconds=1080, + extra_flags=extra_flags, + ) + + +def _rulesets_in(cmd): + """Return the values that follow every occurrence of '--ruleset' in cmd.""" + return [cmd[i + 1] for i, tok in enumerate(cmd) if tok == "--ruleset"] + + +def test_build_scan_cmd_default_passes_builtin_ruleset(tmp_path): + cmd = _scan_cmd([], tmp_path) + assert _rulesets_in(cmd) == [str(tmp_path / "built-in-rules")] + + +def test_build_scan_cmd_extra_ruleset_merges_with_builtin(tmp_path): + cmd = _scan_cmd( + ["--ruleset", "/abs/custom-rules.yaml", + "--ruleset", "/abs/rules-dir"], + tmp_path, + ) + # Built-in first; custom values appended in order — analyzer treats + # --ruleset as stringArray and merges them. + assert _rulesets_in(cmd) == [ + str(tmp_path / "built-in-rules"), + "/abs/custom-rules.yaml", + "/abs/rules-dir", + ] + + +def test_build_scan_cmd_extra_flags_appear_after_reserved(tmp_path): + cmd = _scan_cmd(["--rule-id", "java.taint.sql-injection"], tmp_path) + # Reserved tokens are present… + for required in ("--analyzer-jar", "--project-model", "--output", + "--timeout", "--max-memory", "--debug", "--experimental"): + assert required in cmd, f"missing reserved flag {required}" + # …and the user's extra flag is appended verbatim at the tail. 
+ assert cmd[-2:] == ["--rule-id", "java.taint.sql-injection"] + + +def test_build_scan_cmd_end_to_end_through_expand(tmp_path): + ext = tmp_path / "ext"; ext.mkdir() + expanded = run_analysis._expand_scan_flags( + ["--ruleset", "{ext}/proj/rules.yaml", + "--ruleset", "{ext}/proj/rules"], + ext, + ) + cmd = _scan_cmd(expanded, tmp_path) + assert _rulesets_in(cmd) == [ + str(tmp_path / "built-in-rules"), + f"{ext.resolve()}/proj/rules.yaml", + f"{ext.resolve()}/proj/rules", + ] + + +# ── compare_sarif: markdown rendering smoke test ────────────────────────── def test_render_markdown_smoke(): md = compare_sarif.render_markdown([ From 701911e31934622a910ad5b5e9266c37d856490b Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Thu, 18 Jun 2026 01:13:54 +0300 Subject: [PATCH 2/6] Fix --- .github/workflows/regression.yaml | 7 ++----- scripts/generate_matrix.py | 10 ++++++---- tests/test_scripts.py | 28 ++++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/.github/workflows/regression.yaml b/.github/workflows/regression.yaml index 952dd15..0d2c69b 100644 --- a/.github/workflows/regression.yaml +++ b/.github/workflows/regression.yaml @@ -253,7 +253,7 @@ jobs: --filter "${{ inputs.projects_filter }}" \ --misses-only misses.json) echo "matrix=$matrix" >> "$GITHUB_OUTPUT" - has=$(python -c "import json;print('true' if json.loads('''$matrix''')['include'] else 'false')") + has=$(printf '%s' "$matrix" | python -c "import json, sys; print('true' if json.load(sys.stdin)['include'] else 'false')") echo "has_cells=$has" >> "$GITHUB_OUTPUT" analyze: @@ -297,10 +297,7 @@ jobs: - name: Run analysis id: ra env: - # Forwarded via env to avoid YAML/shell-quoting pitfalls — the - # matrix value is a JSON-encoded list that can contain spaces, - # commas, or other shell-special characters. 
- SCAN_FLAGS_JSON: ${{ matrix.scan_flags }} + SCAN_FLAGS_JSON: ${{ toJson(matrix.scan_flags) }} run: | set +e python scripts/run_analysis.py \ diff --git a/scripts/generate_matrix.py b/scripts/generate_matrix.py index af6c140..5be6d44 100755 --- a/scripts/generate_matrix.py +++ b/scripts/generate_matrix.py @@ -79,10 +79,12 @@ def build_matrix(repos_path: Path, base_sha: str, new_sha: str, "head": repo["head"], "java_version": str(repo.get("java-version", DEFAULT_JAVA)), "max_memory": str(repo.get("max-memory", DEFAULT_MEMORY)), - # Serialised as JSON so the GH Actions matrix can carry an - # arbitrarily long, space-containing list through a single - # string-valued field. `run_analysis.py` decodes it back. - "scan_flags": json.dumps(_normalise_scan_flags(repo.get("scan-flags"))), + # Emitted as a real JSON array — not a nested JSON-encoded + # string — so the matrix payload contains no backslash + # escapes that downstream shell + Python interpolation + # would otherwise mangle. The workflow re-serialises with + # ``toJson(matrix.scan_flags)`` at the point of use. + "scan_flags": _normalise_scan_flags(repo.get("scan-flags")), "ref_kind": ref_kind, "analyzer_sha": sha, }) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 826d624..99e0415 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -243,7 +243,9 @@ def test_matrix_scan_flags_default_empty(tmp_path): " head: deadbeef\n" )) m = generate_matrix.build_matrix(repos, "AAA", "BBB", [], None) - assert [e["scan_flags"] for e in m["include"]] == ["[]", "[]"] + # Real JSON array — not a nested JSON-encoded string — so the matrix + # payload survives shell + Python interpolation in the workflow. 
+ assert [e["scan_flags"] for e in m["include"]] == [[], []] def test_matrix_scan_flags_round_trip(tmp_path): @@ -260,12 +262,34 @@ def test_matrix_scan_flags_round_trip(tmp_path): )) m = generate_matrix.build_matrix(repos, "AAA", "AAA", [], None) assert len(m["include"]) == 1 - assert json.loads(m["include"][0]["scan_flags"]) == [ + assert m["include"][0]["scan_flags"] == [ "--rule-id", "java.taint.sql-injection", "--passthrough-approximations", "{ext}/demo/pt.yaml", ] +def test_matrix_output_has_no_escaped_quotes(tmp_path): + """Regression: the whole matrix JSON, as printed by main(), must not + contain ``\\"`` sequences — those break + ``python -c '... json.loads('''$matrix''') ...'`` in the workflow, + where Python's source lexer would resolve ``\\"`` to ``"`` before + JSON parsing.""" + repos = _write_repos(tmp_path, ( + "repositories:\n" + " - name: demo\n" + " git: https://example.com/demo.git\n" + " head: deadbeef\n" + " scan-flags:\n" + " - --ruleset\n" + " - \"{ext}/demo/rules.yaml\"\n" + )) + m = generate_matrix.build_matrix(repos, "AAA", "AAA", [], None) + serialised = json.dumps(m) + assert "\\\"" not in serialised, ( + f"matrix JSON contains escaped quotes that will break shell+python " + f"interpolation: {serialised}") + + def test_matrix_scan_flags_rejects_non_list(tmp_path): repos = _write_repos(tmp_path, ( "repositories:\n" From 912bad5e1a952b9aaa56b2b49825bf60a6f44216 Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Thu, 18 Jun 2026 01:53:33 +0300 Subject: [PATCH 3/6] Fix --- README.md | 27 +++++++---- projects/extensions/README.md | 48 ++++++++++++++++--- scripts/run_analysis.py | 89 ++++++++++++++++++++++++++--------- tests/test_scripts.py | 65 ++++++++++++++++++++----- 4 files changed, 182 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 37e1255..3b5a796 100644 --- a/README.md +++ b/README.md @@ -102,22 +102,33 @@ take a single file or a whole directory, and may be repeated): 
Flags reserved by the runner (`--analyzer-jar`, `--project-model`, `--output`, `--timeout`, `--max-memory`, `--debug`, `--experimental`) must -not be repeated here. `--ruleset` is **not** reserved: the runner always -passes the built-in ruleset first, and any additional `--ruleset` entries -in `scan-flags` are merged with it by the analyzer (the flag is a -`stringArray`). Example — adding a custom YAML file and a whole directory -of rules: +not be repeated here. `--ruleset` is **not** reserved — the runner only +inserts the documented default `--ruleset builtin` when `scan-flags` +contains no `--ruleset` of its own. Supplying any `--ruleset` value puts +the project in full control of which rule packs are loaded and in what +order. Example — stack the JAR-baked `builtin` pack with a custom YAML +file and a whole directory of rules: ```yaml scan-flags: - --ruleset - - "{ext}/my-project/rules/sql-injection.yaml" + - builtin # JAR-baked default pack - --ruleset - - "{ext}/my-project/rules" + - "{ext}/my-project/rules/sql-injection.yaml" # custom YAML file + - --ruleset + - "{ext}/my-project/rules" # custom rules directory ``` +Two placeholders are expanded inside `scan-flags`: + +* `{ext}` → absolute path of `projects/extensions/`. +* `{rules}` → absolute path of the opentaint source-tree rule pack staged + at `/rules`. Use this to layer that pack (which is **not** the + same as `builtin`) on top of, or instead of, the JAR-baked rules. + See [`projects/extensions/README.md`](projects/extensions/README.md) for -the layout convention. +the layout convention and a deeper look at `builtin` vs `{rules}` vs +custom YAMLs. ## Open items diff --git a/projects/extensions/README.md b/projects/extensions/README.md index f52bb2b..d546372 100644 --- a/projects/extensions/README.md +++ b/projects/extensions/README.md @@ -71,16 +71,31 @@ Flags that don't reference any extension file (e.g. 
`--rule-id`, ## Custom rulesets -The analyzer accepts **multiple** `--ruleset` arguments and merges them. The -runner always passes the built-in ruleset first; any additional `--ruleset` -entries in `scan-flags` are layered on top: +The analyzer's `--ruleset` is a `stringArray` whose default is `[builtin]` +(see `opentaint scan --help`). The literal value **`builtin`** is a sentinel +that tells the analyzer to load the rule pack baked into its JAR. Any other +value is treated as a path — a YAML file or a directory of `*.yml` / `*.yaml` +files — and reported as a *User ruleset*. + +The runner does **not** force any `--ruleset` flag: + +* If `scan-flags` contains no `--ruleset`, the runner inserts the documented + default `--ruleset builtin` so the JAR-baked pack is used. +* If `scan-flags` supplies one or more `--ruleset` tokens, they are passed + verbatim and the runner adds none of its own. The project is in full + control of what gets loaded and in what order. + +To layer the built-in pack with your own rules, ask for `builtin` explicitly: ```yaml - name: spring-petclinic git: https://github.com/spring-projects/spring-petclinic.git head: 3e1ce239f4488f20abda24441388a515ea55a815 scan-flags: - # Add a single custom YAML rules file: + # JAR-baked built-in pack — the literal sentinel, not a path: + - --ruleset + - builtin + # A single custom YAML file: - --ruleset - "{ext}/spring-petclinic/rules/sql-injection.yaml" # …and a whole directory of `*.yaml` / `*.yml` rule files: @@ -89,10 +104,28 @@ entries in `scan-flags` are layered on top: ``` Resulting analyzer command (conceptually): -`--ruleset --ruleset /.../sql-injection.yaml --ruleset /.../rules`. +`--ruleset builtin --ruleset /.../sql-injection.yaml --ruleset /.../rules`. Use `--rule-id` to narrow which rules from those sets are actually run. +### The `{rules}` placeholder — opentaint source-tree rules + +The build artifact ships the YAML rule pack from the opentaint source tree +at `/rules`. 
It is **not** the same as `builtin` (which lives +inside the analyzer JAR). If you want to layer that pack on top of — or +instead of — `builtin`, reference it via the `{rules}` placeholder: + +```yaml + scan-flags: + - --ruleset + - builtin # JAR-baked pack + - --ruleset + - "{rules}" # opentaint source-tree pack staged at /rules +``` + +The runner expands `{rules}` to the absolute path of `/rules` at +analysis time. + ## Reserved flags The runner already sets these and you should **not** repeat them in @@ -106,5 +139,6 @@ The runner already sets these and you should **not** repeat them in - `--debug` - `--experimental` -`--ruleset` is **not** reserved — the built-in pack is always supplied, and -additional `--ruleset` entries you add in `scan-flags` are merged with it. +`--ruleset` is **not** reserved. The runner only inserts a default +`--ruleset builtin` when the project omits the flag entirely; supplying +any `--ruleset` value disables the default. diff --git a/scripts/run_analysis.py b/scripts/run_analysis.py index ca10c86..243dd58 100755 --- a/scripts/run_analysis.py +++ b/scripts/run_analysis.py @@ -19,10 +19,21 @@ [--scan-flags-json '["--rule-id", "java.taint.sql-injection"]'] Extra `opentaint scan` flags may be supplied via ``--scan-flags-json`` -(a JSON-encoded list of CLI tokens). The literal substring ``{ext}`` inside -any token is replaced with the absolute path passed via -``--extensions-dir``, letting projects reference files shipped in this -repository (e.g. pass-through approximations) without hard-coding paths. +(a JSON-encoded list of CLI tokens). Two placeholders are expanded inside +every token before invoking opentaint: + +* ``{ext}`` → absolute path of ``--extensions-dir`` (project files + shipped in this repo, e.g. ``{ext}/conductor/passthrough``). 
+* ``{rules}`` → absolute path of the opentaint source-tree rule pack + staged at ``/rules`` (use this only when you want those + YAMLs layered on top of, or instead of, the analyzer's ``builtin`` pack). + +Ruleset handling: the runner does **not** force any ``--ruleset`` flag. If +the project's ``scan-flags`` contains no ``--ruleset``, the runner inserts +the analyzer's documented default ``--ruleset builtin`` so the JAR-baked +rule pack is loaded. If the project supplies one or more ``--ruleset`` +tokens (including the literal ``builtin``), they are passed verbatim and +the runner adds nothing of its own. Exit codes: 0 status.json was written, autobuilder (compile step) succeeded. @@ -43,9 +54,21 @@ import time from pathlib import Path -# Placeholder used inside `scan-flags` entries in repos.yaml. Replaced at -# runtime with the absolute path of the extensions directory. +# Placeholders used inside `scan-flags` entries in repos.yaml. Replaced at +# runtime with the absolute paths of, respectively, the per-project +# extensions directory and the source-tree rule pack staged into the build +# artifact at /rules. EXT_PLACEHOLDER = "{ext}" +RULES_PLACEHOLDER = "{rules}" + +# Sentinel value accepted by `opentaint scan --ruleset`. Tells the analyzer +# to use the rule pack baked into its JAR rather than a file/directory path. +BUILTIN_RULESET = "builtin" + +# Token (as it appears in argv) that introduces a ruleset value. Used to +# decide whether the project already supplied at least one --ruleset so we +# don't override their choice. +RULESET_FLAG = "--ruleset" _LOG_FILE_RE = re.compile(r"Log file:\s*(.+\.log)") @@ -128,41 +151,65 @@ def _run(cmd: list[str], timeout: int, log_fp) -> tuple[int, str, str, float]: return rc, out, err, dur -def _expand_scan_flags(flags: list[str], extensions_dir: Path | None) -> list[str]: - """Substitute ``{ext}`` in every token with ``extensions_dir`` (absolute). 
+def _expand_scan_flags(flags: list[str], + extensions_dir: Path | None, + rules_dir: Path | None = None) -> list[str]: + """Substitute ``{ext}`` and ``{rules}`` placeholders in every token. + + * ``{ext}`` → absolute path of ``extensions_dir``. + * ``{rules}`` → absolute path of ``rules_dir`` (the staged opentaint + source-tree rule pack at ``/rules``). - Raises a clear error if a token references the placeholder but no - extensions directory was supplied. + Raises a clear error if a token references a placeholder whose + backing path was not supplied. """ if not flags: return [] needs_ext = any(EXT_PLACEHOLDER in tok for tok in flags) + needs_rules = any(RULES_PLACEHOLDER in tok for tok in flags) if needs_ext and extensions_dir is None: raise ValueError( f"scan-flags contains {EXT_PLACEHOLDER!r} but no --extensions-dir " "was provided") + if needs_rules and rules_dir is None: + raise ValueError( + f"scan-flags contains {RULES_PLACEHOLDER!r} but no rules " + "directory is available") ext_str = str(extensions_dir.resolve()) if extensions_dir is not None else "" - return [tok.replace(EXT_PLACEHOLDER, ext_str) for tok in flags] + rules_str = str(rules_dir.resolve()) if rules_dir is not None else "" + return [ + tok.replace(EXT_PLACEHOLDER, ext_str).replace(RULES_PLACEHOLDER, rules_str) + for tok in flags + ] -def _build_scan_cmd(opentaint: Path, analyzer_jar: Path, rules_dir: Path, +def _build_scan_cmd(opentaint: Path, analyzer_jar: Path, model_dir: Path, sarif: Path, max_memory: str, scan_timeout_seconds: int, extra_flags: list[str]) -> list[str]: """Assemble the ``opentaint scan`` argv. - The built-in ``--ruleset `` is always passed first. Any - additional ``--ruleset`` entries supplied by the project (via - ``scan-flags`` in ``repos.yaml``) are appended verbatim through - ``extra_flags`` and are *merged* with the built-in pack by the analyzer - — ``--ruleset`` is a ``stringArray``, so repetition is additive rather - than overriding. 
+ Ruleset policy (mirrors ``opentaint scan --help``'s ``default [builtin]``): + + * If ``extra_flags`` already contains at least one ``--ruleset`` token, + it is passed through verbatim — the project is in full control and + may layer ``builtin``, YAML files, and directories in any order. + * Otherwise the runner inserts ``--ruleset builtin`` so the JAR-baked + rule pack is used. We deliberately do **not** auto-add the + source-tree rule pack staged at ``/rules`` — doing so + historically misled the analyzer into reporting that pack as the + built-in one. Projects that still want it can ask for it explicitly + with ``--ruleset {rules}`` in their ``scan-flags``. """ + if any(tok == RULESET_FLAG for tok in extra_flags): + ruleset_args: list[str] = [] + else: + ruleset_args = [RULESET_FLAG, BUILTIN_RULESET] return [ str(opentaint), "scan", "--debug", "--experimental", "--analyzer-jar", str(analyzer_jar), - "--ruleset", str(rules_dir), + *ruleset_args, "--project-model", str(model_dir), "--output", str(sarif), "--timeout", f"{scan_timeout_seconds}s", @@ -185,7 +232,8 @@ def run_pipeline(build_dir: Path, project_dir: Path, results_dir: Path, if not p.exists(): raise FileNotFoundError(f"missing build artifact: {p}") - expanded_extra_flags = _expand_scan_flags(scan_flags or [], extensions_dir) + expanded_extra_flags = _expand_scan_flags( + scan_flags or [], extensions_dir, rules_dir=rules_dir) # Keep project-model OUTSIDE results_dir so it is never cached or uploaded # as part of the per-project result bundle (multi-GB per project otherwise). 
@@ -205,7 +253,6 @@ def run_pipeline(build_dir: Path, project_dir: Path, results_dir: Path, scan_cmd = _build_scan_cmd( opentaint=opentaint, analyzer_jar=analyzer_jar, - rules_dir=rules_dir, model_dir=model_dir, sarif=sarif, max_memory=max_memory, diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 99e0415..a32e048 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -334,13 +334,12 @@ def test_expand_scan_flags_empty_passthrough(): assert run_analysis._expand_scan_flags([], None) == [] -# ── run_analysis: scan-cmd assembly + multi-ruleset merging ─────────────── +# ── run_analysis: scan-cmd assembly + ruleset policy ─────────────────── def _scan_cmd(extra_flags, tmp_path): return run_analysis._build_scan_cmd( opentaint=tmp_path / "opentaint", analyzer_jar=tmp_path / "analyzer.jar", - rules_dir=tmp_path / "built-in-rules", model_dir=tmp_path / "model", sarif=tmp_path / "out.sarif", max_memory="8G", @@ -354,33 +353,44 @@ def _rulesets_in(cmd): return [cmd[i + 1] for i, tok in enumerate(cmd) if tok == "--ruleset"] -def test_build_scan_cmd_default_passes_builtin_ruleset(tmp_path): +def test_build_scan_cmd_default_inserts_builtin_sentinel(tmp_path): + """With no project --ruleset, the runner must default to ``builtin`` — + NOT to a filesystem path (which opentaint would classify as a + 'User ruleset').""" cmd = _scan_cmd([], tmp_path) - assert _rulesets_in(cmd) == [str(tmp_path / "built-in-rules")] + assert _rulesets_in(cmd) == ["builtin"] -def test_build_scan_cmd_extra_ruleset_merges_with_builtin(tmp_path): +def test_build_scan_cmd_project_ruleset_disables_default(tmp_path): + """Once the project supplies any --ruleset, the runner adds none of + its own — the project is in full control and may layer ``builtin`` + plus custom rules in any order.""" cmd = _scan_cmd( ["--ruleset", "/abs/custom-rules.yaml", "--ruleset", "/abs/rules-dir"], tmp_path, ) - # Built-in first; custom values appended in order — analyzer treats - # --ruleset as 
stringArray and merges them. assert _rulesets_in(cmd) == [ - str(tmp_path / "built-in-rules"), "/abs/custom-rules.yaml", "/abs/rules-dir", ] +def test_build_scan_cmd_project_can_request_builtin_explicitly(tmp_path): + """Projects may stack ``builtin`` alongside their own rule files.""" + cmd = _scan_cmd( + ["--ruleset", "builtin", "--ruleset", "/abs/extra.yaml"], + tmp_path, + ) + # Exactly what the project asked for — no duplicated `builtin`. + assert _rulesets_in(cmd) == ["builtin", "/abs/extra.yaml"] + + def test_build_scan_cmd_extra_flags_appear_after_reserved(tmp_path): cmd = _scan_cmd(["--rule-id", "java.taint.sql-injection"], tmp_path) - # Reserved tokens are present… for required in ("--analyzer-jar", "--project-model", "--output", "--timeout", "--max-memory", "--debug", "--experimental"): assert required in cmd, f"missing reserved flag {required}" - # …and the user's extra flag is appended verbatim at the tail. assert cmd[-2:] == ["--rule-id", "java.taint.sql-injection"] @@ -392,13 +402,46 @@ def test_build_scan_cmd_end_to_end_through_expand(tmp_path): ext, ) cmd = _scan_cmd(expanded, tmp_path) + # Project supplied --ruleset, so runner must NOT add its own default. 
assert _rulesets_in(cmd) == [ - str(tmp_path / "built-in-rules"), f"{ext.resolve()}/proj/rules.yaml", f"{ext.resolve()}/proj/rules", ] +# ── run_analysis: {rules} placeholder ─────────────────────────────── + +def test_expand_rules_placeholder(tmp_path): + rules = tmp_path / "rules"; rules.mkdir() + expanded = run_analysis._expand_scan_flags( + ["--ruleset", "{rules}"], extensions_dir=None, rules_dir=rules, + ) + assert expanded == ["--ruleset", str(rules.resolve())] + + +def test_expand_rules_placeholder_missing_rules_dir_raises(): + with pytest.raises(ValueError): + run_analysis._expand_scan_flags( + ["--ruleset", "{rules}"], extensions_dir=None, rules_dir=None, + ) + + +def test_expand_mixed_placeholders(tmp_path): + ext = tmp_path / "ext"; ext.mkdir() + rules = tmp_path / "rules"; rules.mkdir() + expanded = run_analysis._expand_scan_flags( + ["--ruleset", "builtin", + "--ruleset", "{rules}", + "--ruleset", "{ext}/proj/custom.yaml"], + extensions_dir=ext, rules_dir=rules, + ) + assert expanded == [ + "--ruleset", "builtin", + "--ruleset", str(rules.resolve()), + "--ruleset", f"{ext.resolve()}/proj/custom.yaml", + ] + + # ── compare_sarif: markdown rendering smoke test ────────────────────────── def test_render_markdown_smoke(): From 7377356bf0ec6c1c6764e94c106b1fb16e5a0fe8 Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Thu, 18 Jun 2026 02:12:11 +0300 Subject: [PATCH 4/6] Fix --- README.md | 31 +++++++---- projects/extensions/README.md | 75 +++++++++++++++---------- scripts/run_analysis.py | 100 ++++++++++++++++++++++++++-------- tests/test_scripts.py | 62 ++++++++++++++++----- 4 files changed, 190 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 3b5a796..726ca9e 100644 --- a/README.md +++ b/README.md @@ -102,17 +102,24 @@ take a single file or a whole directory, and may be repeated): Flags reserved by the runner (`--analyzer-jar`, `--project-model`, `--output`, `--timeout`, `--max-memory`, 
`--debug`, `--experimental`) must -not be repeated here. `--ruleset` is **not** reserved — the runner only -inserts the documented default `--ruleset builtin` when `scan-flags` -contains no `--ruleset` of its own. Supplying any `--ruleset` value puts -the project in full control of which rule packs are loaded and in what -order. Example — stack the JAR-baked `builtin` pack with a custom YAML -file and a whole directory of rules: +not be repeated here. `--ruleset` is **not** reserved — if `scan-flags` +contains no `--ruleset`, the runner inserts a default pointing at the +staged source-tree rule pack at `/rules` (copied from +`opentaint/rules/ruleset` at build time). Supplying any `--ruleset` value +puts the project in full control of which rule packs are loaded and in +what order. + +The literal value `builtin` is **rewritten by the runner** to that same +staged pack — the CLI would otherwise try to fetch the pack from a GitHub +release that does not exist for in-development opentaint SHAs (and the +current CLI's URL is malformed, producing a 404 against +`api.github.com/repos/seqra/seqra/opentaint/…`). Example — stack +`builtin` with a custom YAML file and a custom rules directory: ```yaml scan-flags: - --ruleset - - builtin # JAR-baked default pack + - builtin # → /rules (staged) - --ruleset - "{ext}/my-project/rules/sql-injection.yaml" # custom YAML file - --ruleset @@ -122,13 +129,13 @@ scan-flags: Two placeholders are expanded inside `scan-flags`: * `{ext}` → absolute path of `projects/extensions/`. -* `{rules}` → absolute path of the opentaint source-tree rule pack staged - at `/rules`. Use this to layer that pack (which is **not** the - same as `builtin`) on top of, or instead of, the JAR-baked rules. +* `{rules}` → absolute path of the staged source-tree pack at + `/rules`. After the `builtin` rewrite, mostly redundant for + `--ruleset` values; still useful for other flags that take a rules + directory. 
See [`projects/extensions/README.md`](projects/extensions/README.md) for -the layout convention and a deeper look at `builtin` vs `{rules}` vs -custom YAMLs. +the layout convention and the rationale behind the `builtin` rewrite. ## Open items diff --git a/projects/extensions/README.md b/projects/extensions/README.md index d546372..3443e0f 100644 --- a/projects/extensions/README.md +++ b/projects/extensions/README.md @@ -71,28 +71,33 @@ Flags that don't reference any extension file (e.g. `--rule-id`, ## Custom rulesets -The analyzer's `--ruleset` is a `stringArray` whose default is `[builtin]` -(see `opentaint scan --help`). The literal value **`builtin`** is a sentinel -that tells the analyzer to load the rule pack baked into its JAR. Any other -value is treated as a path — a YAML file or a directory of `*.yml` / `*.yaml` -files — and reported as a *User ruleset*. +The analyzer's `--ruleset` is a `stringArray` (default `[builtin]` per +`opentaint scan --help`). Each value is either: -The runner does **not** force any `--ruleset` flag: +* The literal **`builtin`** — normally fetched by the CLI from a GitHub + release tagged `rules/`. The bench tests in-development + opentaint SHAs whose rule packs are not (yet) released, so the runner + **intercepts** this sentinel and points the analyzer at the same + source-tree pack that would have been packaged into the release. +* A path to a YAML file, or a directory of `*.yml` / `*.yaml` files. + Reported by the CLI under `User ruleset`. -* If `scan-flags` contains no `--ruleset`, the runner inserts the documented - default `--ruleset builtin` so the JAR-baked pack is used. -* If `scan-flags` supplies one or more `--ruleset` tokens, they are passed - verbatim and the runner adds none of its own. The project is in full - control of what gets loaded and in what order. 
+The runner's policy: -To layer the built-in pack with your own rules, ask for `builtin` explicitly: +* If `scan-flags` contains no `--ruleset`, the runner inserts a default + pointing at the staged source-tree pack — same effect as `builtin`. +* If `scan-flags` supplies one or more `--ruleset` tokens, they are + passed verbatim except that every `builtin` value is rewritten to the + staged source-tree pack's absolute path. The project owns the list. + +Example — use `builtin` plus a custom YAML file and a custom directory: ```yaml - name: spring-petclinic git: https://github.com/spring-projects/spring-petclinic.git head: 3e1ce239f4488f20abda24441388a515ea55a815 scan-flags: - # JAR-baked built-in pack — the literal sentinel, not a path: + # Rewritten by the runner to `/rules` (the staged pack): - --ruleset - builtin # A single custom YAML file: @@ -104,28 +109,40 @@ To layer the built-in pack with your own rules, ask for `builtin` explicitly: ``` Resulting analyzer command (conceptually): -`--ruleset builtin --ruleset /.../sql-injection.yaml --ruleset /.../rules`. +`--ruleset /rules --ruleset /.../sql-injection.yaml --ruleset /.../rules`. + +Use `--rule-id` to narrow which rules from those sets actually run. + +### Why we rewrite `builtin` + +The CLI resolves `--ruleset builtin` by fetching +`https://api.github.com/repos///releases/tags/rules/`. +That path is unusable in the bench for two reasons: -Use `--rule-id` to narrow which rules from those sets are actually run. +1. The bench analyses in-development SHAs whose rule packs may not be + published as a release. +2. The current CLI's URL template duplicates the org segment, producing a + 404 (`api.github.com/repos/seqra/seqra/opentaint/…`). -### The `{rules}` placeholder — opentaint source-tree rules +`build_opentaint.sh` copies `opentaint/rules/ruleset` — the same YAMLs that +would have been packaged into the release — into `/rules`. 
+From the analyzer's perspective the rules are identical; only the CLI's +coverage report labels the pack as `User ruleset` rather than `Bundled`, +which is purely cosmetic. -The build artifact ships the YAML rule pack from the opentaint source tree -at `/rules`. It is **not** the same as `builtin` (which lives -inside the analyzer JAR). If you want to layer that pack on top of — or -instead of — `builtin`, reference it via the `{rules}` placeholder: +### The `{rules}` placeholder + +`{rules}` is an explicit alias for the same staged pack. After the +`builtin` rewrite described above it is mostly redundant, but it remains +useful in tokens that aren't `--ruleset` values (for example, if a future +flag accepts a rules directory): ```yaml scan-flags: - - --ruleset - - builtin # JAR-baked pack - - --ruleset - - "{rules}" # opentaint source-tree pack staged at /rules + - --some-future-flag + - "{rules}/foo.yaml" ``` -The runner expands `{rules}` to the absolute path of `/rules` at -analysis time. - ## Reserved flags The runner already sets these and you should **not** repeat them in @@ -140,5 +157,5 @@ The runner already sets these and you should **not** repeat them in - `--experimental` `--ruleset` is **not** reserved. The runner only inserts a default -`--ruleset builtin` when the project omits the flag entirely; supplying -any `--ruleset` value disables the default. +`--ruleset` pointing at the staged source-tree pack when the project omits +the flag entirely; supplying any `--ruleset` value disables the default. diff --git a/scripts/run_analysis.py b/scripts/run_analysis.py index 243dd58..2b4a20b 100755 --- a/scripts/run_analysis.py +++ b/scripts/run_analysis.py @@ -28,12 +28,20 @@ staged at ``/rules`` (use this only when you want those YAMLs layered on top of, or instead of, the analyzer's ``builtin`` pack). -Ruleset handling: the runner does **not** force any ``--ruleset`` flag. 
If -the project's ``scan-flags`` contains no ``--ruleset``, the runner inserts -the analyzer's documented default ``--ruleset builtin`` so the JAR-baked -rule pack is loaded. If the project supplies one or more ``--ruleset`` -tokens (including the literal ``builtin``), they are passed verbatim and -the runner adds nothing of its own. +Ruleset handling. In the test bench ``builtin`` is treated as an alias for +the rule pack staged at ``/rules`` (a copy of +``opentaint/rules/ruleset`` made by ``build_opentaint.sh``). The CLI's +native ``--ruleset builtin`` sentinel downloads the pack from a GitHub +release tagged ``rules/`` — unavailable for in-development +opentaint SHAs (and 404s anyway due to a CLI URL bug). The staged copy IS +the built-in pack for the revision under test, so we silently rewrite the +sentinel to its absolute path. Two rules result: + +* If ``scan-flags`` contains no ``--ruleset``, the runner inserts + ``--ruleset /rules`` as the default. +* Each ``--ruleset builtin`` pair in ``scan-flags`` is rewritten to + ``--ruleset /rules`` before invoking opentaint. All other + ``--ruleset`` values are passed through verbatim. Exit codes: 0 status.json was written, autobuilder (compile step) succeeded. @@ -61,8 +69,12 @@ EXT_PLACEHOLDER = "{ext}" RULES_PLACEHOLDER = "{rules}" -# Sentinel value accepted by `opentaint scan --ruleset`. Tells the analyzer -# to use the rule pack baked into its JAR rather than a file/directory path. +# Sentinel value accepted by `opentaint scan --ruleset`. The CLI resolves it +# by downloading a GitHub release tagged `rules/` — not viable for +# in-development SHAs analysed by this bench (and 404s due to a CLI URL +# bug). The runner intercepts the sentinel and points the analyzer at the +# staged source-tree pack at `/rules` instead. See +# `_resolve_builtin_ruleset`. BUILTIN_RULESET = "builtin" # Token (as it appears in argv) that introduces a ruleset value. 
Used to @@ -183,28 +195,69 @@ def _expand_scan_flags(flags: list[str], ] -def _build_scan_cmd(opentaint: Path, analyzer_jar: Path, +def _resolve_builtin_ruleset(flags: list[str], rules_dir: Path) -> list[str]: + """Rewrite every ``--ruleset builtin`` pair to ``--ruleset ``. + + The opentaint CLI resolves the ``builtin`` sentinel by fetching the rule + pack from a GitHub release tagged ``rules/``. That download is + unusable in this bench because: + + * we test in-development opentaint SHAs whose rule packs are not (yet) + published as releases; + * the CLI's URL template double-includes the org, producing a 404 + (``api.github.com/repos/seqra/seqra/opentaint/…``). + + The staged ```` directory is the same source-tree pack that + would have been packaged into the release, copied at build time by + ``build_opentaint.sh``. From the analyzer's perspective the rules are + identical; only the CLI's coverage report labels the pack as a + ``User ruleset`` instead of ``Bundled`` — a cosmetic mismatch we accept + in exchange for working offline and against unreleased SHAs. + + Non-``builtin`` ``--ruleset`` values and unrelated tokens are returned + unchanged. + """ + if not flags: + return [] + rules_path = str(rules_dir.resolve()) + out: list[str] = [] + i = 0 + n = len(flags) + while i < n: + tok = flags[i] + out.append(tok) + if tok == RULESET_FLAG and i + 1 < n: + value = flags[i + 1] + out.append(rules_path if value == BUILTIN_RULESET else value) + i += 2 + else: + i += 1 + return out + + +def _build_scan_cmd(opentaint: Path, analyzer_jar: Path, rules_dir: Path, model_dir: Path, sarif: Path, max_memory: str, scan_timeout_seconds: int, extra_flags: list[str]) -> list[str]: """Assemble the ``opentaint scan`` argv. 
- Ruleset policy (mirrors ``opentaint scan --help``'s ``default [builtin]``): - - * If ``extra_flags`` already contains at least one ``--ruleset`` token, - it is passed through verbatim — the project is in full control and - may layer ``builtin``, YAML files, and directories in any order. - * Otherwise the runner inserts ``--ruleset builtin`` so the JAR-baked - rule pack is used. We deliberately do **not** auto-add the - source-tree rule pack staged at ``/rules`` — doing so - historically misled the analyzer into reporting that pack as the - built-in one. Projects that still want it can ask for it explicitly - with ``--ruleset {rules}`` in their ``scan-flags``. + Ruleset policy: + + * ``builtin`` everywhere in ``extra_flags`` is first rewritten to the + absolute path of ``rules_dir`` (see ``_resolve_builtin_ruleset``). + * If after that rewrite ``extra_flags`` still contains at least one + ``--ruleset`` token, it is passed through verbatim — the project is + in full control and may layer the staged pack, custom YAML files + and directories in any order. + * Otherwise the runner inserts ``--ruleset `` as the + implicit default, matching the spirit of the CLI's + ``default [builtin]``. 
""" - if any(tok == RULESET_FLAG for tok in extra_flags): + resolved_flags = _resolve_builtin_ruleset(extra_flags, rules_dir) + if any(tok == RULESET_FLAG for tok in resolved_flags): ruleset_args: list[str] = [] else: - ruleset_args = [RULESET_FLAG, BUILTIN_RULESET] + ruleset_args = [RULESET_FLAG, str(rules_dir.resolve())] return [ str(opentaint), "scan", "--debug", "--experimental", @@ -214,7 +267,7 @@ def _build_scan_cmd(opentaint: Path, analyzer_jar: Path, "--output", str(sarif), "--timeout", f"{scan_timeout_seconds}s", "--max-memory", max_memory, - *extra_flags, + *resolved_flags, ] @@ -253,6 +306,7 @@ def run_pipeline(build_dir: Path, project_dir: Path, results_dir: Path, scan_cmd = _build_scan_cmd( opentaint=opentaint, analyzer_jar=analyzer_jar, + rules_dir=rules_dir, model_dir=model_dir, sarif=sarif, max_memory=max_memory, diff --git a/tests/test_scripts.py b/tests/test_scripts.py index a32e048..1600d7c 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -336,10 +336,18 @@ def test_expand_scan_flags_empty_passthrough(): # ── run_analysis: scan-cmd assembly + ruleset policy ─────────────────── +# Centralised so all tests use the same staged-rules path — the runner now +# rewrites ``builtin`` to this path and uses it as the implicit default. 
+_STAGED_RULES_SUBDIR = "build-rules" + + def _scan_cmd(extra_flags, tmp_path): + rules_dir = tmp_path / _STAGED_RULES_SUBDIR + rules_dir.mkdir(exist_ok=True) return run_analysis._build_scan_cmd( opentaint=tmp_path / "opentaint", analyzer_jar=tmp_path / "analyzer.jar", + rules_dir=rules_dir, model_dir=tmp_path / "model", sarif=tmp_path / "out.sarif", max_memory="8G", @@ -348,23 +356,26 @@ def _scan_cmd(extra_flags, tmp_path): ) +def _staged_rules(tmp_path): + return str((tmp_path / _STAGED_RULES_SUBDIR).resolve()) + + def _rulesets_in(cmd): """Return the values that follow every occurrence of '--ruleset' in cmd.""" return [cmd[i + 1] for i, tok in enumerate(cmd) if tok == "--ruleset"] -def test_build_scan_cmd_default_inserts_builtin_sentinel(tmp_path): - """With no project --ruleset, the runner must default to ``builtin`` — - NOT to a filesystem path (which opentaint would classify as a - 'User ruleset').""" +def test_build_scan_cmd_default_uses_staged_rules(tmp_path): + """With no project --ruleset, the runner defaults to the staged + source-tree pack at ``/rules`` — *not* the CLI's ``builtin`` + sentinel (which would trigger a 404 GitHub-release download).""" cmd = _scan_cmd([], tmp_path) - assert _rulesets_in(cmd) == ["builtin"] + assert _rulesets_in(cmd) == [_staged_rules(tmp_path)] def test_build_scan_cmd_project_ruleset_disables_default(tmp_path): - """Once the project supplies any --ruleset, the runner adds none of - its own — the project is in full control and may layer ``builtin`` - plus custom rules in any order.""" + """Once the project supplies any --ruleset, the runner adds no default + of its own — the project owns the ruleset list.""" cmd = _scan_cmd( ["--ruleset", "/abs/custom-rules.yaml", "--ruleset", "/abs/rules-dir"], @@ -376,14 +387,34 @@ def test_build_scan_cmd_project_ruleset_disables_default(tmp_path): ] -def test_build_scan_cmd_project_can_request_builtin_explicitly(tmp_path): - """Projects may stack ``builtin`` alongside their own rule 
files.""" +def test_build_scan_cmd_translates_builtin_sentinel(tmp_path): + """``--ruleset builtin`` from the project is silently rewritten to the + staged source-tree pack. The CLI's network-download path is bypassed.""" cmd = _scan_cmd( ["--ruleset", "builtin", "--ruleset", "/abs/extra.yaml"], tmp_path, ) - # Exactly what the project asked for — no duplicated `builtin`. - assert _rulesets_in(cmd) == ["builtin", "/abs/extra.yaml"] + assert _rulesets_in(cmd) == [ + _staged_rules(tmp_path), # was 'builtin' + "/abs/extra.yaml", + ] + + +def test_build_scan_cmd_only_builtin_disables_default(tmp_path): + """A lone ``--ruleset builtin`` must not provoke a duplicate default — + the project supplied a --ruleset, so the runner adds nothing.""" + cmd = _scan_cmd(["--ruleset", "builtin"], tmp_path) + assert _rulesets_in(cmd) == [_staged_rules(tmp_path)] + + +def test_build_scan_cmd_non_value_builtin_untouched(tmp_path): + """A stray ``builtin`` token NOT in --ruleset value position must be + left alone (defensive against unrelated future flags).""" + cmd = _scan_cmd(["--rule-id", "builtin"], tmp_path) + # Default --ruleset still gets inserted (project supplied none). + assert _rulesets_in(cmd) == [_staged_rules(tmp_path)] + # The lone 'builtin' rides the tail untouched. + assert cmd[-2:] == ["--rule-id", "builtin"] def test_build_scan_cmd_extra_flags_appear_after_reserved(tmp_path): @@ -397,13 +428,16 @@ def test_build_scan_cmd_extra_flags_appear_after_reserved(tmp_path): def test_build_scan_cmd_end_to_end_through_expand(tmp_path): ext = tmp_path / "ext"; ext.mkdir() expanded = run_analysis._expand_scan_flags( - ["--ruleset", "{ext}/proj/rules.yaml", + ["--ruleset", "builtin", + "--ruleset", "{ext}/proj/rules.yaml", "--ruleset", "{ext}/proj/rules"], ext, ) cmd = _scan_cmd(expanded, tmp_path) - # Project supplied --ruleset, so runner must NOT add its own default. 
+ # `builtin` is translated to the staged pack; project paths are kept + # verbatim; runner adds no default of its own. assert _rulesets_in(cmd) == [ + _staged_rules(tmp_path), f"{ext.resolve()}/proj/rules.yaml", f"{ext.resolve()}/proj/rules", ] From b51db90821906bcc3816ca9622b4a31333cd548a Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Thu, 18 Jun 2026 02:37:26 +0300 Subject: [PATCH 5/6] Fix --- projects/repos.yaml | 234 ++++++++++++++++++++++---------------------- 1 file changed, 117 insertions(+), 117 deletions(-) diff --git a/projects/repos.yaml b/projects/repos.yaml index dd447db..b0a12e1 100644 --- a/projects/repos.yaml +++ b/projects/repos.yaml @@ -1,121 +1,121 @@ repositories: -# - name: seqra-java-spring-demo -# git: https://github.com/seqra/seqra-java-spring-demo.git -# head: 644cb1e6aa0152d193015456cf4de8a121750312 -# java-version: 21 -# -# - name: Stirling-PDF -# git: https://github.com/Stirling-Tools/Stirling-PDF.git -# head: d80e627899daf804f1390a0b75a1da3fd093aa84 -# -# - name: hertzbeat -# git: https://github.com/apache/hertzbeat.git -# head: 668106d445fe69e7128b2bb14f3e21c181bd8c55 -# -# - name: DWSurvey -# git: https://github.com/wkeyuan/DWSurvey.git -# head: 6a7b10e90f4e39c49b5afe4177ed23836953006b -# -# - name: CordysCRM -# git: https://github.com/1Panel-dev/CordysCRM.git -# head: bb19880b457da3b59b771a42aa98812919bd99e3 -# java-version: 21 -# -# - name: jpress -# git: https://github.com/JPressProjects/jpress.git -# head: 9902ee392656f8c5939d48f9c1c2d809e25a4ea1 -# -# - name: MCMS -# git: https://github.com/ming-soft/MCMS.git -# head: d3d8b5257370dc15c0e0c2aea69754d46e051336 -# -# - name: openmrs-core -# git: https://github.com/openmrs/openmrs-core.git -# head: 494c57b334249fb4230d58986319deca21cb4151 -# java-version: 21 -# -# - name: ruoyi-vue-pro -# git: https://github.com/YunaiV/ruoyi-vue-pro.git -# head: b29c5f0e40cfab289613992ddaddfb08d60a3e44 -# -# - name: spring-petclinic -# git: 
https://github.com/spring-projects/spring-petclinic.git -# head: 3e1ce239f4488f20abda24441388a515ea55a815 -# -# - name: tms -# git: https://github.com/xiweicheng/tms.git -# head: 9510b7cca4cd1f82114e5df0c13a491da4d8d818 -# java-version: 8 -# -# - name: thingsboard -# git: https://github.com/thingsboard/thingsboard.git -# head: b31176433e8065ae4cb2a285bcd5e03fce9014f5 -# max-memory: 12G -# -# - name: yudao-cloud -# git: https://github.com/YunaiV/yudao-cloud.git -# head: 99ffe0fd41c2b783ef83e6c8704e6e264603b484 -# max-memory: 12G -# -# - name: halo -# git: https://github.com/halo-dev/halo.git -# head: 48e191daaa16953acc4db298b2aad8c485466ccd -# java-version: 21 -# -# - name: kkFileView -# git: https://github.com/kekingcn/kkFileView.git -# head: 92ca92bee6d4682f2eb6f388174d39afd2263874 -# java-version: 21 -# -# - name: apollo -# git: https://github.com/apolloconfig/apollo.git -# head: 9de9a1580f5eb3d9bfa7a2b95b95c693bf2032a4 -# -# - name: snowy -# git: https://github.com/xiaonuobase/Snowy.git -# head: 315e239c9db373a2de5f7298c9623f2ab5424d5c -# -# - name: litemall -# git: https://github.com/linlinjava/litemall.git -# head: a1ef964a718b7277925b19ea26afe78ea3a1d325 -# -# - name: shopizer -# git: https://github.com/shopizer-ecommerce/shopizer.git -# head: 6a4a0a65a3408ee8f62597b51d1b3aac24b77dee -# -# - name: maku-boot -# git: https://github.com/makunet/maku-boot.git -# head: f9dd43c8913d5acbb46d061e4a9fc47c42f3034f -# -# - name: jeesite5 -# git: https://github.com/thinkgem/jeesite5.git -# head: 7be0a1c5bd5349933e7e75c97e4f6bd1d529725e -# -# - name: WebGoat -# git: https://github.com/WebGoat/WebGoat.git -# head: d4238ab406f27eea4aff8c86443cafbc220431c4 -# java-version: 25 -# -# - name: joyagent-jdgenie -# git: https://github.com/jd-opensource/joyagent-jdgenie.git -# head: 7142f4156cb6237cfef0b44a3a99d66e43fa3a0f -# -# - name: klaw -# git: https://github.com/Aiven-Open/klaw.git -# head: c4b1188d0e388c2459eecef1e3ac6ab0dce6e7e3 -# -# - name: spring-boot-seckill -# git: 
https://github.com/halegreen/spring-boot-seckill.git -# head: a0b979bbc533d9e2cae198f9e6f708b8c39861ea -# java-version: 8 -# -# - name: roncoo-education -# git: https://github.com/roncoo/roncoo-education.git -# head: 10e96084023a3488014e7e796cd3073edcd9ba1a -# -# - name: continew-admin -# git: https://github.com/continew-org/continew-admin.git -# head: 9776dfbf311d652b30a4599ac7989c35b29da372 + - name: seqra-java-spring-demo + git: https://github.com/seqra/seqra-java-spring-demo.git + head: 644cb1e6aa0152d193015456cf4de8a121750312 + java-version: 21 + + - name: Stirling-PDF + git: https://github.com/Stirling-Tools/Stirling-PDF.git + head: d80e627899daf804f1390a0b75a1da3fd093aa84 + + - name: hertzbeat + git: https://github.com/apache/hertzbeat.git + head: 668106d445fe69e7128b2bb14f3e21c181bd8c55 + + - name: DWSurvey + git: https://github.com/wkeyuan/DWSurvey.git + head: 6a7b10e90f4e39c49b5afe4177ed23836953006b + + - name: CordysCRM + git: https://github.com/1Panel-dev/CordysCRM.git + head: bb19880b457da3b59b771a42aa98812919bd99e3 + java-version: 21 + + - name: jpress + git: https://github.com/JPressProjects/jpress.git + head: 9902ee392656f8c5939d48f9c1c2d809e25a4ea1 + + - name: MCMS + git: https://github.com/ming-soft/MCMS.git + head: d3d8b5257370dc15c0e0c2aea69754d46e051336 + + - name: openmrs-core + git: https://github.com/openmrs/openmrs-core.git + head: 494c57b334249fb4230d58986319deca21cb4151 + java-version: 21 + + - name: ruoyi-vue-pro + git: https://github.com/YunaiV/ruoyi-vue-pro.git + head: b29c5f0e40cfab289613992ddaddfb08d60a3e44 + + - name: spring-petclinic + git: https://github.com/spring-projects/spring-petclinic.git + head: 3e1ce239f4488f20abda24441388a515ea55a815 + + - name: tms + git: https://github.com/xiweicheng/tms.git + head: 9510b7cca4cd1f82114e5df0c13a491da4d8d818 + java-version: 8 + + - name: thingsboard + git: https://github.com/thingsboard/thingsboard.git + head: b31176433e8065ae4cb2a285bcd5e03fce9014f5 + max-memory: 12G + + - name: 
yudao-cloud + git: https://github.com/YunaiV/yudao-cloud.git + head: 99ffe0fd41c2b783ef83e6c8704e6e264603b484 + max-memory: 12G + + - name: halo + git: https://github.com/halo-dev/halo.git + head: 48e191daaa16953acc4db298b2aad8c485466ccd + java-version: 21 + + - name: kkFileView + git: https://github.com/kekingcn/kkFileView.git + head: 92ca92bee6d4682f2eb6f388174d39afd2263874 + java-version: 21 + + - name: apollo + git: https://github.com/apolloconfig/apollo.git + head: 9de9a1580f5eb3d9bfa7a2b95b95c693bf2032a4 + + - name: snowy + git: https://github.com/xiaonuobase/Snowy.git + head: 315e239c9db373a2de5f7298c9623f2ab5424d5c + + - name: litemall + git: https://github.com/linlinjava/litemall.git + head: a1ef964a718b7277925b19ea26afe78ea3a1d325 + + - name: shopizer + git: https://github.com/shopizer-ecommerce/shopizer.git + head: 6a4a0a65a3408ee8f62597b51d1b3aac24b77dee + + - name: maku-boot + git: https://github.com/makunet/maku-boot.git + head: f9dd43c8913d5acbb46d061e4a9fc47c42f3034f + + - name: jeesite5 + git: https://github.com/thinkgem/jeesite5.git + head: 7be0a1c5bd5349933e7e75c97e4f6bd1d529725e + + - name: WebGoat + git: https://github.com/WebGoat/WebGoat.git + head: d4238ab406f27eea4aff8c86443cafbc220431c4 + java-version: 25 + + - name: joyagent-jdgenie + git: https://github.com/jd-opensource/joyagent-jdgenie.git + head: 7142f4156cb6237cfef0b44a3a99d66e43fa3a0f + + - name: klaw + git: https://github.com/Aiven-Open/klaw.git + head: c4b1188d0e388c2459eecef1e3ac6ab0dce6e7e3 + + - name: spring-boot-seckill + git: https://github.com/halegreen/spring-boot-seckill.git + head: a0b979bbc533d9e2cae198f9e6f708b8c39861ea + java-version: 8 + + - name: roncoo-education + git: https://github.com/roncoo/roncoo-education.git + head: 10e96084023a3488014e7e796cd3073edcd9ba1a + + - name: continew-admin + git: https://github.com/continew-org/continew-admin.git + head: 9776dfbf311d652b30a4599ac7989c35b29da372 - name: conductor git: https://github.com/conductor-oss/conductor.git 
From 7ebc8ea67a1fb386ea0bfdf9d9863393785d90c6 Mon Sep 17 00:00:00 2001 From: Valentyn Sobol <8640896+Saloed@users.noreply.github.com> Date: Thu, 18 Jun 2026 02:45:56 +0300 Subject: [PATCH 6/6] Fix --- .github/workflows/regression.yaml | 2 +- README.md | 18 +++++++++ projects/repos.yaml | 5 ++- scripts/generate_matrix.py | 5 ++- scripts/run_analysis.py | 50 +++++++++++++++++++++++-- tests/test_scripts.py | 61 +++++++++++++++++++++++++++++++ 6 files changed, 135 insertions(+), 6 deletions(-) diff --git a/.github/workflows/regression.yaml b/.github/workflows/regression.yaml index 0d2c69b..261c0fc 100644 --- a/.github/workflows/regression.yaml +++ b/.github/workflows/regression.yaml @@ -305,7 +305,7 @@ jobs: --project-dir project-root \ --results-dir results-bundle \ --max-memory "${{ matrix.max_memory }}" \ - --timeout 1200 \ + --timeout "${{ matrix.compilation_timeout }}" \ --extensions-dir projects/extensions \ --scan-flags-json "$SCAN_FLAGS_JSON" rc=$? diff --git a/README.md b/README.md index 726ca9e..b6cfb46 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,24 @@ Full diff detail is available in the `regression-diff` artifact. | `tests/` | Unit tests for pure-Python logic. Run `python -m pytest tests`. | | `test-system-design-plan.md` | Design document (authoritative spec). | +## Per-project fields (`repos.yaml`) + +Each entry requires `name`, `git`, and `head` (a pinned commit/tag SHA). These +optional fields tune one project: + +| Field | Default | Purpose | +| -------------- | ------- | ------------------------------------------------------------ | +| `java-version` | `17` | JDK the project is compiled against (`actions/setup-java`). | +| `max-memory` | `8G` | Analyzer scan memory ceiling. | +| `compilation-timeout` | `1200` | Wall-clock seconds for the autobuilder compile step (and, less the usual margin, the scan). Raise for large reactors that pull from slow mirrors (e.g. `ruoyi-vue-pro`, `yudao-cloud`). 
| + +The autobuilder's Maven invocation runs with download-retry/timeout system +properties (`run_analysis.py:maven_resilient_env`) so a transient mirror hiccup +(e.g. an HTTP 502 from a project-pinned mirror) is retried rather than failing +the job. A project whose build is deterministically broken at its pinned `head` +(e.g. an inconsistent dev SNAPSHOT, or a non-Java module that fails to build) is +commented out with a `QUARANTINED` note explaining the cause. + ## Caching Per-project results (SARIF + `status.json` + analyzer log) are cached in GitHub diff --git a/projects/repos.yaml b/projects/repos.yaml index b0a12e1..a58ca39 100644 --- a/projects/repos.yaml +++ b/projects/repos.yaml @@ -37,6 +37,7 @@ repositories: - name: ruoyi-vue-pro git: https://github.com/YunaiV/ruoyi-vue-pro.git head: b29c5f0e40cfab289613992ddaddfb08d60a3e44 + compilation-timeout: 2700 - name: spring-petclinic git: https://github.com/spring-projects/spring-petclinic.git @@ -56,6 +57,7 @@ repositories: git: https://github.com/YunaiV/yudao-cloud.git head: 99ffe0fd41c2b783ef83e6c8704e6e264603b484 max-memory: 12G + compilation-timeout: 2700 - name: halo git: https://github.com/halo-dev/halo.git @@ -69,7 +71,7 @@ repositories: - name: apollo git: https://github.com/apolloconfig/apollo.git - head: 9de9a1580f5eb3d9bfa7a2b95b95c693bf2032a4 + head: a070aa11b81b84bc500a575bc41937c3522fea1e - name: snowy git: https://github.com/xiaonuobase/Snowy.git @@ -116,6 +118,7 @@ repositories: - name: continew-admin git: https://github.com/continew-org/continew-admin.git head: 9776dfbf311d652b30a4599ac7989c35b29da372 + compilation-timeout: 2700 - name: conductor git: https://github.com/conductor-oss/conductor.git diff --git a/scripts/generate_matrix.py b/scripts/generate_matrix.py index 5be6d44..09a72da 100755 --- a/scripts/generate_matrix.py +++ b/scripts/generate_matrix.py @@ -9,7 +9,7 @@ {"include": [ {"project": "spring-petclinic", "git": "...", "head": "...", - "java_version": "17", "max_memory": "8G", + 
"java_version": "17", "max_memory": "8G", "compilation_timeout": "1200", "ref_kind": "base", "analyzer_sha": ""}, ... ]} @@ -26,6 +26,7 @@ DEFAULT_JAVA = "17" DEFAULT_MEMORY = "8G" +DEFAULT_COMPILATION_TIMEOUT = "1200" def _matches_filter(name: str, patterns: list[str]) -> bool: @@ -79,6 +80,8 @@ def build_matrix(repos_path: Path, base_sha: str, new_sha: str, "head": repo["head"], "java_version": str(repo.get("java-version", DEFAULT_JAVA)), "max_memory": str(repo.get("max-memory", DEFAULT_MEMORY)), + "compilation_timeout": str( + repo.get("compilation-timeout", DEFAULT_COMPILATION_TIMEOUT)), # Emitted as a real JSON array — not a nested JSON-encoded # string — so the matrix payload contains no backslash # escapes that downstream shell + Python interpolation diff --git a/scripts/run_analysis.py b/scripts/run_analysis.py index 2b4a20b..2db6d9f 100755 --- a/scripts/run_analysis.py +++ b/scripts/run_analysis.py @@ -83,6 +83,44 @@ RULESET_FLAG = "--ruleset" +# Extra JVM system properties that make the autobuilder's Maven invocation +# resilient to the flaky public mirrors several benchmark projects pin in their +# own POM (e.g. jpress routes "central" through maven.aliyun.com, which returned +# HTTP 502 mid-run; huaweicloud is reachable but slow). We force the wagon +# transport — whose retry knobs are well-supported across Maven versions — and +# also set the maven-resolver ("aether") equivalents so retries apply whichever +# transport a given project's Maven (or its bundled wrapper) defaults to. These +# reach Maven via MAVEN_OPTS, which every mvn/mvnw launcher forwards to the JVM +# as -D system properties. The autobuilder forwards the inherited environment to +# its Maven subprocess (it is how the per-project JAVA_HOME reaches mvn); if a +# future autobuilder stops doing so, this degrades to a harmless no-op. 
+_MAVEN_RESILIENCE_PROPS = [ + "-Dmaven.resolver.transport=wagon", + "-Dmaven.wagon.http.retryHandler.count=5", + "-Dmaven.wagon.http.retryHandler.requestSentEnabled=true", + "-Dmaven.wagon.http.retryHandler.class=standard", + "-Dmaven.wagon.httpconnectionManager.ttlSeconds=120", + "-Dmaven.wagon.rto=120000", + "-Daether.connector.http.retryHandler.count=5", + "-Daether.connector.connectTimeout=120000", + "-Daether.connector.requestTimeout=120000", +] + + +def maven_resilient_env() -> dict[str, str]: + """Return a copy of the current environment with ``MAVEN_OPTS`` augmented by + the download-resilience system properties in ``_MAVEN_RESILIENCE_PROPS``. + + Any caller-supplied ``MAVEN_OPTS`` is preserved and our flags are appended, + so an operator can still tune memory etc. via the environment. + """ + env = os.environ.copy() + existing = env.get("MAVEN_OPTS", "").strip() + extra = " ".join(_MAVEN_RESILIENCE_PROPS) + env["MAVEN_OPTS"] = f"{existing} {extra}".strip() + return env + + _LOG_FILE_RE = re.compile(r"Log file:\s*(.+\.log)") # Analyzer prints a periodic sample like: # ... 
Memory usage: 21792087928/30064771072 (72,48%) @@ -145,12 +183,14 @@ def _copy_analyzer_log(stdout: str, dest: Path) -> Path | None: return None -def _run(cmd: list[str], timeout: int, log_fp) -> tuple[int, str, str, float]: +def _run(cmd: list[str], timeout: int, log_fp, + env: dict[str, str] | None = None) -> tuple[int, str, str, float]: log_fp.write(f"\n=== CMD === {' '.join(cmd)}\n") log_fp.flush() start = time.time() try: - r = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) + r = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, + env=env) rc, out, err = r.returncode, r.stdout, r.stderr except subprocess.TimeoutExpired as exc: rc = -1 @@ -321,7 +361,11 @@ def run_pipeline(build_dir: Path, project_dir: Path, results_dir: Path, "peak_memory_bytes": None} try: with run_log.open("w") as log_fp: - rc, out, err, _ = _run(compile_cmd, timeout, log_fp) + # Only the compile step shells out to the project's build tool + # (Maven), so the download-resilience env is scoped to it; the scan + # step is pure opentaint and inherits the default environment. + rc, out, err, _ = _run(compile_cmd, timeout, log_fp, + env=maven_resilient_env()) if rc != 0: status["status"] = "error" status["autobuilder_failed"] = True diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 1600d7c..12601ea 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -6,6 +6,7 @@ from __future__ import annotations import json +import os import sys from pathlib import Path @@ -518,3 +519,63 @@ def test_render_markdown_no_data_one_side(): ]) assert "110s (+10s)" in md assert "" in md + + +# ── run_analysis: Maven download resilience ───────────────────────────────── + +def test_maven_resilient_env_injects_retry_props(monkeypatch): + monkeypatch.delenv("MAVEN_OPTS", raising=False) + env = run_analysis.maven_resilient_env() + opts = env["MAVEN_OPTS"] + # Forces a transport whose retry knobs are honoured, and asks for retries. 
+ assert "-Dmaven.resolver.transport=wagon" in opts + assert "-Dmaven.wagon.http.retryHandler.count=5" in opts + # Resolver-native equivalent is present too, for Maven versions that ignore + # the wagon override. + assert "-Daether.connector.http.retryHandler.count=5" in opts + + +def test_maven_resilient_env_preserves_existing_opts(monkeypatch): + monkeypatch.setenv("MAVEN_OPTS", "-Xmx2g") + opts = run_analysis.maven_resilient_env()["MAVEN_OPTS"] + assert opts.startswith("-Xmx2g ") + assert "-Dmaven.wagon.http.retryHandler.count=5" in opts + + +def test_maven_resilient_env_is_a_copy(monkeypatch): + monkeypatch.delenv("MAVEN_OPTS", raising=False) + run_analysis.maven_resilient_env() + # The process environment must not be mutated as a side effect. + assert os.environ.get("MAVEN_OPTS") is None + + +# ── generate_matrix: per-project timeout ──────────────────────────────────── + +def _write_repos(tmp_path, body: str) -> Path: + p = tmp_path / "repos.yaml" + p.write_text(body) + return p + + +def test_matrix_compilation_timeout_default(tmp_path): + repos = _write_repos(tmp_path, """ +repositories: + - name: demo + git: https://example.com/demo.git + head: abc +""") + m = generate_matrix.build_matrix(repos, "base", "new", [], None) + assert all(c["compilation_timeout"] == generate_matrix.DEFAULT_COMPILATION_TIMEOUT + for c in m["include"]) + + +def test_matrix_compilation_timeout_override(tmp_path): + repos = _write_repos(tmp_path, """ +repositories: + - name: slowpoke + git: https://example.com/slowpoke.git + head: abc + compilation-timeout: 2700 +""") + m = generate_matrix.build_matrix(repos, "base", "new", [], None) + assert {c["compilation_timeout"] for c in m["include"]} == {"2700"}